digitalkin 0.3.2.dev7__py3-none-any.whl → 0.3.2.dev10__py3-none-any.whl

This diff compares publicly available package versions released to one of the supported registries. It is provided for informational purposes only and reflects the changes between versions as they appear in their public registries.
Files changed (30)
  1. digitalkin/__version__.py +1 -1
  2. digitalkin/grpc_servers/module_servicer.py +0 -11
  3. digitalkin/grpc_servers/utils/grpc_client_wrapper.py +2 -2
  4. digitalkin/grpc_servers/utils/utility_schema_extender.py +2 -1
  5. digitalkin/models/grpc_servers/models.py +91 -6
  6. digitalkin/models/module/module_context.py +136 -23
  7. digitalkin/models/module/setup_types.py +177 -260
  8. digitalkin/models/module/tool_cache.py +27 -187
  9. digitalkin/models/module/tool_reference.py +42 -45
  10. digitalkin/models/services/registry.py +0 -7
  11. digitalkin/modules/_base_module.py +85 -58
  12. digitalkin/services/registry/__init__.py +1 -1
  13. digitalkin/services/registry/default_registry.py +1 -1
  14. digitalkin/services/registry/grpc_registry.py +1 -1
  15. digitalkin/services/registry/registry_models.py +1 -29
  16. digitalkin/services/registry/registry_strategy.py +1 -1
  17. digitalkin/utils/schema_splitter.py +207 -0
  18. {digitalkin-0.3.2.dev7.dist-info → digitalkin-0.3.2.dev10.dist-info}/METADATA +1 -1
  19. {digitalkin-0.3.2.dev7.dist-info → digitalkin-0.3.2.dev10.dist-info}/RECORD +29 -22
  20. {digitalkin-0.3.2.dev7.dist-info → digitalkin-0.3.2.dev10.dist-info}/top_level.txt +1 -0
  21. modules/archetype_with_tools_module.py +244 -0
  22. monitoring/digitalkin_observability/__init__.py +46 -0
  23. monitoring/digitalkin_observability/http_server.py +150 -0
  24. monitoring/digitalkin_observability/interceptors.py +176 -0
  25. monitoring/digitalkin_observability/metrics.py +201 -0
  26. monitoring/digitalkin_observability/prometheus.py +137 -0
  27. monitoring/tests/test_metrics.py +172 -0
  28. digitalkin/models/module/module_helpers.py +0 -189
  29. {digitalkin-0.3.2.dev7.dist-info → digitalkin-0.3.2.dev10.dist-info}/WHEEL +0 -0
  30. {digitalkin-0.3.2.dev7.dist-info → digitalkin-0.3.2.dev10.dist-info}/licenses/LICENSE +0 -0
monitoring/digitalkin_observability/prometheus.py
@@ -0,0 +1,137 @@
+ """Prometheus metrics exporter for DigitalKin.
+
+ This module exports metrics in Prometheus text exposition format.
+ No external dependencies required.
+ """
+
+ from __future__ import annotations
+
+ from digitalkin_observability.metrics import get_metrics
+
+
+ class PrometheusExporter:
+     """Exports metrics in Prometheus text format.
+
+     Usage:
+         output = PrometheusExporter.export()
+         # Returns Prometheus-compatible text format
+     """
+
+     @staticmethod
+     def export() -> str:
+         """Generate Prometheus-compatible metrics output."""
+         snapshot = get_metrics().snapshot()
+         lines: list[str] = []
+
+         # Counters
+         lines.extend([
+             "# HELP digitalkin_jobs_started_total Total jobs started",
+             "# TYPE digitalkin_jobs_started_total counter",
+             f"digitalkin_jobs_started_total {snapshot['jobs_started_total']}",
+             "",
+             "# HELP digitalkin_jobs_completed_total Total jobs completed successfully",
+             "# TYPE digitalkin_jobs_completed_total counter",
+             f"digitalkin_jobs_completed_total {snapshot['jobs_completed_total']}",
+             "",
+             "# HELP digitalkin_jobs_failed_total Total jobs failed",
+             "# TYPE digitalkin_jobs_failed_total counter",
+             f"digitalkin_jobs_failed_total {snapshot['jobs_failed_total']}",
+             "",
+             "# HELP digitalkin_jobs_cancelled_total Total jobs cancelled",
+             "# TYPE digitalkin_jobs_cancelled_total counter",
+             f"digitalkin_jobs_cancelled_total {snapshot['jobs_cancelled_total']}",
+             "",
+             "# HELP digitalkin_messages_sent_total Total messages sent",
+             "# TYPE digitalkin_messages_sent_total counter",
+             f"digitalkin_messages_sent_total {snapshot['messages_sent_total']}",
+             "",
+             "# HELP digitalkin_heartbeats_sent_total Total heartbeats sent",
+             "# TYPE digitalkin_heartbeats_sent_total counter",
+             f"digitalkin_heartbeats_sent_total {snapshot['heartbeats_sent_total']}",
+             "",
+             "# HELP digitalkin_errors_total Total errors",
+             "# TYPE digitalkin_errors_total counter",
+             f"digitalkin_errors_total {snapshot['errors_total']}",
+             "",
+         ])
+
+         # Gauges
+         lines.extend([
+             "# HELP digitalkin_active_jobs Current number of active jobs",
+             "# TYPE digitalkin_active_jobs gauge",
+             f"digitalkin_active_jobs {snapshot['active_jobs']}",
+             "",
+             "# HELP digitalkin_active_connections Current number of active connections",
+             "# TYPE digitalkin_active_connections gauge",
+             f"digitalkin_active_connections {snapshot['active_connections']}",
+             "",
+             "# HELP digitalkin_total_queue_depth Total items in all job queues",
+             "# TYPE digitalkin_total_queue_depth gauge",
+             f"digitalkin_total_queue_depth {snapshot['total_queue_depth']}",
+             "",
+         ])
+
+         # Job duration histogram
+         lines.extend(PrometheusExporter._format_histogram(
+             "digitalkin_job_duration_seconds",
+             "Job execution duration in seconds",
+             snapshot["job_duration_seconds"],
+         ))
+
+         # gRPC request duration histogram
+         lines.extend(PrometheusExporter._format_histogram(
+             "digitalkin_grpc_request_duration_seconds",
+             "gRPC request duration in seconds",
+             snapshot["grpc_request_duration_seconds"],
+         ))
+
+         # Per-module breakdown
+         if snapshot["by_module"]:
+             lines.extend([
+                 "",
+                 "# HELP digitalkin_jobs_by_module Jobs breakdown by module and status",
+                 "# TYPE digitalkin_jobs_by_module counter",
+             ])
+             for module_name, counts in snapshot["by_module"].items():
+                 for status, value in counts.items():
+                     lines.append(
+                         f'digitalkin_jobs_by_module{{module="{module_name}",status="{status}"}} {value}'
+                     )
+
+         # Per-protocol breakdown
+         if snapshot["by_protocol"]:
+             lines.extend([
+                 "",
+                 "# HELP digitalkin_messages_by_protocol Messages breakdown by protocol",
+                 "# TYPE digitalkin_messages_by_protocol counter",
+             ])
+             for protocol, counts in snapshot["by_protocol"].items():
+                 for metric, value in counts.items():
+                     lines.append(
+                         f'digitalkin_messages_by_protocol{{protocol="{protocol}",metric="{metric}"}} {value}'
+                     )
+
+         return "\n".join(lines)
+
+     @staticmethod
+     def _format_histogram(name: str, help_text: str, data: dict) -> list[str]:
+         """Format a histogram for Prometheus output."""
+         lines = [
+             "",
+             f"# HELP {name} {help_text}",
+             f"# TYPE {name} histogram",
+         ]
+
+         # Sort buckets and output cumulative counts
+         cumulative = 0
+         for bucket in sorted(data.get("buckets", {}).keys()):
+             cumulative += data["buckets"][bucket]
+             lines.append(f'{name}_bucket{{le="{bucket}"}} {cumulative}')
+
+         lines.extend([
+             f'{name}_bucket{{le="+Inf"}} {data.get("count", 0)}',
+             f'{name}_sum {data.get("sum", 0)}',
+             f'{name}_count {data.get("count", 0)}',
+         ])
+
+         return lines
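For reference, a minimal sketch of serving this exporter's output over HTTP using only the standard library. The release also adds monitoring/digitalkin_observability/http_server.py (not shown in this hunk), so the handler below illustrates the text-format contract rather than the package's own server; the route and port are assumptions.

from http.server import BaseHTTPRequestHandler, HTTPServer

from digitalkin_observability import PrometheusExporter


class MetricsHandler(BaseHTTPRequestHandler):
    """Serve PrometheusExporter.export() at /metrics (illustrative only)."""

    def do_GET(self) -> None:
        if self.path != "/metrics":
            self.send_error(404)
            return
        body = PrometheusExporter.export().encode("utf-8")
        self.send_response(200)
        # Standard content type for the Prometheus text exposition format.
        self.send_header("Content-Type", "text/plain; version=0.0.4; charset=utf-8")
        self.send_header("Content-Length", str(len(body)))
        self.end_headers()
        self.wfile.write(body)


if __name__ == "__main__":
    HTTPServer(("0.0.0.0", 9100), MetricsHandler).serve_forever()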
monitoring/tests/test_metrics.py
@@ -0,0 +1,172 @@
+ """Tests for metrics collection.
+
+ Run with: python -m pytest tests/test_metrics.py
+ """
+
+ import sys
+ from pathlib import Path
+
+ import pytest
+
+ # Add the parent directory to the path so we can import digitalkin_observability
+ sys.path.insert(0, str(Path(__file__).parent.parent))
+
+ from digitalkin_observability import MetricsCollector, PrometheusExporter, get_metrics
+
+
+ class TestMetricsCollector:
+     """Tests for MetricsCollector singleton."""
+
+     def setup_method(self) -> None:
+         """Reset metrics before each test."""
+         get_metrics().reset()
+
+     def test_singleton_returns_same_instance(self) -> None:
+         """Test that get_metrics returns the same instance."""
+         m1 = get_metrics()
+         m2 = get_metrics()
+         assert m1 is m2
+
+     def test_inc_jobs_started(self) -> None:
+         """Test incrementing jobs started counter."""
+         metrics = get_metrics()
+         metrics.inc_jobs_started("TestModule")
+
+         assert metrics.jobs_started_total == 1
+         assert metrics.active_jobs == 1
+
+     def test_inc_jobs_completed(self) -> None:
+         """Test incrementing jobs completed counter."""
+         metrics = get_metrics()
+         metrics.inc_jobs_started("TestModule")
+         metrics.inc_jobs_completed("TestModule", 1.5)
+
+         assert metrics.jobs_completed_total == 1
+         assert metrics.active_jobs == 0
+         assert metrics.job_duration_seconds.count == 1
+         assert metrics.job_duration_seconds.total_sum == 1.5
+
+     def test_inc_jobs_failed(self) -> None:
+         """Test incrementing jobs failed counter."""
+         metrics = get_metrics()
+         metrics.inc_jobs_started("TestModule")
+         metrics.inc_jobs_failed("TestModule")
+
+         assert metrics.jobs_failed_total == 1
+         assert metrics.active_jobs == 0
+
+     def test_inc_jobs_cancelled(self) -> None:
+         """Test incrementing jobs cancelled counter."""
+         metrics = get_metrics()
+         metrics.inc_jobs_started("TestModule")
+         metrics.inc_jobs_cancelled("TestModule")
+
+         assert metrics.jobs_cancelled_total == 1
+         assert metrics.active_jobs == 0
+
+     def test_inc_messages_sent(self) -> None:
+         """Test incrementing messages sent counter."""
+         metrics = get_metrics()
+         metrics.inc_messages_sent("message")
+         metrics.inc_messages_sent("file")
+         metrics.inc_messages_sent()
+
+         assert metrics.messages_sent_total == 3
+
+     def test_queue_depth_tracking(self) -> None:
+         """Test queue depth tracking."""
+         metrics = get_metrics()
+         metrics.set_queue_depth("job1", 5)
+         metrics.set_queue_depth("job2", 3)
+
+         assert metrics.queue_depth["job1"] == 5
+         assert metrics.queue_depth["job2"] == 3
+
+         metrics.clear_queue_depth("job1")
+         assert "job1" not in metrics.queue_depth
+
+     def test_snapshot(self) -> None:
+         """Test snapshot returns all metrics."""
+         metrics = get_metrics()
+         metrics.inc_jobs_started("TestModule")
+         metrics.inc_jobs_completed("TestModule", 0.5)
+         metrics.inc_messages_sent("message")
+
+         snapshot = metrics.snapshot()
+
+         assert snapshot["jobs_started_total"] == 1
+         assert snapshot["jobs_completed_total"] == 1
+         assert snapshot["messages_sent_total"] == 1
+         assert "job_duration_seconds" in snapshot
+         assert "by_module" in snapshot
+         assert "TestModule" in snapshot["by_module"]
+
+     def test_histogram_observe(self) -> None:
+         """Test histogram observations."""
+         metrics = get_metrics()
+         metrics.observe_grpc_duration(0.05)
+         metrics.observe_grpc_duration(0.15)
+
+         assert metrics.grpc_request_duration_seconds.count == 2
+         assert metrics.grpc_request_duration_seconds.total_sum == pytest.approx(0.2)
+
+     def test_reset_clears_all_metrics(self) -> None:
+         """Test reset clears all metrics."""
+         metrics = get_metrics()
+         metrics.inc_jobs_started("TestModule")
+         metrics.inc_errors()
+
+         metrics.reset()
+
+         assert metrics.jobs_started_total == 0
+         assert metrics.errors_total == 0
+         assert metrics.active_jobs == 0
+
+
+ class TestPrometheusExporter:
+     """Tests for Prometheus exporter."""
+
+     def setup_method(self) -> None:
+         """Reset metrics before each test."""
+         get_metrics().reset()
+
+     def test_export_returns_string(self) -> None:
+         """Test that export returns a string."""
+         output = PrometheusExporter.export()
+         assert isinstance(output, str)
+
+     def test_export_contains_job_counters(self) -> None:
+         """Test export contains job counters."""
+         metrics = get_metrics()
+         metrics.inc_jobs_started("TestModule")
+
+         output = PrometheusExporter.export()
+
+         assert "digitalkin_jobs_started_total 1" in output
+         assert "digitalkin_active_jobs 1" in output
+
+     def test_export_contains_histogram(self) -> None:
+         """Test export contains histogram data."""
+         metrics = get_metrics()
+         metrics.observe_grpc_duration(0.05)
+
+         output = PrometheusExporter.export()
+
+         assert "digitalkin_grpc_request_duration_seconds" in output
+         assert "# TYPE digitalkin_grpc_request_duration_seconds histogram" in output
+
+     def test_export_contains_module_breakdown(self) -> None:
+         """Test export contains per-module breakdown."""
+         metrics = get_metrics()
+         metrics.inc_jobs_started("MyModule")
+
+         output = PrometheusExporter.export()
+
+         assert 'digitalkin_jobs_by_module{module="MyModule",status="started"} 1' in output
+
+     def test_export_contains_help_and_type(self) -> None:
+         """Test export contains HELP and TYPE comments."""
+         output = PrometheusExporter.export()
+
+         assert "# HELP digitalkin_jobs_started_total" in output
+         assert "# TYPE digitalkin_jobs_started_total counter" in output
digitalkin/models/module/module_helpers.py
@@ -1,189 +0,0 @@
- """Module helpers for inter-module communication."""
-
- from collections.abc import AsyncGenerator, Callable, Coroutine
- from types import SimpleNamespace
- from typing import TYPE_CHECKING, Any
-
- from digitalkin.logger import logger
-
- if TYPE_CHECKING:
-     from digitalkin.models.module.module_context import ModuleContext
-
-
- class ModuleHelpers(SimpleNamespace):
-     """Helpers for module-to-module communication.
-
-     Extends SimpleNamespace to allow dynamic attribute assignment
-     while providing built-in helper methods.
-     """
-
-     def __init__(self, context: "ModuleContext", **kwargs: dict[str, Any]) -> None:
-         """Initialize helpers with context reference.
-
-         Args:
-             context: ModuleContext providing access to services.
-             **kwargs: Additional attributes to set on the namespace.
-         """
-         super().__init__(**kwargs)
-         self._context = context
-
-     async def call_module_by_id(
-         self,
-         module_id: str,
-         input_data: dict,
-         setup_id: str,
-         mission_id: str,
-         callback: Callable[[dict], Coroutine[Any, Any, None]] | None = None,
-     ) -> AsyncGenerator[dict, None]:
-         """Call a module by ID, discovering address/port from registry.
-
-         Args:
-             module_id: Module identifier to look up in registry
-             input_data: Input data as dictionary
-             setup_id: Setup configuration ID
-             mission_id: Mission context ID
-             callback: Optional callback for each response
-
-         Yields:
-             Streaming responses from module as dictionaries
-         """
-         module_info = self._context.registry.discover_by_id(module_id)
-
-         logger.debug(
-             "Calling module by ID",
-             extra={
-                 "module_id": module_id,
-                 "address": module_info.address,
-                 "port": module_info.port,
-             },
-         )
-
-         async for response in self._context.communication.call_module(
-             module_address=module_info.address,
-             module_port=module_info.port,
-             input_data=input_data,
-             setup_id=setup_id,
-             mission_id=mission_id,
-             callback=callback,
-         ):
-             yield response
-
-     async def get_module_schemas_by_id(
-         self,
-         module_id: str,
-         *,
-         llm_format: bool = False,
-     ) -> dict[str, dict]:
-         """Get module schemas by ID, discovering address/port from registry.
-
-         Args:
-             module_id: Module identifier to look up in registry
-             llm_format: If True, return LLM-optimized schema format
-
-         Returns:
-             Dictionary containing schemas: {"input": ..., "output": ..., "setup": ..., "secret": ...}
-         """
-         module_info = self._context.registry.discover_by_id(module_id)
-
-         logger.debug(
-             "Getting module schemas by ID",
-             extra={
-                 "module_id": module_id,
-                 "address": module_info.address,
-                 "port": module_info.port,
-             },
-         )
-
-         return await self._context.communication.get_module_schemas(
-             module_address=module_info.address,
-             module_port=module_info.port,
-             llm_format=llm_format,
-         )
-
-     async def create_openai_style_tool(self, module_id: str) -> dict[str, Any] | None:
-         """Create OpenAI-style function calling schema for a tool.
-
-         Uses tool cache (fast path) with registry fallback. Fetches the tool's
-         input schema and wraps it in OpenAI function calling format.
-
-         Args:
-             module_id: Module ID to look up (checks cache first, then registry)
-
-         Returns:
-             OpenAI-style tool schema if found:
-             {
-                 "type": "function",
-                 "function": {
-                     "name": "...",
-                     "description": "...",
-                     "parameters": {...} # Input JSON Schema
-                 }
-             }
-             None if tool not found.
-         """
-         module_info = self._context.tool_cache.check_and_get(module_id, self._context.registry)
-         if not module_info:
-             return None
-
-         schemas = await self._context.communication.get_module_schemas(
-             module_address=module_info.address,
-             module_port=module_info.port,
-             llm_format=True,
-         )
-
-         return {
-             "type": "function",
-             "function": {
-                 "name": module_info.name or module_info.module_id,
-                 "description": module_info.documentation or "",
-                 "parameters": schemas["input"],
-             },
-         }
-
-     def create_tool_function(
-         self,
-         module_id: str,
-     ) -> Callable[..., AsyncGenerator[dict, None]] | None:
-         """Create async generator function for a tool.
-
-         Returns an async generator that calls the remote tool module via gRPC
-         and yields each response as it arrives until end_of_stream or gRPC ends.
-
-         Args:
-             module_id: Module ID to look up (checks cache first, then registry)
-
-         Returns:
-             Async generator function if tool found, None otherwise.
-             The function accepts **kwargs matching the tool's input schema
-             and yields dict responses.
-         """
-         module_info = self._context.tool_cache.check_and_get(module_id, self._context.registry)
-         if not module_info:
-             return None
-
-         # Capture references for closure
-         communication = self._context.communication
-         session = self._context.session
-         address = module_info.address
-         port = module_info.port
-
-         async def tool_function(**kwargs: Any) -> AsyncGenerator[dict, None]:  # noqa: ANN401
-             """Call remote tool module and yield responses.
-
-             Yields:
-                 dict: Each response from the module until end_of_stream.
-             """
-             wrapped_input = {"root": kwargs}
-             async for response in communication.call_module(
-                 module_address=address,
-                 module_port=port,
-                 input_data=wrapped_input,
-                 setup_id=session.setup_id,
-                 mission_id=session.mission_id,
-             ):
-                 yield response
-
-         tool_function.__name__ = module_info.name or module_info.module_id
-         tool_function.__doc__ = module_info.documentation or ""
-
-         return tool_function