digitalkin 0.3.1.dev2__py3-none-any.whl → 0.3.2.dev14__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (79) hide show
  1. base_server/server_async_insecure.py +6 -5
  2. base_server/server_async_secure.py +6 -5
  3. base_server/server_sync_insecure.py +5 -4
  4. base_server/server_sync_secure.py +5 -4
  5. digitalkin/__version__.py +1 -1
  6. digitalkin/core/job_manager/base_job_manager.py +1 -1
  7. digitalkin/core/job_manager/single_job_manager.py +28 -9
  8. digitalkin/core/job_manager/taskiq_broker.py +7 -6
  9. digitalkin/core/job_manager/taskiq_job_manager.py +1 -1
  10. digitalkin/core/task_manager/surrealdb_repository.py +7 -7
  11. digitalkin/core/task_manager/task_session.py +60 -98
  12. digitalkin/grpc_servers/module_server.py +109 -168
  13. digitalkin/grpc_servers/module_servicer.py +38 -16
  14. digitalkin/grpc_servers/utils/grpc_client_wrapper.py +24 -8
  15. digitalkin/grpc_servers/utils/utility_schema_extender.py +100 -0
  16. digitalkin/models/__init__.py +1 -1
  17. digitalkin/models/core/job_manager_models.py +0 -8
  18. digitalkin/models/core/task_monitor.py +4 -0
  19. digitalkin/models/grpc_servers/models.py +91 -6
  20. digitalkin/models/module/__init__.py +18 -13
  21. digitalkin/models/module/base_types.py +61 -0
  22. digitalkin/models/module/module_context.py +173 -13
  23. digitalkin/models/module/module_types.py +28 -392
  24. digitalkin/models/module/setup_types.py +490 -0
  25. digitalkin/models/module/tool_cache.py +68 -0
  26. digitalkin/models/module/tool_reference.py +117 -0
  27. digitalkin/models/module/utility.py +167 -0
  28. digitalkin/models/services/registry.py +35 -0
  29. digitalkin/modules/__init__.py +5 -1
  30. digitalkin/modules/_base_module.py +154 -61
  31. digitalkin/modules/archetype_module.py +6 -1
  32. digitalkin/modules/tool_module.py +6 -1
  33. digitalkin/modules/triggers/__init__.py +8 -0
  34. digitalkin/modules/triggers/healthcheck_ping_trigger.py +45 -0
  35. digitalkin/modules/triggers/healthcheck_services_trigger.py +63 -0
  36. digitalkin/modules/triggers/healthcheck_status_trigger.py +52 -0
  37. digitalkin/services/__init__.py +4 -0
  38. digitalkin/services/communication/__init__.py +7 -0
  39. digitalkin/services/communication/communication_strategy.py +76 -0
  40. digitalkin/services/communication/default_communication.py +101 -0
  41. digitalkin/services/communication/grpc_communication.py +234 -0
  42. digitalkin/services/cost/grpc_cost.py +1 -1
  43. digitalkin/services/filesystem/grpc_filesystem.py +1 -1
  44. digitalkin/services/registry/__init__.py +22 -1
  45. digitalkin/services/registry/default_registry.py +135 -4
  46. digitalkin/services/registry/exceptions.py +47 -0
  47. digitalkin/services/registry/grpc_registry.py +306 -0
  48. digitalkin/services/registry/registry_models.py +15 -0
  49. digitalkin/services/registry/registry_strategy.py +88 -4
  50. digitalkin/services/services_config.py +25 -3
  51. digitalkin/services/services_models.py +5 -1
  52. digitalkin/services/setup/default_setup.py +1 -1
  53. digitalkin/services/setup/grpc_setup.py +1 -1
  54. digitalkin/services/storage/grpc_storage.py +1 -1
  55. digitalkin/services/user_profile/__init__.py +11 -0
  56. digitalkin/services/user_profile/grpc_user_profile.py +2 -2
  57. digitalkin/services/user_profile/user_profile_strategy.py +0 -15
  58. digitalkin/utils/schema_splitter.py +207 -0
  59. {digitalkin-0.3.1.dev2.dist-info → digitalkin-0.3.2.dev14.dist-info}/METADATA +5 -5
  60. digitalkin-0.3.2.dev14.dist-info/RECORD +143 -0
  61. {digitalkin-0.3.1.dev2.dist-info → digitalkin-0.3.2.dev14.dist-info}/top_level.txt +1 -0
  62. modules/archetype_with_tools_module.py +244 -0
  63. modules/cpu_intensive_module.py +1 -1
  64. modules/dynamic_setup_module.py +5 -29
  65. modules/minimal_llm_module.py +1 -1
  66. modules/text_transform_module.py +1 -1
  67. monitoring/digitalkin_observability/__init__.py +46 -0
  68. monitoring/digitalkin_observability/http_server.py +150 -0
  69. monitoring/digitalkin_observability/interceptors.py +176 -0
  70. monitoring/digitalkin_observability/metrics.py +201 -0
  71. monitoring/digitalkin_observability/prometheus.py +137 -0
  72. monitoring/tests/test_metrics.py +172 -0
  73. services/filesystem_module.py +7 -5
  74. services/storage_module.py +4 -2
  75. digitalkin/grpc_servers/registry_server.py +0 -65
  76. digitalkin/grpc_servers/registry_servicer.py +0 -456
  77. digitalkin-0.3.1.dev2.dist-info/RECORD +0 -119
  78. {digitalkin-0.3.1.dev2.dist-info → digitalkin-0.3.2.dev14.dist-info}/WHEEL +0 -0
  79. {digitalkin-0.3.1.dev2.dist-info → digitalkin-0.3.2.dev14.dist-info}/licenses/LICENSE +0 -0
@@ -0,0 +1,244 @@
1
+ """Example archetype module with tool cache integration."""
2
+
3
+ import logging
4
+ from typing import Any, ClassVar, Literal
5
+
6
+ from pydantic import BaseModel, Field
7
+
8
+ from digitalkin.models.grpc_servers.models import ClientConfig, SecurityMode, ServerMode
9
+ from digitalkin.models.module.module_context import ModuleContext
10
+ from digitalkin.models.module.setup_types import SetupModel
11
+ from digitalkin.models.module.tool_reference import (
12
+ ToolReference,
13
+ ToolReferenceConfig,
14
+ ToolSelectionMode,
15
+ )
16
+ from digitalkin.modules._base_module import BaseModule # noqa: PLC2701
17
+ from digitalkin.services.services_models import ServicesStrategy
18
+
19
+ logging.basicConfig(
20
+ level=logging.DEBUG,
21
+ format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
22
+ )
23
+ logger = logging.getLogger(__name__)
24
+
25
+
26
class MessageInputPayload(BaseModel):
    """Input payload carrying a single user message.

    ``payload_type`` is fixed to ``"message"`` and is the field the enclosing
    input model uses as its discriminator.
    """

    # Constant tag identifying this payload variant.
    payload_type: Literal["message"] = "message"
    # Text supplied by the user; required.
    user_prompt: str
31
+
32
+
33
class ArchetypeInput(BaseModel):
    """Top-level input model: a payload selected by its ``payload_type`` tag."""

    # Only one payload variant exists today; the discriminator is declared
    # anyway so the field is already tagged.
    payload: MessageInputPayload = Field(discriminator="payload_type")
37
+
38
+
39
class MessageOutputPayload(BaseModel):
    """Output payload carrying the module's reply and the tools it used."""

    # Constant tag identifying this payload variant.
    payload_type: Literal["message"] = "message"
    # Text returned to the caller; required.
    response: str
    # Labels of the tools that were invoked; defaults to a fresh empty list.
    tools_used: list[str] = Field(default_factory=list)
45
+
46
+
47
class ArchetypeOutput(BaseModel):
    """Top-level output model: a payload selected by its ``payload_type`` tag."""

    # Mirrors ArchetypeInput: a single tagged payload variant.
    payload: MessageOutputPayload = Field(discriminator="payload_type")
51
+
52
+
53
class ArchetypeSetup(SetupModel):
    """Setup model declaring LLM settings and three tool references.

    Every field except ``system_prompt`` is flagged ``{"config": True}`` in its
    JSON-schema extras; ``system_prompt`` is flagged ``{"hidden": True}``.
    Each tool reference shows a different ``ToolSelectionMode``.
    """

    # --- LLM settings -------------------------------------------------------
    model_name: str = Field(
        default="gpt-4",
        json_schema_extra={"config": True},
    )
    temperature: float = Field(
        default=0.7,
        json_schema_extra={"config": True},
    )

    # --- Tool references ----------------------------------------------------
    # FIXED mode: pinned to one explicit module id.
    search_tool: ToolReference = Field(
        default_factory=lambda: ToolReference(
            config=ToolReferenceConfig(
                mode=ToolSelectionMode.FIXED,
                module_id="search-tool-v1",
            )
        ),
        json_schema_extra={"config": True},
    )

    # TAG mode: selected by tag rather than by module id.
    calculator_tool: ToolReference = Field(
        default_factory=lambda: ToolReference(
            config=ToolReferenceConfig(
                mode=ToolSelectionMode.TAG,
                tag="math-calculator",
            )
        ),
        json_schema_extra={"config": True},
    )

    # DISCOVERABLE mode: no module id or tag is given up front.
    dynamic_tool: ToolReference = Field(
        default_factory=lambda: ToolReference(
            config=ToolReferenceConfig(
                mode=ToolSelectionMode.DISCOVERABLE,
            )
        ),
        json_schema_extra={"config": True},
    )

    # --- Hidden settings ----------------------------------------------------
    system_prompt: str = Field(
        default="You are a helpful assistant with access to tools.",
        json_schema_extra={"hidden": True},
    )
98
+
99
+
100
class ArchetypeConfigSetup(BaseModel):
    """Model used as the module's ``config_setup_format``."""

    # Optional free-form text; absent by default.
    additional_instructions: str | None = None
104
+
105
+
106
class ArchetypeSecret(BaseModel):
    """Secrets model for the module; intentionally declares no fields."""
108
+
109
+
110
# Client settings handed to the registry service below: async mode,
# insecure transport, no credentials.
client_config = ClientConfig(
    credentials=None,
    security=SecurityMode.INSECURE,
    mode=ServerMode.ASYNC,
    port=50152,
    host="[::]",
)
117
+
118
+
119
class ArchetypeWithToolsModule(
    BaseModule[
        ArchetypeInput,
        ArchetypeOutput,
        ArchetypeSetup,
        ArchetypeSecret,
    ]
):
    """Archetype module demonstrating tool cache usage.

    During ``run`` the module looks up three tools in ``context.tool_cache``
    (two by setup field name, one with a registry fallback), streams each
    resolved tool through ``context.call_module_by_id`` and finally sends a
    single ``ArchetypeOutput`` summarising what was called.
    """

    name = "ArchetypeWithToolsModule"
    description = "Archetype with tool cache integration"

    config_setup_format = ArchetypeConfigSetup
    input_format = ArchetypeInput
    output_format = ArchetypeOutput
    setup_format = ArchetypeSetup
    secret_format = ArchetypeSecret

    metadata: ClassVar[dict[str, Any]] = {
        "name": "ArchetypeWithToolsModule",
        "version": "1.0.0",
        "tags": ["archetype", "tools"],
    }

    services_config_strategies: ClassVar[dict[str, ServicesStrategy | None]] = {}
    services_config_params: ClassVar[dict[str, dict[str, Any | None] | None]] = {
        "registry": {
            "config": {},
            "client_config": client_config,
        },
    }

    async def run_config_setup(
        self,
        context: ModuleContext,  # noqa: ARG002
        config_setup_data: ArchetypeSetup,
    ) -> ArchetypeSetup:
        """Custom config setup hook; returns the setup data unchanged.

        The original author notes this runs in parallel with tool resolution;
        that scheduling lives in the base module and is not visible here.

        Args:
            context: Module context with services (unused).
            config_setup_data: Setup data being configured.

        Returns:
            The same ``config_setup_data`` object that was passed in.
        """
        logger.info("Running config setup for %s", self.name)
        return config_setup_data

    async def initialize(self, context: ModuleContext, setup_data: ArchetypeSetup) -> None:  # noqa: ARG002
        """Initialize the module and log the tools present in the cache.

        Args:
            context: Module context with services and tool cache.
            setup_data: Setup data for the module (unused).
        """
        logger.info("Initializing %s", self.name)
        if context.tool_cache:
            logger.info("Available tools: %s", context.tool_cache.list_tools())

    async def _stream_tool(
        self,
        module_id: str,
        result_label: str,
        payload: dict[str, Any],
    ) -> list[str]:
        """Call one tool module and collect every streamed response as text.

        Args:
            module_id: Identifier of the module to call.
            result_label: Prefix used when formatting each response.
            payload: Input data forwarded to the tool.

        Returns:
            One ``"<result_label>: <response>"`` string per streamed response.
        """
        return [
            f"{result_label}: {chunk}"
            async for chunk in self.context.call_module_by_id(
                module_id=module_id,
                input_data=payload,
                setup_id=self.context.session.setup_id,
                mission_id=self.context.session.mission_id,
            )
        ]

    async def run(
        self,
        input_data: ArchetypeInput,
        setup_data: ArchetypeSetup,  # noqa: ARG002
    ) -> None:
        """Run module with tool cache lookups and call_module_by_id.

        Tools are looked up and called strictly in order (search, calculator,
        dynamic). If the context has no tool cache, no tool is called and the
        output reports zero results instead of failing with AttributeError.

        Args:
            input_data: Input data to process.
            setup_data: Setup configuration (unused).
        """
        logger.info("Running %s", self.name)

        prompt = input_data.payload.user_prompt
        tools_used: list[str] = []
        tool_results: list[str] = []

        tool_cache = self.context.tool_cache
        if tool_cache is None:
            # initialize() already treats the cache as optional; do the same here.
            logger.warning("No tool cache available; %s runs without tools", self.name)
        else:
            # Get search tool from cache and call via call_module_by_id
            search_info = tool_cache.get("search_tool")
            if search_info:
                tools_used.append(f"search:{search_info.module_id}")
                tool_results += await self._stream_tool(
                    search_info.module_id,
                    "search_result",
                    {"query": prompt},
                )

            # Get calculator tool from cache
            calc_info = tool_cache.get("calculator_tool")
            if calc_info:
                tools_used.append(f"calculator:{calc_info.module_id}")
                tool_results += await self._stream_tool(
                    calc_info.module_id,
                    "calc_result",
                    {"expression": "2 + 2"},
                )

            # Dynamic discovery via registry fallback for tools not in cache
            dynamic_info = tool_cache.get(
                "some_dynamic_tool",
                registry=self.context.registry,
            )
            if dynamic_info:
                tools_used.append(f"dynamic:{dynamic_info.module_id}")
                tool_results += await self._stream_tool(
                    dynamic_info.module_id,
                    "dynamic_result",
                    {"prompt": prompt},
                )

        # Distinct name: the streamed tool responses must not be confused with
        # the payload sent back to the caller.
        output_payload = MessageOutputPayload(
            response=f"Processed: {prompt} | Results: {len(tool_results)}",
            tools_used=tools_used,
        )

        await self.context.callbacks.send_message(ArchetypeOutput(payload=output_payload))

    async def cleanup(self) -> None:
        """Clean up resources (this example only logs)."""
        logger.info("Cleaning up %s", self.name)
@@ -4,9 +4,9 @@ import logging
4
4
  from collections.abc import Callable
5
5
  from typing import Any, ClassVar, Literal
6
6
 
7
+ from digitalkin.grpc_servers.utils.models import ClientConfig, SecurityMode, ServerConfig, ServerMode
7
8
  from pydantic import BaseModel, Field
8
9
 
9
- from digitalkin.grpc_servers.utils.models import ClientConfig, SecurityMode, ServerConfig, ServerMode
10
10
  from digitalkin.modules._base_module import BaseModule
11
11
  from digitalkin.services.services_models import ServicesStrategy
12
12
  from digitalkin.services.setup.setup_strategy import SetupData
@@ -209,8 +209,6 @@ class DynamicModuleOutput(DataModel[MessageOutputTrigger]):
209
209
  class DynamicModuleSecret(BaseModel):
210
210
  """Secret model (empty for this example)."""
211
211
 
212
- pass
213
-
214
212
 
215
213
  # =============================================================================
216
214
  # Module Implementation
@@ -285,13 +283,7 @@ class DynamicSetupModule(
285
283
 
286
284
  async def demonstrate_dynamic_schema() -> None:
287
285
  """Demonstrate the dynamic schema functionality."""
288
- print("=" * 60)
289
- print("Dynamic Schema Demonstration")
290
- print("=" * 60)
291
-
292
286
  # 1. Show schema WITHOUT force (dynamic fields not resolved)
293
- print("\n1. Schema without force=True (fetchers NOT called):")
294
- print("-" * 40)
295
287
 
296
288
  model_no_force = await DynamicAgentSetup.get_clean_model(
297
289
  config_fields=True,
@@ -302,13 +294,10 @@ async def demonstrate_dynamic_schema() -> None:
302
294
 
303
295
  # Check if enum is present
304
296
  model_name_schema = schema_no_force.get("properties", {}).get("model_name", {})
305
- print(f"model_name has enum: {'enum' in model_name_schema}")
306
297
  if "enum" in model_name_schema:
307
- print(f" enum values: {model_name_schema['enum']}")
298
+ pass
308
299
 
309
300
  # 2. Show schema WITH force (dynamic fields resolved)
310
- print("\n2. Schema with force=True (fetchers called):")
311
- print("-" * 40)
312
301
 
313
302
  model_with_force = await DynamicAgentSetup.get_clean_model(
314
303
  config_fields=True,
@@ -319,43 +308,30 @@ async def demonstrate_dynamic_schema() -> None:
319
308
 
320
309
  # Check enum values after force
321
310
  model_name_schema = schema_with_force.get("properties", {}).get("model_name", {})
322
- print(f"model_name has enum: {'enum' in model_name_schema}")
323
311
  if "enum" in model_name_schema:
324
- print(f" enum values: {model_name_schema['enum']}")
312
+ pass
325
313
 
326
314
  language_schema = schema_with_force.get("properties", {}).get("language", {})
327
- print(f"language has enum: {'enum' in language_schema}")
328
315
  if "enum" in language_schema:
329
- print(f" enum values: {language_schema['enum']}")
316
+ pass
330
317
 
331
318
  # 3. Show that static json_schema_extra is preserved
332
- print("\n3. Static json_schema_extra preserved:")
333
- print("-" * 40)
334
- print(f"model_name ui:widget: {model_name_schema.get('ui:widget', 'NOT FOUND')}")
335
319
 
336
320
  # 4. Show field filtering
337
- print("\n4. Field filtering demonstration:")
338
- print("-" * 40)
339
321
 
340
322
  # Config fields only (hidden excluded)
341
- config_model = await DynamicAgentSetup.get_clean_model(
323
+ await DynamicAgentSetup.get_clean_model(
342
324
  config_fields=True,
343
325
  hidden_fields=False,
344
326
  force=False,
345
327
  )
346
- print(f"Config fields (hidden=False): {list(config_model.model_fields.keys())}")
347
328
 
348
329
  # All fields including hidden
349
- all_model = await DynamicAgentSetup.get_clean_model(
330
+ await DynamicAgentSetup.get_clean_model(
350
331
  config_fields=True,
351
332
  hidden_fields=True,
352
333
  force=False,
353
334
  )
354
- print(f"All fields (hidden=True): {list(all_model.model_fields.keys())}")
355
-
356
- print("\n" + "=" * 60)
357
- print("Demonstration complete!")
358
- print("=" * 60)
359
335
 
360
336
 
361
337
  if __name__ == "__main__":
@@ -6,9 +6,9 @@ from collections.abc import Callable
6
6
  from typing import Any, ClassVar, Literal
7
7
 
8
8
  import openai
9
+ from digitalkin.grpc_servers.utils.models import ClientConfig, SecurityMode, ServerMode
9
10
  from pydantic import BaseModel, Field
10
11
 
11
- from digitalkin.grpc_servers.utils.models import ClientConfig, SecurityMode, ServerMode
12
12
  from digitalkin.modules._base_module import BaseModule
13
13
  from digitalkin.services.services_models import ServicesStrategy
14
14
 
@@ -4,9 +4,9 @@ import logging
4
4
  from collections.abc import Callable
5
5
  from typing import Any, ClassVar
6
6
 
7
+ from digitalkin.grpc_servers.utils.models import ClientConfig, SecurityMode, ServerMode
7
8
  from pydantic import BaseModel
8
9
 
9
- from digitalkin.grpc_servers.utils.models import ClientConfig, SecurityMode, ServerMode
10
10
  from digitalkin.modules._base_module import BaseModule
11
11
  from digitalkin.services.setup.setup_strategy import SetupData
12
12
  from digitalkin.services.storage.storage_strategy import DataType, StorageRecord
@@ -0,0 +1,46 @@
1
+ """Standalone observability module for DigitalKin.
2
+
3
+ This module can be copied into your project and used independently.
4
+ It has no dependencies on the digitalkin package.
5
+
6
+ Usage:
7
+ from digitalkin_observability import (
8
+ MetricsCollector,
9
+ MetricsServer,
10
+ MetricsServerInterceptor,
11
+ PrometheusExporter,
12
+ get_metrics,
13
+ start_metrics_server,
14
+ stop_metrics_server,
15
+ )
16
+
17
+ # Start metrics HTTP server
18
+ start_metrics_server(port=8081)
19
+
20
+ # Track metrics
21
+ metrics = get_metrics()
22
+ metrics.inc_jobs_started("my_module")
23
+ metrics.inc_jobs_completed("my_module", duration=1.5)
24
+
25
+ # Export to Prometheus format
26
+ print(PrometheusExporter.export())
27
+ """
28
+
29
+ from digitalkin_observability.http_server import (
30
+ MetricsServer,
31
+ start_metrics_server,
32
+ stop_metrics_server,
33
+ )
34
+ from digitalkin_observability.interceptors import MetricsServerInterceptor
35
+ from digitalkin_observability.metrics import MetricsCollector, get_metrics
36
+ from digitalkin_observability.prometheus import PrometheusExporter
37
+
38
+ __all__ = [
39
+ "MetricsCollector",
40
+ "MetricsServer",
41
+ "MetricsServerInterceptor",
42
+ "PrometheusExporter",
43
+ "get_metrics",
44
+ "start_metrics_server",
45
+ "stop_metrics_server",
46
+ ]
@@ -0,0 +1,150 @@
1
+ """Simple HTTP server for exposing Prometheus metrics.
2
+
3
+ This module provides an HTTP server that exposes metrics at /metrics endpoint.
4
+ No external dependencies required beyond Python standard library.
5
+ """
6
+
7
+ from __future__ import annotations
8
+
9
+ import logging
10
+ from http.server import BaseHTTPRequestHandler, HTTPServer
11
+ from threading import Thread
12
+ from typing import TYPE_CHECKING, ClassVar
13
+
14
+ if TYPE_CHECKING:
15
+ from typing import Self
16
+
17
+ logger = logging.getLogger(__name__)
18
+
19
+
20
class MetricsHandler(BaseHTTPRequestHandler):
    """HTTP request handler for the metrics and health endpoints.

    ``GET /metrics`` returns the Prometheus export as plain text,
    ``GET /health`` returns a small JSON document, and any other path
    yields a 404.
    """

    def do_GET(self) -> None:  # noqa: N802 - name is dictated by BaseHTTPRequestHandler
        """Handle GET requests by dispatching on the request path."""
        if self.path == "/metrics":
            self._serve_metrics()
        elif self.path == "/health":
            self._serve_health()
        else:
            self.send_error(404, "Not Found")

    def _serve_metrics(self) -> None:
        """Serve Prometheus metrics."""
        # Imported lazily, as in the original, so this module can be loaded
        # without importing the exporter up front.
        from digitalkin_observability.prometheus import PrometheusExporter

        self._send_text("text/plain; charset=utf-8", PrometheusExporter.export())

    def _serve_health(self) -> None:
        """Serve health check."""
        self._send_text("application/json", '{"status": "ok"}')

    def _send_text(self, content_type: str, content: str) -> None:
        """Send a 200 response whose body is ``content`` encoded as UTF-8.

        Args:
            content_type: Value for the ``Content-Type`` header.
            content: Response text.
        """
        # Encode first: Content-Length must count bytes, not characters, or
        # clients mis-frame any body containing non-ASCII text.
        body = content.encode("utf-8")
        self.send_response(200)
        self.send_header("Content-Type", content_type)
        self.send_header("Content-Length", str(len(body)))
        self.end_headers()
        self.wfile.write(body)

    def log_message(self, format: str, *args: object) -> None:  # noqa: A002 - base-class signature
        """Suppress default logging."""
54
+
55
+
56
class MetricsServer:
    """HTTP server for exposing metrics to Prometheus.

    The server runs ``HTTPServer.serve_forever`` on a daemon thread.

    Usage:
        server = MetricsServer(port=8081)
        server.start()
        # ... run your application ...
        server.stop()

    Or as context manager:
        with MetricsServer(port=8081):
            # ... run your application ...

    Or as async context manager:
        async with MetricsServer(port=8081):
            # ... run your application ...
    """

    # Process-wide singleton slot managed by start/stop_metrics_server.
    instance: ClassVar["MetricsServer | None"] = None

    def __init__(self, host: str = "0.0.0.0", port: int = 8081) -> None:
        """Initialize the metrics server without binding any socket.

        Args:
            host: Host to bind to (default: 0.0.0.0 for all interfaces).
            port: Port to listen on (default: 8081).
        """
        self.host = host
        self.port = port
        self._server: HTTPServer | None = None
        self._thread: Thread | None = None

    def start(self) -> None:
        """Start the metrics server in a background thread.

        Calling ``start`` while the server is running only logs a warning.

        Raises:
            OSError: If the socket cannot be bound (for example, port in use).
        """
        if self._server is not None:
            logger.warning("Metrics server already running")
            return

        self._server = HTTPServer((self.host, self.port), MetricsHandler)
        self._thread = Thread(target=self._server.serve_forever, daemon=True)
        self._thread.start()
        logger.info(
            "Metrics server started on http://%s:%s/metrics",
            self.host,
            self.port,
        )

    def stop(self) -> None:
        """Stop the metrics server and release its socket.

        Does nothing if the server is not running.
        """
        server, worker = self._server, self._thread
        if server is None:
            return

        # Clear state first so a failure below cannot leave a half-stopped
        # server that start() would refuse to replace.
        self._server = None
        self._thread = None

        server.shutdown()
        # shutdown() only ends serve_forever(); the listening socket stays
        # bound until server_close() is called.
        server.server_close()
        if worker is not None:
            worker.join(timeout=5)
        logger.info("Metrics server stopped")

    async def __aenter__(self) -> "Self":
        """Async context manager entry: start the server and return it."""
        self.start()
        return self

    async def __aexit__(self, *args: object) -> None:
        """Async context manager exit: stop the server."""
        self.stop()

    def __enter__(self) -> "Self":
        """Context manager entry: start the server and return it."""
        self.start()
        return self

    def __exit__(self, *args: object) -> None:
        """Context manager exit: stop the server."""
        self.stop()
128
+
129
+
130
def start_metrics_server(host: str = "0.0.0.0", port: int = 8081) -> MetricsServer:
    """Start a metrics server singleton.

    If a singleton already exists it is reused (and restarted if it had been
    stopped); ``host`` and ``port`` are then ignored.

    Args:
        host: Host to bind to.
        port: Port to listen on.

    Returns:
        The MetricsServer instance.

    Raises:
        OSError: If a new server cannot bind its socket. No singleton is
            registered in that case, so a later call may use another port.
    """
    existing = MetricsServer.instance
    if existing is not None:
        existing.start()
        return existing

    # Register only after a successful start; otherwise a bind failure would
    # pin the failed host/port for every later call.
    server = MetricsServer(host, port)
    server.start()
    MetricsServer.instance = server
    return server
144
+
145
+
146
def stop_metrics_server() -> None:
    """Stop the singleton metrics server, if any, and clear the singleton slot."""
    current = MetricsServer.instance
    if current is None:
        return
    current.stop()
    MetricsServer.instance = None