omnibase_infra 0.2.1__py3-none-any.whl → 0.2.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (116) hide show
  1. omnibase_infra/__init__.py +1 -1
  2. omnibase_infra/adapters/adapter_onex_tool_execution.py +446 -0
  3. omnibase_infra/cli/commands.py +1 -1
  4. omnibase_infra/configs/widget_mapping.yaml +176 -0
  5. omnibase_infra/contracts/handlers/filesystem/handler_contract.yaml +4 -1
  6. omnibase_infra/contracts/handlers/mcp/handler_contract.yaml +4 -1
  7. omnibase_infra/errors/error_compute_registry.py +4 -1
  8. omnibase_infra/errors/error_event_bus_registry.py +4 -1
  9. omnibase_infra/errors/error_infra.py +3 -1
  10. omnibase_infra/errors/error_policy_registry.py +4 -1
  11. omnibase_infra/handlers/handler_db.py +2 -1
  12. omnibase_infra/handlers/handler_graph.py +10 -5
  13. omnibase_infra/handlers/handler_mcp.py +736 -63
  14. omnibase_infra/handlers/mixins/mixin_consul_kv.py +4 -3
  15. omnibase_infra/handlers/mixins/mixin_consul_service.py +2 -1
  16. omnibase_infra/handlers/service_discovery/handler_service_discovery_consul.py +301 -4
  17. omnibase_infra/handlers/service_discovery/models/model_service_info.py +10 -0
  18. omnibase_infra/mixins/mixin_async_circuit_breaker.py +3 -2
  19. omnibase_infra/mixins/mixin_node_introspection.py +24 -7
  20. omnibase_infra/mixins/mixin_retry_execution.py +1 -1
  21. omnibase_infra/models/handlers/__init__.py +10 -0
  22. omnibase_infra/models/handlers/model_bootstrap_handler_descriptor.py +162 -0
  23. omnibase_infra/models/handlers/model_handler_descriptor.py +15 -0
  24. omnibase_infra/models/mcp/__init__.py +15 -0
  25. omnibase_infra/models/mcp/model_mcp_contract_config.py +80 -0
  26. omnibase_infra/models/mcp/model_mcp_server_config.py +67 -0
  27. omnibase_infra/models/mcp/model_mcp_tool_definition.py +73 -0
  28. omnibase_infra/models/mcp/model_mcp_tool_parameter.py +35 -0
  29. omnibase_infra/models/registration/model_node_capabilities.py +11 -0
  30. omnibase_infra/nodes/architecture_validator/contract_architecture_validator.yaml +0 -5
  31. omnibase_infra/nodes/architecture_validator/registry/registry_infra_architecture_validator.py +17 -10
  32. omnibase_infra/nodes/effects/contract.yaml +0 -5
  33. omnibase_infra/nodes/node_registration_orchestrator/contract.yaml +7 -0
  34. omnibase_infra/nodes/node_registration_orchestrator/handlers/handler_node_introspected.py +86 -1
  35. omnibase_infra/nodes/node_registration_orchestrator/introspection_event_router.py +3 -3
  36. omnibase_infra/nodes/node_registration_orchestrator/registry/registry_infra_node_registration_orchestrator.py +9 -8
  37. omnibase_infra/nodes/node_registration_orchestrator/wiring.py +14 -13
  38. omnibase_infra/nodes/node_registration_storage_effect/contract.yaml +0 -5
  39. omnibase_infra/nodes/node_registration_storage_effect/registry/registry_infra_registration_storage.py +46 -25
  40. omnibase_infra/nodes/node_registry_effect/contract.yaml +0 -5
  41. omnibase_infra/nodes/node_registry_effect/handlers/handler_partial_retry.py +2 -1
  42. omnibase_infra/nodes/node_service_discovery_effect/registry/registry_infra_service_discovery.py +24 -19
  43. omnibase_infra/plugins/examples/plugin_json_normalizer.py +2 -2
  44. omnibase_infra/plugins/examples/plugin_json_normalizer_error_handling.py +2 -2
  45. omnibase_infra/plugins/plugin_compute_base.py +16 -2
  46. omnibase_infra/protocols/protocol_event_projector.py +1 -1
  47. omnibase_infra/runtime/__init__.py +51 -1
  48. omnibase_infra/runtime/binding_config_resolver.py +102 -37
  49. omnibase_infra/runtime/constants_notification.py +75 -0
  50. omnibase_infra/runtime/contract_handler_discovery.py +6 -1
  51. omnibase_infra/runtime/handler_bootstrap_source.py +514 -0
  52. omnibase_infra/runtime/handler_contract_config_loader.py +603 -0
  53. omnibase_infra/runtime/handler_contract_source.py +289 -167
  54. omnibase_infra/runtime/handler_plugin_loader.py +4 -2
  55. omnibase_infra/runtime/mixin_semver_cache.py +25 -1
  56. omnibase_infra/runtime/mixins/__init__.py +7 -0
  57. omnibase_infra/runtime/mixins/mixin_projector_notification_publishing.py +566 -0
  58. omnibase_infra/runtime/mixins/mixin_projector_sql_operations.py +31 -10
  59. omnibase_infra/runtime/models/__init__.py +24 -0
  60. omnibase_infra/runtime/models/model_health_check_result.py +2 -1
  61. omnibase_infra/runtime/models/model_projector_notification_config.py +171 -0
  62. omnibase_infra/runtime/models/model_transition_notification_outbox_config.py +112 -0
  63. omnibase_infra/runtime/models/model_transition_notification_outbox_metrics.py +140 -0
  64. omnibase_infra/runtime/models/model_transition_notification_publisher_metrics.py +357 -0
  65. omnibase_infra/runtime/projector_plugin_loader.py +1 -1
  66. omnibase_infra/runtime/projector_shell.py +229 -1
  67. omnibase_infra/runtime/protocols/__init__.py +10 -0
  68. omnibase_infra/runtime/registry/registry_protocol_binding.py +3 -2
  69. omnibase_infra/runtime/registry_policy.py +9 -326
  70. omnibase_infra/runtime/secret_resolver.py +4 -2
  71. omnibase_infra/runtime/service_kernel.py +10 -2
  72. omnibase_infra/runtime/service_message_dispatch_engine.py +4 -2
  73. omnibase_infra/runtime/service_runtime_host_process.py +225 -15
  74. omnibase_infra/runtime/transition_notification_outbox.py +1190 -0
  75. omnibase_infra/runtime/transition_notification_publisher.py +764 -0
  76. omnibase_infra/runtime/util_container_wiring.py +6 -5
  77. omnibase_infra/runtime/util_wiring.py +5 -1
  78. omnibase_infra/schemas/schema_transition_notification_outbox.sql +245 -0
  79. omnibase_infra/services/mcp/__init__.py +31 -0
  80. omnibase_infra/services/mcp/mcp_server_lifecycle.py +443 -0
  81. omnibase_infra/services/mcp/service_mcp_tool_discovery.py +411 -0
  82. omnibase_infra/services/mcp/service_mcp_tool_registry.py +329 -0
  83. omnibase_infra/services/mcp/service_mcp_tool_sync.py +547 -0
  84. omnibase_infra/services/registry_api/__init__.py +40 -0
  85. omnibase_infra/services/registry_api/main.py +243 -0
  86. omnibase_infra/services/registry_api/models/__init__.py +66 -0
  87. omnibase_infra/services/registry_api/models/model_capability_widget_mapping.py +38 -0
  88. omnibase_infra/services/registry_api/models/model_pagination_info.py +48 -0
  89. omnibase_infra/services/registry_api/models/model_registry_discovery_response.py +73 -0
  90. omnibase_infra/services/registry_api/models/model_registry_health_response.py +49 -0
  91. omnibase_infra/services/registry_api/models/model_registry_instance_view.py +88 -0
  92. omnibase_infra/services/registry_api/models/model_registry_node_view.py +88 -0
  93. omnibase_infra/services/registry_api/models/model_registry_summary.py +60 -0
  94. omnibase_infra/services/registry_api/models/model_response_list_instances.py +43 -0
  95. omnibase_infra/services/registry_api/models/model_response_list_nodes.py +51 -0
  96. omnibase_infra/services/registry_api/models/model_warning.py +49 -0
  97. omnibase_infra/services/registry_api/models/model_widget_defaults.py +28 -0
  98. omnibase_infra/services/registry_api/models/model_widget_mapping.py +51 -0
  99. omnibase_infra/services/registry_api/routes.py +371 -0
  100. omnibase_infra/services/registry_api/service.py +846 -0
  101. omnibase_infra/services/service_capability_query.py +4 -4
  102. omnibase_infra/services/service_health.py +3 -2
  103. omnibase_infra/services/service_timeout_emitter.py +13 -2
  104. omnibase_infra/utils/util_dsn_validation.py +1 -1
  105. omnibase_infra/validation/__init__.py +3 -19
  106. omnibase_infra/validation/contracts/security.validation.yaml +114 -0
  107. omnibase_infra/validation/infra_validators.py +35 -24
  108. omnibase_infra/validation/validation_exemptions.yaml +113 -9
  109. omnibase_infra/validation/validator_chain_propagation.py +2 -2
  110. omnibase_infra/validation/validator_runtime_shape.py +1 -1
  111. omnibase_infra/validation/validator_security.py +473 -370
  112. {omnibase_infra-0.2.1.dist-info → omnibase_infra-0.2.2.dist-info}/METADATA +2 -2
  113. {omnibase_infra-0.2.1.dist-info → omnibase_infra-0.2.2.dist-info}/RECORD +116 -74
  114. {omnibase_infra-0.2.1.dist-info → omnibase_infra-0.2.2.dist-info}/WHEEL +0 -0
  115. {omnibase_infra-0.2.1.dist-info → omnibase_infra-0.2.2.dist-info}/entry_points.txt +0 -0
  116. {omnibase_infra-0.2.1.dist-info → omnibase_infra-0.2.2.dist-info}/licenses/LICENSE +0 -0
@@ -0,0 +1,846 @@
1
+ # SPDX-License-Identifier: MIT
2
+ # Copyright (c) 2025 OmniNode Team
3
+ """Registry Discovery Service.
4
+
5
+ Combines ProjectionReaderRegistration and HandlerServiceDiscoveryConsul
6
+ to provide a unified discovery interface for the Registry API.
7
+
8
+ Design Principles:
9
+ - Partial success: Returns data even if one backend fails
10
+ - Warnings array: Communicates backend failures without crashing
11
+ - Async-first: All methods are async for non-blocking I/O
12
+ - Correlation IDs: Full traceability across all operations
13
+ - Container DI: Accepts ModelONEXContainer for dependency injection
14
+
15
+ Related Tickets:
16
+ - OMN-1278: Contract-Driven Dashboard - Registry Discovery
17
+ - OMN-1282: MCP Handler Contract-Driven Config
18
+ """
19
+
20
+ from __future__ import annotations
21
+
22
+ import logging
23
+ from datetime import UTC, datetime
24
+ from pathlib import Path
25
+ from typing import TYPE_CHECKING
26
+ from uuid import UUID, uuid4
27
+
28
+ import yaml
29
+
30
+ from omnibase_core.container import ModelONEXContainer
31
+ from omnibase_core.types import JsonType
32
+ from omnibase_infra.enums import EnumRegistrationState
33
+ from omnibase_infra.nodes.node_service_discovery_effect.models.enum_health_status import (
34
+ EnumHealthStatus,
35
+ )
36
+ from omnibase_infra.services.registry_api.models import (
37
+ ModelCapabilityWidgetMapping,
38
+ ModelPaginationInfo,
39
+ ModelRegistryDiscoveryResponse,
40
+ ModelRegistryHealthResponse,
41
+ ModelRegistryInstanceView,
42
+ ModelRegistryNodeView,
43
+ ModelRegistrySummary,
44
+ ModelWarning,
45
+ ModelWidgetDefaults,
46
+ ModelWidgetMapping,
47
+ )
48
+
49
+ if TYPE_CHECKING:
50
+ from omnibase_infra.handlers.service_discovery import HandlerServiceDiscoveryConsul
51
+ from omnibase_infra.models.projection import ModelRegistrationProjection
52
+ from omnibase_infra.projectors import ProjectionReaderRegistration
53
+
54
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)

# Per-query fetch ceiling used when a node_type filter is requested.
# The projection reader API has no node_type filter, so records are fetched
# by state and narrowed in memory before pagination is applied.
MAX_NODE_TYPE_FILTER_FETCH = 10_000

# Default widget mapping file: three directory levels above this module,
# then configs/widget_mapping.yaml.
DEFAULT_WIDGET_MAPPING_PATH = Path(__file__).parent.parent.parent.joinpath(
    "configs", "widget_mapping.yaml"
)
65
+
66
+
67
+ class ServiceRegistryDiscovery:
68
+ """Registry discovery service combining projection and Consul data.
69
+
70
+ Provides a unified interface for querying both registered nodes
71
+ (from PostgreSQL projections) and live service instances (from Consul).
72
+
73
+ Partial Success Pattern:
74
+ If one backend fails, the service still returns data from the
75
+ successful backend along with warnings indicating the failure.
76
+ This allows dashboards to display partial data rather than
77
+ showing complete errors.
78
+
79
+ Dependency Injection:
80
+ This service accepts a ModelONEXContainer for ONEX-style dependency
81
+ injection. Dependencies can also be provided directly via constructor
82
+ parameters for testing or when the container is not available.
83
+
84
+ Priority: Direct parameters > None (with warnings). The container is stored, but dependencies are not resolved from it during construction.
85
+
86
+ Thread Safety:
87
+ This service is coroutine-safe. All methods are async and
88
+ delegate to underlying services that handle their own
89
+ concurrency requirements.
90
+
91
+ Example:
92
+ >>> # Using container for DI
93
+ >>> service = ServiceRegistryDiscovery(container=container)
94
+ >>> response = await service.get_discovery()
95
+ >>>
96
+ >>> # Using direct dependencies (for testing)
97
+ >>> service = ServiceRegistryDiscovery(
98
+ ... projection_reader=reader,
99
+ ... consul_handler=handler,
100
+ ... )
101
+ >>> response = await service.get_discovery()
102
+ >>> if response.warnings:
103
+ ... logger.warning("Partial data: %s", response.warnings)
104
+
105
+ Attributes:
106
+ projection_reader: Reader for node registration projections.
107
+ consul_handler: Handler for Consul service discovery.
108
+ widget_mapping_path: Path to widget mapping YAML configuration.
109
+ """
110
+
111
def __init__(
    self,
    container: ModelONEXContainer | None = None,
    projection_reader: ProjectionReaderRegistration | None = None,
    consul_handler: HandlerServiceDiscoveryConsul | None = None,
    widget_mapping_path: Path | None = None,
) -> None:
    """Store the collaborators and start with an empty widget mapping cache.

    Args:
        container: Optional ONEX container. It is only kept on the instance;
            this constructor does not resolve any dependency from it.
        projection_reader: Optional projection reader for node registrations,
            stored exactly as passed. When None, node queries return empty
            results together with a warning.
        consul_handler: Optional Consul handler for live instances, stored
            exactly as passed. When None, instance queries return empty
            results together with a warning.
        widget_mapping_path: Path to the widget mapping YAML file. A falsy
            value selects DEFAULT_WIDGET_MAPPING_PATH.

    Note:
        Container-based resolution was removed in omnibase_core ^0.9.0: the
        new ServiceRegistry resolves asynchronously by interface, which does
        not fit a synchronous ``__init__``. Pass the dependencies explicitly.
    """
    self._container = container

    # Explicit injection only: both collaborators are used as given and may
    # legitimately stay None (see the Note in the docstring).
    self._projection_reader = projection_reader
    self._consul_handler = consul_handler

    if widget_mapping_path:
        self._widget_mapping_path = widget_mapping_path
    else:
        self._widget_mapping_path = DEFAULT_WIDGET_MAPPING_PATH

    # Cache state for the widget mapping: the parsed mapping and the file
    # modification time it was loaded at.
    self._widget_mapping_cache: ModelWidgetMapping | None = None
    self._widget_mapping_mtime: float | None = None

    init_details = {
        "has_container": container is not None,
        "has_projection_reader": self._projection_reader is not None,
        "has_consul_handler": self._consul_handler is not None,
        "widget_mapping_path": str(self._widget_mapping_path),
    }
    logger.info("ServiceRegistryDiscovery initialized", extra=init_details)
166
+
167
@property
def has_projection_reader(self) -> bool:
    """Report whether a projection reader was supplied (is not None)."""
    reader = self._projection_reader
    return reader is not None
171
+
172
@property
def has_consul_handler(self) -> bool:
    """Report whether a Consul handler was supplied (is not None)."""
    handler = self._consul_handler
    return handler is not None
176
+
177
@property
def consul_handler(self) -> HandlerServiceDiscoveryConsul | None:
    """Expose the stored Consul handler (or None) for lifecycle management."""
    handler = self._consul_handler
    return handler
181
+
182
def invalidate_widget_mapping_cache(self) -> None:
    """Drop the cached widget mapping so the next access reloads the file.

    Call this when the widget mapping file is known to have changed and an
    immediate reload is wanted instead of relying on modification-time
    detection.

    Example:
        >>> service.invalidate_widget_mapping_cache()
        >>> mapping, warnings = service.get_widget_mapping()  # Fresh load
    """
    # Reset both the parsed mapping and the mtime it was loaded at.
    self._widget_mapping_cache = self._widget_mapping_mtime = None

    log_context = {"widget_mapping_path": str(self._widget_mapping_path)}
    logger.debug("Widget mapping cache invalidated", extra=log_context)
199
+
200
async def list_nodes(
    self,
    limit: int = 100,
    offset: int = 0,
    state: EnumRegistrationState | None = None,
    node_type: str | None = None,
    correlation_id: UUID | None = None,
) -> tuple[list[ModelRegistryNodeView], ModelPaginationInfo, list[ModelWarning]]:
    """Return one page of registered nodes with pagination info and warnings.

    Args:
        limit: Page size. Not range-checked by this method.
        offset: Number of leading records to skip.
        state: Registration state to query. When None, the states ACTIVE,
            ACCEPTED, AWAITING_ACK, ACK_RECEIVED and PENDING_REGISTRATION
            are each queried and the results merged, newest ``updated_at``
            first.
        node_type: Optional node type filter, compared case-insensitively
            with each projection's node type value. Applied in memory.
        correlation_id: Tracing ID; a new UUID is generated when omitted.

    Returns:
        Tuple of (nodes, pagination_info, warnings). A missing projection
        reader or a failing query is reported through ``warnings`` instead
        of being raised.

    Note:
        With a node_type filter, each state query requests up to
        MAX_NODE_TYPE_FILTER_FETCH records and ``total`` is the number of
        fetched records that match the filter. Without it, each state query
        requests only ``limit + offset + 1`` records, so ``total`` is the
        number of records fetched, which is enough to compute ``has_more``
        but is not necessarily the full stored count (assuming the reader
        honours ``limit``).
    """
    correlation_id = correlation_id or uuid4()
    warnings: list[ModelWarning] = []
    nodes: list[ModelRegistryNodeView] = []
    total = 0

    if self._projection_reader is not None:
        reader = self._projection_reader
        try:
            # The extra record in the unfiltered case lets has_more be
            # detected without fetching everything.
            per_query_limit = (
                MAX_NODE_TYPE_FILTER_FETCH if node_type else limit + offset + 1
            )

            rows: list[ModelRegistrationProjection]
            if state is None:
                # No state requested: gather every default state, then order
                # the combined rows by most recent update.
                default_states = (
                    EnumRegistrationState.ACTIVE,
                    EnumRegistrationState.ACCEPTED,
                    EnumRegistrationState.AWAITING_ACK,
                    EnumRegistrationState.ACK_RECEIVED,
                    EnumRegistrationState.PENDING_REGISTRATION,
                )
                rows = []
                for candidate_state in default_states:
                    rows.extend(
                        await reader.get_by_state(
                            state=candidate_state,
                            limit=per_query_limit,
                            correlation_id=correlation_id,
                        )
                    )
                rows.sort(key=lambda row: row.updated_at, reverse=True)
            else:
                rows = await reader.get_by_state(
                    state=state,
                    limit=per_query_limit,
                    correlation_id=correlation_id,
                )

            # The reader cannot filter by node type, so do it here.
            if node_type:
                wanted_kind = node_type.upper()
                rows = [
                    row
                    for row in rows
                    if row.node_type.value.upper() == wanted_kind
                ]

            total = len(rows)

            known_kinds = ("EFFECT", "COMPUTE", "REDUCER", "ORCHESTRATOR")
            # Append one view at a time so that views built before a failure
            # are still returned.
            for row in rows[offset : offset + limit]:
                kind_value = row.node_type.value
                api_kind = kind_value.upper()
                if api_kind not in known_kinds:
                    api_kind = "EFFECT"  # Fallback

                namespace = row.domain if row.domain != "registration" else None
                nodes.append(
                    ModelRegistryNodeView(
                        node_id=row.entity_id,
                        name=f"onex-{kind_value}",
                        service_name=f"onex-{kind_value}-{str(row.entity_id)[:8]}",
                        namespace=namespace,
                        display_name=None,
                        node_type=api_kind,  # type: ignore[arg-type]
                        version=row.node_version,
                        state=row.current_state.value,
                        capabilities=row.capability_tags,
                        registered_at=row.registered_at,
                        last_heartbeat_at=row.last_heartbeat_at,
                    )
                )

        except Exception as err:
            logger.exception(
                "Failed to query projections",
                extra={"correlation_id": str(correlation_id)},
            )
            warnings.append(
                ModelWarning(
                    source="postgres",
                    message=f"Failed to query projections: {type(err).__name__}",
                    code="PROJECTION_QUERY_FAILED",
                    timestamp=datetime.now(UTC),
                )
            )
    else:
        warnings.append(
            ModelWarning(
                source="postgres",
                message="Projection reader not configured",
                code="NO_PROJECTION_READER",
                timestamp=datetime.now(UTC),
            )
        )

    more_available = offset + len(nodes) < total
    pagination = ModelPaginationInfo(
        total=total,
        limit=limit,
        offset=offset,
        has_more=more_available,
    )

    return nodes, pagination, warnings
357
+
358
async def get_node(
    self,
    node_id: UUID,
    correlation_id: UUID | None = None,
) -> tuple[ModelRegistryNodeView | None, list[ModelWarning]]:
    """Look up a single registered node by its ID.

    Args:
        node_id: UUID of the node to fetch.
        correlation_id: Tracing ID; a new UUID is generated when omitted.

    Returns:
        Tuple of (node or None, warnings). The node is None when no
        projection reader is configured, when the reader finds nothing, or
        when the lookup fails; the first and last cases also add a warning.
    """
    correlation_id = correlation_id or uuid4()
    warnings: list[ModelWarning] = []

    reader = self._projection_reader
    if reader is None:
        warnings.append(
            ModelWarning(
                source="postgres",
                message="Projection reader not configured",
                code="NO_PROJECTION_READER",
                timestamp=datetime.now(UTC),
            )
        )
        return None, warnings

    try:
        row = await reader.get_entity_state(
            entity_id=node_id,
            correlation_id=correlation_id,
        )

        if row is not None:
            kind_value = row.node_type.value
            api_kind = kind_value.upper()
            # Unrecognised node kinds are presented as EFFECT.
            if api_kind not in ("EFFECT", "COMPUTE", "REDUCER", "ORCHESTRATOR"):
                api_kind = "EFFECT"

            namespace = row.domain if row.domain != "registration" else None
            view = ModelRegistryNodeView(
                node_id=row.entity_id,
                name=f"onex-{kind_value}",
                service_name=f"onex-{kind_value}-{str(row.entity_id)[:8]}",
                namespace=namespace,
                display_name=None,
                node_type=api_kind,  # type: ignore[arg-type]
                version=row.node_version,
                state=row.current_state.value,
                capabilities=row.capability_tags,
                registered_at=row.registered_at,
                last_heartbeat_at=row.last_heartbeat_at,
            )
            return view, warnings

        return None, warnings

    except Exception as err:
        logger.exception(
            "Failed to get node",
            extra={"node_id": str(node_id), "correlation_id": str(correlation_id)},
        )
        warnings.append(
            ModelWarning(
                source="postgres",
                message=f"Failed to get node: {type(err).__name__}",
                code="NODE_QUERY_FAILED",
                timestamp=datetime.now(UTC),
            )
        )
        return None, warnings
429
+
430
async def list_instances(
    self,
    service_name: str | None = None,
    include_unhealthy: bool = False,
    correlation_id: UUID | None = None,
) -> tuple[list[ModelRegistryInstanceView], list[ModelWarning]]:
    """Collect live service instances from the Consul handler.

    Args:
        service_name: Restrict the query to this service. When falsy, the
            handler is asked for all services and each one is queried.
        include_unhealthy: Forwarded to the handler's instance query.
        correlation_id: Tracing ID; a new UUID is generated when omitted.

    Returns:
        Tuple of (instances, warnings). Failures never raise: a missing
        handler or a failed catalog listing yields a warning and whatever
        was collected so far; a failure for one service adds a warning and
        the remaining services are still queried.
    """
    correlation_id = correlation_id or uuid4()
    warnings: list[ModelWarning] = []
    instances: list[ModelRegistryInstanceView] = []

    handler = self._consul_handler
    if handler is None:
        warnings.append(
            ModelWarning(
                source="consul",
                message="Consul handler not configured",
                code="NO_CONSUL_HANDLER",
                timestamp=datetime.now(UTC),
            )
        )
        return instances, warnings

    try:
        targets: list[str] = []

        if service_name:
            targets = [service_name]
        else:
            # No explicit service: enumerate the catalog first.
            try:
                catalog = await handler.list_all_services(
                    correlation_id=correlation_id,
                )
                targets = list(catalog.keys())
            except Exception as catalog_err:
                logger.warning(
                    "Failed to list all services, falling back to empty discovery",
                    extra={
                        "error": str(catalog_err),
                        "correlation_id": str(correlation_id),
                    },
                )
                warnings.append(
                    ModelWarning(
                        source="consul",
                        message=f"Failed to list all services: {type(catalog_err).__name__}",
                        code="CONSUL_CATALOG_FAILED",
                        timestamp=datetime.now(UTC),
                    )
                )
                return instances, warnings

        for target in targets:
            try:
                found = await handler.get_all_service_instances(
                    service_name=target,
                    include_unhealthy=include_unhealthy,
                    correlation_id=correlation_id,
                )

                for svc in found:
                    # Translate the handler's health enum into the API's
                    # health_status strings.
                    reported = svc.health_status
                    api_health: str
                    if reported == EnumHealthStatus.HEALTHY:
                        api_health = "passing"
                    elif reported == EnumHealthStatus.UNHEALTHY:
                        api_health = "critical"
                    else:
                        api_health = "unknown"

                    view = ModelRegistryInstanceView(
                        node_id=svc.service_id,
                        service_name=svc.service_name,
                        service_id=svc.service_id,
                        instance_id=svc.service_id,
                        address=svc.address or "unknown",
                        port=svc.port or 0,
                        health_status=api_health,  # type: ignore[arg-type]
                        health_output=svc.health_output,
                        last_check_at=svc.last_check_at or svc.registered_at,
                        tags=list(svc.tags),
                        meta=svc.metadata,
                    )
                    instances.append(view)

            except Exception as svc_err:
                # One failing service must not hide the others.
                logger.warning(
                    "Failed to query service instances",
                    extra={
                        "service_name": target,
                        "error": str(svc_err),
                        "correlation_id": str(correlation_id),
                    },
                )
                warnings.append(
                    ModelWarning(
                        source="consul",
                        message=f"Failed to query service '{target}': {type(svc_err).__name__}",
                        code="CONSUL_SERVICE_QUERY_FAILED",
                        timestamp=datetime.now(UTC),
                    )
                )

    except Exception as err:
        logger.exception(
            "Failed to discover services",
            extra={"correlation_id": str(correlation_id)},
        )
        warnings.append(
            ModelWarning(
                source="consul",
                message=f"Failed to discover services: {type(err).__name__}",
                code="CONSUL_QUERY_FAILED",
                timestamp=datetime.now(UTC),
            )
        )

    return instances, warnings
565
+
566
async def get_discovery(
    self,
    limit: int = 100,
    offset: int = 0,
    correlation_id: UUID | None = None,
) -> ModelRegistryDiscoveryResponse:
    """Assemble the dashboard payload: nodes, live instances and a summary.

    Args:
        limit: Page size passed to the node listing.
        offset: Number of nodes to skip, passed to the node listing.
        correlation_id: Tracing ID shared by both lookups; a new UUID is
            generated when omitted.

    Returns:
        Discovery response containing the node page, all live instances
        (unhealthy ones included), the summary, pagination info and the
        combined warnings from both lookups.

    Note:
        ``total_nodes`` in the summary comes from the pagination total,
        whereas ``active_nodes``, ``by_node_type`` and ``by_state`` are
        counted over the returned page of nodes only. Every instance whose
        health status is not "passing" is counted as unhealthy.
    """
    correlation_id = correlation_id or uuid4()
    all_warnings: list[ModelWarning] = []

    # Nodes first, then instances; warnings are kept in that order.
    nodes, pagination, node_warnings = await self.list_nodes(
        limit=limit,
        offset=offset,
        correlation_id=correlation_id,
    )
    all_warnings.extend(node_warnings)

    instances, instance_warnings = await self.list_instances(
        include_unhealthy=True,
        correlation_id=correlation_id,
    )
    all_warnings.extend(instance_warnings)

    # Tally the returned nodes by type and by state.
    by_node_type: dict[str, int] = {}
    by_state: dict[str, int] = {}
    for node in nodes:
        by_node_type[node.node_type] = by_node_type.get(node.node_type, 0) + 1
        by_state[node.state] = by_state.get(node.state, 0) + 1
    active_count = sum(1 for node in nodes if node.state == "active")

    healthy_count = len(
        [inst for inst in instances if inst.health_status == "passing"]
    )
    unhealthy_count = len(instances) - healthy_count

    summary = ModelRegistrySummary(
        total_nodes=pagination.total,
        active_nodes=active_count,
        healthy_instances=healthy_count,
        unhealthy_instances=unhealthy_count,
        by_node_type=by_node_type,
        by_state=by_state,
    )

    return ModelRegistryDiscoveryResponse(
        timestamp=datetime.now(UTC),
        warnings=all_warnings,
        summary=summary,
        nodes=nodes,
        live_instances=instances,
        pagination=pagination,
    )
634
+
635
+ def get_widget_mapping(
636
+ self,
637
+ ) -> tuple[ModelWidgetMapping | None, list[ModelWarning]]:
638
+ """Load and return widget mapping configuration.
639
+
640
+ Returns cached mapping if available and file unchanged, otherwise
641
+ loads from YAML file.
642
+
643
+ The cache is automatically invalidated when the file's modification
644
+ time changes, enabling hot-reload of widget mappings without restart.
645
+
646
+ Returns:
647
+ Tuple of (widget_mapping or None, warnings).
648
+ """
649
+ warnings: list[ModelWarning] = []
650
+
651
+ # Check if file has been modified since last cache
652
+ current_mtime: float | None = None
653
+ try:
654
+ current_mtime = self._widget_mapping_path.stat().st_mtime
655
+ if (
656
+ self._widget_mapping_cache is not None
657
+ and self._widget_mapping_mtime == current_mtime
658
+ ):
659
+ return self._widget_mapping_cache, warnings
660
+ except OSError:
661
+ # File doesn't exist or can't be accessed - will be handled below
662
+ pass
663
+
664
+ # Log cache invalidation due to file change (only when cache existed)
665
+ if self._widget_mapping_cache is not None and current_mtime is not None:
666
+ logger.info(
667
+ "Widget mapping cache invalidated, reloading from file",
668
+ extra={
669
+ "widget_mapping_path": str(self._widget_mapping_path),
670
+ "old_mtime": self._widget_mapping_mtime,
671
+ "new_mtime": current_mtime,
672
+ },
673
+ )
674
+
675
+ if not self._widget_mapping_path.exists():
676
+ warnings.append(
677
+ ModelWarning(
678
+ source="config",
679
+ message=f"Widget mapping file not found: {self._widget_mapping_path}",
680
+ code="CONFIG_NOT_FOUND",
681
+ timestamp=datetime.now(UTC),
682
+ )
683
+ )
684
+ return None, warnings
685
+
686
+ try:
687
+ with open(self._widget_mapping_path) as f:
688
+ data = yaml.safe_load(f)
689
+
690
+ # Parse capability mappings
691
+ capability_mappings: dict[str, ModelCapabilityWidgetMapping] = {}
692
+ for key, value in data.get("capability_mappings", {}).items():
693
+ capability_mappings[key] = ModelCapabilityWidgetMapping(
694
+ widget_type=value.get("widget_type", "info_card"),
695
+ defaults=ModelWidgetDefaults(**value.get("defaults", {})),
696
+ )
697
+
698
+ # Parse semantic mappings
699
+ semantic_mappings: dict[str, ModelCapabilityWidgetMapping] = {}
700
+ for key, value in data.get("semantic_mappings", {}).items():
701
+ semantic_mappings[key] = ModelCapabilityWidgetMapping(
702
+ widget_type=value.get("widget_type", "info_card"),
703
+ defaults=ModelWidgetDefaults(**value.get("defaults", {})),
704
+ )
705
+
706
+ # Parse fallback
707
+ fallback_data = data.get("fallback", {})
708
+ fallback = ModelCapabilityWidgetMapping(
709
+ widget_type=fallback_data.get("widget_type", "info_card"),
710
+ defaults=ModelWidgetDefaults(**fallback_data.get("defaults", {})),
711
+ )
712
+
713
+ self._widget_mapping_cache = ModelWidgetMapping(
714
+ version=data.get("version", "1.0.0"),
715
+ capability_mappings=capability_mappings,
716
+ semantic_mappings=semantic_mappings,
717
+ fallback=fallback,
718
+ )
719
+ self._widget_mapping_mtime = current_mtime
720
+
721
+ logger.debug(
722
+ "Widget mapping loaded",
723
+ extra={
724
+ "widget_mapping_path": str(self._widget_mapping_path),
725
+ "mtime": current_mtime,
726
+ "version": data.get("version", "1.0.0"),
727
+ },
728
+ )
729
+
730
+ return self._widget_mapping_cache, warnings
731
+
732
+ except Exception as e:
733
+ logger.exception(
734
+ "Failed to load widget mapping",
735
+ extra={"path": str(self._widget_mapping_path)},
736
+ )
737
+ warnings.append(
738
+ ModelWarning(
739
+ source="config",
740
+ message=f"Failed to load widget mapping: {type(e).__name__}",
741
+ code="CONFIG_LOAD_FAILED",
742
+ timestamp=datetime.now(UTC),
743
+ )
744
+ )
745
+ return None, warnings
746
+
747
async def health_check(
    self,
    correlation_id: UUID | None = None,
) -> ModelRegistryHealthResponse:
    """Perform health check on all backend components.

    Three components are probed and reported under fixed keys:

    * ``"postgres"`` - the projection reader; healthy when a
      ``count_by_state`` call completes without raising.
    * ``"consul"`` - the Consul handler; mirrors the ``healthy`` flag and
      ``reason`` of the handler's own ``health_check`` result.
    * ``"config"`` - the widget mapping; healthy when
      ``get_widget_mapping`` returns no warnings.

    A component that is not configured (``None``) or whose probe raises is
    reported as unhealthy; for exceptions only the exception type name is
    exposed in the message.

    The overall status is derived from the number of unhealthy components:
    none -> ``"healthy"``, some but not all -> ``"degraded"``,
    all -> ``"unhealthy"``.

    Args:
        correlation_id: Optional correlation ID for tracing. A new UUID is
            generated when omitted.

    Returns:
        Health check response with component statuses.
    """
    correlation_id = correlation_id or uuid4()
    components: dict[str, JsonType] = {}

    # Check projection reader
    if self._projection_reader is None:
        components["postgres"] = {
            "healthy": False,
            "message": "Not configured",
        }
    else:
        try:
            # Simple query to verify connection
            await self._projection_reader.count_by_state(
                correlation_id=correlation_id,
            )
            components["postgres"] = {
                "healthy": True,
                "message": "Connected",
            }
        except Exception as e:
            components["postgres"] = {
                "healthy": False,
                "message": f"Error: {type(e).__name__}",
            }

    # Check Consul handler
    if self._consul_handler is None:
        components["consul"] = {
            "healthy": False,
            "message": "Not configured",
        }
    else:
        try:
            result = await self._consul_handler.health_check(
                correlation_id=correlation_id,
            )
            components["consul"] = {
                "healthy": result.healthy,
                "message": result.reason,
            }
        except Exception as e:
            components["consul"] = {
                "healthy": False,
                "message": f"Error: {type(e).__name__}",
            }

    # Check widget mapping; only the first warning is surfaced as message
    _, mapping_warnings = self.get_widget_mapping()
    if mapping_warnings:
        components["config"] = {
            "healthy": False,
            "message": mapping_warnings[0].message,
        }
    else:
        components["config"] = {
            "healthy": True,
            "message": "Loaded",
        }

    # Determine overall status from the per-component flags. This count is
    # the single source of truth for the status (no separate running flag).
    unhealthy_count = sum(
        1
        for c in components.values()
        if isinstance(c, dict) and not c.get("healthy", False)
    )
    if unhealthy_count == 0:
        status = "healthy"
    elif unhealthy_count < len(components):
        status = "degraded"
    else:
        status = "unhealthy"

    return ModelRegistryHealthResponse(
        status=status,  # type: ignore[arg-type]
        timestamp=datetime.now(UTC),
        components=components,
        version="1.0.0",
    )
844
+
845
+
846
+ __all__ = ["ServiceRegistryDiscovery"]