omnibase_infra 0.2.1__py3-none-any.whl → 0.2.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- omnibase_infra/__init__.py +1 -1
- omnibase_infra/adapters/adapter_onex_tool_execution.py +451 -0
- omnibase_infra/capabilities/__init__.py +15 -0
- omnibase_infra/capabilities/capability_inference_rules.py +211 -0
- omnibase_infra/capabilities/contract_capability_extractor.py +221 -0
- omnibase_infra/capabilities/intent_type_extractor.py +160 -0
- omnibase_infra/cli/commands.py +1 -1
- omnibase_infra/configs/widget_mapping.yaml +176 -0
- omnibase_infra/contracts/handlers/filesystem/handler_contract.yaml +5 -2
- omnibase_infra/contracts/handlers/mcp/handler_contract.yaml +5 -2
- omnibase_infra/enums/__init__.py +6 -0
- omnibase_infra/enums/enum_handler_error_type.py +10 -0
- omnibase_infra/enums/enum_handler_source_mode.py +72 -0
- omnibase_infra/enums/enum_kafka_acks.py +99 -0
- omnibase_infra/errors/error_compute_registry.py +4 -1
- omnibase_infra/errors/error_event_bus_registry.py +4 -1
- omnibase_infra/errors/error_infra.py +3 -1
- omnibase_infra/errors/error_policy_registry.py +4 -1
- omnibase_infra/event_bus/event_bus_kafka.py +1 -1
- omnibase_infra/event_bus/models/config/model_kafka_event_bus_config.py +59 -10
- omnibase_infra/handlers/__init__.py +8 -1
- omnibase_infra/handlers/handler_consul.py +7 -1
- omnibase_infra/handlers/handler_db.py +10 -3
- omnibase_infra/handlers/handler_graph.py +10 -5
- omnibase_infra/handlers/handler_http.py +8 -2
- omnibase_infra/handlers/handler_intent.py +387 -0
- omnibase_infra/handlers/handler_mcp.py +745 -63
- omnibase_infra/handlers/handler_vault.py +11 -5
- omnibase_infra/handlers/mixins/mixin_consul_kv.py +4 -3
- omnibase_infra/handlers/mixins/mixin_consul_service.py +2 -1
- omnibase_infra/handlers/registration_storage/handler_registration_storage_postgres.py +7 -0
- omnibase_infra/handlers/service_discovery/handler_service_discovery_consul.py +308 -4
- omnibase_infra/handlers/service_discovery/models/model_service_info.py +10 -0
- omnibase_infra/mixins/mixin_async_circuit_breaker.py +3 -2
- omnibase_infra/mixins/mixin_node_introspection.py +42 -7
- omnibase_infra/mixins/mixin_retry_execution.py +1 -1
- omnibase_infra/models/discovery/model_introspection_config.py +11 -0
- omnibase_infra/models/handlers/__init__.py +48 -5
- omnibase_infra/models/handlers/model_bootstrap_handler_descriptor.py +162 -0
- omnibase_infra/models/handlers/model_contract_discovery_result.py +6 -4
- omnibase_infra/models/handlers/model_handler_descriptor.py +15 -0
- omnibase_infra/models/handlers/model_handler_source_config.py +220 -0
- omnibase_infra/models/mcp/__init__.py +15 -0
- omnibase_infra/models/mcp/model_mcp_contract_config.py +80 -0
- omnibase_infra/models/mcp/model_mcp_server_config.py +67 -0
- omnibase_infra/models/mcp/model_mcp_tool_definition.py +73 -0
- omnibase_infra/models/mcp/model_mcp_tool_parameter.py +35 -0
- omnibase_infra/models/registration/model_node_capabilities.py +11 -0
- omnibase_infra/models/registration/model_node_introspection_event.py +9 -0
- omnibase_infra/models/runtime/model_handler_contract.py +25 -9
- omnibase_infra/models/runtime/model_loaded_handler.py +9 -0
- omnibase_infra/nodes/architecture_validator/contract_architecture_validator.yaml +0 -5
- omnibase_infra/nodes/architecture_validator/registry/registry_infra_architecture_validator.py +17 -10
- omnibase_infra/nodes/effects/contract.yaml +0 -5
- omnibase_infra/nodes/node_registration_orchestrator/contract.yaml +7 -0
- omnibase_infra/nodes/node_registration_orchestrator/handlers/handler_node_introspected.py +86 -1
- omnibase_infra/nodes/node_registration_orchestrator/introspection_event_router.py +3 -3
- omnibase_infra/nodes/node_registration_orchestrator/plugin.py +1 -1
- omnibase_infra/nodes/node_registration_orchestrator/registry/registry_infra_node_registration_orchestrator.py +9 -8
- omnibase_infra/nodes/node_registration_orchestrator/timeout_coordinator.py +4 -3
- omnibase_infra/nodes/node_registration_orchestrator/wiring.py +14 -13
- omnibase_infra/nodes/node_registration_storage_effect/contract.yaml +0 -5
- omnibase_infra/nodes/node_registration_storage_effect/node.py +4 -1
- omnibase_infra/nodes/node_registration_storage_effect/registry/registry_infra_registration_storage.py +47 -26
- omnibase_infra/nodes/node_registry_effect/contract.yaml +0 -5
- omnibase_infra/nodes/node_registry_effect/handlers/handler_partial_retry.py +2 -1
- omnibase_infra/nodes/node_service_discovery_effect/registry/registry_infra_service_discovery.py +28 -20
- omnibase_infra/plugins/examples/plugin_json_normalizer.py +2 -2
- omnibase_infra/plugins/examples/plugin_json_normalizer_error_handling.py +2 -2
- omnibase_infra/plugins/plugin_compute_base.py +16 -2
- omnibase_infra/protocols/__init__.py +2 -0
- omnibase_infra/protocols/protocol_container_aware.py +200 -0
- omnibase_infra/protocols/protocol_event_projector.py +1 -1
- omnibase_infra/runtime/__init__.py +90 -1
- omnibase_infra/runtime/binding_config_resolver.py +102 -37
- omnibase_infra/runtime/constants_notification.py +75 -0
- omnibase_infra/runtime/contract_handler_discovery.py +6 -1
- omnibase_infra/runtime/handler_bootstrap_source.py +507 -0
- omnibase_infra/runtime/handler_contract_config_loader.py +603 -0
- omnibase_infra/runtime/handler_contract_source.py +267 -186
- omnibase_infra/runtime/handler_identity.py +81 -0
- omnibase_infra/runtime/handler_plugin_loader.py +19 -2
- omnibase_infra/runtime/handler_registry.py +11 -3
- omnibase_infra/runtime/handler_source_resolver.py +326 -0
- omnibase_infra/runtime/mixin_semver_cache.py +25 -1
- omnibase_infra/runtime/mixins/__init__.py +7 -0
- omnibase_infra/runtime/mixins/mixin_projector_notification_publishing.py +566 -0
- omnibase_infra/runtime/mixins/mixin_projector_sql_operations.py +31 -10
- omnibase_infra/runtime/models/__init__.py +24 -0
- omnibase_infra/runtime/models/model_health_check_result.py +2 -1
- omnibase_infra/runtime/models/model_projector_notification_config.py +171 -0
- omnibase_infra/runtime/models/model_transition_notification_outbox_config.py +112 -0
- omnibase_infra/runtime/models/model_transition_notification_outbox_metrics.py +140 -0
- omnibase_infra/runtime/models/model_transition_notification_publisher_metrics.py +357 -0
- omnibase_infra/runtime/projector_plugin_loader.py +1 -1
- omnibase_infra/runtime/projector_shell.py +229 -1
- omnibase_infra/runtime/protocol_lifecycle_executor.py +6 -6
- omnibase_infra/runtime/protocols/__init__.py +10 -0
- omnibase_infra/runtime/registry/registry_protocol_binding.py +16 -15
- omnibase_infra/runtime/registry_contract_source.py +693 -0
- omnibase_infra/runtime/registry_policy.py +9 -326
- omnibase_infra/runtime/secret_resolver.py +4 -2
- omnibase_infra/runtime/service_kernel.py +11 -3
- omnibase_infra/runtime/service_message_dispatch_engine.py +4 -2
- omnibase_infra/runtime/service_runtime_host_process.py +589 -106
- omnibase_infra/runtime/transition_notification_outbox.py +1190 -0
- omnibase_infra/runtime/transition_notification_publisher.py +764 -0
- omnibase_infra/runtime/util_container_wiring.py +6 -5
- omnibase_infra/runtime/util_wiring.py +17 -4
- omnibase_infra/schemas/schema_transition_notification_outbox.sql +245 -0
- omnibase_infra/services/__init__.py +21 -0
- omnibase_infra/services/corpus_capture.py +7 -1
- omnibase_infra/services/mcp/__init__.py +31 -0
- omnibase_infra/services/mcp/mcp_server_lifecycle.py +449 -0
- omnibase_infra/services/mcp/service_mcp_tool_discovery.py +411 -0
- omnibase_infra/services/mcp/service_mcp_tool_registry.py +329 -0
- omnibase_infra/services/mcp/service_mcp_tool_sync.py +547 -0
- omnibase_infra/services/registry_api/__init__.py +40 -0
- omnibase_infra/services/registry_api/main.py +261 -0
- omnibase_infra/services/registry_api/models/__init__.py +66 -0
- omnibase_infra/services/registry_api/models/model_capability_widget_mapping.py +38 -0
- omnibase_infra/services/registry_api/models/model_pagination_info.py +48 -0
- omnibase_infra/services/registry_api/models/model_registry_discovery_response.py +73 -0
- omnibase_infra/services/registry_api/models/model_registry_health_response.py +49 -0
- omnibase_infra/services/registry_api/models/model_registry_instance_view.py +88 -0
- omnibase_infra/services/registry_api/models/model_registry_node_view.py +88 -0
- omnibase_infra/services/registry_api/models/model_registry_summary.py +60 -0
- omnibase_infra/services/registry_api/models/model_response_list_instances.py +43 -0
- omnibase_infra/services/registry_api/models/model_response_list_nodes.py +51 -0
- omnibase_infra/services/registry_api/models/model_warning.py +49 -0
- omnibase_infra/services/registry_api/models/model_widget_defaults.py +28 -0
- omnibase_infra/services/registry_api/models/model_widget_mapping.py +51 -0
- omnibase_infra/services/registry_api/routes.py +371 -0
- omnibase_infra/services/registry_api/service.py +837 -0
- omnibase_infra/services/service_capability_query.py +4 -4
- omnibase_infra/services/service_health.py +3 -2
- omnibase_infra/services/service_timeout_emitter.py +20 -3
- omnibase_infra/services/service_timeout_scanner.py +7 -3
- omnibase_infra/services/session/__init__.py +56 -0
- omnibase_infra/services/session/config_consumer.py +120 -0
- omnibase_infra/services/session/config_store.py +139 -0
- omnibase_infra/services/session/consumer.py +1007 -0
- omnibase_infra/services/session/protocol_session_aggregator.py +117 -0
- omnibase_infra/services/session/store.py +997 -0
- omnibase_infra/utils/__init__.py +19 -0
- omnibase_infra/utils/util_atomic_file.py +261 -0
- omnibase_infra/utils/util_db_transaction.py +239 -0
- omnibase_infra/utils/util_dsn_validation.py +1 -1
- omnibase_infra/utils/util_retry_optimistic.py +281 -0
- omnibase_infra/validation/__init__.py +3 -19
- omnibase_infra/validation/contracts/security.validation.yaml +114 -0
- omnibase_infra/validation/infra_validators.py +35 -24
- omnibase_infra/validation/validation_exemptions.yaml +140 -9
- omnibase_infra/validation/validator_chain_propagation.py +2 -2
- omnibase_infra/validation/validator_runtime_shape.py +1 -1
- omnibase_infra/validation/validator_security.py +473 -370
- {omnibase_infra-0.2.1.dist-info → omnibase_infra-0.2.3.dist-info}/METADATA +3 -3
- {omnibase_infra-0.2.1.dist-info → omnibase_infra-0.2.3.dist-info}/RECORD +161 -98
- {omnibase_infra-0.2.1.dist-info → omnibase_infra-0.2.3.dist-info}/WHEEL +0 -0
- {omnibase_infra-0.2.1.dist-info → omnibase_infra-0.2.3.dist-info}/entry_points.txt +0 -0
- {omnibase_infra-0.2.1.dist-info → omnibase_infra-0.2.3.dist-info}/licenses/LICENSE +0 -0
|
@@ -0,0 +1,764 @@
|
|
|
1
|
+
# SPDX-License-Identifier: MIT
|
|
2
|
+
# Copyright (c) 2025 OmniNode Team
|
|
3
|
+
"""
|
|
4
|
+
Transition Notification Publisher Implementation.
|
|
5
|
+
|
|
6
|
+
Publishes state transition notifications after projection commits. This enables
|
|
7
|
+
orchestrators to reliably detect state transitions via the Observer pattern,
|
|
8
|
+
maintaining loose coupling between reducers and workflow coordinators.
|
|
9
|
+
|
|
10
|
+
Architecture Overview:
|
|
11
|
+
This service implements post-commit notification publishing in the ONEX
|
|
12
|
+
state machine architecture:
|
|
13
|
+
|
|
14
|
+
1. Reducers commit state transitions to projections
|
|
15
|
+
2. Post-commit hook creates ModelStateTransitionNotification
|
|
16
|
+
3. TransitionNotificationPublisher publishes to event bus
|
|
17
|
+
4. Orchestrators subscribe and coordinate downstream workflows
|
|
18
|
+
|
|
19
|
+
```
|
|
20
|
+
Reducer -> Projection Commit -> Notification Publisher -> Event Bus
|
|
21
|
+
|
|
|
22
|
+
v
|
|
23
|
+
Orchestrators (subscribers)
|
|
24
|
+
```
|
|
25
|
+
|
|
26
|
+
Design Principles:
|
|
27
|
+
- **Loose Coupling**: Reducers don't know about orchestrators
|
|
28
|
+
- **At-Least-Once Delivery**: Consumers handle idempotency via projection_version
|
|
29
|
+
- **Circuit Breaker**: Resilience against event bus failures
|
|
30
|
+
- **Correlation Tracking**: Full distributed tracing support
|
|
31
|
+
|
|
32
|
+
Concurrency Safety:
|
|
33
|
+
This implementation is coroutine-safe for concurrent async publishing.
|
|
34
|
+
Uses asyncio locks for circuit breaker state management. Note: This is
|
|
35
|
+
coroutine-safe, not thread-safe. For multi-threaded access, additional
|
|
36
|
+
synchronization would be required.
|
|
37
|
+
|
|
38
|
+
Error Handling:
|
|
39
|
+
All methods raise ONEX error types:
|
|
40
|
+
- InfraConnectionError: Event bus unavailable or connection failed
|
|
41
|
+
- InfraTimeoutError: Publish operation timed out
|
|
42
|
+
- InfraUnavailableError: Circuit breaker open
|
|
43
|
+
|
|
44
|
+
Example Usage:
|
|
45
|
+
```python
|
|
46
|
+
from omnibase_infra.runtime import TransitionNotificationPublisher
|
|
47
|
+
from omnibase_core.models.notifications import ModelStateTransitionNotification
|
|
48
|
+
|
|
49
|
+
# Initialize publisher with event bus
|
|
50
|
+
publisher = TransitionNotificationPublisher(
|
|
51
|
+
event_bus=kafka_event_bus,
|
|
52
|
+
topic="onex.fsm.state.transitions.v1",
|
|
53
|
+
)
|
|
54
|
+
|
|
55
|
+
# Publish single notification
|
|
56
|
+
notification = ModelStateTransitionNotification(
|
|
57
|
+
aggregate_type="registration",
|
|
58
|
+
aggregate_id=entity_id,
|
|
59
|
+
from_state="pending",
|
|
60
|
+
to_state="active",
|
|
61
|
+
projection_version=1,
|
|
62
|
+
correlation_id=correlation_id,
|
|
63
|
+
causation_id=event_id,
|
|
64
|
+
timestamp=datetime.now(UTC),
|
|
65
|
+
)
|
|
66
|
+
await publisher.publish(notification)
|
|
67
|
+
|
|
68
|
+
# Batch publish
|
|
69
|
+
await publisher.publish_batch([notification1, notification2])
|
|
70
|
+
|
|
71
|
+
# Get metrics
|
|
72
|
+
metrics = publisher.get_metrics()
|
|
73
|
+
print(f"Published {metrics.notifications_published} notifications")
|
|
74
|
+
```
|
|
75
|
+
|
|
76
|
+
Related Tickets:
|
|
77
|
+
- OMN-1139: Implement TransitionNotificationPublisher
|
|
78
|
+
|
|
79
|
+
See Also:
|
|
80
|
+
- ProtocolTransitionNotificationPublisher: Protocol definition (omnibase_core)
|
|
81
|
+
- ModelStateTransitionNotification: Notification model (omnibase_core)
|
|
82
|
+
- ProtocolEventBusLike: Event bus protocol
|
|
83
|
+
"""
|
|
84
|
+
|
|
85
|
+
from __future__ import annotations
|
|
86
|
+
|
|
87
|
+
import asyncio
|
|
88
|
+
import logging
|
|
89
|
+
import time
|
|
90
|
+
from datetime import UTC, datetime
|
|
91
|
+
from typing import TYPE_CHECKING, NamedTuple
|
|
92
|
+
from uuid import UUID
|
|
93
|
+
|
|
94
|
+
from omnibase_core.models.events.model_event_envelope import ModelEventEnvelope
|
|
95
|
+
from omnibase_core.models.notifications import ModelStateTransitionNotification
|
|
96
|
+
from omnibase_core.protocols.notifications import (
|
|
97
|
+
ProtocolTransitionNotificationPublisher,
|
|
98
|
+
)
|
|
99
|
+
from omnibase_core.utils.util_uuid_service import UtilUUID
|
|
100
|
+
from omnibase_infra.enums import EnumInfraTransportType
|
|
101
|
+
from omnibase_infra.errors import (
|
|
102
|
+
InfraConnectionError,
|
|
103
|
+
InfraTimeoutError,
|
|
104
|
+
InfraUnavailableError,
|
|
105
|
+
ModelInfraErrorContext,
|
|
106
|
+
ModelTimeoutErrorContext,
|
|
107
|
+
)
|
|
108
|
+
from omnibase_infra.mixins import MixinAsyncCircuitBreaker
|
|
109
|
+
from omnibase_infra.models.resilience import ModelCircuitBreakerConfig
|
|
110
|
+
from omnibase_infra.runtime.models.model_transition_notification_publisher_metrics import (
|
|
111
|
+
ModelTransitionNotificationPublisherMetrics,
|
|
112
|
+
)
|
|
113
|
+
from omnibase_infra.utils.util_error_sanitization import sanitize_error_string
|
|
114
|
+
|
|
115
|
+
if TYPE_CHECKING:
|
|
116
|
+
from omnibase_infra.protocols import ProtocolEventBusLike
|
|
117
|
+
|
|
118
|
+
logger = logging.getLogger(__name__)
|
|
119
|
+
|
|
120
|
+
|
|
121
|
+
class FailedNotificationRecord(NamedTuple):
    """One notification that could not be published.

    Immutable, tuple-compatible record collected while a batch is being
    published so that failures can be reported and debugged with
    unambiguous field names.

    Attributes:
        aggregate_type: Aggregate type of the failed notification
            (e.g., "registration").
        aggregate_id: Aggregate identifier, stored as a string so it can be
            dropped straight into error reports.
        error_message: Sanitized description of why publishing failed.
    """

    aggregate_type: str
    aggregate_id: str
    error_message: str
|
|
136
|
+
|
|
137
|
+
|
|
138
|
+
class TransitionNotificationPublisher(MixinAsyncCircuitBreaker):
|
|
139
|
+
"""Publishes transition notifications after projection commits.
|
|
140
|
+
|
|
141
|
+
Implements ProtocolTransitionNotificationPublisher from omnibase_core.
|
|
142
|
+
Provides at-least-once delivery semantics for state transition notifications
|
|
143
|
+
to enable orchestrator coordination without tight coupling to reducers.
|
|
144
|
+
|
|
145
|
+
Features:
|
|
146
|
+
- Protocol compliant (ProtocolTransitionNotificationPublisher)
|
|
147
|
+
- Circuit breaker resilience (MixinAsyncCircuitBreaker)
|
|
148
|
+
- Metrics tracking for observability
|
|
149
|
+
- Batch publishing for efficiency
|
|
150
|
+
- Correlation ID propagation for distributed tracing
|
|
151
|
+
|
|
152
|
+
Circuit Breaker:
|
|
153
|
+
Uses MixinAsyncCircuitBreaker for resilience:
|
|
154
|
+
- Opens after consecutive failures (configurable threshold)
|
|
155
|
+
- Resets after timeout period (configurable)
|
|
156
|
+
- Raises InfraUnavailableError when open
|
|
157
|
+
|
|
158
|
+
Thread Safety:
|
|
159
|
+
Coroutine-safe via asyncio.Lock for circuit breaker state.
|
|
160
|
+
Not thread-safe - use only from async context.
|
|
161
|
+
|
|
162
|
+
Attributes:
|
|
163
|
+
_event_bus: Event bus for publishing notifications
|
|
164
|
+
_topic: Target topic for notifications
|
|
165
|
+
_lock: Async lock for metrics updates
|
|
166
|
+
_publisher_id: Unique identifier for this publisher instance
|
|
167
|
+
|
|
168
|
+
Example:
|
|
169
|
+
>>> publisher = TransitionNotificationPublisher(event_bus, topic="notifications.v1")
|
|
170
|
+
>>> await publisher.publish(notification)
|
|
171
|
+
>>> metrics = publisher.get_metrics()
|
|
172
|
+
>>> print(f"Success rate: {metrics.publish_success_rate():.2%}")
|
|
173
|
+
"""
|
|
174
|
+
|
|
175
|
+
# Default maximum number of failures to track in memory during batch operations.
|
|
176
|
+
# Prevents unbounded memory growth for very large batches with many failures.
|
|
177
|
+
# Can be overridden via constructor parameter for large batch tuning.
|
|
178
|
+
DEFAULT_MAX_TRACKED_FAILURES: int = 100
|
|
179
|
+
|
|
180
|
+
def __init__(
    self,
    event_bus: ProtocolEventBusLike,
    topic: str,
    *,
    publisher_id: str | None = None,
    circuit_breaker_threshold: int = 5,
    circuit_breaker_reset_timeout: float = 60.0,
    max_tracked_failures: int = DEFAULT_MAX_TRACKED_FAILURES,
) -> None:
    """Set up the publisher, its metrics counters and its circuit breaker.

    Args:
        event_bus: Event bus used for publishing. It is called through
            ``publish_envelope()``, so it must provide that method.
        topic: Target topic for transition notifications. Required; it
            should come from the projector's contract or notification
            config rather than being hardcoded, e.g.
            "onex.fsm.state.transitions.v1" or
            "registration.state.transitions.v1".
        publisher_id: Optional identifier for this publisher instance.
            When omitted (or empty), an identifier of the form
            ``transition-publisher-<uuid>`` is generated.
        circuit_breaker_threshold: Maximum failures before the circuit
            opens. Default: 5
        circuit_breaker_reset_timeout: Seconds before automatic reset.
            Default: 60.0
        max_tracked_failures: Upper bound on the number of failure records
            kept in memory during one batch operation; raise it to capture
            more failure detail for very large batches. Default: 100

    Example:
        >>> publisher = TransitionNotificationPublisher(
        ...     event_bus=kafka_event_bus,
        ...     topic="onex.fsm.state.transitions.v1",
        ...     circuit_breaker_threshold=3,
        ...     circuit_breaker_reset_timeout=30.0,
        ...     max_tracked_failures=200,  # Tune for large batches
        ... )
    """
    self._event_bus = event_bus
    self._topic = topic

    # Any falsy publisher_id (None or "") falls back to a generated one.
    if publisher_id:
        self._publisher_id = publisher_id
    else:
        self._publisher_id = f"transition-publisher-{UtilUUID.generate()!s}"

    self._lock = asyncio.Lock()
    self._max_tracked_failures = max_tracked_failures

    # Counters and timings that back the publisher metrics.
    self._notifications_published = 0
    self._notifications_failed = 0
    self._batch_operations = 0
    self._batch_notifications_attempted = 0
    self._batch_notifications_total = 0
    self._batch_failures_truncated = 0
    self._last_publish_at: datetime | None = None
    self._last_publish_duration_ms: float = 0.0
    self._total_publish_duration_ms: float = 0.0
    self._max_publish_duration_ms: float = 0.0
    self._started_at = datetime.now(UTC)

    # Configure the circuit breaker supplied by MixinAsyncCircuitBreaker.
    # Note: the mixin sets self.circuit_breaker_threshold and
    # self.circuit_breaker_reset_timeout as instance attributes.
    breaker_config = ModelCircuitBreakerConfig(
        threshold=circuit_breaker_threshold,
        reset_timeout_seconds=circuit_breaker_reset_timeout,
        service_name=f"transition-notification-publisher.{topic}",
        transport_type=EnumInfraTransportType.KAFKA,
    )
    self._init_circuit_breaker_from_config(breaker_config)

    init_details = {
        "publisher_id": self._publisher_id,
        "topic": self._topic,
        "circuit_breaker_threshold": circuit_breaker_threshold,
        "circuit_breaker_reset_timeout": circuit_breaker_reset_timeout,
        "max_tracked_failures": self._max_tracked_failures,
    }
    logger.info(
        "TransitionNotificationPublisher initialized",
        extra=init_details,
    )
|
|
263
|
+
|
|
264
|
+
@property
|
|
265
|
+
def topic(self) -> str:
    """Return the topic this publisher was configured with."""
    return self._topic
|
|
268
|
+
|
|
269
|
+
@property
|
|
270
|
+
def publisher_id(self) -> str:
    """Return the identifier assigned to this publisher instance."""
    return self._publisher_id
|
|
273
|
+
|
|
274
|
+
async def publish(
    self,
    notification: ModelStateTransitionNotification,
) -> None:
    """Publish a single state transition notification.

    Wraps the notification in a ModelEventEnvelope and publishes it to the
    configured topic via the event bus. Delivery is at-least-once, so
    consumers should handle idempotency via projection_version.

    On success the circuit breaker is reset and the publish metrics
    (count, last/total/max duration, last publish time) are updated.
    On any failure the failure handler is invoked before the error is
    raised.

    Args:
        notification: The state transition notification to publish.

    Raises:
        InfraConnectionError: If event bus connection fails. An
            InfraConnectionError raised by the event bus itself is
            propagated unchanged; any other unexpected exception is
            wrapped in one.
        InfraTimeoutError: If publish operation times out.
        InfraUnavailableError: If circuit breaker is open.

    Example:
        >>> notification = ModelStateTransitionNotification(
        ...     aggregate_type="registration",
        ...     aggregate_id=uuid4(),
        ...     from_state="pending",
        ...     to_state="active",
        ...     projection_version=1,
        ...     correlation_id=uuid4(),
        ...     causation_id=uuid4(),
        ...     timestamp=datetime.now(UTC),
        ... )
        >>> await publisher.publish(notification)
    """
    correlation_id = notification.correlation_id
    start_time = time.monotonic()

    # Check circuit breaker before doing any work. This sits outside the
    # try block, so an error raised by the check itself is not routed
    # through the failure handler below.
    async with self._circuit_breaker_lock:
        await self._check_circuit_breaker("publish", correlation_id)

    # Error context used only if an unexpected exception has to be wrapped.
    ctx = ModelInfraErrorContext.with_correlation(
        correlation_id=correlation_id,
        transport_type=EnumInfraTransportType.KAFKA,
        operation="publish_transition_notification",
        target_name=self._topic,
    )

    try:
        # Create envelope wrapping the notification model directly.
        # ModelEventEnvelope[T] is generic and handles Pydantic models natively,
        # serializing them lazily when needed via to_dict_lazy().
        envelope = ModelEventEnvelope[ModelStateTransitionNotification](
            payload=notification,
            correlation_id=correlation_id,
            source_tool=self._publisher_id,
        )

        # Publish to event bus
        await self._event_bus.publish_envelope(envelope, self._topic)

        # Elapsed time is measured from method entry, so it includes the
        # circuit breaker check above.
        duration_ms = (time.monotonic() - start_time) * 1000

        # Record success
        async with self._circuit_breaker_lock:
            await self._reset_circuit_breaker()

        # Update metrics
        async with self._lock:
            self._notifications_published += 1
            self._last_publish_at = datetime.now(UTC)
            self._last_publish_duration_ms = duration_ms
            self._total_publish_duration_ms += duration_ms
            self._max_publish_duration_ms = max(
                self._max_publish_duration_ms, duration_ms
            )

        logger.debug(
            "Published transition notification",
            extra={
                "aggregate_type": notification.aggregate_type,
                "aggregate_id": str(notification.aggregate_id),
                "from_state": notification.from_state,
                "to_state": notification.to_state,
                "projection_version": notification.projection_version,
                "correlation_id": str(correlation_id),
                "duration_ms": duration_ms,
            },
        )

    except (InfraConnectionError, InfraTimeoutError, InfraUnavailableError):
        # Re-raise infrastructure errors without wrapping - preserve error
        # semantics. InfraConnectionError is included so an error that is
        # already of the documented type is not re-wrapped by the generic
        # handler at the bottom.
        await self._handle_failure("publish", correlation_id)
        raise

    except TimeoutError as e:
        # Translate a plain timeout into the ONEX timeout error type.
        await self._handle_failure("publish", correlation_id)
        timeout_ctx = ModelTimeoutErrorContext(
            transport_type=EnumInfraTransportType.KAFKA,
            operation="publish_transition_notification",
            target_name=self._topic,
            correlation_id=correlation_id,
        )
        raise InfraTimeoutError(
            f"Timeout publishing transition notification for "
            f"{notification.aggregate_type}:{notification.aggregate_id}",
            context=timeout_ctx,
        ) from e

    except Exception as e:
        # Anything else is reported as a connection failure, with the
        # original exception chained as the cause.
        await self._handle_failure("publish", correlation_id)
        raise InfraConnectionError(
            f"Failed to publish transition notification for "
            f"{notification.aggregate_type}:{notification.aggregate_id}",
            context=ctx,
        ) from e
|
|
388
|
+
|
|
389
|
+
async def publish_batch(
|
|
390
|
+
self,
|
|
391
|
+
notifications: list[ModelStateTransitionNotification],
|
|
392
|
+
) -> None:
|
|
393
|
+
"""Publish multiple state transition notifications.
|
|
394
|
+
|
|
395
|
+
Publishes each notification sequentially, continuing on individual
|
|
396
|
+
failures. This method is provided for efficiency when multiple
|
|
397
|
+
transitions occur in a single unit of work.
|
|
398
|
+
|
|
399
|
+
Ordering:
|
|
400
|
+
Notifications are published in the order provided. The order is
|
|
401
|
+
preserved when delivery order matters for workflow correctness.
|
|
402
|
+
|
|
403
|
+
Error Handling:
|
|
404
|
+
If any notification fails to publish, the error is raised after
|
|
405
|
+
attempting all notifications. Partial success is possible.
|
|
406
|
+
|
|
407
|
+
Circuit Breaker Behavior:
|
|
408
|
+
The circuit breaker is checked only at the start of the batch
|
|
409
|
+
operation. However, individual publish() calls within the batch
|
|
410
|
+
can trip the circuit breaker if they fail. If the circuit breaker
|
|
411
|
+
opens mid-batch (due to accumulated failures from individual
|
|
412
|
+
publish calls), subsequent notifications in the batch will fail
|
|
413
|
+
with InfraUnavailableError. This is expected "partial success"
|
|
414
|
+
behavior - the batch continues attempting all notifications, but
|
|
415
|
+
failures are recorded and reported at the end.
|
|
416
|
+
|
|
417
|
+
Correlation ID Behavior:
|
|
418
|
+
The batch uses the **first notification's correlation_id** for all
|
|
419
|
+
batch-level operations:
|
|
420
|
+
|
|
421
|
+
- Circuit breaker checks (at batch start)
|
|
422
|
+
- Batch summary logging ("Batch publish completed")
|
|
423
|
+
- Error context creation (when raising InfraConnectionError)
|
|
424
|
+
- Failure summary logging ("Batch publish failures - details")
|
|
425
|
+
|
|
426
|
+
However, **individual notification errors are logged with their own
|
|
427
|
+
correlation_id**. When a specific notification fails within the batch,
|
|
428
|
+
the warning log entry includes that notification's correlation_id,
|
|
429
|
+
not the batch correlation_id.
|
|
430
|
+
|
|
431
|
+
This design is intentional:
|
|
432
|
+
|
|
433
|
+
1. **Batch-level traceability**: Using a single correlation_id for
|
|
434
|
+
batch operations allows operators to correlate all batch-related
|
|
435
|
+
log entries and metrics under one trace ID.
|
|
436
|
+
|
|
437
|
+
2. **Per-notification traceability**: Individual failure logs retain
|
|
438
|
+
their specific correlation_id, enabling operators to trace the
|
|
439
|
+
complete lifecycle of each notification independently.
|
|
440
|
+
|
|
441
|
+
Example log correlation::
|
|
442
|
+
|
|
443
|
+
# Batch-level log (uses first notification's correlation_id)
|
|
444
|
+
{"message": "Batch publish completed", "correlation_id": "aaa-111"}
|
|
445
|
+
|
|
446
|
+
# Individual failure log (uses that notification's correlation_id)
|
|
447
|
+
{"message": "Failed to publish notification in batch",
|
|
448
|
+
"correlation_id": "bbb-222"}
|
|
449
|
+
|
|
450
|
+
Args:
|
|
451
|
+
notifications: List of notifications to publish.
|
|
452
|
+
|
|
453
|
+
Raises:
|
|
454
|
+
InfraConnectionError: If event bus connection fails.
|
|
455
|
+
InfraTimeoutError: If publish operation times out.
|
|
456
|
+
InfraUnavailableError: If circuit breaker is open (at batch start
|
|
457
|
+
or if tripped mid-batch by individual publish failures).
|
|
458
|
+
|
|
459
|
+
Example:
|
|
460
|
+
>>> notifications = [notification1, notification2, notification3]
|
|
461
|
+
>>> await publisher.publish_batch(notifications)
|
|
462
|
+
"""
|
|
463
|
+
if not notifications:
|
|
464
|
+
return
|
|
465
|
+
|
|
466
|
+
correlation_id = notifications[0].correlation_id
|
|
467
|
+
start_time = time.monotonic()
|
|
468
|
+
|
|
469
|
+
# Batch-level circuit breaker check for fail-fast behavior.
|
|
470
|
+
# NOTE: This check is NOT redundant with the per-notification check in publish().
|
|
471
|
+
# - This check: Fail-fast before starting any work if circuit is already open
|
|
472
|
+
# - Per-notification checks in publish(): Handle circuit opening MID-batch due to
|
|
473
|
+
# accumulated failures during batch processing (expected partial-success behavior)
|
|
474
|
+
# See docstring "Circuit Breaker Behavior" section for full explanation.
|
|
475
|
+
async with self._circuit_breaker_lock:
|
|
476
|
+
await self._check_circuit_breaker("publish_batch", correlation_id)
|
|
477
|
+
|
|
478
|
+
success_count = 0
|
|
479
|
+
last_error: Exception | None = None
|
|
480
|
+
failed_notifications: list[FailedNotificationRecord] = []
|
|
481
|
+
truncation_occurred = False
|
|
482
|
+
# Track error types to determine most severe error for final raise.
|
|
483
|
+
# Severity order: InfraUnavailableError > InfraTimeoutError > InfraConnectionError
|
|
484
|
+
encountered_unavailable = False
|
|
485
|
+
encountered_timeout = False
|
|
486
|
+
|
|
487
|
+
for notification in notifications:
|
|
488
|
+
try:
|
|
489
|
+
await self.publish(notification)
|
|
490
|
+
success_count += 1
|
|
491
|
+
except (
|
|
492
|
+
InfraConnectionError,
|
|
493
|
+
InfraTimeoutError,
|
|
494
|
+
InfraUnavailableError,
|
|
495
|
+
) as e:
|
|
496
|
+
last_error = e
|
|
497
|
+
# Track error types for determining most severe error to raise
|
|
498
|
+
if isinstance(e, InfraUnavailableError):
|
|
499
|
+
encountered_unavailable = True
|
|
500
|
+
elif isinstance(e, InfraTimeoutError):
|
|
501
|
+
encountered_timeout = True
|
|
502
|
+
# Only track failures up to the limit to prevent unbounded memory growth
|
|
503
|
+
if len(failed_notifications) < self._max_tracked_failures:
|
|
504
|
+
failed_notifications.append(
|
|
505
|
+
FailedNotificationRecord(
|
|
506
|
+
aggregate_type=notification.aggregate_type,
|
|
507
|
+
aggregate_id=str(notification.aggregate_id),
|
|
508
|
+
error_message=sanitize_error_string(str(e)),
|
|
509
|
+
)
|
|
510
|
+
)
|
|
511
|
+
else:
|
|
512
|
+
# Mark that truncation occurred (limit reached)
|
|
513
|
+
truncation_occurred = True
|
|
514
|
+
logger.warning(
|
|
515
|
+
"Failed to publish notification in batch",
|
|
516
|
+
extra={
|
|
517
|
+
"aggregate_type": notification.aggregate_type,
|
|
518
|
+
"aggregate_id": str(notification.aggregate_id),
|
|
519
|
+
"error": sanitize_error_string(str(e)),
|
|
520
|
+
"correlation_id": str(notification.correlation_id),
|
|
521
|
+
},
|
|
522
|
+
)
|
|
523
|
+
# Continue with remaining notifications
|
|
524
|
+
|
|
525
|
+
# Calculate duration
|
|
526
|
+
duration_ms = (time.monotonic() - start_time) * 1000
|
|
527
|
+
|
|
528
|
+
# Update batch metrics
|
|
529
|
+
async with self._lock:
|
|
530
|
+
self._batch_operations += 1
|
|
531
|
+
self._batch_notifications_attempted += len(notifications)
|
|
532
|
+
self._batch_notifications_total += success_count
|
|
533
|
+
if truncation_occurred:
|
|
534
|
+
self._batch_failures_truncated += 1
|
|
535
|
+
|
|
536
|
+
failure_count = len(notifications) - success_count
|
|
537
|
+
|
|
538
|
+
# Log aggregate failure information when truncation occurs
|
|
539
|
+
if truncation_occurred:
|
|
540
|
+
failure_summary = self._summarize_failure_types(failed_notifications)
|
|
541
|
+
untracked_failures = failure_count - len(failed_notifications)
|
|
542
|
+
logger.warning(
|
|
543
|
+
"Batch publish failure tracking truncated",
|
|
544
|
+
extra={
|
|
545
|
+
"correlation_id": str(correlation_id),
|
|
546
|
+
"total_failures": failure_count,
|
|
547
|
+
"tracked_failures": len(failed_notifications),
|
|
548
|
+
"untracked_failures": untracked_failures,
|
|
549
|
+
"max_tracked_failures": self._max_tracked_failures,
|
|
550
|
+
"failure_type_summary": failure_summary,
|
|
551
|
+
},
|
|
552
|
+
)
|
|
553
|
+
|
|
554
|
+
logger.info(
|
|
555
|
+
"Batch publish completed",
|
|
556
|
+
extra={
|
|
557
|
+
"total": len(notifications),
|
|
558
|
+
"success": success_count,
|
|
559
|
+
"failed": failure_count,
|
|
560
|
+
"duration_ms": duration_ms,
|
|
561
|
+
"correlation_id": str(correlation_id),
|
|
562
|
+
},
|
|
563
|
+
)
|
|
564
|
+
|
|
565
|
+
# Raise with detailed failure information if any failures occurred
|
|
566
|
+
if last_error is not None:
|
|
567
|
+
ctx = ModelInfraErrorContext.with_correlation(
|
|
568
|
+
correlation_id=correlation_id,
|
|
569
|
+
transport_type=EnumInfraTransportType.KAFKA,
|
|
570
|
+
operation="publish_batch",
|
|
571
|
+
target_name=self._topic,
|
|
572
|
+
)
|
|
573
|
+
|
|
574
|
+
# Log failure details for debugging before raising truncated error.
|
|
575
|
+
# Limit logged failures to prevent oversized log entries while
|
|
576
|
+
# preserving full counts for metrics and observability.
|
|
577
|
+
max_logged_failures = 10
|
|
578
|
+
logged_failures = [
|
|
579
|
+
{
|
|
580
|
+
"aggregate_type": record.aggregate_type,
|
|
581
|
+
"aggregate_id": record.aggregate_id,
|
|
582
|
+
"error_message": record.error_message,
|
|
583
|
+
}
|
|
584
|
+
for record in failed_notifications[:max_logged_failures]
|
|
585
|
+
]
|
|
586
|
+
failures_truncated = len(failed_notifications) > max_logged_failures
|
|
587
|
+
|
|
588
|
+
logger.warning(
|
|
589
|
+
"Batch publish failures - details",
|
|
590
|
+
extra={
|
|
591
|
+
"correlation_id": str(correlation_id),
|
|
592
|
+
"topic": self._topic,
|
|
593
|
+
"total_notifications": len(notifications),
|
|
594
|
+
"success_count": success_count,
|
|
595
|
+
"failure_count": failure_count,
|
|
596
|
+
"tracked_failures": len(failed_notifications),
|
|
597
|
+
"max_tracked_failures": self._max_tracked_failures,
|
|
598
|
+
"logged_failures": len(logged_failures),
|
|
599
|
+
"failures_truncated": failures_truncated,
|
|
600
|
+
"failures": logged_failures,
|
|
601
|
+
},
|
|
602
|
+
)
|
|
603
|
+
|
|
604
|
+
# Build detailed error message showing first 3 failures
|
|
605
|
+
failure_details = "; ".join(
|
|
606
|
+
f"{record.aggregate_type}:{record.aggregate_id[:8]}... - "
|
|
607
|
+
f"{record.error_message[:50]}"
|
|
608
|
+
for record in failed_notifications[:3]
|
|
609
|
+
)
|
|
610
|
+
if failure_count > 3:
|
|
611
|
+
failure_details += f" ... and {failure_count - 3} more"
|
|
612
|
+
|
|
613
|
+
error_message = (
|
|
614
|
+
f"Batch publish partially failed: {failure_count}/{len(notifications)} "
|
|
615
|
+
f"notifications failed ({success_count} succeeded). "
|
|
616
|
+
f"Failures: [{failure_details}]"
|
|
617
|
+
)
|
|
618
|
+
|
|
619
|
+
# Raise the most severe error type encountered during batch processing.
|
|
620
|
+
# Severity order: InfraUnavailableError > InfraTimeoutError > InfraConnectionError
|
|
621
|
+
# This preserves error semantics so callers can handle appropriately
|
|
622
|
+
# (e.g., retry on timeout, skip on unavailable).
|
|
623
|
+
if encountered_unavailable:
|
|
624
|
+
raise InfraUnavailableError(
|
|
625
|
+
error_message,
|
|
626
|
+
context=ctx,
|
|
627
|
+
) from last_error
|
|
628
|
+
if encountered_timeout:
|
|
629
|
+
timeout_ctx = ModelTimeoutErrorContext(
|
|
630
|
+
transport_type=EnumInfraTransportType.KAFKA,
|
|
631
|
+
operation="publish_batch",
|
|
632
|
+
target_name=self._topic,
|
|
633
|
+
correlation_id=correlation_id,
|
|
634
|
+
)
|
|
635
|
+
raise InfraTimeoutError(
|
|
636
|
+
error_message,
|
|
637
|
+
context=timeout_ctx,
|
|
638
|
+
) from last_error
|
|
639
|
+
raise InfraConnectionError(
|
|
640
|
+
error_message,
|
|
641
|
+
context=ctx,
|
|
642
|
+
) from last_error
|
|
643
|
+
|
|
644
|
+
async def _handle_failure(self, operation: str, correlation_id: UUID) -> None:
    """Register one failed publish with the circuit breaker and the metrics.

    The work happens in two separately locked steps, in this order:

    1. While holding ``self._circuit_breaker_lock``, the failure is handed
       to ``self._record_circuit_failure``.
    2. While holding ``self._lock``, ``self._notifications_failed`` is
       incremented by one.

    If step 1 raises, the failure counter is left untouched.

    Args:
        operation: Name of the operation that failed; forwarded unchanged
            to ``_record_circuit_failure``.
        correlation_id: Correlation ID of the failed operation; forwarded
            unchanged to ``_record_circuit_failure``.
    """
    # Step 1: circuit breaker bookkeeping under its dedicated lock.
    breaker_guard = self._circuit_breaker_lock
    async with breaker_guard:
        await self._record_circuit_failure(operation, correlation_id)

    # Step 2: failure metric, guarded by the separate metrics lock.
    metrics_guard = self._lock
    async with metrics_guard:
        self._notifications_failed += 1
|
|
660
|
+
|
|
661
|
+
def _summarize_failure_types(
    self, failures: list[FailedNotificationRecord]
) -> dict[str, int]:
    """Count failures per error-message prefix.

    Each record is bucketed by the first 50 characters of its
    ``error_message``; messages that share that prefix are counted
    together. Buckets appear in the order their prefix was first seen.

    Args:
        failures: Failed notification records to tally. Only the
            ``error_message`` attribute of each record is read.

    Returns:
        Mapping from message prefix (at most 50 characters) to the number
        of records carrying that prefix. Empty when ``failures`` is empty.

    Example:
        >>> failures = [
        ...     FailedNotificationRecord("reg", "id1", "Connection refused to broker"),
        ...     FailedNotificationRecord("reg", "id2", "Connection refused to broker"),
        ...     FailedNotificationRecord("reg", "id3", "Timeout waiting for response"),
        ... ]
        >>> publisher._summarize_failure_types(failures)
        {'Connection refused to broker': 2, 'Timeout waiting for response': 1}
    """
    counts: dict[str, int] = {}
    for prefix in (record.error_message[:50] for record in failures):
        if prefix in counts:
            counts[prefix] += 1
        else:
            # First occurrence of this prefix opens a new bucket.
            counts[prefix] = 1
    return counts
|
|
692
|
+
|
|
693
|
+
def get_metrics(self) -> ModelTransitionNotificationPublisherMetrics:
    """Build a metrics snapshot from the publisher's current counters.

    The snapshot combines the publisher's own counters and timings with
    two values derived from ``self._get_circuit_breaker_state()``:

    * ``circuit_breaker_open`` is true only when the state mapping's
      ``"state"`` entry equals ``"open"``.
    * ``consecutive_failures`` is the mapping's ``"failures"`` entry when
      that entry is an ``int``, otherwise ``0``.

    ``average_publish_duration_ms`` is the total publish duration divided
    by the published count, or ``0.0`` when nothing has been published.

    Returns:
        ModelTransitionNotificationPublisherMetrics populated with the
        values read at call time.

    Example:
        >>> metrics = publisher.get_metrics()
        >>> print(f"Published: {metrics.notifications_published}")
        >>> print(f"Breaker open: {metrics.circuit_breaker_open}")
    """
    # Circuit breaker view: open flag plus a defensively typed failure count.
    breaker = self._get_circuit_breaker_state()
    breaker_is_open = breaker.get("state") == "open"
    raw_failures = breaker.get("failures", 0)
    if isinstance(raw_failures, int):
        failure_streak = raw_failures
    else:
        failure_streak = 0

    # Mean publish duration; guard against dividing by zero before the
    # first publish has been counted.
    if self._notifications_published > 0:
        mean_duration_ms = (
            self._total_publish_duration_ms / self._notifications_published
        )
    else:
        mean_duration_ms = 0.0

    return ModelTransitionNotificationPublisherMetrics(
        publisher_id=self._publisher_id,
        topic=self._topic,
        notifications_published=self._notifications_published,
        notifications_failed=self._notifications_failed,
        batch_operations=self._batch_operations,
        batch_notifications_attempted=self._batch_notifications_attempted,
        batch_notifications_total=self._batch_notifications_total,
        batch_failures_truncated=self._batch_failures_truncated,
        last_publish_at=self._last_publish_at,
        last_publish_duration_ms=self._last_publish_duration_ms,
        average_publish_duration_ms=mean_duration_ms,
        max_publish_duration_ms=self._max_publish_duration_ms,
        circuit_breaker_open=breaker_is_open,
        consecutive_failures=failure_streak,
        started_at=self._started_at,
    )
|
|
739
|
+
|
|
740
|
+
|
|
741
|
+
# Protocol compliance check (runtime_checkable allows isinstance checks)
|
|
742
|
+
def _verify_protocol_compliance() -> None:  # pragma: no cover
    """Static-typing probe for ``ProtocolTransitionNotificationPublisher``.

    Assigns a ``TransitionNotificationPublisher`` to a variable declared
    with the protocol type so that a type checker reports any mismatch
    between the implementation and the protocol. The function is meant for
    static analysis only and is excluded from coverage; it has no return
    value and keeps nothing it creates.
    """
    from typing import cast

    from omnibase_infra.event_bus.event_bus_inmemory import EventBusInmemory

    # An in-memory bus is enough to satisfy the constructor; the cast tells
    # the type checker to treat it as the expected event bus protocol.
    in_memory_bus = cast("ProtocolEventBusLike", EventBusInmemory())

    # Declare the protocol type first, then assign the concrete publisher:
    # the assignment is where the type checker validates compliance.
    checked: ProtocolTransitionNotificationPublisher
    checked = TransitionNotificationPublisher(
        event_bus=in_memory_bus,
        topic="onex.fsm.state.transitions.v1",
    )

    # Reference the variable so linters do not flag it as unused.
    _ = checked
|
|
762
|
+
|
|
763
|
+
|
|
764
|
+
# Public API of this module: the names exported by a star-import.
__all__: list[str] = ["FailedNotificationRecord", "TransitionNotificationPublisher"]
|