omnibase_infra 0.2.8__py3-none-any.whl → 0.2.9__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- omnibase_infra/__init__.py +1 -1
- omnibase_infra/enums/__init__.py +4 -0
- omnibase_infra/enums/enum_declarative_node_violation.py +102 -0
- omnibase_infra/event_bus/adapters/__init__.py +31 -0
- omnibase_infra/event_bus/adapters/adapter_protocol_event_publisher_kafka.py +517 -0
- omnibase_infra/mixins/mixin_async_circuit_breaker.py +113 -1
- omnibase_infra/models/__init__.py +9 -0
- omnibase_infra/models/event_bus/__init__.py +22 -0
- omnibase_infra/models/event_bus/model_consumer_retry_config.py +367 -0
- omnibase_infra/models/event_bus/model_dlq_config.py +177 -0
- omnibase_infra/models/event_bus/model_idempotency_config.py +131 -0
- omnibase_infra/models/event_bus/model_offset_policy_config.py +107 -0
- omnibase_infra/models/resilience/model_circuit_breaker_config.py +15 -0
- omnibase_infra/models/validation/__init__.py +8 -0
- omnibase_infra/models/validation/model_declarative_node_validation_result.py +139 -0
- omnibase_infra/models/validation/model_declarative_node_violation.py +169 -0
- omnibase_infra/nodes/architecture_validator/__init__.py +28 -7
- omnibase_infra/nodes/architecture_validator/constants.py +36 -0
- omnibase_infra/nodes/architecture_validator/handlers/__init__.py +28 -0
- omnibase_infra/nodes/architecture_validator/handlers/contract.yaml +120 -0
- omnibase_infra/nodes/architecture_validator/handlers/handler_architecture_validation.py +359 -0
- omnibase_infra/nodes/architecture_validator/node.py +1 -0
- omnibase_infra/nodes/architecture_validator/node_architecture_validator.py +48 -336
- omnibase_infra/nodes/node_ledger_projection_compute/__init__.py +16 -2
- omnibase_infra/nodes/node_ledger_projection_compute/contract.yaml +14 -4
- omnibase_infra/nodes/node_ledger_projection_compute/handlers/__init__.py +18 -0
- omnibase_infra/nodes/node_ledger_projection_compute/handlers/contract.yaml +53 -0
- omnibase_infra/nodes/node_ledger_projection_compute/handlers/handler_ledger_projection.py +354 -0
- omnibase_infra/nodes/node_ledger_projection_compute/node.py +20 -256
- omnibase_infra/nodes/node_registry_effect/node.py +20 -73
- omnibase_infra/protocols/protocol_dispatch_engine.py +90 -0
- omnibase_infra/runtime/__init__.py +11 -0
- omnibase_infra/runtime/baseline_subscriptions.py +150 -0
- omnibase_infra/runtime/event_bus_subcontract_wiring.py +455 -24
- omnibase_infra/runtime/kafka_contract_source.py +13 -5
- omnibase_infra/runtime/service_message_dispatch_engine.py +112 -0
- omnibase_infra/runtime/service_runtime_host_process.py +6 -11
- omnibase_infra/services/__init__.py +36 -0
- omnibase_infra/services/contract_publisher/__init__.py +95 -0
- omnibase_infra/services/contract_publisher/config.py +199 -0
- omnibase_infra/services/contract_publisher/errors.py +243 -0
- omnibase_infra/services/contract_publisher/models/__init__.py +28 -0
- omnibase_infra/services/contract_publisher/models/model_contract_error.py +67 -0
- omnibase_infra/services/contract_publisher/models/model_infra_error.py +62 -0
- omnibase_infra/services/contract_publisher/models/model_publish_result.py +112 -0
- omnibase_infra/services/contract_publisher/models/model_publish_stats.py +79 -0
- omnibase_infra/services/contract_publisher/service.py +617 -0
- omnibase_infra/services/contract_publisher/sources/__init__.py +52 -0
- omnibase_infra/services/contract_publisher/sources/model_discovered.py +155 -0
- omnibase_infra/services/contract_publisher/sources/protocol.py +101 -0
- omnibase_infra/services/contract_publisher/sources/source_composite.py +309 -0
- omnibase_infra/services/contract_publisher/sources/source_filesystem.py +174 -0
- omnibase_infra/services/contract_publisher/sources/source_package.py +221 -0
- omnibase_infra/services/observability/__init__.py +40 -0
- omnibase_infra/services/observability/agent_actions/__init__.py +64 -0
- omnibase_infra/services/observability/agent_actions/config.py +209 -0
- omnibase_infra/services/observability/agent_actions/consumer.py +1320 -0
- omnibase_infra/services/observability/agent_actions/models/__init__.py +87 -0
- omnibase_infra/services/observability/agent_actions/models/model_agent_action.py +142 -0
- omnibase_infra/services/observability/agent_actions/models/model_detection_failure.py +125 -0
- omnibase_infra/services/observability/agent_actions/models/model_envelope.py +85 -0
- omnibase_infra/services/observability/agent_actions/models/model_execution_log.py +159 -0
- omnibase_infra/services/observability/agent_actions/models/model_performance_metric.py +130 -0
- omnibase_infra/services/observability/agent_actions/models/model_routing_decision.py +138 -0
- omnibase_infra/services/observability/agent_actions/models/model_transformation_event.py +124 -0
- omnibase_infra/services/observability/agent_actions/tests/__init__.py +20 -0
- omnibase_infra/services/observability/agent_actions/tests/test_consumer.py +1154 -0
- omnibase_infra/services/observability/agent_actions/tests/test_models.py +645 -0
- omnibase_infra/services/observability/agent_actions/tests/test_writer.py +709 -0
- omnibase_infra/services/observability/agent_actions/writer_postgres.py +926 -0
- omnibase_infra/validation/__init__.py +12 -0
- omnibase_infra/validation/contracts/declarative_node.validation.yaml +143 -0
- omnibase_infra/validation/validation_exemptions.yaml +93 -0
- omnibase_infra/validation/validator_declarative_node.py +850 -0
- {omnibase_infra-0.2.8.dist-info → omnibase_infra-0.2.9.dist-info}/METADATA +2 -2
- {omnibase_infra-0.2.8.dist-info → omnibase_infra-0.2.9.dist-info}/RECORD +79 -27
- {omnibase_infra-0.2.8.dist-info → omnibase_infra-0.2.9.dist-info}/WHEEL +0 -0
- {omnibase_infra-0.2.8.dist-info → omnibase_infra-0.2.9.dist-info}/entry_points.txt +0 -0
- {omnibase_infra-0.2.8.dist-info → omnibase_infra-0.2.9.dist-info}/licenses/LICENSE +0 -0
|
@@ -191,6 +191,7 @@ class MixinAsyncCircuitBreaker:
|
|
|
191
191
|
reset_timeout: float = 60.0,
|
|
192
192
|
service_name: str = "unknown",
|
|
193
193
|
transport_type: EnumInfraTransportType = EnumInfraTransportType.HTTP,
|
|
194
|
+
half_open_successes: int = 1,
|
|
194
195
|
) -> None:
|
|
195
196
|
"""Initialize circuit breaker state and configuration.
|
|
196
197
|
|
|
@@ -202,9 +203,11 @@ class MixinAsyncCircuitBreaker:
|
|
|
202
203
|
reset_timeout: Seconds before automatic reset (default: 60.0)
|
|
203
204
|
service_name: Service identifier for error context (e.g., "kafka.dev")
|
|
204
205
|
transport_type: Transport type for error context (default: HTTP)
|
|
206
|
+
half_open_successes: Successful requests required to close circuit
|
|
207
|
+
from half-open state (default: 1)
|
|
205
208
|
|
|
206
209
|
Raises:
|
|
207
|
-
ValueError: If threshold < 1 or reset_timeout < 0
|
|
210
|
+
ValueError: If threshold < 1 or reset_timeout < 0 or half_open_successes < 1
|
|
208
211
|
|
|
209
212
|
Example:
|
|
210
213
|
```python
|
|
@@ -215,6 +218,7 @@ class MixinAsyncCircuitBreaker:
|
|
|
215
218
|
reset_timeout=config.circuit_breaker_reset_timeout,
|
|
216
219
|
service_name=f"my-service.{config.environment}",
|
|
217
220
|
transport_type=EnumInfraTransportType.HTTP,
|
|
221
|
+
half_open_successes=config.circuit_breaker_half_open_successes,
|
|
218
222
|
)
|
|
219
223
|
```
|
|
220
224
|
"""
|
|
@@ -243,15 +247,30 @@ class MixinAsyncCircuitBreaker:
|
|
|
243
247
|
parameter="reset_timeout",
|
|
244
248
|
value=reset_timeout,
|
|
245
249
|
)
|
|
250
|
+
if half_open_successes < 1:
|
|
251
|
+
context = ModelInfraErrorContext.with_correlation(
|
|
252
|
+
transport_type=transport_type,
|
|
253
|
+
operation="init_circuit_breaker",
|
|
254
|
+
target_name=service_name,
|
|
255
|
+
)
|
|
256
|
+
raise ProtocolConfigurationError(
|
|
257
|
+
f"Circuit breaker half_open_successes must be >= 1, got {half_open_successes}",
|
|
258
|
+
context=context,
|
|
259
|
+
parameter="half_open_successes",
|
|
260
|
+
value=half_open_successes,
|
|
261
|
+
)
|
|
246
262
|
|
|
247
263
|
# State variables
|
|
248
264
|
self._circuit_breaker_failures = 0
|
|
249
265
|
self._circuit_breaker_open = False
|
|
250
266
|
self._circuit_breaker_open_until: float = 0.0
|
|
267
|
+
self._circuit_breaker_half_open = False
|
|
268
|
+
self._circuit_breaker_half_open_success_count = 0
|
|
251
269
|
|
|
252
270
|
# Configuration
|
|
253
271
|
self.circuit_breaker_threshold = threshold
|
|
254
272
|
self.circuit_breaker_reset_timeout = reset_timeout
|
|
273
|
+
self.circuit_breaker_half_open_successes = half_open_successes
|
|
255
274
|
self.service_name = service_name
|
|
256
275
|
self._cb_transport_type = (
|
|
257
276
|
transport_type # Use private name to avoid property conflicts
|
|
@@ -265,6 +284,7 @@ class MixinAsyncCircuitBreaker:
|
|
|
265
284
|
extra={
|
|
266
285
|
"threshold": threshold,
|
|
267
286
|
"reset_timeout": reset_timeout,
|
|
287
|
+
"half_open_successes": half_open_successes,
|
|
268
288
|
"transport_type": transport_type.value,
|
|
269
289
|
},
|
|
270
290
|
)
|
|
@@ -298,6 +318,7 @@ class MixinAsyncCircuitBreaker:
|
|
|
298
318
|
reset_timeout_seconds=60.0,
|
|
299
319
|
service_name=f"kafka.{environment}",
|
|
300
320
|
transport_type=EnumInfraTransportType.KAFKA,
|
|
321
|
+
half_open_successes=2,
|
|
301
322
|
)
|
|
302
323
|
self._init_circuit_breaker_from_config(config)
|
|
303
324
|
```
|
|
@@ -311,6 +332,7 @@ class MixinAsyncCircuitBreaker:
|
|
|
311
332
|
reset_timeout=config.reset_timeout_seconds,
|
|
312
333
|
service_name=config.service_name,
|
|
313
334
|
transport_type=config.transport_type,
|
|
335
|
+
half_open_successes=config.half_open_successes,
|
|
314
336
|
)
|
|
315
337
|
|
|
316
338
|
async def _check_circuit_breaker(
|
|
@@ -388,12 +410,15 @@ class MixinAsyncCircuitBreaker:
|
|
|
388
410
|
if current_time >= self._circuit_breaker_open_until:
|
|
389
411
|
# Transition to HALF_OPEN (atomic write protected by caller's lock)
|
|
390
412
|
self._circuit_breaker_open = False
|
|
413
|
+
self._circuit_breaker_half_open = True
|
|
414
|
+
self._circuit_breaker_half_open_success_count = 0
|
|
391
415
|
self._circuit_breaker_failures = 0
|
|
392
416
|
logger.info(
|
|
393
417
|
f"Circuit breaker transitioning to half-open for {self.service_name}",
|
|
394
418
|
extra={
|
|
395
419
|
"service": self.service_name,
|
|
396
420
|
"operation": operation,
|
|
421
|
+
"required_successes": self.circuit_breaker_half_open_successes,
|
|
397
422
|
},
|
|
398
423
|
)
|
|
399
424
|
else:
|
|
@@ -484,6 +509,26 @@ class MixinAsyncCircuitBreaker:
|
|
|
484
509
|
# Increment failure counter (atomic write protected by caller's lock)
|
|
485
510
|
self._circuit_breaker_failures += 1
|
|
486
511
|
|
|
512
|
+
# If in half-open state, any failure immediately re-opens the circuit
|
|
513
|
+
if self._circuit_breaker_half_open:
|
|
514
|
+
self._circuit_breaker_open = True
|
|
515
|
+
self._circuit_breaker_half_open = False
|
|
516
|
+
self._circuit_breaker_half_open_success_count = 0
|
|
517
|
+
self._circuit_breaker_open_until = (
|
|
518
|
+
time.time() + self.circuit_breaker_reset_timeout
|
|
519
|
+
)
|
|
520
|
+
|
|
521
|
+
logger.warning(
|
|
522
|
+
f"Circuit breaker re-opened for {self.service_name} after failure in half-open state",
|
|
523
|
+
extra={
|
|
524
|
+
"service": self.service_name,
|
|
525
|
+
"operation": operation,
|
|
526
|
+
"reset_timeout": self.circuit_breaker_reset_timeout,
|
|
527
|
+
"correlation_id": str(correlation_id) if correlation_id else None,
|
|
528
|
+
},
|
|
529
|
+
)
|
|
530
|
+
return
|
|
531
|
+
|
|
487
532
|
# Check if threshold reached
|
|
488
533
|
if self._circuit_breaker_failures >= self.circuit_breaker_threshold:
|
|
489
534
|
# Transition to OPEN state (atomic write protected by caller's lock)
|
|
@@ -564,6 +609,39 @@ class MixinAsyncCircuitBreaker:
|
|
|
564
609
|
)
|
|
565
610
|
# Still proceed but log the violation for debugging
|
|
566
611
|
|
|
612
|
+
# If in half-open state, track successes
|
|
613
|
+
if self._circuit_breaker_half_open:
|
|
614
|
+
self._circuit_breaker_half_open_success_count += 1
|
|
615
|
+
|
|
616
|
+
if (
|
|
617
|
+
self._circuit_breaker_half_open_success_count
|
|
618
|
+
>= self.circuit_breaker_half_open_successes
|
|
619
|
+
):
|
|
620
|
+
# Enough successes - transition to CLOSED
|
|
621
|
+
logger.info(
|
|
622
|
+
f"Circuit breaker closed for {self.service_name} after {self._circuit_breaker_half_open_success_count} successful requests in half-open state",
|
|
623
|
+
extra={
|
|
624
|
+
"service": self.service_name,
|
|
625
|
+
"half_open_successes": self._circuit_breaker_half_open_success_count,
|
|
626
|
+
"required_successes": self.circuit_breaker_half_open_successes,
|
|
627
|
+
},
|
|
628
|
+
)
|
|
629
|
+
self._circuit_breaker_half_open = False
|
|
630
|
+
self._circuit_breaker_half_open_success_count = 0
|
|
631
|
+
self._circuit_breaker_failures = 0
|
|
632
|
+
self._circuit_breaker_open_until = 0.0
|
|
633
|
+
else:
|
|
634
|
+
# Still in half-open, waiting for more successes
|
|
635
|
+
logger.debug(
|
|
636
|
+
f"Circuit breaker half-open success {self._circuit_breaker_half_open_success_count}/{self.circuit_breaker_half_open_successes} for {self.service_name}",
|
|
637
|
+
extra={
|
|
638
|
+
"service": self.service_name,
|
|
639
|
+
"half_open_successes": self._circuit_breaker_half_open_success_count,
|
|
640
|
+
"required_successes": self.circuit_breaker_half_open_successes,
|
|
641
|
+
},
|
|
642
|
+
)
|
|
643
|
+
return
|
|
644
|
+
|
|
567
645
|
# Log state transition if circuit was open or had failures
|
|
568
646
|
if self._circuit_breaker_open or self._circuit_breaker_failures > 0:
|
|
569
647
|
previous_state = "open" if self._circuit_breaker_open else "closed"
|
|
@@ -578,6 +656,8 @@ class MixinAsyncCircuitBreaker:
|
|
|
578
656
|
|
|
579
657
|
# Reset state (atomic write protected by caller's lock)
|
|
580
658
|
self._circuit_breaker_open = False
|
|
659
|
+
self._circuit_breaker_half_open = False
|
|
660
|
+
self._circuit_breaker_half_open_success_count = 0
|
|
581
661
|
self._circuit_breaker_failures = 0
|
|
582
662
|
self._circuit_breaker_open_until = 0.0
|
|
583
663
|
|
|
@@ -621,10 +701,15 @@ class MixinAsyncCircuitBreaker:
|
|
|
621
701
|
|
|
622
702
|
# Read state variables with safe defaults for uninitialized state
|
|
623
703
|
cb_open = getattr(self, "_circuit_breaker_open", False)
|
|
704
|
+
cb_half_open = getattr(self, "_circuit_breaker_half_open", False)
|
|
624
705
|
cb_open_until = getattr(self, "_circuit_breaker_open_until", 0.0)
|
|
625
706
|
cb_failures = getattr(self, "_circuit_breaker_failures", 0)
|
|
626
707
|
cb_threshold = getattr(self, "circuit_breaker_threshold", 5)
|
|
627
708
|
cb_reset_timeout = getattr(self, "circuit_breaker_reset_timeout", 60.0)
|
|
709
|
+
cb_half_open_successes = getattr(self, "circuit_breaker_half_open_successes", 1)
|
|
710
|
+
cb_half_open_success_count = getattr(
|
|
711
|
+
self, "_circuit_breaker_half_open_success_count", 0
|
|
712
|
+
)
|
|
628
713
|
|
|
629
714
|
# Calculate state: closed, open, or half_open
|
|
630
715
|
current_time = time.time()
|
|
@@ -635,6 +720,9 @@ class MixinAsyncCircuitBreaker:
|
|
|
635
720
|
else:
|
|
636
721
|
cb_state = "open"
|
|
637
722
|
seconds_until_half_open = round(cb_open_until - current_time, 2)
|
|
723
|
+
elif cb_half_open:
|
|
724
|
+
cb_state = "half_open"
|
|
725
|
+
seconds_until_half_open = None
|
|
638
726
|
else:
|
|
639
727
|
cb_state = "closed"
|
|
640
728
|
seconds_until_half_open = None
|
|
@@ -645,12 +733,36 @@ class MixinAsyncCircuitBreaker:
|
|
|
645
733
|
"failures": cb_failures,
|
|
646
734
|
"threshold": cb_threshold,
|
|
647
735
|
"reset_timeout_seconds": cb_reset_timeout,
|
|
736
|
+
"half_open_successes_required": cb_half_open_successes,
|
|
648
737
|
}
|
|
649
738
|
|
|
650
739
|
if seconds_until_half_open is not None:
|
|
651
740
|
result["seconds_until_half_open"] = seconds_until_half_open
|
|
652
741
|
|
|
742
|
+
if cb_state == "half_open":
|
|
743
|
+
result["half_open_success_count"] = cb_half_open_success_count
|
|
744
|
+
|
|
653
745
|
return result
|
|
654
746
|
|
|
747
|
+
def get_circuit_breaker_state(self) -> dict[str, JsonType]:
|
|
748
|
+
"""Return current circuit breaker state for external introspection.
|
|
749
|
+
|
|
750
|
+
This is the public API for accessing circuit breaker state. Adapters and
|
|
751
|
+
other external consumers should use this method rather than accessing
|
|
752
|
+
private attributes.
|
|
753
|
+
|
|
754
|
+
See `_get_circuit_breaker_state()` for implementation details.
|
|
755
|
+
|
|
756
|
+
Returns:
|
|
757
|
+
dict containing:
|
|
758
|
+
- initialized: Whether circuit breaker has been initialized
|
|
759
|
+
- state: Current state ("closed", "open", or "half_open")
|
|
760
|
+
- failures: Current failure count
|
|
761
|
+
- threshold: Configured failure threshold
|
|
762
|
+
- reset_timeout_seconds: Configured reset timeout
|
|
763
|
+
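- seconds_until_half_open: Seconds until half_open (only when open)
- half_open_successes_required: Configured successes needed to close from half_open
- half_open_success_count: Successes recorded so far (only when half_open)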
|
|
764
|
+
"""
|
|
765
|
+
return self._get_circuit_breaker_state()
|
|
766
|
+
|
|
655
767
|
|
|
656
768
|
__all__ = ["EnumCircuitState", "MixinAsyncCircuitBreaker", "ModelCircuitBreakerConfig"]
|
|
@@ -22,6 +22,11 @@ from omnibase_infra.models.dispatch import (
|
|
|
22
22
|
ModelTopicParser,
|
|
23
23
|
)
|
|
24
24
|
from omnibase_infra.models.errors import ModelHandlerValidationError
|
|
25
|
+
from omnibase_infra.models.event_bus import (
|
|
26
|
+
ModelConsumerRetryConfig,
|
|
27
|
+
ModelIdempotencyConfig,
|
|
28
|
+
ModelOffsetPolicyConfig,
|
|
29
|
+
)
|
|
25
30
|
from omnibase_infra.models.handlers import ModelHandlerIdentifier
|
|
26
31
|
from omnibase_infra.models.health import ModelHealthCheckResult
|
|
27
32
|
from omnibase_infra.models.logging import ModelLogContext
|
|
@@ -84,6 +89,10 @@ __all__: list[str] = [
|
|
|
84
89
|
# Dispatch models
|
|
85
90
|
"EnumDispatchStatus",
|
|
86
91
|
"EnumTopicStandard",
|
|
92
|
+
# Event bus models
|
|
93
|
+
"ModelConsumerRetryConfig",
|
|
94
|
+
"ModelIdempotencyConfig",
|
|
95
|
+
"ModelOffsetPolicyConfig",
|
|
87
96
|
# Resilience models
|
|
88
97
|
"ModelCircuitBreakerConfig",
|
|
89
98
|
# Validation models
|
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
# SPDX-FileCopyrightText: 2025 OmniNode Team <info@omninode.ai>
|
|
2
|
+
#
|
|
3
|
+
# SPDX-License-Identifier: Apache-2.0
|
|
4
|
+
"""Event bus models for message consumption, idempotency, and DLQ configuration."""
|
|
5
|
+
|
|
6
|
+
from omnibase_infra.models.event_bus.model_consumer_retry_config import (
|
|
7
|
+
ModelConsumerRetryConfig,
|
|
8
|
+
)
|
|
9
|
+
from omnibase_infra.models.event_bus.model_dlq_config import ModelDlqConfig
|
|
10
|
+
from omnibase_infra.models.event_bus.model_idempotency_config import (
|
|
11
|
+
ModelIdempotencyConfig,
|
|
12
|
+
)
|
|
13
|
+
from omnibase_infra.models.event_bus.model_offset_policy_config import (
|
|
14
|
+
ModelOffsetPolicyConfig,
|
|
15
|
+
)
|
|
16
|
+
|
|
17
|
+
__all__ = [
|
|
18
|
+
"ModelConsumerRetryConfig",
|
|
19
|
+
"ModelDlqConfig",
|
|
20
|
+
"ModelIdempotencyConfig",
|
|
21
|
+
"ModelOffsetPolicyConfig",
|
|
22
|
+
]
|
|
@@ -0,0 +1,367 @@
|
|
|
1
|
+
# SPDX-License-Identifier: MIT
|
|
2
|
+
# Copyright (c) 2025 OmniNode Team
|
|
3
|
+
"""Consumer-side retry configuration model.
|
|
4
|
+
|
|
5
|
+
This module provides the configuration model for consumer-side retry behavior
|
|
6
|
+
when message handlers fail. It distinguishes between:
|
|
7
|
+
|
|
8
|
+
- **Content errors** (non-retryable): Malformed messages, validation failures,
|
|
9
|
+
business logic errors. These will fail regardless of retry attempts.
|
|
10
|
+
|
|
11
|
+
- **Infrastructure errors** (retryable): Network timeouts, temporary service
|
|
12
|
+
unavailability, rate limiting. These may succeed on retry.
|
|
13
|
+
|
|
14
|
+
The model uses exponential backoff with optional jitter to prevent thundering
|
|
15
|
+
herd problems in distributed systems.
|
|
16
|
+
|
|
17
|
+
Example:
|
|
18
|
+
>>> config = ModelConsumerRetryConfig(
|
|
19
|
+
... max_attempts=5,
|
|
20
|
+
... backoff_ms=500,
|
|
21
|
+
... backoff_multiplier=2.0,
|
|
22
|
+
... jitter_enabled=True,
|
|
23
|
+
... )
|
|
24
|
+
>>> config.calculate_delay_ms(attempt=3) # Returns ~2000ms + jitter
|
|
25
|
+
|
|
26
|
+
See Also:
|
|
27
|
+
- docs/patterns/error_recovery_patterns.md: Error recovery patterns
|
|
28
|
+
- docs/patterns/dispatcher_resilience.md: Dispatcher resilience patterns
|
|
29
|
+
"""
|
|
30
|
+
|
|
31
|
+
from __future__ import annotations
|
|
32
|
+
|
|
33
|
+
import random
|
|
34
|
+
from typing import Literal
|
|
35
|
+
|
|
36
|
+
from pydantic import BaseModel, ConfigDict, Field, field_validator
|
|
37
|
+
|
|
38
|
+
from omnibase_core.errors import OnexError
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
class ModelConsumerRetryConfig(BaseModel):
|
|
42
|
+
"""Consumer-side retry configuration.
|
|
43
|
+
|
|
44
|
+
Controls retry behavior when message handlers fail. Distinguishes between
|
|
45
|
+
content errors (non-retryable) and infrastructure errors (retryable).
|
|
46
|
+
|
|
47
|
+
Attributes:
|
|
48
|
+
max_attempts: Maximum retry attempts before giving up. Includes the
|
|
49
|
+
initial attempt, so max_attempts=3 means 1 initial + 2 retries.
|
|
50
|
+
backoff_ms: Base backoff delay in milliseconds. For exponential
|
|
51
|
+
backoff, the delay for retry N (1-based) is backoff_ms * (multiplier ^ (N - 1)).
|
|
52
|
+
backoff_multiplier: Exponential backoff multiplier. A value of 2.0
|
|
53
|
+
doubles the delay with each retry.
|
|
54
|
+
jitter_enabled: When True, adds random jitter (0-25% of delay) to
|
|
55
|
+
prevent thundering herd when multiple consumers retry simultaneously.
|
|
56
|
+
backoff_strategy: Strategy for calculating delays between retries.
|
|
57
|
+
"exponential" multiplies the delay by backoff_multiplier each retry, "fixed" uses constant delay.
|
|
58
|
+
max_backoff_ms: Maximum backoff delay cap to prevent excessive waits.
|
|
59
|
+
|
|
60
|
+
Example:
|
|
61
|
+
```python
|
|
62
|
+
from omnibase_infra.models.event_bus import ModelConsumerRetryConfig
|
|
63
|
+
|
|
64
|
+
# Standard configuration with exponential backoff
|
|
65
|
+
config = ModelConsumerRetryConfig(
|
|
66
|
+
max_attempts=3,
|
|
67
|
+
backoff_ms=1000,
|
|
68
|
+
backoff_multiplier=2.0,
|
|
69
|
+
jitter_enabled=True,
|
|
70
|
+
)
|
|
71
|
+
|
|
72
|
+
# Conservative configuration for critical operations
|
|
73
|
+
config = ModelConsumerRetryConfig.create_conservative()
|
|
74
|
+
|
|
75
|
+
# Aggressive configuration for resilient operations
|
|
76
|
+
config = ModelConsumerRetryConfig.create_aggressive()
|
|
77
|
+
```
|
|
78
|
+
|
|
79
|
+
Configuration Guidelines:
|
|
80
|
+
- Critical operations: Use lower max_attempts (2-3), higher backoff
|
|
81
|
+
- Best-effort operations: Use higher max_attempts (5+), lower backoff
|
|
82
|
+
- High-concurrency: Always enable jitter to prevent thundering herd
|
|
83
|
+
"""
|
|
84
|
+
|
|
85
|
+
model_config = ConfigDict(
|
|
86
|
+
frozen=True,
|
|
87
|
+
extra="forbid",
|
|
88
|
+
json_schema_extra={
|
|
89
|
+
"examples": [
|
|
90
|
+
{
|
|
91
|
+
"max_attempts": 3,
|
|
92
|
+
"backoff_ms": 1000,
|
|
93
|
+
"backoff_multiplier": 2.0,
|
|
94
|
+
"jitter_enabled": True,
|
|
95
|
+
"backoff_strategy": "exponential",
|
|
96
|
+
"max_backoff_ms": 30000,
|
|
97
|
+
},
|
|
98
|
+
{
|
|
99
|
+
"max_attempts": 5,
|
|
100
|
+
"backoff_ms": 500,
|
|
101
|
+
"backoff_multiplier": 1.5,
|
|
102
|
+
"jitter_enabled": True,
|
|
103
|
+
"backoff_strategy": "exponential",
|
|
104
|
+
"max_backoff_ms": 60000,
|
|
105
|
+
},
|
|
106
|
+
]
|
|
107
|
+
},
|
|
108
|
+
)
|
|
109
|
+
|
|
110
|
+
max_attempts: int = Field(
|
|
111
|
+
default=3,
|
|
112
|
+
ge=1,
|
|
113
|
+
le=10,
|
|
114
|
+
description="Maximum retry attempts before giving up (1-10). "
|
|
115
|
+
"Includes initial attempt, so 3 means 1 initial + 2 retries.",
|
|
116
|
+
)
|
|
117
|
+
|
|
118
|
+
backoff_ms: int = Field(
|
|
119
|
+
default=1000,
|
|
120
|
+
ge=100,
|
|
121
|
+
le=60000,
|
|
122
|
+
description="Base backoff delay in milliseconds (100-60000). "
|
|
123
|
+
"For exponential backoff, subsequent delays are backoff_ms * (multiplier ^ attempt).",
|
|
124
|
+
)
|
|
125
|
+
|
|
126
|
+
backoff_multiplier: float = Field(
|
|
127
|
+
default=2.0,
|
|
128
|
+
ge=1.0,
|
|
129
|
+
le=4.0,
|
|
130
|
+
description="Exponential backoff multiplier (1.0-4.0). "
|
|
131
|
+
"A value of 2.0 doubles the delay with each retry.",
|
|
132
|
+
)
|
|
133
|
+
|
|
134
|
+
jitter_enabled: bool = Field(
|
|
135
|
+
default=True,
|
|
136
|
+
description="Add random jitter (0-25% of delay) to backoff. "
|
|
137
|
+
"Prevents thundering herd when multiple consumers retry simultaneously.",
|
|
138
|
+
)
|
|
139
|
+
|
|
140
|
+
backoff_strategy: Literal["exponential", "fixed"] = Field(
|
|
141
|
+
default="exponential",
|
|
142
|
+
description="Backoff strategy: 'exponential' multiplies delay each retry, "
|
|
143
|
+
"'fixed' uses constant delay.",
|
|
144
|
+
)
|
|
145
|
+
|
|
146
|
+
max_backoff_ms: int = Field(
|
|
147
|
+
default=30000,
|
|
148
|
+
ge=1000,
|
|
149
|
+
le=300000,
|
|
150
|
+
description="Maximum backoff delay cap in milliseconds (1000-300000). "
|
|
151
|
+
"Prevents excessive waits in exponential backoff.",
|
|
152
|
+
)
|
|
153
|
+
|
|
154
|
+
@field_validator("max_backoff_ms")
|
|
155
|
+
@classmethod
|
|
156
|
+
def validate_max_backoff_greater_than_base(cls, v: int, info: object) -> int:
|
|
157
|
+
"""Ensure max_backoff_ms is at least as large as backoff_ms.
|
|
158
|
+
|
|
159
|
+
Args:
|
|
160
|
+
v: The max_backoff_ms value to validate.
|
|
161
|
+
info: Pydantic validation info containing other field values.
|
|
162
|
+
|
|
163
|
+
Returns:
|
|
164
|
+
The validated max_backoff_ms value.
|
|
165
|
+
|
|
166
|
+
Raises:
|
|
167
|
+
ValueError: If max_backoff_ms is less than backoff_ms.
|
|
168
|
+
"""
|
|
169
|
+
# Access data from validation info
|
|
170
|
+
# Use getattr for safer access pattern
|
|
171
|
+
data = getattr(info, "data", None) or {}
|
|
172
|
+
base_delay = data.get("backoff_ms", 1000)
|
|
173
|
+
if v < base_delay:
|
|
174
|
+
msg = f"max_backoff_ms ({v}) must be >= backoff_ms ({base_delay})"
|
|
175
|
+
raise ValueError(msg)
|
|
176
|
+
return v
|
|
177
|
+
|
|
178
|
+
def calculate_delay_ms(
|
|
179
|
+
self, attempt: int, include_jitter: bool | None = None
|
|
180
|
+
) -> int:
|
|
181
|
+
"""Calculate delay in milliseconds for a specific retry attempt.
|
|
182
|
+
|
|
183
|
+
Args:
|
|
184
|
+
attempt: The retry attempt number (1-based). Attempt 1 is the first
|
|
185
|
+
retry after the initial failure.
|
|
186
|
+
include_jitter: Whether to include jitter. If None, uses the
|
|
187
|
+
jitter_enabled setting from configuration.
|
|
188
|
+
|
|
189
|
+
Returns:
|
|
190
|
+
Delay in milliseconds for the specified attempt. The pre-jitter delay is capped at max_backoff_ms; jitter (up to 25%) is added after the cap, so the result may exceed it.
|
|
191
|
+
|
|
192
|
+
Raises:
|
|
193
|
+
OnexError: If attempt is less than 1 or exceeds allowed retries.
|
|
194
|
+
|
|
195
|
+
Example:
|
|
196
|
+
>>> config = ModelConsumerRetryConfig(backoff_ms=1000, backoff_multiplier=2.0)
|
|
197
|
+
>>> config.calculate_delay_ms(1) # ~1000ms
|
|
198
|
+
>>> config.calculate_delay_ms(2) # ~2000ms (max_attempts=3 allows 2 retries)
|
|
199
|
+
"""
|
|
200
|
+
if attempt < 1:
|
|
201
|
+
msg = f"Attempt must be >= 1, got {attempt}"
|
|
202
|
+
raise OnexError(msg)
|
|
203
|
+
|
|
204
|
+
# max_attempts includes the initial attempt, so valid retries are 1 to max_attempts-1
|
|
205
|
+
max_retry = self.max_attempts - 1
|
|
206
|
+
if attempt > max_retry:
|
|
207
|
+
msg = f"Attempt {attempt} exceeds max retries {max_retry} (max_attempts={self.max_attempts} includes initial attempt)"
|
|
208
|
+
raise OnexError(msg)
|
|
209
|
+
|
|
210
|
+
if self.backoff_strategy == "exponential":
|
|
211
|
+
# Exponential: backoff_ms * (multiplier ^ (attempt - 1))
|
|
212
|
+
delay = self.backoff_ms * (self.backoff_multiplier ** (attempt - 1))
|
|
213
|
+
else:
|
|
214
|
+
# Fixed: constant delay
|
|
215
|
+
delay = float(self.backoff_ms)
|
|
216
|
+
|
|
217
|
+
# Cap at max_backoff_ms
|
|
218
|
+
delay = min(delay, float(self.max_backoff_ms))
|
|
219
|
+
|
|
220
|
+
# Add jitter if enabled
|
|
221
|
+
use_jitter = (
|
|
222
|
+
include_jitter if include_jitter is not None else self.jitter_enabled
|
|
223
|
+
)
|
|
224
|
+
if use_jitter:
|
|
225
|
+
# Add 0-25% random jitter
|
|
226
|
+
jitter_factor = random.random() * 0.25
|
|
227
|
+
delay += delay * jitter_factor
|
|
228
|
+
|
|
229
|
+
return int(delay)
|
|
230
|
+
|
|
231
|
+
def get_all_delays_ms(self, include_jitter: bool = False) -> list[int]:
|
|
232
|
+
"""Get all delay times for the complete retry sequence.
|
|
233
|
+
|
|
234
|
+
Args:
|
|
235
|
+
include_jitter: Whether to include jitter in calculations.
|
|
236
|
+
Defaults to False for predictable results.
|
|
237
|
+
|
|
238
|
+
Returns:
|
|
239
|
+
List of delays in milliseconds for each retry attempt.
|
|
240
|
+
Since max_attempts includes the initial attempt, there are
|
|
241
|
+
max_attempts - 1 retries, and thus max_attempts - 1 delays.
|
|
242
|
+
|
|
243
|
+
Example:
|
|
244
|
+
>>> config = ModelConsumerRetryConfig(max_attempts=3, backoff_ms=1000)
|
|
245
|
+
>>> config.get_all_delays_ms()
|
|
246
|
+
[1000, 2000] # 2 retries (max_attempts=3 includes initial)
|
|
247
|
+
"""
|
|
248
|
+
# max_attempts includes initial attempt, so we have max_attempts - 1 retries
|
|
249
|
+
return [
|
|
250
|
+
self.calculate_delay_ms(i, include_jitter=include_jitter)
|
|
251
|
+
for i in range(1, self.max_attempts)
|
|
252
|
+
]
|
|
253
|
+
|
|
254
|
+
def get_total_retry_time_ms(self) -> int:
|
|
255
|
+
"""Get total time that all retries will take.
|
|
256
|
+
|
|
257
|
+
Returns:
|
|
258
|
+
Sum of the backoff delays in milliseconds across all retry attempts (without jitter).
|
|
259
|
+
Since max_attempts includes the initial attempt, this sums delays
|
|
260
|
+
for max_attempts - 1 retries.
|
|
261
|
+
|
|
262
|
+
Example:
|
|
263
|
+
>>> config = ModelConsumerRetryConfig(max_attempts=3, backoff_ms=1000)
|
|
264
|
+
>>> config.get_total_retry_time_ms()
|
|
265
|
+
3000 # 1000 + 2000 (2 retries for max_attempts=3)
|
|
266
|
+
"""
|
|
267
|
+
return sum(self.get_all_delays_ms(include_jitter=False))
|
|
268
|
+
|
|
269
|
+
@classmethod
|
|
270
|
+
def create_conservative(cls) -> ModelConsumerRetryConfig:
|
|
271
|
+
"""Create conservative retry configuration for critical operations.
|
|
272
|
+
|
|
273
|
+
Conservative configuration minimizes retry attempts and uses longer
|
|
274
|
+
delays to reduce load on failing services.
|
|
275
|
+
|
|
276
|
+
Returns:
|
|
277
|
+
ModelConsumerRetryConfig with conservative settings.
|
|
278
|
+
|
|
279
|
+
Example:
|
|
280
|
+
>>> config = ModelConsumerRetryConfig.create_conservative()
|
|
281
|
+
>>> config.max_attempts
|
|
282
|
+
2
|
|
283
|
+
"""
|
|
284
|
+
return cls(
|
|
285
|
+
max_attempts=2,
|
|
286
|
+
backoff_ms=2000,
|
|
287
|
+
backoff_multiplier=2.0,
|
|
288
|
+
jitter_enabled=True,
|
|
289
|
+
backoff_strategy="exponential",
|
|
290
|
+
max_backoff_ms=30000,
|
|
291
|
+
)
|
|
292
|
+
|
|
293
|
+
@classmethod
|
|
294
|
+
def create_standard(cls) -> ModelConsumerRetryConfig:
|
|
295
|
+
"""Create standard retry configuration for typical operations.
|
|
296
|
+
|
|
297
|
+
Standard configuration balances reliability with reasonable latency.
|
|
298
|
+
|
|
299
|
+
Returns:
|
|
300
|
+
ModelConsumerRetryConfig with standard settings.
|
|
301
|
+
|
|
302
|
+
Example:
|
|
303
|
+
>>> config = ModelConsumerRetryConfig.create_standard()
|
|
304
|
+
>>> config.max_attempts
|
|
305
|
+
3
|
|
306
|
+
"""
|
|
307
|
+
return cls(
|
|
308
|
+
max_attempts=3,
|
|
309
|
+
backoff_ms=1000,
|
|
310
|
+
backoff_multiplier=2.0,
|
|
311
|
+
jitter_enabled=True,
|
|
312
|
+
backoff_strategy="exponential",
|
|
313
|
+
max_backoff_ms=30000,
|
|
314
|
+
)
|
|
315
|
+
|
|
316
|
+
@classmethod
|
|
317
|
+
def create_aggressive(cls) -> ModelConsumerRetryConfig:
|
|
318
|
+
"""Create aggressive retry configuration for resilient operations.
|
|
319
|
+
|
|
320
|
+
Aggressive configuration maximizes retry attempts with shorter delays
|
|
321
|
+
for operations that need high availability.
|
|
322
|
+
|
|
323
|
+
Returns:
|
|
324
|
+
ModelConsumerRetryConfig with aggressive settings.
|
|
325
|
+
|
|
326
|
+
Example:
|
|
327
|
+
>>> config = ModelConsumerRetryConfig.create_aggressive()
|
|
328
|
+
>>> config.max_attempts
|
|
329
|
+
5
|
|
330
|
+
"""
|
|
331
|
+
return cls(
|
|
332
|
+
max_attempts=5,
|
|
333
|
+
backoff_ms=500,
|
|
334
|
+
backoff_multiplier=1.5,
|
|
335
|
+
jitter_enabled=True,
|
|
336
|
+
backoff_strategy="exponential",
|
|
337
|
+
max_backoff_ms=15000,
|
|
338
|
+
)
|
|
339
|
+
|
|
340
|
+
@classmethod
|
|
341
|
+
def create_no_retry(cls) -> ModelConsumerRetryConfig:
|
|
342
|
+
"""Create configuration with no retries (fail-fast).
|
|
343
|
+
|
|
344
|
+
Use for operations where retries are not appropriate, such as
|
|
345
|
+
idempotency-sensitive operations or when circuit breaker is preferred.
|
|
346
|
+
|
|
347
|
+
Returns:
|
|
348
|
+
ModelConsumerRetryConfig with no retry attempts.
|
|
349
|
+
|
|
350
|
+
Example:
|
|
351
|
+
>>> config = ModelConsumerRetryConfig.create_no_retry()
|
|
352
|
+
>>> config.max_attempts
|
|
353
|
+
1
|
|
354
|
+
"""
|
|
355
|
+
return cls(
|
|
356
|
+
max_attempts=1,
|
|
357
|
+
backoff_ms=100,
|
|
358
|
+
backoff_multiplier=1.0,
|
|
359
|
+
jitter_enabled=False,
|
|
360
|
+
backoff_strategy="fixed",
|
|
361
|
+
max_backoff_ms=1000,
|
|
362
|
+
)
|
|
363
|
+
|
|
364
|
+
|
|
365
|
+
__all__: list[str] = [
|
|
366
|
+
"ModelConsumerRetryConfig",
|
|
367
|
+
]
|