omnibase_infra 0.3.1__py3-none-any.whl → 0.4.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (117) hide show
  1. omnibase_infra/__init__.py +1 -1
  2. omnibase_infra/enums/__init__.py +3 -0
  3. omnibase_infra/enums/enum_consumer_group_purpose.py +9 -0
  4. omnibase_infra/enums/enum_postgres_error_code.py +188 -0
  5. omnibase_infra/errors/__init__.py +4 -0
  6. omnibase_infra/errors/error_infra.py +60 -0
  7. omnibase_infra/handlers/__init__.py +3 -0
  8. omnibase_infra/handlers/handler_slack_webhook.py +426 -0
  9. omnibase_infra/handlers/models/__init__.py +14 -0
  10. omnibase_infra/handlers/models/enum_alert_severity.py +36 -0
  11. omnibase_infra/handlers/models/model_slack_alert.py +24 -0
  12. omnibase_infra/handlers/models/model_slack_alert_payload.py +77 -0
  13. omnibase_infra/handlers/models/model_slack_alert_result.py +73 -0
  14. omnibase_infra/handlers/registration_storage/handler_registration_storage_postgres.py +29 -20
  15. omnibase_infra/mixins/__init__.py +14 -0
  16. omnibase_infra/mixins/mixin_node_introspection.py +42 -20
  17. omnibase_infra/mixins/mixin_postgres_error_response.py +314 -0
  18. omnibase_infra/mixins/mixin_postgres_op_executor.py +298 -0
  19. omnibase_infra/models/__init__.py +3 -0
  20. omnibase_infra/models/discovery/model_dependency_spec.py +1 -0
  21. omnibase_infra/models/discovery/model_discovered_capabilities.py +1 -1
  22. omnibase_infra/models/discovery/model_introspection_config.py +28 -1
  23. omnibase_infra/models/discovery/model_introspection_performance_metrics.py +1 -0
  24. omnibase_infra/models/discovery/model_introspection_task_config.py +1 -0
  25. omnibase_infra/{nodes/effects/models → models}/model_backend_result.py +22 -6
  26. omnibase_infra/models/projection/__init__.py +11 -0
  27. omnibase_infra/models/projection/model_contract_projection.py +170 -0
  28. omnibase_infra/models/projection/model_topic_projection.py +148 -0
  29. omnibase_infra/models/runtime/__init__.py +4 -0
  30. omnibase_infra/models/runtime/model_resolved_dependencies.py +116 -0
  31. omnibase_infra/nodes/contract_registry_reducer/__init__.py +5 -0
  32. omnibase_infra/nodes/contract_registry_reducer/contract.yaml +6 -5
  33. omnibase_infra/nodes/contract_registry_reducer/contract_registration_event_router.py +689 -0
  34. omnibase_infra/nodes/contract_registry_reducer/reducer.py +9 -26
  35. omnibase_infra/nodes/effects/__init__.py +1 -1
  36. omnibase_infra/nodes/effects/models/__init__.py +6 -4
  37. omnibase_infra/nodes/effects/models/model_registry_response.py +1 -1
  38. omnibase_infra/nodes/effects/protocol_consul_client.py +1 -1
  39. omnibase_infra/nodes/effects/protocol_postgres_adapter.py +1 -1
  40. omnibase_infra/nodes/effects/registry_effect.py +1 -1
  41. omnibase_infra/nodes/node_contract_persistence_effect/__init__.py +101 -0
  42. omnibase_infra/nodes/node_contract_persistence_effect/contract.yaml +490 -0
  43. omnibase_infra/nodes/node_contract_persistence_effect/handlers/__init__.py +74 -0
  44. omnibase_infra/nodes/node_contract_persistence_effect/handlers/handler_postgres_cleanup_topics.py +217 -0
  45. omnibase_infra/nodes/node_contract_persistence_effect/handlers/handler_postgres_contract_upsert.py +242 -0
  46. omnibase_infra/nodes/node_contract_persistence_effect/handlers/handler_postgres_deactivate.py +194 -0
  47. omnibase_infra/nodes/node_contract_persistence_effect/handlers/handler_postgres_heartbeat.py +243 -0
  48. omnibase_infra/nodes/node_contract_persistence_effect/handlers/handler_postgres_mark_stale.py +208 -0
  49. omnibase_infra/nodes/node_contract_persistence_effect/handlers/handler_postgres_topic_update.py +298 -0
  50. omnibase_infra/nodes/node_contract_persistence_effect/models/__init__.py +15 -0
  51. omnibase_infra/nodes/node_contract_persistence_effect/models/model_persistence_result.py +52 -0
  52. omnibase_infra/nodes/node_contract_persistence_effect/node.py +131 -0
  53. omnibase_infra/nodes/node_contract_persistence_effect/registry/__init__.py +27 -0
  54. omnibase_infra/nodes/node_contract_persistence_effect/registry/registry_infra_contract_persistence_effect.py +251 -0
  55. omnibase_infra/nodes/node_registration_orchestrator/models/model_postgres_intent_payload.py +8 -12
  56. omnibase_infra/nodes/node_registry_effect/models/__init__.py +2 -2
  57. omnibase_infra/nodes/node_slack_alerter_effect/__init__.py +33 -0
  58. omnibase_infra/nodes/node_slack_alerter_effect/contract.yaml +291 -0
  59. omnibase_infra/nodes/node_slack_alerter_effect/node.py +106 -0
  60. omnibase_infra/projectors/__init__.py +6 -0
  61. omnibase_infra/projectors/projection_reader_contract.py +1301 -0
  62. omnibase_infra/runtime/__init__.py +12 -0
  63. omnibase_infra/runtime/baseline_subscriptions.py +13 -6
  64. omnibase_infra/runtime/contract_dependency_resolver.py +455 -0
  65. omnibase_infra/runtime/contract_registration_event_router.py +500 -0
  66. omnibase_infra/runtime/db/__init__.py +4 -0
  67. omnibase_infra/runtime/db/models/__init__.py +15 -10
  68. omnibase_infra/runtime/db/models/model_db_operation.py +40 -0
  69. omnibase_infra/runtime/db/models/model_db_param.py +24 -0
  70. omnibase_infra/runtime/db/models/model_db_repository_contract.py +40 -0
  71. omnibase_infra/runtime/db/models/model_db_return.py +26 -0
  72. omnibase_infra/runtime/db/models/model_db_safety_policy.py +32 -0
  73. omnibase_infra/runtime/emit_daemon/event_registry.py +34 -22
  74. omnibase_infra/runtime/event_bus_subcontract_wiring.py +63 -23
  75. omnibase_infra/runtime/intent_execution_router.py +430 -0
  76. omnibase_infra/runtime/models/__init__.py +6 -0
  77. omnibase_infra/runtime/models/model_contract_registry_config.py +41 -0
  78. omnibase_infra/runtime/models/model_intent_execution_summary.py +79 -0
  79. omnibase_infra/runtime/models/model_runtime_config.py +8 -0
  80. omnibase_infra/runtime/protocols/__init__.py +16 -0
  81. omnibase_infra/runtime/protocols/protocol_intent_executor.py +107 -0
  82. omnibase_infra/runtime/publisher_topic_scoped.py +16 -11
  83. omnibase_infra/runtime/registry_policy.py +29 -15
  84. omnibase_infra/runtime/request_response_wiring.py +793 -0
  85. omnibase_infra/runtime/service_kernel.py +295 -8
  86. omnibase_infra/runtime/service_runtime_host_process.py +149 -5
  87. omnibase_infra/runtime/util_version.py +5 -1
  88. omnibase_infra/schemas/schema_latency_baseline.sql +135 -0
  89. omnibase_infra/services/contract_publisher/config.py +4 -4
  90. omnibase_infra/services/contract_publisher/service.py +8 -5
  91. omnibase_infra/services/observability/injection_effectiveness/__init__.py +67 -0
  92. omnibase_infra/services/observability/injection_effectiveness/config.py +295 -0
  93. omnibase_infra/services/observability/injection_effectiveness/consumer.py +1461 -0
  94. omnibase_infra/services/observability/injection_effectiveness/models/__init__.py +32 -0
  95. omnibase_infra/services/observability/injection_effectiveness/models/model_agent_match.py +79 -0
  96. omnibase_infra/services/observability/injection_effectiveness/models/model_context_utilization.py +118 -0
  97. omnibase_infra/services/observability/injection_effectiveness/models/model_latency_breakdown.py +107 -0
  98. omnibase_infra/services/observability/injection_effectiveness/models/model_pattern_utilization.py +46 -0
  99. omnibase_infra/services/observability/injection_effectiveness/writer_postgres.py +596 -0
  100. omnibase_infra/services/registry_api/models/__init__.py +25 -0
  101. omnibase_infra/services/registry_api/models/model_contract_ref.py +44 -0
  102. omnibase_infra/services/registry_api/models/model_contract_view.py +81 -0
  103. omnibase_infra/services/registry_api/models/model_response_contracts.py +50 -0
  104. omnibase_infra/services/registry_api/models/model_response_topics.py +50 -0
  105. omnibase_infra/services/registry_api/models/model_topic_summary.py +57 -0
  106. omnibase_infra/services/registry_api/models/model_topic_view.py +63 -0
  107. omnibase_infra/services/registry_api/routes.py +205 -6
  108. omnibase_infra/services/registry_api/service.py +528 -1
  109. omnibase_infra/utils/__init__.py +7 -0
  110. omnibase_infra/utils/util_db_error_context.py +292 -0
  111. omnibase_infra/validation/infra_validators.py +3 -1
  112. omnibase_infra/validation/validation_exemptions.yaml +65 -0
  113. {omnibase_infra-0.3.1.dist-info → omnibase_infra-0.4.0.dist-info}/METADATA +3 -3
  114. {omnibase_infra-0.3.1.dist-info → omnibase_infra-0.4.0.dist-info}/RECORD +117 -58
  115. {omnibase_infra-0.3.1.dist-info → omnibase_infra-0.4.0.dist-info}/WHEEL +0 -0
  116. {omnibase_infra-0.3.1.dist-info → omnibase_infra-0.4.0.dist-info}/entry_points.txt +0 -0
  117. {omnibase_infra-0.3.1.dist-info → omnibase_infra-0.4.0.dist-info}/licenses/LICENSE +0 -0
@@ -0,0 +1,793 @@
1
+ # SPDX-License-Identifier: MIT
2
+ # Copyright (c) 2025 OmniNode Team
3
+ """Request-response wiring for correlation-based RPC-style Kafka communication.
4
+
5
+ This module provides the RequestResponseWiring class for implementing request-response
6
+ patterns over Kafka. Unlike the standard EventBusSubcontractWiring (designed for 24/7
7
+ consumers), this wiring supports correlation-based request-response flows where a
8
+ publisher sends a request and awaits a correlated response.
9
+
10
+ Architecture:
11
+ The RequestResponseWiring class is responsible for:
12
+ 1. Reading ModelRequestResponseConfig from contracts
13
+ 2. Creating dedicated consumers for reply topics (completed + failed)
14
+ 3. Managing correlation ID tracking with in-flight futures
15
+ 4. Injecting correlation IDs if not present in outgoing requests
16
+ 5. Matching incoming responses to pending requests via correlation ID
17
+ 6. Handling timeouts with InfraTimeoutError
18
+ 7. Circuit breaker protection for publish failures
19
+
20
+ This follows ARCH-002: "Runtime owns all Kafka plumbing." Nodes and handlers
21
+ declare request-response requirements in contracts but never directly interact
22
+ with Kafka consumers or producers.
23
+
24
+ Boot Nonce:
25
+ A per-process boot nonce (8-character hex string from UUID4) is generated once
26
+ at module load time. This ensures consumer groups are unique per process instance,
27
+ preventing message stealing between concurrent processes.
28
+
29
+ Consumer Group Naming:
30
+ Consumer groups are named as: {environment}.rr.{instance_name}.{boot_nonce}
31
+ Example: "dev.rr.code-analysis.a1b2c3d4"
32
+
33
+ This ensures:
34
+ - Each process instance has its own consumer group
35
+ - Multiple instances don't steal each other's responses
36
+ - Process restarts get new consumer groups
37
+
38
+ Correlation ID Handling:
39
+ When sending requests, the wiring:
40
+ 1. Checks if correlation_id exists at the configured location (default: body.correlation_id)
41
+ 2. If missing, injects a new UUID4 correlation_id into the payload
42
+ 3. Returns the correlation_id in the response for tracing
43
+
44
+ Error Handling:
45
+ - Timeout: Raises InfraTimeoutError (NOT InfraUnavailableError)
46
+ - Circuit breaker open: Raises InfraUnavailableError
47
+ - Publish failures: Recorded by circuit breaker, wrapped in appropriate error
48
+
49
+ Related:
50
+ - OMN-1742: Request-response wiring for Kafka RPC patterns
51
+ - ModelRequestResponseConfig: Contract model for request-response configuration
52
+ - EventBusSubcontractWiring: Standard 24/7 consumer wiring (different pattern)
53
+
54
+ .. versionadded:: 0.4.0
55
+ """
56
+
57
+ from __future__ import annotations
58
+
59
+ import asyncio
60
+ import json
61
+ import logging
62
+ from dataclasses import dataclass, field
63
+ from typing import TYPE_CHECKING
64
+ from uuid import UUID, uuid4
65
+
66
+ from aiokafka import AIOKafkaConsumer
67
+
68
+ from omnibase_core.models.contracts.subcontracts import (
69
+ ModelCorrelationConfig,
70
+ ModelRequestResponseConfig,
71
+ ModelRequestResponseInstance,
72
+ )
73
+ from omnibase_core.protocols.event_bus.protocol_event_bus_publisher import (
74
+ ProtocolEventBusPublisher,
75
+ )
76
+ from omnibase_infra.enums import EnumInfraTransportType
77
+ from omnibase_infra.errors import (
78
+ InfraTimeoutError,
79
+ InfraUnavailableError,
80
+ ModelInfraErrorContext,
81
+ ModelTimeoutErrorContext,
82
+ ProtocolConfigurationError,
83
+ )
84
+ from omnibase_infra.mixins import MixinAsyncCircuitBreaker
85
+
86
+ if TYPE_CHECKING:
87
+ from aiokafka import ConsumerRecord
88
+
89
# Module-level logger named after this module.
logger = logging.getLogger(__name__)

# Boot nonce: Generated ONCE per process at module load time.
# Used to create unique consumer groups per process instance.
# The value is the first 8 hex characters of a random UUID4, so every fresh
# import of this module (i.e. every new process) gets a different nonce.
_BOOT_NONCE: str = uuid4().hex[:8]
94
+
95
+
96
@dataclass
class RequestResponseInstanceState:
    """Mutable bookkeeping for one wired request-response instance.

    Groups the topic names, correlation settings and consumer group of an
    instance together with its runtime objects: the futures of in-flight
    requests, the Kafka consumer reading the reply topics, and the background
    task that drains that consumer.
    """

    # Instance name; the wiring stores this state under the same key.
    name: str
    # Topic that requests are published to.
    request_topic: str
    # Reply topic whose messages resolve a pending request with a result.
    completed_topic: str
    # Reply topic whose messages resolve a pending request with an error.
    failed_topic: str
    # Default number of seconds to wait for a reply.
    timeout_seconds: int
    # Location/field settings used to find the correlation ID in a payload.
    correlation_config: ModelCorrelationConfig
    # Kafka consumer group used by the reply-topic consumer.
    consumer_group: str
    # In-flight requests: correlation ID string -> future awaiting the reply.
    pending: dict[str, asyncio.Future[dict[str, object]]] = field(default_factory=dict)
    # Reply-topic consumer; None until one has been attached.
    consumer: AIOKafkaConsumer | None = None
    # Background task consuming replies; None until it has been started.
    consumer_task: asyncio.Task[None] | None = None
114
+
115
+
116
+ class RequestResponseWiring(MixinAsyncCircuitBreaker):
117
+ """Wires request-response patterns to Kafka for correlation-based RPC.
118
+
119
+ This class implements the request-response pattern over Kafka, where:
120
+ 1. A request is published to a request topic
121
+ 2. The wiring awaits a correlated response on reply topics
122
+ 3. Correlation is tracked via correlation_id in the message payload
123
+
124
+ Unlike EventBusSubcontractWiring (designed for 24/7 consumers), this wiring
125
+ creates ephemeral consumers that match responses to pending requests.
126
+
127
+ Consumer Startup:
128
+ Consumers are started eagerly when wire_request_response() is called.
129
+ This ensures responses can be received immediately after the first request.
130
+
131
+ Correlation ID Injection:
132
+ If the outgoing payload lacks a correlation_id at the configured location,
133
+ the wiring injects a new UUID4. The correlation_id is always returned
134
+ in the response for tracing.
135
+
136
+ Timeout Handling:
137
+ If no response is received within the configured timeout (default: 30s),
138
+ InfraTimeoutError is raised. Note: This is a timeout error, NOT
139
+ InfraUnavailableError which is reserved for circuit breaker states.
140
+
141
+ Circuit Breaker:
142
+ Publish failures are tracked by the circuit breaker. When the circuit
143
+ opens, InfraUnavailableError is raised immediately without attempting
144
+ to publish.
145
+
146
+ Thread Safety:
147
+ This class is designed for single-threaded async use. All operations
148
+ should be performed from a single async context.
149
+
150
+ Example:
151
+ ```python
152
+ from omnibase_infra.runtime import RequestResponseWiring
153
+ from omnibase_core.models.contracts.subcontracts import (
154
+ ModelRequestResponseConfig,
155
+ ModelRequestResponseInstance,
156
+ ModelReplyTopics,
157
+ )
158
+
159
+ # Create wiring
160
+ wiring = RequestResponseWiring(
161
+ event_bus=event_bus,
162
+ environment="dev",
163
+ app_name="my-service",
164
+ )
165
+
166
+ # Wire from config
167
+ config = ModelRequestResponseConfig(
168
+ instances=[
169
+ ModelRequestResponseInstance(
170
+ name="code-analysis",
171
+ request_topic="onex.cmd.intelligence.analyze-code.v1",
172
+ reply_topics=ModelReplyTopics(
173
+ completed="onex.evt.intelligence.code-analyzed.v1",
174
+ failed="onex.evt.intelligence.code-analysis-failed.v1",
175
+ ),
176
+ timeout_seconds=30,
177
+ )
178
+ ]
179
+ )
180
+ await wiring.wire_request_response(config)
181
+
182
+ # Send request and await response
183
+ response = await wiring.send_request(
184
+ instance_name="code-analysis",
185
+ payload={"code": "print('hello')"},
186
+ )
187
+
188
+ # Cleanup on shutdown
189
+ await wiring.cleanup()
190
+ ```
191
+
192
+ Attributes:
193
+ _event_bus: Event bus for publishing requests
194
+ _environment: Environment identifier for consumer groups (e.g., 'dev', 'prod')
195
+ _app_name: Application name for consumer group identification
196
+ _instances: Dict mapping instance names to their state
197
+ _bootstrap_servers: Kafka bootstrap servers from event bus
198
+
199
+ .. versionadded:: 0.4.0
200
+ """
201
+
202
def __init__(
    self,
    event_bus: ProtocolEventBusPublisher,
    environment: str,
    app_name: str,
    bootstrap_servers: str | None = None,
) -> None:
    """Initialize request-response wiring.

    Args:
        event_bus: Event bus used to publish requests. Must implement the
            ProtocolEventBusPublisher interface.
        environment: Environment identifier (e.g., 'dev', 'prod'). Used for
            consumer group naming; topics carry no environment prefix.
        app_name: Application name. Used in the circuit breaker service
            name and in log output.
        bootstrap_servers: Kafka bootstrap servers for the reply consumers.
            When omitted or empty, the first non-empty value of
            ``event_bus._bootstrap_servers`` is used, then the
            ``KAFKA_BOOTSTRAP_SERVERS`` environment variable, which
            defaults to ``"localhost:9092"`` when unset.

    Raises:
        ValueError: If environment or app_name is empty or whitespace-only.
    """
    if not environment or not environment.strip():
        raise ValueError("environment must be a non-empty string")
    if not app_name or not app_name.strip():
        raise ValueError("app_name must be a non-empty string")

    self._event_bus = event_bus
    self._environment = environment
    self._app_name = app_name
    self._instances: dict[str, RequestResponseInstanceState] = {}
    self._logger = logging.getLogger(__name__)

    # Local import: only this fallback needs os.
    import os

    # Resolve bootstrap servers. Each source is skipped when it is missing
    # or falsy, so an event bus whose _bootstrap_servers attribute exists but
    # is None/empty no longer short-circuits the environment fallback.
    self._bootstrap_servers = (
        bootstrap_servers
        or getattr(event_bus, "_bootstrap_servers", None)
        or os.environ.get("KAFKA_BOOTSTRAP_SERVERS", "localhost:9092")
    )

    # Initialize circuit breaker for publish protection
    self._init_circuit_breaker(
        threshold=5,
        reset_timeout=60.0,
        service_name=f"request-response.{app_name}",
        transport_type=EnumInfraTransportType.KAFKA,
        half_open_successes=1,
    )

    self._logger.debug(
        "RequestResponseWiring initialized: environment=%s, app_name=%s, "
        "boot_nonce=%s, bootstrap_servers=%s",
        environment,
        app_name,
        _BOOT_NONCE,
        self._bootstrap_servers,
    )
265
+
266
def resolve_topic(self, topic_suffix: str) -> str:
    """Return the Kafka topic name for an ONEX topic suffix.

    The mapping is the identity: no environment prefix (or anything else)
    is added to the suffix.

    Args:
        topic_suffix: ONEX format topic suffix
            (e.g., 'onex.cmd.intelligence.analyze-code.v1').

    Returns:
        The topic name, which is exactly ``topic_suffix``.

    Note:
        The environment still appears in consumer group names, which are
        built separately when an instance is wired.
    """
    resolved_topic = topic_suffix
    return resolved_topic
285
+
286
async def wire_request_response(self, config: ModelRequestResponseConfig) -> None:
    """Wire every request-response instance declared in ``config``.

    Instances are wired one after another, in the order they appear in
    ``config.instances``, by delegating to ``_wire_instance``. There is no
    rollback: if wiring one instance raises, the instances wired before it
    stay wired.

    Args:
        config: Request-response configuration with instance definitions.

    Raises:
        ProtocolConfigurationError: If an instance name is already wired.
        Exception: Any error raised while wiring an instance (for example
            while starting its consumer) propagates to the caller.
    """
    for instance_config in config.instances:
        await self._wire_instance(instance_config)
309
+
310
async def _wire_instance(self, instance: ModelRequestResponseInstance) -> None:
    """Wire a single request-response instance.

    Builds the instance state, starts a Kafka consumer subscribed to the
    completed and failed reply topics, launches the background task that
    matches replies to pending requests, and finally registers the state
    under the instance name.

    Args:
        instance: Instance configuration to wire.

    Raises:
        ProtocolConfigurationError: If instance name already wired.
        Exception: Whatever ``consumer.start()`` raises is propagated after
            the half-started consumer has been stopped.
    """
    if instance.name in self._instances:
        raise ProtocolConfigurationError(
            f"Request-response instance '{instance.name}' already wired",
            context=ModelInfraErrorContext.with_correlation(
                transport_type=EnumInfraTransportType.KAFKA,
                operation="wire_request_response",
            ),
            instance_name=instance.name,
        )

    # Build consumer group: {environment}.rr.{instance_name}.{boot_nonce}
    consumer_group = f"{self._environment}.rr.{instance.name}.{_BOOT_NONCE}"

    # Resolve topics. resolve_topic() returns the declared name unchanged,
    # so no environment prefix is applied here.
    request_topic = self.resolve_topic(instance.request_topic)
    completed_topic = self.resolve_topic(instance.reply_topics.completed)
    failed_topic = self.resolve_topic(instance.reply_topics.failed)

    # Default correlation config if not specified
    correlation_config = instance.correlation or ModelCorrelationConfig()

    # Create instance state
    rr_instance = RequestResponseInstanceState(
        name=instance.name,
        request_topic=request_topic,
        completed_topic=completed_topic,
        failed_topic=failed_topic,
        timeout_seconds=instance.timeout_seconds,
        correlation_config=correlation_config,
        consumer_group=consumer_group,
    )

    # Create consumer for reply topics
    consumer = AIOKafkaConsumer(
        completed_topic,
        failed_topic,
        bootstrap_servers=self._bootstrap_servers,
        group_id=consumer_group,
        auto_offset_reset=instance.auto_offset_reset,
        enable_auto_commit=True,
    )

    rr_instance.consumer = consumer

    # Start consumer eagerly. If start() fails (or is cancelled) the consumer
    # is not registered anywhere, so cleanup() would never stop it; stop it
    # here so a failed wiring attempt does not leak it.
    try:
        await consumer.start()
    except BaseException:
        try:
            await consumer.stop()
        except Exception as stop_error:
            self._logger.warning(
                "Error stopping consumer after failed start for instance %s: %s",
                instance.name,
                stop_error,
            )
        raise

    self._logger.info(
        "Started request-response consumer: instance=%s, "
        "consumer_group=%s, topics=[%s, %s]",
        instance.name,
        consumer_group,
        completed_topic,
        failed_topic,
    )

    # Start background task to process responses
    consumer_task = asyncio.create_task(
        self._consume_responses(rr_instance),
        name=f"rr-consumer-{instance.name}",
    )
    rr_instance.consumer_task = consumer_task

    # Store instance
    self._instances[instance.name] = rr_instance
383
+
384
async def _consume_responses(self, instance: RequestResponseInstanceState) -> None:
    """Background loop feeding reply messages to ``_handle_response_message``.

    Iterates the instance's consumer and hands every message to the
    handler. Returns immediately when the instance has no consumer.
    Cancellation is logged at debug level and re-raised; any other
    exception ends the loop after being logged with its traceback.

    Args:
        instance: The request-response instance to consume for.
    """
    reply_consumer = instance.consumer
    if reply_consumer is None:
        return

    try:
        async for record in reply_consumer:
            await self._handle_response_message(instance, record)
    except asyncio.CancelledError:
        self._logger.debug(
            "Consumer task cancelled for instance: %s",
            instance.name,
        )
        raise
    except Exception as exc:
        self._logger.exception(
            "Unexpected error in consumer task for instance %s: %s",
            instance.name,
            exc,
        )
412
+
413
async def _handle_response_message(
    self,
    instance: RequestResponseInstanceState,
    message: ConsumerRecord,
) -> None:
    """Handle a single response message from Kafka.

    Decodes the message value as UTF-8 JSON, extracts the correlation ID and
    resolves the matching pending future: with an exception when the message
    arrived on the instance's failed topic, with the decoded payload (plus a
    ``_correlation_id`` entry) otherwise.

    Messages that cannot be used are logged and dropped: empty values,
    undecodable or non-object JSON, payloads without a valid correlation ID,
    replies nobody is waiting for, and replies whose future is already done
    (for example because the waiting request was cancelled).

    This method never raises; unexpected errors are logged so the consumer
    loop calling it keeps running.

    Args:
        instance: The request-response instance.
        message: The Kafka message received.
    """
    try:
        if message.value is None:
            self._logger.warning(
                "Received empty message on topic %s, skipping",
                message.topic,
            )
            return

        # Deserialize message value. Bad bytes and bad JSON are expected
        # input errors, so they are reported as warnings without traceback.
        try:
            response_data = json.loads(message.value.decode("utf-8"))
        except (UnicodeDecodeError, json.JSONDecodeError) as e:
            self._logger.warning(
                "Failed to decode response JSON: topic=%s, error=%s",
                message.topic,
                e,
            )
            return

        # Valid JSON need not be an object; everything below requires a dict.
        if not isinstance(response_data, dict):
            self._logger.warning(
                "Response payload is not a JSON object: topic=%s, "
                "instance=%s, type=%s",
                message.topic,
                instance.name,
                type(response_data).__name__,
            )
            return

        # Extract correlation ID based on config
        correlation_id = self._extract_correlation_id(
            response_data,
            instance.correlation_config,
        )

        if correlation_id is None:
            self._logger.warning(
                "Response missing correlation_id: topic=%s, instance=%s",
                message.topic,
                instance.name,
            )
            return

        correlation_key = str(correlation_id)

        # Look up pending future
        future = instance.pending.pop(correlation_key, None)
        if future is None:
            self._logger.debug(
                "Orphan response received (no pending request): "
                "correlation_id=%s, topic=%s, instance=%s",
                correlation_key,
                message.topic,
                instance.name,
            )
            return

        # A done future (cancelled or already resolved) cannot be resolved
        # again; set_result/set_exception would raise InvalidStateError.
        if future.done():
            self._logger.debug(
                "Dropping response for request that is no longer waiting: "
                "correlation_id=%s, topic=%s, instance=%s",
                correlation_key,
                message.topic,
                instance.name,
            )
            return

        # Determine if this is a success or failure based on topic
        is_failure = message.topic == instance.failed_topic

        if is_failure:
            # Set exception for failed responses
            error_message = response_data.get("error", "Request failed")
            future.set_exception(RuntimeError(f"Request failed: {error_message}"))
        else:
            # Set result for successful responses
            # Include correlation_id in response for tracing
            response_data["_correlation_id"] = correlation_key
            future.set_result(response_data)

        self._logger.debug(
            "Resolved pending request: correlation_id=%s, topic=%s, "
            "is_failure=%s, instance=%s",
            correlation_key,
            message.topic,
            is_failure,
            instance.name,
        )

    except Exception as e:
        # Deliberately broad: one bad message must not end the consumer loop.
        self._logger.exception(
            "Error handling response message: topic=%s, error=%s",
            message.topic,
            e,
        )
499
+
500
def _extract_correlation_id(
    self,
    data: dict[str, object],
    config: ModelCorrelationConfig,
) -> UUID | None:
    """Read the correlation ID from a response payload.

    The value is looked up under ``config.field`` in ``data`` when
    ``config.location`` is ``"body"``. A location of ``"headers"`` is not
    implemented: a warning is logged and the body is used instead. Any other
    location yields ``None``.

    Args:
        data: Response data dictionary.
        config: Correlation configuration specifying location and field.

    Returns:
        The correlation ID parsed as a UUID, or ``None`` when the field is
        absent, ``None``, or not parseable as a UUID (the latter is logged
        as a warning).
    """
    location = config.location
    if location == "headers":
        # Headers would be in message headers, not body
        # For now, we only support body location
        self._logger.warning(
            "Header-based correlation not implemented, falling back to body"
        )
    elif location != "body":
        return None

    raw_value = data.get(config.field)
    if raw_value is None:
        return None

    # Parse to UUID - correlation IDs are always UUIDs
    try:
        parsed = UUID(str(raw_value))
    except ValueError:
        self._logger.warning(
            "Invalid correlation_id format (not a UUID): %s",
            raw_value,
        )
        return None
    return parsed
538
+
539
async def send_request(
    self,
    instance_name: str,
    payload: dict[str, object],
    timeout_seconds: int | None = None,
) -> dict[str, object]:
    """Send a request and await the correlated response.

    Registers a future under the request's correlation ID, checks the
    circuit breaker, publishes the payload to the instance's request topic
    and waits for the reply consumer to resolve the future.

    Correlation ID Handling:
        - If correlation_id exists in payload: Use existing value
        - If missing: Inject new UUID4 into payload
        - Always: Return correlation_id in response (as _correlation_id)

    The pending entry for this request is removed on every exit path,
    including cancellation of the calling task, so abandoned requests do not
    accumulate in the instance's pending map.

    Args:
        instance_name: Name of the wired request-response instance.
        payload: Request payload dictionary. Modified in place to add
            correlation_id if not present.
        timeout_seconds: Override timeout for this request. If None,
            uses the instance's configured timeout.

    Returns:
        Response dictionary from the reply topic. Includes _correlation_id
        field for tracing.

    Raises:
        ProtocolConfigurationError: If instance_name is not wired.
        ValueError: If the payload carries a correlation ID that is not a
            valid UUID.
        InfraTimeoutError: If no response received within timeout.
        InfraUnavailableError: If circuit breaker is open.
        RuntimeError: If request failed (response on failed topic).

    Note:
        Every exception other than InfraUnavailableError raised after the
        future is registered (publish errors, timeouts, failed replies) is
        recorded as a circuit breaker failure before being re-raised.
    """
    # Get instance
    instance = self._instances.get(instance_name)
    if instance is None:
        raise ProtocolConfigurationError(
            f"Request-response instance '{instance_name}' not wired",
            context=ModelInfraErrorContext.with_correlation(
                transport_type=EnumInfraTransportType.KAFKA,
                operation="send_request",
            ),
            instance_name=instance_name,
        )

    # Determine timeout
    timeout = (
        timeout_seconds if timeout_seconds is not None else instance.timeout_seconds
    )

    # Extract or inject correlation_id
    correlation_id = self._ensure_correlation_id(
        payload,
        instance.correlation_config,
    )
    correlation_key = str(correlation_id)

    # Create future for response
    future: asyncio.Future[dict[str, object]] = (
        asyncio.get_running_loop().create_future()
    )
    instance.pending[correlation_key] = future

    try:
        # Check circuit breaker before publish
        async with self._circuit_breaker_lock:
            await self._check_circuit_breaker(
                operation="send_request",
                correlation_id=correlation_id,
            )

        # Publish request
        await self._publish_request(instance, payload, correlation_id)

        # Wait for response with timeout
        try:
            response = await asyncio.wait_for(future, timeout=timeout)
        except TimeoutError:
            # Raise InfraTimeoutError (NOT InfraUnavailableError)
            timeout_context = ModelTimeoutErrorContext(
                transport_type=EnumInfraTransportType.KAFKA,
                operation="send_request",
                target_name=instance.request_topic,
                correlation_id=correlation_id,
                timeout_seconds=float(timeout),
            )
            raise InfraTimeoutError(
                f"Request-response timeout after {timeout}s: "
                f"instance={instance_name}, correlation_id={correlation_key}",
                context=timeout_context,
            ) from None

        # Record success in circuit breaker
        async with self._circuit_breaker_lock:
            await self._reset_circuit_breaker()

        return response

    except InfraUnavailableError:
        # Circuit breaker open - re-raise without recording another failure
        raise

    except Exception:
        # Record failure in circuit breaker
        async with self._circuit_breaker_lock:
            await self._record_circuit_failure(
                operation="send_request",
                correlation_id=correlation_id,
            )
        raise

    finally:
        # Runs for success, errors and cancellation (CancelledError is not an
        # Exception, so the handlers above never see it). Only remove the
        # entry if it is still this request's future, so another request
        # registered under the same key is not evicted.
        if instance.pending.get(correlation_key) is future:
            del instance.pending[correlation_key]
658
+
659
def _ensure_correlation_id(
    self,
    payload: dict[str, object],
    config: ModelCorrelationConfig,
) -> UUID:
    """Ensure correlation_id exists in payload, injecting if missing.

    Looks up ``config.field`` in ``payload``. When the field is absent or
    ``None`` a new UUID4 is generated and stored as a string. When a value
    is present it is parsed as a UUID; a value that is not already a string
    (for example a ``UUID`` object) is replaced by its string form so the
    payload stays JSON-serializable.

    Args:
        payload: Request payload dictionary. Modified in place.
        config: Correlation configuration.

    Returns:
        The correlation ID as UUID (existing parsed or newly generated).

    Raises:
        ValueError: If the existing value cannot be parsed as a UUID.
    """
    existing = payload.get(config.field)

    if existing is None:
        # Generate new UUID
        correlation_id = uuid4()
        payload[config.field] = str(correlation_id)
        return correlation_id

    # Parse existing to UUID - correlation IDs are always UUIDs
    correlation_id = existing if isinstance(existing, UUID) else UUID(str(existing))

    # Strings are left exactly as the caller supplied them; anything else is
    # normalized because json.dumps cannot encode UUID objects.
    if not isinstance(existing, str):
        payload[config.field] = str(correlation_id)

    return correlation_id
684
+
685
async def _publish_request(
    self,
    instance: RequestResponseInstanceState,
    payload: dict[str, object],
    correlation_id: UUID,
) -> None:
    """Publish one request message through the event bus.

    The payload is JSON-encoded to UTF-8 bytes and sent to the instance's
    request topic; the message key is the UTF-8 encoded string form of the
    correlation ID. A debug line is logged after the publish call returns.

    Args:
        instance: Request-response instance.
        payload: Request payload.
        correlation_id: Correlation ID for logging and message key.
    """
    # Serialize first so an unserializable payload fails before publishing.
    encoded_payload = json.dumps(payload).encode("utf-8")
    # UUID becomes a string only here, at the serialization boundary.
    encoded_key = str(correlation_id).encode("utf-8")

    await self._event_bus.publish(
        topic=instance.request_topic,
        key=encoded_key,
        value=encoded_payload,
    )

    self._logger.debug(
        "Published request: topic=%s, correlation_id=%s, instance=%s",
        instance.request_topic,
        correlation_id,
        instance.name,
    )
714
+
715
async def cleanup(self) -> None:
    """Clean up all request-response instances.

    Runs ``_cleanup_instance`` for every wired instance, one after another,
    then empties the instance registry and logs how many instances were
    cleaned up. Should be called during runtime shutdown.

    This method is safe to call multiple times - with no wired instances it
    returns immediately.
    """
    cleanup_count = len(self._instances)
    if cleanup_count == 0:
        return

    # Iterate over a snapshot of the states; the names are not needed here.
    for instance in list(self._instances.values()):
        await self._cleanup_instance(instance)

    self._instances.clear()
    self._logger.info(
        "Cleaned up %d request-response instance(s)",
        cleanup_count,
    )
735
+
736
async def _cleanup_instance(self, instance: RequestResponseInstanceState) -> None:
    """Tear down one request-response instance.

    Steps, in order: cancel the consumer task if it is still running and
    wait for it to finish; stop the consumer, logging (not raising) any
    error from ``stop()``; fail every pending future that is not yet done
    with a RuntimeError; clear the pending map.

    Args:
        instance: Instance to clean up.
    """
    # Cancel consumer task
    task = instance.consumer_task
    if task is not None and not task.done():
        task.cancel()
        try:
            await task
        except asyncio.CancelledError:
            pass

    # Stop consumer
    kafka_consumer = instance.consumer
    if kafka_consumer is not None:
        try:
            await kafka_consumer.stop()
        except Exception as exc:
            self._logger.warning(
                "Error stopping consumer for instance %s: %s",
                instance.name,
                exc,
            )

    # Fail all pending futures (the same error object is shared by all)
    shutdown_error = RuntimeError(
        f"Request-response instance '{instance.name}' was cleaned up"
    )
    for pending_key, waiter in instance.pending.items():
        if waiter.done():
            continue
        waiter.set_exception(shutdown_error)
        self._logger.debug(
            "Failed pending request on cleanup: correlation_id=%s, instance=%s",
            pending_key,
            instance.name,
        )

    instance.pending.clear()
    self._logger.debug(
        "Cleaned up instance: %s",
        instance.name,
    )
779
+
780
def get_boot_nonce(self) -> str:
    """Return the module-level boot nonce.

    The nonce is the value embedded in the consumer group names built by
    this module, so exposing it helps when correlating logs with consumer
    groups.

    Returns:
        The 8-character hex string generated once when this module was
        loaded.
    """
    boot_nonce = _BOOT_NONCE
    return boot_nonce
789
+
790
+
791
# Public API of this module: only the wiring class is exported by
# ``from ... import *``.
__all__: list[str] = [
    "RequestResponseWiring",
]