omnibase_infra 0.3.1__py3-none-any.whl → 0.3.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (72) hide show
  1. omnibase_infra/__init__.py +1 -1
  2. omnibase_infra/enums/__init__.py +3 -0
  3. omnibase_infra/enums/enum_consumer_group_purpose.py +9 -0
  4. omnibase_infra/enums/enum_postgres_error_code.py +188 -0
  5. omnibase_infra/handlers/registration_storage/handler_registration_storage_postgres.py +29 -20
  6. omnibase_infra/mixins/__init__.py +14 -0
  7. omnibase_infra/mixins/mixin_postgres_error_response.py +314 -0
  8. omnibase_infra/mixins/mixin_postgres_op_executor.py +298 -0
  9. omnibase_infra/models/__init__.py +3 -0
  10. omnibase_infra/{nodes/effects/models → models}/model_backend_result.py +22 -6
  11. omnibase_infra/models/projection/__init__.py +11 -0
  12. omnibase_infra/models/projection/model_contract_projection.py +170 -0
  13. omnibase_infra/models/projection/model_topic_projection.py +148 -0
  14. omnibase_infra/nodes/contract_registry_reducer/__init__.py +5 -0
  15. omnibase_infra/nodes/contract_registry_reducer/contract_registration_event_router.py +689 -0
  16. omnibase_infra/nodes/effects/__init__.py +1 -1
  17. omnibase_infra/nodes/effects/models/__init__.py +6 -4
  18. omnibase_infra/nodes/effects/models/model_registry_response.py +1 -1
  19. omnibase_infra/nodes/effects/protocol_consul_client.py +1 -1
  20. omnibase_infra/nodes/effects/protocol_postgres_adapter.py +1 -1
  21. omnibase_infra/nodes/effects/registry_effect.py +1 -1
  22. omnibase_infra/nodes/node_contract_persistence_effect/__init__.py +101 -0
  23. omnibase_infra/nodes/node_contract_persistence_effect/contract.yaml +490 -0
  24. omnibase_infra/nodes/node_contract_persistence_effect/handlers/__init__.py +74 -0
  25. omnibase_infra/nodes/node_contract_persistence_effect/handlers/handler_postgres_cleanup_topics.py +217 -0
  26. omnibase_infra/nodes/node_contract_persistence_effect/handlers/handler_postgres_contract_upsert.py +242 -0
  27. omnibase_infra/nodes/node_contract_persistence_effect/handlers/handler_postgres_deactivate.py +194 -0
  28. omnibase_infra/nodes/node_contract_persistence_effect/handlers/handler_postgres_heartbeat.py +243 -0
  29. omnibase_infra/nodes/node_contract_persistence_effect/handlers/handler_postgres_mark_stale.py +208 -0
  30. omnibase_infra/nodes/node_contract_persistence_effect/handlers/handler_postgres_topic_update.py +298 -0
  31. omnibase_infra/nodes/node_contract_persistence_effect/models/__init__.py +15 -0
  32. omnibase_infra/nodes/node_contract_persistence_effect/models/model_persistence_result.py +52 -0
  33. omnibase_infra/nodes/node_contract_persistence_effect/node.py +114 -0
  34. omnibase_infra/nodes/node_contract_persistence_effect/registry/__init__.py +27 -0
  35. omnibase_infra/nodes/node_contract_persistence_effect/registry/registry_infra_contract_persistence_effect.py +220 -0
  36. omnibase_infra/nodes/node_registry_effect/models/__init__.py +2 -2
  37. omnibase_infra/projectors/__init__.py +6 -0
  38. omnibase_infra/projectors/projection_reader_contract.py +1301 -0
  39. omnibase_infra/runtime/__init__.py +5 -0
  40. omnibase_infra/runtime/contract_registration_event_router.py +500 -0
  41. omnibase_infra/runtime/db/__init__.py +4 -0
  42. omnibase_infra/runtime/db/models/__init__.py +15 -10
  43. omnibase_infra/runtime/db/models/model_db_operation.py +40 -0
  44. omnibase_infra/runtime/db/models/model_db_param.py +24 -0
  45. omnibase_infra/runtime/db/models/model_db_repository_contract.py +40 -0
  46. omnibase_infra/runtime/db/models/model_db_return.py +26 -0
  47. omnibase_infra/runtime/db/models/model_db_safety_policy.py +32 -0
  48. omnibase_infra/runtime/intent_execution_router.py +430 -0
  49. omnibase_infra/runtime/models/__init__.py +6 -0
  50. omnibase_infra/runtime/models/model_contract_registry_config.py +41 -0
  51. omnibase_infra/runtime/models/model_intent_execution_summary.py +79 -0
  52. omnibase_infra/runtime/models/model_runtime_config.py +8 -0
  53. omnibase_infra/runtime/protocols/__init__.py +16 -0
  54. omnibase_infra/runtime/protocols/protocol_intent_executor.py +107 -0
  55. omnibase_infra/runtime/request_response_wiring.py +785 -0
  56. omnibase_infra/runtime/service_kernel.py +295 -8
  57. omnibase_infra/services/registry_api/models/__init__.py +25 -0
  58. omnibase_infra/services/registry_api/models/model_contract_ref.py +44 -0
  59. omnibase_infra/services/registry_api/models/model_contract_view.py +81 -0
  60. omnibase_infra/services/registry_api/models/model_response_contracts.py +50 -0
  61. omnibase_infra/services/registry_api/models/model_response_topics.py +50 -0
  62. omnibase_infra/services/registry_api/models/model_topic_summary.py +57 -0
  63. omnibase_infra/services/registry_api/models/model_topic_view.py +63 -0
  64. omnibase_infra/services/registry_api/routes.py +205 -6
  65. omnibase_infra/services/registry_api/service.py +528 -1
  66. omnibase_infra/validation/infra_validators.py +3 -1
  67. omnibase_infra/validation/validation_exemptions.yaml +54 -0
  68. {omnibase_infra-0.3.1.dist-info → omnibase_infra-0.3.2.dist-info}/METADATA +3 -3
  69. {omnibase_infra-0.3.1.dist-info → omnibase_infra-0.3.2.dist-info}/RECORD +72 -34
  70. {omnibase_infra-0.3.1.dist-info → omnibase_infra-0.3.2.dist-info}/WHEEL +0 -0
  71. {omnibase_infra-0.3.1.dist-info → omnibase_infra-0.3.2.dist-info}/entry_points.txt +0 -0
  72. {omnibase_infra-0.3.1.dist-info → omnibase_infra-0.3.2.dist-info}/licenses/LICENSE +0 -0
@@ -0,0 +1,785 @@
1
+ # SPDX-License-Identifier: MIT
2
+ # Copyright (c) 2025 OmniNode Team
3
+ """Request-response wiring for correlation-based RPC-style Kafka communication.
4
+
5
+ This module provides the RequestResponseWiring class for implementing request-response
6
+ patterns over Kafka. Unlike the standard EventBusSubcontractWiring (designed for 24/7
7
+ consumers), this wiring supports correlation-based request-response flows where a
8
+ publisher sends a request and awaits a correlated response.
9
+
10
+ Architecture:
11
+ The RequestResponseWiring class is responsible for:
12
+ 1. Reading ModelRequestResponseConfig from contracts
13
+ 2. Creating dedicated consumers for reply topics (completed + failed)
14
+ 3. Managing correlation ID tracking with in-flight futures
15
+ 4. Injecting correlation IDs if not present in outgoing requests
16
+ 5. Matching incoming responses to pending requests via correlation ID
17
+ 6. Handling timeouts with InfraTimeoutError
18
+ 7. Circuit breaker protection for publish failures
19
+
20
+ This follows ARCH-002: "Runtime owns all Kafka plumbing." Nodes and handlers
21
+ declare request-response requirements in contracts but never directly interact
22
+ with Kafka consumers or producers.
23
+
24
+ Boot Nonce:
25
+ A per-process boot nonce (8-character hex string from UUID4) is generated once
26
+ at module load time. This ensures consumer groups are unique per process instance,
27
+ preventing message stealing between concurrent processes.
28
+
29
+ Consumer Group Naming:
30
+ Consumer groups are named as: {environment}.rr.{instance_name}.{boot_nonce}
31
+ Example: "dev.rr.code-analysis.a1b2c3d4"
32
+
33
+ This ensures:
34
+ - Each process instance has its own consumer group
35
+ - Multiple instances don't steal each other's responses
36
+ - Process restarts get new consumer groups
37
+
38
+ Correlation ID Handling:
39
+ When sending requests, the wiring:
40
+ 1. Checks if correlation_id exists at the configured location (default: body.correlation_id)
41
+ 2. If missing, injects a new UUID4 correlation_id into the payload
42
+ 3. Returns the correlation_id in the response for tracing
43
+
44
+ Error Handling:
45
+ - Timeout: Raises InfraTimeoutError (NOT InfraUnavailableError)
46
+ - Circuit breaker open: Raises InfraUnavailableError
47
+ - Publish failures: Recorded by circuit breaker, wrapped in appropriate error
48
+
49
+ Related:
50
+ - OMN-1742: Request-response wiring for Kafka RPC patterns
51
+ - ModelRequestResponseConfig: Contract model for request-response configuration
52
+ - EventBusSubcontractWiring: Standard 24/7 consumer wiring (different pattern)
53
+
54
+ .. versionadded:: 0.3.1
55
+ """
56
+
57
+ from __future__ import annotations
58
+
59
+ import asyncio
60
+ import json
61
+ import logging
62
+ from dataclasses import dataclass, field
63
+ from typing import TYPE_CHECKING
64
+ from uuid import UUID, uuid4
65
+
66
+ from aiokafka import AIOKafkaConsumer
67
+
68
+ from omnibase_core.models.contracts.subcontracts import (
69
+ ModelCorrelationConfig,
70
+ ModelRequestResponseConfig,
71
+ ModelRequestResponseInstance,
72
+ )
73
+ from omnibase_core.protocols.event_bus.protocol_event_bus_publisher import (
74
+ ProtocolEventBusPublisher,
75
+ )
76
+ from omnibase_infra.enums import EnumInfraTransportType
77
+ from omnibase_infra.errors import (
78
+ InfraTimeoutError,
79
+ InfraUnavailableError,
80
+ ModelInfraErrorContext,
81
+ ModelTimeoutErrorContext,
82
+ ProtocolConfigurationError,
83
+ )
84
+ from omnibase_infra.mixins import MixinAsyncCircuitBreaker
85
+
86
+ if TYPE_CHECKING:
87
+ from aiokafka import ConsumerRecord
88
+
89
# Module-level logger named after this module.
logger = logging.getLogger(__name__)

# Boot nonce: generated ONCE per process, when this module is first imported.
# It is the first 8 hex characters of a random UUID4 and is embedded in every
# reply-consumer group name so that each process gets its own consumer groups.
_BOOT_NONCE: str = uuid4().hex[:8]
94
+
95
+
96
@dataclass
class RequestResponseInstanceState:
    """Mutable runtime state for one wired request-response instance.

    Bundles the resolved topic names, the default timeout, the correlation
    settings and the Kafka consumer machinery that belong to a single
    configured request-response pattern.
    """

    # Instance name; the key used to look the instance up when sending.
    name: str
    # Topic that requests are published to.
    request_topic: str
    # Reply topic whose messages complete a pending request with a result.
    completed_topic: str
    # Reply topic whose messages complete a pending request with an error.
    failed_topic: str
    # Default number of seconds to wait for a reply.
    timeout_seconds: int
    # Describes where the correlation ID lives in payloads.
    correlation_config: ModelCorrelationConfig
    # Kafka consumer group used by the reply consumer.
    consumer_group: str
    # In-flight requests: correlation ID string -> future awaiting the reply.
    pending: dict[str, asyncio.Future[dict[str, object]]] = field(default_factory=dict)
    # Consumer subscribed to the reply topics; None until one is attached.
    consumer: AIOKafkaConsumer | None = None
    # Background task draining ``consumer``; None until one is attached.
    consumer_task: asyncio.Task[None] | None = None
114
+
115
+
116
+ class RequestResponseWiring(MixinAsyncCircuitBreaker):
117
+ """Wires request-response patterns to Kafka for correlation-based RPC.
118
+
119
+ This class implements the request-response pattern over Kafka, where:
120
+ 1. A request is published to a request topic
121
+ 2. The wiring awaits a correlated response on reply topics
122
+ 3. Correlation is tracked via correlation_id in the message payload
123
+
124
+ Unlike EventBusSubcontractWiring (designed for 24/7 consumers), this wiring
125
+ creates ephemeral consumers that match responses to pending requests.
126
+
127
+ Consumer Startup:
128
+ Consumers are started eagerly when wire_request_response() is called.
129
+ This ensures responses can be received immediately after the first request.
130
+
131
+ Correlation ID Injection:
132
+ If the outgoing payload lacks a correlation_id at the configured location,
133
+ the wiring injects a new UUID4. The correlation_id is always returned
134
+ in the response for tracing.
135
+
136
+ Timeout Handling:
137
+ If no response is received within the configured timeout (default: 30s),
138
+ InfraTimeoutError is raised. Note: This is a timeout error, NOT
139
+ InfraUnavailableError which is reserved for circuit breaker states.
140
+
141
+ Circuit Breaker:
142
+ Publish failures are tracked by the circuit breaker. When the circuit
143
+ opens, InfraUnavailableError is raised immediately without attempting
144
+ to publish.
145
+
146
+ Thread Safety:
147
+ This class is designed for single-threaded async use. All operations
148
+ should be performed from a single async context.
149
+
150
+ Example:
151
+ ```python
152
+ from omnibase_infra.runtime import RequestResponseWiring
153
+ from omnibase_core.models.contracts.subcontracts import (
154
+ ModelRequestResponseConfig,
155
+ ModelRequestResponseInstance,
156
+ ModelReplyTopics,
157
+ )
158
+
159
+ # Create wiring
160
+ wiring = RequestResponseWiring(
161
+ event_bus=event_bus,
162
+ environment="dev",
163
+ app_name="my-service",
164
+ )
165
+
166
+ # Wire from config
167
+ config = ModelRequestResponseConfig(
168
+ instances=[
169
+ ModelRequestResponseInstance(
170
+ name="code-analysis",
171
+ request_topic="onex.cmd.intelligence.analyze-code.v1",
172
+ reply_topics=ModelReplyTopics(
173
+ completed="onex.evt.intelligence.code-analyzed.v1",
174
+ failed="onex.evt.intelligence.code-analysis-failed.v1",
175
+ ),
176
+ timeout_seconds=30,
177
+ )
178
+ ]
179
+ )
180
+ await wiring.wire_request_response(config)
181
+
182
+ # Send request and await response
183
+ response = await wiring.send_request(
184
+ instance_name="code-analysis",
185
+ payload={"code": "print('hello')"},
186
+ )
187
+
188
+ # Cleanup on shutdown
189
+ await wiring.cleanup()
190
+ ```
191
+
192
+ Attributes:
193
+ _event_bus: Event bus for publishing requests
194
+ _environment: Environment prefix for topics (e.g., 'dev', 'prod')
195
+ _app_name: Application name for consumer group identification
196
+ _instances: Dict mapping instance names to their state
197
+ _bootstrap_servers: Kafka bootstrap servers (explicit argument, event bus attribute, or KAFKA_BOOTSTRAP_SERVERS env var)
198
+
199
+ .. versionadded:: 0.3.1
200
+ """
201
+
202
def __init__(
    self,
    event_bus: ProtocolEventBusPublisher,
    environment: str,
    app_name: str,
    bootstrap_servers: str | None = None,
) -> None:
    """Initialize request-response wiring.

    Args:
        event_bus: Event bus used to publish requests. Must implement the
            ProtocolEventBusPublisher interface.
        environment: Environment prefix for topics (e.g., 'dev', 'prod').
            Prepended to topic suffixes and consumer group names.
        app_name: Application name; used in the circuit breaker service
            name and in log output.
        bootstrap_servers: Kafka bootstrap servers. Resolution order when
            not provided (or empty): the event bus's ``_bootstrap_servers``
            attribute, then the ``KAFKA_BOOTSTRAP_SERVERS`` environment
            variable, then ``"localhost:9092"``.

    Raises:
        ValueError: If environment or app_name is empty or whitespace-only.
    """
    if not environment or not environment.strip():
        raise ValueError("environment must be a non-empty string")
    if not app_name or not app_name.strip():
        raise ValueError("app_name must be a non-empty string")

    self._event_bus = event_bus
    self._environment = environment
    self._app_name = app_name
    self._instances: dict[str, RequestResponseInstanceState] = {}
    self._logger = logging.getLogger(__name__)

    # Resolve bootstrap servers: explicit argument first, then whatever the
    # event bus exposes. An attribute that exists but is None/empty must not
    # win over the environment fallback, otherwise the reply consumers would
    # be created without any broker address.
    resolved_servers = bootstrap_servers or getattr(
        event_bus, "_bootstrap_servers", None
    )
    if not resolved_servers:
        import os

        resolved_servers = os.environ.get(
            "KAFKA_BOOTSTRAP_SERVERS", "localhost:9092"
        )
    self._bootstrap_servers = resolved_servers

    # Initialize circuit breaker for publish protection
    self._init_circuit_breaker(
        threshold=5,
        reset_timeout=60.0,
        service_name=f"request-response.{app_name}",
        transport_type=EnumInfraTransportType.KAFKA,
        half_open_successes=1,
    )

    self._logger.debug(
        "RequestResponseWiring initialized: environment=%s, app_name=%s, "
        "boot_nonce=%s, bootstrap_servers=%s",
        environment,
        app_name,
        _BOOT_NONCE,
        self._bootstrap_servers,
    )
264
+
265
def resolve_topic(self, topic_suffix: str) -> str:
    """Build the full topic name by prefixing the environment.

    Args:
        topic_suffix: ONEX format topic suffix
            (e.g., 'onex.cmd.intelligence.analyze-code.v1').

    Returns:
        The suffix joined to the configured environment with a dot
        (e.g., 'dev.onex.cmd.intelligence.analyze-code.v1').
    """
    return "{}.{}".format(self._environment, topic_suffix)
277
+
278
async def wire_request_response(
    self,
    config: ModelRequestResponseConfig,
) -> None:
    """Wire every request-response instance listed in the configuration.

    Instances are wired one after another, in the order they appear in
    ``config.instances``. Wiring an instance creates and starts its reply
    consumer and launches the background task that matches responses to
    pending requests.

    Args:
        config: Request-response configuration with instance definitions.

    Raises:
        ProtocolConfigurationError: If an instance name is already wired.
        Exception: Any error raised while wiring an instance propagates
            unchanged; instances wired before the failure stay wired.
    """
    for instance_spec in config.instances:
        await self._wire_instance(instance_spec)
301
+
302
async def _wire_instance(self, instance: ModelRequestResponseInstance) -> None:
    """Wire a single request-response instance.

    Resolves the instance's topics, creates a consumer subscribed to both
    reply topics, starts it, launches the background consume task and
    finally registers the instance under its name.

    Args:
        instance: Instance configuration to wire.

    Raises:
        ProtocolConfigurationError: If instance name already wired.
        Exception: Whatever the consumer raises while starting. The consumer
            is stopped before the error propagates and the instance is not
            registered.
    """
    if instance.name in self._instances:
        raise ProtocolConfigurationError(
            f"Request-response instance '{instance.name}' already wired",
            context=ModelInfraErrorContext.with_correlation(
                transport_type=EnumInfraTransportType.KAFKA,
                operation="wire_request_response",
            ),
            instance_name=instance.name,
        )

    # Build consumer group: {environment}.rr.{instance_name}.{boot_nonce}
    consumer_group = f"{self._environment}.rr.{instance.name}.{_BOOT_NONCE}"

    # Resolve topics with environment prefix
    request_topic = self.resolve_topic(instance.request_topic)
    completed_topic = self.resolve_topic(instance.reply_topics.completed)
    failed_topic = self.resolve_topic(instance.reply_topics.failed)

    # Default correlation config if not specified
    correlation_config = instance.correlation or ModelCorrelationConfig()

    # Create instance state
    rr_instance = RequestResponseInstanceState(
        name=instance.name,
        request_topic=request_topic,
        completed_topic=completed_topic,
        failed_topic=failed_topic,
        timeout_seconds=instance.timeout_seconds,
        correlation_config=correlation_config,
        consumer_group=consumer_group,
    )

    # Create consumer for reply topics
    consumer = AIOKafkaConsumer(
        completed_topic,
        failed_topic,
        bootstrap_servers=self._bootstrap_servers,
        group_id=consumer_group,
        auto_offset_reset=instance.auto_offset_reset,
        enable_auto_commit=True,
    )

    rr_instance.consumer = consumer

    # Start consumer eagerly. If start() fails or is cancelled, nothing else
    # holds a reference to this consumer, so it must be stopped here or its
    # connections are leaked. BaseException is used so cancellation is
    # covered too; the original error is always re-raised.
    try:
        await consumer.start()
    except BaseException:
        try:
            await consumer.stop()
        except Exception as stop_error:
            self._logger.warning(
                "Error stopping consumer for instance %s: %s",
                instance.name,
                stop_error,
            )
        raise

    self._logger.info(
        "Started request-response consumer: instance=%s, "
        "consumer_group=%s, topics=[%s, %s]",
        instance.name,
        consumer_group,
        completed_topic,
        failed_topic,
    )

    # Start background task to process responses
    consumer_task = asyncio.create_task(
        self._consume_responses(rr_instance),
        name=f"rr-consumer-{instance.name}",
    )
    rr_instance.consumer_task = consumer_task

    # Store instance
    self._instances[instance.name] = rr_instance
375
+
376
async def _consume_responses(self, instance: RequestResponseInstanceState) -> None:
    """Drain the instance's reply consumer and dispatch each message.

    Iterates the consumer until it is exhausted, the task is cancelled, or
    an unexpected error occurs. Every message is passed to
    ``_handle_response_message``. Cancellation is logged and re-raised;
    any other exception is logged and ends the task without propagating.

    Args:
        instance: The request-response instance to consume for.
    """
    reply_consumer = instance.consumer
    if reply_consumer is not None:
        try:
            async for record in reply_consumer:
                await self._handle_response_message(instance, record)
        except asyncio.CancelledError:
            self._logger.debug(
                "Consumer task cancelled for instance: %s",
                instance.name,
            )
            raise
        except Exception as exc:
            self._logger.exception(
                "Unexpected error in consumer task for instance %s: %s",
                instance.name,
                exc,
            )
404
+
405
async def _handle_response_message(
    self,
    instance: RequestResponseInstanceState,
    message: ConsumerRecord,
) -> None:
    """Handle a single response message from Kafka.

    Decodes the message value as UTF-8 JSON, extracts the correlation ID and
    completes the matching pending future: with the decoded payload (plus a
    ``_correlation_id`` entry) for the completed topic, or with a
    ``RuntimeError`` for the failed topic.

    Messages that cannot be used are logged and dropped: empty values,
    undecodable or non-object payloads, payloads without a usable
    correlation ID, responses with no pending request, and responses whose
    future is already done. This method never raises; unexpected errors are
    logged so the consume loop keeps running.

    Args:
        instance: The request-response instance.
        message: The Kafka message received.
    """
    try:
        if message.value is None:
            self._logger.warning(
                "Received empty message on topic %s, skipping",
                message.topic,
            )
            return

        response_data = json.loads(message.value.decode("utf-8"))

        # Valid JSON is not necessarily an object; lists/scalars have no
        # fields to correlate on, so reject them explicitly.
        if not isinstance(response_data, dict):
            self._logger.warning(
                "Response payload is not a JSON object: topic=%s, instance=%s",
                message.topic,
                instance.name,
            )
            return

        # Extract correlation ID based on config
        correlation_id = self._extract_correlation_id(
            response_data,
            instance.correlation_config,
        )

        if correlation_id is None:
            self._logger.warning(
                "Response missing correlation_id: topic=%s, instance=%s",
                message.topic,
                instance.name,
            )
            return

        correlation_key = str(correlation_id)

        # Look up pending future
        future = instance.pending.pop(correlation_key, None)
        if future is None:
            self._logger.debug(
                "Orphan response received (no pending request): "
                "correlation_id=%s, topic=%s, instance=%s",
                correlation_key,
                message.topic,
                instance.name,
            )
            return

        # The waiter may already have given up (timeout or cancellation
        # cancels the future). Completing a done future raises
        # InvalidStateError, so treat the response as late and drop it.
        if future.done():
            self._logger.debug(
                "Late response for already-completed request: "
                "correlation_id=%s, topic=%s, instance=%s",
                correlation_key,
                message.topic,
                instance.name,
            )
            return

        # Determine if this is a success or failure based on topic
        is_failure = message.topic == instance.failed_topic

        if is_failure:
            # Set exception for failed responses
            error_message = response_data.get("error", "Request failed")
            future.set_exception(RuntimeError(f"Request failed: {error_message}"))
        else:
            # Set result for successful responses
            # Include correlation_id in response for tracing
            response_data["_correlation_id"] = correlation_key
            future.set_result(response_data)

        self._logger.debug(
            "Resolved pending request: correlation_id=%s, topic=%s, "
            "is_failure=%s, instance=%s",
            correlation_key,
            message.topic,
            is_failure,
            instance.name,
        )

    except (json.JSONDecodeError, UnicodeDecodeError) as e:
        # Invalid UTF-8 is a malformed payload just like invalid JSON.
        self._logger.warning(
            "Failed to decode response JSON: topic=%s, error=%s",
            message.topic,
            e,
        )
    except Exception as e:
        self._logger.exception(
            "Error handling response message: topic=%s, error=%s",
            message.topic,
            e,
        )
491
+
492
def _extract_correlation_id(
    self,
    data: dict[str, object],
    config: ModelCorrelationConfig,
) -> UUID | None:
    """Read the correlation ID out of a decoded response payload.

    The value is looked up under ``config.field`` in ``data`` when the
    configured location is ``"body"``. For ``"headers"`` a warning is logged
    and the body is used as a fallback, because header lookup is not
    implemented. Any other location yields ``None``.

    Args:
        data: Response data dictionary.
        config: Correlation configuration specifying location and field.

    Returns:
        The correlation ID parsed as a UUID, or ``None`` when the field is
        absent, the location is unsupported, or the value is not a valid
        UUID (the latter is logged as a warning).
    """
    location = config.location
    if location not in ("body", "headers"):
        return None

    if location == "headers":
        # Headers would be in message headers, not body; only the body
        # location is supported for now.
        self._logger.warning(
            "Header-based correlation not implemented, falling back to body"
        )

    raw_value = data.get(config.field)
    if raw_value is None:
        return None

    # Correlation IDs are always UUIDs, so anything unparsable is rejected.
    try:
        parsed = UUID(str(raw_value))
    except ValueError:
        self._logger.warning(
            "Invalid correlation_id format (not a UUID): %s",
            raw_value,
        )
        return None
    return parsed
530
+
531
async def send_request(
    self,
    instance_name: str,
    payload: dict[str, object],
    timeout_seconds: int | None = None,
) -> dict[str, object]:
    """Send a request and await the correlated response.

    Publishes a request to the instance's request topic and waits for
    a response on the reply topics. If the payload lacks a correlation_id
    at the configured field, one is injected.

    Correlation ID Handling:
        - If correlation_id exists in payload: Use existing value
        - If missing: Inject new UUID4 into payload
        - Always: Return correlation_id in response (as _correlation_id)

    Circuit Breaker:
        The breaker is checked before publishing and reset after a
        successful response. Every other exception leaving this method
        (publish errors, timeouts, failed-topic responses) is recorded as
        a failure, except InfraUnavailableError, which is re-raised as is.

    Args:
        instance_name: Name of the wired request-response instance.
        payload: Request payload dictionary. Modified in place to add
            correlation_id if not present.
        timeout_seconds: Override timeout for this request. If None,
            uses the instance's configured timeout.

    Returns:
        Response dictionary from the reply topic. Includes _correlation_id
        field for tracing.

    Raises:
        ProtocolConfigurationError: If instance_name is not wired.
        InfraTimeoutError: If no response received within timeout.
        InfraUnavailableError: If circuit breaker is open.
        RuntimeError: If request failed (response on failed topic).
    """
    # Get instance
    instance = self._instances.get(instance_name)
    if instance is None:
        raise ProtocolConfigurationError(
            f"Request-response instance '{instance_name}' not wired",
            context=ModelInfraErrorContext.with_correlation(
                transport_type=EnumInfraTransportType.KAFKA,
                operation="send_request",
            ),
            instance_name=instance_name,
        )

    # Determine timeout
    timeout = (
        timeout_seconds if timeout_seconds is not None else instance.timeout_seconds
    )

    # Extract or inject correlation_id
    correlation_id = self._ensure_correlation_id(
        payload,
        instance.correlation_config,
    )
    correlation_key = str(correlation_id)

    # Register the future BEFORE publishing so a fast reply cannot arrive
    # while nothing is listening for it.
    future: asyncio.Future[dict[str, object]] = (
        asyncio.get_running_loop().create_future()
    )
    instance.pending[correlation_key] = future

    try:
        # Check circuit breaker before publish
        async with self._circuit_breaker_lock:
            await self._check_circuit_breaker(
                operation="send_request",
                correlation_id=correlation_id,
            )

        # Publish request
        await self._publish_request(instance, payload, correlation_id)

        # Wait for response with timeout
        try:
            response = await asyncio.wait_for(future, timeout=timeout)
        except TimeoutError:
            # Raise InfraTimeoutError (NOT InfraUnavailableError)
            timeout_context = ModelTimeoutErrorContext(
                transport_type=EnumInfraTransportType.KAFKA,
                operation="send_request",
                target_name=instance.request_topic,
                correlation_id=correlation_id,
                timeout_seconds=float(timeout),
            )
            raise InfraTimeoutError(
                f"Request-response timeout after {timeout}s: "
                f"instance={instance_name}, correlation_id={correlation_key}",
                context=timeout_context,
            ) from None

        # Record success in circuit breaker
        async with self._circuit_breaker_lock:
            await self._reset_circuit_breaker()

        return response

    except InfraUnavailableError:
        # Circuit breaker open - re-raise without modification
        raise

    except Exception:
        # Record failure in circuit breaker
        async with self._circuit_breaker_lock:
            await self._record_circuit_failure(
                operation="send_request",
                correlation_id=correlation_id,
            )
        raise

    finally:
        # Always drop our pending entry, including when the caller is
        # cancelled (CancelledError is not an Exception, so the handlers
        # above never see it). Only remove the entry if it is still OUR
        # future: a later request reusing the same correlation ID must not
        # be evicted by this one finishing.
        if instance.pending.get(correlation_key) is future:
            del instance.pending[correlation_key]
650
+
651
def _ensure_correlation_id(
    self,
    payload: dict[str, object],
    config: ModelCorrelationConfig,
) -> UUID:
    """Ensure correlation_id exists in payload, injecting if missing.

    The ID is read from / written to ``payload[config.field]``. The payload
    is modified in place in two cases: when no ID is present (a new UUID4
    string is injected), and when the existing value is not a string (it is
    replaced by its canonical UUID string so the payload stays JSON
    serializable, e.g. when a ``UUID`` object was supplied). Existing string
    values are left exactly as given.

    Args:
        payload: Request payload dictionary. Modified in place.
        config: Correlation configuration.

    Returns:
        The correlation ID as UUID (existing parsed or newly generated).

    Raises:
        ValueError: If the existing value cannot be parsed as a UUID.
    """
    existing = payload.get(config.field)

    if existing is None:
        # Generate new UUID
        correlation_id = uuid4()
        payload[config.field] = str(correlation_id)
        return correlation_id

    # Parse existing to UUID - correlation IDs are always UUIDs
    try:
        correlation_id = UUID(str(existing))
    except ValueError as exc:
        # Same exception type as before, but say which field is wrong.
        raise ValueError(
            f"Invalid correlation_id in payload field '{config.field}' "
            f"(not a UUID): {existing!r}"
        ) from exc

    # A UUID object (or any other non-string) would make json.dumps() fail
    # when the request is published, so store the string form instead.
    if not isinstance(existing, str):
        payload[config.field] = str(correlation_id)

    return correlation_id
676
+
677
async def _publish_request(
    self,
    instance: RequestResponseInstanceState,
    payload: dict[str, object],
    correlation_id: UUID,
) -> None:
    """Serialize the payload and publish it on the instance's request topic.

    The payload is encoded as UTF-8 JSON. The message key is the UTF-8
    encoded string form of the correlation ID.

    Args:
        instance: Request-response instance providing the request topic.
        payload: Request payload.
        correlation_id: Correlation ID for logging and message key.
    """
    body = json.dumps(payload).encode("utf-8")
    # UUID becomes a string only here, at the serialization boundary.
    message_key = str(correlation_id).encode("utf-8")
    topic = instance.request_topic

    await self._event_bus.publish(topic=topic, key=message_key, value=body)

    self._logger.debug(
        "Published request: topic=%s, correlation_id=%s, instance=%s",
        topic,
        correlation_id,
        instance.name,
    )
706
+
707
async def cleanup(self) -> None:
    """Tear down every wired request-response instance.

    Each instance is cleaned up in turn via ``_cleanup_instance``. The
    instance registry is then emptied and the number of cleaned instances
    is logged. Intended to be called during runtime shutdown.

    Calling this when nothing is wired returns immediately, so repeated
    calls are no-ops.
    """
    total = len(self._instances)
    if not total:
        return

    # Iterate over a snapshot so the registry can be cleared afterwards.
    for state in list(self._instances.values()):
        await self._cleanup_instance(state)

    self._instances.clear()
    self._logger.info(
        "Cleaned up %d request-response instance(s)",
        total,
    )
727
+
728
async def _cleanup_instance(self, instance: RequestResponseInstanceState) -> None:
    """Release everything held by one request-response instance.

    Steps, in order: cancel the background consumer task (if still running)
    and wait for it to finish; stop the Kafka consumer, logging rather than
    raising on errors; fail every unfinished pending future with a
    ``RuntimeError``; clear the pending map.

    Args:
        instance: Instance to clean up.
    """
    # Cancel consumer task and wait until the cancellation has landed.
    task = instance.consumer_task
    if not (task is None or task.done()):
        task.cancel()
        try:
            await task
        except asyncio.CancelledError:
            pass

    # Stop consumer; best effort, a failing stop must not abort cleanup.
    kafka_consumer = instance.consumer
    if kafka_consumer is not None:
        try:
            await kafka_consumer.stop()
        except Exception as exc:
            self._logger.warning(
                "Error stopping consumer for instance %s: %s",
                instance.name,
                exc,
            )

    # Fail all pending futures so waiting callers are released.
    cleanup_error = RuntimeError(
        f"Request-response instance '{instance.name}' was cleaned up"
    )
    for key, waiter in instance.pending.items():
        if waiter.done():
            continue
        waiter.set_exception(cleanup_error)
        self._logger.debug(
            "Failed pending request on cleanup: correlation_id=%s, instance=%s",
            key,
            instance.name,
        )

    instance.pending.clear()
    self._logger.debug(
        "Cleaned up instance: %s",
        instance.name,
    )
771
+
772
def get_boot_nonce(self) -> str:
    """Expose the module-level boot nonce.

    Handy when debugging or logging which consumer groups belong to this
    process, since the nonce is part of every consumer group name.

    Returns:
        The 8-character hex string generated when the module was loaded.
    """
    nonce: str = _BOOT_NONCE
    return nonce
781
+
782
+
783
# Public API of this module. Only the wiring class is exported;
# RequestResponseInstanceState is not listed here.
__all__: list[str] = [
    "RequestResponseWiring",
]