omnibase_infra 0.2.5__py3-none-any.whl → 0.2.7__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (139) hide show
  1. omnibase_infra/constants_topic_patterns.py +26 -0
  2. omnibase_infra/enums/__init__.py +3 -0
  3. omnibase_infra/enums/enum_consumer_group_purpose.py +92 -0
  4. omnibase_infra/enums/enum_handler_source_mode.py +16 -2
  5. omnibase_infra/errors/__init__.py +4 -0
  6. omnibase_infra/errors/error_binding_resolution.py +128 -0
  7. omnibase_infra/event_bus/configs/kafka_event_bus_config.yaml +0 -2
  8. omnibase_infra/event_bus/event_bus_inmemory.py +64 -10
  9. omnibase_infra/event_bus/event_bus_kafka.py +105 -47
  10. omnibase_infra/event_bus/mixin_kafka_broadcast.py +3 -7
  11. omnibase_infra/event_bus/mixin_kafka_dlq.py +12 -6
  12. omnibase_infra/event_bus/models/config/model_kafka_event_bus_config.py +0 -81
  13. omnibase_infra/event_bus/testing/__init__.py +26 -0
  14. omnibase_infra/event_bus/testing/adapter_protocol_event_publisher_inmemory.py +418 -0
  15. omnibase_infra/event_bus/testing/model_publisher_metrics.py +64 -0
  16. omnibase_infra/handlers/handler_consul.py +2 -0
  17. omnibase_infra/handlers/mixins/__init__.py +5 -0
  18. omnibase_infra/handlers/mixins/mixin_consul_service.py +274 -10
  19. omnibase_infra/handlers/mixins/mixin_consul_topic_index.py +585 -0
  20. omnibase_infra/handlers/models/model_filesystem_config.py +4 -4
  21. omnibase_infra/migrations/001_create_event_ledger.sql +166 -0
  22. omnibase_infra/migrations/001_drop_event_ledger.sql +18 -0
  23. omnibase_infra/mixins/mixin_node_introspection.py +189 -19
  24. omnibase_infra/models/__init__.py +8 -0
  25. omnibase_infra/models/bindings/__init__.py +59 -0
  26. omnibase_infra/models/bindings/constants.py +144 -0
  27. omnibase_infra/models/bindings/model_binding_resolution_result.py +103 -0
  28. omnibase_infra/models/bindings/model_operation_binding.py +44 -0
  29. omnibase_infra/models/bindings/model_operation_bindings_subcontract.py +152 -0
  30. omnibase_infra/models/bindings/model_parsed_binding.py +52 -0
  31. omnibase_infra/models/discovery/model_introspection_config.py +25 -17
  32. omnibase_infra/models/dispatch/__init__.py +8 -0
  33. omnibase_infra/models/dispatch/model_debug_trace_snapshot.py +114 -0
  34. omnibase_infra/models/dispatch/model_materialized_dispatch.py +141 -0
  35. omnibase_infra/models/handlers/model_handler_source_config.py +1 -1
  36. omnibase_infra/models/model_node_identity.py +126 -0
  37. omnibase_infra/models/projection/model_snapshot_topic_config.py +3 -2
  38. omnibase_infra/models/registration/__init__.py +9 -0
  39. omnibase_infra/models/registration/model_event_bus_topic_entry.py +59 -0
  40. omnibase_infra/models/registration/model_node_event_bus_config.py +99 -0
  41. omnibase_infra/models/registration/model_node_introspection_event.py +11 -0
  42. omnibase_infra/models/runtime/__init__.py +9 -0
  43. omnibase_infra/models/validation/model_coverage_metrics.py +2 -2
  44. omnibase_infra/nodes/__init__.py +9 -0
  45. omnibase_infra/nodes/contract_registry_reducer/__init__.py +29 -0
  46. omnibase_infra/nodes/contract_registry_reducer/contract.yaml +255 -0
  47. omnibase_infra/nodes/contract_registry_reducer/models/__init__.py +38 -0
  48. omnibase_infra/nodes/contract_registry_reducer/models/model_contract_registry_state.py +266 -0
  49. omnibase_infra/nodes/contract_registry_reducer/models/model_payload_cleanup_topic_references.py +55 -0
  50. omnibase_infra/nodes/contract_registry_reducer/models/model_payload_deactivate_contract.py +58 -0
  51. omnibase_infra/nodes/contract_registry_reducer/models/model_payload_mark_stale.py +49 -0
  52. omnibase_infra/nodes/contract_registry_reducer/models/model_payload_update_heartbeat.py +71 -0
  53. omnibase_infra/nodes/contract_registry_reducer/models/model_payload_update_topic.py +66 -0
  54. omnibase_infra/nodes/contract_registry_reducer/models/model_payload_upsert_contract.py +92 -0
  55. omnibase_infra/nodes/contract_registry_reducer/node.py +121 -0
  56. omnibase_infra/nodes/contract_registry_reducer/reducer.py +784 -0
  57. omnibase_infra/nodes/contract_registry_reducer/registry/__init__.py +9 -0
  58. omnibase_infra/nodes/contract_registry_reducer/registry/registry_infra_contract_registry_reducer.py +101 -0
  59. omnibase_infra/nodes/handlers/consul/contract.yaml +85 -0
  60. omnibase_infra/nodes/handlers/db/contract.yaml +72 -0
  61. omnibase_infra/nodes/handlers/graph/contract.yaml +127 -0
  62. omnibase_infra/nodes/handlers/http/contract.yaml +74 -0
  63. omnibase_infra/nodes/handlers/intent/contract.yaml +66 -0
  64. omnibase_infra/nodes/handlers/mcp/contract.yaml +69 -0
  65. omnibase_infra/nodes/handlers/vault/contract.yaml +91 -0
  66. omnibase_infra/nodes/node_ledger_projection_compute/__init__.py +50 -0
  67. omnibase_infra/nodes/node_ledger_projection_compute/contract.yaml +104 -0
  68. omnibase_infra/nodes/node_ledger_projection_compute/node.py +284 -0
  69. omnibase_infra/nodes/node_ledger_projection_compute/registry/__init__.py +29 -0
  70. omnibase_infra/nodes/node_ledger_projection_compute/registry/registry_infra_ledger_projection.py +118 -0
  71. omnibase_infra/nodes/node_ledger_write_effect/__init__.py +82 -0
  72. omnibase_infra/nodes/node_ledger_write_effect/contract.yaml +200 -0
  73. omnibase_infra/nodes/node_ledger_write_effect/handlers/__init__.py +22 -0
  74. omnibase_infra/nodes/node_ledger_write_effect/handlers/handler_ledger_append.py +372 -0
  75. omnibase_infra/nodes/node_ledger_write_effect/handlers/handler_ledger_query.py +597 -0
  76. omnibase_infra/nodes/node_ledger_write_effect/models/__init__.py +31 -0
  77. omnibase_infra/nodes/node_ledger_write_effect/models/model_ledger_append_result.py +54 -0
  78. omnibase_infra/nodes/node_ledger_write_effect/models/model_ledger_entry.py +92 -0
  79. omnibase_infra/nodes/node_ledger_write_effect/models/model_ledger_query.py +53 -0
  80. omnibase_infra/nodes/node_ledger_write_effect/models/model_ledger_query_result.py +41 -0
  81. omnibase_infra/nodes/node_ledger_write_effect/node.py +89 -0
  82. omnibase_infra/nodes/node_ledger_write_effect/protocols/__init__.py +13 -0
  83. omnibase_infra/nodes/node_ledger_write_effect/protocols/protocol_ledger_persistence.py +127 -0
  84. omnibase_infra/nodes/node_ledger_write_effect/registry/__init__.py +9 -0
  85. omnibase_infra/nodes/node_ledger_write_effect/registry/registry_infra_ledger_write.py +121 -0
  86. omnibase_infra/nodes/node_registration_orchestrator/registry/registry_infra_node_registration_orchestrator.py +7 -5
  87. omnibase_infra/nodes/reducers/models/__init__.py +7 -2
  88. omnibase_infra/nodes/reducers/models/model_payload_consul_register.py +11 -0
  89. omnibase_infra/nodes/reducers/models/model_payload_ledger_append.py +133 -0
  90. omnibase_infra/nodes/reducers/registration_reducer.py +1 -0
  91. omnibase_infra/protocols/__init__.py +3 -0
  92. omnibase_infra/protocols/protocol_dispatch_engine.py +152 -0
  93. omnibase_infra/runtime/__init__.py +60 -0
  94. omnibase_infra/runtime/binding_resolver.py +753 -0
  95. omnibase_infra/runtime/constants_security.py +70 -0
  96. omnibase_infra/runtime/contract_loaders/__init__.py +9 -0
  97. omnibase_infra/runtime/contract_loaders/operation_bindings_loader.py +789 -0
  98. omnibase_infra/runtime/emit_daemon/__init__.py +97 -0
  99. omnibase_infra/runtime/emit_daemon/cli.py +844 -0
  100. omnibase_infra/runtime/emit_daemon/client.py +811 -0
  101. omnibase_infra/runtime/emit_daemon/config.py +535 -0
  102. omnibase_infra/runtime/emit_daemon/daemon.py +812 -0
  103. omnibase_infra/runtime/emit_daemon/event_registry.py +477 -0
  104. omnibase_infra/runtime/emit_daemon/model_daemon_request.py +139 -0
  105. omnibase_infra/runtime/emit_daemon/model_daemon_response.py +191 -0
  106. omnibase_infra/runtime/emit_daemon/queue.py +618 -0
  107. omnibase_infra/runtime/event_bus_subcontract_wiring.py +466 -0
  108. omnibase_infra/runtime/handler_source_resolver.py +43 -2
  109. omnibase_infra/runtime/kafka_contract_source.py +984 -0
  110. omnibase_infra/runtime/models/__init__.py +13 -0
  111. omnibase_infra/runtime/models/model_contract_load_result.py +224 -0
  112. omnibase_infra/runtime/models/model_runtime_contract_config.py +268 -0
  113. omnibase_infra/runtime/models/model_runtime_scheduler_config.py +4 -3
  114. omnibase_infra/runtime/models/model_security_config.py +109 -0
  115. omnibase_infra/runtime/publisher_topic_scoped.py +294 -0
  116. omnibase_infra/runtime/runtime_contract_config_loader.py +406 -0
  117. omnibase_infra/runtime/service_kernel.py +76 -6
  118. omnibase_infra/runtime/service_message_dispatch_engine.py +558 -15
  119. omnibase_infra/runtime/service_runtime_host_process.py +770 -20
  120. omnibase_infra/runtime/transition_notification_publisher.py +3 -2
  121. omnibase_infra/runtime/util_wiring.py +206 -62
  122. omnibase_infra/services/mcp/service_mcp_tool_sync.py +27 -9
  123. omnibase_infra/services/session/config_consumer.py +25 -8
  124. omnibase_infra/services/session/config_store.py +2 -2
  125. omnibase_infra/services/session/consumer.py +1 -1
  126. omnibase_infra/topics/__init__.py +45 -0
  127. omnibase_infra/topics/platform_topic_suffixes.py +140 -0
  128. omnibase_infra/topics/util_topic_composition.py +95 -0
  129. omnibase_infra/types/typed_dict/__init__.py +9 -1
  130. omnibase_infra/types/typed_dict/typed_dict_envelope_build_params.py +115 -0
  131. omnibase_infra/utils/__init__.py +9 -0
  132. omnibase_infra/utils/util_consumer_group.py +232 -0
  133. omnibase_infra/validation/infra_validators.py +18 -1
  134. omnibase_infra/validation/validation_exemptions.yaml +192 -0
  135. {omnibase_infra-0.2.5.dist-info → omnibase_infra-0.2.7.dist-info}/METADATA +3 -3
  136. {omnibase_infra-0.2.5.dist-info → omnibase_infra-0.2.7.dist-info}/RECORD +139 -52
  137. {omnibase_infra-0.2.5.dist-info → omnibase_infra-0.2.7.dist-info}/entry_points.txt +1 -0
  138. {omnibase_infra-0.2.5.dist-info → omnibase_infra-0.2.7.dist-info}/WHEEL +0 -0
  139. {omnibase_infra-0.2.5.dist-info → omnibase_infra-0.2.7.dist-info}/licenses/LICENSE +0 -0
@@ -0,0 +1,812 @@
1
+ # SPDX-License-Identifier: MIT
2
+ # Copyright (c) 2025 OmniNode Team
3
+ """Hook Event Daemon - Unix socket server for persistent Kafka event emission.
4
+
5
+ This module provides the EmitDaemon class that implements a Unix socket server
6
+ for receiving events from Claude Code hooks and publishing them to Kafka with
7
+ fire-and-forget semantics from the caller's perspective.
8
+
9
+ Architecture:
10
+ ```
11
+ +-----------------+ Unix Socket +-------------+ Kafka +-------+
12
+ | Claude Code | -----------------> | EmitDaemon | ------------> | Kafka |
13
+ | Hooks | JSON messages | (this file) | Events | Topics|
14
+ +-----------------+ +-------------+ +-------+
15
+ |
16
+ v
17
+ +------------+
18
+ | Disk Spool |
19
+ | (overflow) |
20
+ +------------+
21
+ ```
22
+
23
+ Features:
24
+ - Unix domain socket server for low-latency local IPC
25
+ - Bounded in-memory queue with disk spool overflow
26
+ - Persistent Kafka connection with retry logic
27
+ - Fire-and-forget semantics for callers
28
+ - Graceful shutdown with queue drain
29
+ - PID file management for process tracking
30
+ - Health check endpoint for monitoring
31
+
32
+ Protocol:
33
+ Request format: {"event_type": "prompt.submitted", "payload": {...}}\\n
34
+ Response format: {"status": "queued"}\\n or {"status": "error", "reason": "..."}\\n
35
+
36
+ Special commands:
37
+ - {"command": "ping"}\\n -> {"status": "ok", "queue_size": N, "spool_size": M}\\n
38
+
39
+ Related Tickets:
40
+ - OMN-1610: Hook Event Daemon MVP
41
+
42
+ .. versionadded:: 0.2.6
43
+ """
44
+
45
+ from __future__ import annotations
46
+
47
+ import asyncio
48
+ import json
49
+ import logging
50
+ import os
51
+ import signal
52
+ from datetime import UTC, datetime
53
+ from pathlib import Path
54
+ from typing import cast
55
+ from uuid import UUID, uuid4
56
+
57
+ from pydantic import ValidationError
58
+
59
+ from omnibase_core.errors import OnexError
60
+ from omnibase_infra.event_bus.event_bus_kafka import EventBusKafka
61
+ from omnibase_infra.event_bus.models import ModelEventHeaders
62
+ from omnibase_infra.event_bus.models.config import ModelKafkaEventBusConfig
63
+ from omnibase_infra.protocols import ProtocolEventBusLike
64
+ from omnibase_infra.runtime.emit_daemon.config import ModelEmitDaemonConfig
65
+ from omnibase_infra.runtime.emit_daemon.event_registry import EventRegistry
66
+ from omnibase_infra.runtime.emit_daemon.model_daemon_request import (
67
+ ModelDaemonEmitRequest,
68
+ ModelDaemonPingRequest,
69
+ parse_daemon_request,
70
+ )
71
+ from omnibase_infra.runtime.emit_daemon.model_daemon_response import (
72
+ ModelDaemonErrorResponse,
73
+ ModelDaemonPingResponse,
74
+ ModelDaemonQueuedResponse,
75
+ )
76
+ from omnibase_infra.runtime.emit_daemon.queue import BoundedEventQueue, ModelQueuedEvent
77
+
78
+ logger = logging.getLogger(__name__)
79
+
80
+ # Poll interval for publisher loop when queue is empty (seconds)
81
+ PUBLISHER_POLL_INTERVAL_SECONDS: float = 0.1
82
+
83
+
84
+ class EmitDaemon:
85
+ """Unix socket daemon for persistent Kafka event emission.
86
+
87
+ Accepts events via Unix socket, queues them, and publishes to Kafka
88
+ with fire-and-forget semantics from the caller's perspective.
89
+
90
+ The daemon operates as follows:
91
+ 1. Listens on a Unix domain socket for incoming events
92
+ 2. Validates event payloads (type, size, required fields)
93
+ 3. Queues events in a bounded in-memory queue
94
+ 4. Background publisher loop dequeues and publishes to Kafka
95
+ 5. On publish failure, events are re-queued with exponential backoff
96
+ 6. On graceful shutdown, queue is drained to disk spool
97
+
98
+ Attributes:
99
+ config: Daemon configuration model
100
+ queue: Bounded event queue with disk spool
101
+
102
+ Example:
103
+ ```python
104
+ from omnibase_infra.runtime.emit_daemon import EmitDaemon, ModelEmitDaemonConfig
105
+
106
+ config = ModelEmitDaemonConfig(
107
+ kafka_bootstrap_servers="kafka:9092",
108
+ socket_path=Path("/tmp/emit.sock"),
109
+ )
110
+
111
+ daemon = EmitDaemon(config)
112
+ await daemon.start()
113
+
114
+ # Daemon runs until SIGTERM or SIGINT
115
+ # Or call daemon.stop() programmatically
116
+ ```
117
+ """
118
+
119
    def __init__(
        self,
        config: ModelEmitDaemonConfig,
        event_bus: ProtocolEventBusLike | None = None,
    ) -> None:
        """Initialize daemon with config.

        If event_bus is None, a real EventBusKafka is created later, in
        start(), from the Kafka settings in ``config``.

        Args:
            config: Daemon configuration model containing socket path,
                Kafka settings, queue limits, and timeout values.
            event_bus: Optional event bus for testing. If not provided,
                creates EventBusKafka from config.

        Example:
            ```python
            # Production usage
            config = ModelEmitDaemonConfig(kafka_bootstrap_servers="kafka:9092")
            daemon = EmitDaemon(config)

            # Testing with mock event bus
            mock_bus = MockEventBus()
            daemon = EmitDaemon(config, event_bus=mock_bus)
            ```
        """
        self._config = config
        self._event_bus: ProtocolEventBusLike | None = event_bus

        # Event registry for topic resolution and payload enrichment
        self._registry = EventRegistry(environment=config.environment)

        # Bounded event queue with disk spool overflow
        self._queue = BoundedEventQueue(
            max_memory_queue=config.max_memory_queue,
            max_spool_messages=config.max_spool_messages,
            max_spool_bytes=config.max_spool_bytes,
            spool_dir=config.spool_dir,
        )

        # Server state -- populated in start(), torn down in stop()
        self._server: asyncio.Server | None = None
        self._publisher_task: asyncio.Task[None] | None = None
        self._running = False
        self._shutdown_event = asyncio.Event()

        # Lock serializing start()/stop() state transitions
        self._lock = asyncio.Lock()

        logger.debug(
            "EmitDaemon initialized",
            extra={
                "socket_path": str(config.socket_path),
                "kafka_servers": config.kafka_bootstrap_servers,
                "max_memory_queue": config.max_memory_queue,
            },
        )
176
+
177
    @property
    def config(self) -> ModelEmitDaemonConfig:
        """Get the daemon configuration.

        Read-only view; the config is set once in __init__.

        Returns:
            The daemon configuration model.
        """
        return self._config
185
+
186
    @property
    def queue(self) -> BoundedEventQueue:
        """Get the event queue.

        Exposed primarily so callers and tests can inspect queue depth.

        Returns:
            The bounded event queue with disk spool.
        """
        return self._queue
194
+
195
    async def start(self) -> None:
        """Start the daemon.

        Performs the following startup sequence:
        1. Check for stale socket/PID and clean up
        2. Create PID file
        3. Load any spooled events from disk
        4. Initialize Kafka event bus (unless one was injected)
        5. Start Unix socket server
        6. Start publisher loop (background task)
        7. Setup signal handlers for graceful shutdown

        Idempotent: returns immediately if the daemon is already running.

        Raises:
            OnexError: If another daemon is already running (live PID file
                and no stale socket detected).
            OSError: If socket creation fails.
        """
        async with self._lock:
            if self._running:
                logger.debug("EmitDaemon already running")
                return

            # Check and clean up stale socket/PID from a crashed prior run
            if self._check_stale_socket():
                self._cleanup_stale()
            elif self._config.pid_path.exists():
                # PID file present and socket not stale: another daemon is running
                pid = self._config.pid_path.read_text().strip()
                raise OnexError(
                    f"Another emit daemon is already running with PID {pid}"
                )

            # Create PID file
            self._write_pid_file()

            # Load any spooled events from previous runs
            spool_count = await self._queue.load_spool()
            if spool_count > 0:
                logger.info(f"Loaded {spool_count} events from spool")

            # Initialize Kafka event bus if not provided (injection is for tests)
            if self._event_bus is None:
                kafka_config = ModelKafkaEventBusConfig(
                    bootstrap_servers=self._config.kafka_bootstrap_servers,
                    environment=self._config.environment,
                    timeout_seconds=int(self._config.kafka_timeout_seconds),
                )
                self._event_bus = EventBusKafka(config=kafka_config)

            # Start the event bus (connects to Kafka)
            # NOTE: hasattr check required because event_bus can be a mock for testing
            # that may not implement start(). EventBusKafka always has start(), but
            # test doubles may omit it if they don't need explicit initialization.
            if hasattr(self._event_bus, "start"):
                await self._event_bus.start()  # type: ignore[union-attr]

            # Ensure parent directory exists for socket
            self._config.socket_path.parent.mkdir(parents=True, exist_ok=True)

            # Remove existing socket file if present (stale path was already handled)
            if self._config.socket_path.exists():
                self._config.socket_path.unlink()

            # Start Unix socket server
            self._server = await asyncio.start_unix_server(
                self._handle_client,
                path=str(self._config.socket_path),
            )

            # Set socket permissions (configurable, defaults to owner and group read/write)
            self._config.socket_path.chmod(self._config.socket_permissions)

            # Start publisher loop as background task
            self._publisher_task = asyncio.create_task(self._publisher_loop())

            # Setup signal handlers for graceful shutdown
            loop = asyncio.get_running_loop()
            for sig in (signal.SIGTERM, signal.SIGINT):
                loop.add_signal_handler(sig, self._signal_handler)

            self._running = True
            self._shutdown_event.clear()

            logger.info(
                "EmitDaemon started",
                extra={
                    "socket_path": str(self._config.socket_path),
                    "pid": os.getpid(),
                },
            )
284
+
285
    async def stop(self) -> None:
        """Stop the daemon gracefully.

        Performs the following shutdown sequence:
        1. Stop accepting new connections
        2. Cancel publisher task
        3. Drain queue to spool (up to shutdown_drain_seconds)
        4. Close Kafka connection
        5. Remove socket and PID file

        This method is safe to call multiple times.
        """
        async with self._lock:
            if not self._running:
                logger.debug("EmitDaemon not running")
                return

            # Flip state first so client handlers and the publisher loop wind down
            self._running = False
            self._shutdown_event.set()

            logger.info("EmitDaemon stopping...")

            # Remove signal handlers so a subsequent signal gets default handling
            loop = asyncio.get_running_loop()
            for sig in (signal.SIGTERM, signal.SIGINT):
                loop.remove_signal_handler(sig)

            # Stop accepting new connections
            if self._server is not None:
                self._server.close()
                await self._server.wait_closed()
                self._server = None

            # Cancel publisher task; CancelledError is the expected outcome
            if self._publisher_task is not None:
                self._publisher_task.cancel()
                try:
                    await self._publisher_task
                except asyncio.CancelledError:
                    pass
                self._publisher_task = None

            # Drain queue to spool with timeout
            if self._config.shutdown_drain_seconds > 0:
                try:
                    async with asyncio.timeout(self._config.shutdown_drain_seconds):
                        drained = await self._queue.drain_to_spool()
                        if drained > 0:
                            logger.info(f"Drained {drained} events to spool")
                except TimeoutError:
                    logger.warning(
                        "Shutdown drain timeout exceeded, some events may be lost"
                    )

            # Close Kafka connection
            # NOTE: hasattr check required because event_bus can be a mock for testing
            # that may not implement close(). EventBusKafka always has close(), but
            # test doubles may omit it if they don't need explicit cleanup.
            if self._event_bus is not None and hasattr(self._event_bus, "close"):
                await self._event_bus.close()  # type: ignore[union-attr]

            # Remove socket file (best-effort: failure is logged, not raised)
            if self._config.socket_path.exists():
                try:
                    self._config.socket_path.unlink()
                except OSError as e:
                    logger.warning(f"Failed to remove socket file: {e}")

            # Remove PID file
            self._remove_pid_file()

            logger.info("EmitDaemon stopped")
357
+
358
    async def run_until_shutdown(self) -> None:
        """Run the daemon until shutdown signal is received.

        Blocks until SIGTERM/SIGINT is received or stop() is called
        (both set the internal shutdown event), then performs a graceful
        stop. Useful for running the daemon as a standalone process.

        Example:
            ```python
            daemon = EmitDaemon(config)
            await daemon.start()
            await daemon.run_until_shutdown()
            ```
        """
        await self._shutdown_event.wait()
        await self.stop()
373
+
374
    def _signal_handler(self) -> None:
        """Handle SIGTERM/SIGINT signals.

        Registered via loop.add_signal_handler in start(), so this runs
        inside the event loop; setting the asyncio.Event here is safe and
        wakes run_until_shutdown() to trigger graceful shutdown.
        """
        logger.info("Received shutdown signal")
        self._shutdown_event.set()
381
+
382
    async def _handle_client(
        self,
        reader: asyncio.StreamReader,
        writer: asyncio.StreamWriter,
    ) -> None:
        """Handle a single client connection.

        Protocol: newline-delimited JSON
        Request: {"event_type": "...", "payload": {...}}
        Response: {"status": "queued"} or {"status": "error", "reason": "..."}

        Special commands:
        - {"command": "ping"} -> {"status": "ok", "queue_size": N, "spool_size": M}

        Multiple requests may be pipelined on one connection; the loop exits
        on client disconnect (EOF), read timeout, or daemon shutdown.

        Args:
            reader: Async stream reader for the client connection
            writer: Async stream writer for the client connection
        """
        # Unix sockets have no peer address; fixed label keeps log lines uniform
        peer = "unix_client"
        logger.debug(f"Client connected: {peer}")

        try:
            while not self._shutdown_event.is_set():
                try:
                    # Read line with timeout so an idle client cannot pin this handler
                    line = await asyncio.wait_for(
                        reader.readline(),
                        timeout=self._config.socket_timeout_seconds,
                    )
                except TimeoutError:
                    # Client timeout - close connection
                    logger.debug(f"Client timeout: {peer}")
                    break

                if not line:
                    # Client disconnected (EOF)
                    break

                # Process the request
                response = await self._process_request(line)

                # Send response
                writer.write(response.encode("utf-8") + b"\n")
                await writer.drain()

        except ConnectionResetError:
            logger.debug(f"Client connection reset: {peer}")
        except Exception as e:
            logger.exception(f"Error handling client {peer}: {e}")
        finally:
            # Always attempt to close the writer; teardown errors are ignored
            try:
                writer.close()
                await writer.wait_closed()
            except Exception:
                pass
            logger.debug(f"Client disconnected: {peer}")
438
+
439
+ async def _process_request(self, line: bytes) -> str:
440
+ """Process a single request line.
441
+
442
+ Uses typed request models (ModelDaemonPingRequest, ModelDaemonEmitRequest)
443
+ for compile-time type safety instead of dict[str, object] with isinstance checks.
444
+
445
+ Args:
446
+ line: Raw request line (JSON bytes with optional newline)
447
+
448
+ Returns:
449
+ JSON response string
450
+ """
451
+ try:
452
+ # Parse JSON request
453
+ raw_request = json.loads(line.decode("utf-8").strip())
454
+ except (json.JSONDecodeError, UnicodeDecodeError) as e:
455
+ return ModelDaemonErrorResponse(
456
+ reason=f"Invalid JSON: {e}"
457
+ ).model_dump_json()
458
+
459
+ if not isinstance(raw_request, dict):
460
+ return ModelDaemonErrorResponse(
461
+ reason="Request must be a JSON object"
462
+ ).model_dump_json()
463
+
464
+ # Parse into typed request model
465
+ try:
466
+ request = parse_daemon_request(raw_request)
467
+ except (ValueError, ValidationError) as e:
468
+ return ModelDaemonErrorResponse(reason=str(e)).model_dump_json()
469
+
470
+ # Dispatch based on request type
471
+ if isinstance(request, ModelDaemonPingRequest):
472
+ return await self._handle_ping(request)
473
+ elif isinstance(request, ModelDaemonEmitRequest):
474
+ return await self._handle_emit(request)
475
+ else:
476
+ # Should be unreachable due to exhaustive type check above
477
+ return ModelDaemonErrorResponse(
478
+ reason="Unknown request type"
479
+ ).model_dump_json()
480
+
481
+ async def _handle_ping(self, request: ModelDaemonPingRequest) -> str:
482
+ """Handle ping command request.
483
+
484
+ Args:
485
+ request: Typed ping request model
486
+
487
+ Returns:
488
+ JSON response string with queue status
489
+ """
490
+ return ModelDaemonPingResponse(
491
+ queue_size=self._queue.memory_size(),
492
+ spool_size=self._queue.spool_size(),
493
+ ).model_dump_json()
494
+
495
    async def _handle_emit(self, request: ModelDaemonEmitRequest) -> str:
        """Handle event emission request.

        Validation pipeline (order matters -- the first failure determines
        the error reason returned to the client):
        payload shape -> payload size -> event type registered -> required
        fields present. On success the payload is enriched with metadata and
        enqueued for the background publisher loop.

        Args:
            request: Typed emit request model with event_type and payload

        Returns:
            JSON response string (queued or error)
        """
        event_type = request.event_type

        # Normalize payload to dict (JsonType could be various types)
        raw_payload = request.payload
        if raw_payload is None:
            raw_payload = {}
        if not isinstance(raw_payload, dict):
            return ModelDaemonErrorResponse(
                reason="'payload' must be a JSON object"
            ).model_dump_json()

        # Cast to dict[str, object] after isinstance check for type safety
        payload: dict[str, object] = cast("dict[str, object]", raw_payload)

        # Check payload size (measured as serialized UTF-8 bytes)
        payload_json = json.dumps(payload)
        if len(payload_json.encode("utf-8")) > self._config.max_payload_bytes:
            return ModelDaemonErrorResponse(
                reason=f"Payload exceeds maximum size of {self._config.max_payload_bytes} bytes"
            ).model_dump_json()

        # Validate event type is registered (also resolves the Kafka topic)
        try:
            topic = self._registry.resolve_topic(event_type)
        except OnexError as e:
            return ModelDaemonErrorResponse(reason=str(e)).model_dump_json()

        # Validate payload has required fields
        try:
            self._registry.validate_payload(event_type, payload)
        except OnexError as e:
            return ModelDaemonErrorResponse(reason=str(e)).model_dump_json()

        # Extract correlation_id from payload if present; non-strings are ignored
        correlation_id = payload.get("correlation_id")
        if not isinstance(correlation_id, str):
            correlation_id = None

        # Inject metadata into payload
        enriched_payload = self._registry.inject_metadata(
            event_type,
            payload,
            correlation_id=correlation_id,
        )

        # Get partition key
        partition_key = self._registry.get_partition_key(event_type, enriched_payload)

        # Create queued event with a fresh id and UTC enqueue timestamp
        event_id = str(uuid4())
        queued_event = ModelQueuedEvent(
            event_id=event_id,
            event_type=event_type,
            topic=topic,
            payload=enriched_payload,
            partition_key=partition_key,
            queued_at=datetime.now(UTC),
        )

        # Enqueue the event; enqueue() returns False when the queue rejects it
        success = await self._queue.enqueue(queued_event)
        if success:
            logger.debug(
                f"Event queued: {event_id}",
                extra={
                    "event_type": event_type,
                    "topic": topic,
                },
            )
            return ModelDaemonQueuedResponse(event_id=event_id).model_dump_json()
        else:
            return ModelDaemonErrorResponse(
                reason="Failed to queue event (queue may be full)"
            ).model_dump_json()
578
+
579
    async def _publisher_loop(self) -> None:
        """Background task that dequeues and publishes events to Kafka.

        Runs continuously until stopped. On publish failure:
        - Increment retry_count
        - Re-queue with exponential backoff (capped at max_backoff_seconds)
        - After max_retry_attempts (from config), log error and drop event

        Note that the backoff sleep happens inline, so retries of one event
        delay publishing of subsequent events (single-consumer loop).
        """
        logger.info("Publisher loop started")

        # NOTE: Using non-locking total_size() is intentional here.
        # While this creates a theoretical race condition during shutdown
        # (a concurrent enqueue could complete after the size check but before
        # the loop re-evaluates), it avoids lock contention in this hot loop.
        # For fire-and-forget semantics, this trade-off is acceptable - events
        # queued during the final shutdown window may be lost, which is
        # documented behavior (see shutdown_drain_seconds config and the
        # drain_to_spool() call in stop()). The queue's total_size_locked()
        # method exists for cases requiring accurate counts.
        while self._running or self._queue.total_size() > 0:
            try:
                # Dequeue next event
                event = await self._queue.dequeue()

                if event is None:
                    # Queue empty, wait briefly and check again
                    await asyncio.sleep(PUBLISHER_POLL_INTERVAL_SECONDS)
                    continue

                # Attempt to publish
                success = await self._publish_event(event)

                if not success:
                    # Increment retry count
                    event.retry_count += 1

                    if event.retry_count >= self._config.max_retry_attempts:
                        # Max retries exceeded - drop event (fire-and-forget contract)
                        logger.error(
                            f"Dropping event {event.event_id} after {event.retry_count} retries",
                            extra={
                                "event_type": event.event_type,
                                "topic": event.topic,
                            },
                        )
                    else:
                        # Re-queue with backoff (capped to prevent excessive delays)
                        uncapped_backoff = self._config.backoff_base_seconds * (
                            2 ** (event.retry_count - 1)
                        )
                        backoff = min(
                            uncapped_backoff, self._config.max_backoff_seconds
                        )
                        logger.warning(
                            f"Publish failed for {event.event_id}, retry {event.retry_count}/{self._config.max_retry_attempts} in {backoff}s",
                            extra={
                                "event_type": event.event_type,
                                "topic": event.topic,
                            },
                        )

                        # Wait for backoff period
                        await asyncio.sleep(backoff)

                        # Re-enqueue with error handling; a full queue loses the event
                        requeue_success = await self._queue.enqueue(event)
                        if not requeue_success:
                            logger.error(
                                f"Failed to re-enqueue event {event.event_id} after backoff, event lost",
                                extra={
                                    "event_type": event.event_type,
                                    "topic": event.topic,
                                    "retry_count": event.retry_count,
                                },
                            )

            except asyncio.CancelledError:
                # stop() cancels this task; exit cleanly
                logger.info("Publisher loop cancelled")
                break
            except Exception as e:
                logger.exception(f"Unexpected error in publisher loop: {e}")
                await asyncio.sleep(1.0)  # Brief pause before continuing

        logger.info("Publisher loop stopped")
663
+
664
+ async def _publish_event(self, event: ModelQueuedEvent) -> bool:
665
+ """Publish a single event to Kafka.
666
+
667
+ Args:
668
+ event: The queued event to publish
669
+
670
+ Returns:
671
+ True if publish succeeded, False otherwise
672
+ """
673
+ if self._event_bus is None:
674
+ logger.error("Event bus not initialized")
675
+ return False
676
+
677
+ try:
678
+ # Prepare message key and value
679
+ key = event.partition_key.encode("utf-8") if event.partition_key else None
680
+ value = json.dumps(event.payload).encode("utf-8")
681
+
682
+ # Extract correlation_id from enriched payload (injected by registry)
683
+ # Type guard: payload is always a dict in practice (created in _handle_event)
684
+ payload_correlation_id = (
685
+ event.payload.get("correlation_id")
686
+ if isinstance(event.payload, dict)
687
+ else None
688
+ )
689
+ if isinstance(payload_correlation_id, str):
690
+ try:
691
+ correlation_id = UUID(payload_correlation_id)
692
+ except ValueError:
693
+ correlation_id = uuid4()
694
+ else:
695
+ correlation_id = uuid4()
696
+
697
+ # Create event headers
698
+ headers = ModelEventHeaders(
699
+ source="emit-daemon",
700
+ event_type=event.event_type,
701
+ timestamp=event.queued_at,
702
+ correlation_id=correlation_id,
703
+ )
704
+
705
+ # Publish to Kafka
706
+ # NOTE: headers parameter is Kafka-specific, not in minimal protocol.
707
+ # When _event_bus is None, we create EventBusKafka which supports headers.
708
+ # For testing mocks, they can accept **kwargs or ignore extra params.
709
+ await self._event_bus.publish( # type: ignore[call-arg]
710
+ topic=event.topic,
711
+ key=key,
712
+ value=value,
713
+ headers=headers,
714
+ )
715
+
716
+ logger.debug(
717
+ f"Published event {event.event_id}",
718
+ extra={
719
+ "event_type": event.event_type,
720
+ "topic": event.topic,
721
+ },
722
+ )
723
+ return True
724
+
725
+ except Exception as e:
726
+ logger.warning(
727
+ f"Failed to publish event {event.event_id}: {e}",
728
+ extra={
729
+ "event_type": event.event_type,
730
+ "topic": event.topic,
731
+ "error": str(e),
732
+ },
733
+ )
734
+ return False
735
+
736
+ def _write_pid_file(self) -> None:
737
+ """Write current PID to pid_path.
738
+
739
+ Creates parent directories if needed.
740
+ """
741
+ try:
742
+ self._config.pid_path.parent.mkdir(parents=True, exist_ok=True)
743
+ self._config.pid_path.write_text(str(os.getpid()))
744
+ logger.debug(f"PID file created: {self._config.pid_path}")
745
+ except OSError as e:
746
+ logger.warning(f"Failed to write PID file: {e}")
747
+
748
+ def _remove_pid_file(self) -> None:
749
+ """Remove PID file if it exists."""
750
+ try:
751
+ if self._config.pid_path.exists():
752
+ self._config.pid_path.unlink()
753
+ logger.debug(f"PID file removed: {self._config.pid_path}")
754
+ except OSError as e:
755
+ logger.warning(f"Failed to remove PID file: {e}")
756
+
757
+ def _check_stale_socket(self) -> bool:
758
+ """Check if socket/PID are stale (process not running).
759
+
760
+ A socket/PID is considered stale if:
761
+ - PID file exists but the process is not running
762
+ - Socket file exists but no PID file exists
763
+
764
+ Returns:
765
+ True if stale (safe to clean up), False if daemon is running.
766
+ """
767
+ # Check if PID file exists
768
+ if not self._config.pid_path.exists():
769
+ # No PID file - socket is stale if it exists
770
+ return self._config.socket_path.exists()
771
+
772
+ # Read PID from file
773
+ try:
774
+ pid_str = self._config.pid_path.read_text().strip()
775
+ pid = int(pid_str)
776
+ except (OSError, ValueError):
777
+ # Can't read PID file - treat as stale
778
+ return True
779
+
780
+ # Check if process is running
781
+ try:
782
+ # Sending signal 0 checks if process exists without killing it
783
+ os.kill(pid, 0)
784
+ # Process is running - not stale
785
+ return False
786
+ except ProcessLookupError:
787
+ # Process not running - stale
788
+ return True
789
+ except PermissionError:
790
+ # Process exists but we can't signal it - assume not stale
791
+ return False
792
+
793
+ def _cleanup_stale(self) -> None:
794
+ """Remove stale socket and PID files."""
795
+ # Remove socket file
796
+ if self._config.socket_path.exists():
797
+ try:
798
+ self._config.socket_path.unlink()
799
+ logger.info(f"Removed stale socket: {self._config.socket_path}")
800
+ except OSError as e:
801
+ logger.warning(f"Failed to remove stale socket: {e}")
802
+
803
+ # Remove PID file
804
+ if self._config.pid_path.exists():
805
+ try:
806
+ self._config.pid_path.unlink()
807
+ logger.info(f"Removed stale PID file: {self._config.pid_path}")
808
+ except OSError as e:
809
+ logger.warning(f"Failed to remove stale PID file: {e}")
810
+
811
+
812
# Explicit public API: only the EmitDaemon class is exported from this module.
__all__: list[str] = ["EmitDaemon"]