dory-processor-sdk 0.0.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- dory/__init__.py +101 -0
- dory/auth/__init__.py +10 -0
- dory/auth/oauth2.py +153 -0
- dory/auto_instrument.py +142 -0
- dory/cli/__init__.py +5 -0
- dory/cli/main.py +137 -0
- dory/cli/templates.py +123 -0
- dory/config/__init__.py +23 -0
- dory/config/defaults.py +24 -0
- dory/config/loader.py +430 -0
- dory/config/presets.py +73 -0
- dory/config/schema.py +84 -0
- dory/core/__init__.py +27 -0
- dory/core/app.py +434 -0
- dory/core/context.py +209 -0
- dory/core/lifecycle.py +214 -0
- dory/core/meta.py +121 -0
- dory/core/modes.py +479 -0
- dory/core/processor.py +564 -0
- dory/core/signals.py +122 -0
- dory/decorators.py +142 -0
- dory/edge/__init__.py +88 -0
- dory/edge/adaptive.py +644 -0
- dory/edge/detector.py +546 -0
- dory/edge/fencing.py +488 -0
- dory/edge/heartbeat.py +598 -0
- dory/edge/role.py +419 -0
- dory/errors/__init__.py +139 -0
- dory/errors/classification.py +362 -0
- dory/errors/codes.py +498 -0
- dory/geo/__init__.py +40 -0
- dory/geo/geolocalizer.py +1034 -0
- dory/health/__init__.py +12 -0
- dory/health/probes.py +210 -0
- dory/health/server.py +635 -0
- dory/k8s/__init__.py +80 -0
- dory/k8s/annotation_watcher.py +184 -0
- dory/k8s/client.py +251 -0
- dory/k8s/labels.py +505 -0
- dory/k8s/pod_metadata.py +182 -0
- dory/logging/__init__.py +9 -0
- dory/logging/logger.py +148 -0
- dory/metrics/__init__.py +7 -0
- dory/metrics/collector.py +301 -0
- dory/middleware/__init__.py +46 -0
- dory/middleware/connection_tracker.py +608 -0
- dory/middleware/request_id.py +325 -0
- dory/middleware/request_tracker.py +511 -0
- dory/migration/__init__.py +33 -0
- dory/migration/configmap.py +232 -0
- dory/migration/s3_store.py +594 -0
- dory/migration/serialization.py +135 -0
- dory/migration/state_manager.py +286 -0
- dory/migration/transfer.py +382 -0
- dory/monitoring/__init__.py +29 -0
- dory/monitoring/opentelemetry.py +489 -0
- dory/output/__init__.py +31 -0
- dory/output/envelope.py +137 -0
- dory/output/formatter.py +113 -0
- dory/output/rabbitmq.py +632 -0
- dory/output/routing.py +318 -0
- dory/output/validator.py +199 -0
- dory/py.typed +2 -0
- dory/recovery/__init__.py +60 -0
- dory/recovery/golden_image.py +487 -0
- dory/recovery/golden_snapshot.py +713 -0
- dory/recovery/golden_validator.py +518 -0
- dory/recovery/partial_recovery.py +482 -0
- dory/recovery/recovery_decision.py +242 -0
- dory/recovery/restart_detector.py +142 -0
- dory/recovery/state_validator.py +183 -0
- dory/resilience/__init__.py +45 -0
- dory/resilience/circuit_breaker.py +457 -0
- dory/resilience/retry.py +389 -0
- dory/simple.py +342 -0
- dory/types.py +68 -0
- dory/utils/__init__.py +31 -0
- dory/utils/errors.py +59 -0
- dory/utils/retry.py +115 -0
- dory/utils/timeout.py +80 -0
- dory_processor_sdk-0.0.1.dist-info/METADATA +424 -0
- dory_processor_sdk-0.0.1.dist-info/RECORD +86 -0
- dory_processor_sdk-0.0.1.dist-info/WHEEL +5 -0
- dory_processor_sdk-0.0.1.dist-info/entry_points.txt +2 -0
- dory_processor_sdk-0.0.1.dist-info/licenses/LICENSE +201 -0
- dory_processor_sdk-0.0.1.dist-info/top_level.txt +1 -0
dory/core/processor.py
ADDED
|
@@ -0,0 +1,564 @@
|
|
|
1
|
+
"""
|
|
2
|
+
BaseProcessor - Abstract base class for processor implementations.
|
|
3
|
+
|
|
4
|
+
Developers implement this class to create their processor applications.
|
|
5
|
+
The SDK handles all lifecycle, state management, and health concerns.
|
|
6
|
+
|
|
7
|
+
Auto-initialized components (if available):
|
|
8
|
+
- Circuit breakers (self.circuit_breakers)
|
|
9
|
+
- Error classifier (self.error_classifier)
|
|
10
|
+
- OpenTelemetry (self.otel)
|
|
11
|
+
- Request tracker (self.request_tracker)
|
|
12
|
+
- Request ID middleware (self.request_id_middleware)
|
|
13
|
+
- Connection tracker (self.connection_tracker)
|
|
14
|
+
- RabbitMQ publisher (self.publisher)
|
|
15
|
+
|
|
16
|
+
All handler methods are automatically instrumented via AutoInstrumentMeta.
|
|
17
|
+
"""
|
|
18
|
+
|
|
19
|
+
import asyncio
|
|
20
|
+
import logging
|
|
21
|
+
from abc import ABC, abstractmethod
|
|
22
|
+
from typing import TYPE_CHECKING, AsyncIterator, Dict, Any, Optional
|
|
23
|
+
|
|
24
|
+
from dory.decorators import get_stateful_vars, set_stateful_vars
|
|
25
|
+
from dory.core.meta import AutoInstrumentMeta
|
|
26
|
+
|
|
27
|
+
if TYPE_CHECKING:
|
|
28
|
+
from dory.core.context import ExecutionContext
|
|
29
|
+
from dory.geo import GeoPoint
|
|
30
|
+
|
|
31
|
+
logger = logging.getLogger(__name__)
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
class BaseProcessor(ABC, metaclass=AutoInstrumentMeta):
    """
    Abstract base class for processor implementations.

    Required method:
    - run(): Main processing loop

    Optional methods (have sensible defaults):
    - startup(): Initialize resources (default: no-op)
    - shutdown(): Cleanup resources (default: no-op)
    - get_state(): Return state dict (default: returns @stateful vars or {})
    - restore_state(): Restore state (default: restores @stateful vars)

    Optional fault handling hooks:
    - on_state_restore_failed(): Handle state restore errors
    - on_rapid_restart_detected(): Handle restart loop
    - on_health_check_failed(): Handle health check errors
    - reset_caches(): Clean caches during golden image reset

    Usage:
        # Minimal implementation (just run method)
        class MyProcessor(BaseProcessor):
            counter = stateful(0)

            async def run(self):
                async for _ in self.run_loop(interval=1):
                    self.counter += 1

        # Full implementation
        class MyProcessor(BaseProcessor):
            async def startup(self):
                self.model = load_model()

            async def run(self):
                while not self.context.is_shutdown_requested():
                    process()

            async def shutdown(self):
                self.model.close()

            def get_state(self):
                return {"processed": self.count}

            async def restore_state(self, state):
                self.count = state.get("processed", 0)
    """

    # Optional: Define state schema for validation
    # Schema example: {'processed_count': int, 'last_frame_id': int}
    state_schema: dict[str, type] | None = None

    # Context is auto-injected by DoryApp (no need to accept in __init__)
    context: "ExecutionContext"

    # =========================================================================
    # SDK-Managed Components (auto-initialized with defaults)
    # Declared here for IDE autocomplete and type checking.
    # NOTE: these are annotations only; the attributes do not exist on an
    # instance until _auto_initialize_components() has run with a context.
    # =========================================================================
    error_classifier: Any
    circuit_breakers: Dict[str, Any]
    otel: Optional[Any]
    request_tracker: Optional[Any]
    request_id_middleware: Optional[Any]
    connection_tracker: Optional[Any]
    publisher: Optional[Any]

    def __init__(self, context: "ExecutionContext | None" = None):
        """
        Initialize processor with auto-initialization of SDK components.

        Args:
            context: ExecutionContext (optional - will be auto-injected if not provided)

        Note:
            You can override __init__ and call super().__init__(context) to get
            auto-initialization, or skip super() call to manually initialize.
        """
        if context is not None:
            self.context = context

        # Auto-initialize SDK components if context is available
        self._auto_initialize_components()

    # =========================================================================
    # Required Method
    # =========================================================================

    @abstractmethod
    async def run(self) -> None:
        """
        Main processing loop.

        Called after startup() and restore_state(). Must check
        context.is_shutdown_requested() periodically to exit gracefully.

        You can use self.run_loop() helper for cleaner code:

            async def run(self):
                async for _ in self.run_loop(interval=1):
                    self.counter += 1

        Or traditional while loop:

            async def run(self):
                while not self.context.is_shutdown_requested():
                    self.counter += 1
                    await asyncio.sleep(1)

        Raises:
            Any exception will cause pod crash
        """
        raise NotImplementedError

    # =========================================================================
    # Optional Lifecycle Methods (Override if needed)
    # =========================================================================

    async def startup(self) -> None:
        """
        Initialize processor resources (optional).

        Called once at pod startup after __init__ but before run().
        Override to load models, open connections, etc.

        Default: No-op
        """
        pass

    async def shutdown(self) -> None:
        """
        Cleanup processor resources (optional).

        Called on graceful shutdown (SIGTERM). Has max timeout
        (configurable via DORY_SHUTDOWN_TIMEOUT_SEC, default 30s).
        Override to close connections, flush buffers, etc.

        Default: No-op
        """
        pass

    def get_state(self) -> dict:
        """
        Return state to migrate to next pod (optional).

        Called during migration (must be fast, <1s). State must be
        JSON-serializable.

        Default: Returns all @stateful decorated attributes, or {} if none.

        Override for custom state:
            def get_state(self):
                return {"counter": self.counter, "data": self.data}
        """
        # Auto-collect @stateful decorated attributes; any falsy result
        # (nothing collected) is normalized to an empty dict.
        return get_stateful_vars(self) or {}

    async def restore_state(self, state: dict) -> None:
        """
        Restore state from previous pod (optional).

        Called after startup() but before run() if state exists.

        Default: Restores all @stateful decorated attributes from state.

        Override for custom restoration:
            async def restore_state(self, state):
                self.counter = state.get("counter", 0)
        """
        # Auto-restore @stateful decorated attributes
        set_stateful_vars(self, state)

    # =========================================================================
    # Helper Methods
    # =========================================================================

    async def run_loop(
        self,
        interval: float = 1.0,
        check_migration: bool = True,
    ) -> AsyncIterator[int]:
        """
        Async iterator that yields until shutdown is requested.

        Simplifies the common pattern of checking shutdown in a loop.

        Args:
            interval: Sleep interval between iterations (seconds)
            check_migration: If True, log a notice after each iteration for
                which the context reports that migration is imminent. The
                loop itself keeps running until shutdown is requested.

        Yields:
            Iteration count (0, 1, 2, ...)

        Usage:
            async def run(self):
                async for i in self.run_loop(interval=1):
                    self.counter += 1
                    print(f"Iteration {i}")

            # Equivalent to:
            async def run(self):
                i = 0
                while not self.context.is_shutdown_requested():
                    self.counter += 1
                    print(f"Iteration {i}")
                    i += 1
                    await asyncio.sleep(1)
        """
        iteration = 0
        while not self.context.is_shutdown_requested():
            yield iteration
            iteration += 1

            # Check if migration is imminent (the counter has already been
            # advanced, so the logged number is the count of yields so far).
            if check_migration and self.context.is_migration_imminent():
                self.context.logger().info(
                    f"Migration imminent, completing iteration {iteration}"
                )

            await asyncio.sleep(interval)

    def is_shutting_down(self) -> bool:
        """
        Convenience method to check if shutdown is requested.

        Returns:
            True if shutdown has been requested
        """
        return self.context.is_shutdown_requested()

    async def publish(
        self,
        event_type: str,
        location: "GeoPoint",
        payload: dict[str, Any],
        *,
        headers: dict[str, Any] | None = None,
        exchange: str | None = None,
    ) -> None:
        """Publish a single message with a geo location.

        The developer provides the event type, a
        :class:`~dory.geo.GeoPoint` (e.g. from
        :meth:`CameraGeolocalizer.estimate`), and a payload already in
        the unified envelope format.  The SDK converts the coordinates
        to a geohash-based routing key and publishes the payload as-is
        (no additional envelope wrapping).

        Examples::

            loc = geo.estimate(box)
            await self.publish(
                "accident",
                location=loc,
                payload=MessageEnvelope(payload={...}).to_dict(),
            )

        Args:
            event_type: Event type for routing (e.g., "accident", "detection").
            location: A :class:`~dory.geo.GeoPoint` with ``.lat`` and
                ``.lng`` attributes.
            payload: Message payload in unified envelope format.
            headers: Optional AMQP message headers.
            exchange: Optional exchange override.

        Raises:
            RuntimeError: If publisher is not initialized.
            ValueError: If event_type is empty or coordinates are out of range.
        """
        # ``publisher`` is only an annotation at class level, so it may not
        # exist at all when auto-initialization was skipped (no context, or a
        # subclass that did not call super().__init__()). Treat "missing" the
        # same as "None" so callers get the documented RuntimeError.
        publisher = getattr(self, "publisher", None)
        if publisher is None:
            raise RuntimeError(
                "RabbitMQ publisher not initialized. "
                "Set DORY_RABBITMQ_OAUTH2_TOKEN_URL and OAuth2 credentials to enable."
            )

        # Imported lazily so the output package is only required when publishing.
        from dory.output.routing import build_routing_key_from_geo

        routing_key = build_routing_key_from_geo(
            event_type, location.lat, location.lng
        )
        logger.info(
            "Publishing message event_type=%s lat=%.6f lon=%.6f routing_key=%s",
            event_type,
            location.lat,
            location.lng,
            routing_key,
        )

        # raw=True: the payload is sent as provided, without extra wrapping.
        await publisher.publish(
            routing_key=routing_key,
            data=payload,
            exchange=exchange,
            headers=headers,
            raw=True,
        )

    # =========================================================================
    # Optional Fault Handling Hooks
    # =========================================================================

    async def on_state_restore_failed(self, error: Exception) -> bool:
        """
        Called if state restore fails.

        Override to attempt recovery (e.g., fetch from external backup).
        Return True to start with golden image, False to exit and crash.

        Args:
            error: Exception from restore_state() or validation

        Returns:
            True to continue with golden image, False to exit
        """
        return True  # Default: continue with golden image

    async def on_rapid_restart_detected(self, restart_count: int) -> bool:
        """
        Called if restart loop detected (3+ restarts in 5 minutes).

        Override to attempt recovery (e.g., reinitialize state, reset
        connections). Return True to continue, False to trigger golden reset.

        Args:
            restart_count: Number of restarts detected

        Returns:
            True to continue, False to force golden reset
        """
        return True  # Default: continue (SDK will start golden)

    async def on_health_check_failed(self, error: Exception) -> bool:
        """
        Called if health check fails.

        Override to attempt recovery (e.g., reconnect to external services).
        Return True to retry health check, False to fail.

        Args:
            error: Exception from health check

        Returns:
            True to retry, False to fail
        """
        return False  # Default: fail health check

    def reset_caches(self) -> None:
        """
        Called during golden image reset.

        Override to clear any in-memory caches, buffers, or temporary
        state that should not persist through a golden reset.
        """
        pass  # Default: no caches to reset

    # =========================================================================
    # Auto-Initialization
    # =========================================================================

    def _auto_initialize_components(self) -> None:
        """
        Auto-initialize SDK components with sensible defaults.

        Called automatically during __init__. Does nothing (and leaves the
        component attributes unset) when no context is available.
        Each component is optional — if its package is not installed,
        initialization is silently skipped.
        """
        if getattr(self, "context", None) is None:
            logger.debug("Context not available, skipping auto-initialization")
            return

        self._init_error_classifier()
        self._init_circuit_breakers()
        self._init_opentelemetry()
        self._init_request_tracking()
        self._init_request_id()
        self._init_connection_tracking()
        self._init_publisher()

        logger.debug("Auto-initialization complete")

    def _init_error_classifier(self) -> None:
        """Initialize error classifier (None when unavailable or on failure)."""
        self.error_classifier = None

        try:
            from dory.errors import ErrorClassifier
            self.error_classifier = ErrorClassifier()
        except ImportError:
            pass
        except Exception as e:
            # Same best-effort policy as the other _init_* helpers.
            logger.warning("Failed to initialize error classifier: %s", e)

    def _init_circuit_breakers(self) -> None:
        """Create default circuit breakers for common services."""
        self.circuit_breakers = {}

        try:
            from dory.resilience import CircuitBreaker
        except ImportError:
            return

        try:
            # Build everything first so a failure never leaves a partial set.
            breakers = {
                name: CircuitBreaker(
                    name=name,
                    failure_threshold=5,
                    success_threshold=2,
                    timeout_seconds=30.0,
                    half_open_max_calls=3,
                )
                for name in ("database", "external_api", "cache")
            }
        except Exception as e:
            logger.warning("Failed to initialize circuit breakers: %s", e)
            return

        self.circuit_breakers.update(breakers)

    def _init_opentelemetry(self) -> None:
        """Initialize OpenTelemetry with defaults.

        Uses DORY_APP_VERSION env var (injected by orchestrator from DB) for
        service.version. Falls back to "1.0.0" if not set.
        """
        self.otel = None

        try:
            import os
            from dory.monitoring import OpenTelemetryManager
            service_version = os.environ.get("DORY_APP_VERSION", "1.0.0")
            self.otel = OpenTelemetryManager(
                service_name="dory-app",
                service_version=service_version,
                environment="production",
                console_export=True,
            )
            self.otel.initialize()
        except ImportError:
            pass
        except Exception as e:
            # NOTE: if initialize() raises, self.otel still references the
            # constructed (uninitialized) manager, as in the original code.
            logger.warning("Failed to initialize OpenTelemetry: %s", e)

    def _init_request_tracking(self) -> None:
        """Initialize request tracking."""
        self.request_tracker = None

        try:
            from dory.middleware import RequestTracker
            self.request_tracker = RequestTracker(
                max_history=1000,
                enable_history=True,
            )
        except ImportError:
            pass
        except Exception as e:
            logger.warning("Failed to initialize request tracking: %s", e)

    def _init_request_id(self) -> None:
        """Initialize request ID middleware."""
        self.request_id_middleware = None

        try:
            from dory.middleware import RequestIdMiddleware
            self.request_id_middleware = RequestIdMiddleware(
                header_name="X-Request-ID",
                log_request_id=True,
            )
        except ImportError:
            pass
        except Exception as e:
            logger.warning("Failed to initialize request ID middleware: %s", e)

    def _init_connection_tracking(self) -> None:
        """Initialize connection tracking."""
        self.connection_tracker = None

        try:
            from dory.middleware import ConnectionTracker
            self.connection_tracker = ConnectionTracker()
        except ImportError:
            pass
        except Exception as e:
            logger.warning("Failed to initialize connection tracking: %s", e)

    def _init_publisher(self) -> None:
        """Initialize OAuth2-authenticated RabbitMQ publisher.

        The publisher is created when DORY_RABBITMQ_OAUTH2_TOKEN_URL is set.
        All configuration is read from environment variables.
        """
        self.publisher = None

        import os
        token_url = os.environ.get("DORY_RABBITMQ_OAUTH2_TOKEN_URL", "")
        if not token_url:
            return

        try:
            from dory.output.rabbitmq import RabbitMQPublisher, PublisherConfig
        except ImportError:
            return

        try:
            from dory.output.formatter import JSONFormatter
            from dory.auth.oauth2 import OAuth2TokenProvider

            publisher_config = PublisherConfig(
                url="",  # populated by url_provider
                exchange=os.environ.get("DORY_RABBITMQ_EXCHANGE", "dory.output"),
                exchange_type="topic",
                durable=True,
            )

            token_provider = OAuth2TokenProvider(
                token_url=token_url,
                client_id=os.environ.get("DORY_RABBITMQ_OAUTH2_CLIENT_ID", ""),
                client_secret=os.environ.get("DORY_RABBITMQ_OAUTH2_CLIENT_SECRET", ""),
                scopes=["rabbitmq/write:all"],
            )
            _host = os.environ.get("DORY_RABBITMQ_HOST", "")
            # NOTE(review): the port is fixed at 5671 even when
            # DORY_RABBITMQ_TLS_ENABLED is false — confirm this is intended.
            _port = 5671
            _vhost = os.environ.get("DORY_RABBITMQ_VHOST", "/")
            # Only the literal string "true" (any case) enables TLS.
            _tls = os.environ.get("DORY_RABBITMQ_TLS_ENABLED", "true").lower() == "true"

            # Defaults bind the connection settings at definition time, so the
            # provider can be awaited with no arguments.
            async def url_provider(
                _tp=token_provider, _h=_host, _p=_port, _v=_vhost, _t=_tls
            ):
                return await _tp.build_amqp_url(
                    host=_h, port=_p, vhost=_v, tls=_t
                )

            self.publisher = RabbitMQPublisher(
                config=publisher_config,
                formatter=JSONFormatter(),
                url_provider=url_provider,
            )
            logger.info("RabbitMQ publisher initialized (OAuth2)")

        except Exception as e:
            logger.warning("Failed to initialize RabbitMQ publisher: %s", e)
            self.publisher = None
|
dory/core/signals.py
ADDED
|
@@ -0,0 +1,122 @@
|
|
|
1
|
+
"""
|
|
2
|
+
SignalHandler - Handles OS signals for graceful shutdown.
|
|
3
|
+
|
|
4
|
+
Captures SIGTERM, SIGINT, and SIGUSR1 and triggers appropriate
|
|
5
|
+
actions in the SDK.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
import asyncio
|
|
9
|
+
import logging
|
|
10
|
+
import signal
|
|
11
|
+
import sys
|
|
12
|
+
from typing import Callable, Awaitable
|
|
13
|
+
|
|
14
|
+
logger = logging.getLogger(__name__)
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
class SignalHandler:
|
|
18
|
+
"""
|
|
19
|
+
Handles OS signals for graceful shutdown.
|
|
20
|
+
|
|
21
|
+
Signals handled:
|
|
22
|
+
SIGTERM: Graceful shutdown (from Kubelet)
|
|
23
|
+
SIGINT: Graceful shutdown (Ctrl+C for local testing)
|
|
24
|
+
SIGUSR1: Trigger state snapshot (for debugging)
|
|
25
|
+
"""
|
|
26
|
+
|
|
27
|
+
def __init__(self):
|
|
28
|
+
self._shutdown_callback: Callable[[], Awaitable[None]] | None = None
|
|
29
|
+
self._snapshot_callback: Callable[[], Awaitable[None]] | None = None
|
|
30
|
+
self._loop: asyncio.AbstractEventLoop | None = None
|
|
31
|
+
self._shutdown_triggered = False
|
|
32
|
+
|
|
33
|
+
def setup(
|
|
34
|
+
self,
|
|
35
|
+
shutdown_callback: Callable[[], Awaitable[None]],
|
|
36
|
+
snapshot_callback: Callable[[], Awaitable[None]] | None = None,
|
|
37
|
+
) -> None:
|
|
38
|
+
"""
|
|
39
|
+
Setup signal handlers.
|
|
40
|
+
|
|
41
|
+
Args:
|
|
42
|
+
shutdown_callback: Async callback for graceful shutdown
|
|
43
|
+
snapshot_callback: Optional async callback for state snapshot
|
|
44
|
+
"""
|
|
45
|
+
self._shutdown_callback = shutdown_callback
|
|
46
|
+
self._snapshot_callback = snapshot_callback
|
|
47
|
+
self._loop = asyncio.get_event_loop()
|
|
48
|
+
|
|
49
|
+
# Register signal handlers
|
|
50
|
+
if sys.platform != "win32":
|
|
51
|
+
# Unix signals
|
|
52
|
+
self._loop.add_signal_handler(
|
|
53
|
+
signal.SIGTERM,
|
|
54
|
+
self._handle_shutdown_signal,
|
|
55
|
+
"SIGTERM",
|
|
56
|
+
)
|
|
57
|
+
self._loop.add_signal_handler(
|
|
58
|
+
signal.SIGINT,
|
|
59
|
+
self._handle_shutdown_signal,
|
|
60
|
+
"SIGINT",
|
|
61
|
+
)
|
|
62
|
+
self._loop.add_signal_handler(
|
|
63
|
+
signal.SIGUSR1,
|
|
64
|
+
self._handle_snapshot_signal,
|
|
65
|
+
)
|
|
66
|
+
logger.debug("Signal handlers registered (Unix)")
|
|
67
|
+
else:
|
|
68
|
+
# Windows - limited signal support
|
|
69
|
+
signal.signal(signal.SIGTERM, self._handle_shutdown_signal_sync)
|
|
70
|
+
signal.signal(signal.SIGINT, self._handle_shutdown_signal_sync)
|
|
71
|
+
logger.debug("Signal handlers registered (Windows)")
|
|
72
|
+
|
|
73
|
+
def _handle_shutdown_signal(self, sig_name: str) -> None:
|
|
74
|
+
"""Handle SIGTERM/SIGINT asynchronously."""
|
|
75
|
+
if self._shutdown_triggered:
|
|
76
|
+
logger.warning(f"Received {sig_name} but shutdown already in progress")
|
|
77
|
+
return
|
|
78
|
+
|
|
79
|
+
self._shutdown_triggered = True
|
|
80
|
+
logger.info(f"Received {sig_name}, initiating graceful shutdown")
|
|
81
|
+
|
|
82
|
+
if self._shutdown_callback and self._loop:
|
|
83
|
+
asyncio.ensure_future(
|
|
84
|
+
self._shutdown_callback(),
|
|
85
|
+
loop=self._loop,
|
|
86
|
+
)
|
|
87
|
+
|
|
88
|
+
def _handle_shutdown_signal_sync(self, signum: int, frame) -> None:
|
|
89
|
+
"""Handle signal synchronously (Windows compatibility)."""
|
|
90
|
+
sig_name = signal.Signals(signum).name
|
|
91
|
+
self._handle_shutdown_signal(sig_name)
|
|
92
|
+
|
|
93
|
+
def _handle_snapshot_signal(self) -> None:
|
|
94
|
+
"""Handle SIGUSR1 for debug state snapshot."""
|
|
95
|
+
logger.info("Received SIGUSR1, triggering state snapshot")
|
|
96
|
+
|
|
97
|
+
if self._snapshot_callback and self._loop:
|
|
98
|
+
asyncio.ensure_future(
|
|
99
|
+
self._snapshot_callback(),
|
|
100
|
+
loop=self._loop,
|
|
101
|
+
)
|
|
102
|
+
|
|
103
|
+
def remove_handlers(self) -> None:
|
|
104
|
+
"""Remove signal handlers during shutdown."""
|
|
105
|
+
if self._loop and sys.platform != "win32":
|
|
106
|
+
try:
|
|
107
|
+
self._loop.remove_signal_handler(signal.SIGTERM)
|
|
108
|
+
self._loop.remove_signal_handler(signal.SIGINT)
|
|
109
|
+
self._loop.remove_signal_handler(signal.SIGUSR1)
|
|
110
|
+
logger.debug("Signal handlers removed")
|
|
111
|
+
except (ValueError, RuntimeError):
|
|
112
|
+
# Handler not registered or loop closed
|
|
113
|
+
pass
|
|
114
|
+
|
|
115
|
+
@property
|
|
116
|
+
def shutdown_triggered(self) -> bool:
|
|
117
|
+
"""Check if shutdown has been triggered."""
|
|
118
|
+
return self._shutdown_triggered
|
|
119
|
+
|
|
120
|
+
def reset(self) -> None:
|
|
121
|
+
"""Reset shutdown state (for testing)."""
|
|
122
|
+
self._shutdown_triggered = False
|