dory-processor-sdk 0.0.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (86)
  1. dory/__init__.py +101 -0
  2. dory/auth/__init__.py +10 -0
  3. dory/auth/oauth2.py +153 -0
  4. dory/auto_instrument.py +142 -0
  5. dory/cli/__init__.py +5 -0
  6. dory/cli/main.py +137 -0
  7. dory/cli/templates.py +123 -0
  8. dory/config/__init__.py +23 -0
  9. dory/config/defaults.py +24 -0
  10. dory/config/loader.py +430 -0
  11. dory/config/presets.py +73 -0
  12. dory/config/schema.py +84 -0
  13. dory/core/__init__.py +27 -0
  14. dory/core/app.py +434 -0
  15. dory/core/context.py +209 -0
  16. dory/core/lifecycle.py +214 -0
  17. dory/core/meta.py +121 -0
  18. dory/core/modes.py +479 -0
  19. dory/core/processor.py +564 -0
  20. dory/core/signals.py +122 -0
  21. dory/decorators.py +142 -0
  22. dory/edge/__init__.py +88 -0
  23. dory/edge/adaptive.py +644 -0
  24. dory/edge/detector.py +546 -0
  25. dory/edge/fencing.py +488 -0
  26. dory/edge/heartbeat.py +598 -0
  27. dory/edge/role.py +419 -0
  28. dory/errors/__init__.py +139 -0
  29. dory/errors/classification.py +362 -0
  30. dory/errors/codes.py +498 -0
  31. dory/geo/__init__.py +40 -0
  32. dory/geo/geolocalizer.py +1034 -0
  33. dory/health/__init__.py +12 -0
  34. dory/health/probes.py +210 -0
  35. dory/health/server.py +635 -0
  36. dory/k8s/__init__.py +80 -0
  37. dory/k8s/annotation_watcher.py +184 -0
  38. dory/k8s/client.py +251 -0
  39. dory/k8s/labels.py +505 -0
  40. dory/k8s/pod_metadata.py +182 -0
  41. dory/logging/__init__.py +9 -0
  42. dory/logging/logger.py +148 -0
  43. dory/metrics/__init__.py +7 -0
  44. dory/metrics/collector.py +301 -0
  45. dory/middleware/__init__.py +46 -0
  46. dory/middleware/connection_tracker.py +608 -0
  47. dory/middleware/request_id.py +325 -0
  48. dory/middleware/request_tracker.py +511 -0
  49. dory/migration/__init__.py +33 -0
  50. dory/migration/configmap.py +232 -0
  51. dory/migration/s3_store.py +594 -0
  52. dory/migration/serialization.py +135 -0
  53. dory/migration/state_manager.py +286 -0
  54. dory/migration/transfer.py +382 -0
  55. dory/monitoring/__init__.py +29 -0
  56. dory/monitoring/opentelemetry.py +489 -0
  57. dory/output/__init__.py +31 -0
  58. dory/output/envelope.py +137 -0
  59. dory/output/formatter.py +113 -0
  60. dory/output/rabbitmq.py +632 -0
  61. dory/output/routing.py +318 -0
  62. dory/output/validator.py +199 -0
  63. dory/py.typed +2 -0
  64. dory/recovery/__init__.py +60 -0
  65. dory/recovery/golden_image.py +487 -0
  66. dory/recovery/golden_snapshot.py +713 -0
  67. dory/recovery/golden_validator.py +518 -0
  68. dory/recovery/partial_recovery.py +482 -0
  69. dory/recovery/recovery_decision.py +242 -0
  70. dory/recovery/restart_detector.py +142 -0
  71. dory/recovery/state_validator.py +183 -0
  72. dory/resilience/__init__.py +45 -0
  73. dory/resilience/circuit_breaker.py +457 -0
  74. dory/resilience/retry.py +389 -0
  75. dory/simple.py +342 -0
  76. dory/types.py +68 -0
  77. dory/utils/__init__.py +31 -0
  78. dory/utils/errors.py +59 -0
  79. dory/utils/retry.py +115 -0
  80. dory/utils/timeout.py +80 -0
  81. dory_processor_sdk-0.0.1.dist-info/METADATA +424 -0
  82. dory_processor_sdk-0.0.1.dist-info/RECORD +86 -0
  83. dory_processor_sdk-0.0.1.dist-info/WHEEL +5 -0
  84. dory_processor_sdk-0.0.1.dist-info/entry_points.txt +2 -0
  85. dory_processor_sdk-0.0.1.dist-info/licenses/LICENSE +201 -0
  86. dory_processor_sdk-0.0.1.dist-info/top_level.txt +1 -0
@@ -0,0 +1,382 @@
1
+ """
2
+ State transfer utilities with timeout and size validation.
3
+
4
+ Provides utilities for safe state capture and restore operations
5
+ that align with Orchestrator timeout expectations.
6
+ """
7
+
8
+ import asyncio
9
+ import functools
10
+ import logging
11
+ import time
12
+ from concurrent.futures import ThreadPoolExecutor, TimeoutError as FuturesTimeoutError
13
+ from dataclasses import dataclass
14
+ from typing import Any, Callable, TypeVar
15
+
16
+ from dory.utils.errors import DoryStateError
17
+
18
+ logger = logging.getLogger(__name__)
19
+
20
+ # Type variable for generic return type
21
+ T = TypeVar("T")
22
+
# Limits enforced on the Orchestrator side (values mirrored from transfer.go).
ORCHESTRATOR_STATE_TIMEOUT_SEC = 30  # DefaultHTTPTimeout in transfer.go
ORCHESTRATOR_MAX_STATE_SIZE = 10 * 1024**2  # MaxResponseBodySize in transfer.go (10MB)

# SDK-side defaults, deliberately stricter than the Orchestrator limits so the
# SDK fails first with a descriptive error.
DEFAULT_CAPTURE_TIMEOUT_SEC = 25  # 5s of headroom below the Orchestrator timeout
DEFAULT_RESTORE_TIMEOUT_SEC = 25
DEFAULT_MAX_STATE_SIZE = 8 * 1024**2  # 8MB, i.e. 2MB of headroom below the Orchestrator limit
DEFAULT_SIZE_WARN_THRESHOLD = 0.75  # fraction of the max size at which a warning is logged
32
+
33
+
@dataclass
class TransferConfig:
    """Configuration for state transfer operations.

    Values that meet or exceed the Orchestrator limits are clamped in
    ``__post_init__`` (with a warning) rather than rejected.
    """

    # Maximum seconds allowed for capturing state.
    capture_timeout_sec: float = DEFAULT_CAPTURE_TIMEOUT_SEC
    # Maximum seconds allowed for restoring state.
    restore_timeout_sec: float = DEFAULT_RESTORE_TIMEOUT_SEC
    # Maximum serialized state size in bytes.
    max_size_bytes: int = DEFAULT_MAX_STATE_SIZE
    # Fraction of max_size_bytes at which a size warning is emitted.
    size_warn_threshold: float = DEFAULT_SIZE_WARN_THRESHOLD

    def __post_init__(self):
        """Validate configuration against Orchestrator limits.

        Timeouts at or above the Orchestrator timeout are reduced to 5s below
        it; a max size above the Orchestrator limit is reduced to that limit.
        Each adjustment is logged as a warning.
        """
        # Largest timeout that still leaves a 5s buffer before the
        # Orchestrator gives up on the request.
        safe_timeout = ORCHESTRATOR_STATE_TIMEOUT_SEC - 5

        if self.capture_timeout_sec >= ORCHESTRATOR_STATE_TIMEOUT_SEC:
            logger.warning(
                f"state_capture_timeout_sec ({self.capture_timeout_sec}s) >= "
                f"Orchestrator timeout ({ORCHESTRATOR_STATE_TIMEOUT_SEC}s). "
                # This fragment must be an f-string too, otherwise the
                # placeholder is logged verbatim instead of the number.
                f"Reducing to {safe_timeout}s."
            )
            self.capture_timeout_sec = safe_timeout

        if self.restore_timeout_sec >= ORCHESTRATOR_STATE_TIMEOUT_SEC:
            logger.warning(
                f"state_restore_timeout_sec ({self.restore_timeout_sec}s) >= "
                f"Orchestrator timeout ({ORCHESTRATOR_STATE_TIMEOUT_SEC}s). "
                f"Reducing to {safe_timeout}s."
            )
            self.restore_timeout_sec = safe_timeout

        if self.max_size_bytes > ORCHESTRATOR_MAX_STATE_SIZE:
            logger.warning(
                f"state_max_size_bytes ({self.max_size_bytes}) > "
                f"Orchestrator limit ({ORCHESTRATOR_MAX_STATE_SIZE}). "
                f"Reducing to {ORCHESTRATOR_MAX_STATE_SIZE}."
            )
            self.max_size_bytes = ORCHESTRATOR_MAX_STATE_SIZE
68
+
69
+
@dataclass
class TransferMetrics:
    """Measurements recorded for a single state transfer operation."""

    # Elapsed time of the operation, in seconds.
    duration_sec: float
    # Size of the serialized state, in bytes.
    size_bytes: int
    # size_bytes divided by the configured maximum size.
    size_ratio: float
    # True when the operation ran past its timeout.
    timed_out: bool
    # True when the state was larger than the configured maximum.
    size_exceeded: bool
79
+
80
+
class StateTransferError(DoryStateError):
    """Base error for failures during a state transfer.

    Carries the optional ``TransferMetrics`` captured at the point of failure
    in the ``metrics`` attribute.
    """

    def __init__(
        self,
        message: str,
        metrics: TransferMetrics | None = None,
        cause: Exception | None = None,
    ):
        # The message and the optional underlying cause are handled by the
        # parent class; only the metrics are stored here.
        super().__init__(message, cause=cause)
        self.metrics = metrics
92
+
93
+
class StateTransferTimeout(StateTransferError):
    """Raised when a state transfer operation exceeds its timeout."""
97
+
98
+
class StateSizeExceeded(StateTransferError):
    """Raised when the serialized state is larger than the configured maximum."""
102
+
103
+
def validate_state_size(
    state_json: str,
    max_size: int = DEFAULT_MAX_STATE_SIZE,
    warn_threshold: float = DEFAULT_SIZE_WARN_THRESHOLD,
) -> TransferMetrics:
    """
    Check the UTF-8 encoded size of serialized state against a limit.

    Args:
        state_json: Serialized state JSON string
        max_size: Maximum allowed size in bytes
        warn_threshold: Fraction of max_size at which a warning is logged

    Returns:
        TransferMetrics describing the size (duration is reported as 0)

    Raises:
        StateSizeExceeded: If the encoded state is larger than max_size
    """
    # Measure the encoded byte length, not the character count.
    encoded_len = len(state_json.encode("utf-8"))
    too_large = encoded_len > max_size

    # A non-positive max_size would make the ratio meaningless; report 0.
    if max_size > 0:
        ratio = encoded_len / max_size
    else:
        ratio = 0

    report = TransferMetrics(
        duration_sec=0,
        size_bytes=encoded_len,
        size_ratio=ratio,
        timed_out=False,
        size_exceeded=too_large,
    )

    if too_large:
        raise StateSizeExceeded(
            f"State size ({encoded_len:,} bytes) exceeds maximum "
            f"({max_size:,} bytes). Orchestrator will reject this state.",
            metrics=report,
        )

    if ratio >= warn_threshold:
        logger.warning(
            f"State size ({encoded_len:,} bytes) is {ratio:.1%} of maximum "
            f"({max_size:,} bytes). Consider reducing state size to avoid "
            "transfer failures."
        )

    return report
149
+
150
+
def with_timeout(
    timeout_sec: float,
    operation_name: str = "operation",
) -> Callable[[Callable[..., T]], Callable[..., T]]:
    """
    Decorator to add timeout to synchronous functions.

    Runs the function in a single-worker thread pool and waits at most
    ``timeout_sec`` for its result. A slow (but successful) call that uses
    more than half of the timeout is logged as a warning.

    Args:
        timeout_sec: Timeout in seconds
        operation_name: Name for error messages

    Returns:
        Decorated function with timeout

    Raises:
        StateTransferTimeout: (from the decorated function) if the call does
            not finish within ``timeout_sec``. The worker thread cannot be
            interrupted, so the underlying function keeps running in the
            background after the timeout is reported.
    """
    def decorator(func: Callable[..., T]) -> Callable[..., T]:
        @functools.wraps(func)
        def wrapper(*args: Any, **kwargs: Any) -> T:
            start_time = time.monotonic()

            # Deliberately NOT used as a context manager: leaving a `with`
            # block calls shutdown(wait=True), which would block until the
            # function finishes and therefore defeat the timeout.
            executor = ThreadPoolExecutor(max_workers=1)
            try:
                future = executor.submit(func, *args, **kwargs)

                try:
                    result = future.result(timeout=timeout_sec)
                except FuturesTimeoutError:
                    duration = time.monotonic() - start_time
                    # Size is unknown here because the function never returned.
                    metrics = TransferMetrics(
                        duration_sec=duration,
                        size_bytes=0,
                        size_ratio=0,
                        timed_out=True,
                        size_exceeded=False,
                    )
                    raise StateTransferTimeout(
                        f"{operation_name} timed out after {timeout_sec}s. "
                        "Consider reducing state size or optimizing get_state().",
                        metrics=metrics,
                    )

                duration = time.monotonic() - start_time

                # Log if operation took significant time
                if duration > timeout_sec * 0.5:
                    logger.warning(
                        f"{operation_name} took {duration:.2f}s "
                        f"({duration/timeout_sec:.1%} of {timeout_sec}s timeout)"
                    )

                return result
            finally:
                # Release the pool without waiting for a still-running worker.
                executor.shutdown(wait=False)

        return wrapper
    return decorator
205
+
206
+
async def async_with_timeout(
    coro: Any,
    timeout_sec: float,
    operation_name: str = "operation",
) -> Any:
    """
    Await a coroutine, giving up after a fixed number of seconds.

    A successful call that used more than half of the timeout is logged as a
    warning.

    Args:
        coro: Coroutine to execute
        timeout_sec: Timeout in seconds
        operation_name: Name for error messages

    Returns:
        Result of the coroutine

    Raises:
        StateTransferTimeout: If operation times out
    """
    began = time.monotonic()

    try:
        outcome = await asyncio.wait_for(coro, timeout=timeout_sec)
    except asyncio.TimeoutError:
        elapsed = time.monotonic() - began
        # No size information is available for a call that never completed.
        raise StateTransferTimeout(
            f"{operation_name} timed out after {timeout_sec}s. "
            "Consider reducing state size or optimizing the operation.",
            metrics=TransferMetrics(
                duration_sec=elapsed,
                size_bytes=0,
                size_ratio=0,
                timed_out=True,
                size_exceeded=False,
            ),
        )

    elapsed = time.monotonic() - began

    # Surface operations that are getting close to the limit.
    if elapsed > timeout_sec * 0.5:
        logger.warning(
            f"{operation_name} took {elapsed:.2f}s "
            f"({elapsed/timeout_sec:.1%} of {timeout_sec}s timeout)"
        )

    return outcome
255
+
256
+
class StateCaptureGuard:
    """
    Context manager for safe state capture with timeout and size validation.

    The guard does not interrupt a slow capture; it measures elapsed time and
    reports an overrun (an error log on exit, an exception from ``validate``).

    Usage:
        config = TransferConfig(capture_timeout_sec=25, max_size_bytes=8*1024*1024)

        with StateCaptureGuard(config) as guard:
            state = processor.get_state()
            state_json = json.dumps(state)
            guard.validate(state_json)
    """

    def __init__(self, config: TransferConfig | None = None):
        """
        Initialize capture guard.

        Args:
            config: Transfer configuration (defaults to ``TransferConfig()``)
        """
        self._config = config or TransferConfig()
        # Start the clock at construction so that validate() still measures a
        # meaningful duration if it is called without entering the context;
        # __enter__ restarts it for the normal `with` usage.
        self._start_time: float = time.monotonic()
        self._metrics: TransferMetrics | None = None

    def __enter__(self) -> "StateCaptureGuard":
        self._start_time = time.monotonic()
        return self

    def __exit__(self, exc_type: Any, exc_val: Any, exc_tb: Any) -> bool:
        duration = time.monotonic() - self._start_time

        if duration > self._config.capture_timeout_sec:
            logger.error(
                f"State capture took {duration:.2f}s, exceeding "
                f"{self._config.capture_timeout_sec}s timeout"
            )

        return False  # Don't suppress exceptions

    def validate(self, state_json: str) -> TransferMetrics:
        """
        Validate captured state.

        The elapsed time is checked first; the size is only validated when the
        capture finished within the timeout.

        Args:
            state_json: Serialized state JSON

        Returns:
            TransferMetrics with capture information

        Raises:
            StateSizeExceeded: If state exceeds max size
            StateTransferTimeout: If capture exceeded timeout
        """
        duration = time.monotonic() - self._start_time

        # Check timeout
        if duration > self._config.capture_timeout_sec:
            size_bytes = len(state_json.encode("utf-8"))
            max_size = self._config.max_size_bytes
            # The size is known here, so report a ratio and exceeded flag that
            # agree with size_bytes (same formula as validate_state_size).
            self._metrics = TransferMetrics(
                duration_sec=duration,
                size_bytes=size_bytes,
                size_ratio=size_bytes / max_size if max_size > 0 else 0,
                timed_out=True,
                size_exceeded=size_bytes > max_size,
            )
            raise StateTransferTimeout(
                f"State capture took {duration:.2f}s, exceeding "
                f"{self._config.capture_timeout_sec}s timeout",
                metrics=self._metrics,
            )

        # Check size
        size_metrics = validate_state_size(
            state_json,
            max_size=self._config.max_size_bytes,
            warn_threshold=self._config.size_warn_threshold,
        )

        # Combine the size information with the measured duration.
        self._metrics = TransferMetrics(
            duration_sec=duration,
            size_bytes=size_metrics.size_bytes,
            size_ratio=size_metrics.size_ratio,
            timed_out=False,
            size_exceeded=size_metrics.size_exceeded,
        )

        return self._metrics

    @property
    def metrics(self) -> TransferMetrics | None:
        """Get capture metrics (None until validate() has recorded them)."""
        return self._metrics
348
+
349
+
def log_transfer_summary(
    operation: str,
    metrics: TransferMetrics,
    config: TransferConfig,
) -> None:
    """
    Log a summary of the transfer operation.

    The log level is ERROR for a timeout or an oversized state, WARNING when
    the size ratio reached the configured warn threshold, and INFO otherwise.

    Args:
        operation: Operation name (e.g., "capture", "restore")
        metrics: Transfer metrics
        config: Transfer configuration
    """
    level = logging.INFO
    status = "completed"

    if metrics.timed_out:
        level = logging.ERROR
        status = "TIMED OUT"
    elif metrics.size_exceeded:
        level = logging.ERROR
        status = "SIZE EXCEEDED"
    elif metrics.size_ratio >= config.size_warn_threshold:
        level = logging.WARNING
        status = "completed (size warning)"

    # Report the timeout that applies to this operation: restore operations
    # have their own limit; anything else uses the capture limit as before.
    if operation.strip().lower().startswith("restore"):
        timeout_sec = config.restore_timeout_sec
    else:
        timeout_sec = config.capture_timeout_sec

    logger.log(
        level,
        f"State {operation} {status}: "
        f"duration={metrics.duration_sec:.2f}s/{timeout_sec}s, "
        f"size={metrics.size_bytes:,}B/{config.max_size_bytes:,}B "
        f"({metrics.size_ratio:.1%})",
    )
@@ -0,0 +1,29 @@
1
+ """
2
+ Dory Monitoring and Observability
3
+
4
+ OpenTelemetry integration for distributed tracing and metrics.
5
+ """
6
+
7
+ from dory.monitoring.opentelemetry import (
8
+ OpenTelemetryManager,
9
+ trace_function,
10
+ create_span,
11
+ add_span_attributes,
12
+ record_exception,
13
+ get_tracer,
14
+ initialize_otel,
15
+ get_global_otel,
16
+ OTEL_AVAILABLE,
17
+ )
18
+
19
+ __all__ = [
20
+ "OpenTelemetryManager",
21
+ "trace_function",
22
+ "create_span",
23
+ "add_span_attributes",
24
+ "record_exception",
25
+ "get_tracer",
26
+ "initialize_otel",
27
+ "get_global_otel",
28
+ "OTEL_AVAILABLE",
29
+ ]