dory-sdk 2.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- dory/__init__.py +70 -0
- dory/auto_instrument.py +142 -0
- dory/cli/__init__.py +5 -0
- dory/cli/main.py +290 -0
- dory/cli/templates.py +333 -0
- dory/config/__init__.py +23 -0
- dory/config/defaults.py +50 -0
- dory/config/loader.py +361 -0
- dory/config/presets.py +325 -0
- dory/config/schema.py +152 -0
- dory/core/__init__.py +27 -0
- dory/core/app.py +404 -0
- dory/core/context.py +209 -0
- dory/core/lifecycle.py +214 -0
- dory/core/meta.py +121 -0
- dory/core/modes.py +479 -0
- dory/core/processor.py +654 -0
- dory/core/signals.py +122 -0
- dory/decorators.py +142 -0
- dory/errors/__init__.py +117 -0
- dory/errors/classification.py +362 -0
- dory/errors/codes.py +495 -0
- dory/health/__init__.py +10 -0
- dory/health/probes.py +210 -0
- dory/health/server.py +306 -0
- dory/k8s/__init__.py +11 -0
- dory/k8s/annotation_watcher.py +184 -0
- dory/k8s/client.py +251 -0
- dory/k8s/pod_metadata.py +182 -0
- dory/logging/__init__.py +9 -0
- dory/logging/logger.py +175 -0
- dory/metrics/__init__.py +7 -0
- dory/metrics/collector.py +301 -0
- dory/middleware/__init__.py +36 -0
- dory/middleware/connection_tracker.py +608 -0
- dory/middleware/request_id.py +321 -0
- dory/middleware/request_tracker.py +501 -0
- dory/migration/__init__.py +11 -0
- dory/migration/configmap.py +260 -0
- dory/migration/serialization.py +167 -0
- dory/migration/state_manager.py +301 -0
- dory/monitoring/__init__.py +23 -0
- dory/monitoring/opentelemetry.py +462 -0
- dory/py.typed +2 -0
- dory/recovery/__init__.py +60 -0
- dory/recovery/golden_image.py +480 -0
- dory/recovery/golden_snapshot.py +561 -0
- dory/recovery/golden_validator.py +518 -0
- dory/recovery/partial_recovery.py +479 -0
- dory/recovery/recovery_decision.py +242 -0
- dory/recovery/restart_detector.py +142 -0
- dory/recovery/state_validator.py +187 -0
- dory/resilience/__init__.py +45 -0
- dory/resilience/circuit_breaker.py +454 -0
- dory/resilience/retry.py +389 -0
- dory/sidecar/__init__.py +6 -0
- dory/sidecar/main.py +75 -0
- dory/sidecar/server.py +329 -0
- dory/simple.py +342 -0
- dory/types.py +75 -0
- dory/utils/__init__.py +25 -0
- dory/utils/errors.py +59 -0
- dory/utils/retry.py +115 -0
- dory/utils/timeout.py +80 -0
- dory_sdk-2.1.0.dist-info/METADATA +663 -0
- dory_sdk-2.1.0.dist-info/RECORD +69 -0
- dory_sdk-2.1.0.dist-info/WHEEL +5 -0
- dory_sdk-2.1.0.dist-info/entry_points.txt +3 -0
- dory_sdk-2.1.0.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,301 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Prometheus metrics collector.
|
|
3
|
+
|
|
4
|
+
Collects and exports metrics in Prometheus format.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
import logging
|
|
8
|
+
import time
|
|
9
|
+
from dataclasses import dataclass, field
|
|
10
|
+
from typing import Dict, List
|
|
11
|
+
|
|
12
|
+
logger = logging.getLogger(__name__)
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
@dataclass
class MetricValue:
    """One recorded sample of a metric together with its label set."""

    # Numeric reading for this sample.
    value: float
    # Label name -> label value; each instance gets its own empty dict by default.
    labels: Dict[str, str] = field(default_factory=dict)
    # Taken from time.time() when the sample is created, unless supplied.
    timestamp: float = field(default_factory=time.time)
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
@dataclass
class MetricDefinition:
    """Describes one named metric and holds the samples recorded for it."""

    # Metric name as it is written on exported lines.
    name: str
    # Human-readable description of the metric.
    help: str
    # Metric kind: counter, gauge, histogram, summary
    type: str
    # Samples recorded so far; each instance gets its own empty list by default.
    values: List[MetricValue] = field(default_factory=list)
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
class MetricsCollector:
|
|
33
|
+
"""
|
|
34
|
+
Collects and exports Prometheus metrics.
|
|
35
|
+
|
|
36
|
+
Metrics collected:
|
|
37
|
+
- dory_startup_duration_seconds: Time to complete startup
|
|
38
|
+
- dory_shutdown_duration_seconds: Time to complete shutdown
|
|
39
|
+
- dory_state_save_duration_seconds: Time to save state
|
|
40
|
+
- dory_state_load_duration_seconds: Time to load state
|
|
41
|
+
- dory_restart_count: Number of restarts
|
|
42
|
+
- dory_golden_image_resets: Number of golden image resets
|
|
43
|
+
- dory_health_check_failures: Number of health check failures
|
|
44
|
+
"""
|
|
45
|
+
|
|
46
|
+
def __init__(self, prefix: str = "dory"):
|
|
47
|
+
"""
|
|
48
|
+
Initialize metrics collector.
|
|
49
|
+
|
|
50
|
+
Args:
|
|
51
|
+
prefix: Metric name prefix
|
|
52
|
+
"""
|
|
53
|
+
self._prefix = prefix
|
|
54
|
+
self._metrics: Dict[str, MetricDefinition] = {}
|
|
55
|
+
|
|
56
|
+
# Timing state
|
|
57
|
+
self._startup_start: float | None = None
|
|
58
|
+
self._startup_completed_at: float | None = None
|
|
59
|
+
self._shutdown_start: float | None = None
|
|
60
|
+
self._request_count: int = 0
|
|
61
|
+
|
|
62
|
+
# Initialize standard metrics
|
|
63
|
+
self._init_standard_metrics()
|
|
64
|
+
|
|
65
|
+
def _init_standard_metrics(self) -> None:
|
|
66
|
+
"""Initialize standard Dory metrics."""
|
|
67
|
+
self._register_metric(
|
|
68
|
+
"startup_duration_seconds",
|
|
69
|
+
"Time to complete processor startup",
|
|
70
|
+
"gauge",
|
|
71
|
+
)
|
|
72
|
+
self._register_metric(
|
|
73
|
+
"shutdown_duration_seconds",
|
|
74
|
+
"Time to complete processor shutdown",
|
|
75
|
+
"gauge",
|
|
76
|
+
)
|
|
77
|
+
self._register_metric(
|
|
78
|
+
"state_save_duration_seconds",
|
|
79
|
+
"Time to save processor state",
|
|
80
|
+
"gauge",
|
|
81
|
+
)
|
|
82
|
+
self._register_metric(
|
|
83
|
+
"state_load_duration_seconds",
|
|
84
|
+
"Time to load processor state",
|
|
85
|
+
"gauge",
|
|
86
|
+
)
|
|
87
|
+
self._register_metric(
|
|
88
|
+
"restart_count",
|
|
89
|
+
"Total number of processor restarts",
|
|
90
|
+
"counter",
|
|
91
|
+
)
|
|
92
|
+
self._register_metric(
|
|
93
|
+
"golden_image_resets_total",
|
|
94
|
+
"Total number of golden image resets",
|
|
95
|
+
"counter",
|
|
96
|
+
)
|
|
97
|
+
self._register_metric(
|
|
98
|
+
"health_check_failures_total",
|
|
99
|
+
"Total number of health check failures",
|
|
100
|
+
"counter",
|
|
101
|
+
)
|
|
102
|
+
self._register_metric(
|
|
103
|
+
"state_size_bytes",
|
|
104
|
+
"Size of processor state in bytes",
|
|
105
|
+
"gauge",
|
|
106
|
+
)
|
|
107
|
+
self._register_metric(
|
|
108
|
+
"processor_info",
|
|
109
|
+
"Processor information",
|
|
110
|
+
"gauge",
|
|
111
|
+
)
|
|
112
|
+
|
|
113
|
+
def _register_metric(self, name: str, help: str, type: str) -> None:
|
|
114
|
+
"""Register a metric definition."""
|
|
115
|
+
full_name = f"{self._prefix}_{name}"
|
|
116
|
+
self._metrics[full_name] = MetricDefinition(
|
|
117
|
+
name=full_name,
|
|
118
|
+
help=help,
|
|
119
|
+
type=type,
|
|
120
|
+
)
|
|
121
|
+
|
|
122
|
+
def _metric_name(self, name: str) -> str:
|
|
123
|
+
"""Get full metric name."""
|
|
124
|
+
return f"{self._prefix}_{name}"
|
|
125
|
+
|
|
126
|
+
def set_gauge(
|
|
127
|
+
self,
|
|
128
|
+
name: str,
|
|
129
|
+
value: float,
|
|
130
|
+
labels: Dict[str, str] | None = None,
|
|
131
|
+
) -> None:
|
|
132
|
+
"""
|
|
133
|
+
Set a gauge metric value.
|
|
134
|
+
|
|
135
|
+
Args:
|
|
136
|
+
name: Metric name (without prefix)
|
|
137
|
+
value: Metric value
|
|
138
|
+
labels: Optional labels
|
|
139
|
+
"""
|
|
140
|
+
full_name = self._metric_name(name)
|
|
141
|
+
if full_name not in self._metrics:
|
|
142
|
+
self._register_metric(name, f"Custom gauge {name}", "gauge")
|
|
143
|
+
|
|
144
|
+
metric = self._metrics[full_name]
|
|
145
|
+
metric.values = [MetricValue(value=value, labels=labels or {})]
|
|
146
|
+
|
|
147
|
+
def inc_counter(
|
|
148
|
+
self,
|
|
149
|
+
name: str,
|
|
150
|
+
value: float = 1.0,
|
|
151
|
+
labels: Dict[str, str] | None = None,
|
|
152
|
+
) -> None:
|
|
153
|
+
"""
|
|
154
|
+
Increment a counter metric.
|
|
155
|
+
|
|
156
|
+
Args:
|
|
157
|
+
name: Metric name (without prefix)
|
|
158
|
+
value: Amount to increment (default 1)
|
|
159
|
+
labels: Optional labels
|
|
160
|
+
"""
|
|
161
|
+
full_name = self._metric_name(name)
|
|
162
|
+
if full_name not in self._metrics:
|
|
163
|
+
self._register_metric(name, f"Custom counter {name}", "counter")
|
|
164
|
+
|
|
165
|
+
metric = self._metrics[full_name]
|
|
166
|
+
|
|
167
|
+
# Find existing value with same labels or create new
|
|
168
|
+
labels = labels or {}
|
|
169
|
+
for mv in metric.values:
|
|
170
|
+
if mv.labels == labels:
|
|
171
|
+
mv.value += value
|
|
172
|
+
mv.timestamp = time.time()
|
|
173
|
+
return
|
|
174
|
+
|
|
175
|
+
metric.values.append(MetricValue(value=value, labels=labels))
|
|
176
|
+
|
|
177
|
+
# Convenience methods for standard metrics
|
|
178
|
+
|
|
179
|
+
def record_startup_started(self) -> None:
|
|
180
|
+
"""Record that startup has started."""
|
|
181
|
+
self._startup_start = time.time()
|
|
182
|
+
|
|
183
|
+
def record_startup_completed(self) -> None:
|
|
184
|
+
"""Record that startup has completed."""
|
|
185
|
+
self._startup_completed_at = time.time()
|
|
186
|
+
if self._startup_start:
|
|
187
|
+
duration = self._startup_completed_at - self._startup_start
|
|
188
|
+
self.set_gauge("startup_duration_seconds", duration)
|
|
189
|
+
logger.debug(f"Startup completed in {duration:.3f}s")
|
|
190
|
+
|
|
191
|
+
def record_shutdown_started(self) -> None:
|
|
192
|
+
"""Record that shutdown has started."""
|
|
193
|
+
self._shutdown_start = time.time()
|
|
194
|
+
|
|
195
|
+
def record_shutdown_completed(self) -> None:
|
|
196
|
+
"""Record that shutdown has completed."""
|
|
197
|
+
if self._shutdown_start:
|
|
198
|
+
duration = time.time() - self._shutdown_start
|
|
199
|
+
self.set_gauge("shutdown_duration_seconds", duration)
|
|
200
|
+
logger.debug(f"Shutdown completed in {duration:.3f}s")
|
|
201
|
+
|
|
202
|
+
def record_state_save(self, duration: float, size_bytes: int = 0) -> None:
|
|
203
|
+
"""Record state save operation."""
|
|
204
|
+
self.set_gauge("state_save_duration_seconds", duration)
|
|
205
|
+
if size_bytes > 0:
|
|
206
|
+
self.set_gauge("state_size_bytes", size_bytes)
|
|
207
|
+
|
|
208
|
+
def record_state_load(self, duration: float) -> None:
|
|
209
|
+
"""Record state load operation."""
|
|
210
|
+
self.set_gauge("state_load_duration_seconds", duration)
|
|
211
|
+
|
|
212
|
+
def record_restart(self) -> None:
|
|
213
|
+
"""Record a restart event."""
|
|
214
|
+
self.inc_counter("restart_count")
|
|
215
|
+
|
|
216
|
+
def record_golden_image_reset(self) -> None:
|
|
217
|
+
"""Record a golden image reset."""
|
|
218
|
+
self.inc_counter("golden_image_resets_total")
|
|
219
|
+
|
|
220
|
+
def record_health_check_failure(self) -> None:
|
|
221
|
+
"""Record a health check failure."""
|
|
222
|
+
self.inc_counter("health_check_failures_total")
|
|
223
|
+
|
|
224
|
+
def set_processor_info(
|
|
225
|
+
self,
|
|
226
|
+
processor_id: str,
|
|
227
|
+
version: str = "",
|
|
228
|
+
pod_name: str = "",
|
|
229
|
+
) -> None:
|
|
230
|
+
"""Set processor information metric."""
|
|
231
|
+
self.set_gauge(
|
|
232
|
+
"processor_info",
|
|
233
|
+
1.0,
|
|
234
|
+
labels={
|
|
235
|
+
"processor_id": processor_id,
|
|
236
|
+
"version": version,
|
|
237
|
+
"pod": pod_name,
|
|
238
|
+
},
|
|
239
|
+
)
|
|
240
|
+
|
|
241
|
+
def export_prometheus(self) -> str:
|
|
242
|
+
"""
|
|
243
|
+
Export metrics in Prometheus text format.
|
|
244
|
+
|
|
245
|
+
Returns:
|
|
246
|
+
Prometheus-formatted metrics string
|
|
247
|
+
"""
|
|
248
|
+
lines = []
|
|
249
|
+
|
|
250
|
+
for metric in self._metrics.values():
|
|
251
|
+
# HELP line
|
|
252
|
+
lines.append(f"# HELP {metric.name} {metric.help}")
|
|
253
|
+
# TYPE line
|
|
254
|
+
lines.append(f"# TYPE {metric.name} {metric.type}")
|
|
255
|
+
|
|
256
|
+
# Metric values
|
|
257
|
+
for mv in metric.values:
|
|
258
|
+
if mv.labels:
|
|
259
|
+
label_str = ",".join(
|
|
260
|
+
f'{k}="{v}"' for k, v in mv.labels.items()
|
|
261
|
+
)
|
|
262
|
+
lines.append(f"{metric.name}{{{label_str}}} {mv.value}")
|
|
263
|
+
else:
|
|
264
|
+
lines.append(f"{metric.name} {mv.value}")
|
|
265
|
+
|
|
266
|
+
return "\n".join(lines) + "\n"
|
|
267
|
+
|
|
268
|
+
def flush(self) -> None:
|
|
269
|
+
"""Flush any buffered metrics."""
|
|
270
|
+
# In this implementation, metrics are not buffered
|
|
271
|
+
# This is a hook for implementations that buffer
|
|
272
|
+
pass
|
|
273
|
+
|
|
274
|
+
def get_uptime_seconds(self) -> float:
|
|
275
|
+
"""
|
|
276
|
+
Get the processor uptime in seconds.
|
|
277
|
+
|
|
278
|
+
Returns:
|
|
279
|
+
Uptime in seconds since startup completed, or 0 if not started
|
|
280
|
+
"""
|
|
281
|
+
if self._startup_completed_at is None:
|
|
282
|
+
return 0.0
|
|
283
|
+
return time.time() - self._startup_completed_at
|
|
284
|
+
|
|
285
|
+
def get_request_count(self) -> int:
|
|
286
|
+
"""
|
|
287
|
+
Get the total request count.
|
|
288
|
+
|
|
289
|
+
Returns:
|
|
290
|
+
Total number of requests processed
|
|
291
|
+
"""
|
|
292
|
+
return self._request_count
|
|
293
|
+
|
|
294
|
+
def increment_request_count(self, count: int = 1) -> None:
|
|
295
|
+
"""
|
|
296
|
+
Increment the request counter.
|
|
297
|
+
|
|
298
|
+
Args:
|
|
299
|
+
count: Amount to increment (default 1)
|
|
300
|
+
"""
|
|
301
|
+
self._request_count += count
|
|
@@ -0,0 +1,36 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Dory Middleware
|
|
3
|
+
|
|
4
|
+
Automatic bookkeeping middleware for request tracking, connection management,
|
|
5
|
+
and observability.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
from dory.middleware.request_tracker import (
|
|
9
|
+
RequestTracker,
|
|
10
|
+
track_request,
|
|
11
|
+
RequestMetrics,
|
|
12
|
+
RequestInfo,
|
|
13
|
+
)
|
|
14
|
+
from dory.middleware.request_id import (
|
|
15
|
+
RequestIdMiddleware,
|
|
16
|
+
with_request_id,
|
|
17
|
+
get_current_request_id,
|
|
18
|
+
)
|
|
19
|
+
from dory.middleware.connection_tracker import (
|
|
20
|
+
ConnectionTracker,
|
|
21
|
+
track_connection,
|
|
22
|
+
ConnectionInfo,
|
|
23
|
+
)
|
|
24
|
+
|
|
25
|
+
# Names re-exported by this package, grouped by the module they are imported from.
__all__ = [
    # dory.middleware.request_tracker
    "RequestTracker",
    "track_request",
    "RequestMetrics",
    "RequestInfo",
    # dory.middleware.request_id
    "RequestIdMiddleware",
    "with_request_id",
    "get_current_request_id",
    # dory.middleware.connection_tracker
    "ConnectionTracker",
    "track_connection",
    "ConnectionInfo",
]
|