dory-sdk 2.1.0__py3-none-any.whl → 2.1.4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- dory/__init__.py +32 -1
- dory/config/defaults.py +6 -0
- dory/config/schema.py +26 -0
- dory/edge/__init__.py +88 -0
- dory/edge/adaptive.py +648 -0
- dory/edge/detector.py +546 -0
- dory/edge/fencing.py +488 -0
- dory/edge/heartbeat.py +598 -0
- dory/edge/role.py +416 -0
- dory/health/server.py +283 -9
- dory/k8s/__init__.py +69 -0
- dory/k8s/labels.py +505 -0
- dory/migration/__init__.py +49 -0
- dory/migration/s3_store.py +656 -0
- dory/migration/state_manager.py +64 -6
- dory/migration/transfer.py +382 -0
- dory/migration/versioning.py +749 -0
- {dory_sdk-2.1.0.dist-info → dory_sdk-2.1.4.dist-info}/METADATA +37 -32
- {dory_sdk-2.1.0.dist-info → dory_sdk-2.1.4.dist-info}/RECORD +22 -15
- dory_sdk-2.1.4.dist-info/entry_points.txt +2 -0
- dory/sidecar/__init__.py +0 -6
- dory/sidecar/main.py +0 -75
- dory/sidecar/server.py +0 -329
- dory_sdk-2.1.0.dist-info/entry_points.txt +0 -3
- {dory_sdk-2.1.0.dist-info → dory_sdk-2.1.4.dist-info}/WHEEL +0 -0
- {dory_sdk-2.1.0.dist-info → dory_sdk-2.1.4.dist-info}/top_level.txt +0 -0
dory/migration/state_manager.py
CHANGED
|
@@ -14,6 +14,7 @@ from typing import Any, TYPE_CHECKING
|
|
|
14
14
|
from dory.types import StateBackend
|
|
15
15
|
from dory.migration.serialization import StateSerializer
|
|
16
16
|
from dory.migration.configmap import ConfigMapStore
|
|
17
|
+
from dory.migration.s3_store import S3Store, S3Config
|
|
17
18
|
from dory.utils.errors import DoryStateError
|
|
18
19
|
|
|
19
20
|
if TYPE_CHECKING:
|
|
@@ -55,6 +56,7 @@ class StateManager:
|
|
|
55
56
|
self._config = config
|
|
56
57
|
self._serializer = StateSerializer()
|
|
57
58
|
self._configmap_store: ConfigMapStore | None = None
|
|
59
|
+
self._s3_store: S3Store | None = None
|
|
58
60
|
|
|
59
61
|
# Get namespace from environment
|
|
60
62
|
self._namespace = os.environ.get("POD_NAMESPACE", "default")
|
|
@@ -285,17 +287,73 @@ class StateManager:
|
|
|
285
287
|
raise DoryStateError(f"Failed to delete state from PVC {path}: {e}", cause=e)
|
|
286
288
|
|
|
287
289
|
# =========================================================================
|
|
288
|
-
# S3 Backend
|
|
290
|
+
# S3 Backend
|
|
289
291
|
# =========================================================================
|
|
290
292
|
|
|
293
|
+
def _get_s3_store(self) -> S3Store:
    """Return the S3 store, lazily constructing and caching it on first use."""
    if self._s3_store is None:
        # Pull S3 settings from DoryConfig when the attribute is present;
        # otherwise fall back to the store's own defaults.
        cfg = None
        if self._config and hasattr(self._config, "s3_config"):
            cfg = self._config.s3_config
        self._s3_store = S3Store(config=cfg)
    return self._s3_store
|
|
304
|
+
|
|
291
305
|
async def _save_to_s3(self, processor_id: str, state_json: str) -> None:
    """Save state to S3 with offline buffering support."""
    store = self._get_s3_store()
    # Tag the object with pod identity so operators can trace its origin.
    meta = {
        "pod-name": self._pod_name,
        "namespace": self._namespace,
    }
    await store.save(processor_id, state_json, metadata=meta)
|
|
294
316
|
|
|
295
317
|
async def _load_from_s3(self, processor_id: str) -> str | None:
    """Load state from S3 (falls back to local buffer if unavailable)."""
    return await self._get_s3_store().load(processor_id)
|
|
298
321
|
|
|
299
322
|
async def _delete_from_s3(self, processor_id: str) -> bool:
    """Delete state from S3."""
    return await self._get_s3_store().delete(processor_id)
|
|
326
|
+
|
|
327
|
+
async def sync_s3_buffer(self) -> int:
    """
    Sync locally buffered states to S3.

    Call this periodically on edge nodes to upload states that were
    buffered while connectivity was degraded.

    Returns:
        Number of states synced (0 when the backend is not S3).
    """
    # No-op unless this manager is actually configured for S3.
    if self._backend != StateBackend.S3:
        return 0
    return await self._get_s3_store().sync_buffer()
|
|
342
|
+
|
|
343
|
+
async def start_s3_background_sync(self, interval_seconds: float = 60.0) -> None:
    """
    Start background S3 sync for edge nodes.

    Args:
        interval_seconds: Interval between sync attempts.
    """
    # No-op unless this manager is actually configured for S3.
    if self._backend != StateBackend.S3:
        return
    await self._get_s3_store().start_background_sync(interval_seconds)
|
|
355
|
+
|
|
356
|
+
async def stop_s3_background_sync(self) -> None:
    """Stop background S3 sync, if a store was ever created."""
    store = self._s3_store
    if store:
        await store.stop_background_sync()
|
|
@@ -0,0 +1,382 @@
|
|
|
1
|
+
"""
|
|
2
|
+
State transfer utilities with timeout and size validation.
|
|
3
|
+
|
|
4
|
+
Provides utilities for safe state capture and restore operations
|
|
5
|
+
that align with Orchestrator timeout expectations.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
import asyncio
|
|
9
|
+
import functools
|
|
10
|
+
import logging
|
|
11
|
+
import time
|
|
12
|
+
from concurrent.futures import ThreadPoolExecutor, TimeoutError as FuturesTimeoutError
|
|
13
|
+
from dataclasses import dataclass
|
|
14
|
+
from typing import Any, Callable, TypeVar
|
|
15
|
+
|
|
16
|
+
from dory.utils.errors import DoryStateError
|
|
17
|
+
|
|
18
|
+
logger = logging.getLogger(__name__)

# Generic return type used by the timeout decorator below.
T = TypeVar("T")

# Limits imposed by the Orchestrator (mirrors transfer.go).
ORCHESTRATOR_STATE_TIMEOUT_SEC = 30  # DefaultHTTPTimeout in transfer.go
ORCHESTRATOR_MAX_STATE_SIZE = 10 * 1024 * 1024  # MaxResponseBodySize in transfer.go (10MB)

# SDK-side defaults, chosen with a safety margin under the Orchestrator limits.
DEFAULT_CAPTURE_TIMEOUT_SEC = 25  # 5s buffer before the Orchestrator timeout
DEFAULT_RESTORE_TIMEOUT_SEC = 25
DEFAULT_MAX_STATE_SIZE = 8 * 1024 * 1024  # 8MB: 2MB buffer under the Orchestrator limit
DEFAULT_SIZE_WARN_THRESHOLD = 0.75  # warn once state reaches 75% of the max
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
@dataclass
class TransferConfig:
    """Configuration for state transfer operations.

    Values are clamped in ``__post_init__`` so they never exceed what the
    Orchestrator will accept (see the ORCHESTRATOR_* constants).
    """

    capture_timeout_sec: float = DEFAULT_CAPTURE_TIMEOUT_SEC  # budget for state capture
    restore_timeout_sec: float = DEFAULT_RESTORE_TIMEOUT_SEC  # budget for state restore
    max_size_bytes: int = DEFAULT_MAX_STATE_SIZE  # hard cap on serialized state size
    size_warn_threshold: float = DEFAULT_SIZE_WARN_THRESHOLD  # fraction of max that triggers a warning

    def __post_init__(self):
        """Validate configuration against Orchestrator limits, clamping with a 5s margin."""
        if self.capture_timeout_sec >= ORCHESTRATOR_STATE_TIMEOUT_SEC:
            logger.warning(
                f"state_capture_timeout_sec ({self.capture_timeout_sec}s) >= "
                f"Orchestrator timeout ({ORCHESTRATOR_STATE_TIMEOUT_SEC}s). "
                # BUG FIX: this fragment was a plain string (missing f-prefix),
                # so "{ORCHESTRATOR_STATE_TIMEOUT_SEC - 5}s" was logged verbatim.
                f"Reducing to {ORCHESTRATOR_STATE_TIMEOUT_SEC - 5}s."
            )
            self.capture_timeout_sec = ORCHESTRATOR_STATE_TIMEOUT_SEC - 5

        if self.restore_timeout_sec >= ORCHESTRATOR_STATE_TIMEOUT_SEC:
            logger.warning(
                f"state_restore_timeout_sec ({self.restore_timeout_sec}s) >= "
                f"Orchestrator timeout ({ORCHESTRATOR_STATE_TIMEOUT_SEC}s). "
                f"Reducing to {ORCHESTRATOR_STATE_TIMEOUT_SEC - 5}s."
            )
            self.restore_timeout_sec = ORCHESTRATOR_STATE_TIMEOUT_SEC - 5

        if self.max_size_bytes > ORCHESTRATOR_MAX_STATE_SIZE:
            logger.warning(
                f"state_max_size_bytes ({self.max_size_bytes}) > "
                f"Orchestrator limit ({ORCHESTRATOR_MAX_STATE_SIZE}). "
                f"Reducing to {ORCHESTRATOR_MAX_STATE_SIZE}."
            )
            self.max_size_bytes = ORCHESTRATOR_MAX_STATE_SIZE
|
|
68
|
+
|
|
69
|
+
|
|
70
|
+
@dataclass
class TransferMetrics:
    """Measurements recorded for a single state transfer operation."""

    duration_sec: float  # wall-clock time of the operation
    size_bytes: int  # serialized state size in bytes
    size_ratio: float  # size_bytes divided by the configured maximum
    timed_out: bool  # True when the operation hit its timeout
    size_exceeded: bool  # True when size_bytes went over the maximum
|
|
79
|
+
|
|
80
|
+
|
|
81
|
+
class StateTransferError(DoryStateError):
    """Error raised during a state transfer operation."""

    def __init__(
        self,
        message: str,
        metrics: TransferMetrics | None = None,
        cause: Exception | None = None,
    ):
        super().__init__(message, cause=cause)
        # Keep the metrics snapshot so callers can inspect what failed.
        self.metrics = metrics
|
|
92
|
+
|
|
93
|
+
|
|
94
|
+
class StateTransferTimeout(StateTransferError):
    """Raised when a state transfer operation exceeds its timeout."""
|
|
97
|
+
|
|
98
|
+
|
|
99
|
+
class StateSizeExceeded(StateTransferError):
    """Raised when the serialized state is larger than the configured maximum."""
|
|
102
|
+
|
|
103
|
+
|
|
104
|
+
def validate_state_size(
    state_json: str,
    max_size: int = DEFAULT_MAX_STATE_SIZE,
    warn_threshold: float = DEFAULT_SIZE_WARN_THRESHOLD,
) -> TransferMetrics:
    """
    Check a serialized state against the configured size limits.

    Args:
        state_json: Serialized state JSON string
        max_size: Maximum allowed size in bytes
        warn_threshold: Fraction of max_size that triggers a warning log

    Returns:
        TransferMetrics describing the measured size

    Raises:
        StateSizeExceeded: If the state is larger than max_size
    """
    # Size is measured on the UTF-8 encoding, which is what goes on the wire.
    n_bytes = len(state_json.encode("utf-8"))
    ratio = n_bytes / max_size if max_size > 0 else 0

    metrics = TransferMetrics(
        duration_sec=0,
        size_bytes=n_bytes,
        size_ratio=ratio,
        timed_out=False,
        size_exceeded=n_bytes > max_size,
    )

    if metrics.size_exceeded:
        raise StateSizeExceeded(
            f"State size ({n_bytes:,} bytes) exceeds maximum "
            f"({max_size:,} bytes). Orchestrator will reject this state.",
            metrics=metrics,
        )

    # Within limits, but close enough to warn operators ahead of time.
    if ratio >= warn_threshold:
        logger.warning(
            f"State size ({n_bytes:,} bytes) is {ratio:.1%} of maximum "
            f"({max_size:,} bytes). Consider reducing state size to avoid "
            "transfer failures."
        )

    return metrics
|
|
149
|
+
|
|
150
|
+
|
|
151
|
+
def with_timeout(
    timeout_sec: float,
    operation_name: str = "operation",
) -> Callable[[Callable[..., T]], Callable[..., T]]:
    """
    Decorator that bounds a synchronous function's runtime.

    The wrapped function runs in a single-worker thread pool; if it does
    not finish within ``timeout_sec``, StateTransferTimeout is raised.

    Args:
        timeout_sec: Timeout in seconds
        operation_name: Name used in logs and error messages

    Returns:
        Decorated function with timeout
    """
    def decorator(func: Callable[..., T]) -> Callable[..., T]:
        @functools.wraps(func)
        def wrapper(*args: Any, **kwargs: Any) -> T:
            start_time = time.monotonic()

            # BUG FIX: the original used `with ThreadPoolExecutor(...)`, whose
            # __exit__ calls shutdown(wait=True). On timeout that blocked until
            # the runaway function finished, defeating the timeout entirely.
            # Create the executor explicitly and shut it down without waiting.
            executor = ThreadPoolExecutor(max_workers=1)
            future = executor.submit(func, *args, **kwargs)
            try:
                result = future.result(timeout=timeout_sec)
            except FuturesTimeoutError:
                duration = time.monotonic() - start_time
                metrics = TransferMetrics(
                    duration_sec=duration,
                    size_bytes=0,
                    size_ratio=0,
                    timed_out=True,
                    size_exceeded=False,
                )
                raise StateTransferTimeout(
                    f"{operation_name} timed out after {timeout_sec}s. "
                    "Consider reducing state size or optimizing get_state().",
                    metrics=metrics,
                )
            finally:
                # Never block here; cancel the pending future if still queued.
                executor.shutdown(wait=False, cancel_futures=True)

            duration = time.monotonic() - start_time
            # Log if the operation consumed a significant fraction of its budget.
            if duration > timeout_sec * 0.5:
                logger.warning(
                    f"{operation_name} took {duration:.2f}s "
                    f"({duration/timeout_sec:.1%} of {timeout_sec}s timeout)"
                )
            return result

        return wrapper
    return decorator
|
|
205
|
+
|
|
206
|
+
|
|
207
|
+
async def async_with_timeout(
    coro: Any,
    timeout_sec: float,
    operation_name: str = "operation",
) -> Any:
    """
    Await a coroutine while enforcing a hard deadline.

    Args:
        coro: Coroutine to execute
        timeout_sec: Timeout in seconds
        operation_name: Name used in logs and error messages

    Returns:
        Whatever the coroutine returns

    Raises:
        StateTransferTimeout: If the coroutine does not finish in time
    """
    started = time.monotonic()

    try:
        result = await asyncio.wait_for(coro, timeout=timeout_sec)
    except asyncio.TimeoutError:
        elapsed = time.monotonic() - started
        raise StateTransferTimeout(
            f"{operation_name} timed out after {timeout_sec}s. "
            "Consider reducing state size or optimizing the operation.",
            metrics=TransferMetrics(
                duration_sec=elapsed,
                size_bytes=0,
                size_ratio=0,
                timed_out=True,
                size_exceeded=False,
            ),
        )

    elapsed = time.monotonic() - started
    # Surface slow-but-successful operations so operators can tune limits.
    if elapsed > timeout_sec * 0.5:
        logger.warning(
            f"{operation_name} took {elapsed:.2f}s "
            f"({elapsed/timeout_sec:.1%} of {timeout_sec}s timeout)"
        )
    return result
|
|
255
|
+
|
|
256
|
+
|
|
257
|
+
class StateCaptureGuard:
    """
    Context manager for safe state capture with timeout and size validation.

    Usage:
        config = TransferConfig(capture_timeout_sec=25, max_size_bytes=8*1024*1024)

        with StateCaptureGuard(config) as guard:
            state = processor.get_state()
            state_json = json.dumps(state)
            guard.validate(state_json)
    """

    def __init__(self, config: TransferConfig | None = None):
        """
        Initialize capture guard.

        Args:
            config: Transfer configuration (defaults to TransferConfig())
        """
        self._config = config or TransferConfig()
        self._start_time: float = 0
        self._metrics: TransferMetrics | None = None

    def __enter__(self) -> "StateCaptureGuard":
        # Stamp the start of the capture window.
        self._start_time = time.monotonic()
        return self

    def __exit__(self, exc_type: Any, exc_val: Any, exc_tb: Any) -> bool:
        elapsed = time.monotonic() - self._start_time
        if elapsed > self._config.capture_timeout_sec:
            logger.error(
                f"State capture took {elapsed:.2f}s, exceeding "
                f"{self._config.capture_timeout_sec}s timeout"
            )
        # Never suppress exceptions raised inside the block.
        return False

    def validate(self, state_json: str) -> TransferMetrics:
        """
        Validate captured state against the timeout and size limits.

        Args:
            state_json: Serialized state JSON

        Returns:
            TransferMetrics with capture information

        Raises:
            StateSizeExceeded: If state exceeds max size
            StateTransferTimeout: If capture exceeded timeout
        """
        elapsed = time.monotonic() - self._start_time

        # Timeout check first: a late capture fails even if the size is fine.
        if elapsed > self._config.capture_timeout_sec:
            self._metrics = TransferMetrics(
                duration_sec=elapsed,
                size_bytes=len(state_json.encode("utf-8")),
                size_ratio=0,
                timed_out=True,
                size_exceeded=False,
            )
            raise StateTransferTimeout(
                f"State capture took {elapsed:.2f}s, exceeding "
                f"{self._config.capture_timeout_sec}s timeout",
                metrics=self._metrics,
            )

        # Size check (raises StateSizeExceeded on violation).
        sized = validate_state_size(
            state_json,
            max_size=self._config.max_size_bytes,
            warn_threshold=self._config.size_warn_threshold,
        )

        self._metrics = TransferMetrics(
            duration_sec=elapsed,
            size_bytes=sized.size_bytes,
            size_ratio=sized.size_ratio,
            timed_out=False,
            size_exceeded=sized.size_exceeded,
        )
        return self._metrics

    @property
    def metrics(self) -> TransferMetrics | None:
        """Metrics from the most recent validate() call, if any."""
        return self._metrics
|
|
348
|
+
|
|
349
|
+
|
|
350
|
+
def log_transfer_summary(
    operation: str,
    metrics: TransferMetrics,
    config: TransferConfig,
) -> None:
    """
    Log a one-line summary of a transfer operation.

    Args:
        operation: Operation name (e.g., "capture", "restore")
        metrics: Transfer metrics
        config: Transfer configuration
    """
    level = logging.INFO
    status = "completed"

    if metrics.timed_out:
        level = logging.ERROR
        status = "TIMED OUT"
    elif metrics.size_exceeded:
        level = logging.ERROR
        status = "SIZE EXCEEDED"
    elif metrics.size_ratio >= config.size_warn_threshold:
        level = logging.WARNING
        status = "completed (size warning)"

    # BUG FIX: the duration budget was always reported against the *capture*
    # timeout, even for restore operations. Pick the budget by operation.
    timeout_sec = (
        config.restore_timeout_sec
        if operation == "restore"
        else config.capture_timeout_sec
    )

    logger.log(
        level,
        f"State {operation} {status}: "
        f"duration={metrics.duration_sec:.2f}s/{timeout_sec}s, "
        f"size={metrics.size_bytes:,}B/{config.max_size_bytes:,}B "
        f"({metrics.size_ratio:.1%})",
    )
|