dory-sdk 2.1.0__py3-none-any.whl → 2.1.4__py3-none-any.whl

This diff shows the content of publicly available package versions as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions as they appear in their respective public registries.
@@ -0,0 +1,656 @@
1
+ """
2
+ S3 storage backend for state persistence.
3
+
4
+ Provides S3-based state storage with local buffering for edge nodes
5
+ that may have intermittent connectivity.
6
+
7
+ Features:
8
+ - S3 upload/download with retry logic
9
+ - Local SQLite buffer for offline scenarios
10
+ - Multiple credential options (IAM role, env vars, STS)
11
+ - Automatic sync when connectivity is restored
12
+ """
13
+
14
+ import asyncio
15
+ import json
16
+ import logging
17
+ import os
18
+ import sqlite3
19
+ import time
20
+ from dataclasses import dataclass
21
+ from pathlib import Path
22
+ from typing import Any
23
+
24
+ from dory.utils.errors import DoryStateError
25
+
26
+ logger = logging.getLogger(__name__)
27
+
28
+ # Optional boto3 import - gracefully handle if not available
29
+ try:
30
+ import boto3
31
+ from botocore.exceptions import ClientError, NoCredentialsError, BotoCoreError
32
+ BOTO3_AVAILABLE = True
33
+ except ImportError:
34
+ BOTO3_AVAILABLE = False
35
+ boto3 = None
36
+ ClientError = Exception
37
+ NoCredentialsError = Exception
38
+ BotoCoreError = Exception
39
+
40
+
41
+ @dataclass
42
+ class S3Config:
43
+ """Configuration for S3 state backend."""
44
+
45
+ bucket: str
46
+ prefix: str = "dory-state"
47
+ region: str | None = None
48
+ endpoint_url: str | None = None # For S3-compatible services (MinIO, LocalStack)
49
+
50
+ # Credential options
51
+ access_key_id: str | None = None
52
+ secret_access_key: str | None = None
53
+ session_token: str | None = None # For STS temporary credentials
54
+ role_arn: str | None = None # For assuming a role
55
+ credential_broker_url: str | None = None # For edge credential broker
56
+
57
+ # Offline buffering
58
+ enable_offline_buffer: bool = True
59
+ buffer_path: str = "/data/dory-state-buffer.db"
60
+ buffer_path_fallback: str = "/tmp/dory-state-buffer.db"
61
+ max_buffer_age_seconds: int = 86400 # 24 hours
62
+
63
+ # Retry settings
64
+ max_retries: int = 3
65
+ retry_delay_seconds: float = 1.0
66
+ retry_backoff_multiplier: float = 2.0
67
+
68
+ @classmethod
69
+ def from_env(cls) -> "S3Config":
70
+ """Create config from environment variables."""
71
+ bucket = os.environ.get("DORY_S3_BUCKET")
72
+ if not bucket:
73
+ raise DoryStateError(
74
+ "DORY_S3_BUCKET environment variable is required for S3 backend"
75
+ )
76
+
77
+ return cls(
78
+ bucket=bucket,
79
+ prefix=os.environ.get("DORY_S3_PREFIX", "dory-state"),
80
+ region=os.environ.get("DORY_S3_REGION", os.environ.get("AWS_REGION")),
81
+ endpoint_url=os.environ.get("DORY_S3_ENDPOINT_URL"),
82
+ access_key_id=os.environ.get("AWS_ACCESS_KEY_ID"),
83
+ secret_access_key=os.environ.get("AWS_SECRET_ACCESS_KEY"),
84
+ session_token=os.environ.get("AWS_SESSION_TOKEN"),
85
+ role_arn=os.environ.get("DORY_S3_ROLE_ARN"),
86
+ credential_broker_url=os.environ.get("DORY_CREDENTIAL_BROKER_URL"),
87
+ enable_offline_buffer=os.environ.get(
88
+ "DORY_S3_ENABLE_OFFLINE_BUFFER", "true"
89
+ ).lower() == "true",
90
+ buffer_path=os.environ.get(
91
+ "DORY_S3_BUFFER_PATH", "/data/dory-state-buffer.db"
92
+ ),
93
+ )
94
+
95
+
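For context, a minimal sketch of how this configuration might be constructed, either directly or via from_env(). The import path, bucket name, and MinIO endpoint are assumptions for illustration; the diff does not show where the module is exposed inside the dory package.

    import os

    # Hypothetical import path; the diff does not reveal the module's location.
    from dory.state.s3 import S3Config

    # Direct construction, e.g. against an S3-compatible endpoint such as MinIO.
    config = S3Config(
        bucket="edge-state",
        region="us-east-1",
        endpoint_url="http://localhost:9000",  # omit for real AWS S3
    )

    # Or build the config from environment variables instead.
    os.environ["DORY_S3_BUCKET"] = "edge-state"
    os.environ["DORY_S3_REGION"] = "us-east-1"
    env_config = S3Config.from_env()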
96
+ class OfflineBuffer:
97
+ """
98
+ SQLite-based local buffer for offline state persistence.
99
+
100
+ Stores state locally when S3 is unreachable and syncs when
101
+ connectivity is restored.
102
+ """
103
+
104
+ def __init__(self, db_path: str, fallback_path: str):
105
+ """
106
+ Initialize offline buffer.
107
+
108
+ Args:
109
+ db_path: Primary path for SQLite database
110
+ fallback_path: Fallback path if primary is not writable
111
+ """
112
+ self._db_path = self._resolve_path(db_path, fallback_path)
113
+ self._conn: sqlite3.Connection | None = None
114
+ self._initialized = False
115
+
116
+ def _resolve_path(self, primary: str, fallback: str) -> str:
117
+ """Resolve which path to use for the database."""
118
+ primary_dir = Path(primary).parent
119
+ if primary_dir.exists() and os.access(primary_dir, os.W_OK):
120
+ return primary
121
+
122
+ fallback_dir = Path(fallback).parent
123
+ fallback_dir.mkdir(parents=True, exist_ok=True)
124
+ logger.info(f"Using fallback buffer path: {fallback}")
125
+ return fallback
126
+
127
+ def _ensure_initialized(self) -> None:
128
+ """Initialize database if not already done."""
129
+ if self._initialized:
130
+ return
131
+
132
+ self._conn = sqlite3.connect(self._db_path)
133
+ self._conn.execute("""
134
+ CREATE TABLE IF NOT EXISTS state_buffer (
135
+ processor_id TEXT PRIMARY KEY,
136
+ state_json TEXT NOT NULL,
137
+ created_at REAL NOT NULL,
138
+ synced_at REAL,
139
+ sync_attempts INTEGER DEFAULT 0
140
+ )
141
+ """)
142
+ self._conn.execute("""
143
+ CREATE INDEX IF NOT EXISTS idx_synced
144
+ ON state_buffer(synced_at)
145
+ """)
146
+ self._conn.commit()
147
+ self._initialized = True
148
+ logger.debug(f"Offline buffer initialized at {self._db_path}")
149
+
150
+ def save(self, processor_id: str, state_json: str) -> None:
151
+ """Save state to local buffer."""
152
+ self._ensure_initialized()
153
+
154
+ self._conn.execute("""
155
+ INSERT OR REPLACE INTO state_buffer
156
+ (processor_id, state_json, created_at, synced_at, sync_attempts)
157
+ VALUES (?, ?, ?, NULL, 0)
158
+ """, (processor_id, state_json, time.time()))
159
+ self._conn.commit()
160
+ logger.debug(f"State buffered locally for {processor_id}")
161
+
162
+ def load(self, processor_id: str) -> str | None:
163
+ """Load state from local buffer."""
164
+ self._ensure_initialized()
165
+
166
+ cursor = self._conn.execute("""
167
+ SELECT state_json FROM state_buffer
168
+ WHERE processor_id = ?
169
+ """, (processor_id,))
170
+ row = cursor.fetchone()
171
+ return row[0] if row else None
172
+
173
+ def mark_synced(self, processor_id: str) -> None:
174
+ """Mark state as synced to S3."""
175
+ self._ensure_initialized()
176
+
177
+ self._conn.execute("""
178
+ UPDATE state_buffer
179
+ SET synced_at = ?
180
+ WHERE processor_id = ?
181
+ """, (time.time(), processor_id))
182
+ self._conn.commit()
183
+
184
+ def get_unsynced(self) -> list[tuple[str, str]]:
185
+ """Get all unsynced states."""
186
+ self._ensure_initialized()
187
+
188
+ cursor = self._conn.execute("""
189
+ SELECT processor_id, state_json
190
+ FROM state_buffer
191
+ WHERE synced_at IS NULL
192
+ ORDER BY created_at ASC
193
+ """)
194
+ return cursor.fetchall()
195
+
196
+ def increment_sync_attempts(self, processor_id: str) -> None:
197
+ """Increment sync attempt counter."""
198
+ self._ensure_initialized()
199
+
200
+ self._conn.execute("""
201
+ UPDATE state_buffer
202
+ SET sync_attempts = sync_attempts + 1
203
+ WHERE processor_id = ?
204
+ """, (processor_id,))
205
+ self._conn.commit()
206
+
207
+ def delete(self, processor_id: str) -> bool:
208
+ """Delete state from buffer."""
209
+ self._ensure_initialized()
210
+
211
+ cursor = self._conn.execute("""
212
+ DELETE FROM state_buffer WHERE processor_id = ?
213
+ """, (processor_id,))
214
+ self._conn.commit()
215
+ return cursor.rowcount > 0
216
+
217
+ def cleanup_old(self, max_age_seconds: int) -> int:
218
+ """Remove entries older than max_age_seconds."""
219
+ self._ensure_initialized()
220
+
221
+ cutoff = time.time() - max_age_seconds
222
+ cursor = self._conn.execute("""
223
+ DELETE FROM state_buffer
224
+ WHERE created_at < ? AND synced_at IS NOT NULL
225
+ """, (cutoff,))
226
+ self._conn.commit()
227
+ return cursor.rowcount
228
+
229
+ def close(self) -> None:
230
+ """Close database connection."""
231
+ if self._conn:
232
+ self._conn.close()
233
+ self._conn = None
234
+ self._initialized = False
235
+
236
+
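A short standalone sketch of the buffer (assuming the same hypothetical import path as above). It writes to a temporary SQLite file and walks the save → get_unsynced → mark_synced cycle that S3Store drives internally when connectivity drops and later returns.

    import json
    import tempfile
    from pathlib import Path

    # Hypothetical import path; adjust to wherever the package exposes OfflineBuffer.
    from dory.state.s3 import OfflineBuffer

    tmp = Path(tempfile.mkdtemp())
    buffer = OfflineBuffer(
        db_path=str(tmp / "buffer.db"),
        fallback_path=str(tmp / "fallback-buffer.db"),
    )

    # Buffer a state blob locally, as S3Store does when S3 is unreachable.
    buffer.save("processor-1", json.dumps({"offset": 42}))

    # A later sync pass reads everything that has not reached S3 yet...
    for processor_id, state_json in buffer.get_unsynced():
        print(processor_id, state_json)
        # ...and marks each entry synced once its upload succeeds.
        buffer.mark_synced(processor_id)

    buffer.close()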
237
+ class S3Store:
238
+ """
239
+ Store and retrieve state from AWS S3.
240
+
241
+ Supports offline buffering for edge nodes with intermittent connectivity.
242
+
243
+ S3 key format: {prefix}/{processor_id}/state.json
244
+ """
245
+
246
+ def __init__(self, config: S3Config | None = None):
247
+ """
248
+ Initialize S3 store.
249
+
250
+ Args:
251
+ config: S3 configuration (defaults to from_env())
252
+ """
253
+ if not BOTO3_AVAILABLE:
254
+ raise DoryStateError(
255
+ "boto3 is required for S3 backend. "
256
+ "Install with: pip install boto3"
257
+ )
258
+
259
+ self._config = config or S3Config.from_env()
260
+ self._client: Any = None
261
+ self._initialized = False
262
+
263
+ # Offline buffer
264
+ self._buffer: OfflineBuffer | None = None
265
+ if self._config.enable_offline_buffer:
266
+ self._buffer = OfflineBuffer(
267
+ self._config.buffer_path,
268
+ self._config.buffer_path_fallback,
269
+ )
270
+
271
+ # Background sync task
272
+ self._sync_task: asyncio.Task | None = None
273
+
274
+ def _ensure_initialized(self) -> None:
275
+ """Initialize S3 client if not already done."""
276
+ if self._initialized:
277
+ return
278
+
279
+ try:
280
+ session_kwargs = {}
281
+ client_kwargs = {}
282
+
283
+ # Region
284
+ if self._config.region:
285
+ session_kwargs["region_name"] = self._config.region
286
+
287
+ # Explicit credentials
288
+ if self._config.access_key_id and self._config.secret_access_key:
289
+ session_kwargs["aws_access_key_id"] = self._config.access_key_id
290
+ session_kwargs["aws_secret_access_key"] = self._config.secret_access_key
291
+ if self._config.session_token:
292
+ session_kwargs["aws_session_token"] = self._config.session_token
293
+
294
+ # Custom endpoint (MinIO, LocalStack)
295
+ if self._config.endpoint_url:
296
+ client_kwargs["endpoint_url"] = self._config.endpoint_url
297
+
298
+ # Create session and client
299
+ session = boto3.Session(**session_kwargs)
300
+
301
+ # Assume role if specified
302
+ if self._config.role_arn:
303
+ sts = session.client("sts")
304
+ assumed = sts.assume_role(
305
+ RoleArn=self._config.role_arn,
306
+ RoleSessionName="dory-state-manager",
307
+ )
308
+ credentials = assumed["Credentials"]
309
+ session = boto3.Session(
310
+ aws_access_key_id=credentials["AccessKeyId"],
311
+ aws_secret_access_key=credentials["SecretAccessKey"],
312
+ aws_session_token=credentials["SessionToken"],
313
+ region_name=self._config.region,
314
+ )
315
+
316
+ self._client = session.client("s3", **client_kwargs)
317
+ self._initialized = True
318
+ logger.debug(f"S3 client initialized for bucket {self._config.bucket}")
319
+
320
+ except NoCredentialsError as e:
321
+ raise DoryStateError(
322
+ "No AWS credentials found. Configure via environment variables, "
323
+ "IAM role, or credential broker.",
324
+ cause=e,
325
+ )
326
+ except Exception as e:
327
+ raise DoryStateError(f"Failed to initialize S3 client: {e}", cause=e)
328
+
329
+ def _s3_key(self, processor_id: str) -> str:
330
+ """Generate S3 key for processor state."""
331
+ return f"{self._config.prefix}/{processor_id}/state.json"
332
+
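For example, with the default prefix and a processor ID of processor-1, state is written to s3://<bucket>/dory-state/processor-1/state.json.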
333
+ async def _retry_operation(
334
+ self,
335
+ operation: str,
336
+ func: Any,
337
+ *args: Any,
338
+ **kwargs: Any,
339
+ ) -> Any:
340
+ """Execute operation with retry logic."""
341
+ last_error = None
342
+ delay = self._config.retry_delay_seconds
343
+
344
+ for attempt in range(1, self._config.max_retries + 1):
345
+ try:
346
+ # Run synchronous boto3 call in executor
347
+ loop = asyncio.get_event_loop()
348
+ result = await loop.run_in_executor(None, lambda: func(*args, **kwargs))
349
+ return result
350
+
351
+ except (ClientError, BotoCoreError) as e:
352
+ last_error = e
353
+ if attempt < self._config.max_retries:
354
+ logger.warning(
355
+ f"S3 {operation} attempt {attempt} failed: {e}. "
356
+ f"Retrying in {delay:.1f}s..."
357
+ )
358
+ await asyncio.sleep(delay)
359
+ delay *= self._config.retry_backoff_multiplier
360
+
361
+ raise DoryStateError(
362
+ f"S3 {operation} failed after {self._config.max_retries} attempts: {last_error}",
363
+ cause=last_error,
364
+ )
365
+
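With the default settings above (max_retries=3, retry_delay_seconds=1.0, retry_backoff_multiplier=2.0), the wait between attempts grows geometrically. A quick sketch of the schedule this loop produces:

    max_retries = 3
    delay = 1.0        # retry_delay_seconds
    multiplier = 2.0   # retry_backoff_multiplier

    for attempt in range(1, max_retries + 1):
        if attempt < max_retries:
            print(f"attempt {attempt} failed -> sleep {delay:.1f}s")
            delay *= multiplier
        else:
            print(f"attempt {attempt} failed -> raise DoryStateError")

    # attempt 1 failed -> sleep 1.0s
    # attempt 2 failed -> sleep 2.0s
    # attempt 3 failed -> raise DoryStateError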
366
+ async def save(
367
+ self,
368
+ processor_id: str,
369
+ state_json: str,
370
+ metadata: dict[str, str] | None = None,
371
+ ) -> None:
372
+ """
373
+ Save state to S3.
374
+
375
+ If S3 is unreachable and offline buffering is enabled,
376
+ state is saved to local buffer for later sync.
377
+
378
+ Args:
379
+ processor_id: Processor ID
380
+ state_json: JSON-serialized state
381
+ metadata: Optional metadata to store with object
382
+
383
+ Raises:
384
+ DoryStateError: If save fails and no buffer available
385
+ """
386
+ self._ensure_initialized()
387
+
388
+ s3_key = self._s3_key(processor_id)
389
+ s3_metadata = metadata or {}
390
+ s3_metadata["processor-id"] = processor_id
391
+ s3_metadata["saved-at"] = time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime())
392
+
393
+ try:
394
+ await self._retry_operation(
395
+ "put_object",
396
+ self._client.put_object,
397
+ Bucket=self._config.bucket,
398
+ Key=s3_key,
399
+ Body=state_json.encode("utf-8"),
400
+ ContentType="application/json",
401
+ Metadata=s3_metadata,
402
+ )
403
+ logger.debug(f"State saved to S3: s3://{self._config.bucket}/{s3_key}")
404
+
405
+ # If buffer exists, mark as synced
406
+ if self._buffer:
407
+ self._buffer.save(processor_id, state_json)
408
+ self._buffer.mark_synced(processor_id)
409
+
410
+ except DoryStateError:
411
+ # S3 failed - try to buffer locally
412
+ if self._buffer:
413
+ self._buffer.save(processor_id, state_json)
414
+ logger.warning(
415
+ f"S3 unavailable, state buffered locally for {processor_id}"
416
+ )
417
+ else:
418
+ raise
419
+
420
+ async def load(self, processor_id: str) -> str | None:
421
+ """
422
+ Load state from S3.
423
+
424
+ Falls back to local buffer if S3 is unreachable.
425
+
426
+ Args:
427
+ processor_id: Processor ID
428
+
429
+ Returns:
430
+ JSON-serialized state, or None if not found
431
+
432
+ Raises:
433
+ DoryStateError: If load fails
434
+ """
435
+ self._ensure_initialized()
436
+
437
+ s3_key = self._s3_key(processor_id)
438
+
439
+ try:
440
+ response = await self._retry_operation(
441
+ "get_object",
442
+ self._client.get_object,
443
+ Bucket=self._config.bucket,
444
+ Key=s3_key,
445
+ )
446
+
447
+ # Read body in executor
448
+ loop = asyncio.get_event_loop()
449
+ body = await loop.run_in_executor(
450
+ None,
451
+ lambda: response["Body"].read().decode("utf-8"),
452
+ )
453
+
454
+ logger.debug(f"State loaded from S3: s3://{self._config.bucket}/{s3_key}")
455
+ return body
456
+
457
+ except DoryStateError as e:
458
+ # Check if it's a 404
459
+ if "NoSuchKey" in str(e) or "404" in str(e):
460
+ logger.debug(f"State not found in S3: {s3_key}")
461
+
462
+ # Try local buffer as fallback
463
+ if self._buffer:
464
+ buffered = self._buffer.load(processor_id)
465
+ if buffered:
466
+ logger.info(
467
+ f"Using buffered state for {processor_id} (not yet synced to S3)"
468
+ )
469
+ return buffered
470
+
471
+ return None
472
+
473
+ # S3 error - try local buffer
474
+ if self._buffer:
475
+ buffered = self._buffer.load(processor_id)
476
+ if buffered:
477
+ logger.warning(
478
+ f"S3 unavailable, using buffered state for {processor_id}"
479
+ )
480
+ return buffered
481
+
482
+ raise
483
+
484
+ except ClientError as e:
485
+ if e.response.get("Error", {}).get("Code") == "NoSuchKey":
486
+ logger.debug(f"State not found in S3: {s3_key}")
487
+ return None
488
+ raise DoryStateError(f"Failed to load state from S3: {e}", cause=e)
489
+
490
+ async def delete(self, processor_id: str) -> bool:
491
+ """
492
+ Delete state from S3.
493
+
494
+ Args:
495
+ processor_id: Processor ID
496
+
497
+ Returns:
498
+ True if deleted, False if not found
499
+
500
+ Raises:
501
+ DoryStateError: If delete fails
502
+ """
503
+ self._ensure_initialized()
504
+
505
+ s3_key = self._s3_key(processor_id)
506
+
507
+ try:
508
+ # Check if exists first
509
+ try:
510
+ await self._retry_operation(
511
+ "head_object",
512
+ self._client.head_object,
513
+ Bucket=self._config.bucket,
514
+ Key=s3_key,
515
+ )
516
+ except DoryStateError:
517
+ # Not found
518
+ if self._buffer:
519
+ self._buffer.delete(processor_id)
520
+ return False
521
+
522
+ # Delete from S3
523
+ await self._retry_operation(
524
+ "delete_object",
525
+ self._client.delete_object,
526
+ Bucket=self._config.bucket,
527
+ Key=s3_key,
528
+ )
529
+ logger.debug(f"State deleted from S3: s3://{self._config.bucket}/{s3_key}")
530
+
531
+ # Delete from buffer too
532
+ if self._buffer:
533
+ self._buffer.delete(processor_id)
534
+
535
+ return True
536
+
537
+ except ClientError as e:
538
+ if e.response.get("Error", {}).get("Code") == "NoSuchKey":
539
+ return False
540
+ raise DoryStateError(f"Failed to delete state from S3: {e}", cause=e)
541
+
542
+ async def exists(self, processor_id: str) -> bool:
543
+ """
544
+ Check if state exists in S3.
545
+
546
+ Args:
547
+ processor_id: Processor ID
548
+
549
+ Returns:
550
+ True if state exists
551
+ """
552
+ self._ensure_initialized()
553
+
554
+ s3_key = self._s3_key(processor_id)
555
+
556
+ try:
557
+ await self._retry_operation(
558
+ "head_object",
559
+ self._client.head_object,
560
+ Bucket=self._config.bucket,
561
+ Key=s3_key,
562
+ )
563
+ return True
564
+ except (DoryStateError, ClientError):
565
+ return False
566
+
567
+ async def sync_buffer(self) -> int:
568
+ """
569
+ Sync buffered states to S3.
570
+
571
+ Call periodically to upload states that were buffered
572
+ during connectivity issues.
573
+
574
+ Returns:
575
+ Number of states synced
576
+ """
577
+ if not self._buffer:
578
+ return 0
579
+
580
+ unsynced = self._buffer.get_unsynced()
581
+ synced_count = 0
582
+
583
+ for processor_id, state_json in unsynced:
584
+ try:
585
+ s3_key = self._s3_key(processor_id)
586
+
587
+ await self._retry_operation(
588
+ "put_object",
589
+ self._client.put_object,
590
+ Bucket=self._config.bucket,
591
+ Key=s3_key,
592
+ Body=state_json.encode("utf-8"),
593
+ ContentType="application/json",
594
+ Metadata={
595
+ "processor-id": processor_id,
596
+ "synced-from-buffer": "true",
597
+ "synced-at": time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime()),
598
+ },
599
+ )
600
+
601
+ self._buffer.mark_synced(processor_id)
602
+ synced_count += 1
603
+ logger.info(f"Synced buffered state for {processor_id} to S3")
604
+
605
+ except DoryStateError as e:
606
+ self._buffer.increment_sync_attempts(processor_id)
607
+ logger.warning(f"Failed to sync buffered state for {processor_id}: {e}")
608
+
609
+ # Cleanup old synced entries
610
+ if synced_count > 0:
611
+ cleaned = self._buffer.cleanup_old(self._config.max_buffer_age_seconds)
612
+ if cleaned > 0:
613
+ logger.debug(f"Cleaned up {cleaned} old buffer entries")
614
+
615
+ return synced_count
616
+
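As a rough sketch (assuming a store instance constructed as above), an edge node that notices its uplink is back could trigger a one-off flush instead of waiting for the background task:

    import asyncio

    async def flush_after_reconnect(store) -> None:
        # One-off upload of locally buffered states once connectivity returns.
        synced = await store.sync_buffer()
        print(f"uploaded {synced} buffered state(s) to S3")

    # asyncio.run(flush_after_reconnect(store))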
617
+ async def start_background_sync(self, interval_seconds: float = 60.0) -> None:
618
+ """
619
+ Start background sync task.
620
+
621
+ Args:
622
+ interval_seconds: Interval between sync attempts
623
+ """
624
+ if self._sync_task is not None:
625
+ return
626
+
627
+ async def sync_loop():
628
+ while True:
629
+ try:
630
+ await asyncio.sleep(interval_seconds)
631
+ synced = await self.sync_buffer()
632
+ if synced > 0:
633
+ logger.debug(f"Background sync: {synced} states synced")
634
+ except asyncio.CancelledError:
635
+ break
636
+ except Exception as e:
637
+ logger.error(f"Background sync error: {e}")
638
+
639
+ self._sync_task = asyncio.create_task(sync_loop())
640
+ logger.info(f"Started background S3 sync (interval: {interval_seconds}s)")
641
+
642
+ async def stop_background_sync(self) -> None:
643
+ """Stop background sync task."""
644
+ if self._sync_task:
645
+ self._sync_task.cancel()
646
+ try:
647
+ await self._sync_task
648
+ except asyncio.CancelledError:
649
+ pass
650
+ self._sync_task = None
651
+ logger.info("Stopped background S3 sync")
652
+
653
+ def close(self) -> None:
654
+ """Close resources."""
655
+ if self._buffer:
656
+ self._buffer.close()
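To close out, a hedged end-to-end sketch of how the store might be used. The import path, bucket name, and metadata values are assumptions for illustration; the calls themselves mirror the methods defined above, and a reachable S3 endpoint plus valid credentials are required for it to run.

    import asyncio

    # Hypothetical import path; the diff does not show the module's final location.
    from dory.state.s3 import S3Config, S3Store


    async def main() -> None:
        store = S3Store(S3Config(bucket="edge-state", region="us-east-1"))

        # Persist and read back a processor's state.
        await store.save("processor-1", '{"offset": 42}', metadata={"node": "edge-7"})
        print("loaded:", await store.load("processor-1"))

        # Periodically push any states that were buffered while offline.
        await store.start_background_sync(interval_seconds=60.0)
        try:
            await asyncio.sleep(5)  # stand-in for the processor's run loop
        finally:
            await store.stop_background_sync()
            store.close()


    if __name__ == "__main__":
        asyncio.run(main())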