AbstractRuntime 0.4.0-py3-none-any.whl → 0.4.1-py3-none-any.whl
This diff shows the changes between two publicly released versions of the package as they appear in their public registry, and is provided for informational purposes only.
- abstractruntime/__init__.py +76 -1
- abstractruntime/core/config.py +68 -1
- abstractruntime/core/models.py +5 -0
- abstractruntime/core/policy.py +74 -3
- abstractruntime/core/runtime.py +1002 -126
- abstractruntime/core/vars.py +8 -2
- abstractruntime/evidence/recorder.py +1 -1
- abstractruntime/history_bundle.py +772 -0
- abstractruntime/integrations/abstractcore/__init__.py +3 -0
- abstractruntime/integrations/abstractcore/default_tools.py +127 -3
- abstractruntime/integrations/abstractcore/effect_handlers.py +2440 -99
- abstractruntime/integrations/abstractcore/embeddings_client.py +69 -0
- abstractruntime/integrations/abstractcore/factory.py +68 -20
- abstractruntime/integrations/abstractcore/llm_client.py +447 -15
- abstractruntime/integrations/abstractcore/mcp_worker.py +1 -0
- abstractruntime/integrations/abstractcore/session_attachments.py +946 -0
- abstractruntime/integrations/abstractcore/tool_executor.py +31 -10
- abstractruntime/integrations/abstractcore/workspace_scoped_tools.py +561 -0
- abstractruntime/integrations/abstractmemory/__init__.py +3 -0
- abstractruntime/integrations/abstractmemory/effect_handlers.py +946 -0
- abstractruntime/memory/active_context.py +6 -1
- abstractruntime/memory/kg_packets.py +164 -0
- abstractruntime/memory/memact_composer.py +175 -0
- abstractruntime/memory/recall_levels.py +163 -0
- abstractruntime/memory/token_budget.py +86 -0
- abstractruntime/storage/__init__.py +4 -1
- abstractruntime/storage/artifacts.py +158 -30
- abstractruntime/storage/base.py +17 -1
- abstractruntime/storage/commands.py +339 -0
- abstractruntime/storage/in_memory.py +41 -1
- abstractruntime/storage/json_files.py +195 -12
- abstractruntime/storage/observable.py +38 -1
- abstractruntime/storage/offloading.py +433 -0
- abstractruntime/storage/sqlite.py +836 -0
- abstractruntime/visualflow_compiler/__init__.py +29 -0
- abstractruntime/visualflow_compiler/adapters/__init__.py +11 -0
- abstractruntime/visualflow_compiler/adapters/agent_adapter.py +126 -0
- abstractruntime/visualflow_compiler/adapters/context_adapter.py +109 -0
- abstractruntime/visualflow_compiler/adapters/control_adapter.py +615 -0
- abstractruntime/visualflow_compiler/adapters/effect_adapter.py +1051 -0
- abstractruntime/visualflow_compiler/adapters/event_adapter.py +307 -0
- abstractruntime/visualflow_compiler/adapters/function_adapter.py +97 -0
- abstractruntime/visualflow_compiler/adapters/memact_adapter.py +114 -0
- abstractruntime/visualflow_compiler/adapters/subflow_adapter.py +74 -0
- abstractruntime/visualflow_compiler/adapters/variable_adapter.py +316 -0
- abstractruntime/visualflow_compiler/compiler.py +3832 -0
- abstractruntime/visualflow_compiler/flow.py +247 -0
- abstractruntime/visualflow_compiler/visual/__init__.py +13 -0
- abstractruntime/visualflow_compiler/visual/agent_ids.py +29 -0
- abstractruntime/visualflow_compiler/visual/builtins.py +1376 -0
- abstractruntime/visualflow_compiler/visual/code_executor.py +214 -0
- abstractruntime/visualflow_compiler/visual/executor.py +2804 -0
- abstractruntime/visualflow_compiler/visual/models.py +211 -0
- abstractruntime/workflow_bundle/__init__.py +52 -0
- abstractruntime/workflow_bundle/models.py +236 -0
- abstractruntime/workflow_bundle/packer.py +317 -0
- abstractruntime/workflow_bundle/reader.py +87 -0
- abstractruntime/workflow_bundle/registry.py +587 -0
- abstractruntime-0.4.1.dist-info/METADATA +177 -0
- abstractruntime-0.4.1.dist-info/RECORD +86 -0
- abstractruntime-0.4.0.dist-info/METADATA +0 -167
- abstractruntime-0.4.0.dist-info/RECORD +0 -49
- {abstractruntime-0.4.0.dist-info → abstractruntime-0.4.1.dist-info}/WHEEL +0 -0
- {abstractruntime-0.4.0.dist-info → abstractruntime-0.4.1.dist-info}/entry_points.txt +0 -0
- {abstractruntime-0.4.0.dist-info → abstractruntime-0.4.1.dist-info}/licenses/LICENSE +0 -0
abstractruntime/storage/artifacts.py
CHANGED

@@ -44,6 +44,7 @@ class ArtifactMetadata:
     content_type: str  # MIME type or semantic type
     size_bytes: int
     created_at: str
+    blob_id: Optional[str] = None  # Global (cross-run) content hash for dedupe
     run_id: Optional[str] = None  # Optional association with a run
     tags: Dict[str, str] = field(default_factory=dict)
 
@@ -54,6 +55,7 @@ class ArtifactMetadata:
     def from_dict(cls, data: Dict[str, Any]) -> "ArtifactMetadata":
         return cls(
             artifact_id=data["artifact_id"],
+            blob_id=data.get("blob_id"),
            content_type=data["content_type"],
            size_bytes=data["size_bytes"],
            created_at=data["created_at"],
@@ -92,9 +94,8 @@ def compute_artifact_id(content: bytes, *, run_id: Optional[str] = None) -> str:
     By default, artifacts are content-addressed (SHA-256, truncated) so the same bytes
     produce the same id.
 
-    If `run_id` is provided, the id is *namespaced to that run*
-
-    correct `list_by_run(...)` / purge-by-run semantics.
+    If `run_id` is provided, the id is *namespaced to that run* so each run can have a
+    distinct artifact_id (while still enabling cross-run blob dedupe via `blob_id`).
     """
     h = hashlib.sha256()
     if run_id is not None:
@@ -106,6 +107,11 @@ def compute_artifact_id(content: bytes, *, run_id: Optional[str] = None) -> str:
     return h.hexdigest()[:32]
 
 
+def compute_blob_id(content: bytes) -> str:
+    """Compute a stable, global content hash for artifact blob dedupe."""
+    return hashlib.sha256(content).hexdigest()
+
+
 def validate_artifact_id(artifact_id: str) -> None:
     """Validate artifact ID to prevent path traversal attacks.
 
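
The split between the two hashes is the core of the dedupe scheme: `compute_artifact_id` namespaces the id to a run, while `compute_blob_id` stays global, so identical bytes stored by different runs get distinct artifact ids but share one blob. A minimal sketch of that property; the exact way `run_id` feeds into the hash is an assumption, since the diff cuts off right after `if run_id is not None:`:

```python
import hashlib
from typing import Optional

def compute_artifact_id(content: bytes, *, run_id: Optional[str] = None) -> str:
    h = hashlib.sha256()
    if run_id is not None:
        h.update(run_id.encode("utf-8"))  # assumed namespacing step (not shown in the diff)
    h.update(content)
    return h.hexdigest()[:32]

def compute_blob_id(content: bytes) -> str:
    return hashlib.sha256(content).hexdigest()

data = b"same bytes stored by two different runs"
assert compute_artifact_id(data, run_id="run-1") != compute_artifact_id(data, run_id="run-2")
assert compute_blob_id(data) == compute_blob_id(data)  # global, run-independent
```
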
@@ -337,6 +343,7 @@ class InMemoryArtifactStore(ArtifactStore):
 
         metadata = ArtifactMetadata(
             artifact_id=artifact_id,
+            blob_id=compute_blob_id(content),
             content_type=content_type,
             size_bytes=len(content),
             created_at=utc_now_iso(),
@@ -382,26 +389,77 @@ class InMemoryArtifactStore(ArtifactStore):
 class FileArtifactStore(ArtifactStore):
     """File-based artifact store.
 
-    Directory structure:
+    Directory structure (v1, cross-run blob dedupe):
+        base_dir/
+            artifacts/
+                blobs/{blob_id}.bin      # global content-addressed bytes
+                refs/{artifact_id}.meta  # per-artifact metadata (points to blob_id)
+
+    Legacy layout (v0) is still supported for reads:
         base_dir/
             artifacts/
-                {artifact_id}.bin
-                {artifact_id}.meta
+                {artifact_id}.bin
+                {artifact_id}.meta
     """
 
     def __init__(self, base_dir: Union[str, Path]) -> None:
         self._base = Path(base_dir)
         self._artifacts_dir = self._base / "artifacts"
-        self._artifacts_dir.mkdir(parents=True, exist_ok=True)
+        self._blobs_dir = self._artifacts_dir / "blobs"
+        self._refs_dir = self._artifacts_dir / "refs"
+        self._blobs_dir.mkdir(parents=True, exist_ok=True)
+        self._refs_dir.mkdir(parents=True, exist_ok=True)
 
-    def _content_path(self, artifact_id: str) -> Path:
+    def _legacy_content_path(self, artifact_id: str) -> Path:
         validate_artifact_id(artifact_id)
         return self._artifacts_dir / f"{artifact_id}.bin"
 
-    def _metadata_path(self, artifact_id: str) -> Path:
+    def _legacy_metadata_path(self, artifact_id: str) -> Path:
         validate_artifact_id(artifact_id)
         return self._artifacts_dir / f"{artifact_id}.meta"
 
+    def _ref_metadata_path(self, artifact_id: str) -> Path:
+        validate_artifact_id(artifact_id)
+        return self._refs_dir / f"{artifact_id}.meta"
+
+    def _blob_path(self, blob_id: str) -> Path:
+        validate_artifact_id(blob_id)
+        return self._blobs_dir / f"{blob_id}.bin"
+
+    def _write_blob(self, *, blob_id: str, content: bytes) -> Path:
+        path = self._blob_path(blob_id)
+        if path.exists():
+            return path
+        import uuid
+
+        tmp = path.with_name(f"{path.name}.{uuid.uuid4().hex}.tmp")
+        try:
+            with open(tmp, "wb") as f:
+                f.write(content)
+            tmp.replace(path)
+        finally:
+            try:
+                if tmp.exists():
+                    tmp.unlink()
+            except Exception:
+                pass
+        return path
+
+    def _content_path(self, artifact_id: str) -> Path:
+        validate_artifact_id(artifact_id)
+        meta = self.get_metadata(artifact_id)
+        blob_id = getattr(meta, "blob_id", None) if meta is not None else None
+        if isinstance(blob_id, str) and blob_id.strip():
+            return self._blob_path(blob_id.strip())
+        return self._legacy_content_path(artifact_id)
+
+    def _metadata_path(self, artifact_id: str) -> Path:
+        validate_artifact_id(artifact_id)
+        p = self._ref_metadata_path(artifact_id)
+        if p.exists():
+            return p
+        return self._legacy_metadata_path(artifact_id)
+
     def store(
         self,
         content: bytes,
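
To see the v1 layout land on disk, a short sketch against `FileArtifactStore` as defined above (the `content_type`/`run_id` keyword names are read off the diff; the base path is illustrative):

```python
from pathlib import Path
from abstractruntime.storage.artifacts import FileArtifactStore

store = FileArtifactStore("/tmp/ar-layout-demo")
meta = store.store(b"hello", content_type="text/plain", run_id="run-1")

base = Path("/tmp/ar-layout-demo/artifacts")
assert (base / "refs" / f"{meta.artifact_id}.meta").exists()  # per-artifact ref
assert (base / "blobs" / f"{meta.blob_id}.bin").exists()      # shared, content-addressed blob

# A v0 pair dropped directly under artifacts/ ({id}.bin + {id}.meta)
# would still be readable via the legacy fallback paths.
```
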
@@ -413,9 +471,11 @@ class FileArtifactStore(ArtifactStore):
     ) -> ArtifactMetadata:
         if artifact_id is None:
             artifact_id = compute_artifact_id(content, run_id=run_id)
+        blob_id = compute_blob_id(content)
 
         metadata = ArtifactMetadata(
             artifact_id=artifact_id,
+            blob_id=blob_id,
             content_type=content_type,
             size_bytes=len(content),
             created_at=utc_now_iso(),
@@ -423,39 +483,40 @@ class FileArtifactStore(ArtifactStore):
             tags=tags or {},
         )
 
-        # Write content
-        content_path = self._content_path(artifact_id)
-        with open(content_path, "wb") as f:
-            f.write(content)
+        # Write blob bytes (deduped across runs)
+        self._write_blob(blob_id=blob_id, content=content)
 
         # Write metadata
-        metadata_path = self._metadata_path(artifact_id)
+        metadata_path = self._ref_metadata_path(artifact_id)
         with open(metadata_path, "w", encoding="utf-8") as f:
             json.dump(metadata.to_dict(), f, ensure_ascii=False, indent=2)
 
         return metadata
 
     def load(self, artifact_id: str) -> Optional[Artifact]:
-        content_path = self._content_path(artifact_id)
         metadata_path = self._metadata_path(artifact_id)
-
-        if not content_path.exists() or not metadata_path.exists():
+        if not metadata_path.exists():
             return None
 
-        with open(content_path, "rb") as f:
-            content = f.read()
-
         with open(metadata_path, "r", encoding="utf-8") as f:
             metadata_dict = json.load(f)
 
         metadata = ArtifactMetadata.from_dict(metadata_dict)
+        content_path = self._content_path(artifact_id)
+        if not content_path.exists():
+            return None
+
+        with open(content_path, "rb") as f:
+            content = f.read()
         return Artifact(metadata=metadata, content=content)
 
     def get_metadata(self, artifact_id: str) -> Optional[ArtifactMetadata]:
-        metadata_path = self._metadata_path(artifact_id)
-
+        validate_artifact_id(artifact_id)
+        metadata_path = self._ref_metadata_path(artifact_id)
         if not metadata_path.exists():
-            return None
+            metadata_path = self._legacy_metadata_path(artifact_id)
+            if not metadata_path.exists():
+                return None
 
         with open(metadata_path, "r", encoding="utf-8") as f:
             metadata_dict = json.load(f)
@@ -463,28 +524,42 @@ class FileArtifactStore(ArtifactStore):
         return ArtifactMetadata.from_dict(metadata_dict)
 
     def exists(self, artifact_id: str) -> bool:
+        meta = self.get_metadata(artifact_id)
+        if meta is None:
+            return False
         return self._content_path(artifact_id).exists()
 
     def delete(self, artifact_id: str) -> bool:
-        content_path = self._content_path(artifact_id)
-        metadata_path = self._metadata_path(artifact_id)
+        validate_artifact_id(artifact_id)
+        metadata_path = self._ref_metadata_path(artifact_id)
+        legacy_meta = self._legacy_metadata_path(artifact_id)
+        legacy_content = self._legacy_content_path(artifact_id)
 
         deleted = False
-        if content_path.exists():
-            content_path.unlink()
-            deleted = True
         if metadata_path.exists():
             metadata_path.unlink()
             deleted = True
+        if legacy_meta.exists():
+            legacy_meta.unlink()
+            deleted = True
+        if legacy_content.exists():
+            legacy_content.unlink()
+            deleted = True
 
         return deleted
 
     def list_by_run(self, run_id: str) -> List[ArtifactMetadata]:
         results = []
-        for metadata_path in self._artifacts_dir.glob("*.meta"):
+        meta_paths = list(self._refs_dir.glob("*.meta")) + list(self._artifacts_dir.glob("*.meta"))
+        seen: set[str] = set()
+        for metadata_path in meta_paths:
             try:
                 with open(metadata_path, "r", encoding="utf-8") as f:
                     metadata_dict = json.load(f)
+                artifact_id = str(metadata_dict.get("artifact_id") or "").strip()
+                if not artifact_id or artifact_id in seen:
+                    continue
+                seen.add(artifact_id)
                 if metadata_dict.get("run_id") == run_id:
                     results.append(ArtifactMetadata.from_dict(metadata_dict))
             except (json.JSONDecodeError, IOError):
@@ -493,10 +568,16 @@ class FileArtifactStore(ArtifactStore):
 
     def list_all(self, *, limit: int = 1000) -> List[ArtifactMetadata]:
         results = []
-        for metadata_path in self._artifacts_dir.glob("*.meta"):
+        meta_paths = list(self._refs_dir.glob("*.meta")) + list(self._artifacts_dir.glob("*.meta"))
+        seen: set[str] = set()
+        for metadata_path in meta_paths:
             try:
                 with open(metadata_path, "r", encoding="utf-8") as f:
                     metadata_dict = json.load(f)
+                artifact_id = str(metadata_dict.get("artifact_id") or "").strip()
+                if not artifact_id or artifact_id in seen:
+                    continue
+                seen.add(artifact_id)
                 results.append(ArtifactMetadata.from_dict(metadata_dict))
             except (json.JSONDecodeError, IOError):
                 continue
@@ -504,6 +585,53 @@ class FileArtifactStore(ArtifactStore):
     results.sort(key=lambda m: m.created_at, reverse=True)
     return results[:limit]
 
+    def gc(self, *, dry_run: bool = True) -> Dict[str, Any]:
+        """Garbage collect unreferenced blobs.
+
+        Notes:
+        - This only applies to the v1 `artifacts/blobs` layout.
+        - Safe-by-default: `dry_run=True` returns the plan without deleting.
+        """
+
+        report: Dict[str, Any] = {
+            "dry_run": bool(dry_run),
+            "blobs_total": 0,
+            "blobs_referenced": 0,
+            "blobs_deleted": 0,
+            "bytes_reclaimed": 0,
+            "errors": [],
+        }
+
+        referenced: set[str] = set()
+        for meta in self.list_all(limit=1_000_000):
+            blob_id = getattr(meta, "blob_id", None)
+            if isinstance(blob_id, str) and blob_id.strip():
+                referenced.add(blob_id.strip())
+
+        report["blobs_referenced"] = len(referenced)
+
+        blobs = list(self._blobs_dir.glob("*.bin"))
+        report["blobs_total"] = len(blobs)
+
+        for p in blobs:
+            blob_id = p.stem
+            if blob_id in referenced:
+                continue
+            try:
+                size = p.stat().st_size
+            except Exception:
+                size = 0
+            if not dry_run:
+                try:
+                    p.unlink()
+                except Exception as e:
+                    report["errors"].append({"blob_id": blob_id, "error": str(e)})
+                    continue
+            report["blobs_deleted"] += 1
+            report["bytes_reclaimed"] += int(size)
+
+        return report
+
 
 # Artifact reference helpers for use in RunState.vars
 
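
Putting the pieces together, a hedged end-to-end sketch of the dedupe and GC flow (ids, paths, and the JSON payload are illustrative):

```python
from abstractruntime.storage.artifacts import FileArtifactStore

store = FileArtifactStore("/tmp/ar-gc-demo")
payload = b'{"hello": "world"}'

# Same bytes under two runs: two refs, one blob.
m1 = store.store(payload, content_type="application/json", run_id="run-1")
m2 = store.store(payload, content_type="application/json", run_id="run-2")
assert m1.artifact_id != m2.artifact_id and m1.blob_id == m2.blob_id

# Deleting one ref must not break the other: the blob is still referenced.
store.delete(m1.artifact_id)
assert store.load(m2.artifact_id) is not None

# Once the last ref is gone, gc() can reclaim the blob. Dry-run first.
store.delete(m2.artifact_id)
print(store.gc(dry_run=True))   # plan only: counts blobs it would delete
print(store.gc(dry_run=False))  # actually unlinks unreferenced blobs
```
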
abstractruntime/storage/base.py
CHANGED

@@ -95,6 +95,23 @@ class QueryableRunStore(Protocol):
         ...
 
 
+@runtime_checkable
+class QueryableRunIndexStore(Protocol):
+    """Optional fast-path for listing run summaries without loading full RunState payloads."""
+
+    def list_run_index(
+        self,
+        *,
+        status: Optional[RunStatus] = None,
+        workflow_id: Optional[str] = None,
+        session_id: Optional[str] = None,
+        root_only: bool = False,
+        limit: int = 100,
+    ) -> List[Dict[str, Any]]:
+        """List lightweight run index rows (most recent first)."""
+        ...
+
+
 class LedgerStore(ABC):
     """Append-only journal store."""
 
@@ -104,4 +121,3 @@ class LedgerStore(ABC):
     @abstractmethod
     def list(self, run_id: str) -> List[Dict[str, Any]]: ...
 
-
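
Because the protocol is `@runtime_checkable`, callers can probe a store for the index fast path with a plain `isinstance` check (which, for runtime-checkable protocols, only verifies that the method exists). A minimal sketch of that pattern; the fallback behavior shown is an assumption, since each store's full-payload listing API is store-specific:

```python
from typing import Any, Dict, List

from abstractruntime.storage.base import QueryableRunIndexStore

def list_recent_runs(store: Any, *, limit: int = 100) -> List[Dict[str, Any]]:
    """Prefer the lightweight run index when the store provides it."""
    if isinstance(store, QueryableRunIndexStore):
        # Fast path: index rows only; no full RunState payloads are loaded.
        return store.list_run_index(limit=limit)
    # No fast path available; callers would fall back to the store's own
    # full-payload listing API (store-specific, so not shown here).
    raise NotImplementedError("store does not implement QueryableRunIndexStore")
```
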
abstractruntime/storage/commands.py
ADDED

@@ -0,0 +1,339 @@
+"""abstractruntime.storage.commands
+
+Durable command inbox primitives (append-only, idempotent).
+
+Why this exists:
+- A remote Run Gateway (ADR-0018 / backlog 307) needs a control plane that is safe under
+  retries and intermittent networks.
+- The key SQS/Temporal insight is to decouple *command acceptance* from *fulfillment*:
+  clients submit commands with idempotency keys, and a worker processes them asynchronously.
+
+Design constraints:
+- JSON-safe records only (persisted).
+- Append-only storage (audit-friendly, replayable).
+- Idempotency by `command_id` (duplicate submissions are ignored).
+"""
+
+from __future__ import annotations
+
+import json
+import threading
+import uuid
+from dataclasses import asdict, dataclass
+from datetime import datetime, timezone
+from pathlib import Path
+from typing import Any, Dict, List, Optional, Protocol, Tuple, runtime_checkable
+
+
+def _utc_now_iso() -> str:
+    return datetime.now(timezone.utc).isoformat()
+
+
+def _is_json_value(value: Any) -> bool:
+    if value is None or isinstance(value, (str, int, float, bool)):
+        return True
+    if isinstance(value, list):
+        return all(_is_json_value(v) for v in value)
+    if isinstance(value, dict):
+        return all(isinstance(k, str) and _is_json_value(v) for k, v in value.items())
+    return False
+
+
+@dataclass(frozen=True)
+class CommandRecord:
+    """A durable command record.
+
+    Notes:
+    - `seq` is assigned by the store and provides cursor semantics for consumers.
+    - `payload` must be JSON-serializable (dict of JSON values).
+    """
+
+    command_id: str
+    run_id: str
+    type: str
+    payload: Dict[str, Any]
+    ts: str
+    client_id: Optional[str] = None
+    seq: int = 0
+
+    def to_json(self) -> Dict[str, Any]:
+        return asdict(self)
+
+
+@dataclass(frozen=True)
+class CommandAppendResult:
+    """Result of appending a command to a CommandStore."""
+
+    accepted: bool
+    duplicate: bool
+    seq: int
+
+
+@runtime_checkable
+class CommandStore(Protocol):
+    """Append-only inbox of commands with cursor replay semantics."""
+
+    def append(self, record: CommandRecord) -> CommandAppendResult:
+        """Append a command if its command_id is new (idempotent)."""
+
+    def list_after(self, *, after: int, limit: int = 1000) -> Tuple[List[CommandRecord], int]:
+        """Return commands with seq > after, up to limit, and the next cursor."""
+
+    def get_last_seq(self) -> int:
+        """Return the greatest assigned sequence number (0 if empty)."""
+
+
+@runtime_checkable
+class CommandCursorStore(Protocol):
+    """Durable consumer cursor for CommandStore replay."""
+
+    def load(self) -> int: ...
+
+    def save(self, cursor: int) -> None: ...
+
+
+class InMemoryCommandStore(CommandStore):
+    def __init__(self) -> None:
+        self._lock = threading.Lock()
+        self._seq = 0
+        self._by_id: Dict[str, CommandRecord] = {}
+        self._ordered: List[CommandRecord] = []
+
+    def append(self, record: CommandRecord) -> CommandAppendResult:
+        cid = str(record.command_id or "").strip()
+        if not cid:
+            cid = uuid.uuid4().hex
+        with self._lock:
+            existing = self._by_id.get(cid)
+            if existing is not None:
+                return CommandAppendResult(accepted=False, duplicate=True, seq=int(existing.seq or 0))
+            self._seq += 1
+            rec = CommandRecord(
+                command_id=cid,
+                run_id=str(record.run_id or ""),
+                type=str(record.type or ""),
+                payload=dict(record.payload or {}),
+                ts=str(record.ts or _utc_now_iso()),
+                client_id=str(record.client_id) if isinstance(record.client_id, str) and record.client_id else None,
+                seq=self._seq,
+            )
+            self._by_id[cid] = rec
+            self._ordered.append(rec)
+            return CommandAppendResult(accepted=True, duplicate=False, seq=rec.seq)
+
+    def list_after(self, *, after: int, limit: int = 1000) -> Tuple[List[CommandRecord], int]:
+        after2 = int(after or 0)
+        limit2 = int(limit or 1000)
+        if limit2 <= 0:
+            limit2 = 1000
+        with self._lock:
+            items = [r for r in self._ordered if int(r.seq or 0) > after2]
+            out = items[:limit2]
+            next_cursor = after2
+            if out:
+                next_cursor = int(out[-1].seq or after2)
+            return (list(out), next_cursor)
+
+    def get_last_seq(self) -> int:
+        with self._lock:
+            return int(self._seq or 0)
+
+
+class InMemoryCommandCursorStore(CommandCursorStore):
+    def __init__(self, initial: int = 0) -> None:
+        self._cursor = int(initial or 0)
+        self._lock = threading.Lock()
+
+    def load(self) -> int:
+        with self._lock:
+            return int(self._cursor or 0)
+
+    def save(self, cursor: int) -> None:
+        with self._lock:
+            self._cursor = int(cursor or 0)
+
+
+class JsonFileCommandCursorStore(CommandCursorStore):
+    """JSON file-backed cursor store.
+
+    Atomic write semantics are important because this file is updated frequently.
+    """
+
+    def __init__(self, path: str | Path) -> None:
+        self._path = Path(path)
+        self._path.parent.mkdir(parents=True, exist_ok=True)
+        self._lock = threading.Lock()
+
+    def load(self) -> int:
+        with self._lock:
+            if not self._path.exists():
+                return 0
+            try:
+                data = json.loads(self._path.read_text(encoding="utf-8") or "{}")
+            except Exception:
+                return 0
+            cur = data.get("cursor")
+            try:
+                return int(cur or 0)
+            except Exception:
+                return 0
+
+    def save(self, cursor: int) -> None:
+        cur = int(cursor or 0)
+        tmp = self._path.with_name(f"{self._path.name}.{uuid.uuid4().hex}.tmp")
+        payload = {"cursor": cur, "updated_at": _utc_now_iso()}
+        with self._lock:
+            try:
+                tmp.write_text(json.dumps(payload, ensure_ascii=False, indent=2), encoding="utf-8")
+                tmp.replace(self._path)
+            finally:
+                try:
+                    if tmp.exists():
+                        tmp.unlink()
+                except Exception:
+                    pass
+
+
+class JsonlCommandStore(CommandStore):
+    """Append-only JSONL command store.
+
+    File format: one JSON object per line. Each record includes a store-assigned `seq`.
+    """
+
+    def __init__(self, base_dir: str | Path, *, filename: str = "commands.jsonl") -> None:
+        self._base = Path(base_dir)
+        self._base.mkdir(parents=True, exist_ok=True)
+        self._path = self._base / str(filename or "commands.jsonl")
+        self._lock = threading.Lock()
+
+        # Idempotency index (rebuilt on init from the log).
+        self._seq = 0
+        self._by_id: Dict[str, int] = {}
+        self._rebuild_index()
+
+    def _rebuild_index(self) -> None:
+        if not self._path.exists():
+            self._seq = 0
+            self._by_id = {}
+            return
+        seq = 0
+        by_id: Dict[str, int] = {}
+        try:
+            with self._path.open("r", encoding="utf-8") as f:
+                for line in f:
+                    line = line.strip()
+                    if not line:
+                        continue
+                    try:
+                        obj = json.loads(line)
+                    except Exception:
+                        continue
+                    s = obj.get("seq")
+                    cid = obj.get("command_id")
+                    try:
+                        s_int = int(s or 0)
+                    except Exception:
+                        s_int = 0
+                    if s_int <= 0:
+                        continue
+                    if s_int > seq:
+                        seq = s_int
+                    if isinstance(cid, str) and cid and cid not in by_id:
+                        by_id[cid] = s_int
+        except Exception:
+            seq = 0
+            by_id = {}
+        self._seq = seq
+        self._by_id = by_id
+
+    def append(self, record: CommandRecord) -> CommandAppendResult:
+        cid = str(record.command_id or "").strip()
+        if not cid:
+            cid = uuid.uuid4().hex
+        run_id = str(record.run_id or "").strip()
+        typ = str(record.type or "").strip()
+        payload = dict(record.payload or {})
+        ts = str(record.ts or "").strip() or _utc_now_iso()
+        client_id = str(record.client_id).strip() if isinstance(record.client_id, str) and record.client_id else None
+
+        if not run_id:
+            raise ValueError("CommandRecord.run_id must be non-empty")
+        if not typ:
+            raise ValueError("CommandRecord.type must be non-empty")
+        if not isinstance(payload, dict) or not _is_json_value(payload):
+            raise ValueError("CommandRecord.payload must be a JSON-serializable dict")
+
+        with self._lock:
+            existing_seq = self._by_id.get(cid)
+            if existing_seq is not None:
+                return CommandAppendResult(accepted=False, duplicate=True, seq=int(existing_seq))
+
+            self._seq += 1
+            seq = int(self._seq)
+            rec = CommandRecord(
+                command_id=cid,
+                run_id=run_id,
+                type=typ,
+                payload=payload,
+                ts=ts,
+                client_id=client_id,
+                seq=seq,
+            )
+            with self._path.open("a", encoding="utf-8") as f:
+                f.write(json.dumps(rec.to_json(), ensure_ascii=False))
+                f.write("\n")
+            self._by_id[cid] = seq
+            return CommandAppendResult(accepted=True, duplicate=False, seq=seq)
+
+    def list_after(self, *, after: int, limit: int = 1000) -> Tuple[List[CommandRecord], int]:
+        after2 = int(after or 0)
+        limit2 = int(limit or 1000)
+        if limit2 <= 0:
+            limit2 = 1000
+
+        if not self._path.exists():
+            return ([], after2)
+
+        out: List[CommandRecord] = []
+        next_cursor = after2
+        try:
+            with self._path.open("r", encoding="utf-8") as f:
+                for line in f:
+                    line = line.strip()
+                    if not line:
+                        continue
+                    try:
+                        obj = json.loads(line)
+                    except Exception:
+                        continue
+                    try:
+                        seq = int(obj.get("seq") or 0)
+                    except Exception:
+                        continue
+                    if seq <= after2:
+                        continue
+                    try:
+                        rec = CommandRecord(
+                            command_id=str(obj.get("command_id") or ""),
+                            run_id=str(obj.get("run_id") or ""),
+                            type=str(obj.get("type") or ""),
+                            payload=dict(obj.get("payload") or {}),
+                            ts=str(obj.get("ts") or ""),
+                            client_id=str(obj.get("client_id") or "") or None,
+                            seq=seq,
+                        )
+                    except Exception:
+                        continue
+                    out.append(rec)
+                    next_cursor = seq
+                    if len(out) >= limit2:
+                        break
+        except Exception:
+            return ([], after2)
+        return (out, next_cursor)
+
+    def get_last_seq(self) -> int:
+        with self._lock:
+            return int(self._seq or 0)
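
A short usage sketch of the inbox plus cursor pair, exercising the idempotent-append and replay-by-cursor semantics from the module docstring (the directory, command id, and `pause` command type are illustrative):

```python
from abstractruntime.storage.commands import (
    CommandRecord,
    JsonFileCommandCursorStore,
    JsonlCommandStore,
)

store = JsonlCommandStore("/tmp/ar-commands")
cursor = JsonFileCommandCursorStore("/tmp/ar-commands/consumer.cursor.json")

# A client retries the same submission; the second append is a no-op.
cmd = CommandRecord(command_id="cmd-1", run_id="run-1", type="pause", payload={}, ts="")
first = store.append(cmd)
retry = store.append(cmd)
assert first.accepted and retry.duplicate and first.seq == retry.seq

# A worker drains everything after its durable cursor, then advances it.
records, next_cursor = store.list_after(after=cursor.load())
for rec in records:
    ...  # fulfill the command (apply it to the run, record evidence, etc.)
cursor.save(next_cursor)
```
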