abstractruntime-0.4.0-py3-none-any.whl → abstractruntime-0.4.1-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (65)
  1. abstractruntime/__init__.py +76 -1
  2. abstractruntime/core/config.py +68 -1
  3. abstractruntime/core/models.py +5 -0
  4. abstractruntime/core/policy.py +74 -3
  5. abstractruntime/core/runtime.py +1002 -126
  6. abstractruntime/core/vars.py +8 -2
  7. abstractruntime/evidence/recorder.py +1 -1
  8. abstractruntime/history_bundle.py +772 -0
  9. abstractruntime/integrations/abstractcore/__init__.py +3 -0
  10. abstractruntime/integrations/abstractcore/default_tools.py +127 -3
  11. abstractruntime/integrations/abstractcore/effect_handlers.py +2440 -99
  12. abstractruntime/integrations/abstractcore/embeddings_client.py +69 -0
  13. abstractruntime/integrations/abstractcore/factory.py +68 -20
  14. abstractruntime/integrations/abstractcore/llm_client.py +447 -15
  15. abstractruntime/integrations/abstractcore/mcp_worker.py +1 -0
  16. abstractruntime/integrations/abstractcore/session_attachments.py +946 -0
  17. abstractruntime/integrations/abstractcore/tool_executor.py +31 -10
  18. abstractruntime/integrations/abstractcore/workspace_scoped_tools.py +561 -0
  19. abstractruntime/integrations/abstractmemory/__init__.py +3 -0
  20. abstractruntime/integrations/abstractmemory/effect_handlers.py +946 -0
  21. abstractruntime/memory/active_context.py +6 -1
  22. abstractruntime/memory/kg_packets.py +164 -0
  23. abstractruntime/memory/memact_composer.py +175 -0
  24. abstractruntime/memory/recall_levels.py +163 -0
  25. abstractruntime/memory/token_budget.py +86 -0
  26. abstractruntime/storage/__init__.py +4 -1
  27. abstractruntime/storage/artifacts.py +158 -30
  28. abstractruntime/storage/base.py +17 -1
  29. abstractruntime/storage/commands.py +339 -0
  30. abstractruntime/storage/in_memory.py +41 -1
  31. abstractruntime/storage/json_files.py +195 -12
  32. abstractruntime/storage/observable.py +38 -1
  33. abstractruntime/storage/offloading.py +433 -0
  34. abstractruntime/storage/sqlite.py +836 -0
  35. abstractruntime/visualflow_compiler/__init__.py +29 -0
  36. abstractruntime/visualflow_compiler/adapters/__init__.py +11 -0
  37. abstractruntime/visualflow_compiler/adapters/agent_adapter.py +126 -0
  38. abstractruntime/visualflow_compiler/adapters/context_adapter.py +109 -0
  39. abstractruntime/visualflow_compiler/adapters/control_adapter.py +615 -0
  40. abstractruntime/visualflow_compiler/adapters/effect_adapter.py +1051 -0
  41. abstractruntime/visualflow_compiler/adapters/event_adapter.py +307 -0
  42. abstractruntime/visualflow_compiler/adapters/function_adapter.py +97 -0
  43. abstractruntime/visualflow_compiler/adapters/memact_adapter.py +114 -0
  44. abstractruntime/visualflow_compiler/adapters/subflow_adapter.py +74 -0
  45. abstractruntime/visualflow_compiler/adapters/variable_adapter.py +316 -0
  46. abstractruntime/visualflow_compiler/compiler.py +3832 -0
  47. abstractruntime/visualflow_compiler/flow.py +247 -0
  48. abstractruntime/visualflow_compiler/visual/__init__.py +13 -0
  49. abstractruntime/visualflow_compiler/visual/agent_ids.py +29 -0
  50. abstractruntime/visualflow_compiler/visual/builtins.py +1376 -0
  51. abstractruntime/visualflow_compiler/visual/code_executor.py +214 -0
  52. abstractruntime/visualflow_compiler/visual/executor.py +2804 -0
  53. abstractruntime/visualflow_compiler/visual/models.py +211 -0
  54. abstractruntime/workflow_bundle/__init__.py +52 -0
  55. abstractruntime/workflow_bundle/models.py +236 -0
  56. abstractruntime/workflow_bundle/packer.py +317 -0
  57. abstractruntime/workflow_bundle/reader.py +87 -0
  58. abstractruntime/workflow_bundle/registry.py +587 -0
  59. abstractruntime-0.4.1.dist-info/METADATA +177 -0
  60. abstractruntime-0.4.1.dist-info/RECORD +86 -0
  61. abstractruntime-0.4.0.dist-info/METADATA +0 -167
  62. abstractruntime-0.4.0.dist-info/RECORD +0 -49
  63. {abstractruntime-0.4.0.dist-info → abstractruntime-0.4.1.dist-info}/WHEEL +0 -0
  64. {abstractruntime-0.4.0.dist-info → abstractruntime-0.4.1.dist-info}/entry_points.txt +0 -0
  65. {abstractruntime-0.4.0.dist-info → abstractruntime-0.4.1.dist-info}/licenses/LICENSE +0 -0
abstractruntime/storage/artifacts.py
@@ -44,6 +44,7 @@ class ArtifactMetadata:
     content_type: str  # MIME type or semantic type
     size_bytes: int
     created_at: str
+    blob_id: Optional[str] = None  # Global (cross-run) content hash for dedupe
     run_id: Optional[str] = None  # Optional association with a run
     tags: Dict[str, str] = field(default_factory=dict)
 
@@ -54,6 +55,7 @@ class ArtifactMetadata:
     def from_dict(cls, data: Dict[str, Any]) -> "ArtifactMetadata":
         return cls(
             artifact_id=data["artifact_id"],
+            blob_id=data.get("blob_id"),
             content_type=data["content_type"],
             size_bytes=data["size_bytes"],
             created_at=data["created_at"],
@@ -92,9 +94,8 @@ def compute_artifact_id(content: bytes, *, run_id: Optional[str] = None) -> str:
     By default, artifacts are content-addressed (SHA-256, truncated) so the same bytes
     produce the same id.
 
-    If `run_id` is provided, the id is *namespaced to that run* to avoid cross-run
-    collisions when using a shared `FileArtifactStore(base_dir)` and to preserve
-    correct `list_by_run(...)` / purge-by-run semantics.
+    If `run_id` is provided, the id is *namespaced to that run* so each run can have a
+    distinct artifact_id (while still enabling cross-run blob dedupe via `blob_id`).
     """
     h = hashlib.sha256()
     if run_id is not None:
@@ -106,6 +107,11 @@ def compute_artifact_id(content: bytes, *, run_id: Optional[str] = None) -> str:
     return h.hexdigest()[:32]
 
 
+def compute_blob_id(content: bytes) -> str:
+    """Compute a stable, global content hash for artifact blob dedupe."""
+    return hashlib.sha256(content).hexdigest()
+
+
 def validate_artifact_id(artifact_id: str) -> None:
     """Validate artifact ID to prevent path traversal attacks.
 
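The two hash functions split identity from storage: `compute_artifact_id` keys per-run metadata, while `compute_blob_id` keys the shared bytes. A minimal sketch of the intended relationship, assuming both are importable from `abstractruntime.storage.artifacts` (the path in the file list above):

    import hashlib

    from abstractruntime.storage.artifacts import compute_artifact_id, compute_blob_id

    data = b"same bytes, two runs"

    # Run-namespaced ids diverge, so each run keeps its own artifact record...
    assert compute_artifact_id(data, run_id="run-a") != compute_artifact_id(data, run_id="run-b")

    # ...while the blob id is the plain SHA-256 of the content, identical across
    # runs, which is what enables cross-run dedupe.
    assert compute_blob_id(data) == hashlib.sha256(data).hexdigest()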
@@ -337,6 +343,7 @@ class InMemoryArtifactStore(ArtifactStore):
 
         metadata = ArtifactMetadata(
             artifact_id=artifact_id,
+            blob_id=compute_blob_id(content),
             content_type=content_type,
             size_bytes=len(content),
             created_at=utc_now_iso(),
@@ -382,26 +389,77 @@ class InMemoryArtifactStore(ArtifactStore):
 class FileArtifactStore(ArtifactStore):
     """File-based artifact store.
 
-    Directory structure:
+    Directory structure (v1, cross-run blob dedupe):
+        base_dir/
+            artifacts/
+                blobs/{blob_id}.bin      # global content-addressed bytes
+                refs/{artifact_id}.meta  # per-artifact metadata (points to blob_id)
+
+    Legacy layout (v0) is still supported for reads:
         base_dir/
             artifacts/
-                {artifact_id}.bin   # content
-                {artifact_id}.meta  # metadata JSON
+                {artifact_id}.bin
+                {artifact_id}.meta
     """
 
     def __init__(self, base_dir: Union[str, Path]) -> None:
         self._base = Path(base_dir)
         self._artifacts_dir = self._base / "artifacts"
-        self._artifacts_dir.mkdir(parents=True, exist_ok=True)
+        self._blobs_dir = self._artifacts_dir / "blobs"
+        self._refs_dir = self._artifacts_dir / "refs"
+        self._blobs_dir.mkdir(parents=True, exist_ok=True)
+        self._refs_dir.mkdir(parents=True, exist_ok=True)
 
-    def _content_path(self, artifact_id: str) -> Path:
+    def _legacy_content_path(self, artifact_id: str) -> Path:
         validate_artifact_id(artifact_id)
         return self._artifacts_dir / f"{artifact_id}.bin"
 
-    def _metadata_path(self, artifact_id: str) -> Path:
+    def _legacy_metadata_path(self, artifact_id: str) -> Path:
         validate_artifact_id(artifact_id)
         return self._artifacts_dir / f"{artifact_id}.meta"
 
+    def _ref_metadata_path(self, artifact_id: str) -> Path:
+        validate_artifact_id(artifact_id)
+        return self._refs_dir / f"{artifact_id}.meta"
+
+    def _blob_path(self, blob_id: str) -> Path:
+        validate_artifact_id(blob_id)
+        return self._blobs_dir / f"{blob_id}.bin"
+
+    def _write_blob(self, *, blob_id: str, content: bytes) -> Path:
+        path = self._blob_path(blob_id)
+        if path.exists():
+            return path
+        import uuid
+
+        tmp = path.with_name(f"{path.name}.{uuid.uuid4().hex}.tmp")
+        try:
+            with open(tmp, "wb") as f:
+                f.write(content)
+            tmp.replace(path)
+        finally:
+            try:
+                if tmp.exists():
+                    tmp.unlink()
+            except Exception:
+                pass
+        return path
+
+    def _content_path(self, artifact_id: str) -> Path:
+        validate_artifact_id(artifact_id)
+        meta = self.get_metadata(artifact_id)
+        blob_id = getattr(meta, "blob_id", None) if meta is not None else None
+        if isinstance(blob_id, str) and blob_id.strip():
+            return self._blob_path(blob_id.strip())
+        return self._legacy_content_path(artifact_id)
+
+    def _metadata_path(self, artifact_id: str) -> Path:
+        validate_artifact_id(artifact_id)
+        p = self._ref_metadata_path(artifact_id)
+        if p.exists():
+            return p
+        return self._legacy_metadata_path(artifact_id)
+
     def store(
         self,
         content: bytes,
@@ -413,9 +471,11 @@ class FileArtifactStore(ArtifactStore):
     ) -> ArtifactMetadata:
         if artifact_id is None:
             artifact_id = compute_artifact_id(content, run_id=run_id)
+        blob_id = compute_blob_id(content)
 
         metadata = ArtifactMetadata(
             artifact_id=artifact_id,
+            blob_id=blob_id,
             content_type=content_type,
             size_bytes=len(content),
             created_at=utc_now_iso(),
@@ -423,39 +483,40 @@ class FileArtifactStore(ArtifactStore):
             tags=tags or {},
         )
 
-        # Write content
-        content_path = self._content_path(artifact_id)
-        with open(content_path, "wb") as f:
-            f.write(content)
+        # Write blob bytes (deduped across runs)
+        self._write_blob(blob_id=blob_id, content=content)
 
         # Write metadata
-        metadata_path = self._metadata_path(artifact_id)
+        metadata_path = self._ref_metadata_path(artifact_id)
         with open(metadata_path, "w", encoding="utf-8") as f:
             json.dump(metadata.to_dict(), f, ensure_ascii=False, indent=2)
 
         return metadata
 
     def load(self, artifact_id: str) -> Optional[Artifact]:
-        content_path = self._content_path(artifact_id)
         metadata_path = self._metadata_path(artifact_id)
-
-        if not content_path.exists() or not metadata_path.exists():
+        if not metadata_path.exists():
             return None
 
-        with open(content_path, "rb") as f:
-            content = f.read()
-
         with open(metadata_path, "r", encoding="utf-8") as f:
             metadata_dict = json.load(f)
 
         metadata = ArtifactMetadata.from_dict(metadata_dict)
+        content_path = self._content_path(artifact_id)
+        if not content_path.exists():
+            return None
+
+        with open(content_path, "rb") as f:
+            content = f.read()
         return Artifact(metadata=metadata, content=content)
 
     def get_metadata(self, artifact_id: str) -> Optional[ArtifactMetadata]:
-        metadata_path = self._metadata_path(artifact_id)
-
+        validate_artifact_id(artifact_id)
+        metadata_path = self._ref_metadata_path(artifact_id)
         if not metadata_path.exists():
-            return None
+            metadata_path = self._legacy_metadata_path(artifact_id)
+            if not metadata_path.exists():
+                return None
 
         with open(metadata_path, "r", encoding="utf-8") as f:
             metadata_dict = json.load(f)
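The net effect of the new write path: each `store(...)` writes one ref under `refs/` and at most one blob under `blobs/`. A minimal sketch of the expected on-disk outcome, assuming `FileArtifactStore` is importable from `abstractruntime.storage.artifacts` and that `store()` accepts the keyword arguments its body suggests (`content_type`, `run_id`):

    from pathlib import Path

    from abstractruntime.storage.artifacts import FileArtifactStore

    base = Path("/tmp/artifact-demo")  # hypothetical directory
    store = FileArtifactStore(base)
    data = b"shared payload"

    # Same bytes stored under two runs: two run-scoped refs, one shared blob.
    m1 = store.store(data, content_type="text/plain", run_id="run-a")
    m2 = store.store(data, content_type="text/plain", run_id="run-b")
    assert m1.artifact_id != m2.artifact_id  # run-namespaced identity
    assert m1.blob_id == m2.blob_id          # deduped content

    assert len(list((base / "artifacts" / "refs").glob("*.meta"))) == 2
    assert len(list((base / "artifacts" / "blobs").glob("*.bin"))) == 1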
@@ -463,28 +524,42 @@ class FileArtifactStore(ArtifactStore):
         return ArtifactMetadata.from_dict(metadata_dict)
 
     def exists(self, artifact_id: str) -> bool:
+        meta = self.get_metadata(artifact_id)
+        if meta is None:
+            return False
         return self._content_path(artifact_id).exists()
 
     def delete(self, artifact_id: str) -> bool:
-        content_path = self._content_path(artifact_id)
-        metadata_path = self._metadata_path(artifact_id)
+        validate_artifact_id(artifact_id)
+        metadata_path = self._ref_metadata_path(artifact_id)
+        legacy_meta = self._legacy_metadata_path(artifact_id)
+        legacy_content = self._legacy_content_path(artifact_id)
 
         deleted = False
-        if content_path.exists():
-            content_path.unlink()
-            deleted = True
         if metadata_path.exists():
             metadata_path.unlink()
             deleted = True
+        if legacy_meta.exists():
+            legacy_meta.unlink()
+            deleted = True
+        if legacy_content.exists():
+            legacy_content.unlink()
+            deleted = True
 
         return deleted
 
     def list_by_run(self, run_id: str) -> List[ArtifactMetadata]:
         results = []
-        for metadata_path in self._artifacts_dir.glob("*.meta"):
+        meta_paths = list(self._refs_dir.glob("*.meta")) + list(self._artifacts_dir.glob("*.meta"))
+        seen: set[str] = set()
+        for metadata_path in meta_paths:
             try:
                 with open(metadata_path, "r", encoding="utf-8") as f:
                     metadata_dict = json.load(f)
+                artifact_id = str(metadata_dict.get("artifact_id") or "").strip()
+                if not artifact_id or artifact_id in seen:
+                    continue
+                seen.add(artifact_id)
                 if metadata_dict.get("run_id") == run_id:
                     results.append(ArtifactMetadata.from_dict(metadata_dict))
             except (json.JSONDecodeError, IOError):
@@ -493,10 +568,16 @@ class FileArtifactStore(ArtifactStore):
 
     def list_all(self, *, limit: int = 1000) -> List[ArtifactMetadata]:
         results = []
-        for metadata_path in self._artifacts_dir.glob("*.meta"):
+        meta_paths = list(self._refs_dir.glob("*.meta")) + list(self._artifacts_dir.glob("*.meta"))
+        seen: set[str] = set()
+        for metadata_path in meta_paths:
             try:
                 with open(metadata_path, "r", encoding="utf-8") as f:
                     metadata_dict = json.load(f)
+                artifact_id = str(metadata_dict.get("artifact_id") or "").strip()
+                if not artifact_id or artifact_id in seen:
+                    continue
+                seen.add(artifact_id)
                 results.append(ArtifactMetadata.from_dict(metadata_dict))
             except (json.JSONDecodeError, IOError):
                 continue
@@ -504,6 +585,53 @@ class FileArtifactStore(ArtifactStore):
         results.sort(key=lambda m: m.created_at, reverse=True)
         return results[:limit]
 
+    def gc(self, *, dry_run: bool = True) -> Dict[str, Any]:
+        """Garbage collect unreferenced blobs.
+
+        Notes:
+        - This only applies to the v1 `artifacts/blobs` layout.
+        - Safe-by-default: `dry_run=True` returns the plan without deleting.
+        """
+
+        report: Dict[str, Any] = {
+            "dry_run": bool(dry_run),
+            "blobs_total": 0,
+            "blobs_referenced": 0,
+            "blobs_deleted": 0,
+            "bytes_reclaimed": 0,
+            "errors": [],
+        }
+
+        referenced: set[str] = set()
+        for meta in self.list_all(limit=1_000_000):
+            blob_id = getattr(meta, "blob_id", None)
+            if isinstance(blob_id, str) and blob_id.strip():
+                referenced.add(blob_id.strip())
+
+        report["blobs_referenced"] = len(referenced)
+
+        blobs = list(self._blobs_dir.glob("*.bin"))
+        report["blobs_total"] = len(blobs)
+
+        for p in blobs:
+            blob_id = p.stem
+            if blob_id in referenced:
+                continue
+            try:
+                size = p.stat().st_size
+            except Exception:
+                size = 0
+            if not dry_run:
+                try:
+                    p.unlink()
+                except Exception as e:
+                    report["errors"].append({"blob_id": blob_id, "error": str(e)})
+                    continue
+            report["blobs_deleted"] += 1
+            report["bytes_reclaimed"] += int(size)
+
+        return report
+
 
 # Artifact reference helpers for use in RunState.vars
 
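Since `delete()` removes refs (and legacy files) but never touches `blobs/`, a shared blob survives deletion of any single ref; `gc()` reclaims blobs once no metadata references them. A usage sketch, reusing the hypothetical store from the sketch above:

    from abstractruntime.storage.artifacts import FileArtifactStore

    store = FileArtifactStore("/tmp/artifact-demo")  # hypothetical base_dir
    meta = store.store(b"orphan me", content_type="text/plain", run_id="run-a")
    store.delete(meta.artifact_id)  # removes the ref; the blob stays behind

    plan = store.gc(dry_run=True)      # safe default: report only, delete nothing
    assert plan["blobs_deleted"] >= 1  # counts what *would* be removed

    result = store.gc(dry_run=False)   # actually unlink unreferenced blobs
    print(f"reclaimed {result['bytes_reclaimed']} bytes; errors: {result['errors']}")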
abstractruntime/storage/base.py
@@ -95,6 +95,23 @@ class QueryableRunStore(Protocol):
         ...
 
 
+@runtime_checkable
+class QueryableRunIndexStore(Protocol):
+    """Optional fast-path for listing run summaries without loading full RunState payloads."""
+
+    def list_run_index(
+        self,
+        *,
+        status: Optional[RunStatus] = None,
+        workflow_id: Optional[str] = None,
+        session_id: Optional[str] = None,
+        root_only: bool = False,
+        limit: int = 100,
+    ) -> List[Dict[str, Any]]:
+        """List lightweight run index rows (most recent first)."""
+        ...
+
+
 class LedgerStore(ABC):
     """Append-only journal store."""
 
@@ -104,4 +121,3 @@ class LedgerStore(ABC):
     @abstractmethod
     def list(self, run_id: str) -> List[Dict[str, Any]]: ...
 
-
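Because the protocol is decorated with `@runtime_checkable`, backends opt in simply by implementing `list_run_index`, and callers can feature-detect it with `isinstance`. A minimal sketch (the fallback branch is hypothetical; the regular full-state listing API is not part of this diff):

    from typing import Any, Dict, List, Optional

    from abstractruntime.storage.base import QueryableRunIndexStore

    def list_run_summaries(
        store: object, *, session_id: Optional[str] = None, limit: int = 100
    ) -> List[Dict[str, Any]]:
        if isinstance(store, QueryableRunIndexStore):
            # Fast path: lightweight rows, no full RunState deserialization.
            return store.list_run_index(session_id=session_id, limit=limit)
        # Hypothetical fallback: a backend-specific scan over full run states
        # would go here; omitted because it is outside this diff.
        raise NotImplementedError("backend does not expose a run index fast path")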
abstractruntime/storage/commands.py (new file)
@@ -0,0 +1,339 @@
+"""abstractruntime.storage.commands
+
+Durable command inbox primitives (append-only, idempotent).
+
+Why this exists:
+- A remote Run Gateway (ADR-0018 / backlog 307) needs a control plane that is safe under
+  retries and intermittent networks.
+- The key SQS/Temporal insight is to decouple *command acceptance* from *fulfillment*:
+  clients submit commands with idempotency keys, and a worker processes them asynchronously.
+
+Design constraints:
+- JSON-safe records only (persisted).
+- Append-only storage (audit-friendly, replayable).
+- Idempotency by `command_id` (duplicate submissions are ignored).
+"""
+
+from __future__ import annotations
+
+import json
+import threading
+import uuid
+from dataclasses import asdict, dataclass
+from datetime import datetime, timezone
+from pathlib import Path
+from typing import Any, Dict, List, Optional, Protocol, Tuple, runtime_checkable
+
+
+def _utc_now_iso() -> str:
+    return datetime.now(timezone.utc).isoformat()
+
+
+def _is_json_value(value: Any) -> bool:
+    if value is None or isinstance(value, (str, int, float, bool)):
+        return True
+    if isinstance(value, list):
+        return all(_is_json_value(v) for v in value)
+    if isinstance(value, dict):
+        return all(isinstance(k, str) and _is_json_value(v) for k, v in value.items())
+    return False
+
+
+@dataclass(frozen=True)
+class CommandRecord:
+    """A durable command record.
+
+    Notes:
+    - `seq` is assigned by the store and provides cursor semantics for consumers.
+    - `payload` must be JSON-serializable (dict of JSON values).
+    """
+
+    command_id: str
+    run_id: str
+    type: str
+    payload: Dict[str, Any]
+    ts: str
+    client_id: Optional[str] = None
+    seq: int = 0
+
+    def to_json(self) -> Dict[str, Any]:
+        return asdict(self)
+
+
+@dataclass(frozen=True)
+class CommandAppendResult:
+    """Result of appending a command to a CommandStore."""
+
+    accepted: bool
+    duplicate: bool
+    seq: int
+
+
+@runtime_checkable
+class CommandStore(Protocol):
+    """Append-only inbox of commands with cursor replay semantics."""
+
+    def append(self, record: CommandRecord) -> CommandAppendResult:
+        """Append a command if its command_id is new (idempotent)."""
+
+    def list_after(self, *, after: int, limit: int = 1000) -> Tuple[List[CommandRecord], int]:
+        """Return commands with seq > after, up to limit, and the next cursor."""
+
+    def get_last_seq(self) -> int:
+        """Return the greatest assigned sequence number (0 if empty)."""
+
+
+@runtime_checkable
+class CommandCursorStore(Protocol):
+    """Durable consumer cursor for CommandStore replay."""
+
+    def load(self) -> int: ...
+
+    def save(self, cursor: int) -> None: ...
+
+
+class InMemoryCommandStore(CommandStore):
+    def __init__(self) -> None:
+        self._lock = threading.Lock()
+        self._seq = 0
+        self._by_id: Dict[str, CommandRecord] = {}
+        self._ordered: List[CommandRecord] = []
+
+    def append(self, record: CommandRecord) -> CommandAppendResult:
+        cid = str(record.command_id or "").strip()
+        if not cid:
+            cid = uuid.uuid4().hex
+        with self._lock:
+            existing = self._by_id.get(cid)
+            if existing is not None:
+                return CommandAppendResult(accepted=False, duplicate=True, seq=int(existing.seq or 0))
+            self._seq += 1
+            rec = CommandRecord(
+                command_id=cid,
+                run_id=str(record.run_id or ""),
+                type=str(record.type or ""),
+                payload=dict(record.payload or {}),
+                ts=str(record.ts or _utc_now_iso()),
+                client_id=str(record.client_id) if isinstance(record.client_id, str) and record.client_id else None,
+                seq=self._seq,
+            )
+            self._by_id[cid] = rec
+            self._ordered.append(rec)
+            return CommandAppendResult(accepted=True, duplicate=False, seq=rec.seq)
+
+    def list_after(self, *, after: int, limit: int = 1000) -> Tuple[List[CommandRecord], int]:
+        after2 = int(after or 0)
+        limit2 = int(limit or 1000)
+        if limit2 <= 0:
+            limit2 = 1000
+        with self._lock:
+            items = [r for r in self._ordered if int(r.seq or 0) > after2]
+            out = items[:limit2]
+            next_cursor = after2
+            if out:
+                next_cursor = int(out[-1].seq or after2)
+            return (list(out), next_cursor)
+
+    def get_last_seq(self) -> int:
+        with self._lock:
+            return int(self._seq or 0)
+
+
+class InMemoryCommandCursorStore(CommandCursorStore):
+    def __init__(self, initial: int = 0) -> None:
+        self._cursor = int(initial or 0)
+        self._lock = threading.Lock()
+
+    def load(self) -> int:
+        with self._lock:
+            return int(self._cursor or 0)
+
+    def save(self, cursor: int) -> None:
+        with self._lock:
+            self._cursor = int(cursor or 0)
+
+
+class JsonFileCommandCursorStore(CommandCursorStore):
+    """JSON file-backed cursor store.
+
+    Atomic write semantics are important because this file is updated frequently.
+    """
+
+    def __init__(self, path: str | Path) -> None:
+        self._path = Path(path)
+        self._path.parent.mkdir(parents=True, exist_ok=True)
+        self._lock = threading.Lock()
+
+    def load(self) -> int:
+        with self._lock:
+            if not self._path.exists():
+                return 0
+            try:
+                data = json.loads(self._path.read_text(encoding="utf-8") or "{}")
+            except Exception:
+                return 0
+            cur = data.get("cursor")
+            try:
+                return int(cur or 0)
+            except Exception:
+                return 0
+
+    def save(self, cursor: int) -> None:
+        cur = int(cursor or 0)
+        tmp = self._path.with_name(f"{self._path.name}.{uuid.uuid4().hex}.tmp")
+        payload = {"cursor": cur, "updated_at": _utc_now_iso()}
+        with self._lock:
+            try:
+                tmp.write_text(json.dumps(payload, ensure_ascii=False, indent=2), encoding="utf-8")
+                tmp.replace(self._path)
+            finally:
+                try:
+                    if tmp.exists():
+                        tmp.unlink()
+                except Exception:
+                    pass
+
+
+class JsonlCommandStore(CommandStore):
+    """Append-only JSONL command store.
+
+    File format: one JSON object per line. Each record includes a store-assigned `seq`.
+    """
+
+    def __init__(self, base_dir: str | Path, *, filename: str = "commands.jsonl") -> None:
+        self._base = Path(base_dir)
+        self._base.mkdir(parents=True, exist_ok=True)
+        self._path = self._base / str(filename or "commands.jsonl")
+        self._lock = threading.Lock()
+
+        # Idempotency index (rebuilt on init from the log).
+        self._seq = 0
+        self._by_id: Dict[str, int] = {}
+        self._rebuild_index()
+
+    def _rebuild_index(self) -> None:
+        if not self._path.exists():
+            self._seq = 0
+            self._by_id = {}
+            return
+        seq = 0
+        by_id: Dict[str, int] = {}
+        try:
+            with self._path.open("r", encoding="utf-8") as f:
+                for line in f:
+                    line = line.strip()
+                    if not line:
+                        continue
+                    try:
+                        obj = json.loads(line)
+                    except Exception:
+                        continue
+                    s = obj.get("seq")
+                    cid = obj.get("command_id")
+                    try:
+                        s_int = int(s or 0)
+                    except Exception:
+                        s_int = 0
+                    if s_int <= 0:
+                        continue
+                    if s_int > seq:
+                        seq = s_int
+                    if isinstance(cid, str) and cid and cid not in by_id:
+                        by_id[cid] = s_int
+        except Exception:
+            seq = 0
+            by_id = {}
+        self._seq = seq
+        self._by_id = by_id
+
+    def append(self, record: CommandRecord) -> CommandAppendResult:
+        cid = str(record.command_id or "").strip()
+        if not cid:
+            cid = uuid.uuid4().hex
+        run_id = str(record.run_id or "").strip()
+        typ = str(record.type or "").strip()
+        payload = dict(record.payload or {})
+        ts = str(record.ts or "").strip() or _utc_now_iso()
+        client_id = str(record.client_id).strip() if isinstance(record.client_id, str) and record.client_id else None
+
+        if not run_id:
+            raise ValueError("CommandRecord.run_id must be non-empty")
+        if not typ:
+            raise ValueError("CommandRecord.type must be non-empty")
+        if not isinstance(payload, dict) or not _is_json_value(payload):
+            raise ValueError("CommandRecord.payload must be a JSON-serializable dict")
+
+        with self._lock:
+            existing_seq = self._by_id.get(cid)
+            if existing_seq is not None:
+                return CommandAppendResult(accepted=False, duplicate=True, seq=int(existing_seq))
+
+            self._seq += 1
+            seq = int(self._seq)
+            rec = CommandRecord(
+                command_id=cid,
+                run_id=run_id,
+                type=typ,
+                payload=payload,
+                ts=ts,
+                client_id=client_id,
+                seq=seq,
+            )
+            with self._path.open("a", encoding="utf-8") as f:
+                f.write(json.dumps(rec.to_json(), ensure_ascii=False))
+                f.write("\n")
+            self._by_id[cid] = seq
+            return CommandAppendResult(accepted=True, duplicate=False, seq=seq)
+
+    def list_after(self, *, after: int, limit: int = 1000) -> Tuple[List[CommandRecord], int]:
+        after2 = int(after or 0)
+        limit2 = int(limit or 1000)
+        if limit2 <= 0:
+            limit2 = 1000
+
+        if not self._path.exists():
+            return ([], after2)
+
+        out: List[CommandRecord] = []
+        next_cursor = after2
+        try:
+            with self._path.open("r", encoding="utf-8") as f:
+                for line in f:
+                    line = line.strip()
+                    if not line:
+                        continue
+                    try:
+                        obj = json.loads(line)
+                    except Exception:
+                        continue
+                    try:
+                        seq = int(obj.get("seq") or 0)
+                    except Exception:
+                        continue
+                    if seq <= after2:
+                        continue
+                    try:
+                        rec = CommandRecord(
+                            command_id=str(obj.get("command_id") or ""),
+                            run_id=str(obj.get("run_id") or ""),
+                            type=str(obj.get("type") or ""),
+                            payload=dict(obj.get("payload") or {}),
+                            ts=str(obj.get("ts") or ""),
+                            client_id=str(obj.get("client_id") or "") or None,
+                            seq=seq,
+                        )
+                    except Exception:
+                        continue
+                    out.append(rec)
+                    next_cursor = seq
+                    if len(out) >= limit2:
+                        break
+        except Exception:
+            return ([], after2)
+        return (out, next_cursor)
+
+    def get_last_seq(self) -> int:
+        with self._lock:
+            return int(self._seq or 0)
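Putting the pieces together: a gateway appends with an idempotency key (retries are no-ops), and a worker drains the log through a durable cursor. A minimal sketch with hypothetical paths:

    from abstractruntime.storage.commands import (
        CommandRecord,
        JsonFileCommandCursorStore,
        JsonlCommandStore,
    )

    store = JsonlCommandStore("/tmp/inbox-demo")  # hypothetical base_dir
    cursor = JsonFileCommandCursorStore("/tmp/inbox-demo/cursor.json")

    # Gateway side: duplicate submissions with the same command_id are ignored.
    rec = CommandRecord(command_id="cmd-1", run_id="run-1", type="pause",
                        payload={}, ts="")  # empty ts is filled in by the store
    assert store.append(rec).accepted
    assert store.append(rec).duplicate  # safe under network retries

    # Worker side: replay everything past the durable cursor, then advance it.
    batch, next_cursor = store.list_after(after=cursor.load(), limit=100)
    for cmd in batch:
        print("handling", cmd.type, "for run", cmd.run_id)  # stand-in for real fulfillment
    cursor.save(next_cursor)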