simplicio-prompt 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +15 -0
- package/README.md +173 -0
- package/YOOL_TUPLE_HAMT.md +1149 -0
- package/adopters.md +24 -0
- package/benchmarks/generate_prompt_benchmark_pdf.py +355 -0
- package/benchmarks/generate_v2_benchmark_pdf.py +302 -0
- package/benchmarks/prompt_vs_normal.py +431 -0
- package/benchmarks/prompt_vs_normal_benchmark.pdf +124 -0
- package/benchmarks/prompt_vs_normal_results.md +148 -0
- package/benchmarks/v2_safe_speed_benchmark.pdf +118 -0
- package/benchmarks/v2_safe_speed_benchmark.py +626 -0
- package/benchmarks/v2_safe_speed_results.json +446 -0
- package/benchmarks/v2_safe_speed_results.md +96 -0
- package/docs/assets/simplicio-prompt-hero.png +0 -0
- package/docs/assets/yool-v2-safe-speed-infographic-en.png +0 -0
- package/docs/assets/yool-v2-safe-speed-infographic-pt.png +0 -0
- package/examples/node/build-catalog.mjs +70 -0
- package/examples/python/minimal_bus.py +134 -0
- package/examples/python/receipts.py +152 -0
- package/guardrails/cpu_throttle.py +119 -0
- package/guardrails/disk_gc.py +212 -0
- package/kernel/README.md +82 -0
- package/kernel/yool_tuple_kernel.py +1109 -0
- package/kernel-implementation-request.md +38 -0
- package/package.json +40 -0
- package/prompts/agent-runtime-execution-prompt.md +119 -0
- package/prompts/legacy-tuple-space-engine-prompt.md +36 -0
|
@@ -0,0 +1,134 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Minimal Linda-style tuple-space bus. See YOOL_TUPLE_HAMT.md §2.4.
|
|
3
|
+
|
|
4
|
+
Implements `out`, `in_`, `rd`, `eval_` primitives. Single-process reference
|
|
5
|
+
impl; multi-process variant swaps the dict for SQLite or Redis.
|
|
6
|
+
|
|
7
|
+
Pattern matching is by predicate (callable on tuple dict). For production,
|
|
8
|
+
prefer indexed lookup by (lane, yool) — see spec §7.
|
|
9
|
+
"""
|
|
10
|
+
from __future__ import annotations
|
|
11
|
+
|
|
12
|
+
import json
|
|
13
|
+
import threading
|
|
14
|
+
import time
|
|
15
|
+
import uuid
|
|
16
|
+
from concurrent.futures import Future, ThreadPoolExecutor
|
|
17
|
+
from datetime import datetime, timezone
|
|
18
|
+
from pathlib import Path
|
|
19
|
+
from typing import Any, Callable
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
# Type of a tuple matcher: called with one tuple dict, returns True on a match.
Predicate = Callable[[dict[str, Any]], bool]
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
def _now_iso() -> str:
|
|
26
|
+
return datetime.now(timezone.utc).isoformat()
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
class TupleSpace:
    """In-process Linda-style tuple space guarded by one condition variable.

    Tuples are plain dicts stored under their ``id``. ``out`` adds a tuple,
    ``rd`` reads one, ``in_`` takes one, and ``eval_`` runs a producer on a
    thread pool and publishes its result. When ``log_path`` is given, every
    ``out``/``in`` is appended to that file as one JSON line.

    The instance can be used as a context manager; leaving the ``with`` block
    calls ``shutdown``.
    """

    def __init__(self, log_path: str | Path | None = None, workers: int = 4):
        """Create an empty space.

        Args:
            log_path: Optional JSON-lines journal; its parent directory is
                created if missing. Falsy disables journaling.
            workers: Maximum threads used by ``eval_``.
        """
        self._tuples: dict[str, dict[str, Any]] = {}
        self._cond = threading.Condition()
        self._log = Path(log_path).expanduser() if log_path else None
        if self._log:
            self._log.parent.mkdir(parents=True, exist_ok=True)
        self._pool = ThreadPoolExecutor(max_workers=workers)

    def __enter__(self) -> "TupleSpace":
        return self

    def __exit__(self, exc_type, exc, tb) -> None:
        self.shutdown()

    def _persist(self, op: str, tup: dict[str, Any]) -> None:
        """Append one journal record for *op*; no-op when journaling is off.

        Raises whatever ``json.dumps`` or the file write raises.
        """
        if not self._log:
            return
        line = json.dumps({"op": op, "at": _now_iso(), "tuple": tup}, ensure_ascii=False)
        with self._log.open("a", encoding="utf-8") as f:
            f.write(line + "\n")

    def out(self, tup: dict[str, Any]) -> str:
        """Place a tuple into the space. Returns tuple id.

        A missing/falsy ``id`` or ``created_at`` is filled in on a copy; the
        caller's dict is not modified.
        """
        tid = tup.get("id") or f"t-{uuid.uuid4().hex[:12]}"
        tup = {**tup, "id": tid, "created_at": tup.get("created_at") or _now_iso()}
        with self._cond:
            # Journal first: if serialisation or the write fails, the space is
            # left untouched instead of holding a tuple nobody was notified of.
            self._persist("out", tup)
            self._tuples[tid] = tup
            self._cond.notify_all()
        return tid

    def _await_match(
        self, pred: Predicate, timeout: float | None, remove: bool
    ) -> dict[str, Any] | None:
        """Shared blocking scan behind ``rd`` (remove=False) and ``in_`` (remove=True)."""
        deadline = (time.monotonic() + timeout) if timeout is not None else None
        with self._cond:
            while True:
                # Scan a snapshot so removal (or a predicate that touches the
                # space) cannot invalidate the iteration.
                for tid, tup in list(self._tuples.items()):
                    if pred(tup):
                        if remove:
                            # Journal before deleting so a failed write does
                            # not lose the tuple.
                            self._persist("in", tup)
                            del self._tuples[tid]
                        return dict(tup)
                if deadline is None:
                    self._cond.wait()
                    continue
                remaining = deadline - time.monotonic()
                if remaining <= 0:
                    return None
                self._cond.wait(timeout=remaining)

    def rd(self, pred: Predicate, timeout: float | None = None) -> dict[str, Any] | None:
        """Non-destructive read. Blocks until a matching tuple appears or timeout.

        Returns a shallow copy of the first match, or ``None`` on timeout.
        ``timeout=None`` waits indefinitely.
        """
        return self._await_match(pred, timeout, remove=False)

    def in_(self, pred: Predicate, timeout: float | None = None) -> dict[str, Any] | None:
        """Destructive take. Removes the first matching tuple from the space.

        Returns a shallow copy of the removed tuple, or ``None`` on timeout.
        ``timeout=None`` waits indefinitely.
        """
        return self._await_match(pred, timeout, remove=True)

    def eval_(self, fn: Callable[[], dict[str, Any]]) -> Future:
        """Run fn() asynchronously; resulting tuple is placed in the space.

        The returned future resolves to ``fn``'s return value; an exception
        raised by ``fn`` or by ``out`` is stored on the future.
        """
        def _runner() -> dict[str, Any]:
            result = fn()
            self.out(result)
            return result

        return self._pool.submit(_runner)

    def all(self) -> list[dict[str, Any]]:
        """Return shallow copies of every tuple currently in the space."""
        with self._cond:
            return [dict(t) for t in self._tuples.values()]

    def shutdown(self) -> None:
        """Wait for pending ``eval_`` jobs and stop the worker pool."""
        self._pool.shutdown(wait=True)
|
|
103
|
+
|
|
104
|
+
|
|
105
|
+
def by_yool(yool: str) -> Predicate:
    """Build a predicate matching tuples whose ``"yool"`` field equals *yool*."""

    def _matches(candidate: dict[str, Any]) -> bool:
        return candidate.get("yool") == yool

    return _matches
|
|
107
|
+
|
|
108
|
+
|
|
109
|
+
def by_lane(lane: str) -> Predicate:
    """Build a predicate matching tuples whose ``"lane"`` field equals *lane*."""

    def _matches(candidate: dict[str, Any]) -> bool:
        return candidate.get("lane") == lane

    return _matches
|
|
111
|
+
|
|
112
|
+
|
|
113
|
+
if __name__ == "__main__":
    # Demo: publish two tuples, drain the "dev" lane, print what is left.
    bus = TupleSpace(log_path=".catalog/tuples.jsonl")

    def worker_dev() -> None:
        """Take "dev" tuples until none arrives within one second."""
        while True:
            t = bus.in_(by_lane("dev"), timeout=1.0)
            if t is None:
                return
            print(f"[dev] processing {t['yool']} args={t['args']}")
            bus.out({
                "yool": "receipt",
                "lane": "audit",
                "parent_id": t["id"],
                "status": "ok",
            })

    try:
        bus.out({"yool": "ide.cursor.send", "args": {"file": "x.py"}, "lane": "dev"})
        bus.out({"yool": "op.jira.fetch_sprint", "args": {"sprint": 42}, "lane": "ops"})

        worker_dev()
        print("final state:", bus.all())
    finally:
        # Stop the worker pool even if the demo raised.
        bus.shutdown()
|
|
@@ -0,0 +1,152 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Content-addressable receipt store. See YOOL_TUPLE_HAMT.md §2.5, §4.3.
|
|
3
|
+
|
|
4
|
+
A receipt is the immutable record of a single yool execution: inputs (hashed),
|
|
5
|
+
outputs (referenced by sha256), timing, status. The receipt's own sha256 over
|
|
6
|
+
its canonical JSON acts as both ID and cache key.
|
|
7
|
+
"""
|
|
8
|
+
from __future__ import annotations
|
|
9
|
+
|
|
10
|
+
import hashlib
|
|
11
|
+
import json
|
|
12
|
+
import os
|
|
13
|
+
from dataclasses import asdict, dataclass, field
|
|
14
|
+
from datetime import datetime, timezone
|
|
15
|
+
from pathlib import Path
|
|
16
|
+
from typing import Any
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
def canonical_json(obj: Any) -> str:
    """Serialise *obj* as compact JSON with sorted keys, keeping non-ASCII text verbatim."""
    compact_separators = (",", ":")
    return json.dumps(
        obj,
        ensure_ascii=False,
        separators=compact_separators,
        sort_keys=True,
    )
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
def sha256_text(s: str) -> str:
    """Return the hex SHA-256 digest of *s* encoded as UTF-8."""
    digest = hashlib.sha256()
    digest.update(s.encode("utf-8"))
    return digest.hexdigest()
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
def sha256_file(path: str | Path) -> str:
    """Return the hex SHA-256 digest of the file at *path*, read in 64 KiB chunks."""
    digest = hashlib.sha256()
    with open(path, "rb") as stream:
        while True:
            block = stream.read(65536)
            if not block:
                break
            digest.update(block)
    return digest.hexdigest()
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
def input_hash(
    yool: str,
    args: dict[str, Any],
    file_shas: list[str] | None = None,
    env_whitelist: dict[str, str] | None = None,
) -> str:
    """Cache key. See spec §4.3.

    Hashes the canonical JSON of the yool name, its args, the sorted file
    digests and the whitelisted environment entries. ``None`` and empty
    values for the two optional arguments hash identically.
    """
    files = sorted(file_shas) if file_shas else []
    env_items = sorted(env_whitelist.items()) if env_whitelist else []
    payload = {
        "yool": yool,
        "args": args,
        "files": files,
        "env": dict(env_items),
    }
    serialised = canonical_json(payload)
    return sha256_text(serialised)
|
|
49
|
+
|
|
50
|
+
|
|
51
|
+
@dataclass
class Artifact:
    """One output file referenced by a receipt."""

    # Location of the artifact body.
    path: str
    # Hex SHA-256 digest of the body.
    sha256: str
    # Size of the body in bytes.
    size_bytes: int
    # Media type; generic binary unless the producer says otherwise.
    mime: str = "application/octet-stream"
|
|
57
|
+
|
|
58
|
+
|
|
59
|
+
@dataclass
class Receipt:
    """Record of a single yool execution.

    ``id`` is excluded from the canonical form so it can be derived from it.
    """

    id: str
    yool: str
    input_hash: str
    args: dict[str, Any]
    status: str
    started_at: str
    ended_at: str
    duration_ms: int
    artifacts: list[Artifact] = field(default_factory=list)
    error: str | None = None
    parent_id: str | None = None
    cpu_quota_pct: int = 100
    # NOTE(review): this field is part of the canonical form, so setting it
    # later changes the hash of the stored record — confirm that is intended.
    artifacts_purged_at: str | None = None

    def to_canonical(self) -> str:
        """Return the canonical JSON of every field except ``id``."""
        payload = {key: value for key, value in asdict(self).items() if key != "id"}
        return canonical_json(payload)
|
|
79
|
+
|
|
80
|
+
|
|
81
|
+
class ReceiptStore:
    """Stores receipts as .catalog/receipts/<sha>.json. Lookup by input_hash via index.

    The index (``receipts.index.json`` under the base directory) maps an
    input hash to the id of the most recently stored receipt for it.
    """

    def __init__(self, base_dir: str | Path):
        """Open (and create if needed) the store rooted at *base_dir*.

        An unreadable, malformed or non-object index file is treated as empty.
        """
        self.base = Path(base_dir).expanduser()
        self.receipts_dir = self.base / "receipts"
        self.index_path = self.base / "receipts.index.json"
        self.receipts_dir.mkdir(parents=True, exist_ok=True)
        self._index: dict[str, str] = self._read_index()

    def _read_index(self) -> dict[str, str]:
        """Load the index file, falling back to an empty mapping."""
        try:
            loaded = json.loads(self.index_path.read_text(encoding="utf-8"))
        except (OSError, ValueError):
            # Missing, unreadable, undecodable or invalid JSON.
            return {}
        # Valid JSON that is not an object (e.g. a list) would break .get().
        return loaded if isinstance(loaded, dict) else {}

    @staticmethod
    def _atomic_write(target: Path, text: str) -> None:
        """Write *text* next to *target*, then rename over it in one step."""
        tmp = target.with_name(target.name + ".tmp")
        tmp.write_text(text, encoding="utf-8")
        os.replace(tmp, target)

    def get_by_input_hash(self, ih: str) -> Receipt | None:
        """Return the receipt indexed under *ih*, or ``None`` if there is none."""
        rid = self._index.get(ih)
        if not rid:
            return None
        return self.load(rid)

    def load(self, receipt_id: str) -> Receipt | None:
        """Read one receipt by id; ``None`` when its file does not exist.

        Raises:
            json.JSONDecodeError: if the file is not valid JSON.
            TypeError: if the stored fields do not match ``Receipt``/``Artifact``.
        """
        p = self.receipts_dir / f"{receipt_id}.json"
        try:
            raw = p.read_text(encoding="utf-8")
        except FileNotFoundError:
            return None
        data = json.loads(raw)
        arts = [Artifact(**a) for a in data.pop("artifacts", [])]
        return Receipt(artifacts=arts, **data)

    def put(self, r: Receipt) -> str:
        """Persist *r* and index it by its input hash. Returns the receipt id.

        An empty ``r.id`` is filled in from the receipt's canonical form.
        Both the receipt file and the index are replaced atomically, so a
        crash mid-write cannot leave a truncated file behind.
        """
        if not r.id:
            r.id = sha256_text(r.to_canonical())
        self._atomic_write(self.receipts_dir / f"{r.id}.json", canonical_json(asdict(r)))
        self._index[r.input_hash] = r.id
        self._atomic_write(self.index_path, canonical_json(self._index))
        return r.id
|
|
121
|
+
|
|
122
|
+
|
|
123
|
+
def make_receipt(
    yool: str,
    args: dict[str, Any],
    status: str,
    started: datetime,
    ended: datetime,
    artifacts: list[Artifact] | None = None,
    error: str | None = None,
    parent_id: str | None = None,
    file_shas: list[str] | None = None,
    env_whitelist: dict[str, str] | None = None,
    cpu_quota_pct: int = 100,
) -> Receipt:
    """Build a receipt for one execution and derive its content-addressed id.

    Args:
        yool, args, file_shas, env_whitelist: inputs to the cache key.
        status, error, parent_id, cpu_quota_pct: stored as given.
        started, ended: converted to UTC ISO strings; their difference gives
            ``duration_ms`` (whole milliseconds, rounded toward negative
            infinity).
        artifacts: copied into a new list; ``None`` means no artifacts.

    Returns:
        A ``Receipt`` whose ``id`` is the SHA-256 of its canonical JSON.
    """
    elapsed = ended - started
    # Integer arithmetic on the timedelta fields. total_seconds() * 1000 goes
    # through a float and can land just below the true value
    # (1.001 s -> 1000.9999999999999 -> 1000 ms after int()).
    duration_ms = (
        (elapsed.days * 86400 + elapsed.seconds) * 1000
        + elapsed.microseconds // 1000
    )
    r = Receipt(
        id="",
        yool=yool,
        input_hash=input_hash(yool, args, file_shas, env_whitelist),
        args=args,
        status=status,
        started_at=started.astimezone(timezone.utc).isoformat(),
        ended_at=ended.astimezone(timezone.utc).isoformat(),
        duration_ms=duration_ms,
        artifacts=list(artifacts or []),
        error=error,
        parent_id=parent_id,
        cpu_quota_pct=cpu_quota_pct,
    )
    r.id = sha256_text(r.to_canonical())
    return r
|
|
@@ -0,0 +1,119 @@
|
|
|
1
|
+
"""
|
|
2
|
+
CPU throttle guardrail. See YOOL_TUPLE_HAMT.md §11.1.
|
|
3
|
+
|
|
4
|
+
Soft throttle via process niceness. For hard throttle, use cgroups (Linux)
|
|
5
|
+
or taskpolicy (macOS) at process launch — see spec §11.1.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
from __future__ import annotations
|
|
9
|
+
|
|
10
|
+
import contextlib
|
|
11
|
+
import os
|
|
12
|
+
import time
|
|
13
|
+
|
|
14
|
+
# CPU quota in percent that cpu_quota_from_env falls back to.
DEFAULT_CPU_QUOTA_PCT = 95
|
|
15
|
+
# Workers per lane that lane_concurrency_from_env falls back to.
DEFAULT_LANE_CONCURRENCY = 32
|
|
16
|
+
# Per-lane worker ceiling that max_lane_concurrency_from_env falls back to.
DEFAULT_MAX_LANE_CONCURRENCY = 64
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
def _positive_int_from_env(names: tuple[str, ...], default: int) -> int:
|
|
20
|
+
for name in names:
|
|
21
|
+
value = os.getenv(name)
|
|
22
|
+
if value is None or not value.strip():
|
|
23
|
+
continue
|
|
24
|
+
try:
|
|
25
|
+
parsed = int(value)
|
|
26
|
+
except ValueError:
|
|
27
|
+
return default
|
|
28
|
+
return parsed if parsed > 0 else default
|
|
29
|
+
return default
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
def cpu_quota_from_env(default: int = DEFAULT_CPU_QUOTA_PCT) -> int:
    """Return CPU quota from env aliases, clamped to [1, 100]."""
    aliases = ("YOOL_TUPLE_CPU_QUOTA_PCT", "YOOL_CPU_QUOTA_PCT")
    requested = _positive_int_from_env(aliases, default)
    capped = min(100, requested)
    return max(1, capped)
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
def lane_concurrency_from_env(default: int = DEFAULT_LANE_CONCURRENCY) -> int:
    """Return preferred workers per lane for high-throughput runtimes."""
    aliases = ("YOOL_TUPLE_LANE_CONCURRENCY", "YOOL_LANE_CONCURRENCY")
    return _positive_int_from_env(aliases, default)
|
|
46
|
+
|
|
47
|
+
|
|
48
|
+
def max_lane_concurrency_from_env(default: int = DEFAULT_MAX_LANE_CONCURRENCY) -> int:
    """Return the per-lane worker ceiling used by high-throughput runtimes."""
    aliases = ("YOOL_TUPLE_MAX_LANE_CONCURRENCY", "YOOL_MAX_LANE_CONCURRENCY")
    return _positive_int_from_env(aliases, default)
|
|
54
|
+
|
|
55
|
+
|
|
56
|
+
@contextlib.contextmanager
def cpu_throttle(quota_pct: int):
    """
    Reduce CPU pressure by raising process niceness for the duration.
    quota_pct in [1, 100]. quota=100 -> no-op.

    Values below 1 are treated as 1. Lower quotas map to larger niceness
    increments (capped at 19). The throttle is best-effort: where ``os.nice``
    is unavailable or refuses the change, the body runs unthrottled.
    """
    if quota_pct >= 100:
        yield
        return
    quota_pct = max(1, quota_pct)

    nice_delta = max(0, min(19, int(round((100 - quota_pct) / 5.2))))
    # os.nice does not exist on every platform (e.g. Windows); treat that the
    # same as a refused change instead of raising AttributeError.
    renice = getattr(os, "nice", None)
    applied = 0
    if renice is not None and nice_delta:
        try:
            before = renice(0)  # increment 0 only reports the current niceness
            # Record what actually changed: the OS clamps at its ceiling, so
            # the effective increment can be smaller than requested.
            applied = renice(nice_delta) - before
        except OSError:
            applied = 0
    try:
        yield
    finally:
        if applied > 0:
            try:
                renice(-applied)
            except OSError:
                # Deliberate best-effort: an unprivileged process usually may
                # not lower its niceness again, so it stays throttled.
                pass
|
|
83
|
+
|
|
84
|
+
|
|
85
|
+
def cooperative_yield(quota_pct: int, last_yield: list[float]) -> None:
    """In-loop cooperative throttle. Caller passes mutable single-element list as state.

    ``last_yield[0]`` holds the ``time.monotonic()`` value of the previous
    call. The time elapsed since then is scaled by the idle/busy ratio implied
    by *quota_pct*; results above 1 ms are slept, capped at one second.
    A quota of 100 or more returns without touching the state.
    """
    if quota_pct >= 100:
        return
    idle_share = 100 - quota_pct
    busy_share = max(1, quota_pct)
    worked_for = time.monotonic() - last_yield[0]
    pause = worked_for * idle_share / busy_share
    if pause > 0.001:
        time.sleep(min(pause, 1.0))
    # Restart the measurement window after any sleep.
    last_yield[0] = time.monotonic()
|
|
95
|
+
|
|
96
|
+
|
|
97
|
+
class DiskQuotaExceeded(RuntimeError):
    """Raised by disk_quota_check when a directory tree is larger than its limit."""
|
|
99
|
+
|
|
100
|
+
|
|
101
|
+
def disk_quota_check(path: str, max_mb: int) -> None:
    """Raise DiskQuotaExceeded if `path` exceeds max_mb.

    Sums the size of every file found by walking *path*; files whose size
    cannot be read are ignored.
    """
    used_bytes = 0
    for folder, _subdirs, names in os.walk(path):
        for name in names:
            entry = os.path.join(folder, name)
            try:
                used_bytes += os.path.getsize(entry)
            except OSError:
                continue
    mb = used_bytes / (1024 * 1024)
    if mb > max_mb:
        raise DiskQuotaExceeded(f"{path} = {mb:.1f}MB > limit {max_mb}MB")
|
|
113
|
+
|
|
114
|
+
|
|
115
|
+
@contextlib.contextmanager
def disk_quota(max_mb: int, path: str = "."):
    """Context-manager wrapper around disk_quota_check.

    The check runs after the managed body; it is not reached when the body
    raises.
    """
    yield
    # Only reached when the body finished without raising.
    disk_quota_check(path=path, max_mb=max_mb)
|
|
@@ -0,0 +1,212 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Disk garbage collector. See YOOL_TUPLE_HAMT.md §11.2.
|
|
3
|
+
|
|
4
|
+
Three-tier retention: hot (default 30d) keeps artifact bodies + receipts;
|
|
5
|
+
warm (default 365d) keeps receipts only; cold keeps receipts forever
|
|
6
|
+
(Merkle chain integrity). Receipts are NEVER deleted, only artifact bodies.
|
|
7
|
+
"""
|
|
8
|
+
from __future__ import annotations
|
|
9
|
+
|
|
10
|
+
import argparse
|
|
11
|
+
import gzip
|
|
12
|
+
import json
|
|
13
|
+
import os
|
|
14
|
+
import shutil
|
|
15
|
+
import sys
|
|
16
|
+
import time
|
|
17
|
+
from datetime import datetime, timezone
|
|
18
|
+
from pathlib import Path
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
class DiskPressure(RuntimeError):
    """Raised by check_disk_pressure when free space is below the floor or cannot be read."""
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
def _du_mb(path: Path) -> float:
|
|
26
|
+
total = 0
|
|
27
|
+
for root, _dirs, files in os.walk(path):
|
|
28
|
+
for f in files:
|
|
29
|
+
try:
|
|
30
|
+
total += os.path.getsize(os.path.join(root, f))
|
|
31
|
+
except OSError:
|
|
32
|
+
pass
|
|
33
|
+
return total / (1024 * 1024)
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
def _parse_iso(ts: str) -> float:
|
|
37
|
+
try:
|
|
38
|
+
return datetime.fromisoformat(ts.replace("Z", "+00:00")).timestamp()
|
|
39
|
+
except (ValueError, AttributeError):
|
|
40
|
+
return 0.0
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
def _purge_artifact_bodies(receipt_path: Path, dry_run: bool) -> tuple[int, int]:
|
|
44
|
+
"""Delete artifact files referenced by receipt. Mark receipt as purged.
|
|
45
|
+
Returns (files_deleted, bytes_freed)."""
|
|
46
|
+
try:
|
|
47
|
+
data = json.loads(receipt_path.read_text(encoding="utf-8"))
|
|
48
|
+
except (OSError, json.JSONDecodeError):
|
|
49
|
+
return 0, 0
|
|
50
|
+
if data.get("artifacts_purged_at"):
|
|
51
|
+
return 0, 0
|
|
52
|
+
deleted = 0
|
|
53
|
+
freed = 0
|
|
54
|
+
for art in data.get("artifacts", []):
|
|
55
|
+
p = Path(art.get("path", ""))
|
|
56
|
+
if not p.is_absolute():
|
|
57
|
+
p = receipt_path.parent.parent / p
|
|
58
|
+
if p.exists() and p.is_file():
|
|
59
|
+
try:
|
|
60
|
+
sz = p.stat().st_size
|
|
61
|
+
if not dry_run:
|
|
62
|
+
p.unlink()
|
|
63
|
+
deleted += 1
|
|
64
|
+
freed += sz
|
|
65
|
+
except OSError:
|
|
66
|
+
pass
|
|
67
|
+
if deleted and not dry_run:
|
|
68
|
+
data["artifacts_purged_at"] = datetime.now(timezone.utc).isoformat()
|
|
69
|
+
try:
|
|
70
|
+
receipt_path.write_text(json.dumps(data), encoding="utf-8")
|
|
71
|
+
except OSError:
|
|
72
|
+
pass
|
|
73
|
+
return deleted, freed
|
|
74
|
+
|
|
75
|
+
|
|
76
|
+
def _find_oldest_artifact(receipts_dir: Path) -> Path | None:
    """Return the receipt with the earliest ``ended_at`` that still lists artifacts.

    Receipts that are unreadable, not JSON objects, already marked purged,
    without artifacts, or without a parseable ``ended_at`` are skipped.
    Returns ``None`` when no receipt qualifies.
    """
    oldest_ts = float("inf")
    oldest: Path | None = None
    for rp in receipts_dir.glob("*.json"):
        try:
            data = json.loads(rp.read_text(encoding="utf-8"))
        except (OSError, ValueError):
            continue
        # A stray non-object JSON file must not abort the scan.
        if not isinstance(data, dict):
            continue
        if data.get("artifacts_purged_at") or not data.get("artifacts"):
            continue
        ts = _parse_iso(data.get("ended_at", ""))
        # ts == 0.0 means the timestamp could not be parsed.
        if ts and ts < oldest_ts:
            oldest_ts = ts
            oldest = rp
    return oldest
|
|
93
|
+
|
|
94
|
+
|
|
95
|
+
def _rotate_daily(tuples_log: Path, dry_run: bool) -> bool:
|
|
96
|
+
"""Rotate tuples.jsonl daily, gzip yesterday's."""
|
|
97
|
+
if not tuples_log.exists():
|
|
98
|
+
return False
|
|
99
|
+
mtime = tuples_log.stat().st_mtime
|
|
100
|
+
age_days = (time.time() - mtime) / 86400
|
|
101
|
+
if age_days < 1.0:
|
|
102
|
+
return False
|
|
103
|
+
stamp = datetime.fromtimestamp(mtime, timezone.utc).strftime("%Y%m%d")
|
|
104
|
+
rotated = tuples_log.with_name(f"tuples-{stamp}.jsonl.gz")
|
|
105
|
+
if rotated.exists():
|
|
106
|
+
return False
|
|
107
|
+
if dry_run:
|
|
108
|
+
return True
|
|
109
|
+
with tuples_log.open("rb") as src, gzip.open(rotated, "wb") as dst:
|
|
110
|
+
shutil.copyfileobj(src, dst)
|
|
111
|
+
tuples_log.write_bytes(b"")
|
|
112
|
+
return True
|
|
113
|
+
|
|
114
|
+
|
|
115
|
+
def gc_run(
    catalog_dir: str | Path,
    hot_days: int = 30,
    warm_days: int = 365,
    max_total_mb: int = 5000,
    dry_run: bool = False,
) -> dict:
    """Run garbage collection over a .catalog directory.

    Phase 1: purge artifact bodies for receipts older than hot_days.
    Phase 2: enforce size cap by purging oldest artifacts until under max_total_mb.
    Phase 3: rotate tuples.jsonl daily, gzip yesterday's.

    Args:
        catalog_dir: Catalog root holding ``receipts/`` and ``tuples.jsonl``.
        hot_days: Age in days after which artifact bodies are purged.
        warm_days: Accepted for interface compatibility; no phase acts on it.
        max_total_mb: Size cap for the catalog directory in MiB.
        dry_run: Report what would happen without deleting or rewriting. The
            phase 2 size is then estimated by subtracting the bytes that
            would have been freed.

    Returns:
        A stats dict with per-phase file counts and freed MiB, the rotation
        flag, the measured size before and after, and the ``dry_run`` flag.
        A missing catalog directory yields the all-zero stats.
    """
    base = Path(catalog_dir).expanduser()
    receipts_dir = base / "receipts"
    tuples_log = base / "tuples.jsonl"
    mib = 1024 * 1024

    stats = {
        "phase1_purged_files": 0,
        "phase1_freed_mb": 0.0,
        "phase2_purged_files": 0,
        "phase2_freed_mb": 0.0,
        "rotated": False,
        "size_before_mb": 0.0,
        "size_after_mb": 0.0,
        "dry_run": dry_run,
    }
    if not base.exists():
        return stats

    stats["size_before_mb"] = _du_mb(base)
    hot_cutoff = time.time() - hot_days * 86400

    # Phase 1, and in the same pass collect the younger receipts that phase 2
    # may need, so the receipts directory is read only once.
    younger: list[tuple[float, Path]] = []
    if receipts_dir.exists():
        for rp in receipts_dir.glob("*.json"):
            try:
                data = json.loads(rp.read_text(encoding="utf-8"))
            except (OSError, ValueError):
                continue
            if not isinstance(data, dict) or data.get("artifacts_purged_at"):
                continue
            ts = _parse_iso(data.get("ended_at", ""))
            if not ts:
                # Unparseable end time: the receipt's age is unknown.
                continue
            if ts < hot_cutoff:
                d, freed = _purge_artifact_bodies(rp, dry_run)
                stats["phase1_purged_files"] += d
                stats["phase1_freed_mb"] += freed / mib
            elif data.get("artifacts"):
                younger.append((ts, rp))

    # Phase 2: walk the candidates once, oldest first. Each receipt is tried
    # at most once, so the loop terminates in dry-run mode (where nothing is
    # deleted or marked) and a receipt whose bodies are already gone does not
    # stop the remaining ones from being purged.
    current_mb = _du_mb(base)
    if dry_run:
        current_mb -= stats["phase1_freed_mb"]
    younger.sort(key=lambda item: item[0])
    for _ts, rp in younger:
        if current_mb <= max_total_mb:
            break
        d, freed = _purge_artifact_bodies(rp, dry_run)
        if d == 0:
            continue
        stats["phase2_purged_files"] += d
        stats["phase2_freed_mb"] += freed / mib
        current_mb = (current_mb - freed / mib) if dry_run else _du_mb(base)

    stats["rotated"] = _rotate_daily(tuples_log, dry_run)
    stats["size_after_mb"] = _du_mb(base)
    return stats
|
|
177
|
+
|
|
178
|
+
|
|
179
|
+
def check_disk_pressure(catalog_dir: str | Path, free_mb_floor: int = 1000) -> None:
    """Raise DiskPressure if free space on catalog_dir's filesystem drops below floor.

    When the catalog directory does not exist, its parent is probed instead.
    A failure to read the disk usage is also reported as DiskPressure.
    """
    base = Path(catalog_dir).expanduser()
    try:
        probe = base if base.exists() else base.parent
        free_bytes = shutil.disk_usage(probe).free
    except OSError as e:
        raise DiskPressure(f"cannot stat {base}: {e}") from e
    free_mb = free_bytes / (1024 * 1024)
    if free_mb < free_mb_floor:
        raise DiskPressure(f"free={free_mb:.0f}MB < floor={free_mb_floor}MB on {base}")
|
|
189
|
+
|
|
190
|
+
|
|
191
|
+
def main() -> int:
    """Command-line entry point: parse options, run the GC, print its stats as JSON.

    Returns the process exit code (always 0).
    """
    parser = argparse.ArgumentParser(description="Run disk GC over a .catalog directory.")
    parser.add_argument("--catalog-dir", required=True, help="Path to .catalog directory")
    for flag, fallback in (("--hot-days", 30), ("--warm-days", 365), ("--max-mb", 5000)):
        parser.add_argument(flag, type=int, default=fallback)
    parser.add_argument("--dry-run", action="store_true")
    opts = parser.parse_args()

    report = gc_run(
        opts.catalog_dir,
        hot_days=opts.hot_days,
        warm_days=opts.warm_days,
        max_total_mb=opts.max_mb,
        dry_run=opts.dry_run,
    )
    print(json.dumps(report, indent=2))
    return 0
|
|
209
|
+
|
|
210
|
+
|
|
211
|
+
if __name__ == "__main__":
    # Exit with main()'s return value as the process status.
    raise SystemExit(main())
|
package/kernel/README.md
ADDED
|
@@ -0,0 +1,82 @@
|
|
|
1
|
+
# Yool Tuple Space Kernel
|
|
2
|
+
|
|
3
|
+
Reference Python kernel for Tuple-Space + Yool Architecture.
|
|
4
|
+
|
|
5
|
+
Core file: `kernel/yool_tuple_kernel.py`.
|
|
6
|
+
|
|
7
|
+
## Primitives
|
|
8
|
+
|
|
9
|
+
- `out_tuple`, `in_tuple`, `rd_tuple`
|
|
10
|
+
- `spawn_agent`
|
|
11
|
+
- `batch_spawn(depth, branching, compression_threshold)`
|
|
12
|
+
- `route_packet`
|
|
13
|
+
- `scan_index`
|
|
14
|
+
- `hookwall`
|
|
15
|
+
- `compress_token`
|
|
16
|
+
- `prune_idle`
|
|
17
|
+
- `ReceiptCache` for receipt/input-hash dedupe
|
|
18
|
+
- `ProviderCircuitBreaker` and jittered backoff for API/LLM calls
|
|
19
|
+
- `ContextCompressor` for large prompt/context payloads
|
|
20
|
+
- `LaneWorkerPool` for adaptive per-lane fan-out and small-task batching
|
|
21
|
+
|
|
22
|
+
## Scale model
|
|
23
|
+
|
|
24
|
+
Use `batch_spawn(root, "codex_worker", depth=4, branching=32)` to represent
|
|
25
|
+
1,048,576 subagents without materializing a flat million-item Python list. The
|
|
26
|
+
kernel stores a lazy batch controller tuple plus compressed virtual-agent
|
|
27
|
+
accounting, then materializes only active work.
|
|
28
|
+
|
|
29
|
+
Inactive materialized agents can be compacted with `compress_token(agent_id)`.
|
|
30
|
+
`prune_idle(max_active)` automatically compresses the oldest active subagents.
|
|
31
|
+
|
|
32
|
+
## Safe speed model
|
|
33
|
+
|
|
34
|
+
The reference kernel increases throughput without provider-ban risk by avoiding
|
|
35
|
+
repeat work and by slowing down safely when providers fail:
|
|
36
|
+
|
|
37
|
+
- `TupleSpace.execute_tuple(...)` checks registered local yools first, compresses
|
|
38
|
+
large LLM context, hits `ReceiptCache`, then calls the provider with jittered
|
|
39
|
+
backoff and a provider-level circuit breaker.
|
|
40
|
+
- `LaneWorkerPool.concurrency_for(lane)` adapts from lane queue depth, latency,
|
|
41
|
+
failures, and env ceilings instead of using one fixed worker count forever.
|
|
42
|
+
- `LaneWorkerPool.run_lane_batched(...)` groups small lane tasks into bounded
|
|
43
|
+
batches.
|
|
44
|
+
- `speculative_executor` is only used when the tuple data explicitly sets
|
|
45
|
+
`idempotent=True`.
|
|
46
|
+
|
|
47
|
+
## Runtime policy
|
|
48
|
+
|
|
49
|
+
Environment aliases:
|
|
50
|
+
|
|
51
|
+
- `YOOL_TUPLE_LANE_CONCURRENCY` / `YOOL_LANE_CONCURRENCY`, default `32`
|
|
52
|
+
- `YOOL_TUPLE_MAX_LANE_CONCURRENCY` / `YOOL_MAX_LANE_CONCURRENCY`, default `64`
|
|
53
|
+
- `YOOL_TUPLE_CPU_QUOTA_PCT` / `YOOL_CPU_QUOTA_PCT`, default `95`
|
|
54
|
+
- `YOOL_TUPLE_QUEUE_MAXSIZE` / `YOOL_QUEUE_MAXSIZE`, default `8192`
|
|
55
|
+
- `YOOL_TUPLE_COMPRESSION_THRESHOLD` / `YOOL_COMPRESSION_THRESHOLD`, default `1024`
|
|
56
|
+
- `YOOL_TUPLE_CACHE_MAX_ENTRIES` / `YOOL_CACHE_MAX_ENTRIES`, default `16384`
|
|
57
|
+
- `YOOL_TUPLE_CACHE_TTL_S` / `YOOL_CACHE_TTL_S`, default `3600`
|
|
58
|
+
- `YOOL_TUPLE_API_MAX_RETRIES` / `YOOL_API_MAX_RETRIES`, default `3`
|
|
59
|
+
- `YOOL_TUPLE_API_BACKOFF_BASE_MS` / `YOOL_API_BACKOFF_BASE_MS`, default `100`
|
|
60
|
+
- `YOOL_TUPLE_API_BACKOFF_MAX_MS` / `YOOL_API_BACKOFF_MAX_MS`, default `5000`
|
|
61
|
+
- `YOOL_TUPLE_CIRCUIT_FAILURE_THRESHOLD` / `YOOL_CIRCUIT_FAILURE_THRESHOLD`, default `5`
|
|
62
|
+
- `YOOL_TUPLE_CIRCUIT_COOLDOWN_S` / `YOOL_CIRCUIT_COOLDOWN_S`, default `30`
|
|
63
|
+
- `YOOL_TUPLE_BATCH_SMALL_TASK_SIZE` / `YOOL_BATCH_SMALL_TASK_SIZE`, default `32`
|
|
64
|
+
- `YOOL_TUPLE_CONTEXT_COMPRESSION_CHARS` / `YOOL_CONTEXT_COMPRESSION_CHARS`, default `6000`
|
|
65
|
+
|
|
66
|
+
## Usage
|
|
67
|
+
|
|
68
|
+
```python
|
|
69
|
+
from kernel.yool_tuple_kernel import build_default_space
|
|
70
|
+
|
|
71
|
+
space, root = build_default_space()
|
|
72
|
+
receipt = space.batch_spawn(root, "codex_worker", depth=4, branching=32)
|
|
73
|
+
print(receipt.virtual_agents) # 1048576
|
|
74
|
+
print(space.snapshot())
|
|
75
|
+
```
|
|
76
|
+
|
|
77
|
+
Run:
|
|
78
|
+
|
|
79
|
+
```bash
|
|
80
|
+
python kernel/yool_tuple_kernel.py
|
|
81
|
+
python -m unittest discover -s tests -p "test_*.py"
|
|
82
|
+
```
|