chronomemory 0.1.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- chronomemory/__init__.py +94 -0
- chronomemory/bridge.py +249 -0
- chronomemory/context_pack.py +214 -0
- chronomemory/deps.py +272 -0
- chronomemory/metrics.py +170 -0
- chronomemory/py.typed +1 -0
- chronomemory/query.py +306 -0
- chronomemory/rollback.py +171 -0
- chronomemory/store.py +706 -0
- chronomemory-0.1.2.dist-info/METADATA +395 -0
- chronomemory-0.1.2.dist-info/RECORD +15 -0
- chronomemory-0.1.2.dist-info/WHEEL +5 -0
- chronomemory-0.1.2.dist-info/licenses/COMMERCIAL-LICENSE.md +55 -0
- chronomemory-0.1.2.dist-info/licenses/LICENSE +56 -0
- chronomemory-0.1.2.dist-info/top_level.txt +1 -0
chronomemory/__init__.py
ADDED
|
@@ -0,0 +1,94 @@
|
|
|
1
|
+
# SPDX-License-Identifier: MIT
|
|
2
|
+
# Copyright (c) 2026 Layer1Labs Silicon, Inc. / BitConcepts, LLC.
|
|
3
|
+
"""chronomemory — Epistemic State Database for agentic AI workflows.
|
|
4
|
+
|
|
5
|
+
Optional Rust acceleration:
|
|
6
|
+
Build the Rust extension with maturin to enable a faster backend::
|
|
7
|
+
|
|
8
|
+
pip install maturin
|
|
9
|
+
maturin develop --manifest-path crates/chronomemory-py/Cargo.toml
|
|
10
|
+
|
|
11
|
+
When the ``_chronomemory_rust`` extension is present it is imported
|
|
12
|
+
automatically and ``RUST_BACKEND`` is set to ``True``.
|
|
13
|
+
|
|
14
|
+
Quick start::
|
|
15
|
+
|
|
16
|
+
from chronomemory import ChronoStore, ChronoRecord
|
|
17
|
+
|
|
18
|
+
with ChronoStore("/path/to/project") as store:
|
|
19
|
+
store.upsert(ChronoRecord(
|
|
20
|
+
id="FACT-001",
|
|
21
|
+
kind="fact",
|
|
22
|
+
label="CPSC projection is the sole validity authority",
|
|
23
|
+
source_type="observed",
|
|
24
|
+
confidence=0.99,
|
|
25
|
+
evidence=["CPSC-Specification.md §9"],
|
|
26
|
+
))
|
|
27
|
+
|
|
28
|
+
facts = store.query(kind="fact", rag_filter=True)
|
|
29
|
+
|
|
30
|
+
Spec: ESDB-Specification.md v1.0 (Layer1Labs / BitConcepts)
|
|
31
|
+
"""
|
|
32
|
+
|
|
33
|
+
# ---------------------------------------------------------------------------
|
|
34
|
+
# Optional Rust acceleration
|
|
35
|
+
# ---------------------------------------------------------------------------
|
|
36
|
+
# Try to import the PyO3-compiled extension module. If unavailable (e.g. the
|
|
37
|
+
# Rust extension has not been built yet), fall back silently to pure Python.
|
|
38
|
+
# Build with: maturin develop --manifest-path crates/chronomemory-py/Cargo.toml
|
|
39
|
+
from typing import Any
|
|
40
|
+
|
|
41
|
+
from chronomemory.bridge import (
|
|
42
|
+
EsdbBridge,
|
|
43
|
+
EsdbRecord,
|
|
44
|
+
EsdbStatus,
|
|
45
|
+
)
|
|
46
|
+
from chronomemory.context_pack import ContextPack, ContextPackCompiler, ContextPackEntry
|
|
47
|
+
from chronomemory.deps import DependencyEdge, DepGraph
|
|
48
|
+
from chronomemory.rollback import RollbackReport, invalidate
|
|
49
|
+
from chronomemory.store import (
|
|
50
|
+
ChronoRecord,
|
|
51
|
+
ChronoStore,
|
|
52
|
+
WalEvent,
|
|
53
|
+
open_store,
|
|
54
|
+
)
|
|
55
|
+
|
|
56
|
+
RustChronoStore: Any | None = None
|
|
57
|
+
RustRecord: Any | None = None
|
|
58
|
+
RUST_BACKEND: bool = False
|
|
59
|
+
|
|
60
|
+
try:
|
|
61
|
+
import _chronomemory_rust as _rust # noqa: PLC0415
|
|
62
|
+
|
|
63
|
+
RustChronoStore = _rust.RustChronoStore
|
|
64
|
+
RustRecord = _rust.RustRecord
|
|
65
|
+
RUST_BACKEND = True
|
|
66
|
+
except ImportError:
|
|
67
|
+
pass
|
|
68
|
+
|
|
69
|
+
__version__ = "0.1.2"

# Public API of the package, grouped by subsystem.
__all__ = [
    # Core store
    "ChronoStore", "ChronoRecord", "WalEvent", "open_store",
    # Bridge (unified read/write with .specsmith/ fallback)
    "EsdbBridge", "EsdbRecord", "EsdbStatus",
    # Phase 2: dependency graph
    "DepGraph", "DependencyEdge",
    # Phase 2: epistemic rollback
    "RollbackReport", "invalidate",
    # Phase 2: context pack compiler
    "ContextPack", "ContextPackCompiler", "ContextPackEntry",
    # Phase 3: optional Rust acceleration
    "RustChronoStore", "RustRecord", "RUST_BACKEND",
]
|
chronomemory/bridge.py
ADDED
|
@@ -0,0 +1,249 @@
|
|
|
1
|
+
# SPDX-License-Identifier: MIT
|
|
2
|
+
# Copyright (c) 2026 Layer1Labs Silicon, Inc. / BitConcepts, LLC.
|
|
3
|
+
"""ESDB bridge — adapter between .specsmith/ JSON and ESDB concepts.
|
|
4
|
+
|
|
5
|
+
Delegation strategy:
|
|
6
|
+
1. If .chronomemory/events.wal exists → delegate to ChronoStore (full
|
|
7
|
+
WAL-based engine with OEA anti-hallucination fields).
|
|
8
|
+
2. Otherwise → read flat .specsmith/*.json files (legacy fallback).
|
|
9
|
+
|
|
10
|
+
Write operations (upsert_record, delete_record) are only available when
|
|
11
|
+
ChronoStore is active. Callers should run ``specsmith esdb migrate`` to
|
|
12
|
+
convert a legacy project before calling write paths.
|
|
13
|
+
"""
|
|
14
|
+
|
|
15
|
+
from __future__ import annotations
|
|
16
|
+
|
|
17
|
+
import json
|
|
18
|
+
from dataclasses import dataclass, field
|
|
19
|
+
from pathlib import Path
|
|
20
|
+
from typing import Any
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
@dataclass
class EsdbRecord:
    """Python mirror of the Rust Record type.

    Only ``id`` is required; every other field has a default.
    """

    id: str
    kind: str = "fact"  # Default matches ChronoRecord and ESDB spec
    status: str = "active"
    confidence: float = 0.7
    label: str = ""
    # Raw source payload; when non-empty it is what ``to_dict`` returns.
    data: dict[str, Any] = field(default_factory=dict)
    source_ids: list[str] = field(default_factory=list)

    def to_dict(self) -> dict[str, Any]:
        """Return the record as a dict (used for export/backup).

        A non-empty ``data`` payload is returned as-is -- the same object,
        not a copy.  Otherwise a minimal dict is built from the scalar
        fields (``source_ids`` is not part of it).
        """
        if self.data:
            return self.data
        return dict(
            id=self.id,
            kind=self.kind,
            status=self.status,
            confidence=self.confidence,
            label=self.label,
        )
|
|
48
|
+
|
|
49
|
+
|
|
50
|
+
@dataclass
class EsdbStatus:
    """ESDB health status for the REST API."""

    available: bool
    # Free-form backend label; EsdbBridge.status() in this module sets it to
    # "ChronoStore WAL" or ".specsmith/ JSON (run esdb migrate to upgrade)".
    backend: str
    record_count: int = 0
    wal_seq: int = 0
    epoch: int = 0
    chain_valid: bool = True

    def to_dict(self) -> dict[str, Any]:
        """Return every status field in a plain dict, in declaration order."""
        names = (
            "available",
            "backend",
            "record_count",
            "wal_seq",
            "epoch",
            "chain_valid",
        )
        return {name: getattr(self, name) for name in names}
|
|
70
|
+
|
|
71
|
+
|
|
72
|
+
class EsdbBridge:
    """Unified bridge to ESDB: delegates to ChronoStore when available.

    Delegation strategy:
    - ChronoStore (.chronomemory/events.wal): full WAL engine, all writes
    - JSON fallback (.specsmith/*.json): legacy read-only access

    The legacy JSON files are parsed at most once per bridge instance and the
    result is cached.  A missing, unreadable or malformed file is treated as
    an empty record list rather than an error.
    """

    def __init__(self, project_dir: str = ".") -> None:
        """Bind the bridge to *project_dir*; nothing is opened or read yet."""
        self.root = Path(project_dir).resolve()
        self._requirements: list[EsdbRecord] | None = None
        self._testcases: list[EsdbRecord] | None = None
        self._store: Any = None  # ChronoStore | None

    def _get_store(self) -> Any:
        """Return an open ChronoStore if available, else None.

        The store is opened lazily the first time the WAL file is found and
        is reused afterwards.  If opening fails the bridge falls back to the
        JSON path and retries the open on the next call.
        """
        if self._store is not None:
            return self._store
        wal = self.root / ".chronomemory" / "events.wal"
        if wal.exists():
            try:
                from chronomemory.store import ChronoStore

                self._store = ChronoStore(self.root).open()
            except Exception:  # noqa: BLE001
                # Deliberate best-effort: any failure to open the WAL engine
                # downgrades the bridge to the legacy JSON fallback.
                self._store = None
        return self._store

    @staticmethod
    def _from_chrono(rec: Any) -> EsdbRecord:
        """Convert one ChronoStore record into an EsdbRecord.

        ``evidence`` on the store record maps to ``source_ids`` here.
        """
        return EsdbRecord(
            id=rec.id,
            kind=rec.kind,
            status=rec.status,
            confidence=rec.confidence,
            label=rec.label,
            data=rec.data,
            source_ids=rec.evidence,
        )

    def status(self) -> EsdbStatus:
        """Return ESDB status.

        ``epoch`` is left at its default in both branches; ``wal_seq`` and
        ``chain_valid`` are only filled in when ChronoStore is active.
        """
        store = self._get_store()
        if store is not None:
            return EsdbStatus(
                available=True,
                backend="ChronoStore WAL",
                record_count=store.record_count(),
                wal_seq=store.wal_seq(),
                chain_valid=store.chain_valid(),
            )
        # Legacy JSON fallback
        reqs = self._load_requirements()
        tests = self._load_testcases()
        return EsdbStatus(
            available=True,
            backend=".specsmith/ JSON (run esdb migrate to upgrade)",
            record_count=len(reqs) + len(tests),
        )

    def requirements(self) -> list[EsdbRecord]:
        """Load requirements as ESDB records."""
        store = self._get_store()
        if store is not None:
            return [self._from_chrono(r) for r in store.query(kind="requirement")]
        return self._load_requirements()

    def testcases(self) -> list[EsdbRecord]:
        """Load test cases as ESDB records."""
        store = self._get_store()
        if store is not None:
            return [self._from_chrono(r) for r in store.query(kind="testcase")]
        return self._load_testcases()

    def record_counts(self) -> dict[str, int]:
        """Record counts by kind (for dashboard).

        With ChronoStore the keys are the record kinds returned by the store.
        In the JSON fallback the keys are the fixed plural names
        ``"requirements"`` and ``"testcases"``.
        """
        store = self._get_store()
        if store is not None:
            counts: dict[str, int] = {}
            for r in store.query():
                counts[r.kind] = counts.get(r.kind, 0) + 1
            return counts
        return {
            "requirements": len(self._load_requirements()),
            "testcases": len(self._load_testcases()),
        }

    def upsert_record(self, record: EsdbRecord) -> bool:
        """Write or update a record.

        Returns True when the record was handed to ChronoStore without error.
        Returns False when no ChronoStore is active (legacy JSON projects are
        read-only) or when the write raised.
        """
        store = self._get_store()
        if store is None:
            return False
        try:
            from chronomemory.store import ChronoRecord

            chrono_rec = ChronoRecord(
                id=record.id,
                kind=record.kind,
                status=record.status,
                confidence=record.confidence,
                label=record.label,
                data=record.data,
                evidence=record.source_ids,
            )
            store.upsert(chrono_rec)
            return True
        except Exception:  # noqa: BLE001
            # Best-effort write: failures are reported through the return value.
            return False

    def delete_record(self, record_id: str) -> bool:
        """Tombstone a record.

        Returns True when ChronoStore accepted the delete, False when no
        ChronoStore is active or the delete raised.
        """
        store = self._get_store()
        if store is None:
            return False
        try:
            store.delete(record_id)
            return True
        except Exception:  # noqa: BLE001
            return False

    def _load_json_records(
        self, filename: str, kind: str, default_confidence: float
    ) -> list[EsdbRecord]:
        """Parse ``.specsmith/<filename>`` into EsdbRecord objects.

        The file is expected to hold a JSON array of objects; non-object
        array items are skipped.  Any other top-level JSON value, a missing
        or unreadable file, invalid JSON, or a ``confidence`` value that
        cannot be converted to float all yield an empty list.
        """
        path = self.root / ".specsmith" / filename
        if not path.is_file():
            return []
        try:
            raw = json.loads(path.read_text(encoding="utf-8"))
            if not isinstance(raw, list):
                # Scalars (42, null, true) are not iterable; dicts and strings
                # iterate but never yield dict items.  All mean "no records".
                return []
            return [
                EsdbRecord(
                    id=r.get("id", ""),
                    kind=kind,
                    label=r.get("title", ""),
                    confidence=float(r.get("confidence", default_confidence)),
                    data=r,
                )
                for r in raw
                if isinstance(r, dict)
            ]
        except (OSError, ValueError, TypeError):
            # ValueError covers invalid JSON, bad encodings and float("abc");
            # TypeError covers float(None) and similar non-numeric values.
            return []

    def _load_requirements(self) -> list[EsdbRecord]:
        """Return the cached legacy requirements, loading them on first use."""
        if self._requirements is None:
            self._requirements = self._load_json_records(
                "requirements.json", "requirement", 0.7
            )
        return self._requirements

    def _load_testcases(self) -> list[EsdbRecord]:
        """Return the cached legacy test cases, loading them on first use."""
        if self._testcases is None:
            self._testcases = self._load_json_records("testcases.json", "testcase", 1.0)
        return self._testcases
|
chronomemory/context_pack.py
ADDED
|
@@ -0,0 +1,214 @@
|
|
|
1
|
+
# SPDX-License-Identifier: MIT
|
|
2
|
+
# Copyright (c) 2026 Layer1Labs Silicon, Inc. / BitConcepts, LLC.
|
|
3
|
+
"""Context Pack Compiler — minimal verified prompt payloads for agent tasks.
|
|
4
|
+
|
|
5
|
+
Phase 2 / Issue #3: Context Pack Compiler
|
|
6
|
+
Spec: ESDB Master Spec §18
|
|
7
|
+
|
|
8
|
+
Assembles only the epistemic state an agent actually needs, excluding
|
|
9
|
+
stale, invalidated, and unsupported records, then respects a token budget
|
|
10
|
+
by dropping lowest-confidence candidates first.
|
|
11
|
+
"""
|
|
12
|
+
|
|
13
|
+
from __future__ import annotations
|
|
14
|
+
|
|
15
|
+
from dataclasses import dataclass, field
|
|
16
|
+
from typing import TYPE_CHECKING, Any
|
|
17
|
+
|
|
18
|
+
if TYPE_CHECKING:
|
|
19
|
+
from chronomemory.deps import DepGraph
|
|
20
|
+
from chronomemory.store import ChronoRecord, ChronoStore
|
|
21
|
+
|
|
22
|
+
# ---------------------------------------------------------------------------
|
|
23
|
+
# Constants
|
|
24
|
+
# ---------------------------------------------------------------------------
|
|
25
|
+
|
|
26
|
+
_CHARS_PER_TOKEN: int = 4  # conservative estimate per spec §18
_MIN_CONFIDENCE: float = 0.6  # H18 threshold

# Infrastructure record kinds; these are kept out of context packs
_INFRA_KINDS: frozenset[str] = frozenset(
    {"edge", "rollback_event", "token_metric", "skill_run"}
)

# Record statuses that cause a record to be excluded
_EXCLUDED_STATUSES: frozenset[str] = frozenset(
    {"tombstone", "invalidated", "hypothesis"}
)
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
# ---------------------------------------------------------------------------
|
|
37
|
+
# Data types
|
|
38
|
+
# ---------------------------------------------------------------------------
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
@dataclass
class ContextPackEntry:
    """One record selected for inclusion in a ContextPack."""

    record_id: str
    kind: str
    label: str
    confidence: float
    token_estimate: int  # estimated token cost of this entry
    data: dict[str, Any] = field(default_factory=dict)

    def to_dict(self) -> dict[str, Any]:
        """Return the entry as a plain dict; ``data`` is not copied."""
        return dict(
            record_id=self.record_id,
            kind=self.kind,
            label=self.label,
            confidence=self.confidence,
            token_estimate=self.token_estimate,
            data=self.data,
        )
|
|
61
|
+
|
|
62
|
+
|
|
63
|
+
@dataclass
class ExclusionReason:
    """Why a particular record was left out of a ContextPack."""

    record_id: str
    reason: str  # human-readable explanation

    def to_dict(self) -> dict[str, str]:
        """Return the record id and reason as a plain dict."""
        return dict(record_id=self.record_id, reason=self.reason)
|
|
72
|
+
|
|
73
|
+
|
|
74
|
+
@dataclass
class ContextPack:
    """A compiled, budget-constrained set of records ready for LLM injection.

    Holds both the selected entries and the reasons other records were left
    out.  Serializable via :py:meth:`to_dict` for JSON injection into LLM
    context.
    """

    task_id: str
    goal: str
    token_budget: int
    entries: list[ContextPackEntry] = field(default_factory=list)
    excluded: list[ExclusionReason] = field(default_factory=list)

    @property
    def token_count(self) -> int:
        """Sum of the token estimates of all included entries."""
        total = 0
        for entry in self.entries:
            total += entry.token_estimate
        return total

    def to_dict(self) -> dict[str, Any]:
        """Return the pack, including nested entries and exclusions, as a dict."""
        entry_dicts = [entry.to_dict() for entry in self.entries]
        excluded_dicts = [item.to_dict() for item in self.excluded]
        return dict(
            task_id=self.task_id,
            goal=self.goal,
            token_budget=self.token_budget,
            token_count=self.token_count,
            entries=entry_dicts,
            excluded=excluded_dicts,
        )
|
|
101
|
+
|
|
102
|
+
|
|
103
|
+
# ---------------------------------------------------------------------------
|
|
104
|
+
# ContextPackCompiler
|
|
105
|
+
# ---------------------------------------------------------------------------
|
|
106
|
+
|
|
107
|
+
|
|
108
|
+
class ContextPackCompiler:
    """Assembles a minimal, verified prompt payload for a given task.

    Usage::

        compiler = ContextPackCompiler(store, dep_graph)
        pack = compiler.compile(task_id="TASK-42", goal="fix ruff errors", token_budget=4096)

        # Inject into LLM context
        context_json = pack.to_dict()

    The ``dep_graph`` argument is optional.  It is accepted and stored, but
    this class does not consult it yet: relevance is decided by keyword
    matching only.
    """

    def __init__(self, store: ChronoStore, dep_graph: DepGraph | None = None) -> None:
        self._store = store
        # Kept for a graph-relevance pass; no method of this class reads it.
        self._dep_graph = dep_graph

    def compile(
        self,
        task_id: str,
        goal: str,
        token_budget: int = 4096,
    ) -> ContextPack:
        """Compile a token-budget-constrained ContextPack for a task.

        Inclusion rules (checked in this order; the first failure is the
        recorded exclusion reason):
        * Status must not be ``tombstone``, ``invalidated`` or ``hypothesis``
        * Confidence must be ≥ 0.6 (H18)
        * Kind must not be an infrastructure record (edge, rollback_event, …)
        * Label must share at least one whitespace-separated, lower-cased
          word with *goal* (skipped when *goal* is empty or blank)

        Budget enforcement:
        * Candidates are sorted by confidence descending; ties keep the
          order in which the store returned them
        * Each candidate is added if it still fits in the remaining budget;
          one that does not fit is excluded, and later (smaller) candidates
          are still tried

        Args:
            task_id: Identifier copied onto the resulting pack.
            goal: Free-text goal used for keyword relevance.
            token_budget: Maximum sum of entry token estimates.

        Returns:
            A ContextPack holding the selected entries and one
            ExclusionReason per rejected record.
        """
        pack = ContextPack(task_id=task_id, goal=goal, token_budget=token_budget)
        # A blank goal splits to no words, which disables the relevance filter.
        goal_words = set(goal.lower().split())

        candidates: list[ContextPackEntry] = []

        # NOTE(review): status="" presumably means "no status filter" --
        # confirm against ChronoStore.query.
        for rec in self._store.query(status=""):
            reason = self._exclusion_reason(rec, goal_words)
            if reason is not None:
                pack.excluded.append(ExclusionReason(rec.id, reason))
                continue

            # The estimate stringifies the whole payload, so it is computed
            # only for records that survived the exclusion checks.
            candidates.append(
                ContextPackEntry(
                    record_id=rec.id,
                    kind=rec.kind,
                    label=rec.label,
                    confidence=rec.confidence,
                    token_estimate=self._estimate_tokens(rec),
                    data=rec.data,
                )
            )

        # ── Sort: highest confidence first (include most trusted first) ──
        candidates.sort(key=lambda e: e.confidence, reverse=True)

        # ── Budget enforcement: greedy fill in confidence order ──────────
        running_tokens = 0
        for entry in candidates:
            if running_tokens + entry.token_estimate <= token_budget:
                pack.entries.append(entry)
                running_tokens += entry.token_estimate
            else:
                pack.excluded.append(
                    ExclusionReason(
                        entry.record_id,
                        f"token budget exceeded "
                        f"({running_tokens}+{entry.token_estimate}>{token_budget})",
                    )
                )

        return pack

    @staticmethod
    def _exclusion_reason(rec: ChronoRecord, goal_words: set[str]) -> str | None:
        """Return why *rec* must be excluded, or None if it is a candidate."""
        # ── Exclusion: bad status ────────────────────────────────────────
        if rec.status in _EXCLUDED_STATUSES:
            return f"status={rec.status}"

        # ── Exclusion: confidence below H18 threshold ────────────────────
        if rec.confidence < _MIN_CONFIDENCE:
            return f"confidence={rec.confidence:.2f} < {_MIN_CONFIDENCE}"

        # ── Exclusion: infrastructure records ────────────────────────────
        if rec.kind in _INFRA_KINDS:
            return f"infrastructure record (kind={rec.kind})"

        # ── Relevance: keyword overlap with goal ─────────────────────────
        if goal_words and goal_words.isdisjoint(rec.label.lower().split()):
            return "not relevant to goal"

        return None

    @staticmethod
    def _estimate_tokens(rec: ChronoRecord) -> int:
        """Estimate token count: ~4 chars per token (label + data repr).

        Never returns less than 1.
        """
        chars = len(rec.label) + len(str(rec.data))
        return max(1, chars // _CHARS_PER_TOKEN)
|