chimera-memory 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- chimera_memory/__init__.py +55 -0
- chimera_memory/_index.py +204 -0
- chimera_memory/adapters/__init__.py +1 -0
- chimera_memory/adapters/git.py +87 -0
- chimera_memory/adapters/manual.py +1 -0
- chimera_memory/adapters/pytest_ci.py +87 -0
- chimera_memory/append_state.py +159 -0
- chimera_memory/cli.py +1402 -0
- chimera_memory/data_quality.py +137 -0
- chimera_memory/drift.py +101 -0
- chimera_memory/errata.py +113 -0
- chimera_memory/evidence.py +502 -0
- chimera_memory/export.py +207 -0
- chimera_memory/integrity.py +333 -0
- chimera_memory/ledger.py +441 -0
- chimera_memory/m2b_readiness.py +449 -0
- chimera_memory/preflight.py +492 -0
- chimera_memory/query.py +183 -0
- chimera_memory/receipt.py +392 -0
- chimera_memory/redaction.py +63 -0
- chimera_memory/reliability.py +302 -0
- chimera_memory/scoring.py +69 -0
- chimera_memory/seal.py +32 -0
- chimera_memory/session.py +123 -0
- chimera_memory/session_lifecycle.py +293 -0
- chimera_memory/storage.py +266 -0
- chimera_memory-0.1.0.dist-info/METADATA +303 -0
- chimera_memory-0.1.0.dist-info/RECORD +31 -0
- chimera_memory-0.1.0.dist-info/WHEEL +4 -0
- chimera_memory-0.1.0.dist-info/entry_points.txt +2 -0
- chimera_memory-0.1.0.dist-info/licenses/LICENSE +21 -0
|
@@ -0,0 +1,55 @@
|
|
|
1
|
+
from chimera_memory.drift import detect_drift
|
|
2
|
+
from chimera_memory.ledger import (
|
|
3
|
+
build_dogfood_status,
|
|
4
|
+
export_report,
|
|
5
|
+
query_memory,
|
|
6
|
+
record_claim,
|
|
7
|
+
settle_claim,
|
|
8
|
+
)
|
|
9
|
+
from chimera_memory.receipt import (
|
|
10
|
+
build_receipt,
|
|
11
|
+
format_receipt_json,
|
|
12
|
+
format_receipt_markdown,
|
|
13
|
+
format_receipt_text,
|
|
14
|
+
)
|
|
15
|
+
from chimera_memory.reliability import build_reliability_summary
|
|
16
|
+
from chimera_memory.session import (
|
|
17
|
+
AttributionConfidence,
|
|
18
|
+
FinalStatus,
|
|
19
|
+
IdentitySource,
|
|
20
|
+
Session,
|
|
21
|
+
new_session_id,
|
|
22
|
+
)
|
|
23
|
+
from chimera_memory.session_lifecycle import (
|
|
24
|
+
end_session,
|
|
25
|
+
get_current_session,
|
|
26
|
+
get_session,
|
|
27
|
+
list_sessions,
|
|
28
|
+
start_session,
|
|
29
|
+
)
|
|
30
|
+
from chimera_memory.storage import MemoryStore
|
|
31
|
+
|
|
32
|
+
__all__ = [
|
|
33
|
+
"AttributionConfidence",
|
|
34
|
+
"FinalStatus",
|
|
35
|
+
"IdentitySource",
|
|
36
|
+
"MemoryStore",
|
|
37
|
+
"Session",
|
|
38
|
+
"build_dogfood_status",
|
|
39
|
+
"build_receipt",
|
|
40
|
+
"build_reliability_summary",
|
|
41
|
+
"detect_drift",
|
|
42
|
+
"end_session",
|
|
43
|
+
"export_report",
|
|
44
|
+
"format_receipt_json",
|
|
45
|
+
"format_receipt_markdown",
|
|
46
|
+
"format_receipt_text",
|
|
47
|
+
"get_current_session",
|
|
48
|
+
"get_session",
|
|
49
|
+
"list_sessions",
|
|
50
|
+
"new_session_id",
|
|
51
|
+
"query_memory",
|
|
52
|
+
"record_claim",
|
|
53
|
+
"settle_claim",
|
|
54
|
+
"start_session",
|
|
55
|
+
]
|
chimera_memory/_index.py
ADDED
|
@@ -0,0 +1,204 @@
|
|
|
1
|
+
"""Sidecar SQLite settlement-aware claim search index.
|
|
2
|
+
|
|
3
|
+
Folded from chimera-graphsource into chimera-memory for the public package.
|
|
4
|
+
"""
|
|
5
|
+
|
|
6
|
+
from __future__ import annotations
|
|
7
|
+
|
|
8
|
+
import json
|
|
9
|
+
import sqlite3
|
|
10
|
+
from pathlib import Path
|
|
11
|
+
from typing import Any
|
|
12
|
+
|
|
13
|
+
from chimera_memory_types.knowledge import Claim
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
class ClaimIndex:
    """SQLite-backed sidecar index used to search claims.

    Every indexed claim is stored as one row of the ``claims`` table, which
    keeps a handful of filterable columns plus the claim's full JSON dump.
    When the SQLite build supports FTS5 (and ``force_like`` is not set) the
    searchable text is mirrored into the ``claims_fts`` virtual table;
    otherwise searches fall back to a case-insensitive ``LIKE`` substring
    match.

    The index owns one ``sqlite3`` connection. Call :meth:`close` (or use the
    instance as a context manager) to release it.
    """

    def __init__(self, path: str | Path, *, force_like: bool = False) -> None:
        """Open (creating if necessary) the index database at *path*.

        Args:
            path: Location of the SQLite file; missing parent directories
                are created.
            force_like: Skip FTS5 detection and always use the ``LIKE``
                fallback.
        """
        self.path = Path(path)
        self.path.parent.mkdir(parents=True, exist_ok=True)
        self.connection = sqlite3.connect(str(self.path))
        self.connection.row_factory = sqlite3.Row
        self.fts_enabled = False if force_like else self._probe_fts5()
        self._init_schema()

    def close(self) -> None:
        """Close the underlying SQLite connection."""
        self.connection.close()

    def __enter__(self) -> "ClaimIndex":
        return self

    def __exit__(self, *exc_info: object) -> None:
        self.close()

    def _probe_fts5(self) -> bool:
        """Return True if this SQLite build can create an FTS5 table."""
        try:
            self.connection.execute("CREATE VIRTUAL TABLE temp._fts_probe USING fts5(value)")
            self.connection.execute("DROP TABLE temp._fts_probe")
            return True
        except sqlite3.Error:
            return False

    def _init_schema(self) -> None:
        """Create the ``claims`` table (and ``claims_fts`` when FTS is on)."""
        self.connection.execute(
            """
            CREATE TABLE IF NOT EXISTS claims (
                claim_id TEXT PRIMARY KEY,
                claim_type TEXT NOT NULL,
                claim_status TEXT NOT NULL,
                entity_id TEXT,
                polarity TEXT,
                settled INTEGER NOT NULL,
                wealth REAL NOT NULL,
                text TEXT NOT NULL,
                claim_json TEXT NOT NULL
            )
            """
        )
        if self.fts_enabled:
            self.connection.execute(
                "CREATE VIRTUAL TABLE IF NOT EXISTS claims_fts USING fts5(claim_id UNINDEXED, text)"
            )
        self.connection.commit()

    def index(self, claim: Claim) -> None:
        """Insert *claim*, or replace the row already stored under its id.

        The ``claims`` upsert and the ``claims_fts`` refresh run in a single
        transaction, so a failure part-way through rolls both back instead of
        leaving the two tables out of step.
        """
        text = _claim_text(claim)
        settled = _is_settled(claim)
        wealth = _wealth(claim)
        entity_id = _metadata_string(claim, "entity_id")
        polarity = _metadata_string(claim, "polarity")
        payload = (
            claim.claim_id,
            claim.claim_type.value,
            claim.claim_status.value,
            entity_id,
            polarity,
            int(settled),
            wealth,
            text,
            json.dumps(claim.model_dump(mode="json"), sort_keys=True),
        )
        # The connection context manager commits on success and rolls back
        # if any statement raises.
        with self.connection:
            self.connection.execute(
                """
                INSERT INTO claims (
                    claim_id, claim_type, claim_status, entity_id, polarity,
                    settled, wealth, text, claim_json
                )
                VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)
                ON CONFLICT(claim_id) DO UPDATE SET
                    claim_type=excluded.claim_type,
                    claim_status=excluded.claim_status,
                    entity_id=excluded.entity_id,
                    polarity=excluded.polarity,
                    settled=excluded.settled,
                    wealth=excluded.wealth,
                    text=excluded.text,
                    claim_json=excluded.claim_json
                """,
                payload,
            )
            if self.fts_enabled:
                # The FTS table has no upsert, so drop the old row first.
                self.connection.execute(
                    "DELETE FROM claims_fts WHERE claim_id = ?", (claim.claim_id,)
                )
                self.connection.execute(
                    "INSERT INTO claims_fts (claim_id, text) VALUES (?, ?)",
                    (claim.claim_id, text),
                )

    def search(
        self,
        query: str,
        *,
        settled_only: bool = False,
        min_wealth: float | None = None,
        claim_type: str | None = None,
    ) -> list[Claim]:
        """Return claims whose indexed text matches *query*.

        With FTS enabled, *query* is handed to ``MATCH`` unchanged, so FTS5
        query syntax applies. In the ``LIKE`` fallback, *query* is treated as
        a literal, lower-cased substring: ``%``, ``_`` and ``\\`` are escaped
        so they match themselves rather than acting as wildcards.

        Args:
            query: Text to look for.
            settled_only: Keep only rows whose ``settled`` flag is 1.
            min_wealth: Keep only rows with ``wealth`` >= this value.
            claim_type: Keep only rows with exactly this ``claim_type``.

        Returns:
            Matching claims ordered by wealth (descending), then claim id.
        """
        where: list[str] = []
        params: list[Any] = []
        if settled_only:
            where.append("claims.settled = 1")
        if min_wealth is not None:
            where.append("claims.wealth >= ?")
            params.append(float(min_wealth))
        if claim_type is not None:
            where.append("claims.claim_type = ?")
            params.append(str(claim_type))

        if self.fts_enabled:
            sql = "SELECT claims.claim_json FROM claims JOIN claims_fts USING (claim_id)"
            where.append("claims_fts.text MATCH ?")
            params.append(query)
        else:
            sql = "SELECT claims.claim_json FROM claims"
            # Escape the escape character first, then the LIKE wildcards.
            literal = (
                query.lower()
                .replace("\\", "\\\\")
                .replace("%", "\\%")
                .replace("_", "\\_")
            )
            where.append("LOWER(claims.text) LIKE ? ESCAPE '\\'")
            params.append(f"%{literal}%")
        if where:
            sql += " WHERE " + " AND ".join(where)
        sql += " ORDER BY claims.wealth DESC, claims.claim_id ASC"
        rows = self.connection.execute(sql, params).fetchall()
        return [Claim.model_validate(json.loads(row["claim_json"])) for row in rows]

    def find_contradictions(self, claim_id: str) -> list[Claim]:
        """Return settled claims on the same entity with the opposite polarity.

        An empty list is returned when *claim_id* is not indexed as settled,
        when it has no ``entity_id`` or ``polarity``, or when its polarity has
        no known opposite.
        """
        row = self.connection.execute(
            "SELECT entity_id, polarity FROM claims WHERE claim_id = ? AND settled = 1",
            (claim_id,),
        ).fetchone()
        if row is None or row["entity_id"] is None or row["polarity"] is None:
            return []
        opposite = _opposite_polarity(row["polarity"])
        if opposite is None:
            return []
        rows = self.connection.execute(
            """
            SELECT claim_json FROM claims
            WHERE settled = 1
              AND claim_id != ?
              AND entity_id = ?
              AND polarity = ?
            ORDER BY wealth DESC, claim_id ASC
            """,
            (claim_id, row["entity_id"], opposite),
        ).fetchall()
        return [Claim.model_validate(json.loads(result["claim_json"])) for result in rows]
|
|
154
|
+
|
|
155
|
+
|
|
156
|
+
def _claim_text(claim: Claim) -> str:
    """Build the space-joined text blob that is indexed for *claim*.

    The blob concatenates the claim id, type, status, title, summary, formal
    statement and the ``entity_id``/``polarity`` metadata values. When the
    claim has a settlement, its status, proper score and wealth are appended.
    Empty fragments are omitted.

    A proper score of ``0`` is kept: only a missing (``None``) score is
    left out of the text.
    """
    metadata = claim.metadata
    parts = [
        claim.claim_id,
        claim.claim_type.value,
        claim.claim_status.value,
        claim.title,
        claim.summary,
        claim.formal_statement or "",
        str(metadata.get("entity_id", "")),
        str(metadata.get("polarity", "")),
    ]
    settlement = claim.settlement
    if settlement is not None:
        score = settlement.proper_score
        parts.extend(
            [
                settlement.status.value,
                # Compare against None: `score or ""` would discard 0.0.
                "" if score is None else str(score),
                str(settlement.wealth.wealth),
            ]
        )
    return " ".join(part for part in parts if part)
|
|
176
|
+
|
|
177
|
+
|
|
178
|
+
def _is_settled(claim: Claim) -> bool:
    """Return True when *claim* has a settlement with at least one event."""
    settlement = claim.settlement
    if settlement is None:
        return False
    return bool(settlement.events)
|
|
180
|
+
|
|
181
|
+
|
|
182
|
+
def _wealth(claim: Claim) -> float:
    """Return the settlement wealth of *claim*, or 0.0 if it has no settlement."""
    settlement = claim.settlement
    return 0.0 if settlement is None else settlement.wealth.wealth
|
|
186
|
+
|
|
187
|
+
|
|
188
|
+
def _metadata_string(claim: Claim, key: str) -> str | None:
    """Return ``str(claim.metadata[key])``, or None if the key is absent or None."""
    raw = claim.metadata.get(key)
    return str(raw) if raw is not None else None
|
|
193
|
+
|
|
194
|
+
|
|
195
|
+
def _opposite_polarity(polarity: str) -> str | None:
    """Return the opposite of *polarity*, or None if it is not a known value.

    Matching is case-insensitive; the recognised pairs are
    positive/negative, true/false and support/oppose.
    """
    antonyms = (
        ("positive", "negative"),
        ("true", "false"),
        ("support", "oppose"),
    )
    needle = polarity.lower()
    for left, right in antonyms:
        if needle == left:
            return right
        if needle == right:
            return left
    return None
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
__all__: list[str] = []
|
|
@@ -0,0 +1,87 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import subprocess
|
|
4
|
+
from datetime import UTC, datetime
|
|
5
|
+
from pathlib import Path
|
|
6
|
+
|
|
7
|
+
from chimera_memory_types.finding import EvidenceRef, EvidenceRefType
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
def capture_git_evidence(
    root: Path | str | None = None,
    claim_time: datetime | None = None,
) -> tuple[list[EvidenceRef], dict[str, object]]:
    """Describe the current git state of a working tree as evidence.

    Args:
        root: Directory to run git in; defaults to the current working
            directory.
        claim_time: Timestamp recorded as the evidence's ``available_at``;
            defaults to the current UTC time.

    Returns:
        ``(evidence_refs, metadata)``. When git is missing or
        ``git rev-parse HEAD`` fails, the list is empty and the metadata
        holds ``git_available=False`` plus a ``reason``. Otherwise the list
        holds one external evidence ref with id ``git:<commit sha>`` and the
        metadata carries the commit, branch, changed files, dirty flag and
        the raw ``git status --short`` lines.
    """
    repo = Path.cwd() if root is None else Path(root)
    available_at = claim_time or datetime.now(UTC)

    head = _run_git(repo, "rev-parse", "HEAD")
    if head.missing:
        return [], {"git_available": False, "reason": "git executable not available"}
    if not head.ok:
        return [], {"git_available": False, "reason": head.reason}
    commit_sha = head.stdout.strip()

    # An empty or failed branch lookup is reported as the literal "HEAD".
    branch_result = _run_git(repo, "branch", "--show-current")
    branch = (branch_result.stdout.strip() if branch_result.ok else "") or "HEAD"

    status_result = _run_git(repo, "status", "--short")
    status = status_result.stdout.splitlines() if status_result.ok else []
    candidate_paths = (_status_path(entry) for entry in status)
    files_changed = [changed for changed in candidate_paths if changed]

    metadata: dict[str, object] = {
        "git_available": True,
        "commit_sha": commit_sha,
        "branch": branch,
        "files_changed": files_changed,
        "dirty_state": bool(status),
        "status": status,
    }
    evidence = EvidenceRef(
        ref_type=EvidenceRefType.EXTERNAL,
        ref_id=f"git:{commit_sha}",
        available_at=available_at,
    )
    return [evidence], metadata
|
|
48
|
+
|
|
49
|
+
|
|
50
|
+
class _GitResult:
    """Outcome of one git invocation.

    Attributes are set from the keyword-only constructor arguments:
    ``ok`` (success flag), ``stdout`` (captured output), ``reason``
    (failure description) and ``missing`` (git executable not found).
    """

    def __init__(
        self,
        *,
        ok: bool,
        stdout: str = "",
        reason: str = "",
        missing: bool = False,
    ) -> None:
        self.ok, self.stdout = ok, stdout
        self.reason, self.missing = reason, missing
|
|
63
|
+
|
|
64
|
+
|
|
65
|
+
def _run_git(root: Path, *args: str) -> _GitResult:
    """Run ``git <args>`` inside *root* and wrap the outcome.

    Returns:
        * ``ok=True`` with the captured stdout when git exits with 0.
        * ``ok=False, missing=True`` when the git executable cannot be found.
        * ``ok=False`` with a ``reason`` when *root* is not a directory, the
          process cannot be started, or git exits non-zero (the reason is
          stderr, else stdout, else ``"git command failed"``).
    """
    try:
        result = subprocess.run(
            ["git", *args],
            cwd=root,
            check=False,
            text=True,
            capture_output=True,
            shell=False,
        )
    except FileNotFoundError:
        # subprocess raises FileNotFoundError both for a missing executable
        # and for a missing working directory; only the former means git is
        # unavailable.
        if not Path(root).is_dir():
            return _GitResult(ok=False, reason=f"working directory not found: {root}")
        return _GitResult(ok=False, reason="git executable not available", missing=True)
    except OSError as exc:
        # e.g. cwd is a file, or the process may not be started.
        return _GitResult(ok=False, reason=str(exc) or "git command failed")

    if result.returncode != 0:
        reason = result.stderr.strip() or result.stdout.strip() or "git command failed"
        return _GitResult(ok=False, reason=reason)
    return _GitResult(ok=True, stdout=result.stdout)
|
|
82
|
+
|
|
83
|
+
|
|
84
|
+
def _status_path(line: str) -> str:
    """Extract the path from one ``git status --short`` line.

    For a line containing ``" -> "`` the text after the first arrow is
    returned; otherwise the first three characters (the status columns and
    separator) are dropped. The result is whitespace-stripped.
    """
    _, arrow, renamed_to = line.partition(" -> ")
    if arrow:
        return renamed_to.strip()
    return line[3:].strip()
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
__all__: list[str] = []
|
|
@@ -0,0 +1,87 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import re
|
|
4
|
+
import subprocess
|
|
5
|
+
import sys
|
|
6
|
+
import time
|
|
7
|
+
from dataclasses import dataclass, field
|
|
8
|
+
|
|
9
|
+
_EXCERPT_MAX = 2000
|
|
10
|
+
_ANSI_RE = re.compile(r"\x1b\[[0-?]*[ -/]*[@-~]|\x1b\(B")
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
@dataclass(frozen=True)
class PytestRunResult:
    """Immutable record of one wrapped command execution."""

    # Argument vector that was executed.
    command: list[str]
    # Process return code.
    exit_code: int
    # Set by the runners in this module to `exit_code == 0`.
    observed: bool
    # Wall-clock duration of the run, in seconds.
    duration_seconds: float
    # Bounded excerpts of the captured output streams; empty by default.
    stdout_excerpt: str = ""
    stderr_excerpt: str = ""
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
def run_pytest(command_args: list[str]) -> PytestRunResult:
    """Run pytest, echo its output, and keep bounded excerpts of it.

    The child's stdout and stderr are captured in full and written to this
    process's stdout/stderr once the run has finished (they are not streamed
    while pytest is running). The stored excerpts are the last
    ``_EXCERPT_MAX`` characters of each stream after ANSI escape sequences
    have been removed, passed through ``chimera_memory.redaction.redact``.

    Args:
        command_args: Command line whose first element must be ``"pytest"``.

    Returns:
        A ``PytestRunResult`` whose ``observed`` flag is True exactly when
        pytest exited with code 0.

    Raises:
        ValueError: If *command_args* is not a pytest command.
    """
    from chimera_memory.redaction import redact

    command = _pytest_command(command_args)
    started_at = time.monotonic()
    result = subprocess.run(
        command, shell=False, capture_output=True, text=True, errors="replace"
    )
    duration_seconds = time.monotonic() - started_at
    exit_code = int(result.returncode)

    # Replay the captured output so the developer still sees it.
    if result.stdout:
        sys.stdout.write(result.stdout)
        sys.stdout.flush()
    if result.stderr:
        sys.stderr.write(result.stderr)
        sys.stderr.flush()

    # Strip ANSI sequences BEFORE truncating: slicing first can cut through
    # an escape sequence (leaving fragments the regex no longer matches) and
    # spends part of the excerpt budget on colour codes.
    stdout_raw = _ANSI_RE.sub("", result.stdout or "")[-_EXCERPT_MAX:]
    stderr_raw = _ANSI_RE.sub("", result.stderr or "")[-_EXCERPT_MAX:]
    return PytestRunResult(
        command=command,
        exit_code=exit_code,
        observed=exit_code == 0,
        duration_seconds=duration_seconds,
        stdout_excerpt=redact(stdout_raw),
        stderr_excerpt=redact(stderr_raw),
    )
|
|
57
|
+
|
|
58
|
+
|
|
59
|
+
def run_command(command_args: list[str]) -> PytestRunResult:
    """Run an arbitrary command and return a result shaped like PytestRunResult.

    exit code 0 → observed=True (VALIDATED)
    exit code nonzero → observed=False (CONTRADICTED)

    The output is captured and not echoed. The stored excerpts are the last
    ``_EXCERPT_MAX`` characters of each stream after ANSI escape sequences
    have been removed, passed through ``chimera_memory.redaction.redact``.

    Args:
        command_args: Argument vector, executed without a shell.
    """
    from chimera_memory.redaction import redact

    started_at = time.monotonic()
    result = subprocess.run(
        command_args, shell=False, capture_output=True, text=True, errors="replace"
    )
    duration_seconds = time.monotonic() - started_at
    exit_code = int(result.returncode)

    # Strip ANSI sequences BEFORE truncating so the cut can never land inside
    # an escape sequence and leave an unmatched fragment in the excerpt.
    stdout_raw = _ANSI_RE.sub("", result.stdout or "")[-_EXCERPT_MAX:]
    stderr_raw = _ANSI_RE.sub("", result.stderr or "")[-_EXCERPT_MAX:]
    return PytestRunResult(
        command=command_args,
        exit_code=exit_code,
        observed=exit_code == 0,
        duration_seconds=duration_seconds,
        stdout_excerpt=redact(stdout_raw),
        stderr_excerpt=redact(stderr_raw),
    )
|
|
82
|
+
|
|
83
|
+
|
|
84
|
+
def _pytest_command(command_args: list[str]) -> list[str]:
    """Rewrite ``pytest ...`` to ``<current interpreter> -m pytest ...``.

    Raises:
        ValueError: If *command_args* is empty or does not start with
            ``"pytest"``.
    """
    if command_args and command_args[0] == "pytest":
        return [sys.executable, "-m", "pytest", *command_args[1:]]
    raise ValueError("wrap supports pytest commands only")
|
|
@@ -0,0 +1,159 @@
|
|
|
1
|
+
"""Append-state cache for O(1) claim/integrity appends.
|
|
2
|
+
|
|
3
|
+
Stores derived state (last line number, last chain hash) so append_claim
|
|
4
|
+
avoids re-reading the full claims.jsonl and integrity.jsonl on every write.
|
|
5
|
+
|
|
6
|
+
This is a cache/accelerator only — NOT the source of truth.
|
|
7
|
+
canonical source of truth: claims.jsonl + integrity.jsonl
|
|
8
|
+
verify always reads the canonical files, ignoring this cache.
|
|
9
|
+
|
|
10
|
+
Validation: before trusting a loaded state, we read only the last line of
|
|
11
|
+
integrity.jsonl (O(1) via seek-from-end) and compare. If mismatch, we rebuild
|
|
12
|
+
from canonical files (O(N) fallback). This closes the stale-valid-JSON caveat.
|
|
13
|
+
|
|
14
|
+
If the cache is missing, corrupt, stale, or mismatched it is rebuilt from
|
|
15
|
+
canonical files.
|
|
16
|
+
"""
|
|
17
|
+
|
|
18
|
+
from __future__ import annotations
|
|
19
|
+
|
|
20
|
+
import hashlib
|
|
21
|
+
import json
|
|
22
|
+
import os
|
|
23
|
+
from dataclasses import dataclass
|
|
24
|
+
from pathlib import Path
|
|
25
|
+
from typing import Any
|
|
26
|
+
|
|
27
|
+
_STATE_FILE = "append_state.json"
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
@dataclass
class AppendState:
    """Cached tail state of the claim and integrity logs."""

    # Number of non-empty lines currently in claims.jsonl.
    last_line_number: int
    # chain_hash of the last integrity entry (None if there is none).
    last_chain_hash: str | None
    # record_hash of the last claim (used for the mismatch check).
    last_record_hash: str | None

    def to_dict(self) -> dict[str, Any]:
        """Return the state as a plain dict suitable for JSON serialisation."""
        return dict(
            last_line_number=self.last_line_number,
            last_chain_hash=self.last_chain_hash,
            last_record_hash=self.last_record_hash,
        )

    @classmethod
    def from_dict(cls, d: dict[str, Any]) -> AppendState:
        """Build a state from a dict produced by :meth:`to_dict`.

        ``last_line_number`` is required and coerced with ``int``; the two
        hashes default to None when absent.
        """
        line_number = int(d["last_line_number"])
        return cls(
            line_number,
            d.get("last_chain_hash"),
            d.get("last_record_hash"),
        )
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
def _rebuild_from_canonical(memory_dir: Path) -> AppendState:
    """Rebuild append state by scanning canonical files. O(N) but only on fallback.

    * ``last_line_number`` is the count of non-empty lines in ``claims.jsonl``.
    * ``last_record_hash`` is the SHA-256 hex digest of the last non-empty
      claims line (UTF-8 encoded).
    * ``last_chain_hash`` is the ``chain_hash`` of the last non-empty line of
      ``integrity.jsonl``; it stays None when that line is not valid JSON or
      is not a JSON object.

    Missing files contribute their empty defaults (0 / None).
    """
    claims_path = memory_dir / "claims.jsonl"
    integrity_path = memory_dir / "integrity.jsonl"

    last_line_number = 0
    last_record_hash: str | None = None
    if claims_path.exists():
        lines = claims_path.read_text(encoding="utf-8").splitlines()
        non_empty = [ln for ln in lines if ln.strip()]
        last_line_number = len(non_empty)
        if non_empty:
            last_record_hash = hashlib.sha256(
                non_empty[-1].rstrip("\n").encode("utf-8")
            ).hexdigest()

    last_chain_hash: str | None = None
    if integrity_path.exists():
        int_lines = integrity_path.read_text(encoding="utf-8").splitlines()
        for raw in reversed(int_lines):
            raw = raw.strip()
            if not raw:
                continue
            try:
                entry = json.loads(raw)
            except json.JSONDecodeError:
                entry = None
            # Valid JSON that is not an object (e.g. a list or number) has no
            # .get(); treat it like an unparseable line instead of crashing.
            if isinstance(entry, dict):
                last_chain_hash = entry.get("chain_hash")
            break  # only the last non-empty line is considered

    return AppendState(
        last_line_number=last_line_number,
        last_chain_hash=last_chain_hash,
        last_record_hash=last_record_hash,
    )
|
|
85
|
+
|
|
86
|
+
|
|
87
|
+
def _read_integrity_tail(memory_dir: Path) -> dict[str, object] | None:
    """Read only the last non-empty line of integrity.jsonl by seeking from the end.

    A 4096-byte window at the end of the file is examined first. If the last
    line does not fit in it, the window is doubled until the line's start is
    found, so the cost depends on the length of the last line rather than on
    the size of the file.

    Returns the parsed entry, or None if the file is missing, empty,
    unreadable, or its last non-empty line is not a JSON object.
    """
    integrity_path = memory_dir / "integrity.jsonl"
    try:
        with integrity_path.open("rb") as fh:
            size = fh.seek(0, os.SEEK_END)
            window = min(4096, size)
            while window:
                fh.seek(size - window)
                # Drop the trailing newline and any trailing blank lines.
                data = fh.read(window).rstrip()
                covers_file = window == size
                if data:
                    newline_at = data.rfind(b"\n")
                    # The line is complete once a preceding newline is in the
                    # window, or the window already spans the whole file.
                    if newline_at != -1 or covers_file:
                        raw = data[newline_at + 1 :].decode("utf-8", errors="replace")
                        parsed = json.loads(raw.strip())
                        return parsed if isinstance(parsed, dict) else None
                elif covers_file:
                    return None  # file contains only whitespace
                window = min(size, window * 2)
    except (OSError, ValueError):
        # OSError: missing/unreadable file. ValueError covers JSONDecodeError.
        pass
    return None
|
|
112
|
+
|
|
113
|
+
|
|
114
|
+
def _state_matches_tail(state: AppendState, tail: dict[str, object] | None) -> bool:
    """Tell whether the cached *state* agrees with the last integrity entry.

    Without a tail entry the state is consistent only if it records no chain
    hash. With one, the line number and chain hash must match, and the record
    hash must match too unless the state has none. A tail lacking a usable
    ``line_number`` counts as a mismatch.
    """
    if tail is None:
        # No integrity entries yet: only an empty chain is consistent.
        return state.last_chain_hash is None
    try:
        if int(str(tail["line_number"])) != state.last_line_number:
            return False
        if tail.get("chain_hash") != state.last_chain_hash:
            return False
        expected_record = state.last_record_hash
        return expected_record is None or tail.get("record_hash") == expected_record
    except (KeyError, TypeError, ValueError):
        return False
|
|
132
|
+
|
|
133
|
+
|
|
134
|
+
def load_or_rebuild(memory_dir: Path) -> AppendState:
    """Load cached append state, validating against integrity tail; rebuild if needed.

    Fast path (cache parses AND matches the integrity.jsonl tail): the cached
    state is returned without scanning the canonical files.
    Slow path (cache missing, unreadable, malformed, or mismatched): the
    state is rebuilt from the canonical files.
    """
    state_path = memory_dir / _STATE_FILE
    cached: AppendState | None
    try:
        cached = AppendState.from_dict(
            json.loads(state_path.read_text(encoding="utf-8"))
        )
    except (OSError, ValueError, KeyError, TypeError):
        # OSError: cache missing or unreadable.
        # ValueError: invalid JSON / invalid UTF-8 / non-numeric line number.
        # KeyError: required field absent.
        # TypeError: JSON is not an object, or the line number is null.
        # The cache is only an accelerator, so every one of these means rebuild.
        cached = None

    if cached is not None and _state_matches_tail(
        cached, _read_integrity_tail(memory_dir)
    ):
        return cached
    # Missing, corrupt, stale or mismatched cache.
    return _rebuild_from_canonical(memory_dir)
|
|
152
|
+
|
|
153
|
+
|
|
154
|
+
def save(memory_dir: Path, state: AppendState) -> None:
    """Write the append-state cache via a temp file followed by a rename."""
    serialized = json.dumps(state.to_dict(), sort_keys=True)
    scratch = memory_dir / f".{_STATE_FILE}.tmp"
    scratch.write_text(serialized, encoding="utf-8")
    # atomic on POSIX; best-effort on Windows
    os.replace(scratch, memory_dir / _STATE_FILE)
|