flurryx-code-memory 0.4.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (53) hide show
  1. code_memory/__init__.py +1 -0
  2. code_memory/claims/__init__.py +32 -0
  3. code_memory/claims/extractor.py +325 -0
  4. code_memory/claims/indexer.py +258 -0
  5. code_memory/claims/resolver.py +186 -0
  6. code_memory/claims/store.py +424 -0
  7. code_memory/cli.py +1192 -0
  8. code_memory/config.py +268 -0
  9. code_memory/embed/__init__.py +224 -0
  10. code_memory/embed/cache.py +204 -0
  11. code_memory/embed/m3.py +174 -0
  12. code_memory/embed/ollama.py +92 -0
  13. code_memory/embed/tei.py +106 -0
  14. code_memory/episodic/__init__.py +3 -0
  15. code_memory/episodic/sqlite_store.py +278 -0
  16. code_memory/extractor/__init__.py +3 -0
  17. code_memory/extractor/csproj.py +166 -0
  18. code_memory/extractor/dll.py +385 -0
  19. code_memory/extractor/gitignore.py +162 -0
  20. code_memory/extractor/nuget.py +275 -0
  21. code_memory/extractor/sanity.py +124 -0
  22. code_memory/extractor/sln.py +108 -0
  23. code_memory/extractor/treesitter.py +1172 -0
  24. code_memory/graph/__init__.py +3 -0
  25. code_memory/graph/falkor_store.py +740 -0
  26. code_memory/mcp_server.py +1816 -0
  27. code_memory/metrics.py +260 -0
  28. code_memory/orchestrator/__init__.py +13 -0
  29. code_memory/orchestrator/git_delta.py +211 -0
  30. code_memory/orchestrator/ingest_state.py +71 -0
  31. code_memory/orchestrator/pipeline.py +1478 -0
  32. code_memory/orchestrator/reset.py +130 -0
  33. code_memory/orchestrator/resolver.py +825 -0
  34. code_memory/orchestrator/retrieve.py +505 -0
  35. code_memory/resilience.py +73 -0
  36. code_memory/sync/__init__.py +20 -0
  37. code_memory/sync/autostart/__init__.py +42 -0
  38. code_memory/sync/autostart/base.py +106 -0
  39. code_memory/sync/autostart/launchd.py +115 -0
  40. code_memory/sync/autostart/schtasks.py +155 -0
  41. code_memory/sync/autostart/systemd.py +113 -0
  42. code_memory/sync/hooks.py +164 -0
  43. code_memory/sync/safety.py +65 -0
  44. code_memory/sync/snapshot.py +461 -0
  45. code_memory/sync/store.py +399 -0
  46. code_memory/sync/sync.py +405 -0
  47. code_memory/sync/watcher.py +320 -0
  48. code_memory/vector/__init__.py +3 -0
  49. code_memory/vector/qdrant_store.py +302 -0
  50. flurryx_code_memory-0.4.0.dist-info/METADATA +26 -0
  51. flurryx_code_memory-0.4.0.dist-info/RECORD +53 -0
  52. flurryx_code_memory-0.4.0.dist-info/WHEEL +4 -0
  53. flurryx_code_memory-0.4.0.dist-info/entry_points.txt +3 -0
code_memory/metrics.py ADDED
@@ -0,0 +1,260 @@
1
+ import sqlite3
2
+ import time
3
+ import json
4
+ from dataclasses import dataclass, asdict
5
+ from pathlib import Path
6
+ from typing import Any
7
+
8
@dataclass
class RetrieveTiming:
    """Timing and hit-count sample for a single retrieve operation.

    The ``*_ms`` fields are stored as REAL columns and the ``*_hit_count``
    fields as INTEGER columns of the ``retrieves`` table.
    """

    # Query text the sample belongs to.
    query: str

    # Per-stage durations, followed by the overall duration.
    embed_ms: float
    code_search_ms: float
    eps_search_ms: float
    claims_ms: float
    total_ms: float

    # Number of hits per source; all default to zero.
    code_hit_count: int = 0
    eps_hit_count: int = 0
    claims_hit_count: int = 0
19
+
20
+ class MetricsStore:
21
+ def __init__(self, path: Path):
22
+ self.path = Path(path)
23
+ self.path.parent.mkdir(parents=True, exist_ok=True)
24
+ self._init_db()
25
+
26
+ def _init_db(self):
27
+ with sqlite3.connect(str(self.path)) as conn:
28
+ conn.execute("""
29
+ CREATE TABLE IF NOT EXISTS retrieves (
30
+ id INTEGER PRIMARY KEY AUTOINCREMENT,
31
+ query TEXT,
32
+ embed_ms REAL,
33
+ code_search_ms REAL,
34
+ eps_search_ms REAL,
35
+ claims_ms REAL,
36
+ total_ms REAL,
37
+ code_hit_count INTEGER DEFAULT 0,
38
+ eps_hit_count INTEGER DEFAULT 0,
39
+ claims_hit_count INTEGER DEFAULT 0,
40
+ ts REAL NOT NULL
41
+ )
42
+ """)
43
+ conn.execute("""
44
+ CREATE TABLE IF NOT EXISTS backend_health (
45
+ id INTEGER PRIMARY KEY AUTOINCREMENT,
46
+ backend TEXT NOT NULL,
47
+ status TEXT NOT NULL,
48
+ latency_ms REAL,
49
+ ts REAL NOT NULL
50
+ )
51
+ """)
52
+ conn.execute("""
53
+ CREATE TABLE IF NOT EXISTS cache_stats (
54
+ id INTEGER PRIMARY KEY AUTOINCREMENT,
55
+ hits INTEGER DEFAULT 0,
56
+ misses INTEGER DEFAULT 0,
57
+ ts REAL NOT NULL
58
+ )
59
+ """)
60
+ conn.execute("""
61
+ CREATE TABLE IF NOT EXISTS ingest_stats (
62
+ id INTEGER PRIMARY KEY AUTOINCREMENT,
63
+ files INTEGER DEFAULT 0,
64
+ symbols INTEGER DEFAULT 0,
65
+ duration_s REAL,
66
+ ts REAL NOT NULL
67
+ )
68
+ """)
69
+ conn.execute("""
70
+ CREATE TABLE IF NOT EXISTS tool_calls (
71
+ id INTEGER PRIMARY KEY AUTOINCREMENT,
72
+ tool TEXT NOT NULL,
73
+ project TEXT NOT NULL,
74
+ query_text TEXT,
75
+ output_chars INTEGER DEFAULT 0,
76
+ result_count INTEGER DEFAULT 0,
77
+ session_id TEXT,
78
+ ts REAL NOT NULL
79
+ )
80
+ """)
81
+ conn.execute("""
82
+ CREATE TABLE IF NOT EXISTS fs_reads (
83
+ id INTEGER PRIMARY KEY AUTOINCREMENT,
84
+ tool TEXT NOT NULL,
85
+ path TEXT,
86
+ output_chars INTEGER DEFAULT 0,
87
+ session_id TEXT,
88
+ project TEXT NOT NULL,
89
+ ts REAL NOT NULL
90
+ )
91
+ """)
92
+ conn.commit()
93
+
94
+ def record_retrieve(self, m: RetrieveTiming):
95
+ with sqlite3.connect(str(self.path)) as conn:
96
+ conn.execute(
97
+ "INSERT INTO retrieves (query, embed_ms, code_search_ms, eps_search_ms, claims_ms, total_ms, code_hit_count, eps_hit_count, claims_hit_count, ts) VALUES (?,?,?,?,?,?,?,?,?,?)",
98
+ (m.query, m.embed_ms, m.code_search_ms, m.eps_search_ms, m.claims_ms, m.total_ms, m.code_hit_count, m.eps_hit_count, m.claims_hit_count, time.time())
99
+ )
100
+ conn.commit()
101
+
102
+ def record_backend_health(self, backend: str, status: str, latency_ms: float):
103
+ with sqlite3.connect(str(self.path)) as conn:
104
+ conn.execute(
105
+ "INSERT INTO backend_health (backend, status, latency_ms, ts) VALUES (?,?,?,?)",
106
+ (backend, status, latency_ms, time.time())
107
+ )
108
+ conn.commit()
109
+
110
+ def record_cache_stats(self, hits: int, misses: int):
111
+ with sqlite3.connect(str(self.path)) as conn:
112
+ conn.execute(
113
+ "INSERT INTO cache_stats (hits, misses, ts) VALUES (?,?,?)",
114
+ (hits, misses, time.time())
115
+ )
116
+ conn.commit()
117
+
118
+ def record_ingest(self, files: int, symbols: int, duration_s: float):
119
+ with sqlite3.connect(str(self.path)) as conn:
120
+ conn.execute(
121
+ "INSERT INTO ingest_stats (files, symbols, duration_s, ts) VALUES (?,?,?,?)",
122
+ (files, symbols, duration_s, time.time())
123
+ )
124
+ conn.commit()
125
+
126
+ def record_tool_call(self, tool: str, project: str, *, query_text: str = "", output_chars: int = 0, result_count: int = 0, session_id: str = ""):
127
+ with sqlite3.connect(str(self.path)) as conn:
128
+ conn.execute(
129
+ "INSERT INTO tool_calls (tool, project, query_text, output_chars, result_count, session_id, ts) VALUES (?,?,?,?,?,?,?)",
130
+ (tool, project, query_text or None, output_chars, result_count, session_id or None, time.time())
131
+ )
132
+ conn.commit()
133
+
134
+ def record_fs_read(self, tool: str, path: str, project: str, *, output_chars: int = 0, session_id: str = ""):
135
+ with sqlite3.connect(str(self.path)) as conn:
136
+ conn.execute(
137
+ "INSERT INTO fs_reads (tool, path, output_chars, session_id, project, ts) VALUES (?,?,?,?,?,?)",
138
+ (tool, path or None, output_chars, session_id or None, project, time.time())
139
+ )
140
+ conn.commit()
141
+
142
+ def tool_usage_summary(self, project: str | None = None) -> dict:
143
+ with sqlite3.connect(str(self.path)) as conn:
144
+ conn.row_factory = sqlite3.Row
145
+ where = " WHERE project=?" if project else ""
146
+ params = (project,) if project else ()
147
+ rows = conn.execute(
148
+ f"SELECT tool, COUNT(*) as calls, COALESCE(SUM(output_chars),0) as total_chars, COALESCE(AVG(output_chars),0) as avg_chars FROM tool_calls{where} GROUP BY tool ORDER BY calls DESC",
149
+ params
150
+ ).fetchall()
151
+ return {
152
+ "tools": [dict(r) for r in rows],
153
+ "total_calls": sum(r["calls"] for r in rows),
154
+ }
155
+
156
+ def efficiency_summary(self, project: str | None = None) -> dict:
157
+ with sqlite3.connect(str(self.path)) as conn:
158
+ conn.row_factory = sqlite3.Row
159
+ # Aggregate tool_calls
160
+ tc_where = " WHERE project=?" if project else ""
161
+ tc_params = (project,) if project else ()
162
+ tc = conn.execute(
163
+ f"SELECT COUNT(*) as calls, COALESCE(SUM(output_chars),0) as total_chars FROM tool_calls{tc_where}",
164
+ tc_params
165
+ ).fetchone()
166
+
167
+ # Aggregate fs_reads
168
+ fs_where = " WHERE project=?" if project else ""
169
+ fs_params = (project,) if project else ()
170
+ fs = conn.execute(
171
+ f"SELECT COUNT(*) as reads, COALESCE(SUM(output_chars),0) as total_chars FROM fs_reads{fs_where}",
172
+ fs_params
173
+ ).fetchone()
174
+
175
+ # Per-session breakdown
176
+ session_where = " WHERE project=?" if project else ""
177
+ session_params = (project,) if project else ()
178
+ tc_sessions = conn.execute(
179
+ f"SELECT COALESCE(session_id,'') as session_id, SUM(output_chars) as mcp_chars FROM tool_calls{session_where} GROUP BY session_id",
180
+ session_params
181
+ ).fetchall()
182
+ fs_sessions = conn.execute(
183
+ f"SELECT COALESCE(session_id,'') as session_id, SUM(output_chars) as fs_chars FROM fs_reads{session_where} GROUP BY session_id",
184
+ session_params
185
+ ).fetchall()
186
+
187
+ # Merge per-session
188
+ session_map: dict[str, dict] = {}
189
+ for r in tc_sessions:
190
+ sid = r["session_id"]
191
+ session_map.setdefault(sid, {"session_id": sid, "mcp_chars": 0, "fs_chars": 0})
192
+ session_map[sid]["mcp_chars"] = r["mcp_chars"]
193
+ for r in fs_sessions:
194
+ sid = r["session_id"]
195
+ session_map.setdefault(sid, {"session_id": sid, "mcp_chars": 0, "fs_chars": 0})
196
+ session_map[sid]["fs_chars"] = r["fs_chars"]
197
+
198
+ return {
199
+ "total_mcp_chars": tc["total_chars"],
200
+ "total_fs_chars": fs["total_chars"],
201
+ "mcp_calls": tc["calls"],
202
+ "fs_reads": fs["reads"],
203
+ "sessions": list(session_map.values()),
204
+ }
205
+
206
+ def summary(self) -> dict[str, Any]:
207
+ with sqlite3.connect(str(self.path)) as conn:
208
+ conn.row_factory = sqlite3.Row
209
+ # Retrieve stats
210
+ r = conn.execute(
211
+ "SELECT COUNT(*) as count, AVG(total_ms) as avg_total_ms, AVG(embed_ms) as avg_embed_ms, AVG(code_search_ms) as avg_code_search_ms FROM retrieves"
212
+ ).fetchone()
213
+
214
+ # Cache stats
215
+ c = conn.execute(
216
+ "SELECT COALESCE(SUM(hits),0) as total_hits, COALESCE(SUM(misses),0) as total_misses FROM cache_stats"
217
+ ).fetchone()
218
+ total_cache = c["total_hits"] + c["total_misses"]
219
+ hit_ratio = c["total_hits"] / total_cache if total_cache > 0 else 0.0
220
+
221
+ # Ingest stats
222
+ i = conn.execute(
223
+ "SELECT COUNT(*) as count, COALESCE(SUM(symbols),0) as total_symbols, MAX(ts) as last_ingest_ts FROM ingest_stats"
224
+ ).fetchone()
225
+
226
+ # Backend health - latest per backend
227
+ b = conn.execute(
228
+ "SELECT backend, status, MAX(ts) as last_ts FROM backend_health GROUP BY backend"
229
+ ).fetchall()
230
+
231
+ return {
232
+ "retrieves": {
233
+ "count": r["count"],
234
+ "avg_total_ms": round(r["avg_total_ms"] or 0, 1),
235
+ "avg_embed_ms": round(r["avg_embed_ms"] or 0, 1),
236
+ "avg_code_search_ms": round(r["avg_code_search_ms"] or 0, 1),
237
+ },
238
+ "cache": {
239
+ "total_hits": c["total_hits"],
240
+ "total_misses": c["total_misses"],
241
+ "hit_ratio": round(hit_ratio, 3),
242
+ },
243
+ "ingest": {
244
+ "count": i["count"],
245
+ "total_symbols": i["total_symbols"],
246
+ "last_ingest_ts": i["last_ingest_ts"],
247
+ },
248
+ "backends": [{"backend": row["backend"], "status": row["status"]} for row in b],
249
+ }
250
+
251
+ def recent_retrieves(self, limit: int = 10) -> list[dict]:
252
+ with sqlite3.connect(str(self.path)) as conn:
253
+ conn.row_factory = sqlite3.Row
254
+ rows = conn.execute(
255
+ "SELECT * FROM retrieves ORDER BY ts DESC LIMIT ?", (limit,)
256
+ ).fetchall()
257
+ return [dict(r) for r in rows]
258
+
259
+ def close(self):
260
+ pass
@@ -0,0 +1,13 @@
1
+ from .pipeline import Pipeline
2
+ from .reset import ResetResult, list_projects, reset_all, reset_project
3
+ from .retrieve import ContextPack, Retriever
4
+
5
+ __all__ = [
6
+ "Pipeline",
7
+ "Retriever",
8
+ "ContextPack",
9
+ "ResetResult",
10
+ "list_projects",
11
+ "reset_project",
12
+ "reset_all",
13
+ ]
@@ -0,0 +1,211 @@
1
+ """Git-aware delta detection for incremental ingestion.
2
+
3
+ Given a repo root and a base commit, produce three lists:
4
+ - changed (added / modified / renamed-new) -> reingest
5
+ - deleted (removed / renamed-old) -> drop from index
6
+ - dirty (uncommitted worktree changes) -> reingest
7
+
8
+ All paths returned are absolute, resolved.
9
+ """
10
+
11
+ from __future__ import annotations
12
+
13
+ import subprocess
14
+ from dataclasses import dataclass, field
15
+ from pathlib import Path
16
+
17
+
18
class GitError(RuntimeError):
    """Raised when a git command cannot be run or exits unsuccessfully."""
20
+
21
+
22
def _run(repo: Path, *args: str, check: bool = True, timeout: float = 30.0) -> str:
    """Run ``git -C <repo> <args...>`` and return its standard output as text.

    Args:
        repo: Directory handed to git via ``-C``.
        *args: Git sub-command and its arguments.
        check: When true, a non-zero exit status raises ``GitError``.
        timeout: Seconds to wait before the invocation is abandoned.

    Returns:
        The command's stdout decoded as UTF-8.

    Raises:
        GitError: git is not on PATH, the process could not be started or
            timed out, or (with ``check``) it exited with a non-zero status.
    """
    try:
        proc = subprocess.run(
            ["git", "-C", str(repo), *args],
            capture_output=True,
            check=False,
            timeout=timeout,
        )
    except FileNotFoundError as e:
        raise GitError("git executable not found on PATH") from e
    except (OSError, subprocess.SubprocessError) as e:
        # Any other launch failure (e.g. PermissionError) or a timeout is
        # surfaced as GitError so callers need to handle only one type.
        raise GitError(f"git invocation failed: {e}") from e
    if check and proc.returncode != 0:
        # "replace" keeps the message printable whatever bytes git emitted.
        stderr = proc.stderr.decode("utf-8", errors="replace").strip()
        raise GitError(
            f"git {' '.join(args)} failed (exit {proc.returncode}): {stderr}"
        )
    # Decode explicitly as UTF-8 instead of the locale codec so path names
    # come out the same on every platform; surrogateescape keeps undecodable
    # bytes round-trippable rather than raising UnicodeDecodeError.
    return proc.stdout.decode("utf-8", errors="surrogateescape")
40
+
41
+
42
def is_git_repo(root: str | Path) -> bool:
    """Return True when git reports that ``root`` is inside a work tree.

    Any ``GitError`` raised while asking git is treated as "not a repository".
    """
    try:
        answer = _run(Path(root), "rev-parse", "--is-inside-work-tree", check=False)
    except GitError:
        return False
    else:
        return answer.strip() == "true"
48
+
49
+
50
def head_sha(root: str | Path) -> str:
    """Return the output of ``git rev-parse HEAD`` for ``root``, stripped of whitespace."""
    sha = _run(Path(root), "rev-parse", "HEAD")
    return sha.strip()
52
+
53
+
54
def current_branch(root: str | Path) -> str | None:
    """Return the abbreviated name HEAD points at, or None when unavailable.

    None is returned when git prints nothing or prints the literal ``HEAD``.
    """
    name = _run(Path(root), "rev-parse", "--abbrev-ref", "HEAD", check=False).strip()
    if not name or name == "HEAD":
        return None
    return name
57
+
58
+
59
def is_reachable(root: str | Path, sha: str) -> bool:
    """Return True when ``git cat-file -e <sha>^{commit}`` succeeds in ``root``."""
    commit_spec = f"{sha}^{{commit}}"
    try:
        _run(Path(root), "cat-file", "-e", commit_spec, check=True)
    except GitError:
        return False
    else:
        return True
65
+
66
+
67
def resolve_ref(root: str | Path, ref: str) -> str:
    """Resolve a ref (branch / tag / sha) to a commit SHA.

    Raises ``GitError`` when git cannot verify ``ref`` as a commit.
    """
    commit_spec = f"{ref}^{{commit}}"
    resolved = _run(Path(root), "rev-parse", "--verify", commit_spec)
    return resolved.strip()
70
+
71
+
72
def commit_ordinal(root: str | Path, sha: str) -> int | None:
    """Return the first-parent ancestor count of ``sha``, or None.

    The value is the output of ``git rev-list --count --first-parent <sha>``,
    used as a cheap "before / after" comparator between SHAs. None is
    returned when git fails for ``sha`` or its output is not an integer.
    """
    repo = Path(root)
    try:
        raw = _run(repo, "rev-list", "--count", "--first-parent", sha, check=True)
    except GitError:
        return None

    count_text = raw.strip()
    try:
        ordinal = int(count_text)
    except ValueError:
        return None
    return ordinal
96
+
97
+
98
@dataclass
class Delta:
    """Path-level change set: files to reingest and files to drop."""

    # Added / modified / renamed-to paths.
    changed: list[Path] = field(default_factory=list)
    # Removed / renamed-from paths.
    deleted: list[Path] = field(default_factory=list)
    # Paths with uncommitted changes.
    dirty: list[Path] = field(default_factory=list)

    @property
    def is_empty(self) -> bool:
        """True when all three path lists are empty."""
        return not any((self.changed, self.deleted, self.dirty))

    def reingest_paths(self) -> list[Path]:
        """Return ``changed`` followed by ``dirty`` with duplicates removed.

        The first occurrence of each path keeps its position.
        """
        # dict keys are unique and preserve insertion order.
        return list(dict.fromkeys([*self.changed, *self.dirty]))
118
+
119
+
120
def diff(root: str | Path, base_sha: str, head: str = "HEAD") -> Delta:
    """Compute path-level delta between base_sha and head (committed only).

    Git is invoked with ``-z`` so path names arrive verbatim and
    NUL-separated; without it git C-quotes "unusual" names (non-ASCII, tabs,
    quotes, ...) and the quoted text would be mistaken for the real path.

    Args:
        root: Repository root; returned paths are resolved against it.
        base_sha: Commit to diff from.
        head: Commit-ish to diff to.

    Returns:
        A ``Delta`` whose ``changed`` holds added / modified / type-changed /
        renamed-to / copied-to paths and whose ``deleted`` holds removed and
        renamed-from paths. ``dirty`` is left empty.

    Raises:
        GitError: if the underlying git command fails.
    """
    repo = Path(root).resolve()
    out = _run(repo, "diff", "--name-status", "-M", "-z", base_sha, head)
    delta = Delta()
    # With -z the stream is: status NUL path NUL, or for renames / copies
    # status NUL old-path NUL new-path NUL.
    fields = iter(out.split("\0"))
    for status in fields:
        if not status:
            continue
        code = status[0]
        if code in ("R", "C"):
            old_rel = next(fields, "")
            new_rel = next(fields, "")
            if not (old_rel and new_rel):
                break  # truncated record
            if code == "R":
                # Only a rename removes the source; a copy leaves it in place.
                delta.deleted.append((repo / old_rel).resolve())
            delta.changed.append((repo / new_rel).resolve())
            continue
        rel = next(fields, "")
        if not rel:
            break  # truncated record
        if code == "D":
            delta.deleted.append((repo / rel).resolve())
        elif code in ("A", "M", "T"):
            delta.changed.append((repo / rel).resolve())
        # anything else (U/X/B) -> skip silently
    return delta
143
+
144
+
145
def dirty_files(root: str | Path) -> list[Path]:
    """Return absolute paths of files with uncommitted *content* changes
    (modified, added, untracked). Worktree deletions are reported by
    ``dirty_deleted_files`` so callers can route them to a delete path
    instead of trying to reingest a missing file.

    The porcelain output is requested with ``-z``: entries are NUL-terminated
    and path names are not quoted, so names containing spaces, non-ASCII
    characters or the text ``->`` are parsed correctly.

    Raises:
        GitError: if the underlying git command fails.
    """
    repo = Path(root).resolve()
    out = _run(repo, "status", "--porcelain=v1", "-z", "--untracked-files=all")
    paths: list[Path] = []
    fields = iter(out.split("\0"))
    for entry in fields:
        # Shortest valid entry is "XY p": two status columns, a space, a path.
        if len(entry) < 4:
            continue
        xy = entry[:2]
        rest = entry[3:]
        # In -z mode a rename/copy entry carries the NEW path and is followed
        # by a separate field with the original path; consume that field so
        # it is not mistaken for an entry of its own.
        if "R" in xy or "C" in xy:
            next(fields, None)
        # ignored / deleted in index or worktree -> skip
        if xy == "!!" or "D" in xy:
            continue
        path = (repo / rest).resolve()
        if path.is_file():
            paths.append(path)
    return paths
169
+
170
+
171
def dirty_deleted_files(root: str | Path) -> list[Path]:
    """Return absolute paths of files deleted in the worktree but not yet
    committed.

    Catches both ``git rm`` (index column 'D') and a plain ``rm`` against
    a tracked file (worktree column 'D'), so the index can be pruned
    before any commit lands.

    The porcelain output is requested with ``-z`` so path names arrive
    unquoted; otherwise names with spaces or non-ASCII characters would be
    reported in their quoted form rather than as the real path.

    Raises:
        GitError: if the underlying git command fails.
    """
    repo = Path(root).resolve()
    out = _run(repo, "status", "--porcelain=v1", "-z")
    paths: list[Path] = []
    fields = iter(out.split("\0"))
    for entry in fields:
        # Shortest valid entry is "XY p": two status columns, a space, a path.
        if len(entry) < 4:
            continue
        xy = entry[:2]
        rest = entry[3:]
        if xy == "!!":
            continue
        # Rename / copy entries are skipped, as before. In -z mode the
        # original path follows as its own field, so consume it as well.
        # NOTE(review): the old path of a staged rename is therefore not
        # reported here -- confirm the caller prunes it some other way.
        if "R" in xy or "C" in xy:
            next(fields, None)
            continue
        if xy[0] == "D" or xy[1] == "D":
            paths.append((repo / rest).resolve())
    return paths
200
+
201
+
202
def changed_since(root: str | Path, base_sha: str, *, include_dirty: bool = True) -> Delta:
    """Convenience: delta from base_sha to HEAD, plus optional dirty worktree.

    With ``include_dirty`` the result's ``dirty`` list gains the output of
    ``dirty_files`` and its ``deleted`` list gains the output of
    ``dirty_deleted_files``.
    """
    delta = diff(root, base_sha, "HEAD")
    if not include_dirty:
        return delta

    delta.dirty += dirty_files(root)
    # Uncommitted deletions are reported too, so files that vanished from
    # the worktree can be torn down before the removal is committed.
    delta.deleted += dirty_deleted_files(root)
    return delta
@@ -0,0 +1,71 @@
1
+ """Per-repo ingest state: track the last commit successfully ingested.
2
+
3
+ Lives in the same SQLite DB as episodes (per-project namespaced) so it
4
+ inherits project isolation automatically.
5
+ """
6
+
7
+ from __future__ import annotations
8
+
9
+ import sqlite3
10
+ import time
11
+ from dataclasses import dataclass
12
+ from pathlib import Path
13
+
14
+ SCHEMA = """
15
+ CREATE TABLE IF NOT EXISTS ingest_state (
16
+ repo_root TEXT PRIMARY KEY,
17
+ last_sha TEXT NOT NULL,
18
+ last_ts REAL NOT NULL,
19
+ branch TEXT
20
+ );
21
+ """
22
+
23
+
24
@dataclass(frozen=True)
class IngestState:
    """Immutable snapshot of one row of the ``ingest_state`` table."""

    # Primary key of the row.
    repo_root: str
    # Commit SHA recorded for the repository.
    last_sha: str
    # Time the row was written, as a ``time.time()`` value.
    last_ts: float
    # Branch recorded alongside the SHA, if any.
    branch: str | None = None
30
+
31
+
32
class IngestStateStore:
    """Thin SQLite wrapper for per-repo ingest checkpoints.

    Holds one connection for its whole lifetime; call ``close()`` or use the
    store as a context manager to release it.
    """

    def __init__(self, db_path: Path) -> None:
        """Open (creating if needed) the database at ``db_path`` and ensure the schema.

        ``db_path`` is coerced to ``Path`` so a plain string is accepted too.
        """
        self.path = Path(db_path)
        self.path.parent.mkdir(parents=True, exist_ok=True)
        self.conn = sqlite3.connect(self.path)
        try:
            self.conn.executescript(SCHEMA)
            self.conn.commit()
        except sqlite3.Error:
            # Do not leak the handle when the schema cannot be created.
            self.conn.close()
            raise

    @staticmethod
    def _key(repo_root: str | Path) -> str:
        """Return the resolved path string used as the table's primary key."""
        return str(Path(repo_root).resolve())

    def get(self, repo_root: str | Path) -> IngestState | None:
        """Return the stored checkpoint for ``repo_root``, or None if absent."""
        row = self.conn.execute(
            "SELECT repo_root, last_sha, last_ts, branch FROM ingest_state WHERE repo_root = ?",
            (self._key(repo_root),),
        ).fetchone()
        if row is None:
            return None
        return IngestState(repo_root=row[0], last_sha=row[1], last_ts=row[2], branch=row[3])

    def set(self, repo_root: str | Path, sha: str, branch: str | None = None) -> None:
        """Insert or overwrite the checkpoint for ``repo_root``, stamped with the current time."""
        self.conn.execute(
            "INSERT INTO ingest_state(repo_root, last_sha, last_ts, branch) "
            "VALUES (?, ?, ?, ?) "
            "ON CONFLICT(repo_root) DO UPDATE SET "
            "  last_sha = excluded.last_sha, "
            "  last_ts = excluded.last_ts, "
            "  branch = excluded.branch",
            (self._key(repo_root), sha, time.time(), branch),
        )
        self.conn.commit()

    def clear(self, repo_root: str | Path) -> None:
        """Delete the checkpoint for ``repo_root``; a missing row is not an error."""
        self.conn.execute(
            "DELETE FROM ingest_state WHERE repo_root = ?",
            (self._key(repo_root),),
        )
        self.conn.commit()

    def close(self) -> None:
        """Close the underlying SQLite connection."""
        self.conn.close()

    def __enter__(self) -> "IngestStateStore":
        """Return the store itself for use in a ``with`` statement."""
        return self

    def __exit__(self, exc_type, exc, tb) -> None:
        """Close the connection when the ``with`` block ends."""
        self.close()