flurryx-code-memory 0.4.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- code_memory/__init__.py +1 -0
- code_memory/claims/__init__.py +32 -0
- code_memory/claims/extractor.py +325 -0
- code_memory/claims/indexer.py +258 -0
- code_memory/claims/resolver.py +186 -0
- code_memory/claims/store.py +424 -0
- code_memory/cli.py +1192 -0
- code_memory/config.py +268 -0
- code_memory/embed/__init__.py +224 -0
- code_memory/embed/cache.py +204 -0
- code_memory/embed/m3.py +174 -0
- code_memory/embed/ollama.py +92 -0
- code_memory/embed/tei.py +106 -0
- code_memory/episodic/__init__.py +3 -0
- code_memory/episodic/sqlite_store.py +278 -0
- code_memory/extractor/__init__.py +3 -0
- code_memory/extractor/csproj.py +166 -0
- code_memory/extractor/dll.py +385 -0
- code_memory/extractor/gitignore.py +162 -0
- code_memory/extractor/nuget.py +275 -0
- code_memory/extractor/sanity.py +124 -0
- code_memory/extractor/sln.py +108 -0
- code_memory/extractor/treesitter.py +1172 -0
- code_memory/graph/__init__.py +3 -0
- code_memory/graph/falkor_store.py +740 -0
- code_memory/mcp_server.py +1816 -0
- code_memory/metrics.py +260 -0
- code_memory/orchestrator/__init__.py +13 -0
- code_memory/orchestrator/git_delta.py +211 -0
- code_memory/orchestrator/ingest_state.py +71 -0
- code_memory/orchestrator/pipeline.py +1478 -0
- code_memory/orchestrator/reset.py +130 -0
- code_memory/orchestrator/resolver.py +825 -0
- code_memory/orchestrator/retrieve.py +505 -0
- code_memory/resilience.py +73 -0
- code_memory/sync/__init__.py +20 -0
- code_memory/sync/autostart/__init__.py +42 -0
- code_memory/sync/autostart/base.py +106 -0
- code_memory/sync/autostart/launchd.py +115 -0
- code_memory/sync/autostart/schtasks.py +155 -0
- code_memory/sync/autostart/systemd.py +113 -0
- code_memory/sync/hooks.py +164 -0
- code_memory/sync/safety.py +65 -0
- code_memory/sync/snapshot.py +461 -0
- code_memory/sync/store.py +399 -0
- code_memory/sync/sync.py +405 -0
- code_memory/sync/watcher.py +320 -0
- code_memory/vector/__init__.py +3 -0
- code_memory/vector/qdrant_store.py +302 -0
- flurryx_code_memory-0.4.0.dist-info/METADATA +26 -0
- flurryx_code_memory-0.4.0.dist-info/RECORD +53 -0
- flurryx_code_memory-0.4.0.dist-info/WHEEL +4 -0
- flurryx_code_memory-0.4.0.dist-info/entry_points.txt +3 -0
code_memory/embed/m3.py
ADDED
|
@@ -0,0 +1,174 @@
|
|
|
1
|
+
"""BGE-M3 hybrid embedder: dense + sparse from one forward pass.
|
|
2
|
+
|
|
3
|
+
Opt-in backend (``EMBED_BACKEND=flagembed``). Loads m3 in-process via
|
|
4
|
+
FlagEmbedding, which means each Python process pays a ~5-15 s
|
|
5
|
+
cold-load. Worth it for long-lived processes (watcher, MCP server)
|
|
6
|
+
that want the sparse signal; not worth it for hook-driven per-save
|
|
7
|
+
CLI invocations — :class:`code_memory.embed.OllamaEmbedder` is the
|
|
8
|
+
default for that reason.
|
|
9
|
+
|
|
10
|
+
m3 emits three views per input:
|
|
11
|
+
|
|
12
|
+
* Dense (1024-d float) — semantic similarity (cosine).
|
|
13
|
+
* Sparse (token-id -> weight) — lexical/identifier signal akin to BM25
|
|
14
|
+
but learned. Used for code search where exact symbol names matter.
|
|
15
|
+
* ColBERT multi-vec — not used here; cross-encoder rerank covers the
|
|
16
|
+
late-interaction case.
|
|
17
|
+
|
|
18
|
+
Fusion happens server-side in Qdrant (RRF / DBSF), so both views are
|
|
19
|
+
stored alongside each chunk and combined at query time.
|
|
20
|
+
"""
|
|
21
|
+
|
|
22
|
+
from __future__ import annotations
|
|
23
|
+
|
|
24
|
+
import logging
|
|
25
|
+
import platform
|
|
26
|
+
from collections.abc import Sequence
|
|
27
|
+
from dataclasses import dataclass
|
|
28
|
+
from typing import Any
|
|
29
|
+
|
|
30
|
+
from ..config import CONFIG
|
|
31
|
+
|
|
32
|
+
log = logging.getLogger(__name__)
|
|
33
|
+
|
|
34
|
+
# FlagEmbedding requires a HF repo id. The legacy ``EMBED_MODEL`` env
|
|
35
|
+
# var used the Ollama short name (``bge-m3``), which HF rejects, so we
|
|
36
|
+
# remap it here. Other models pass through unchanged.
|
|
37
|
+
_OLLAMA_TO_HF = {"bge-m3": "BAAI/bge-m3"}
|
|
38
|
+
DEFAULT_MODEL = "BAAI/bge-m3"
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
def _resolve_model(name: str | None) -> str:
    """Choose the model id to load, translating Ollama short names.

    The first truthy value among ``name``, ``CONFIG.embed_model`` and
    ``DEFAULT_MODEL`` is taken and stripped of surrounding whitespace.
    If the result is a key of ``_OLLAMA_TO_HF`` the mapped value is
    returned; any other name passes through unchanged.
    """
    candidate = name or CONFIG.embed_model or DEFAULT_MODEL
    model_id = candidate.strip()
    if model_id in _OLLAMA_TO_HF:
        return _OLLAMA_TO_HF[model_id]
    return model_id
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
@dataclass(frozen=True)
class SparseVec:
    """Immutable sparse vector held as two lists, Qdrant's (indices, values) layout."""

    # Dimension ids of the non-zero entries.
    indices: list[int]
    # Weights, one per entry of ``indices`` and in the same order.
    values: list[float]
|
|
52
|
+
|
|
53
|
+
|
|
54
|
+
@dataclass(frozen=True)
class HybridVec:
    """Immutable pair of embeddings produced for a single input text."""

    # Dense embedding as plain Python floats.
    dense: list[float]
    # Sparse embedding; backends without a sparse head supply an empty one.
    sparse: SparseVec
|
|
58
|
+
|
|
59
|
+
|
|
60
|
+
def _detect_device() -> str:
|
|
61
|
+
"""Best available accelerator; falls back to CPU."""
|
|
62
|
+
try:
|
|
63
|
+
import torch
|
|
64
|
+
except ImportError:
|
|
65
|
+
return "cpu"
|
|
66
|
+
if platform.system() == "Darwin" and torch.backends.mps.is_available():
|
|
67
|
+
return "mps"
|
|
68
|
+
if torch.cuda.is_available():
|
|
69
|
+
return "cuda"
|
|
70
|
+
return "cpu"
|
|
71
|
+
|
|
72
|
+
|
|
73
|
+
class M3Embedder:
    """Stateful BGE-M3 wrapper producing dense + sparse vectors.

    Heavy to construct (downloads + loads ~2.3GB on first use). Cache
    the instance for the process lifetime; the shared factory lives in
    ``code_memory.embed``. Usable as a context manager: leaving the
    ``with`` block calls :meth:`close`.
    """

    def __init__(
        self,
        model: str | None = None,
        device: str | None = None,
        use_fp16: bool | None = None,
        batch_size: int = 12,
    ) -> None:
        """Load the model.

        Args:
            model: Model name; resolved through ``_resolve_model``.
            device: Device string; auto-detected when omitted.
            use_fp16: Force half precision on or off. When ``None`` it is
                enabled only for ``"cuda"`` and ``"mps"``.
            batch_size: Batch size handed to ``encode`` on every call.

        Raises:
            ImportError: If FlagEmbedding is not installed.
        """
        # Imported here so that importing this module does not itself
        # require the optional FlagEmbedding dependency.
        from FlagEmbedding import BGEM3FlagModel

        self.model_name = _resolve_model(model)
        self.device = device or _detect_device()
        # fp16 only safe on CUDA/MPS; CPU stays at fp32 for numerical
        # stability + because some BLAS kernels don't support fp16.
        if use_fp16 is None:
            use_fp16 = self.device in ("cuda", "mps")
        self.batch_size = batch_size
        log.info(
            "m3: loading %s (device=%s fp16=%s)",
            self.model_name,
            self.device,
            use_fp16,
        )
        self._impl = BGEM3FlagModel(
            self.model_name,
            use_fp16=use_fp16,
            devices=self.device,
        )

    # ----------------------------------------------------------- batch

    def embed(self, texts: Sequence[str]) -> list[HybridVec]:
        """Embed ``texts`` and return one :class:`HybridVec` per input, in order.

        An empty input returns ``[]`` without touching the model.

        Raises:
            RuntimeError: If the embedder has been closed, or the model
                returned a different number of rows than inputs.
        """
        if not texts:
            return []
        if self._impl is None:
            # close() drops the model; fail with a clear message instead of
            # an AttributeError on None.
            raise RuntimeError("M3Embedder is closed")
        out = self._impl.encode(
            list(texts),
            batch_size=self.batch_size,
            return_dense=True,
            return_sparse=True,
            return_colbert_vecs=False,
        )
        dense = out["dense_vecs"]
        sparse = out["lexical_weights"]
        expected = len(texts)
        if len(dense) != expected or len(sparse) != expected:
            # Guard before zipping so a short result can never be silently
            # paired with the wrong inputs.
            raise RuntimeError(
                f"m3 returned {len(dense)} dense / {len(sparse)} sparse "
                f"vectors for {expected} inputs"
            )
        return [
            HybridVec(
                dense=[float(x) for x in dense_row],
                sparse=_to_qdrant_sparse(sparse_row),
            )
            for dense_row, sparse_row in zip(dense, sparse)
        ]

    def embed_one(self, text: str) -> HybridVec:
        """Embed a single text."""
        return self.embed([text])[0]

    # ------------------------------------------------------------ misc

    def close(self) -> None:
        """Release the model. Safe to call more than once."""
        # FlagEmbedding has no explicit close; drop the reference to free
        # GPU mem on next gc cycle.
        self._impl = None

    def __enter__(self) -> M3Embedder:
        return self

    def __exit__(self, *exc: object) -> None:
        self.close()
|
|
146
|
+
|
|
147
|
+
|
|
148
|
+
def _to_qdrant_sparse(weights: dict[Any, Any]) -> SparseVec:
    """Convert m3 ``{token_id: weight}`` mapping to Qdrant sparse format.

    m3 returns numpy floats keyed by string token IDs. Qdrant wants
    plain ints and floats, so the conversion is explicit and anything
    unusable is dropped rather than poisoning the index:

    * keys that cannot be converted to an int, or convert to a negative one;
    * weights that cannot be converted to a float, or are NaN, infinite
      or not strictly positive.

    If two keys convert to the same int (e.g. ``"7"`` and ``7``) a single
    entry carrying the larger weight is emitted, so every index appears
    at most once. Entries keep the order in which their index was first
    seen.
    """
    positive_inf = float("inf")
    merged: dict[int, float] = {}
    for tok, w in weights.items():
        try:
            idx = int(tok)
            val = float(w)
        except (TypeError, ValueError, OverflowError):
            continue
        # The chained comparison is False for NaN, so this one test rejects
        # NaN, +/-inf and non-positive weights together.
        if idx < 0 or not 0.0 < val < positive_inf:
            continue
        if val > merged.get(idx, 0.0):
            merged[idx] = val
    return SparseVec(indices=list(merged), values=list(merged.values()))
|
|
169
|
+
|
|
170
|
+
|
|
171
|
+
# Factory + singleton live in ``code_memory.embed.__init__`` so the
|
|
172
|
+
# Ollama and M3 backends share one selection mechanism. ``M3Embedder``
|
|
173
|
+
# itself remains directly constructible for tests and for users who
|
|
174
|
+
# want to bypass the env-var dispatch.
|
|
@@ -0,0 +1,92 @@
|
|
|
1
|
+
"""Ollama-backed dense embedder (default backend).
|
|
2
|
+
|
|
3
|
+
Runs `bge-m3` (or any Ollama-served model) over HTTP. Ollama keeps the
|
|
4
|
+
model loaded in its own daemon, so short-lived CLI processes (e.g.
|
|
5
|
+
``code-memory reingest <file>`` invoked from a save-file hook) reuse
|
|
6
|
+
the warm model instead of paying a ~5-15 s cold load every call.
|
|
7
|
+
|
|
8
|
+
Trade-off vs the in-process FlagEmbedding path: Ollama only exposes the
|
|
9
|
+
dense head of m3 — no sparse, no ColBERT. Sparse is returned as an
|
|
10
|
+
empty :class:`SparseVec` so the Qdrant hybrid layout still upserts
|
|
11
|
+
cleanly; queries through the hybrid slot then degrade to dense-only at
|
|
12
|
+
RRF time. Users who want true m3 hybrid (dense + sparse from one
|
|
13
|
+
forward pass) can flip ``EMBED_BACKEND=flagembed`` and accept the
|
|
14
|
+
cold-load cost.
|
|
15
|
+
"""
|
|
16
|
+
|
|
17
|
+
from __future__ import annotations
|
|
18
|
+
|
|
19
|
+
from collections.abc import Sequence
|
|
20
|
+
|
|
21
|
+
import logging
|
|
22
|
+
|
|
23
|
+
import httpx
|
|
24
|
+
|
|
25
|
+
from ..config import CONFIG
|
|
26
|
+
from ..resilience import with_retry
|
|
27
|
+
from .m3 import HybridVec, SparseVec
|
|
28
|
+
|
|
29
|
+
log_ = logging.getLogger(__name__)
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
class OllamaEmbedder:
    """Thin sync wrapper over Ollama /api/embed.

    Returns :class:`HybridVec` with an empty sparse component so the
    shape matches :class:`M3Embedder`. The empty sparse vector is a
    deliberate signal to :class:`QdrantStore` that hybrid fusion will
    degrade to dense-only for this point.
    """

    def __init__(
        self,
        url: str | None = None,
        model: str | None = None,
        timeout: float = 300.0,
    ) -> None:
        """Create the HTTP client.

        Args:
            url: Ollama base URL; defaults to ``CONFIG.ollama_url``. A
                trailing slash is removed.
            model: Model name sent with each request; defaults to
                ``CONFIG.embed_model``.
            timeout: Timeout passed to ``httpx.Client``.
        """
        self.url = (url or CONFIG.ollama_url).rstrip("/")
        self.model = model or CONFIG.embed_model
        self._client = httpx.Client(timeout=timeout)

    def embed(self, texts: Sequence[str]) -> list[HybridVec]:
        """Embed ``texts`` and return one dense-only :class:`HybridVec` per input.

        An empty input returns ``[]`` without issuing a request. The HTTP
        call goes through ``with_retry`` (``max_retries=3``,
        ``backoff_s=1.0``); each retry is logged as a warning.

        Raises:
            RuntimeError: If the response has no ``embeddings`` key, or
                the number of vectors differs from the number of inputs.
        """
        if not texts:
            return []

        # Materialise once; the closure below may run several times.
        inputs = list(texts)

        def _call():
            res = self._client.post(
                f"{self.url}/api/embed",
                json={"model": self.model, "input": inputs},
            )
            res.raise_for_status()
            data = res.json()
            embeddings = data.get("embeddings")
            if embeddings is None:
                raise RuntimeError(f"Ollama returned no embeddings: {data}")
            return embeddings

        embeddings = with_retry(
            _call,
            max_retries=3,
            backoff_s=1.0,
            on_retry=lambda attempt, exc: log_.warning(
                "ollama embed retry %d/3 after %s", attempt, exc
            ),
        )

        # Same guard TEIEmbedder applies: a short or long response would
        # otherwise silently misalign vectors with their source texts.
        if len(embeddings) != len(inputs):
            raise RuntimeError(
                f"Ollama returned {len(embeddings)} vectors for {len(inputs)} inputs"
            )

        empty = SparseVec(indices=[], values=[])
        return [
            HybridVec(dense=[float(x) for x in vec], sparse=empty)
            for vec in embeddings
        ]

    def embed_one(self, text: str) -> HybridVec:
        """Embed a single text."""
        return self.embed([text])[0]

    def close(self) -> None:
        """Close the underlying HTTP client."""
        self._client.close()

    def __enter__(self) -> OllamaEmbedder:
        return self

    def __exit__(self, *exc: object) -> None:
        self.close()
|
code_memory/embed/tei.py
ADDED
|
@@ -0,0 +1,106 @@
|
|
|
1
|
+
"""text-embeddings-inference (TEI) backend.
|
|
2
|
+
|
|
3
|
+
`HuggingFace TEI <https://github.com/huggingface/text-embeddings-inference>`_
|
|
4
|
+
is a purpose-built embedding server. On a Linux + NVIDIA host with the
|
|
5
|
+
same ``BAAI/bge-m3`` weights, it serves embeddings at **5-10× the
|
|
6
|
+
throughput** of Ollama because:
|
|
7
|
+
|
|
8
|
+
* Built on ONNX Runtime / candle-rs with native CUDA batching.
|
|
9
|
+
* Streams + dynamically batches concurrent requests instead of
|
|
10
|
+
serialising one-at-a-time.
|
|
11
|
+
* No HTTP-to-llama.cpp daemon hop per call.
|
|
12
|
+
|
|
13
|
+
For enterprise CI / staging where the cold ingest of a large monorepo
|
|
14
|
+
matters, this is the way to break the ``bge-m3`` throughput floor
|
|
15
|
+
without changing models or losing semantic recall.
|
|
16
|
+
|
|
17
|
+
Trade-off vs ``OllamaEmbedder``:
|
|
18
|
+
|
|
19
|
+
* Same shape (dense-only ``HybridVec`` with empty sparse) so callers
|
|
20
|
+
swap backends transparently.
|
|
21
|
+
* TEI must be running before code-memory ingests; Ollama-style "I
|
|
22
|
+
brought my own daemon" still applies.
|
|
23
|
+
* On Mac (no NVIDIA), TEI's CPU path is roughly on par with Ollama's
|
|
24
|
+
Metal path — there's no advantage. Stay on Ollama there.
|
|
25
|
+
|
|
26
|
+
Activated via ``EMBED_BACKEND=tei``; URL via ``TEI_URL``.
|
|
27
|
+
"""
|
|
28
|
+
|
|
29
|
+
from __future__ import annotations
|
|
30
|
+
|
|
31
|
+
from collections.abc import Sequence
|
|
32
|
+
|
|
33
|
+
import httpx
|
|
34
|
+
|
|
35
|
+
from ..config import CONFIG
|
|
36
|
+
from .m3 import HybridVec, SparseVec
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
class TEIEmbedder:
    """Synchronous client for the ``/embed`` endpoint of a TEI server.

    Produces the same output shape as the other embedders in this
    package: a :class:`HybridVec` whose sparse part is empty, so callers
    do not have to branch on the backend.

    Wire format used here:

    * Request: ``POST /embed`` with body ``{"inputs": [...], "truncate": true}``.
    * Response: a bare JSON list of vectors, ``[[float, ...], ...]``,
      with no wrapping object.

    ``truncate`` is always set so that over-length inputs are cut down
    by the server rather than failing the whole batch.
    """

    def __init__(
        self,
        url: str | None = None,
        timeout: float = 300.0,
    ) -> None:
        # No model id is sent with requests: the TEI daemon is started
        # for one model. ``self.model`` is still populated from
        # ``CONFIG.embed_model`` for parity with OllamaEmbedder, so the
        # cache key namespace lines up across backends that point at the
        # same weights.
        base_url = url or CONFIG.tei_url
        self.url = base_url.rstrip("/")
        self.model = CONFIG.embed_model
        self._client = httpx.Client(timeout=timeout)

    def embed(self, texts: Sequence[str]) -> list[HybridVec]:
        """Embed ``texts``; returns one vector per input, in input order.

        Raises ``RuntimeError`` when the response is not a list or its
        length does not match the number of inputs.
        """
        if not texts:
            return []

        payload = {"inputs": list(texts), "truncate": True}
        response = self._client.post(f"{self.url}/embed", json=payload)
        response.raise_for_status()
        vectors = response.json()

        # The body must be a bare list of vectors, one per input.
        if not isinstance(vectors, list):
            raise RuntimeError(f"TEI returned unexpected shape: {type(vectors).__name__}")
        if len(vectors) != len(texts):
            raise RuntimeError(
                f"TEI returned {len(vectors)} vectors for {len(texts)} inputs"
            )

        no_sparse = SparseVec(indices=[], values=[])
        results: list[HybridVec] = []
        for vector in vectors:
            results.append(HybridVec(dense=list(map(float, vector)), sparse=no_sparse))
        return results

    def embed_one(self, text: str) -> HybridVec:
        """Embed one text and return its vector."""
        (only,) = self.embed([text])[:1]
        return only

    def close(self) -> None:
        """Shut down the HTTP client."""
        self._client.close()

    def __enter__(self) -> TEIEmbedder:
        return self

    def __exit__(self, *exc: object) -> None:
        self.close()
|
|
@@ -0,0 +1,278 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import hashlib
|
|
4
|
+
import json
|
|
5
|
+
import sqlite3
|
|
6
|
+
import time
|
|
7
|
+
import uuid
|
|
8
|
+
from dataclasses import asdict, dataclass, field
|
|
9
|
+
from pathlib import Path
|
|
10
|
+
from typing import Any
|
|
11
|
+
|
|
12
|
+
from ..config import CONFIG
|
|
13
|
+
|
|
14
|
+
# Base table — kept minimal so a legacy DB opens without errors. Every
|
|
15
|
+
# additional column lives in ``_MIGRATIONS`` so loading an old database
|
|
16
|
+
# transparently catches it up to the latest schema.
|
|
17
|
+
_BASE_SCHEMA = """
|
|
18
|
+
CREATE TABLE IF NOT EXISTS episodes (
|
|
19
|
+
id TEXT PRIMARY KEY,
|
|
20
|
+
ts REAL NOT NULL,
|
|
21
|
+
prompt TEXT NOT NULL,
|
|
22
|
+
plan TEXT,
|
|
23
|
+
patch TEXT,
|
|
24
|
+
verdict TEXT,
|
|
25
|
+
tags TEXT,
|
|
26
|
+
meta TEXT
|
|
27
|
+
);
|
|
28
|
+
"""
|
|
29
|
+
|
|
30
|
+
# Idempotent migrations. Each statement is run independently; failures
|
|
31
|
+
# (e.g. "duplicate column" when the migration has already been applied)
|
|
32
|
+
# are swallowed because that's the success path for an idempotent
|
|
33
|
+
# migration. Indexes that reference migration-added columns must come
|
|
34
|
+
# AFTER the corresponding ADD COLUMN, hence interleaved here.
|
|
35
|
+
_MIGRATIONS = (
|
|
36
|
+
"ALTER TABLE episodes ADD COLUMN head_sha TEXT",
|
|
37
|
+
"CREATE INDEX IF NOT EXISTS idx_episodes_ts ON episodes(ts)",
|
|
38
|
+
"CREATE INDEX IF NOT EXISTS idx_episodes_verdict ON episodes(verdict)",
|
|
39
|
+
"CREATE INDEX IF NOT EXISTS idx_episodes_head_sha ON episodes(head_sha)",
|
|
40
|
+
# Content hash for dedup. Same user prompt re-asserted across turns
|
|
41
|
+
# produced one row per assertion before; now the existing row gets
|
|
42
|
+
# its ``ts`` refreshed and the new insert is a no-op. Non-unique by
|
|
43
|
+
# design so legacy rows (NULL hash) still load without conflict.
|
|
44
|
+
"ALTER TABLE episodes ADD COLUMN content_hash TEXT",
|
|
45
|
+
"CREATE INDEX IF NOT EXISTS idx_episodes_content_hash ON episodes(content_hash)",
|
|
46
|
+
)
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
def _content_hash(prompt: str) -> str:
|
|
50
|
+
"""SHA-256 over the user prompt, normalized.
|
|
51
|
+
|
|
52
|
+
Dedup key is prompt-only on purpose: the same prompt typed twice
|
|
53
|
+
represents the same intent, regardless of which plan/patch/verdict
|
|
54
|
+
the agent eventually produced. Whitespace is normalized so a
|
|
55
|
+
trailing newline doesn't split otherwise-identical rows.
|
|
56
|
+
"""
|
|
57
|
+
return hashlib.sha256(prompt.strip().encode("utf-8")).hexdigest()
|
|
58
|
+
|
|
59
|
+
|
|
60
|
+
@dataclass
class Episode:
    """One recorded interaction: the prompt plus whatever outcome is known."""

    prompt: str
    plan: str | None = None
    patch: str | None = None
    # One of: pass | fail | partial
    verdict: str | None = None
    tags: list[str] = field(default_factory=list)
    meta: dict[str, Any] = field(default_factory=dict)
    # A fresh UUID4 string unless the caller supplies an id.
    id: str = field(default_factory=lambda: str(uuid.uuid4()))
    # ``time.time()`` at construction unless the caller supplies one.
    ts: float = field(default_factory=time.time)
    # Git HEAD when the episode was recorded, tying the agent's work
    # back to the code state the graph was indexing at that moment.
    head_sha: str | None = None
|
|
73
|
+
|
|
74
|
+
|
|
75
|
+
class EpisodicStore:
    """SQLite-backed store of :class:`Episode` rows, deduplicated by prompt hash.

    Usable as a context manager; leaving the ``with`` block closes the
    connection.
    """

    # Column list shared by every query that feeds ``_row_to_episode``;
    # the order must match the positions that function reads.
    _COLUMNS = "id, ts, prompt, plan, patch, verdict, tags, meta, head_sha"

    def __init__(self, path: Path | None = None) -> None:
        """Open (creating if needed) the database and bring its schema up to date.

        Args:
            path: Database file; defaults to ``CONFIG.episodic_db``. The
                parent directory is created when missing.

        Raises:
            sqlite3.OperationalError: If a migration fails for any reason
                other than the column already existing.
        """
        self.path = path or CONFIG.episodic_db
        self.path.parent.mkdir(parents=True, exist_ok=True)
        self.conn = sqlite3.connect(self.path)
        try:
            self.conn.executescript(_BASE_SCHEMA)
            for stmt in _MIGRATIONS:
                try:
                    self.conn.execute(stmt)
                except sqlite3.OperationalError as exc:
                    # "duplicate column name" means a prior process already
                    # applied this ADD COLUMN — the success path for an
                    # idempotent migration. Anything else (locked file, I/O
                    # error, bad SQL) is a real failure and must surface.
                    if "duplicate column name" not in str(exc).lower():
                        raise
            self.conn.commit()
        except Exception:
            # Do not leak the connection when setup fails.
            self.conn.close()
            raise

    def add(self, ep: Episode) -> str:
        """Insert an episode, deduping on prompt content.

        If an existing row has the same ``content_hash``, refresh its
        ``ts`` to ``ep.ts`` and fill any previously-NULL fields from
        the new episode (plan/patch/verdict/head_sha). Tags are unioned
        and meta is merged with new values winning on key collision.
        Returns the existing row's id so vector upserts stay idempotent;
        otherwise inserts a new row and returns ``ep.id``.
        """
        hash_ = _content_hash(ep.prompt)
        existing = self.conn.execute(
            "SELECT id, plan, patch, verdict, head_sha, tags, meta "
            "FROM episodes WHERE content_hash = ? LIMIT 1",
            (hash_,),
        ).fetchone()
        if existing is not None:
            return self._refresh_existing(existing, ep)

        self.conn.execute(
            "INSERT INTO episodes(id, ts, prompt, plan, patch, verdict, tags, meta, head_sha, content_hash) "
            "VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)",
            (
                ep.id,
                ep.ts,
                ep.prompt,
                ep.plan,
                ep.patch,
                ep.verdict,
                json.dumps(ep.tags),
                json.dumps(ep.meta),
                ep.head_sha,
                hash_,
            ),
        )
        self.conn.commit()
        return ep.id

    def _refresh_existing(
        self, existing: tuple[Any, ...], ep: Episode
    ) -> str:
        """Merge ``ep`` into an already-stored row and return that row's id.

        ``existing`` is ``(id, plan, patch, verdict, head_sha, tags, meta)``
        as selected by :meth:`add`. Scalar columns are only filled when
        currently NULL (``COALESCE``); empty strings on ``ep`` count as
        absent. Tags are unioned keeping first-seen order; meta keys from
        ``ep`` override stored ones.
        """
        existing_id = str(existing[0])
        old_tags = json.loads(existing[5]) if existing[5] else []
        old_meta = json.loads(existing[6]) if existing[6] else {}

        # dict.fromkeys de-duplicates while preserving order.
        merged_tags = list(dict.fromkeys([*old_tags, *ep.tags]))
        merged_meta = {**old_meta, **ep.meta}

        self.conn.execute(
            "UPDATE episodes SET "
            "  ts = ?, "
            "  plan = COALESCE(plan, ?), "
            "  patch = COALESCE(patch, ?), "
            "  verdict = COALESCE(verdict, ?), "
            "  head_sha = COALESCE(head_sha, ?), "
            "  tags = ?, "
            "  meta = ? "
            "WHERE id = ?",
            (
                ep.ts,
                ep.plan or None,
                ep.patch or None,
                ep.verdict or None,
                ep.head_sha or None,
                json.dumps(merged_tags),
                json.dumps(merged_meta),
                existing_id,
            ),
        )
        self.conn.commit()
        return existing_id

    def dedupe(self) -> dict[str, list[str]]:
        """Compact pre-existing duplicates in the table.

        For each ``content_hash`` group with >1 row, keep the row with
        the oldest ``ts`` (first observation), update its ``ts`` to
        ``MAX(ts)`` of the group so retrieval still surfaces it as
        recent, and delete the rest. Returns ``{kept_id: [removed_ids]}``
        so callers (e.g. the orchestrator) can prune matching vectors.

        Backfills ``content_hash`` for legacy NULL rows on the fly.
        """
        null_rows = self.conn.execute(
            "SELECT id, prompt FROM episodes WHERE content_hash IS NULL"
        ).fetchall()
        for ep_id, prompt in null_rows:
            self.conn.execute(
                "UPDATE episodes SET content_hash = ? WHERE id = ?",
                (_content_hash(prompt), ep_id),
            )
        if null_rows:
            self.conn.commit()

        groups = self.conn.execute(
            "SELECT content_hash FROM episodes "
            "WHERE content_hash IS NOT NULL "
            "GROUP BY content_hash HAVING COUNT(*) > 1"
        ).fetchall()

        removed: dict[str, list[str]] = {}
        for (hash_,) in groups:
            rows = self.conn.execute(
                "SELECT id, ts FROM episodes WHERE content_hash = ? "
                "ORDER BY ts ASC",
                (hash_,),
            ).fetchall()
            keep_id = str(rows[0][0])
            max_ts = max(float(r[1]) for r in rows)
            del_ids = [str(r[0]) for r in rows[1:]]
            self.conn.execute(
                "UPDATE episodes SET ts = ? WHERE id = ?", (max_ts, keep_id)
            )
            self.conn.executemany(
                "DELETE FROM episodes WHERE id = ?", [(d,) for d in del_ids]
            )
            removed[keep_id] = del_ids
        self.conn.commit()
        return removed

    def get(self, ep_id: str) -> Episode | None:
        """Return the episode with this id, or ``None`` when there is none."""
        row = self.conn.execute(
            f"SELECT {self._COLUMNS} FROM episodes WHERE id = ?",
            (ep_id,),
        ).fetchone()
        if row is None:
            return None
        return _row_to_episode(row)

    def recent(self, limit: int = 20) -> list[Episode]:
        """Return up to ``limit`` episodes, newest ``ts`` first."""
        rows = self.conn.execute(
            f"SELECT {self._COLUMNS} FROM episodes ORDER BY ts DESC LIMIT ?",
            (limit,),
        ).fetchall()
        return [_row_to_episode(r) for r in rows]

    def by_ids(self, ids: list[str]) -> list[Episode]:
        """Return the episodes whose id is in ``ids``.

        Ids with no matching row are simply absent from the result. The
        query has no ``ORDER BY``, so the result order is whatever SQLite
        yields and need not follow ``ids``.
        """
        if not ids:
            return []
        # Only "?" markers are interpolated; the ids themselves are bound.
        placeholders = ",".join("?" for _ in ids)
        rows = self.conn.execute(
            f"SELECT {self._COLUMNS} FROM episodes WHERE id IN ({placeholders})",
            ids,
        ).fetchall()
        return [_row_to_episode(r) for r in rows]

    def close(self) -> None:
        """Close the database connection."""
        self.conn.close()

    def __enter__(self) -> EpisodicStore:
        return self

    def __exit__(self, *exc: object) -> None:
        self.close()
|
|
246
|
+
|
|
247
|
+
|
|
248
|
+
def _row_to_episode(row: tuple[Any, ...]) -> Episode:
    """Build an :class:`Episode` from a row of the ``episodes`` table.

    Positions are read as ``id, ts, prompt, plan, patch, verdict, tags,
    meta`` followed by an optional ``head_sha``. ``tags`` and ``meta``
    are JSON text; a NULL or empty value becomes ``[]`` / ``{}``. Rows
    with only eight columns get ``head_sha=None``.
    """
    raw_tags = row[6]
    raw_meta = row[7]
    has_head_sha = len(row) > 8
    fields = {
        "id": row[0],
        "ts": row[1],
        "prompt": row[2],
        "plan": row[3],
        "patch": row[4],
        "verdict": row[5],
        "tags": json.loads(raw_tags) if raw_tags else [],
        "meta": json.loads(raw_meta) if raw_meta else {},
        "head_sha": row[8] if has_head_sha else None,
    }
    return Episode(**fields)
|
|
260
|
+
|
|
261
|
+
|
|
262
|
+
def episode_text(ep: Episode) -> str:
    """Render an episode as the single text that gets embedded.

    The prompt section is always present; plan, patch and verdict
    sections follow in that order, each only when its value is truthy.
    Sections are separated by a blank line.
    """
    sections = [f"PROMPT:\n{ep.prompt}"]
    optional = (
        ("PLAN:\n", ep.plan),
        ("PATCH:\n", ep.patch),
        ("VERDICT: ", ep.verdict),
    )
    sections.extend(f"{header}{body}" for header, body in optional if body)
    return "\n\n".join(sections)
|
|
272
|
+
|
|
273
|
+
|
|
274
|
+
def episode_payload(ep: Episode) -> dict[str, Any]:
    """Return the episode as a dict with the ``plan`` and ``patch`` keys left out.

    Every other field appears as produced by ``dataclasses.asdict``, in
    field order.
    """
    omitted = ("plan", "patch")
    return {key: value for key, value in asdict(ep).items() if key not in omitted}
|