bubble-memory 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
bubble/__init__.py ADDED
@@ -0,0 +1,101 @@
1
+ """
2
+ bubble — Hierarchical Memory Consolidation System
3
+
4
+ Typical agent usage
5
+ -------------------
6
+ import bubble
7
+
8
+ # Once, when a user session starts:
9
+ await bubble.init_graph(user_id)
10
+
11
+ # On every user message — retrieve and store in one call (preferred):
12
+ result = await bubble.observe(user_id, message, prior=agent_reply)
13
+ context = result["retrieved"] # SnapshotNode results relevant to this message
14
+ stored = result["stored"] # ingested node descriptors
15
+
16
+ # Or separately:
17
+ await bubble.process(user_id, message, prior=agent_reply)
18
+ context = await bubble.retrieve(user_id, query)
19
+
20
+ # Periodically (runs HDBSCAN + promotion):
21
+ await bubble.consolidate(user_id)
22
+
23
+ # retrieved is a list of dicts:
24
+ # {id, summary, members: [{id, summary, confidence_label}],
25
+ # context: [{rel, id, summary, confidence_label}]}
26
+ """
27
+
28
+ import asyncio
29
+
30
+ from .db import get_graph, init_graph
31
+ from .embed import embed as _embed
32
+ from .decomposer import decompose as _decompose
33
+ from .ingest import _route_segments, ingest, replay
34
+ from .promote import promote
35
+ from .retrieve import _retrieve_from_vecs, retrieve
36
+
37
+
38
async def observe(user_id: str, message: str, prior: str | None = None, top_k: int = 3, verbose: bool = False) -> dict:
    """
    Decompose once, retrieve relevant memories, then store — all in a single call.

    Shares the decompose+embed step between retrieval and ingestion.
    Retrieval runs before storage so newly ingested segments don't appear in results.

    Args:
        user_id: owner of the memory graph.
        message: raw user message to decompose, retrieve against, and ingest.
        prior: optional conversational context the user is responding to.
        top_k: number of results to return from retrieval.
        verbose: forwarded to the retrieval layer.

    Returns:
        {
            "retrieved": [...],  # same format as retrieve()
            "stored": [...],     # same format as process()
        }
    """
    segments = await _decompose(message, prior)
    embeddings = list(await asyncio.gather(*[_embed(s["text"]) for s in segments]))

    g = get_graph(user_id)
    # BUG FIX: retrieval must run BEFORE _route_segments. The original code
    # stored first, so segments ingested from this very message could surface
    # in their own retrieval results — contradicting the contract documented
    # above (and in the module docstring).
    retrieved = await _retrieve_from_vecs(g, message, embeddings, top_k, verbose)
    stored = await _route_segments(user_id, segments, embeddings, prior)
    return {"retrieved": retrieved, "stored": stored}
58
+
59
+
60
async def process(user_id: str, message: str, prior: str | None = None) -> list[dict]:
    """
    Ingest one message into the user's memory graph, then run promotion.

    Each decomposed segment is routed to one of:
        - an episodic Episode (intensity >= 0.6): archived to JSONL and
          given a Layer 1 node immediately;
        - the Layer 0 active pool (everything else): waits for consolidate().

    Args:
        user_id: owner of the memory graph.
        message: raw user message to ingest.
        prior: optional conversational context the user is responding to.

    Returns:
        Descriptors of the nodes created by this call.
    """
    created = await ingest(user_id, message, prior)
    await promote(user_id)
    return created
74
+
75
+
76
async def consolidate(user_id: str) -> dict:
    """
    Run the full consolidation pipeline over one user's graph.

    Pipeline:
        1. HDBSCAN over the Layer 0 active pool.
        2. Promotion of clusters whose score crosses t_promo_score into
           Episodes (JSONL archival, SegmentNode deletion, L2 assignment).

    Intended to run periodically, not on every message.

    Returns:
        {"promoted": [...]} — descriptors of the newly created Episodes.
    """
    return {"promoted": await promote(user_id)}
90
+
91
+
92
# Public API: the supported entry points for agent integrations
# (also controls `from bubble import *`).
__all__ = [
    "init_graph",
    "observe",
    "process",
    "consolidate",
    "retrieve",
    "ingest",
    "promote",
    "replay",
]
bubble/_shared.py ADDED
@@ -0,0 +1,49 @@
1
+ import os
2
+ from datetime import datetime, timezone
3
+
4
+ import numpy as np
5
+ from anthropic import AsyncAnthropic
6
+ from dotenv import load_dotenv
7
+
8
# Pull ANTHROPIC_API_KEY / BUBBLE_* overrides from a local .env file, if present.
load_dotenv()

# Claude model id used for every LLM call in the package; override via BUBBLE_MODEL.
MODEL = os.getenv("BUBBLE_MODEL", "claude-sonnet-4-6")
# Module-wide async Anthropic client shared by all callers
# (reads ANTHROPIC_API_KEY from the environment).
_client = AsyncAnthropic()

# System prompt used by _summarize() to compress one or more raw user
# statements into a single subject-less memory sentence.
_SUMMARIZE_SYSTEM = """\
You distill one or more user statements into a single memory record.

Rules:
- Capture the belief, preference, event, or tendency the statements express.
- When multiple statements are given, identify the common pattern they share.
- Write exactly one sentence with no grammatical subject.
- Start with a verb or descriptor that names the belief, event, or pattern.
- Do not explain, qualify, or ask for clarification.\
"""
23
+
24
+
25
+ def _now() -> str:
26
+ return datetime.now(timezone.utc).isoformat()
27
+
28
+
29
+ def _normalize(vec: np.ndarray) -> list[float]:
30
+ """L2-normalize a numpy vector and return as a Python list."""
31
+ norm = np.linalg.norm(vec)
32
+ return (vec / norm if norm > 0 else vec).tolist()
33
+
34
+
35
def _centroid(nodes: list[dict]) -> list[float]:
    """Unit-length (L2-normalized) mean of the nodes' `embedding` vectors."""
    mean_vec = np.array([n["embedding"] for n in nodes], dtype=np.float32).mean(axis=0)
    # Inlined normalization: divide by the L2 norm unless the mean is zero.
    norm = np.linalg.norm(mean_vec)
    if norm > 0:
        mean_vec = mean_vec / norm
    return mean_vec.tolist()
39
+
40
+
41
async def _summarize(nodes: list[dict]) -> str:
    """
    Compress the nodes' raw texts into one subject-less memory sentence.

    Sends the texts as a bullet list to the Anthropic API under
    _SUMMARIZE_SYSTEM. Each node must carry a 'raw_text' key.
    """
    texts = "\n".join(f"- {n['raw_text']}" for n in nodes)
    response = await _client.messages.create(
        model=MODEL,
        max_tokens=128,  # output is a single sentence; keep the call cheap
        system=_SUMMARIZE_SYSTEM,
        messages=[{"role": "user", "content": texts}],
    )
    return response.content[0].text.strip()
bubble/archive.py ADDED
@@ -0,0 +1,48 @@
1
+ import json
2
+ import os
3
+ from pathlib import Path
4
+
5
+ _ARCHIVE_DIR = os.getenv("BUBBLE_ARCHIVE_DIR", "./data/archive")
6
+ _MKDIR_DONE = False
7
+
8
+
9
+ def _path(user_id: str) -> Path:
10
+ global _MKDIR_DONE
11
+ p = Path(_ARCHIVE_DIR)
12
+ if not _MKDIR_DONE:
13
+ p.mkdir(parents=True, exist_ok=True)
14
+ _MKDIR_DONE = True
15
+ return p / f"{user_id}.jsonl"
16
+
17
+
18
def read_segments(user_id: str):
    """Yield the user's archived segment records in file (oldest-first) order."""
    archive = _path(user_id)
    if not archive.exists():
        return
    with archive.open("r", encoding="utf-8") as handle:
        for raw_line in handle:
            record = raw_line.strip()
            # Skip blank lines so a trailing newline doesn't break parsing.
            if record:
                yield json.loads(record)
28
+
29
+
30
def write_segment(
    user_id: str,
    *,
    text: str,
    prior: str | None,
    intensity: float,
    valence: str,
    timestamp: str,
) -> None:
    """Append one segment record to the user's JSONL archive."""
    record = {
        "text": text,
        "prior": prior,
        "intensity": intensity,
        "valence": valence,
        "timestamp": timestamp,
    }
    line = json.dumps(record)
    with _path(user_id).open("a", encoding="utf-8") as handle:
        handle.write(line + "\n")
bubble/chain.py ADDED
@@ -0,0 +1,317 @@
1
+ """
2
+ Chain assignment pipeline — spec §3.8.
3
+
4
+ When a new Episode is created, assign it to a topic chain:
5
+ 1 — ANN top-1 against Episode centroids.
6
+ 2 — Similarity threshold check. Below → new isolated SnapshotNode.
7
+ 3 — LLM relatedness check (binary). Not related → new isolated SnapshotNode.
8
+ 4 — Related → traverse FOLLOWED_BY edges to chain tail, wire FOLLOWED_BY edge, join SnapshotNode.
9
+ """
10
+
11
+ import os
12
+ import uuid
13
+
14
+ import numpy as np
15
+
16
+ from ._shared import MODEL, _client, _normalize, _now
17
+ from .db import get_graph
18
+
19
# Step-2 gate of check_new(): ANN scores above this are treated as "too far"
# and the new Episode starts an isolated SnapshotNode instead of joining a chain.
_CHAIN_MAX_DISTANCE = float(os.getenv("BUBBLE_CHAIN_MAX_DISTANCE", "0.4"))
# When true, _related() calls a local NLI service instead of the LLM.
_NLI_ENABLED = os.getenv("BUBBLE_ENABLE_NLI", "false").lower() == "true"
# HTTP endpoint of the optional NLI relatedness service.
_NLI_ENDPOINT = os.getenv("BUBBLE_NLI_ENDPOINT", "http://localhost:8999/predict")

# System prompt for the lazy snapshot-summary synthesis in
# ensure_snapshot_summary().
_SNAPSHOT_SYSTEM = """\
A sequence of memory records about the user is listed below, from earliest to most recent.

Rules:
- The most recent memory takes precedence over earlier ones.
- Earlier memory provides historical context.
- Synthesize all memory into a single simplified coherent narrative that represents the full arc.
- Output one concise paragraph.
- No subject, start with verb.
- Do not explain or justify.\
"""
34
+
35
+ # ---------------------------------------------------------------------------
36
+ # Relatedness check (LLM or NLI)
37
+ # ---------------------------------------------------------------------------
38
+
39
+
40
async def _related(summary_a: str, summary_b: str) -> bool:
    """True if the two summaries are about the same topic.

    Backend selection (the old docstring named a nonexistent
    BUBBLE_CHAIN_BACKEND variable; these are the real switches):
        BUBBLE_ENABLE_NLI=true — POST to the local NLI service at
            BUBBLE_NLI_ENDPOINT; related unless the argmax label is 'neutral'.
        otherwise (default)    — LLM yes/no classification via the Anthropic API.
    """
    if _NLI_ENABLED:
        # Lazy import: httpx is only required when the NLI backend is enabled.
        import httpx

        async with httpx.AsyncClient() as client:
            resp = await client.post(
                _NLI_ENDPOINT,
                json={"inputs": [summary_a, summary_b]},
                headers={"Content-Type": "application/json", "Accept": "application/json"},
            )
            resp.raise_for_status()
            scores = resp.json()  # [{"label": ..., "score": ...}, ...]
            return max(scores, key=lambda x: x["score"])["label"].lower() != "neutral"

    # LLM path: ask for a bare yes/no and accept any answer starting with 'y'.
    answer = (
        (
            await _client.messages.create(
                model=MODEL,
                max_tokens=8,
                system="You are a memory topic classifier. Reply with exactly 'yes' or 'no'.",
                messages=[
                    {
                        "role": "user",
                        "content": (f"Are these two beliefs about the same topic or subject?\n\nA: {summary_a}\nB: {summary_b}"),
                    }
                ],
            )
        )
        .content[0]
        .text.strip()
        .lower()
    )
    return answer.startswith("y")
78
+
79
+
80
+ # ---------------------------------------------------------------------------
81
+ # Chain traversal
82
+ # ---------------------------------------------------------------------------
83
+
84
+
85
async def _traverse_to_tail(g, start_id: str) -> str:
    """
    Walk FOLLOWED_BY edges from start_id and return the id of the chain tail —
    the Episode with no outgoing FOLLOWED_BY edge. Falls back to start_id
    itself when the query returns nothing.
    """
    result = await g.query(
        "MATCH (start:Episode {id: $id})-[:FOLLOWED_BY*0..]->(tail:Episode) WHERE NOT (tail)-[:FOLLOWED_BY]->() RETURN tail.id LIMIT 1",
        {"id": start_id},
    )
    rows = result.result_set
    return rows[0][0] if rows else start_id
97
+
98
+
99
+ # ---------------------------------------------------------------------------
100
+ # Graph writers
101
+ # ---------------------------------------------------------------------------
102
+
103
+
104
async def _wire_follows(g, from_id: str, to_id: str) -> None:
    """Create a FOLLOWED_BY edge from Episode `from_id` to Episode `to_id`."""
    await g.query(
        "MATCH (a:Episode {id: $a}), (b:Episode {id: $b}) CREATE (a)-[:FOLLOWED_BY]->(b)",
        {"a": from_id, "b": to_id},
    )
109
+
110
+
111
+ # ---------------------------------------------------------------------------
112
+ # Graph loaders
113
+ # ---------------------------------------------------------------------------
114
+
115
+
116
+ def _rows_to_episode_dicts(rows) -> list[dict]:
117
+ return [
118
+ {
119
+ "id": r[0],
120
+ "summary": r[1],
121
+ "centroid": r[2],
122
+ "episodic": bool(r[3]),
123
+ "timestamp": r[4],
124
+ "valence": r[5] or "neu",
125
+ }
126
+ for r in rows
127
+ if r[2] is not None
128
+ ]
129
+
130
+
131
async def _load_node(g, episode_id: str) -> dict | None:
    """Fetch a single Episode as a dict; None if missing or its centroid is null."""
    result = await g.query(
        "MATCH (t:Episode {id: $id}) RETURN t.id, t.summary, t.centroid, t.episodic, t.timestamp, t.valence",
        {"id": episode_id},
    )
    episodes = _rows_to_episode_dicts(result.result_set)
    if episodes:
        return episodes[0]
    return None
138
+
139
+
140
+ # ---------------------------------------------------------------------------
141
+ # SnapshotNode management
142
+ # ---------------------------------------------------------------------------
143
+
144
+
145
async def _recompute_snapshot_centroid(g, snap_id: str) -> None:
    """
    Reset the SnapshotNode's centroid to the L2-normalized mean of its
    member Episodes' centroids. No-op when no member has a centroid.
    """
    result = await g.query(
        "MATCH (snap:SnapshotNode {id: $id})-[:SYNTHESIZES]->(t:Episode) RETURN t.centroid",
        {"id": snap_id},
    )
    # Members without an embedding carry a null centroid — skip them.
    centroids = [row[0] for row in result.result_set if row[0] is not None]
    if not centroids:
        return
    new_centroid = _normalize(np.array(centroids, dtype=np.float32).mean(axis=0))
    await g.query(
        "MATCH (snap:SnapshotNode {id: $id}) SET snap.centroid = vecf32($centroid)",
        {"id": snap_id, "centroid": new_centroid},
    )
158
+
159
+
160
async def _join_snapshot(g, snap_id: str, new_node_id: str) -> None:
    """
    Attach Episode `new_node_id` to an existing SnapshotNode and refresh the
    snapshot centroid. Sets snap.valid = false so the stale summary is
    regenerated lazily on the next read (see ensure_snapshot_summary).
    """
    await g.query(
        "MATCH (snap:SnapshotNode {id: $snap_id}), (t:Episode {id: $tid}) CREATE (snap)-[:SYNTHESIZES]->(t) SET snap.valid = false",
        {"snap_id": snap_id, "tid": new_node_id},
    )
    await _recompute_snapshot_centroid(g, snap_id)
166
+
167
+
168
async def _create_snapshot(g, episode_id: str, summary: str, centroid: list[float]) -> None:
    """
    Create a fresh single-member SnapshotNode wired to `episode_id`.

    The snapshot copies the Episode's summary and centroid and starts
    valid = true, so no lazy synthesis is needed until more members join.
    """
    snap_id = str(uuid.uuid4())
    await g.query(
        "CREATE (snap:SnapshotNode { id: $id, summary: $summary, centroid: vecf32($centroid), valid: true, timestamp: $ts})",
        {"id": snap_id, "summary": summary, "centroid": centroid, "ts": _now()},
    )
    await g.query(
        "MATCH (snap:SnapshotNode {id: $snap_id}), (t:Episode {id: $tid}) CREATE (snap)-[:SYNTHESIZES]->(t)",
        {"snap_id": snap_id, "tid": episode_id},
    )
178
+
179
+
180
+ # ---------------------------------------------------------------------------
181
+ # Lazy snapshot summary generation
182
+ # ---------------------------------------------------------------------------
183
+
184
+
185
async def ensure_snapshot_summary(
    g,
    snap_id: str,
    *,
    valid: bool | None = None,
    summary: str | None = None,
) -> str | None:
    """
    Return the SnapshotNode summary, generating it lazily if valid=false or summary is null.

    Pass `valid` and `summary` when already fetched (e.g. from ANN query) to skip the
    redundant DB round-trip.

    Single-member chains: copy Episode summary directly (no LLM).
    Multi-member chains: LLM synthesis ordered by timestamp, episodic members last.

    Returns None when the snapshot does not exist or has no members.
    """
    # Fetch state only when the caller didn't supply both pieces.
    if valid is None or summary is None:
        result = await g.query(
            "MATCH (snap:SnapshotNode {id: $id}) RETURN snap.summary, snap.valid",
            {"id": snap_id},
        )
        if not result.result_set:
            return None
        summary, valid = result.result_set[0]
    # Cache hit: summary exists and hasn't been invalidated by _join_snapshot.
    if valid and summary:
        return summary

    result = await g.query(
        "MATCH (snap:SnapshotNode {id: $id})-[:SYNTHESIZES]->(t:Episode) RETURN t.id, t.summary, t.episodic, t.timestamp",
        {"id": snap_id},
    )
    members = [
        {
            "id": r[0],
            "summary": r[1],
            "episodic": bool(r[2]),
            # Null timestamps sort first via "".
            "timestamp": r[3] or "",
        }
        for r in result.result_set
    ]

    if not members:
        return None

    if len(members) == 1:
        # Single-member chain: copy the Episode summary, no LLM call.
        new_summary = members[0]["summary"]
    else:
        # Chronological non-episodic beliefs first, episodic events last,
        # each group sorted by timestamp.
        non_ep = sorted([m for m in members if not m["episodic"]], key=lambda m: m["timestamp"])
        ep = sorted([m for m in members if m["episodic"]], key=lambda m: m["timestamp"])
        ordered = non_ep + ep

        count = len(ordered)
        lines = []
        # Label each belief so the model knows which is earliest/most recent
        # (recency takes precedence per _SNAPSHOT_SYSTEM).
        for i, m in enumerate(ordered):
            tag = " [episodic]" if m["episodic"] else ""
            if i == 0:
                label = f"Belief 1 (earliest){tag}"
            elif i == count - 1:
                label = f"Belief {i + 1} (most recent){tag}"
            else:
                label = f"Belief {i + 1}{tag}"
            lines.append(f"{label}: {m['summary']}")

        new_summary = (
            (
                await _client.messages.create(
                    model=MODEL,
                    max_tokens=256,
                    system=_SNAPSHOT_SYSTEM,
                    messages=[{"role": "user", "content": "\n\n".join(lines)}],
                )
            )
            .content[0]
            .text.strip()
        )

    # Persist and mark valid so subsequent reads skip the LLM.
    await g.query(
        "MATCH (snap:SnapshotNode {id: $id}) SET snap.summary = $summary, snap.valid = true",
        {"id": snap_id, "summary": new_summary},
    )
    return new_summary
266
+
267
+
268
+ # ---------------------------------------------------------------------------
269
+ # Public API
270
+ # ---------------------------------------------------------------------------
271
+
272
+
273
async def check_new(user_id: str, new_episode_id: str) -> None:
    """
    Assign a newly created Episode to a topic chain (the cascade in the
    module docstring, spec §3.8).
    Called by promote() and ingest._store_episodic() after Episode creation.
    """
    g = get_graph(user_id)
    new_node = await _load_node(g, new_episode_id)
    if new_node is None or new_node["centroid"] is None:
        return

    # Step 1 — ANN nearest neighbor. k=2 is requested (the old comment said
    # k=3, but the query below asks for 2): the new node itself may already
    # be indexed and occupy one slot; the WHERE clause filters it out and
    # LIMIT 1 keeps only the true nearest other Episode.
    result = await g.query(
        "CALL db.idx.vector.queryNodes('Episode', 'centroid', 2, vecf32($vec)) YIELD node, score WHERE node.id <> $id RETURN node.id, node.summary, score LIMIT 1",
        {"vec": new_node["centroid"], "id": new_episode_id},
    )

    if not result.result_set:
        # No other Episodes exist yet — start an isolated snapshot.
        await _create_snapshot(g, new_episode_id, new_node["summary"], new_node["centroid"])
        return

    closest_id, closest_summary, score = result.result_set[0]

    # Step 2 — Similarity threshold: too far → new isolated SnapshotNode.
    if score > _CHAIN_MAX_DISTANCE:
        await _create_snapshot(g, new_episode_id, new_node["summary"], new_node["centroid"])
        return

    # Step 3 — relatedness check (LLM or NLI): not related → isolated SnapshotNode.
    if not await _related(new_node["summary"], closest_summary):
        await _create_snapshot(g, new_episode_id, new_node["summary"], new_node["centroid"])
        return

    # Step 4 — Append to chain: wire FOLLOWED_BY from the chain tail.
    tail_id = await _traverse_to_tail(g, closest_id)
    await _wire_follows(g, tail_id, new_episode_id)

    snap_result = await g.query(
        "MATCH (snap:SnapshotNode)-[:SYNTHESIZES]->(t:Episode {id: $id}) RETURN snap.id",
        {"id": closest_id},
    )
    if snap_result.result_set:
        await _join_snapshot(g, snap_result.result_set[0][0], new_episode_id)
    else:
        # Neighbor has no SnapshotNode — fall back to an isolated snapshot.
        await _create_snapshot(g, new_episode_id, new_node["summary"], new_node["centroid"])
bubble/cluster.py ADDED
@@ -0,0 +1,66 @@
1
+ import os
2
+
3
+ import numpy as np
4
+ from sklearn.cluster import HDBSCAN
5
+
6
+ from .db import get_graph
7
+
8
# Minimum HDBSCAN cluster size; pools smaller than this are never clustered.
_CLUSTER_MIN_SIZE = int(os.getenv("BUBBLE_CLUSTER_MIN_SIZE", "3"))
# Matryoshka truncation length: clustering uses this prefix of the 768-dim embedding.
_CLUSTER_DIMS = int(os.getenv("BUBBLE_CLUSTER_DIMS", "128"))
10
+
11
+
12
async def get_clusters(user_id: str) -> dict[int, list[dict]]:
    """
    Cluster a user's Layer 0 pool with HDBSCAN.

    All SegmentNodes in the graph form the active pool — promotion deletes
    nodes as they are consolidated, so no extra filtering is needed.

    Returns {cluster_label: [node_dicts]}; noise points (label -1) are
    excluded, and an empty dict is returned when the pool holds fewer than
    _CLUSTER_MIN_SIZE nodes.

    Each node dict contains: id, raw_text, embedding (768-dim), intensity,
    valence, prior (str|None), timestamp (str).
    """
    g = get_graph(user_id)
    result = await g.query(
        "MATCH (n:SegmentNode) "
        "RETURN n.id, n.raw_text, n.embedding, n.intensity, n.valence, n.prior, n.timestamp"
    )

    if not result.result_set:
        return {}

    fields = ("id", "raw_text", "embedding", "intensity", "valence", "prior", "timestamp")
    nodes = [dict(zip(fields, row)) for row in result.result_set]

    if len(nodes) < _CLUSTER_MIN_SIZE:
        return {}

    # Matryoshka truncation to the first _CLUSTER_DIMS dims, then re-normalize:
    # the full 768-dim vector is unit-norm but its truncated prefix is not, and
    # restoring unit norm is what makes euclidean HDBSCAN equivalent to cosine.
    truncated = np.array([n["embedding"][:_CLUSTER_DIMS] for n in nodes], dtype=np.float32)
    lengths = np.linalg.norm(truncated, axis=1, keepdims=True)
    unit = truncated / np.where(lengths > 0, lengths, 1.0)

    labels = HDBSCAN(
        min_cluster_size=_CLUSTER_MIN_SIZE,
        metric="euclidean",  # equivalent to cosine on unit vectors
    ).fit_predict(unit)

    grouped: dict[int, list[dict]] = {}
    for node, label in zip(nodes, labels):
        if label != -1:  # -1 = noise; it stays at Layer 0 and never promotes
            grouped.setdefault(int(label), []).append(node)

    return grouped
bubble/db.py ADDED
@@ -0,0 +1,48 @@
1
+ import os
2
+ from dotenv import load_dotenv
3
+ from falkordb.asyncio import FalkorDB
4
+
5
+ load_dotenv()
6
+
7
# Process-wide singleton connection, created lazily by get_client().
_client: FalkorDB | None = None


def get_client() -> FalkorDB:
    """Return the shared FalkorDB client, connecting on first use.

    Host/port come from FALKORDB_HOST / FALKORDB_PORT (default localhost:6379).
    """
    global _client
    if _client is None:
        host = os.getenv("FALKORDB_HOST", "localhost")
        port = int(os.getenv("FALKORDB_PORT", "6379"))
        _client = FalkorDB(host=host, port=port)
    return _client
18
+
19
+
20
def get_graph(user_id: str):
    """Select the per-user graph, namespaced as 'bubble:<user_id>'."""
    graph_name = f"bubble:{user_id}"
    return get_client().select_graph(graph_name)
22
+
23
+
24
async def init_graph(user_id: str) -> None:
    """Create indexes for a user graph. Safe to call on an already-initialized graph.

    Creates one HNSW cosine vector index per label:
      - SnapshotNode.centroid — retrieval entry point (L2).
      - Episode.centroid — ANN search used by chain.check_new for the
        three-gate chain assignment cascade.
    """
    g = get_graph(user_id)

    from .embed import EMBED_DIM

    # The two index-creation stanzas were previously duplicated verbatim;
    # a loop keeps them in lockstep if the options ever change.
    for label in ("SnapshotNode", "Episode"):
        try:
            await g.query(
                f"CREATE VECTOR INDEX FOR (n:{label}) ON (n.centroid) "
                f"OPTIONS {{dimension: {EMBED_DIM}, similarityFunction: 'cosine'}}"
            )
        except Exception:
            # Best-effort, matching prior behavior: FalkorDB raises when the
            # index already exists, which is the expected idempotent case.
            pass