flurryx-code-memory 0.4.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- code_memory/__init__.py +1 -0
- code_memory/claims/__init__.py +32 -0
- code_memory/claims/extractor.py +325 -0
- code_memory/claims/indexer.py +258 -0
- code_memory/claims/resolver.py +186 -0
- code_memory/claims/store.py +424 -0
- code_memory/cli.py +1192 -0
- code_memory/config.py +268 -0
- code_memory/embed/__init__.py +224 -0
- code_memory/embed/cache.py +204 -0
- code_memory/embed/m3.py +174 -0
- code_memory/embed/ollama.py +92 -0
- code_memory/embed/tei.py +106 -0
- code_memory/episodic/__init__.py +3 -0
- code_memory/episodic/sqlite_store.py +278 -0
- code_memory/extractor/__init__.py +3 -0
- code_memory/extractor/csproj.py +166 -0
- code_memory/extractor/dll.py +385 -0
- code_memory/extractor/gitignore.py +162 -0
- code_memory/extractor/nuget.py +275 -0
- code_memory/extractor/sanity.py +124 -0
- code_memory/extractor/sln.py +108 -0
- code_memory/extractor/treesitter.py +1172 -0
- code_memory/graph/__init__.py +3 -0
- code_memory/graph/falkor_store.py +740 -0
- code_memory/mcp_server.py +1816 -0
- code_memory/metrics.py +260 -0
- code_memory/orchestrator/__init__.py +13 -0
- code_memory/orchestrator/git_delta.py +211 -0
- code_memory/orchestrator/ingest_state.py +71 -0
- code_memory/orchestrator/pipeline.py +1478 -0
- code_memory/orchestrator/reset.py +130 -0
- code_memory/orchestrator/resolver.py +825 -0
- code_memory/orchestrator/retrieve.py +505 -0
- code_memory/resilience.py +73 -0
- code_memory/sync/__init__.py +20 -0
- code_memory/sync/autostart/__init__.py +42 -0
- code_memory/sync/autostart/base.py +106 -0
- code_memory/sync/autostart/launchd.py +115 -0
- code_memory/sync/autostart/schtasks.py +155 -0
- code_memory/sync/autostart/systemd.py +113 -0
- code_memory/sync/hooks.py +164 -0
- code_memory/sync/safety.py +65 -0
- code_memory/sync/snapshot.py +461 -0
- code_memory/sync/store.py +399 -0
- code_memory/sync/sync.py +405 -0
- code_memory/sync/watcher.py +320 -0
- code_memory/vector/__init__.py +3 -0
- code_memory/vector/qdrant_store.py +302 -0
- flurryx_code_memory-0.4.0.dist-info/METADATA +26 -0
- flurryx_code_memory-0.4.0.dist-info/RECORD +53 -0
- flurryx_code_memory-0.4.0.dist-info/WHEEL +4 -0
- flurryx_code_memory-0.4.0.dist-info/entry_points.txt +3 -0
|
@@ -0,0 +1,740 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import time
|
|
4
|
+
from collections import defaultdict
|
|
5
|
+
from collections.abc import Iterable
|
|
6
|
+
from dataclasses import dataclass, field
|
|
7
|
+
from typing import Any
|
|
8
|
+
|
|
9
|
+
from falkordb import FalkorDB
|
|
10
|
+
|
|
11
|
+
from ..config import CONFIG
|
|
12
|
+
|
|
13
|
+
NodeLabel = str # File | Symbol | Module
|
|
14
|
+
EdgeType = str # IMPORTS | CALLS | DEFINES | EXPORTS
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
@dataclass
class GraphNode:
    """A single node destined for the graph store.

    The store merges nodes on ``(label, key)``, so two instances with the
    same label and key describe the same graph node.
    """

    # Node label (the alias comment in this module lists File | Symbol | Module).
    label: NodeLabel
    # Stable identity, e.g. absolute path or fqn.
    key: str
    # Extra properties written onto the node; each instance gets its own dict.
    props: dict[str, Any] = field(default_factory=dict)
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
@dataclass
class GraphEdge:
    """A directed, typed relationship between two keyed nodes.

    Endpoints are addressed by ``(label, key)`` pairs rather than by node
    objects, so an edge can be described without holding its nodes.
    """

    # Relationship type (the alias comment lists IMPORTS | CALLS | DEFINES | EXPORTS).
    type: EdgeType
    # Label and key of the node the edge starts from.
    src_label: NodeLabel
    src_key: str
    # Label and key of the node the edge points to.
    dst_label: NodeLabel
    dst_key: str
    # Extra properties written onto the relationship; fresh dict per instance.
    props: dict[str, Any] = field(default_factory=dict)
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
class FalkorStore:
|
|
35
|
+
def __init__(
    self,
    host: str | None = None,
    port: int | None = None,
    graph_name: str | None = None,
) -> None:
    """Connect to FalkorDB and select the project graph.

    Any argument left falsy falls back to the matching ``CONFIG`` value
    (``falkor_host`` / ``falkor_port`` / ``falkor_graph``).
    """
    resolved_host = host or CONFIG.falkor_host
    resolved_port = port or CONFIG.falkor_port
    self.db = FalkorDB(host=resolved_host, port=resolved_port)
    self.graph = self.db.select_graph(graph_name or CONFIG.falkor_graph)

    # Server-side tunables that matter on large graphs (e.g. 200K-symbol /
    # 270K-call monorepos):
    #
    # * RESULTSET_SIZE (default 10000) silently truncated the resolver's
    #   full-graph snapshot; half the placeholders never reached the
    #   in-memory index, which showed up as "all calls resolved as
    #   external".
    # * TIMEOUT_DEFAULT (default 1000ms) killed every cold topology query:
    #   the first callers/callees run routinely takes 2-5 s while Falkor
    #   compiles and warms its planner; cached runs are sub-100ms.
    server_tunables = {
        "RESULTSET_SIZE": "-1",
        "TIMEOUT_DEFAULT": "30000",
        "TIMEOUT_MAX": "60000",
    }
    for option, value in server_tunables.items():
        try:
            self.db.connection.execute_command("GRAPH.CONFIG", "SET", option, value)
        except Exception:  # noqa: BLE001 — best-effort, server defaults persist
            continue
|
|
66
|
+
|
|
67
|
+
def ensure_indexes(self) -> None:
    """Request a ``key`` index for File, Symbol and Module nodes.

    Every statement is attempted; any error is ignored because the index
    may already exist.
    """
    statements = [
        f"CREATE INDEX FOR (n:{label}) ON (n.key)"
        for label in ("File", "Symbol", "Module")
    ]
    for statement in statements:
        try:
            self.graph.query(statement)
        except Exception:
            # index may already exist
            continue
|
|
74
|
+
|
|
75
|
+
def upsert_nodes(
    self,
    nodes: Iterable[GraphNode],
    *,
    head_sha: str | None = None,
    head_ord: int | None = None,
) -> None:
    """Bulk-upsert nodes, one ``UNWIND`` query per label.

    Nodes are grouped by label and each group is merged on ``key`` in a
    single round-trip, instead of one ``MERGE`` per node. Node ``props``
    are applied with ``SET n += row.props``.

    When ``head_sha`` is given the temporal lifecycle is stamped as well:

    - Newly created nodes get ``first_seen_sha = last_seen_sha = head_sha``
      and the matching ``_ord`` fields from ``head_ord``.
    - Existing nodes get ``last_seen_sha`` / ``last_seen_ord`` refreshed;
      ``first_seen_*`` is kept, with COALESCE filling it in for rows that
      have no value yet.
    - ``invalid_sha`` / ``invalid_ord`` / ``invalid_at`` are cleared, i.e.
      the node is alive again at this SHA.

    Without ``head_sha`` only the merge and the property update run.
    """
    rows_by_label: dict[str, list[dict[str, Any]]] = defaultdict(list)
    for node in nodes:
        rows_by_label[node.label].append({"key": node.key, "props": node.props})

    stamp = head_sha is not None
    # Labels cannot be passed as query parameters, hence the f-strings.
    for label, rows in rows_by_label.items():
        params: dict[str, Any]
        if stamp:
            cypher = f"""
                UNWIND $rows AS row
                MERGE (n:{label} {{key: row.key}})
                ON CREATE SET n.first_seen_sha = $head,
                              n.last_seen_sha = $head,
                              n.first_seen_ord = $ord,
                              n.last_seen_ord = $ord
                ON MATCH SET n.first_seen_sha = COALESCE(n.first_seen_sha, $head),
                             n.last_seen_sha = $head,
                             n.first_seen_ord = COALESCE(n.first_seen_ord, $ord),
                             n.last_seen_ord = $ord
                SET n += row.props
                SET n.invalid_sha = NULL,
                    n.invalid_ord = NULL,
                    n.invalid_at = NULL
                """
            params = {"rows": rows, "head": head_sha, "ord": head_ord}
        else:
            cypher = f"""
                UNWIND $rows AS row
                MERGE (n:{label} {{key: row.key}})
                SET n += row.props
                """
            params = {"rows": rows}
        self.graph.query(cypher, params)
|
|
139
|
+
|
|
140
|
+
def upsert_edges(
    self,
    edges: Iterable[GraphEdge],
    *,
    head_sha: str | None = None,
    head_ord: int | None = None,
) -> None:
    """Bulk-upsert edges, one ``UNWIND`` query per edge shape.

    Edges are bucketed by ``(src_label, type, dst_label)`` because Cypher
    cannot parameterize labels or relationship types; each bucket becomes
    a single query. Both endpoints are merged on ``key`` (so missing
    endpoints are created), then the relationship is merged and its
    ``props`` applied.

    With ``head_sha`` the relationship receives the same temporal stamping
    as nodes: ``first_seen_*`` on create (COALESCE-preserved on match),
    ``last_seen_*`` refreshed, and the ``invalid_*`` triple cleared.
    """
    buckets: dict[tuple[str, str, str], list[dict[str, Any]]] = defaultdict(list)
    for edge in edges:
        shape = (edge.src_label, edge.type, edge.dst_label)
        buckets[shape].append(
            {"src": edge.src_key, "dst": edge.dst_key, "props": edge.props}
        )

    stamp = head_sha is not None
    for (src_label, etype, dst_label), rows in buckets.items():
        params: dict[str, Any]
        if stamp:
            cypher = f"""
                UNWIND $rows AS row
                MERGE (a:{src_label} {{key: row.src}})
                MERGE (b:{dst_label} {{key: row.dst}})
                MERGE (a)-[r:{etype}]->(b)
                ON CREATE SET r.first_seen_sha = $head,
                              r.last_seen_sha = $head,
                              r.first_seen_ord = $ord,
                              r.last_seen_ord = $ord
                ON MATCH SET r.first_seen_sha = COALESCE(r.first_seen_sha, $head),
                             r.last_seen_sha = $head,
                             r.first_seen_ord = COALESCE(r.first_seen_ord, $ord),
                             r.last_seen_ord = $ord
                SET r += row.props
                SET r.invalid_sha = NULL,
                    r.invalid_ord = NULL,
                    r.invalid_at = NULL
                """
            params = {"rows": rows, "head": head_sha, "ord": head_ord}
        else:
            cypher = f"""
                UNWIND $rows AS row
                MERGE (a:{src_label} {{key: row.src}})
                MERGE (b:{dst_label} {{key: row.dst}})
                MERGE (a)-[r:{etype}]->(b)
                SET r += row.props
                """
            params = {"rows": rows}
        self.graph.query(cypher, params)
|
|
195
|
+
|
|
196
|
+
def neighbors(
    self,
    label: NodeLabel,
    key: str,
    depth: int = 1,
    edge_types: tuple[EdgeType, ...] | None = None,
) -> list[dict[str, Any]]:
    """Return the distinct nodes within ``depth`` hops of ``(label, key)``.

    The traversal ignores edge direction. ``edge_types`` restricts it to
    the given relationship types; ``None`` or an empty tuple follows every
    type.

    ``depth`` is coerced to ``int`` and floored at 1 before it is placed in
    the variable-length pattern, so a zero or negative value can no longer
    produce the range ``*1..0``. Values of 1 or more behave as before.

    Returns one ``{"labels", "key", "props"}`` dict per neighbour.

    Raises:
        TypeError / ValueError: if ``depth`` cannot be converted to ``int``.
    """
    hops = max(1, int(depth))
    # Labels, relationship types and hop bounds cannot be query parameters,
    # so they are formatted into the pattern; only ``key`` is bound.
    rel = ":" + "|".join(edge_types) if edge_types else ""
    q = (
        f"MATCH (n:{label} {{key: $key}})-[{rel}*1..{hops}]-(m) "
        "RETURN DISTINCT labels(m) AS labels, m.key AS key, m AS node"
    )
    result = self.graph.query(q, {"key": key})
    return [
        {"labels": labels, "key": k, "props": dict(node.properties)}
        for labels, k, node in result.result_set
    ]
|
|
214
|
+
|
|
215
|
+
def delete_file(
    self,
    path: str,
    *,
    head_sha: str | None = None,
    head_ord: int | None = None,
) -> None:
    """Hard-delete or tombstone the File node keyed by ``path``.

    Without ``head_sha`` (non-git ingest, legacy callers) the File node is
    removed with ``DETACH DELETE``.

    With ``head_sha`` nothing is deleted. Three things are stamped with
    ``invalid_sha`` / ``invalid_ord`` / ``invalid_at`` instead:

    1. the File node itself,
    2. the Symbols it DEFINES, skipping those flagged ``unresolved``
       (shared ``name::X`` placeholders),
    3. every edge touching the File, in either direction.

    ``invalid_at`` is the wall-clock time of this call, which lets vacuum
    filter by exact SHA, ordinal range, or age.
    """
    if head_sha is None:
        self.graph.query(
            "MATCH (f:File {key: $key}) DETACH DELETE f",
            {"key": path},
        )
        return

    # One shared stamp so the file, its symbols and its edges agree.
    tombstone = {
        "key": path,
        "head": head_sha,
        "ord": head_ord,
        "ts": time.time(),
    }
    mark_file = """
        MATCH (f:File {key: $key})
        SET f.invalid_sha = $head,
            f.invalid_ord = $ord,
            f.invalid_at = $ts
        """
    mark_symbols = """
        MATCH (f:File {key: $key})-[:DEFINES]->(s:Symbol)
        WHERE s.unresolved IS NULL
        SET s.invalid_sha = $head,
            s.invalid_ord = $ord,
            s.invalid_at = $ts
        """
    mark_edges = """
        MATCH (f:File {key: $key})-[r]-()
        SET r.invalid_sha = $head,
            r.invalid_ord = $ord,
            r.invalid_at = $ts
        """
    for statement in (mark_file, mark_symbols, mark_edges):
        self.graph.query(statement, tombstone)
|
|
276
|
+
|
|
277
|
+
# ------------------------------------------------------------------
|
|
278
|
+
# Vacuum + time-travel — bound monotonic graph growth and let callers
|
|
279
|
+
# query the historic state of the codebase at any past SHA.
|
|
280
|
+
|
|
281
|
+
def vacuum(
    self,
    *,
    before_ord: int | None = None,
    older_than_seconds: float | None = None,
    drop_all: bool = False,
    dry_run: bool = False,
) -> dict[str, int]:
    """Drop tombstoned File/Symbol nodes and edges according to one policy.

    Exactly one policy must be chosen:

    - ``drop_all``: everything carrying an ``invalid_sha``.
    - ``before_ord``: tombstones whose ``invalid_ord <= before_ord``.
    - ``older_than_seconds``: tombstones whose ``invalid_at`` is at least
      that many seconds in the past.

    Every predicate requires ``invalid_sha IS NOT NULL``, so live nodes
    and edges are never touched.

    Edges are matched with a directed pattern, so each relationship is
    counted and deleted exactly once and the count needs no halving.

    Returns ``{"files": N, "symbols": N, "edges": N}``. The counts are
    always taken before anything is deleted. With ``dry_run`` nothing is
    written.

    Raises:
        ValueError: if zero or more than one policy is supplied.
    """
    chosen = sum(
        (before_ord is not None, older_than_seconds is not None, bool(drop_all))
    )
    if chosen != 1:
        raise ValueError(
            "vacuum requires exactly one of before_ord / "
            "older_than_seconds / drop_all"
        )

    # Predicate template; ``{a}`` is the node or relationship alias, so the
    # node and edge forms are built explicitly instead of by text
    # substitution on an already rendered predicate.
    template = "{a}.invalid_sha IS NOT NULL"
    params: dict[str, Any]
    if drop_all:
        params = {}
    elif before_ord is not None:
        template += " AND {a}.invalid_ord IS NOT NULL AND {a}.invalid_ord <= $ord"
        params = {"ord": before_ord}
    else:
        assert older_than_seconds is not None  # narrowing for mypy
        template += " AND {a}.invalid_at IS NOT NULL AND {a}.invalid_at <= $cutoff"
        params = {"cutoff": time.time() - older_than_seconds}
    node_pred = template.format(a="n")
    edge_pred = template.format(a="r")

    def count(match: str, alias: str, pred: str) -> int:
        # Single-cell result of a count() aggregation.
        result = self.graph.query(
            f"{match} WHERE {pred} RETURN count({alias})", params
        )
        return int(result.result_set[0][0])

    out = {
        "files": count("MATCH (n:File)", "n", node_pred),
        "symbols": count("MATCH (n:Symbol)", "n", node_pred),
        "edges": count("MATCH ()-[r]->()", "r", edge_pred),
    }
    if dry_run:
        return out

    self.graph.query(f"MATCH (n:File) WHERE {node_pred} DETACH DELETE n", params)
    self.graph.query(f"MATCH (n:Symbol) WHERE {node_pred} DETACH DELETE n", params)
    # Tombstoned edges whose endpoints survived the node passes above
    # (possible when only one endpoint was tombstoned). DELETE r leaves
    # the endpoints alone.
    self.graph.query(f"MATCH ()-[r]->() WHERE {edge_pred} DELETE r", params)
    return out
|
|
368
|
+
|
|
369
|
+
def at_sha(
    self,
    sha: str,
    sha_ord: int,
    *,
    label: NodeLabel = "Symbol",
    limit: int = 200,
) -> list[dict[str, Any]]:
    """List ``label`` nodes that were alive at a given commit.

    A node counts as alive at ordinal ``sha_ord`` when its
    ``first_seen_ord`` is at or before it and its ``invalid_ord`` is
    either unset or strictly after it. Nodes without a ``first_seen_ord``
    (ingested before the temporal upgrade) are never returned, because
    their lifecycle cannot be compared without an ordinal.

    ``sha`` is not used for filtering; it is echoed back as ``at_sha`` in
    every row. The caller resolves the ordinal once
    (``git_delta.commit_ordinal``) and passes both. At most ``limit`` rows
    are returned.
    """
    cypher = f"""
        MATCH (n:{label})
        WHERE n.first_seen_ord IS NOT NULL
          AND n.first_seen_ord <= $ord
          AND (n.invalid_ord IS NULL OR n.invalid_ord > $ord)
        RETURN n.key, n.first_seen_sha, n.last_seen_sha, n.invalid_sha
        LIMIT $limit
        """
    result = self.graph.query(cypher, {"ord": sha_ord, "limit": limit})
    alive: list[dict[str, Any]] = []
    for key, first_seen, last_seen, invalid in result.result_set:
        alive.append(
            {
                "key": key,
                "first_seen_sha": first_seen,
                "last_seen_sha": last_seen,
                "invalid_sha": invalid,
                "at_sha": sha,
            }
        )
    return alive
|
|
410
|
+
|
|
411
|
+
def callers_at_sha(
    self,
    symbol_name: str,
    sha: str,
    sha_ord: int,
) -> list[dict[str, Any]]:
    """Files that called or referenced ``symbol_name`` as of ``sha``.

    The target symbol, the calling File and the CALLS/REFERENCES edge must
    each have been alive at ``sha_ord``: first seen at or before it, and
    either never invalidated or invalidated strictly later. A tombstone
    with ``invalid_ord > sha_ord`` therefore still counts as alive, which
    answers "what called X before commit Y deleted it" without a worktree
    checkout. Symbols flagged ``unresolved`` are skipped.

    ``sha`` itself is only echoed back as ``at_sha`` in each row.
    """
    cypher = """
        MATCH (s:Symbol {name: $name})
        WHERE s.unresolved IS NULL
          AND s.first_seen_ord IS NOT NULL
          AND s.first_seen_ord <= $ord
          AND (s.invalid_ord IS NULL OR s.invalid_ord > $ord)
        MATCH (caller:File)-[c:CALLS|REFERENCES]->(s)
        WHERE caller.first_seen_ord <= $ord
          AND (caller.invalid_ord IS NULL OR caller.invalid_ord > $ord)
          AND c.first_seen_ord <= $ord
          AND (c.invalid_ord IS NULL OR c.invalid_ord > $ord)
        RETURN DISTINCT caller.key, s.file, s.start, s.end, s.kind
        """
    result = self.graph.query(cypher, {"name": symbol_name, "ord": sha_ord})
    historic: list[dict[str, Any]] = []
    for caller_key, file_key, start, end, kind in result.result_set:
        historic.append(
            {
                "caller": caller_key,
                "target_file": file_key,
                "target_start": start,
                "target_end": end,
                "target_kind": kind,
                "at_sha": sha,
            }
        )
    return historic
|
|
450
|
+
|
|
451
|
+
def drift(self, head_sha: str) -> list[dict[str, Any]]:
    """Report resolved symbols that are out of step with ``head_sha``.

    A symbol is reported when it carries an ``invalid_sha`` or when its
    ``last_seen_sha`` differs from ``head_sha``. Each row has a ``status``:

    - ``"tombstoned"``: ``invalid_sha`` is set.
    - ``"drifted"``: not tombstoned, but last seen at another HEAD —
      usually a hint that an incremental ingest missed the file or it
      moved.

    Symbols flagged ``unresolved`` are excluded.
    """
    cypher = """
        MATCH (s:Symbol)
        WHERE s.unresolved IS NULL
          AND (s.invalid_sha IS NOT NULL OR s.last_seen_sha <> $head)
        RETURN s.key, s.name, s.file, s.last_seen_sha, s.invalid_sha
        """
    result = self.graph.query(cypher, {"head": head_sha})
    return [
        {
            "key": key,
            "name": name,
            "file": file_path,
            "last_seen_sha": last_seen,
            "invalid_sha": invalid,
            "status": "tombstoned" if invalid else "drifted",
        }
        for key, name, file_path, last_seen, invalid in result.result_set
    ]
|
|
483
|
+
|
|
484
|
+
def clear_graph(self) -> None:
    """Delete every node in the selected graph, along with its edges."""
    wipe_everything = "MATCH (n) DETACH DELETE n"
    self.graph.query(wipe_everything)
|
|
487
|
+
|
|
488
|
+
# ------------------------------------------------------------------
|
|
489
|
+
# Topology queries — exposed via MCP/CLI as `codememory_<op>` tools.
|
|
490
|
+
# All return ``list[dict]`` so callers can render or JSON-serialize.
|
|
491
|
+
# ``depth`` is capped at 3 to keep traversal bounded.
|
|
492
|
+
|
|
493
|
+
def callers(self, symbol_name: str, depth: int = 1) -> list[dict[str, Any]]:
    """Files that call or reference ``symbol_name``, up to ``depth`` hops.

    Each hop looks up the direct callers of the current names, so both
    CALLS and REFERENCES edges are covered. An interface such as
    ``IFooService`` therefore surfaces the call sites of its members and
    the files declaring a parameter / field / base list of that type.
    Rows carry the coordinates of the called definition so the user can
    jump to it.

    ``depth`` is clamped to 1..3. Deeper hops are walked in Python: the
    symbols DEFINED by each newly found caller file become the next
    targets. CALLS/REFERENCES run File→Symbol only, so there is no reverse
    edge for a Cypher variable-length path to chain on.

    Each caller file appears once, in discovery order, and each name is
    expanded at most once.
    """
    max_hops = max(1, min(depth, 3))
    visited_files: set[str] = set()
    visited_names: set[str] = set()
    results: list[dict[str, Any]] = []
    pending: list[str] = [symbol_name]
    hop = 0
    while True:
        discovered: list[str] = []
        for name in pending:
            if name in visited_names:
                continue
            visited_names.add(name)
            for row in self._callers_one_hop(name):
                caller_file = row["caller"]
                if caller_file not in visited_files:
                    visited_files.add(caller_file)
                    results.append(row)
                    discovered.append(caller_file)
        hop += 1
        # Stop before expanding when the hop budget is spent or this ring
        # produced no new files.
        if hop >= max_hops or not discovered:
            break
        pending = [
            name
            for caller_file in discovered
            for name in self._defines_at(caller_file)
        ]
    return results
|
|
531
|
+
|
|
532
|
+
def _callers_one_hop(self, symbol_name: str) -> list[dict[str, Any]]:
    """Live Files with a live CALLS/REFERENCES edge to ``symbol_name``.

    Only resolved, non-tombstoned symbols named ``symbol_name`` are used
    as targets, and only non-tombstoned Files are returned.

    The edge itself must also be live (``c.invalid_sha IS NULL``).
    ``delete_file`` tombstones a File's edges while upserts only revive
    what they are handed, so a File can be alive again while one of its
    old edges is still tombstoned. Without the edge filter such a File
    would be reported as a caller.

    At most 500 rows are returned, each with the caller's key and the
    coordinates of the called definition.
    """
    rows = self.graph.query(
        "MATCH (s:Symbol {name: $name}) "
        "WHERE s.unresolved IS NULL AND s.invalid_sha IS NULL "
        "MATCH (caller:File)-[c:CALLS|REFERENCES]->(s) "
        "WHERE caller.invalid_sha IS NULL "
        "AND c.invalid_sha IS NULL "
        "RETURN DISTINCT caller.key, s.file, s.start, s.end, s.kind "
        "LIMIT 500",
        {"name": symbol_name},
    ).result_set
    return [
        {
            "caller": caller_key,
            "target_file": file_key,
            "target_start": start,
            "target_end": end,
            "target_kind": kind,
        }
        for caller_key, file_key, start, end, kind in rows
    ]
|
|
552
|
+
|
|
553
|
+
def _defines_at(self, file_key: str) -> list[str]:
    """Distinct names of live, resolved symbols DEFINED by ``file_key``.

    Tombstoned files or symbols and symbols flagged ``unresolved`` are
    skipped, as are rows whose name is empty. The query is capped at 500
    rows.
    """
    cypher = (
        "MATCH (f:File {key: $key})-[:DEFINES]->(s:Symbol) "
        "WHERE s.unresolved IS NULL AND s.invalid_sha IS NULL "
        " AND f.invalid_sha IS NULL "
        "RETURN DISTINCT s.name LIMIT 500"
    )
    result = self.graph.query(cypher, {"key": file_key})
    names: list[str] = []
    for (name,) in result.result_set:
        if name:
            names.append(name)
    return names
|
|
563
|
+
|
|
564
|
+
def callees(self, symbol_name: str, depth: int = 1) -> list[dict[str, Any]]:
    """Call targets reachable from the file defining ``symbol_name``.

    Both resolved targets (a Symbol or Type node the resolver bound the
    call to) and unresolved placeholders are returned. Hiding placeholders
    used to make this a silent no-op for code where every call goes
    through ``this.port.method()`` and the bare method name cannot be
    bound to a unique definition.

    ``depth`` is clamped to 1..3. The first hop is anchored on
    ``symbol_name``; each later hop is anchored on the files of the
    resolved callees found in the previous hop. The recursion is done in
    Python because CALLS runs File→Symbol only, so a Cypher
    variable-length path has no Symbol→File edge to chain on.

    Rows are de-duplicated on ``(name, file)`` and kept in discovery
    order.
    """
    hops_left = max(1, min(depth, 3))
    visited: set[tuple[str, str | None]] = set()
    results: list[dict[str, Any]] = []
    anchor_symbol: str | None = symbol_name
    anchor_files: list[str | None] = [None]  # None = start from defining file
    while hops_left > 0:
        hops_left -= 1
        resolved_files: list[str | None] = []
        for row in self._callees_one_hop(anchor_symbol, anchor_files):
            identity = (row["name"], row["file"])
            if identity in visited:
                continue
            visited.add(identity)
            results.append(row)
            # Only resolved targets with a known file can be expanded.
            if row["resolved"] and row["file"]:
                resolved_files.append(row["file"])
        if not resolved_files:
            break
        anchor_symbol, anchor_files = None, resolved_files
    return results
|
|
601
|
+
|
|
602
|
+
def _callees_one_hop(
    self, symbol_name: str | None, files: list[str | None]
) -> list[dict[str, Any]]:
    """Follow one CALLS hop and return the live targets.

    Two anchoring modes:

    - ``symbol_name`` set: start from every live File that DEFINES a live
      symbol with that name (``files`` is ignored).
    - ``symbol_name`` None: start from the live Files whose key is in
      ``files`` (falsy entries are dropped).

    Targets must be live nodes whose first label is ``Symbol`` or
    ``Type``. The CALLS edge must be live too (``c.invalid_sha IS NULL``):
    ``delete_file`` tombstones a File's edges, and an edge that is not
    re-upserted stays tombstoned even after the File is alive again.

    ``resolved`` is True when the target has no ``unresolved`` property.
    At most 500 rows are returned.
    """
    # Shared by both anchoring modes; only the way defFile is bound differs.
    hop = (
        "MATCH (defFile)-[c:CALLS]->(target) "
        "WHERE c.invalid_sha IS NULL AND target.invalid_sha IS NULL "
        " AND (labels(target)[0] = 'Symbol' OR labels(target)[0] = 'Type') "
        "RETURN DISTINCT target.name, target.file, target.start, "
        " target.end, target.kind, target.unresolved, labels(target)[0] "
        "LIMIT 500"
    )
    params: dict[str, Any]
    if symbol_name is not None:
        anchor = (
            "MATCH (defFile:File)-[:DEFINES]->(s:Symbol {name: $name}) "
            "WHERE defFile.invalid_sha IS NULL AND s.invalid_sha IS NULL "
        )
        params = {"name": symbol_name}
    else:
        anchor = (
            "MATCH (defFile:File) WHERE defFile.key IN $files "
            " AND defFile.invalid_sha IS NULL "
        )
        params = {"files": [f for f in files if f]}
    rows = self.graph.query(anchor + hop, params).result_set
    return [
        {
            "name": name,
            "file": file_key,
            "start": start,
            "end": end,
            "kind": kind,
            "resolved": unresolved is None,
            "label": label,
        }
        for name, file_key, start, end, kind, unresolved, label in rows
    ]
|
|
645
|
+
|
|
646
|
+
def injects(self, symbol_name: str) -> list[dict[str, Any]]:
    """DI dependencies declared in the file that defines ``symbol_name``.

    Angular's ``inject(Token)`` primitive (and Razor's ``@inject``) emit
    INJECTS edges separately from CALLS, so the DI graph is not conflated
    with raw method invocation. Use this to answer "what does this class
    depend on?" without sifting through imported modules.

    The defining File, the symbol, the INJECTS edge and the target must
    all be live. The edge check (``i.invalid_sha IS NULL``) keeps out
    injections that were tombstoned by ``delete_file`` and never
    re-upserted, even though both endpoints are alive again.

    ``resolved`` is True when the target has no ``unresolved`` property.
    At most 500 rows are returned.
    """
    rows = self.graph.query(
        "MATCH (defFile:File)-[:DEFINES]->(s:Symbol {name: $name}) "
        "WHERE defFile.invalid_sha IS NULL AND s.invalid_sha IS NULL "
        "MATCH (defFile)-[i:INJECTS]->(target) "
        "WHERE i.invalid_sha IS NULL AND target.invalid_sha IS NULL "
        "RETURN DISTINCT target.name, target.key, target.file, "
        " target.kind, target.unresolved "
        "LIMIT 500",
        {"name": symbol_name},
    ).result_set
    return [
        {
            "name": name,
            "key": key,
            "file": file_key,
            "kind": kind,
            "resolved": unresolved is None,
        }
        for name, key, file_key, kind, unresolved in rows
    ]
|
|
675
|
+
|
|
676
|
+
def injectors(self, token: str) -> list[dict[str, Any]]:
    """Files that inject ``token`` (reverse INJECTS edges).

    ``token`` may be the bare name of a class/abstract class used as an
    Angular DI token, or any symbol exposed via INJECTS.

    The File, the Symbol and the INJECTS edge must all be live. The edge
    check (``i.invalid_sha IS NULL``) excludes injections tombstoned by
    ``delete_file`` that were not re-upserted after the File came back.

    Returns up to 500 ``{"file": key}`` rows.
    """
    rows = self.graph.query(
        "MATCH (f:File)-[i:INJECTS]->(s:Symbol {name: $name}) "
        "WHERE f.invalid_sha IS NULL AND s.invalid_sha IS NULL "
        "AND i.invalid_sha IS NULL "
        "RETURN DISTINCT f.key LIMIT 500",
        {"name": token},
    ).result_set
    return [{"file": file_key} for (file_key,) in rows]
|
|
689
|
+
|
|
690
|
+
def importers(self, target: str) -> list[dict[str, Any]]:
    """Live Files with a live IMPORTS edge to the Module keyed ``target``.

    ``target`` is compared exactly against the Module key. It may be a
    package name (``@acme-ng/security``, ``rxjs``) or a relative path that
    was preserved on ingest (``./bar``).

    Returns one ``{"file", "module"}`` row per match.
    """
    cypher = (
        "MATCH (f:File)-[r:IMPORTS]->(m:Module {key: $key}) "
        "WHERE f.invalid_sha IS NULL AND r.invalid_sha IS NULL "
        "RETURN f.key, m.key"
    )
    result = self.graph.query(cypher, {"key": target})
    matches: list[dict[str, Any]] = []
    for file_key, module_key in result.result_set:
        matches.append({"file": file_key, "module": module_key})
    return matches
|
|
704
|
+
|
|
705
|
+
def dependencies(self, file_path: str, depth: int = 1) -> list[dict[str, Any]]:
    """Distinct Modules reachable from ``file_path`` over IMPORTS edges.

    ``depth`` is clamped to 1..3 and used as the upper bound of a
    variable-length IMPORTS path starting at the live File keyed
    ``file_path``. Only live Modules are returned. Paths longer than one
    hop only exist where imported modules are themselves linked onward in
    the graph; bare external packages have no outgoing edges.

    Returns one ``{"module": key}`` row per Module.
    """
    max_hops = max(1, min(depth, 3))
    # The hop bound cannot be a query parameter, so it is formatted in.
    cypher = (
        "MATCH (f:File {key: $key}) "
        "WHERE f.invalid_sha IS NULL "
        f"MATCH (f)-[:IMPORTS*1..{max_hops}]->(m:Module) "
        "WHERE m.invalid_sha IS NULL "
        "RETURN DISTINCT m.key"
    )
    result = self.graph.query(cypher, {"key": file_path})
    modules: list[dict[str, Any]] = []
    for (module_key,) in result.result_set:
        modules.append({"module": module_key})
    return modules
|
|
722
|
+
|
|
723
|
+
def definitions(self, symbol_name: str) -> list[dict[str, Any]]:
    """Every live file and line range that DEFINES ``symbol_name``.

    Useful for disambiguation: one row means the name is unique, several
    rows mean it is shared across files. Tombstoned files or symbols and
    symbols flagged ``unresolved`` are left out.

    Returns ``{"file", "start", "end", "kind"}`` rows.
    """
    cypher = (
        "MATCH (f:File)-[:DEFINES]->(s:Symbol {name: $name}) "
        "WHERE s.unresolved IS NULL "
        " AND s.invalid_sha IS NULL "
        " AND f.invalid_sha IS NULL "
        "RETURN f.key, s.start, s.end, s.kind"
    )
    result = self.graph.query(cypher, {"name": symbol_name})
    found: list[dict[str, Any]] = []
    for file_key, start, end, kind in result.result_set:
        found.append({"file": file_key, "start": start, "end": end, "kind": kind})
    return found
|