codecompass-mcp 2.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,485 @@
1
+ # Source files are authoritative; this graph is a stale-tolerant index that degrades gracefully.
2
+ """Neo4j client for code knowledge graphs.
3
+
4
+ Handles Project / Folder / File / Entity nodes and typed semantic edges
5
+ (CALLS, IMPORTS, INHERITS, etc.). Community-edition compatible — filters on
6
+ the `project` property instead of requiring separate Neo4j databases.
7
+ """
8
+
9
+ from __future__ import annotations
10
+
11
+ import uuid
12
+ from datetime import datetime, timezone
13
+ from typing import Optional
14
+
15
+ from neo4j import GraphDatabase
16
+
17
+ from config import neo4j_config
18
+ from models.code_types import CodeTriple, FileNode, FolderNode
19
+
20
+
21
# Relationship types emitted by code_parser. Cypher MERGE cannot take the
# relationship type as a parameter, so the type is interpolated into the query
# text; membership in this set is checked first to prevent injection. A type
# that is not listed here is written under the generic "RELATION" type.
_ALLOWED_REL_TYPES = frozenset((
    "CALLS",
    "IMPORTS",
    "INHERITS",
    "DEFINED_IN",
    "HAS_CLASS",
    "POSTS_TO",
    "INCLUDES",
    "STYLES",
    "USED_BY",
    "USES_VAR",
    "REFERENCES",
))
28
+
29
+
30
def get_client(project: str) -> "CodeGraphClient":
    """Build a CodeGraphClient from the Neo4j settings in ``neo4j_config()``.

    ``project`` is accepted for call-site symmetry but is not used to build
    the connection: the returned client is project-agnostic and every query
    method takes the project name explicitly.
    """
    settings = neo4j_config()
    return CodeGraphClient(
        uri=settings["uri"],
        user=settings["user"],
        password=settings["password"],
    )
34
+
35
+
36
class CodeGraphClient:
    """Persist and query a code knowledge graph in Neo4j.

    One client wraps one driver. Project scoping is done per call, through the
    ``project`` argument that is stored on (and filtered by) node properties,
    so a single client can serve several projects in one database.

    The client may be used as a context manager; the driver is closed on exit.
    """

    def __init__(
        self,
        uri: str,
        user: str,
        password: str,
        database: Optional[str] = None,
    ) -> None:
        """Open a driver for ``uri`` authenticated as ``user``.

        ``database`` selects a named database; ``None`` uses the server default.
        """
        self._driver = GraphDatabase.driver(uri, auth=(user, password))
        self._database = database  # None → Neo4j default (Community compatible)

    def __enter__(self) -> "CodeGraphClient":
        return self

    def __exit__(self, exc_type, exc, tb) -> None:
        self.close()

    # ------------------------------------------------------------------
    # Structural nodes (hierarchy skeleton)
    # ------------------------------------------------------------------

    def merge_project_node(self, node_id: str, name: str, path: str) -> None:
        """Upsert a Project node and stamp ``last_ingested`` with the current time."""
        self._run("""
            MERGE (p:Project {id: $id})
            SET p.name = $name,
                p.path = $path,
                p.last_ingested = $now
        """, id=node_id, name=name, path=path, now=_now())

    def merge_folder_node(self, node_id: str, folder: "FolderNode", project: str) -> None:
        """Upsert a Folder node keyed by ``node_id``."""
        self._run("""
            MERGE (f:Folder {id: $id})
            SET f.name = $name,
                f.path = $path,
                f.depth = $depth,
                f.project = $project
        """, id=node_id, name=folder.name, path=folder.path,
            depth=folder.depth, project=project)

    def merge_file_node(self, node_id: str, file: "FileNode", project: str) -> None:
        """Upsert a File node, stamping ``updated_at`` on every write."""
        self._run("""
            MERGE (f:File {id: $id})
            SET f.name = $name,
                f.path = $path,
                f.extension = $extension,
                f.depth = $depth,
                f.project = $project,
                f.updated_at = $now
        """, id=node_id, name=file.name, path=file.path,
            extension=file.extension, depth=file.depth, project=project,
            now=_now())

    def merge_contains_edge(self, parent_id: str, child_id: str) -> None:
        """Upsert a CONTAINS edge between two existing nodes, matched by ``id``.

        Both endpoints are matched without a label, so nothing is written when
        either id does not exist.
        """
        self._run("""
            MATCH (parent {id: $parent_id})
            MATCH (child {id: $child_id})
            MERGE (parent)-[:CONTAINS]->(child)
        """, parent_id=parent_id, child_id=child_id)

    # ------------------------------------------------------------------
    # Entity nodes and semantic edges (from code triples)
    # ------------------------------------------------------------------

    @staticmethod
    def _safe_rel_type(relation_type: str) -> str:
        """Return ``relation_type`` if whitelisted, else the generic "RELATION"."""
        return relation_type if relation_type in _ALLOWED_REL_TYPES else "RELATION"

    def write_code_triple(self, triple: "CodeTriple", file_node_id: str, project: str) -> None:
        """Persist a CodeTriple as two Entity nodes plus a typed semantic edge.

        The relationship type is interpolated into the query text because
        Cypher MERGE does not accept a parameterised relationship type; it is
        whitelist-validated first. The source entity is then linked to its
        File node with a CONTAINS edge (a no-op when the File does not exist).
        """
        from_id = _entity_id(triple.from_entity, project)
        to_id = _entity_id(triple.to_entity, project)
        rel_type = self._safe_rel_type(triple.relation_type)

        self._run(f"""
            MERGE (a:Entity {{id: $from_id}})
            SET a.name = $from_name,
                a.type = $from_type,
                a.project = $project,
                a.file = $source_file

            MERGE (b:Entity {{id: $to_id}})
            SET b.name = $to_name,
                b.type = $to_type,
                b.project = $project

            MERGE (a)-[r:{rel_type}]->(b)
            ON CREATE SET r.source_file = $source_file,
                          r.line = $line,
                          r.created_at = $now
        """,
            from_id=from_id,
            from_name=triple.from_entity,
            from_type=triple.from_type,
            to_id=to_id,
            to_name=triple.to_entity,
            to_type=triple.to_type,
            source_file=triple.source_file,
            line=triple.line_number,
            project=project,
            now=_now(),
        )

        self._run("""
            MATCH (f:File {id: $file_id})
            MATCH (e:Entity {id: $entity_id})
            MERGE (f)-[:CONTAINS]->(e)
        """, file_id=file_node_id, entity_id=from_id)

    def write_code_triples_batch(
        self,
        triples: "list[CodeTriple]",
        file_id_map: dict[str, str],
        project: str,
        batch_size: int = 500,
    ) -> int:
        """Write a list of CodeTriples to Neo4j using UNWIND batching.

        Triples are grouped by relation type (so the type can be a Cypher
        literal) and each group is written in chunks of ``batch_size``.
        ``file_id_map`` maps a triple's ``source_file`` to its File node id;
        triples whose file is not in the map are written without a CONTAINS edge.

        Returns the number of triples written.

        Raises:
            ValueError: if ``batch_size`` is smaller than 1 (a zero or negative
                size would otherwise crash or silently write nothing).
        """
        from collections import defaultdict

        if not triples:
            return 0
        if batch_size < 1:
            raise ValueError(f"batch_size must be >= 1, got {batch_size}")

        now = _now()
        by_rel: dict[str, list[dict]] = defaultdict(list)

        for triple in triples:
            by_rel[self._safe_rel_type(triple.relation_type)].append({
                "from_id": _entity_id(triple.from_entity, project),
                "from_name": triple.from_entity,
                "from_type": triple.from_type,
                "to_id": _entity_id(triple.to_entity, project),
                "to_name": triple.to_entity,
                "to_type": triple.to_type,
                "source_file": triple.source_file,
                "line": triple.line_number,
                "file_node_id": file_id_map.get(triple.source_file, ""),
                "project": project,
                "now": now,
            })

        # Ceiling division per group: number of _run calls that will be made.
        total_batches = sum(-(-len(v) // batch_size) for v in by_rel.values())

        # tqdm is optional; without it the write proceeds silently.
        progress = None
        try:
            from tqdm import tqdm
            progress = tqdm(total=total_batches, desc="Writing triples", unit="batch")
        except ImportError:
            pass

        written = 0
        try:
            for rel_type, records in by_rel.items():
                query = f"""
                    UNWIND $records AS t
                    MERGE (a:Entity {{id: t.from_id}})
                    SET a.name = t.from_name,
                        a.type = t.from_type,
                        a.project = t.project,
                        a.file = t.source_file
                    MERGE (b:Entity {{id: t.to_id}})
                    SET b.name = t.to_name,
                        b.type = t.to_type,
                        b.project = t.project
                    MERGE (a)-[r:{rel_type}]->(b)
                    ON CREATE SET r.source_file = t.source_file,
                                  r.line = t.line,
                                  r.created_at = t.now
                    WITH a, t
                    WHERE t.file_node_id <> ""
                    MATCH (f:File {{id: t.file_node_id}})
                    MERGE (f)-[:CONTAINS]->(a)
                """
                for start in range(0, len(records), batch_size):
                    chunk = records[start:start + batch_size]
                    self._run(query, records=chunk)
                    written += len(chunk)
                    if progress is not None:
                        progress.update(1)
        finally:
            # Close the bar even when a write fails, so the terminal is restored.
            if progress is not None:
                progress.close()

        return written

    # ------------------------------------------------------------------
    # Traversal queries used by code_query_cli
    #
    # Cypher does not accept a parameter as a variable-length bound, so the
    # hop limit is coerced with int() and inlined into the pattern. Bounding
    # the expansion itself (instead of filtering on length(path) afterwards)
    # stops the traversal at max_hops rather than enumerating every path first.
    # ------------------------------------------------------------------

    def find_callers(self, entity_name: str, project: str, max_hops: int = 3) -> list[dict]:
        """Return everything that calls/uses/references entity_name (reverse traversal).

        Follows CALLS, USES_VAR and REFERENCES edges up to ``max_hops`` deep.
        Each row: {caller_name, caller_type, caller_file, depth}, ordered by depth.
        Returns [] when ``max_hops`` is smaller than 1.
        """
        hops = int(max_hops)
        if hops < 1:
            return []
        entity_id = _entity_id(entity_name, project)
        return self._run_read(f"""
            MATCH path = (caller:Entity)-[:CALLS|USES_VAR|REFERENCES*1..{hops}]->(target:Entity {{id: $id}})
            WHERE caller.project = $project
            RETURN caller.name AS caller_name,
                   caller.type AS caller_type,
                   caller.file AS caller_file,
                   length(path) AS depth
            ORDER BY depth
        """, id=entity_id, project=project)

    def find_dependencies(self, file_path: str, project: str, max_hops: int = 3) -> list[dict]:
        """Return all modules imported (directly or transitively) by file_path.

        ``max_hops`` counts the File→Entity CONTAINS hop as well, so at most
        ``max_hops - 1`` IMPORTS edges are followed and values below 2 yield [].
        Each row: {dependency, dep_type, depth}.
        """
        import_hops = int(max_hops) - 1
        if import_hops < 1:
            return []
        return self._run_read(f"""
            MATCH (f:File {{path: $path, project: $project}})
            MATCH path = (f)-[:CONTAINS]->(:Entity)-[:IMPORTS*1..{import_hops}]->(dep:Entity)
            RETURN DISTINCT dep.name AS dependency,
                   dep.type AS dep_type,
                   length(path) AS depth
            ORDER BY depth, dep.name
        """, path=file_path, project=project)

    def find_styles(self, element_name: str, project: str) -> list[dict]:
        """Return all CSS selectors that style element_name.

        Each row: {selector, source_file, line}, ordered by selector name.
        """
        return self._run_read("""
            MATCH (sel:Entity)-[r:STYLES]->(el:Entity)
            WHERE el.name = $name AND el.project = $project
            RETURN sel.name AS selector,
                   sel.file AS source_file,
                   r.line AS line
            ORDER BY sel.name
        """, name=element_name, project=project)

    def trace_calls(self, start_name: str, project: str, max_hops: int = 4) -> list[dict]:
        """Trace the call chain forward from start_name up to max_hops deep.

        Each row: {callee_name, callee_type, callee_file, depth}.
        Returns [] when ``max_hops`` is smaller than 1.
        """
        hops = int(max_hops)
        if hops < 1:
            return []
        start_id = _entity_id(start_name, project)
        return self._run_read(f"""
            MATCH path = (start:Entity {{id: $id}})-[:CALLS*1..{hops}]->(callee:Entity)
            WHERE callee.project = $project
            RETURN callee.name AS callee_name,
                   callee.type AS callee_type,
                   callee.file AS callee_file,
                   length(path) AS depth
            ORDER BY depth, callee.name
        """, id=start_id, project=project)

    def get_project_tree(self, project: str) -> list[dict]:
        """Return every node reachable via CONTAINS from the named Project node.

        Each row: {node_type, name, path, depth}, where node_type is the
        node's first label.
        """
        return self._run_read("""
            MATCH (root:Project {name: $project})-[:CONTAINS*]->(child)
            RETURN labels(child)[0] AS node_type,
                   child.name AS name,
                   child.path AS path,
                   child.depth AS depth
            ORDER BY child.depth, child.path
        """, project=project)

    def get_file_updated_at(self, file_path: str, project: str) -> Optional[str]:
        """Return the updated_at timestamp for a File node, or None if not found."""
        rows = self._run_read("""
            MATCH (f:File {path: $path, project: $project})
            RETURN f.updated_at AS updated_at
        """, path=file_path, project=project)
        return rows[0]["updated_at"] if rows else None

    def get_project_last_ingested(self, project: str) -> Optional[str]:
        """Return the last_ingested timestamp for a Project node, or None if not found."""
        rows = self._run_read("""
            MATCH (p:Project {name: $project})
            RETURN p.last_ingested AS last_ingested
        """, project=project)
        return rows[0]["last_ingested"] if rows else None

    # ------------------------------------------------------------------
    # Indexes
    # ------------------------------------------------------------------

    def ensure_indexes(self) -> None:
        """Create all required Neo4j indexes — idempotent, safe to call every startup.

        Covers the properties the queries in this class look nodes up by,
        including the ``id`` property that every structural MERGE keys on.
        """
        indexes = [
            "CREATE INDEX entity_id IF NOT EXISTS FOR (e:Entity) ON (e.id)",
            "CREATE INDEX entity_project IF NOT EXISTS FOR (e:Entity) ON (e.project)",
            "CREATE INDEX entity_name_project IF NOT EXISTS FOR (e:Entity) ON (e.name, e.project)",
            "CREATE INDEX entity_file IF NOT EXISTS FOR (e:Entity) ON (e.file)",
            "CREATE INDEX file_id IF NOT EXISTS FOR (f:File) ON (f.id)",
            "CREATE INDEX file_path_project IF NOT EXISTS FOR (f:File) ON (f.path, f.project)",
            "CREATE INDEX file_project IF NOT EXISTS FOR (f:File) ON (f.project)",
            "CREATE INDEX folder_id IF NOT EXISTS FOR (d:Folder) ON (d.id)",
            "CREATE INDEX project_id IF NOT EXISTS FOR (p:Project) ON (p.id)",
            "CREATE INDEX project_name IF NOT EXISTS FOR (p:Project) ON (p.name)",
        ]
        for stmt in indexes:
            self._run(stmt)

    # ------------------------------------------------------------------
    # Cleanup
    # ------------------------------------------------------------------

    def delete_file_triples(self, file_path: str, project: str) -> None:
        """Remove all Entity nodes sourced from file_path (before re-ingesting a modified file).

        Leaves the File node intact so the hierarchy skeleton remains valid.
        """
        self._run("""
            MATCH (e:Entity {project: $project, file: $path})
            DETACH DELETE e
        """, project=project, path=file_path)

    def delete_file(self, file_path: str, project: str) -> None:
        """Remove both the File node and all Entity nodes sourced from file_path.

        Use for deleted or moved files — removes ghost nodes from the index.
        """
        self.delete_file_triples(file_path, project)
        self._run("""
            MATCH (f:File {path: $path, project: $project})
            DETACH DELETE f
        """, project=project, path=file_path)

    # ------------------------------------------------------------------
    # Utility
    # ------------------------------------------------------------------

    def get_blast_radius(
        self, target: str, project: str, max_hops: int = 3
    ) -> tuple[list[dict], Optional[str]]:
        """Return all files reachable from target via CALLS/IMPORTS/INHERITS (forward).

        Tries target as a symbol name first, then as a file path.
        Returns (rows, target_file) where target_file is the file containing the
        target (or the path itself for file targets). Returns ([], None) when not found.
        Each row: {file, edge_type, hops}; edge_type is the type of the first
        edge on the path. With ``max_hops`` below 1 the rows are always empty.
        """
        hops = int(max_hops)

        entity_id = _entity_id(target, project)
        entity_rows = self._run_read(
            "MATCH (e:Entity {id: $id, project: $project}) RETURN e.file AS file LIMIT 1",
            id=entity_id, project=project,
        )
        if entity_rows:
            target_file = entity_rows[0].get("file")
            if hops < 1:
                return [], target_file
            rows = self._run_read(f"""
                MATCH path = (start:Entity {{id: $id}})-[:CALLS|IMPORTS|INHERITS*1..{hops}]->(dep:Entity)
                WHERE dep.project = $project AND dep.file IS NOT NULL
                RETURN dep.file AS file,
                       type(relationships(path)[0]) AS edge_type,
                       length(path) AS hops
                ORDER BY hops, dep.file
            """, id=entity_id, project=project)
            return rows, target_file

        file_rows = self._run_read(
            "MATCH (f:File {path: $path, project: $project}) RETURN f.path AS file LIMIT 1",
            path=target, project=project,
        )
        if file_rows:
            target_file = file_rows[0].get("file")
            if hops < 1:
                return [], target_file
            rows = self._run_read(f"""
                MATCH (f:File {{path: $path, project: $project}})-[:CONTAINS]->(e:Entity)
                MATCH path = (e)-[:CALLS|IMPORTS|INHERITS*1..{hops}]->(dep:Entity)
                WHERE dep.project = $project
                  AND dep.file IS NOT NULL AND dep.file <> $path
                RETURN dep.file AS file,
                       type(relationships(path)[0]) AS edge_type,
                       length(path) AS hops
                ORDER BY hops, dep.file
            """, path=target, project=project)
            return rows, target_file

        return [], None

    def get_all_entity_names(self, project: str) -> list[dict]:
        """Return {id, name, entity_type} for every entity in a project, ordered by name."""
        return self._run_read("""
            MATCH (e:Entity {project: $project})
            RETURN e.id AS id, e.name AS name, e.type AS entity_type
            ORDER BY e.name
        """, project=project)

    def find_entity_by_name(self, name: str, project: str) -> Optional[dict]:
        """Exact-match lookup for a single entity; None when nothing matches."""
        rows = self._run_read("""
            MATCH (e:Entity {name: $name, project: $project})
            RETURN e.id AS id, e.name AS name, e.type AS entity_type
            LIMIT 1
        """, name=name, project=project)
        return rows[0] if rows else None

    def get_all_projects(self) -> list[str]:
        """Return the names of all ingested projects, ordered alphabetically."""
        rows = self._run_read("MATCH (p:Project) RETURN p.name AS name ORDER BY p.name")
        return [r["name"] for r in rows]

    def get_file_nodes(self, project: str) -> list[dict]:
        """Return {id, path} for every File node in a project — used by load-triples."""
        return self._run_read("""
            MATCH (f:File {project: $project})
            RETURN f.id AS id, f.path AS path
        """, project=project)

    def node_count(self) -> int:
        """Return total node count across all types."""
        rows = self._run_read("MATCH (n) RETURN count(n) AS cnt")
        return rows[0]["cnt"] if rows else 0

    def close(self) -> None:
        """Close the underlying driver."""
        self._driver.close()

    # ------------------------------------------------------------------
    # Internal helpers
    # ------------------------------------------------------------------

    def _run(self, query: str, **params) -> None:
        """Execute a write/DDL query and wait for it to finish."""
        self._execute(query, params, fetch=False)

    def _run_read(self, query: str, **params) -> list[dict]:
        """Execute a query and return its records as a list of dicts."""
        return self._execute(query, params, fetch=True)

    def _execute(self, query: str, params: dict, *, fetch: bool) -> list[dict]:
        """Run ``query``, falling back to the default database if the named one is missing.

        When a named database was configured and the server reports
        DatabaseNotFound, the client permanently switches to the default
        database and retries once. Any other error is re-raised unchanged.
        """
        try:
            return self._execute_once(query, params, fetch, self._database)
        except Exception as exc:
            if self._database is None or "DatabaseNotFound" not in str(exc):
                raise
            self._database = None
        return self._execute_once(query, params, fetch, None)

    def _execute_once(
        self, query: str, params: dict, fetch: bool, database: Optional[str]
    ) -> list[dict]:
        """Run ``query`` in a fresh session and fully drain the result."""
        with self._driver.session(database=database) as session:
            result = session.run(query, params)
            if fetch:
                # Materialise inside the session: records are unavailable once it closes.
                return [record.data() for record in result]
            # Consume explicitly so a server-side failure surfaces here instead
            # of being dropped when the session is closed.
            result.consume()
            return []
473
+
474
+
475
+ # ---------------------------------------------------------------------------
476
+ # Module-level helpers
477
+ # ---------------------------------------------------------------------------
478
+
479
def _entity_id(name: str, project: str) -> str:
    """Derive a deterministic, project-scoped entity ID.

    The ID is a UUIDv5 of ``"<project>:<lower-cased name>"``, so the same name
    in two projects gets different IDs, while names that differ only by case
    within one project share an ID.
    """
    scoped_key = "{}:{}".format(project, name.lower())
    return str(uuid.uuid5(uuid.NAMESPACE_DNS, scoped_key))
482
+
483
+
484
def _now() -> str:
    """Return the current UTC time as a timezone-aware ISO-8601 string."""
    current = datetime.now(tz=timezone.utc)
    return current.isoformat()