code-graph-builder 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (93) hide show
  1. code_graph_builder/__init__.py +82 -0
  2. code_graph_builder/builder.py +366 -0
  3. code_graph_builder/cgb_cli.py +32 -0
  4. code_graph_builder/cli.py +564 -0
  5. code_graph_builder/commands_cli.py +1288 -0
  6. code_graph_builder/config.py +340 -0
  7. code_graph_builder/constants.py +708 -0
  8. code_graph_builder/embeddings/__init__.py +40 -0
  9. code_graph_builder/embeddings/qwen3_embedder.py +573 -0
  10. code_graph_builder/embeddings/vector_store.py +584 -0
  11. code_graph_builder/examples/__init__.py +0 -0
  12. code_graph_builder/examples/example_configuration.py +276 -0
  13. code_graph_builder/examples/example_kuzu_usage.py +109 -0
  14. code_graph_builder/examples/example_semantic_search_full.py +347 -0
  15. code_graph_builder/examples/generate_wiki.py +915 -0
  16. code_graph_builder/examples/graph_export_example.py +100 -0
  17. code_graph_builder/examples/rag_example.py +206 -0
  18. code_graph_builder/examples/test_cli_demo.py +129 -0
  19. code_graph_builder/examples/test_embedding_api.py +153 -0
  20. code_graph_builder/examples/test_kuzu_local.py +190 -0
  21. code_graph_builder/examples/test_rag_redis.py +390 -0
  22. code_graph_builder/graph_updater.py +605 -0
  23. code_graph_builder/guidance/__init__.py +1 -0
  24. code_graph_builder/guidance/agent.py +123 -0
  25. code_graph_builder/guidance/prompts.py +74 -0
  26. code_graph_builder/guidance/toolset.py +264 -0
  27. code_graph_builder/language_spec.py +536 -0
  28. code_graph_builder/mcp/__init__.py +21 -0
  29. code_graph_builder/mcp/api_doc_generator.py +764 -0
  30. code_graph_builder/mcp/file_editor.py +207 -0
  31. code_graph_builder/mcp/pipeline.py +777 -0
  32. code_graph_builder/mcp/server.py +161 -0
  33. code_graph_builder/mcp/tools.py +1800 -0
  34. code_graph_builder/models.py +115 -0
  35. code_graph_builder/parser_loader.py +344 -0
  36. code_graph_builder/parsers/__init__.py +7 -0
  37. code_graph_builder/parsers/call_processor.py +306 -0
  38. code_graph_builder/parsers/call_resolver.py +139 -0
  39. code_graph_builder/parsers/definition_processor.py +796 -0
  40. code_graph_builder/parsers/factory.py +119 -0
  41. code_graph_builder/parsers/import_processor.py +293 -0
  42. code_graph_builder/parsers/structure_processor.py +145 -0
  43. code_graph_builder/parsers/type_inference.py +143 -0
  44. code_graph_builder/parsers/utils.py +134 -0
  45. code_graph_builder/rag/__init__.py +68 -0
  46. code_graph_builder/rag/camel_agent.py +429 -0
  47. code_graph_builder/rag/client.py +298 -0
  48. code_graph_builder/rag/config.py +239 -0
  49. code_graph_builder/rag/cypher_generator.py +67 -0
  50. code_graph_builder/rag/llm_backend.py +210 -0
  51. code_graph_builder/rag/markdown_generator.py +352 -0
  52. code_graph_builder/rag/prompt_templates.py +440 -0
  53. code_graph_builder/rag/rag_engine.py +640 -0
  54. code_graph_builder/rag/review_report.md +172 -0
  55. code_graph_builder/rag/tests/__init__.py +3 -0
  56. code_graph_builder/rag/tests/test_camel_agent.py +313 -0
  57. code_graph_builder/rag/tests/test_client.py +221 -0
  58. code_graph_builder/rag/tests/test_config.py +177 -0
  59. code_graph_builder/rag/tests/test_markdown_generator.py +240 -0
  60. code_graph_builder/rag/tests/test_prompt_templates.py +160 -0
  61. code_graph_builder/services/__init__.py +39 -0
  62. code_graph_builder/services/graph_service.py +465 -0
  63. code_graph_builder/services/kuzu_service.py +665 -0
  64. code_graph_builder/services/memory_service.py +171 -0
  65. code_graph_builder/settings.py +75 -0
  66. code_graph_builder/tests/ACCEPTANCE_CRITERIA_PHASE2.md +401 -0
  67. code_graph_builder/tests/__init__.py +1 -0
  68. code_graph_builder/tests/run_acceptance_check.py +378 -0
  69. code_graph_builder/tests/test_api_find.py +231 -0
  70. code_graph_builder/tests/test_api_find_integration.py +226 -0
  71. code_graph_builder/tests/test_basic.py +78 -0
  72. code_graph_builder/tests/test_c_api_extraction.py +388 -0
  73. code_graph_builder/tests/test_call_resolution_scenarios.py +504 -0
  74. code_graph_builder/tests/test_embedder.py +411 -0
  75. code_graph_builder/tests/test_integration_semantic.py +434 -0
  76. code_graph_builder/tests/test_mcp_protocol.py +298 -0
  77. code_graph_builder/tests/test_mcp_user_flow.py +190 -0
  78. code_graph_builder/tests/test_rag.py +404 -0
  79. code_graph_builder/tests/test_settings.py +135 -0
  80. code_graph_builder/tests/test_step1_graph_build.py +264 -0
  81. code_graph_builder/tests/test_step2_api_docs.py +323 -0
  82. code_graph_builder/tests/test_step3_embedding.py +278 -0
  83. code_graph_builder/tests/test_vector_store.py +552 -0
  84. code_graph_builder/tools/__init__.py +40 -0
  85. code_graph_builder/tools/graph_query.py +495 -0
  86. code_graph_builder/tools/semantic_search.py +387 -0
  87. code_graph_builder/types.py +333 -0
  88. code_graph_builder/utils/__init__.py +0 -0
  89. code_graph_builder/utils/path_utils.py +30 -0
  90. code_graph_builder-0.2.0.dist-info/METADATA +321 -0
  91. code_graph_builder-0.2.0.dist-info/RECORD +93 -0
  92. code_graph_builder-0.2.0.dist-info/WHEEL +4 -0
  93. code_graph_builder-0.2.0.dist-info/entry_points.txt +3 -0
@@ -0,0 +1,665 @@
1
+ """Kùzu embedded graph database service - No Docker required."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import json
6
+ import types
7
+ from collections.abc import Generator, Sequence
8
+ from contextlib import contextmanager
9
+ from pathlib import Path
10
+ from typing import TYPE_CHECKING, Any
11
+
12
+ from loguru import logger
13
+
14
+ from ..types import (
15
+ GraphData,
16
+ PropertyDict,
17
+ PropertyValue,
18
+ ResultRow,
19
+ )
20
+
21
+ if TYPE_CHECKING:
22
+ import kuzu
23
+
24
+
25
class KuzuIngestor:
    """Ingestor for writing code graph data to a Kùzu embedded database.

    Kùzu is an embedded graph database that requires no server or Docker.
    Nodes and relationships are buffered in memory and written in batches;
    every node label gets the same fixed-column node table keyed on
    ``qualified_name``.

    Example:
        >>> ingestor = KuzuIngestor("./my_graph.db")
        >>> with ingestor:
        ...     ingestor.ensure_node_batch("Function", {"name": "foo", "id": "1"})
        ...     ingestor.flush_all()
        ...     results = ingestor.query("MATCH (f:Function) RETURN f.name")
    """

    def __init__(self, db_path: str | Path, batch_size: int = 1000):
        """Initialize Kùzu ingestor.

        Args:
            db_path: Path to store the database files.
            batch_size: Number of buffered items that triggers an automatic flush.
        """
        self.db_path = Path(db_path)
        self.batch_size = batch_size
        self._db: kuzu.Database | None = None
        self._conn: kuzu.Connection | None = None
        # Pending (label, properties) pairs.
        self.node_buffer: list[tuple[str, dict[str, PropertyValue]]] = []
        # Pending (source, rel_type, target, properties) tuples, where source and
        # target are (label, key, value) triples.
        self.relationship_buffer: list[
            tuple[
                tuple[str, str, PropertyValue],
                str,
                tuple[str, str, PropertyValue],
                dict[str, PropertyValue] | None,
            ]
        ] = []
        self._initialized = False
        # (rel_type, from_label, to_label) combinations already handled by
        # _ensure_rel_schema during this session.
        self._rel_schema_cache: set[tuple[str, str, str]] = set()
        # Maps a (rel_type, from_label, to_label) combination to the dedicated
        # table name created for it when the plain rel_type table was bound to
        # a different label pair.
        self._rel_table_overrides: dict[tuple[str, str, str], str] = {}

    def __enter__(self) -> KuzuIngestor:
        """Enter context manager and open the database."""
        import kuzu

        logger.info(f"Opening Kùzu database at {self.db_path}")
        self.db_path.parent.mkdir(parents=True, exist_ok=True)
        self._db = kuzu.Database(str(self.db_path))
        self._conn = kuzu.Connection(self._db)
        logger.info("Kùzu database opened successfully")
        return self

    def __exit__(
        self,
        exc_type: type[BaseException] | None,
        exc_val: BaseException | None,
        exc_tb: types.TracebackType | None,
    ) -> None:
        """Exit context manager: flush pending data and drop the handles.

        When the body raised, a flush is still attempted on a best-effort
        basis; a failure of that flush is logged and not re-raised so it does
        not mask the original exception.
        """
        if exc_type:
            logger.exception(f"Exception during ingest: {exc_val}")
            try:
                self.flush_all()
            except Exception as flush_err:
                logger.error(f"Flush error during exception handling: {flush_err}")
        else:
            self.flush_all()

        # The handles are simply released; no explicit close is issued.
        if self._conn:
            self._conn = None
        if self._db:
            self._db = None
        logger.info("Kùzu database closed")

    @staticmethod
    def _quote_identifier(name: str) -> str:
        """Return *name* as a backtick-escaped Cypher identifier."""
        return "`" + str(name).replace("`", "``") + "`"

    def _safe_query(
        self,
        cypher: str,
        context: str,
        params: PropertyDict | None = None,
    ) -> list[ResultRow]:
        """Run ``query`` and turn any unexpected exception into an empty result."""
        try:
            return self.query(cypher, params)
        except Exception as e:
            logger.error(f"{context} error: {e}")
            return []

    def _list_tables(self) -> list[tuple[str, str]]:
        """Return ``(name, TYPE)`` for every table reported by ``show_tables()``.

        The columns are located by header name because the position of the
        ``name`` column differs between Kùzu releases; when no header matches,
        the first column is used as the name and the type is left empty.
        """
        result = self._conn.execute("CALL show_tables() RETURN *")
        headers = [str(col).lower() for col in result.get_column_names()]
        name_idx = headers.index("name") if "name" in headers else 0
        type_idx = headers.index("type") if "type" in headers else None

        tables: list[tuple[str, str]] = []
        while result.has_next():
            row = result.get_next()
            if not row:
                continue
            name = row[name_idx]
            if name is None or name == "":
                continue
            table_type = str(row[type_idx]).upper() if type_idx is not None else ""
            tables.append((str(name), table_type))
        return tables

    def _ensure_schema(self, label: str) -> None:
        """Ensure a node table exists for the given label.

        Raises:
            ConnectionError: If the database has not been opened.
        """
        if not self._conn:
            raise ConnectionError("Not connected to database")

        try:
            # Probe: a MATCH on an unknown label raises.
            self._conn.execute(f"MATCH (n:{label}) RETURN n LIMIT 1")
        except Exception:
            # Kùzu requires a predefined schema, so create the generic table.
            logger.info(f"Creating node table for label: {label}")
            try:
                self._conn.execute(f"""
                    CREATE NODE TABLE {label} (
                        qualified_name STRING,
                        name STRING,
                        path STRING,
                        start_line INT64,
                        end_line INT64,
                        docstring STRING,
                        return_type STRING,
                        signature STRING,
                        visibility STRING,
                        parameters STRING[],
                        kind STRING,
                        PRIMARY KEY (qualified_name)
                    )
                """)
            except Exception as e:
                logger.debug(f"Table creation may have failed (could already exist): {e}")

    def _ensure_rel_schema(self, rel_type: str, from_label: str, to_label: str) -> None:
        """Ensure a relationship table exists for the given label combination.

        Kùzu REL TABLEs are bound to specific (FROM, TO) node label pairs, so
        a single rel_type (e.g. DEFINES) may need several tables when it
        connects different pairs (Module→Function, Module→Class, ...). The
        first pair gets a table named ``rel_type``; later pairs get a table
        named ``{rel_type}_{from_label}_{to_label}`` which is recorded in
        ``_rel_table_overrides``.

        Raises:
            ConnectionError: If the database has not been opened.
        """
        if not self._conn:
            raise ConnectionError("Not connected to database")

        cache_key = (rel_type, from_label, to_label)
        if cache_key in self._rel_schema_cache:
            return
        # Recorded before the attempt: each combination is tried once per session.
        self._rel_schema_cache.add(cache_key)

        try:
            self._conn.execute(f"""
                CREATE REL TABLE {rel_type} (
                    FROM {from_label} TO {to_label},
                    MANY_MANY
                )
            """)
            logger.info(f"Created relationship table: {rel_type} ({from_label}→{to_label})")
            return
        except Exception:
            # Table may already exist — fall through and check the label pair.
            pass

        try:
            # Probe MATCH with the specific labels.
            self._conn.execute(
                f"MATCH (a:{from_label})-[r:{rel_type}]->(b:{to_label}) RETURN r LIMIT 1"
            )
            return
        except Exception:
            # Label pair not supported by the existing table.
            pass

        group_name = f"{rel_type}_{from_label}_{to_label}"
        try:
            self._conn.execute(f"""
                CREATE REL TABLE {group_name} (
                    FROM {from_label} TO {to_label},
                    MANY_MANY
                )
            """)
            logger.info(f"Created additional rel table: {group_name} ({from_label}→{to_label})")
            # flush_relationships consults this mapping to pick the table name.
            self._rel_table_overrides[cache_key] = group_name
        except Exception as e:
            logger.debug(f"Additional rel table creation failed: {e}")

    def ensure_node_batch(self, label: str, properties: PropertyDict) -> None:
        """Add a node to the batch buffer, flushing when the batch is full.

        Args:
            label: Node label (e.g., "Function", "Class").
            properties: Node properties dictionary; a shallow copy is stored.
        """
        self.node_buffer.append((label, properties.copy()))
        if len(self.node_buffer) >= self.batch_size:
            self.flush_nodes()

    def ensure_relationship_batch(
        self,
        source: tuple[str, str, PropertyValue],
        rel_type: str,
        target: tuple[str, str, PropertyValue],
        properties: PropertyDict | None = None,
    ) -> None:
        """Add a relationship to the batch buffer, flushing when the batch is full.

        Args:
            source: (label, key, value) tuple for source node.
            rel_type: Relationship type (e.g., "CALLS", "DEFINES").
            target: (label, key, value) tuple for target node.
            properties: Optional relationship properties (buffered, but not
                written by ``flush_relationships``).
        """
        self.relationship_buffer.append((source, rel_type, target, properties))
        if len(self.relationship_buffer) >= self.batch_size:
            self.flush_relationships()

    def _value_to_cypher(self, value: PropertyValue) -> str:
        """Convert a Python value to a Cypher literal.

        ``None`` becomes ``NULL``, booleans become ``true``/``false``, numbers
        are rendered with ``str``, lists are converted element-wise, and
        everything else is rendered as a single-quoted string.
        """
        if value is None:
            return "NULL"
        # bool must be tested before int: bool is a subclass of int.
        if isinstance(value, bool):
            return str(value).lower()
        if isinstance(value, (int, float)):
            return str(value)
        if isinstance(value, list):
            return f"[{', '.join(self._value_to_cypher(v) for v in value)}]"

        text = value if isinstance(value, str) else str(value)
        # Backslashes first, otherwise the backslash added for a quote would be
        # doubled and a trailing backslash would swallow the closing quote.
        escaped = text.replace("\\", "\\\\").replace("'", "\\'")
        return f"'{escaped}'"

    def flush_nodes(self) -> None:
        """Write all buffered nodes to the database and clear the buffer.

        Does nothing when the buffer is empty or the database is not open.
        A failure to create an individual node (for example a duplicate
        primary key) is logged at debug level and does not stop the flush.
        """
        if not self.node_buffer or not self._conn:
            return

        by_label: dict[str, list[PropertyDict]] = {}
        for label, props in self.node_buffer:
            by_label.setdefault(label, []).append(props)

        for label, nodes in by_label.items():
            self._ensure_schema(label)

            for props in nodes:
                try:
                    columns = {
                        "qualified_name": props.get("qualified_name", props.get("name", "")),
                        "name": props.get("name", ""),
                        "path": props.get("path", ""),
                        "start_line": props.get("start_line", 0),
                        "end_line": props.get("end_line", 0),
                        "docstring": props.get("docstring", ""),
                        "return_type": props.get("return_type", ""),
                        "signature": props.get("signature", ""),
                        "visibility": props.get("visibility", ""),
                        "parameters": props.get("parameters") or [],
                        "kind": props.get("kind", ""),
                    }
                    # Every value goes through _value_to_cypher so that None
                    # line numbers are written as NULL instead of "None".
                    assignments = ",\n".join(
                        f"{column}: {self._value_to_cypher(value)}"
                        for column, value in columns.items()
                    )
                    self._conn.execute(f"CREATE (n:{label} {{\n{assignments}\n}})")
                except Exception as e:
                    logger.debug(f"Error creating node: {e}")

        logger.debug(f"Flushed {len(self.node_buffer)} nodes")
        self.node_buffer = []

    def flush_relationships(self) -> None:
        """Write all buffered relationships to the database and clear the buffer.

        Does nothing when the buffer is empty or the database is not open.
        Duplicate relationships within the buffer are written once. Failures
        on individual relationships are logged at debug level.
        """
        if not self.relationship_buffer or not self._conn:
            return

        seen: set[tuple] = set()
        for source, rel_type, target, _props in self.relationship_buffer:
            from_label, from_key, from_val = source
            to_label, to_key, to_val = target

            # Labels are part of the key so equal values under different
            # labels are not mistaken for the same relationship.
            dedup_key = (from_label, from_val, rel_type, to_label, to_val)
            if dedup_key in seen:
                continue
            seen.add(dedup_key)

            self._ensure_rel_schema(rel_type, from_label, to_label)

            # Use the dedicated table if one was created for this label pair.
            actual_rel = self._rel_table_overrides.get(
                (rel_type, from_label, to_label), rel_type
            )

            try:
                cypher = f"""
                    MATCH (a:{from_label} {{{from_key}: {self._value_to_cypher(from_val)}}}),
                          (b:{to_label} {{{to_key}: {self._value_to_cypher(to_val)}}})
                    CREATE (a)-[:{actual_rel}]->(b)
                """
                self._conn.execute(cypher)
            except Exception as e:
                logger.debug(f"Error creating relationship: {e}")

        logger.debug(f"Flushed {len(self.relationship_buffer)} relationships")
        self.relationship_buffer = []

    def flush_all(self) -> None:
        """Flush all pending data: nodes first, then relationships."""
        self.flush_nodes()
        self.flush_relationships()

    def query(self, cypher_query: str, params: PropertyDict | None = None) -> list[ResultRow]:
        """Execute a Cypher query.

        Args:
            cypher_query: Cypher query string.
            params: Optional query parameters.

        Returns:
            List of result rows as dictionaries keyed by column name. An empty
            list is returned (and the error logged) when execution fails.

        Raises:
            ConnectionError: If the database has not been opened.
        """
        if not self._conn:
            raise ConnectionError("Not connected to database")

        try:
            result = self._conn.execute(cypher_query, parameters=params or {})
            col_names = result.get_column_names()
            rows = []
            while result.has_next():
                row = result.get_next()
                if col_names:
                    rows.append(dict(zip(col_names, row)))
                else:
                    rows.append({"result": row})
            return rows
        except Exception as e:
            logger.error(f"Query error: {e}")
            return []

    def fetch_all(self, cypher_query: str, params: PropertyDict | None = None) -> list[ResultRow]:
        """Alias for query(); satisfies GraphServiceProtocol."""
        return self.query(cypher_query, params)

    def fetch_module_apis(
        self,
        module_qn: str | None = None,
        visibility: str | None = "public",
    ) -> list[ResultRow]:
        """Fetch API interfaces (functions) for a module or the entire project.

        Args:
            module_qn: Qualified name of a module. If None (or empty), returns
                APIs across all modules.
            visibility: Accepted for interface compatibility but currently not
                applied as a filter.

        Returns:
            Rows with module, name, start_line and end_line; signature,
            return_type, visibility and parameters are returned as ''.

        Raises:
            ConnectionError: If the database has not been opened.
        """
        if not self._conn:
            raise ConnectionError("Not connected to database")

        # The module name is passed as a query parameter, never spliced into
        # the Cypher text.
        where_clause = "WHERE m.qualified_name = $module_qn" if module_qn else ""
        params = {"module_qn": module_qn} if module_qn else None

        cypher = f"""
            MATCH (m:Module)-[:DEFINES]->(f:Function)
            {where_clause}
            RETURN m.qualified_name AS module,
                   f.name AS name,
                   '' AS signature,
                   '' AS return_type,
                   '' AS visibility,
                   '' AS parameters,
                   f.start_line AS start_line,
                   f.end_line AS end_line
            ORDER BY m.qualified_name, f.start_line
        """
        return self._safe_query(cypher, "fetch_module_apis", params)

    def fetch_module_type_apis(
        self,
        module_qn: str | None = None,
    ) -> list[ResultRow]:
        """Fetch type API interfaces (Class and Type nodes) for a module.

        Args:
            module_qn: Qualified name of a module. If None (or empty), returns
                across all modules.

        Returns:
            Class rows (kind fixed to 'struct', empty signature and members)
            followed by Type rows (kind and signature read from the node).

        Raises:
            ConnectionError: If the database has not been opened.
        """
        if not self._conn:
            raise ConnectionError("Not connected to database")

        where_clause = "WHERE m.qualified_name = $module_qn" if module_qn else ""
        params = {"module_qn": module_qn} if module_qn else None

        class_cypher = f"""
            MATCH (m:Module)-[:DEFINES]->(c:Class)
            {where_clause}
            RETURN m.qualified_name AS module,
                   c.name AS name,
                   'struct' AS kind,
                   '' AS signature,
                   '' AS members,
                   c.start_line AS start_line,
                   c.end_line AS end_line
            ORDER BY m.qualified_name, c.start_line
        """
        type_cypher = f"""
            MATCH (m:Module)-[:DEFINES]->(t:Type)
            {where_clause}
            RETURN m.qualified_name AS module,
                   t.name AS name,
                   t.kind AS kind,
                   t.signature AS signature,
                   t.start_line AS start_line,
                   t.end_line AS end_line
            ORDER BY m.qualified_name, t.start_line
        """

        class_rows = self._safe_query(class_cypher, "fetch_module_type_apis (class)", params)
        type_rows = self._safe_query(type_cypher, "fetch_module_type_apis (type)", params)
        return class_rows + type_rows

    def fetch_all_calls(self) -> list[ResultRow]:
        """Fetch all CALLS relationships between Function nodes.

        Returns:
            Distinct rows with caller_qn, callee_qn, callee_path and
            callee_start_line.

        Raises:
            ConnectionError: If the database has not been opened.
        """
        if not self._conn:
            raise ConnectionError("Not connected to database")

        cypher = """
            MATCH (caller:Function)-[:CALLS]->(callee:Function)
            RETURN DISTINCT caller.qualified_name AS caller_qn,
                   callee.qualified_name AS callee_qn,
                   callee.path AS callee_path,
                   callee.start_line AS callee_start_line
        """
        return self._safe_query(cypher, "fetch_all_calls")

    def fetch_all_functions_for_docs(self) -> list[ResultRow]:
        """Fetch all functions with module info, docstring, and path for doc generation.

        Returns:
            Distinct rows with module_qn, module_path, func_qn, func_name,
            signature, return_type, visibility, parameters, docstring,
            start_line, end_line and path.

        Raises:
            ConnectionError: If the database has not been opened.
        """
        if not self._conn:
            raise ConnectionError("Not connected to database")

        cypher = """
            MATCH (m:Module)-[:DEFINES]->(f:Function)
            RETURN DISTINCT m.qualified_name AS module_qn,
                   m.path AS module_path,
                   f.qualified_name AS func_qn,
                   f.name AS func_name,
                   f.signature AS signature,
                   f.return_type AS return_type,
                   f.visibility AS visibility,
                   f.parameters AS parameters,
                   f.docstring AS docstring,
                   f.start_line AS start_line,
                   f.end_line AS end_line,
                   f.path AS path
            ORDER BY m.qualified_name, f.start_line
        """
        return self._safe_query(cypher, "fetch_all_functions_for_docs")

    def fetch_all_types_for_docs(self) -> list[ResultRow]:
        """Fetch all type definitions (Class and Type nodes) for doc generation.

        Returns:
            Class rows (with the node's ``parameters`` exposed as ``members``)
            followed by Type rows.

        Raises:
            ConnectionError: If the database has not been opened.
        """
        if not self._conn:
            raise ConnectionError("Not connected to database")

        class_cypher = """
            MATCH (m:Module)-[:DEFINES]->(c:Class)
            RETURN m.qualified_name AS module_qn,
                   c.name AS name,
                   c.kind AS kind,
                   c.signature AS signature,
                   c.parameters AS members,
                   c.start_line AS start_line,
                   c.end_line AS end_line
            ORDER BY m.qualified_name, c.start_line
        """
        type_cypher = """
            MATCH (m:Module)-[:DEFINES]->(t:Type)
            RETURN m.qualified_name AS module_qn,
                   t.name AS name,
                   t.kind AS kind,
                   t.signature AS signature,
                   t.start_line AS start_line,
                   t.end_line AS end_line
            ORDER BY m.qualified_name, t.start_line
        """
        context = "fetch_all_types_for_docs"
        return self._safe_query(class_cypher, context) + self._safe_query(type_cypher, context)

    def clean_database(self) -> None:
        """Drop every table in the database.

        Relationship tables are dropped before node tables because a node
        table cannot be dropped while a relationship table still refers to
        it. The in-memory relationship schema bookkeeping is reset afterwards
        so the tables are recreated on the next flush. Failures are logged,
        not raised.

        Raises:
            ConnectionError: If the database has not been opened.
        """
        if not self._conn:
            raise ConnectionError("Not connected to database")

        try:
            tables = self._list_tables()
            # Stable sort: REL / REL_GROUP entries first, everything else after.
            tables.sort(key=lambda entry: not entry[1].startswith("REL"))

            for table, _table_type in tables:
                try:
                    self._conn.execute(f"DROP TABLE {self._quote_identifier(table)}")
                except Exception as e:
                    logger.debug(f"Error dropping table {table}: {e}")

            logger.info("Database cleaned")
        except Exception as e:
            logger.error(f"Error cleaning database: {e}")
        finally:
            # The cached schema state describes tables that may no longer exist.
            self._rel_schema_cache = set()
            self._rel_table_overrides = {}

    def export_graph(self) -> GraphData:
        """Export the entire graph as GraphData.

        Returns:
            A dict with ``nodes`` (label plus all returned properties) and
            ``relationships`` (source/target qualified names plus type). On
            error, whatever was collected so far is returned.

        Raises:
            ConnectionError: If the database has not been opened.
        """
        if not self._conn:
            raise ConnectionError("Not connected to database")

        nodes = []
        relationships = []

        try:
            result = self._conn.execute("MATCH (n) RETURN n")
            while result.has_next():
                row = result.get_next()
                if row and len(row) > 0:
                    node = row[0]
                    nodes.append({
                        "label": node.get("_label", "Unknown"),
                        "properties": dict(node),
                    })

            result = self._conn.execute("MATCH (a)-[r]->(b) RETURN a, r, b")
            while result.has_next():
                row = result.get_next()
                if row and len(row) >= 3:
                    relationships.append({
                        "source": {"qualified_name": row[0].get("qualified_name", "")},
                        "type": row[1].get("_label", "UNKNOWN"),
                        "target": {"qualified_name": row[2].get("qualified_name", "")},
                    })

        except Exception as e:
            logger.error(f"Export error: {e}")

        return {"nodes": nodes, "relationships": relationships}

    def get_statistics(self) -> dict[str, Any]:
        """Get database statistics.

        Returns:
            A dict with ``node_count``, ``relationship_count``, ``node_labels``
            (per node table counts) and ``relationship_types`` (per
            relationship table counts). A table whose count query fails is
            reported as 0; other errors are logged and leave the remaining
            fields at their defaults.

        Raises:
            ConnectionError: If the database has not been opened.
        """
        if not self._conn:
            raise ConnectionError("Not connected to database")

        stats: dict[str, Any] = {
            "node_count": 0,
            "relationship_count": 0,
            "node_labels": {},
            "relationship_types": {},
        }

        try:
            result = self._conn.execute("MATCH (n) RETURN count(n) as count")
            if result.has_next():
                stats["node_count"] = result.get_next()[0]

            result = self._conn.execute("MATCH ()-[r]->() RETURN count(r) as count")
            if result.has_next():
                stats["relationship_count"] = result.get_next()[0]

            for table, table_type in self._list_tables():
                quoted = self._quote_identifier(table)
                if table_type.startswith("REL"):
                    bucket = stats["relationship_types"]
                    count_cypher = f"MATCH ()-[r:{quoted}]->() RETURN count(r) as count"
                else:
                    # Tables of unknown type are treated as node tables.
                    bucket = stats["node_labels"]
                    count_cypher = f"MATCH (n:{quoted}) RETURN count(n) as count"
                try:
                    count_result = self._conn.execute(count_cypher)
                    if count_result.has_next():
                        bucket[table] = count_result.get_next()[0]
                except Exception:
                    bucket[table] = 0

        except Exception as e:
            logger.error(f"Statistics error: {e}")

        return stats