code-graph-builder 0.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- code_graph_builder/__init__.py +82 -0
- code_graph_builder/builder.py +366 -0
- code_graph_builder/cgb_cli.py +32 -0
- code_graph_builder/cli.py +564 -0
- code_graph_builder/commands_cli.py +1288 -0
- code_graph_builder/config.py +340 -0
- code_graph_builder/constants.py +708 -0
- code_graph_builder/embeddings/__init__.py +40 -0
- code_graph_builder/embeddings/qwen3_embedder.py +573 -0
- code_graph_builder/embeddings/vector_store.py +584 -0
- code_graph_builder/examples/__init__.py +0 -0
- code_graph_builder/examples/example_configuration.py +276 -0
- code_graph_builder/examples/example_kuzu_usage.py +109 -0
- code_graph_builder/examples/example_semantic_search_full.py +347 -0
- code_graph_builder/examples/generate_wiki.py +915 -0
- code_graph_builder/examples/graph_export_example.py +100 -0
- code_graph_builder/examples/rag_example.py +206 -0
- code_graph_builder/examples/test_cli_demo.py +129 -0
- code_graph_builder/examples/test_embedding_api.py +153 -0
- code_graph_builder/examples/test_kuzu_local.py +190 -0
- code_graph_builder/examples/test_rag_redis.py +390 -0
- code_graph_builder/graph_updater.py +605 -0
- code_graph_builder/guidance/__init__.py +1 -0
- code_graph_builder/guidance/agent.py +123 -0
- code_graph_builder/guidance/prompts.py +74 -0
- code_graph_builder/guidance/toolset.py +264 -0
- code_graph_builder/language_spec.py +536 -0
- code_graph_builder/mcp/__init__.py +21 -0
- code_graph_builder/mcp/api_doc_generator.py +764 -0
- code_graph_builder/mcp/file_editor.py +207 -0
- code_graph_builder/mcp/pipeline.py +777 -0
- code_graph_builder/mcp/server.py +161 -0
- code_graph_builder/mcp/tools.py +1800 -0
- code_graph_builder/models.py +115 -0
- code_graph_builder/parser_loader.py +344 -0
- code_graph_builder/parsers/__init__.py +7 -0
- code_graph_builder/parsers/call_processor.py +306 -0
- code_graph_builder/parsers/call_resolver.py +139 -0
- code_graph_builder/parsers/definition_processor.py +796 -0
- code_graph_builder/parsers/factory.py +119 -0
- code_graph_builder/parsers/import_processor.py +293 -0
- code_graph_builder/parsers/structure_processor.py +145 -0
- code_graph_builder/parsers/type_inference.py +143 -0
- code_graph_builder/parsers/utils.py +134 -0
- code_graph_builder/rag/__init__.py +68 -0
- code_graph_builder/rag/camel_agent.py +429 -0
- code_graph_builder/rag/client.py +298 -0
- code_graph_builder/rag/config.py +239 -0
- code_graph_builder/rag/cypher_generator.py +67 -0
- code_graph_builder/rag/llm_backend.py +210 -0
- code_graph_builder/rag/markdown_generator.py +352 -0
- code_graph_builder/rag/prompt_templates.py +440 -0
- code_graph_builder/rag/rag_engine.py +640 -0
- code_graph_builder/rag/review_report.md +172 -0
- code_graph_builder/rag/tests/__init__.py +3 -0
- code_graph_builder/rag/tests/test_camel_agent.py +313 -0
- code_graph_builder/rag/tests/test_client.py +221 -0
- code_graph_builder/rag/tests/test_config.py +177 -0
- code_graph_builder/rag/tests/test_markdown_generator.py +240 -0
- code_graph_builder/rag/tests/test_prompt_templates.py +160 -0
- code_graph_builder/services/__init__.py +39 -0
- code_graph_builder/services/graph_service.py +465 -0
- code_graph_builder/services/kuzu_service.py +665 -0
- code_graph_builder/services/memory_service.py +171 -0
- code_graph_builder/settings.py +75 -0
- code_graph_builder/tests/ACCEPTANCE_CRITERIA_PHASE2.md +401 -0
- code_graph_builder/tests/__init__.py +1 -0
- code_graph_builder/tests/run_acceptance_check.py +378 -0
- code_graph_builder/tests/test_api_find.py +231 -0
- code_graph_builder/tests/test_api_find_integration.py +226 -0
- code_graph_builder/tests/test_basic.py +78 -0
- code_graph_builder/tests/test_c_api_extraction.py +388 -0
- code_graph_builder/tests/test_call_resolution_scenarios.py +504 -0
- code_graph_builder/tests/test_embedder.py +411 -0
- code_graph_builder/tests/test_integration_semantic.py +434 -0
- code_graph_builder/tests/test_mcp_protocol.py +298 -0
- code_graph_builder/tests/test_mcp_user_flow.py +190 -0
- code_graph_builder/tests/test_rag.py +404 -0
- code_graph_builder/tests/test_settings.py +135 -0
- code_graph_builder/tests/test_step1_graph_build.py +264 -0
- code_graph_builder/tests/test_step2_api_docs.py +323 -0
- code_graph_builder/tests/test_step3_embedding.py +278 -0
- code_graph_builder/tests/test_vector_store.py +552 -0
- code_graph_builder/tools/__init__.py +40 -0
- code_graph_builder/tools/graph_query.py +495 -0
- code_graph_builder/tools/semantic_search.py +387 -0
- code_graph_builder/types.py +333 -0
- code_graph_builder/utils/__init__.py +0 -0
- code_graph_builder/utils/path_utils.py +30 -0
- code_graph_builder-0.2.0.dist-info/METADATA +321 -0
- code_graph_builder-0.2.0.dist-info/RECORD +93 -0
- code_graph_builder-0.2.0.dist-info/WHEEL +4 -0
- code_graph_builder-0.2.0.dist-info/entry_points.txt +3 -0
|
@@ -0,0 +1,665 @@
|
|
|
1
|
+
"""Kùzu embedded graph database service - No Docker required."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import json
|
|
6
|
+
import types
|
|
7
|
+
from collections.abc import Generator, Sequence
|
|
8
|
+
from contextlib import contextmanager
|
|
9
|
+
from pathlib import Path
|
|
10
|
+
from typing import TYPE_CHECKING, Any
|
|
11
|
+
|
|
12
|
+
from loguru import logger
|
|
13
|
+
|
|
14
|
+
from ..types import (
|
|
15
|
+
GraphData,
|
|
16
|
+
PropertyDict,
|
|
17
|
+
PropertyValue,
|
|
18
|
+
ResultRow,
|
|
19
|
+
)
|
|
20
|
+
|
|
21
|
+
if TYPE_CHECKING:
|
|
22
|
+
import kuzu
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
class KuzuIngestor:
    """Buffered writer and query helper for a Kùzu embedded graph database.

    Kùzu is an embedded graph database that requires no server or Docker,
    which makes it convenient for local development and testing.

    Nodes and relationships are collected in in-memory buffers and written
    when a buffer reaches ``batch_size`` or when a ``flush_*`` method is
    called. The database connection only exists inside the ``with`` block;
    every query method raises ``ConnectionError`` outside of it.

    Example:
        >>> ingestor = KuzuIngestor("./my_graph.db")
        >>> with ingestor:
        ...     ingestor.ensure_node_batch("Function", {"name": "foo", "qualified_name": "m.foo"})
        ...     ingestor.flush_all()
        ...     results = ingestor.query("MATCH (f:Function) RETURN f.name")
    """

    def __init__(self, db_path: str | Path, batch_size: int = 1000):
        """Initialize Kùzu ingestor (does not open the database).

        Args:
            db_path: Path to store the database files
            batch_size: Number of buffered items that triggers an automatic flush
        """
        self.db_path = Path(db_path)
        self.batch_size = batch_size
        self._db: kuzu.Database | None = None
        self._conn: kuzu.Connection | None = None
        self.node_buffer: list[tuple[str, dict[str, PropertyValue]]] = []
        self.relationship_buffer: list[
            tuple[
                tuple[str, str, PropertyValue],
                str,
                tuple[str, str, PropertyValue],
                dict[str, PropertyValue] | None,
            ]
        ] = []
        self._initialized = False
        # (rel_type, from_label, to_label) triples whose table setup was already attempted.
        self._rel_schema_cache: set[tuple[str, str, str]] = set()
        # Maps a triple to the dedicated table name used when `rel_type` itself
        # is already bound to a different (FROM, TO) label pair.
        self._rel_table_overrides: dict[tuple[str, str, str], str] = {}

    def __enter__(self) -> KuzuIngestor:
        """Open (or create) the database at ``db_path`` and connect to it."""
        import kuzu

        logger.info(f"Opening Kùzu database at {self.db_path}")
        self.db_path.parent.mkdir(parents=True, exist_ok=True)
        self._db = kuzu.Database(str(self.db_path))
        self._conn = kuzu.Connection(self._db)
        logger.info("Kùzu database opened successfully")
        return self

    def __exit__(
        self,
        exc_type: type[BaseException] | None,
        exc_val: BaseException | None,
        exc_tb: types.TracebackType | None,
    ) -> None:
        """Flush pending data and drop the connection and database handles.

        When the ``with`` body raised, the flush is best-effort and its own
        errors are only logged so they do not mask the original exception.
        On the normal path a flush error propagates, but the handles are
        still released.
        """
        try:
            if exc_type:
                logger.exception(f"Exception during ingest: {exc_val}")
                try:
                    self.flush_all()
                except Exception as flush_err:
                    logger.error(f"Flush error during exception handling: {flush_err}")
            else:
                self.flush_all()
        finally:
            # The handles are released by dropping the references.
            self._conn = None
            if self._db:
                self._db = None
                logger.info("Kùzu database closed")

    # ------------------------------------------------------------------
    # Literal / identifier rendering
    # ------------------------------------------------------------------

    @staticmethod
    def _escape_string(text: str) -> str:
        """Escape ``text`` for use inside a single-quoted Cypher string literal."""
        # Backslashes first, otherwise the backslash added for quotes would be doubled.
        return text.replace("\\", "\\\\").replace("'", "\\'")

    @staticmethod
    def _quote_identifier(name: object) -> str:
        """Wrap a table name in backticks so unusual names remain valid identifiers."""
        return "`" + str(name).replace("`", "``") + "`"

    @staticmethod
    def _int_to_cypher(value: PropertyValue) -> str:
        """Render a value as an integer literal, or ``NULL`` if it is missing or not numeric."""
        if value is None:
            return "NULL"
        try:
            return str(int(value))
        except (TypeError, ValueError):
            return "NULL"

    def _value_to_cypher(self, value: PropertyValue) -> str:
        """Convert a Python value to a Cypher literal.

        ``None`` becomes ``NULL``, booleans become ``true``/``false``, numbers
        are rendered as-is, strings are quoted and escaped, lists are rendered
        element-wise, and anything else is stringified, escaped and quoted.
        """
        if value is None:
            return "NULL"
        # bool must be tested before int because bool is a subclass of int.
        if isinstance(value, bool):
            return str(value).lower()
        if isinstance(value, (int, float)):
            return str(value)
        if isinstance(value, list):
            return f"[{', '.join(self._value_to_cypher(item) for item in value)}]"
        return f"'{self._escape_string(str(value))}'"

    # ------------------------------------------------------------------
    # Schema management
    # ------------------------------------------------------------------

    def _ensure_schema(self, label: str) -> None:
        """Ensure a node table exists for ``label``.

        Kùzu requires a predefined schema, so every label gets the same generic
        column set keyed by ``qualified_name``.

        Raises:
            ConnectionError: If the database is not open.
        """
        if not self._conn:
            raise ConnectionError("Not connected to database")

        try:
            # Probe: the query fails when the table does not exist.
            self._conn.execute(f"MATCH (n:{label}) RETURN n LIMIT 1")
        except Exception:
            logger.info(f"Creating node table for label: {label}")
            try:
                self._conn.execute(f"""
                    CREATE NODE TABLE {label} (
                        qualified_name STRING,
                        name STRING,
                        path STRING,
                        start_line INT64,
                        end_line INT64,
                        docstring STRING,
                        return_type STRING,
                        signature STRING,
                        visibility STRING,
                        parameters STRING[],
                        kind STRING,
                        PRIMARY KEY (qualified_name)
                    )
                """)
            except Exception as e:
                logger.debug(f"Table creation may have failed (could already exist): {e}")

    def _create_rel_table(self, table: str, from_label: str, to_label: str) -> bool:
        """Try to create a MANY_MANY rel table; return whether the statement succeeded."""
        try:
            self._conn.execute(f"""
                CREATE REL TABLE {table} (
                    FROM {from_label} TO {to_label},
                    MANY_MANY
                )
            """)
        except Exception as e:
            logger.debug(f"Rel table {table} ({from_label}→{to_label}) not created: {e}")
            return False
        return True

    def _rel_table_connects(self, table: str, from_label: str, to_label: str) -> bool:
        """Return whether a probe MATCH over ``table`` between the two labels is accepted."""
        try:
            self._conn.execute(
                f"MATCH (a:{from_label})-[r:{table}]->(b:{to_label}) RETURN r LIMIT 1"
            )
        except Exception:
            return False
        return True

    def _ensure_rel_schema(self, rel_type: str, from_label: str, to_label: str) -> None:
        """Ensure a relationship table exists for the given label combination.

        Kùzu REL TABLEs are bound to specific (FROM, TO) node label pairs, so a
        single ``rel_type`` (e.g. DEFINES) that connects several label pairs
        (Module→Function, Module→Class, ...) needs more than one table. The
        first pair seen gets the table named ``rel_type``; every other pair
        gets a dedicated ``{rel_type}_{from_label}_{to_label}`` table that is
        recorded in ``_rel_table_overrides`` for ``flush_relationships``.

        Raises:
            ConnectionError: If the database is not open.
        """
        if not self._conn:
            raise ConnectionError("Not connected to database")

        cache_key = (rel_type, from_label, to_label)
        if cache_key in self._rel_schema_cache:
            return
        # Recorded before the attempt so a failing setup is not retried for
        # every single relationship of this kind.
        self._rel_schema_cache.add(cache_key)

        if self._create_rel_table(rel_type, from_label, to_label):
            logger.info(f"Created relationship table: {rel_type} ({from_label}→{to_label})")
            return

        # The table probably exists already; check whether it serves this pair.
        if self._rel_table_connects(rel_type, from_label, to_label):
            return

        group_name = f"{rel_type}_{from_label}_{to_label}"
        if self._create_rel_table(group_name, from_label, to_label):
            logger.info(f"Created additional rel table: {group_name} ({from_label}→{to_label})")
        elif not self._rel_table_connects(group_name, from_label, to_label):
            logger.debug(f"Additional rel table creation failed: {group_name}")
            return
        # Reached both for a freshly created table and for one that already
        # existed (e.g. the database was reopened), so the mapping is never lost.
        self._rel_table_overrides[cache_key] = group_name

    # ------------------------------------------------------------------
    # Buffered writes
    # ------------------------------------------------------------------

    def ensure_node_batch(self, label: str, properties: PropertyDict) -> None:
        """Add a node to the batch buffer, flushing when the buffer is full.

        Args:
            label: Node label (e.g., "Function", "Class")
            properties: Node properties dictionary (copied, so later caller
                mutations do not affect the buffered node)
        """
        self.node_buffer.append((label, properties.copy()))
        if len(self.node_buffer) >= self.batch_size:
            self.flush_nodes()

    def ensure_relationship_batch(
        self,
        source: tuple[str, str, PropertyValue],
        rel_type: str,
        target: tuple[str, str, PropertyValue],
        properties: PropertyDict | None = None,
    ) -> None:
        """Add a relationship to the batch buffer, flushing when the buffer is full.

        Args:
            source: (label, key, value) tuple for source node
            rel_type: Relationship type (e.g., "CALLS", "DEFINES")
            target: (label, key, value) tuple for target node
            properties: Optional relationship properties. They are buffered but
                not written: the rel tables are created without property columns.
        """
        self.relationship_buffer.append((source, rel_type, target, properties))
        if len(self.relationship_buffer) >= self.batch_size:
            self.flush_relationships()

    def flush_nodes(self) -> None:
        """Write all buffered nodes to the database and clear the buffer.

        Does nothing (and keeps the buffer) when there is no open connection.
        A node whose CREATE statement fails is logged at debug level and
        skipped; the remaining nodes are still written.
        """
        if not self.node_buffer or not self._conn:
            return

        by_label: dict[str, list[PropertyDict]] = {}
        for label, props in self.node_buffer:
            by_label.setdefault(label, []).append(props)

        to_literal = self._value_to_cypher
        failed = 0
        for label, nodes in by_label.items():
            self._ensure_schema(label)

            for props in nodes:
                # Fall back to the short name when no qualified name is given.
                qualified_name = props.get("qualified_name", props.get("name", ""))
                parameters = props.get("parameters")
                cypher = f"""
                    CREATE (n:{label} {{
                        qualified_name: {to_literal(qualified_name)},
                        name: {to_literal(props.get("name", ""))},
                        path: {to_literal(props.get("path", ""))},
                        start_line: {self._int_to_cypher(props.get("start_line", 0))},
                        end_line: {self._int_to_cypher(props.get("end_line", 0))},
                        docstring: {to_literal(props.get("docstring", ""))},
                        return_type: {to_literal(props.get("return_type", ""))},
                        signature: {to_literal(props.get("signature", ""))},
                        visibility: {to_literal(props.get("visibility", ""))},
                        parameters: {to_literal(parameters if parameters else [])},
                        kind: {to_literal(props.get("kind", ""))}
                    }})
                """
                try:
                    self._conn.execute(cypher)
                except Exception as e:
                    failed += 1
                    logger.debug(f"Error creating node: {e}")

        logger.debug(f"Flushed {len(self.node_buffer)} nodes ({failed} failed)")
        self.node_buffer = []

    def flush_relationships(self) -> None:
        """Write all buffered relationships to the database and clear the buffer.

        Does nothing (and keeps the buffer) when there is no open connection.
        Identical edges within one flush are written once. A failing statement
        is logged at debug level and skipped.
        """
        if not self.relationship_buffer or not self._conn:
            return

        failed = 0
        seen: set[tuple] = set()
        for source, rel_type, target, _props in self.relationship_buffer:
            from_label, from_key, from_val = source
            to_label, to_key, to_val = target

            # Labels are part of the key: equal values under different labels
            # identify different nodes and therefore different edges.
            dedup_key = (from_label, from_val, rel_type, to_label, to_val)
            if dedup_key in seen:
                continue
            seen.add(dedup_key)

            self._ensure_rel_schema(rel_type, from_label, to_label)

            # Use the dedicated table if one was set up for this label pair.
            actual_rel = self._rel_table_overrides.get(
                (rel_type, from_label, to_label), rel_type
            )
            cypher = f"""
                MATCH (a:{from_label} {{{from_key}: {self._value_to_cypher(from_val)}}}),
                      (b:{to_label} {{{to_key}: {self._value_to_cypher(to_val)}}})
                CREATE (a)-[:{actual_rel}]->(b)
            """
            try:
                self._conn.execute(cypher)
            except Exception as e:
                failed += 1
                logger.debug(f"Error creating relationship: {e}")

        logger.debug(
            f"Flushed {len(self.relationship_buffer)} relationships ({failed} failed)"
        )
        self.relationship_buffer = []

    def flush_all(self) -> None:
        """Flush all pending data, nodes first so relationships can match them."""
        self.flush_nodes()
        self.flush_relationships()

    # ------------------------------------------------------------------
    # Queries
    # ------------------------------------------------------------------

    def query(self, cypher_query: str, params: PropertyDict | None = None) -> list[ResultRow]:
        """Execute a Cypher query.

        Args:
            cypher_query: Cypher query string
            params: Optional query parameters (referenced as ``$name`` in the query)

        Returns:
            List of result rows as dictionaries keyed by column name. An empty
            list is also returned when the query fails; the error is logged.

        Raises:
            ConnectionError: If the database is not open.
        """
        if not self._conn:
            raise ConnectionError("Not connected to database")

        try:
            result = self._conn.execute(cypher_query, parameters=params or {})
            col_names = result.get_column_names()
            rows = []
            while result.has_next():
                row = result.get_next()
                if col_names:
                    rows.append(dict(zip(col_names, row)))
                else:
                    rows.append({"result": row})
            return rows
        except Exception as e:
            logger.error(f"Query error: {e}")
            return []

    def fetch_all(self, cypher_query: str, params: PropertyDict | None = None) -> list[ResultRow]:
        """Alias for query(); satisfies GraphServiceProtocol."""
        return self.query(cypher_query, params)

    @staticmethod
    def _module_filter(module_qn: str | None) -> tuple[str, dict[str, Any] | None]:
        """Build the optional ``WHERE`` clause (and its parameters) restricting ``m`` to one module."""
        if module_qn:
            # Passed as a query parameter so quotes/backslashes need no escaping.
            return "WHERE m.qualified_name = $module_qn", {"module_qn": module_qn}
        return "", None

    def fetch_module_apis(
        self,
        module_qn: str | None = None,
        visibility: str | None = "public",
    ) -> list[ResultRow]:
        """Fetch API interfaces (functions) for a module or the entire project.

        Args:
            module_qn: Qualified name of a module. If None, returns APIs across all modules.
            visibility: Accepted for interface compatibility but currently NOT
                applied as a filter, because Function nodes in C graphs may
                lack a visibility property.

        Returns:
            List of result rows with module, name, start_line and end_line.
            The signature, return_type, visibility and parameters columns are
            always returned as empty strings.

        Raises:
            ConnectionError: If the database is not open.
        """
        if not self._conn:
            raise ConnectionError("Not connected to database")

        where_clause, params = self._module_filter(module_qn)

        # Only universally-available properties are read to avoid schema
        # mismatches; the remaining columns are constant placeholders.
        cypher = f"""
            MATCH (m:Module)-[:DEFINES]->(f:Function)
            {where_clause}
            RETURN m.qualified_name AS module,
                   f.name AS name,
                   '' AS signature,
                   '' AS return_type,
                   '' AS visibility,
                   '' AS parameters,
                   f.start_line AS start_line,
                   f.end_line AS end_line
            ORDER BY m.qualified_name, f.start_line
        """
        return self.query(cypher, params)

    def fetch_module_type_apis(
        self,
        module_qn: str | None = None,
    ) -> list[ResultRow]:
        """Fetch type API interfaces (structs, unions, enums, typedefs) for a module.

        Args:
            module_qn: Qualified name of a module. If None, returns across all modules.

        Returns:
            Rows for Class nodes followed by rows for Type nodes. Class rows
            report the constant kind 'struct' and empty signature/members;
            Type rows carry the stored kind and signature and have no
            ``members`` column.

        Raises:
            ConnectionError: If the database is not open.
        """
        if not self._conn:
            raise ConnectionError("Not connected to database")

        where_clause, params = self._module_filter(module_qn)

        class_cypher = f"""
            MATCH (m:Module)-[:DEFINES]->(c:Class)
            {where_clause}
            RETURN m.qualified_name AS module,
                   c.name AS name,
                   'struct' AS kind,
                   '' AS signature,
                   '' AS members,
                   c.start_line AS start_line,
                   c.end_line AS end_line
            ORDER BY m.qualified_name, c.start_line
        """
        type_cypher = f"""
            MATCH (m:Module)-[:DEFINES]->(t:Type)
            {where_clause}
            RETURN m.qualified_name AS module,
                   t.name AS name,
                   t.kind AS kind,
                   t.signature AS signature,
                   t.start_line AS start_line,
                   t.end_line AS end_line
            ORDER BY m.qualified_name, t.start_line
        """
        # query() returns [] for a failing query, so one missing table does
        # not prevent the other half of the result from being returned.
        return self.query(class_cypher, params) + self.query(type_cypher, params)

    def fetch_all_calls(self) -> list[ResultRow]:
        """Fetch all CALLS relationships between Function nodes.

        Returns:
            Distinct rows with the columns caller_qn, callee_qn, callee_path
            and callee_start_line.

        Raises:
            ConnectionError: If the database is not open.
        """
        if not self._conn:
            raise ConnectionError("Not connected to database")

        cypher = """
            MATCH (caller:Function)-[:CALLS]->(callee:Function)
            RETURN DISTINCT caller.qualified_name AS caller_qn,
                   callee.qualified_name AS callee_qn,
                   callee.path AS callee_path,
                   callee.start_line AS callee_start_line
        """
        return self.query(cypher)

    def fetch_all_functions_for_docs(self) -> list[ResultRow]:
        """Fetch all functions with module info, docstring, and path for doc generation.

        Returns:
            Distinct rows with the columns module_qn, module_path, func_qn,
            func_name, signature, return_type, visibility, parameters,
            docstring, start_line, end_line and path.

        Raises:
            ConnectionError: If the database is not open.
        """
        if not self._conn:
            raise ConnectionError("Not connected to database")

        cypher = """
            MATCH (m:Module)-[:DEFINES]->(f:Function)
            RETURN DISTINCT m.qualified_name AS module_qn,
                   m.path AS module_path,
                   f.qualified_name AS func_qn,
                   f.name AS func_name,
                   f.signature AS signature,
                   f.return_type AS return_type,
                   f.visibility AS visibility,
                   f.parameters AS parameters,
                   f.docstring AS docstring,
                   f.start_line AS start_line,
                   f.end_line AS end_line,
                   f.path AS path
            ORDER BY m.qualified_name, f.start_line
        """
        return self.query(cypher)

    def fetch_all_types_for_docs(self) -> list[ResultRow]:
        """Fetch all type definitions (structs, unions, enums, typedefs) for doc generation.

        Returns:
            Rows for Class nodes (including a ``members`` column read from the
            ``parameters`` property) followed by rows for Type nodes.

        Raises:
            ConnectionError: If the database is not open.
        """
        if not self._conn:
            raise ConnectionError("Not connected to database")

        class_cypher = """
            MATCH (m:Module)-[:DEFINES]->(c:Class)
            RETURN m.qualified_name AS module_qn,
                   c.name AS name,
                   c.kind AS kind,
                   c.signature AS signature,
                   c.parameters AS members,
                   c.start_line AS start_line,
                   c.end_line AS end_line
            ORDER BY m.qualified_name, c.start_line
        """
        type_cypher = """
            MATCH (m:Module)-[:DEFINES]->(t:Type)
            RETURN m.qualified_name AS module_qn,
                   t.name AS name,
                   t.kind AS kind,
                   t.signature AS signature,
                   t.start_line AS start_line,
                   t.end_line AS end_line
            ORDER BY m.qualified_name, t.start_line
        """
        return self.query(class_cypher) + self.query(type_cypher)

    # ------------------------------------------------------------------
    # Maintenance / introspection
    # ------------------------------------------------------------------

    def _list_tables(self) -> list[tuple[str, str | None]]:
        """Return ``(name, kind)`` for every table reported by ``show_tables()``.

        Columns are located by name because their order is not relied upon;
        if no ``name`` column is reported the first column is used, and
        ``kind`` is ``None`` when no ``type`` column is reported.
        """
        result = self._conn.execute("CALL show_tables() RETURN *")
        columns = [str(col).lower() for col in result.get_column_names()]
        name_idx = columns.index("name") if "name" in columns else 0
        type_idx = columns.index("type") if "type" in columns else None

        tables: list[tuple[str, str | None]] = []
        while result.has_next():
            row = result.get_next()
            if not row or not row[name_idx]:
                continue
            kind = str(row[type_idx]).upper() if type_idx is not None else None
            tables.append((str(row[name_idx]), kind))
        return tables

    def _count(self, cypher: str) -> int:
        """Run a single-value counting query; return 0 if it fails or yields no row."""
        try:
            result = self._conn.execute(cypher)
            return result.get_next()[0] if result.has_next() else 0
        except Exception:
            return 0

    def clean_database(self) -> None:
        """Drop every table in the database and reset the cached schema state.

        Errors are logged rather than raised.

        Raises:
            ConnectionError: If the database is not open.
        """
        if not self._conn:
            raise ConnectionError("Not connected to database")

        try:
            tables = self._list_tables()
            # Relationship tables reference node tables, so drop them first
            # (False sorts before True; the sort is stable).
            tables.sort(key=lambda entry: entry[1] == "NODE")

            for table, _kind in tables:
                try:
                    self._conn.execute(f"DROP TABLE {self._quote_identifier(table)}")
                except Exception as e:
                    logger.debug(f"Error dropping table {table}: {e}")

            # The tables are gone, so they must be re-created on the next flush.
            self._rel_schema_cache.clear()
            self._rel_table_overrides.clear()
            logger.info("Database cleaned")
        except Exception as e:
            logger.error(f"Error cleaning database: {e}")

    def export_graph(self) -> GraphData:
        """Export the entire graph as GraphData.

        Returns:
            A dict with ``nodes`` (label + properties) and ``relationships``
            (source/target qualified names + type). On a query error the
            items collected so far are returned and the error is logged.

        Raises:
            ConnectionError: If the database is not open.
        """
        if not self._conn:
            raise ConnectionError("Not connected to database")

        nodes = []
        relationships = []

        try:
            result = self._conn.execute("MATCH (n) RETURN n")
            while result.has_next():
                row = result.get_next()
                if row and len(row) > 0:
                    node = row[0]
                    nodes.append({
                        "label": node.get("_label", "Unknown"),
                        "properties": dict(node),
                    })

            result = self._conn.execute("MATCH (a)-[r]->(b) RETURN a, r, b")
            while result.has_next():
                row = result.get_next()
                if row and len(row) >= 3:
                    src, rel, dst = row[0], row[1], row[2]
                    relationships.append({
                        "source": {"qualified_name": src.get("qualified_name", "")},
                        "type": rel.get("_label", "UNKNOWN"),
                        "target": {"qualified_name": dst.get("qualified_name", "")},
                    })

        except Exception as e:
            logger.error(f"Export error: {e}")

        return {"nodes": nodes, "relationships": relationships}

    def get_statistics(self) -> dict[str, Any]:
        """Get database statistics.

        Returns:
            A dict with ``node_count``, ``relationship_count``, and per-table
            counts in ``node_labels`` and ``relationship_types``. Counts that
            cannot be determined are reported as 0.

        Raises:
            ConnectionError: If the database is not open.
        """
        if not self._conn:
            raise ConnectionError("Not connected to database")

        stats: dict[str, Any] = {
            "node_count": 0,
            "relationship_count": 0,
            "node_labels": {},
            "relationship_types": {},
        }

        try:
            stats["node_count"] = self._count("MATCH (n) RETURN count(n) as count")
            stats["relationship_count"] = self._count(
                "MATCH ()-[r]->() RETURN count(r) as count"
            )

            for table, kind in self._list_tables():
                quoted = self._quote_identifier(table)
                # Without a reported table type, keep treating entries as node labels.
                if kind is None or kind == "NODE":
                    stats["node_labels"][table] = self._count(
                        f"MATCH (n:{quoted}) RETURN count(n) as count"
                    )
                else:
                    stats["relationship_types"][table] = self._count(
                        f"MATCH ()-[r:{quoted}]->() RETURN count(r) as count"
                    )

        except Exception as e:
            logger.error(f"Statistics error: {e}")

        return stats
|