code-graph-builder 0.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- code_graph_builder/__init__.py +82 -0
- code_graph_builder/builder.py +366 -0
- code_graph_builder/cgb_cli.py +32 -0
- code_graph_builder/cli.py +564 -0
- code_graph_builder/commands_cli.py +1288 -0
- code_graph_builder/config.py +340 -0
- code_graph_builder/constants.py +708 -0
- code_graph_builder/embeddings/__init__.py +40 -0
- code_graph_builder/embeddings/qwen3_embedder.py +573 -0
- code_graph_builder/embeddings/vector_store.py +584 -0
- code_graph_builder/examples/__init__.py +0 -0
- code_graph_builder/examples/example_configuration.py +276 -0
- code_graph_builder/examples/example_kuzu_usage.py +109 -0
- code_graph_builder/examples/example_semantic_search_full.py +347 -0
- code_graph_builder/examples/generate_wiki.py +915 -0
- code_graph_builder/examples/graph_export_example.py +100 -0
- code_graph_builder/examples/rag_example.py +206 -0
- code_graph_builder/examples/test_cli_demo.py +129 -0
- code_graph_builder/examples/test_embedding_api.py +153 -0
- code_graph_builder/examples/test_kuzu_local.py +190 -0
- code_graph_builder/examples/test_rag_redis.py +390 -0
- code_graph_builder/graph_updater.py +605 -0
- code_graph_builder/guidance/__init__.py +1 -0
- code_graph_builder/guidance/agent.py +123 -0
- code_graph_builder/guidance/prompts.py +74 -0
- code_graph_builder/guidance/toolset.py +264 -0
- code_graph_builder/language_spec.py +536 -0
- code_graph_builder/mcp/__init__.py +21 -0
- code_graph_builder/mcp/api_doc_generator.py +764 -0
- code_graph_builder/mcp/file_editor.py +207 -0
- code_graph_builder/mcp/pipeline.py +777 -0
- code_graph_builder/mcp/server.py +161 -0
- code_graph_builder/mcp/tools.py +1800 -0
- code_graph_builder/models.py +115 -0
- code_graph_builder/parser_loader.py +344 -0
- code_graph_builder/parsers/__init__.py +7 -0
- code_graph_builder/parsers/call_processor.py +306 -0
- code_graph_builder/parsers/call_resolver.py +139 -0
- code_graph_builder/parsers/definition_processor.py +796 -0
- code_graph_builder/parsers/factory.py +119 -0
- code_graph_builder/parsers/import_processor.py +293 -0
- code_graph_builder/parsers/structure_processor.py +145 -0
- code_graph_builder/parsers/type_inference.py +143 -0
- code_graph_builder/parsers/utils.py +134 -0
- code_graph_builder/rag/__init__.py +68 -0
- code_graph_builder/rag/camel_agent.py +429 -0
- code_graph_builder/rag/client.py +298 -0
- code_graph_builder/rag/config.py +239 -0
- code_graph_builder/rag/cypher_generator.py +67 -0
- code_graph_builder/rag/llm_backend.py +210 -0
- code_graph_builder/rag/markdown_generator.py +352 -0
- code_graph_builder/rag/prompt_templates.py +440 -0
- code_graph_builder/rag/rag_engine.py +640 -0
- code_graph_builder/rag/review_report.md +172 -0
- code_graph_builder/rag/tests/__init__.py +3 -0
- code_graph_builder/rag/tests/test_camel_agent.py +313 -0
- code_graph_builder/rag/tests/test_client.py +221 -0
- code_graph_builder/rag/tests/test_config.py +177 -0
- code_graph_builder/rag/tests/test_markdown_generator.py +240 -0
- code_graph_builder/rag/tests/test_prompt_templates.py +160 -0
- code_graph_builder/services/__init__.py +39 -0
- code_graph_builder/services/graph_service.py +465 -0
- code_graph_builder/services/kuzu_service.py +665 -0
- code_graph_builder/services/memory_service.py +171 -0
- code_graph_builder/settings.py +75 -0
- code_graph_builder/tests/ACCEPTANCE_CRITERIA_PHASE2.md +401 -0
- code_graph_builder/tests/__init__.py +1 -0
- code_graph_builder/tests/run_acceptance_check.py +378 -0
- code_graph_builder/tests/test_api_find.py +231 -0
- code_graph_builder/tests/test_api_find_integration.py +226 -0
- code_graph_builder/tests/test_basic.py +78 -0
- code_graph_builder/tests/test_c_api_extraction.py +388 -0
- code_graph_builder/tests/test_call_resolution_scenarios.py +504 -0
- code_graph_builder/tests/test_embedder.py +411 -0
- code_graph_builder/tests/test_integration_semantic.py +434 -0
- code_graph_builder/tests/test_mcp_protocol.py +298 -0
- code_graph_builder/tests/test_mcp_user_flow.py +190 -0
- code_graph_builder/tests/test_rag.py +404 -0
- code_graph_builder/tests/test_settings.py +135 -0
- code_graph_builder/tests/test_step1_graph_build.py +264 -0
- code_graph_builder/tests/test_step2_api_docs.py +323 -0
- code_graph_builder/tests/test_step3_embedding.py +278 -0
- code_graph_builder/tests/test_vector_store.py +552 -0
- code_graph_builder/tools/__init__.py +40 -0
- code_graph_builder/tools/graph_query.py +495 -0
- code_graph_builder/tools/semantic_search.py +387 -0
- code_graph_builder/types.py +333 -0
- code_graph_builder/utils/__init__.py +0 -0
- code_graph_builder/utils/path_utils.py +30 -0
- code_graph_builder-0.2.0.dist-info/METADATA +321 -0
- code_graph_builder-0.2.0.dist-info/RECORD +93 -0
- code_graph_builder-0.2.0.dist-info/WHEEL +4 -0
- code_graph_builder-0.2.0.dist-info/entry_points.txt +3 -0
|
@@ -0,0 +1,387 @@
|
|
|
1
|
+
"""Semantic search tools for code using embeddings.
|
|
2
|
+
|
|
3
|
+
This module provides tools for semantic code search using vector embeddings.
|
|
4
|
+
Integrates with both Memgraph and Kuzu backends for graph data retrieval.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
from __future__ import annotations
|
|
8
|
+
|
|
9
|
+
from dataclasses import dataclass
|
|
10
|
+
from pathlib import Path
|
|
11
|
+
from typing import TYPE_CHECKING, Protocol, runtime_checkable
|
|
12
|
+
|
|
13
|
+
from loguru import logger
|
|
14
|
+
|
|
15
|
+
from ..embeddings.qwen3_embedder import BaseEmbedder
|
|
16
|
+
from ..embeddings.vector_store import SearchResult, VectorStore
|
|
17
|
+
|
|
18
|
+
if TYPE_CHECKING:
|
|
19
|
+
from ..services import IngestorProtocol, QueryProtocol
|
|
20
|
+
from ..types import ResultRow
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
@dataclass
class SemanticSearchResult:
    """Result from semantic code search.

    Flat record combining a vector-store hit with optional location data
    fetched from the graph database. The optional fields stay ``None`` when
    no graph enrichment was performed or the graph returned no value.

    Attributes:
        node_id: Node identifier in graph database
        qualified_name: Fully qualified name of code entity
        name: Simple name of the entity
        type: Entity type (Function, Class, Method, etc.); the service in
            this module uses "Unknown" when the type cannot be determined
        score: Similarity score (0-1)
        source_code: Source code if available
        file_path: File path if available
        start_line: Start line number
        end_line: End line number
    """

    # Required fields: always populated from the vector-store hit.
    node_id: int
    qualified_name: str
    name: str
    type: str
    score: float
    # Optional fields: populated only from graph data.
    source_code: str | None = None
    file_path: str | None = None
    start_line: int | None = None
    end_line: int | None = None
|
|
48
|
+
|
|
49
|
+
|
|
50
|
+
@runtime_checkable
class GraphServiceProtocol(Protocol):
    """Protocol for graph service operations needed by semantic search.

    Structural interface: any object providing both methods satisfies it.
    Because of ``@runtime_checkable``, ``isinstance`` checks only verify that
    the methods exist, not their signatures.
    """

    # Run a parameterised query and return every result row. This is the
    # only method the code in this module actually calls.
    def fetch_all(self, query: str, params: dict | None = None) -> list[ResultRow]: ...

    # Run a Cypher query and return every result row. Not called from this
    # module; presumably kept for backend compatibility — TODO confirm.
    def query(self, cypher: str, params: dict | None = None) -> list[ResultRow]: ...
|
|
57
|
+
|
|
58
|
+
|
|
59
|
+
class SemanticSearchService:
    """Service for semantic code search.

    Combines vector similarity search with graph database queries
    to provide rich semantic search capabilities.

    Example:
        >>> from code_graph_builder.embeddings import create_embedder, create_vector_store
        >>> from code_graph_builder.services import MemgraphIngestor
        >>> from code_graph_builder.tools.semantic_search import SemanticSearchService
        >>>
        >>> embedder = create_embedder()
        >>> vector_store = create_vector_store(backend="memory", dimension=1536)
        >>>
        >>> with MemgraphIngestor("localhost", 7687) as ingestor:
        ...     service = SemanticSearchService(
        ...         embedder=embedder,
        ...         vector_store=vector_store,
        ...         graph_service=ingestor
        ...     )
        ...     results = service.search("recursive fibonacci implementation", top_k=5)
    """

    def __init__(
        self,
        embedder: BaseEmbedder,
        vector_store: VectorStore,
        graph_service: GraphServiceProtocol | None = None,
    ):
        """Initialize semantic search service.

        Args:
            embedder: Embedder for generating query embeddings
            vector_store: Vector store for similarity search
            graph_service: Optional graph service for retrieving full node data
        """
        self.embedder = embedder
        self.vector_store = vector_store
        self.graph_service = graph_service

    def search(
        self,
        query: str,
        top_k: int = 5,
        entity_types: list[str] | None = None,
    ) -> list[SemanticSearchResult]:
        """Search for code semantically similar to the query.

        Best-effort: any exception raised while embedding, searching or
        enriching is logged and an empty list is returned.

        Args:
            query: Natural language query describing what to find
            top_k: Number of results to return
            entity_types: Optional filter for entity types (Function, Class, etc.).
                With several types, one filtered search is run per type and the
                hits are merged by score.

        Returns:
            List of semantic search results
        """
        try:
            # Generate query embedding
            query_embedding = self.embedder.embed_query(query)

            # Search vector store (honouring every requested entity type)
            vector_results = self._search_vectors(query_embedding, top_k, entity_types)
            if not vector_results:
                return []

            # Enrich with graph data if available
            if self.graph_service:
                return self._enrich_results_from_graph(vector_results)
            return self._convert_results(vector_results)

        except Exception as e:
            logger.error(f"Semantic search failed: {e}")
            return []

    def _search_vectors(
        self,
        query_embedding,
        top_k: int,
        entity_types: list[str] | None,
    ) -> list[SearchResult]:
        """Query the vector store, applying the entity-type filter if any.

        The store is filtered through ``filter_metadata={"type": ...}``, one
        type per call. For several types the per-type hits are merged, keeping
        the best-scoring hit per ``node_id``, sorted by descending score and
        truncated to ``top_k``. Previously a multi-type filter was dropped and
        an unfiltered search was run instead.
        """
        # De-duplicate while keeping the caller's order.
        wanted_types = list(dict.fromkeys(entity_types or []))

        if len(wanted_types) <= 1:
            filter_metadata = {"type": wanted_types[0]} if wanted_types else None
            return self.vector_store.search_similar(
                query_embedding, top_k=top_k, filter_metadata=filter_metadata
            )

        best_by_node: dict = {}
        for entity_type in wanted_types:
            hits = self.vector_store.search_similar(
                query_embedding, top_k=top_k, filter_metadata={"type": entity_type}
            )
            for hit in hits:
                known = best_by_node.get(hit.node_id)
                if known is None or hit.score > known.score:
                    best_by_node[hit.node_id] = hit

        # Assumes a higher score means a closer match, as documented on
        # SemanticSearchResult ("Similarity score (0-1)").
        ranked = sorted(best_by_node.values(), key=lambda hit: hit.score, reverse=True)
        return ranked[:top_k]

    @staticmethod
    def _simple_name(qualified_name: str) -> str:
        """Return the last dot-separated component of a qualified name."""
        return qualified_name.rsplit(".", 1)[-1]

    def _convert_results(self, vector_results: list[SearchResult]) -> list[SemanticSearchResult]:
        """Convert vector search results to semantic search results.

        No graph data is consulted, so ``type`` is always "Unknown" and the
        optional location fields keep their ``None`` defaults.
        """
        return [
            SemanticSearchResult(
                node_id=vr.node_id,
                qualified_name=vr.qualified_name,
                name=self._simple_name(vr.qualified_name),
                type="Unknown",
                score=vr.score,
            )
            for vr in vector_results
        ]

    def _enrich_results_from_graph(
        self, vector_results: list[SearchResult]
    ) -> list[SemanticSearchResult]:
        """Enrich vector search results with data from graph database.

        Falls back to the plain conversion when there is no graph service or
        the graph query fails. Hits without a matching graph row are kept and
        filled with defaults.
        """
        if not self.graph_service:
            return self._convert_results(vector_results)

        qnames = [vr.qualified_name for vr in vector_results]
        query = self._build_nodes_query(qnames)

        try:
            graph_results = self.graph_service.fetch_all(query, {"qnames": qnames})
            graph_data_map = {
                row.get("qualified_name", ""): row for row in graph_results
            }
        except Exception as e:
            logger.warning(f"Failed to enrich results from graph: {e}")
            return self._convert_results(vector_results)

        results: list[SemanticSearchResult] = []
        for vr in vector_results:
            graph_data = graph_data_map.get(vr.qualified_name, {})
            results.append(
                SemanticSearchResult(
                    node_id=vr.node_id,
                    qualified_name=vr.qualified_name,
                    name=graph_data.get("name") or self._simple_name(vr.qualified_name),
                    # `or` also covers a column that is present but null.
                    type=graph_data.get("type") or "Unknown",
                    score=vr.score,
                    source_code=graph_data.get("source_code"),
                    file_path=graph_data.get("path") or None,
                    start_line=graph_data.get("start_line"),
                    end_line=graph_data.get("end_line"),
                )
            )
        return results

    def _build_nodes_query(self, qualified_names: list[str]) -> str:
        """Build Cypher query to fetch node details by qualified names.

        The names themselves are passed separately as the ``$qnames``
        parameter, so ``qualified_names`` is not interpolated into the text.
        The query only matches ``Function`` nodes defined by a ``Module``, so
        the ``type`` column is returned as that literal label; this is the
        column the enrichment step reads.
        """
        return """
            MATCH (m:Module)-[:DEFINES]->(f:Function)
            WHERE f.qualified_name IN $qnames
            RETURN DISTINCT f.qualified_name AS qualified_name,
                   f.name AS name,
                   'Function' AS type,
                   m.path AS path,
                   f.start_line AS start_line,
                   f.end_line AS end_line
        """

    def get_source_code(self, node_id: int) -> str | None:
        """Get source code for a specific node by ID.

        Args:
            node_id: Node identifier

        Returns:
            Source code string or None if not found (including when the node
            exists but stores no source code)
        """
        if not self.graph_service:
            return None

        query = """
            MATCH (n)
            WHERE n.node_id = $node_id
               OR n.id = $node_id
               OR id(n) = $node_id
            RETURN n.source_code AS source_code,
                   n.path AS path,
                   n.start_line AS start_line,
                   n.end_line AS end_line
        """

        try:
            results = self.graph_service.fetch_all(query, {"node_id": node_id})
            if results:
                source = results[0].get("source_code")
                # A null property must not be stringified into "None".
                if source is None:
                    return None
                return str(source) or None
        except Exception as e:
            logger.warning(f"Failed to get source code for node {node_id}: {e}")

        return None

    def get_source_from_file(
        self,
        file_path: str,
        start_line: int,
        end_line: int,
        repo_path: Path | None = None,
    ) -> str | None:
        """Extract source code from file.

        Args:
            file_path: Path to the file
            start_line: Start line (1-indexed)
            end_line: End line (inclusive)
            repo_path: Repository root path for resolving relative paths

        Returns:
            Source code string or None if extraction fails
        """
        try:
            path = Path(file_path)
            if repo_path and not path.is_absolute():
                path = repo_path / path

            if not path.exists():
                return None

            # Undecodable bytes are dropped rather than failing the read.
            with open(path, "r", encoding="utf-8", errors="ignore") as f:
                lines = f.readlines()

            # Adjust for 1-indexed lines and clamp to the file's bounds
            start_idx = max(0, start_line - 1)
            end_idx = min(len(lines), end_line)

            return "".join(lines[start_idx:end_idx])

        except Exception as e:
            logger.debug(f"Failed to extract source from {file_path}: {e}")
            return None
|
|
285
|
+
|
|
286
|
+
|
|
287
|
+
def create_semantic_search_service(
    embedder: BaseEmbedder,
    vector_store: VectorStore,
    graph_service: GraphServiceProtocol | None = None,
) -> SemanticSearchService:
    """Build a SemanticSearchService from its collaborators.

    Thin factory: the arguments are forwarded unchanged to the
    SemanticSearchService constructor.

    Args:
        embedder: Embedder instance
        vector_store: Vector store instance
        graph_service: Optional graph service for data enrichment

    Returns:
        Configured SemanticSearchService
    """
    collaborators = {
        "embedder": embedder,
        "vector_store": vector_store,
        "graph_service": graph_service,
    }
    return SemanticSearchService(**collaborators)
|
|
307
|
+
|
|
308
|
+
|
|
309
|
+
# Convenience functions for direct use
|
|
310
|
+
|
|
311
|
+
|
|
312
|
+
def semantic_code_search(
    query: str,
    embedder: BaseEmbedder,
    vector_store: VectorStore,
    graph_service: GraphServiceProtocol | None = None,
    top_k: int = 5,
) -> list[SemanticSearchResult]:
    """Run a single semantic code search.

    Shortcut for callers that do not want to keep a service object around:
    a throwaway SemanticSearchService is created and queried once.

    Args:
        query: Natural language query
        embedder: Embedder for query encoding
        vector_store: Vector store to search
        graph_service: Optional graph service for enrichment
        top_k: Number of results

    Returns:
        List of search results
    """
    return SemanticSearchService(
        embedder=embedder,
        vector_store=vector_store,
        graph_service=graph_service,
    ).search(query, top_k=top_k)
|
|
339
|
+
|
|
340
|
+
|
|
341
|
+
def get_function_source_by_node_id(
    node_id: int,
    graph_service: GraphServiceProtocol,
    repo_path: Path | None = None,
) -> str | None:
    """Get function source code by node ID.

    First asks the graph for a stored ``source_code`` value. If there is
    none and ``repo_path`` is given, the node's file location is looked up
    and the lines are read from disk. Failures are logged, not raised.

    Args:
        node_id: Node identifier
        graph_service: Graph service to query
        repo_path: Repository path for file resolution; without it the
            file fallback is skipped

    Returns:
        Source code or None
    """
    service = SemanticSearchService(
        embedder=None,  # Not needed for this operation
        vector_store=None,
        graph_service=graph_service,
    )

    # Try to get from graph first
    source = service.get_source_code(node_id)
    if source:
        return source

    # Try to get from file
    query = """
        MATCH (n)
        WHERE n.node_id = $node_id OR n.id = $node_id OR id(n) = $node_id
        RETURN n.path AS path, n.start_line AS start_line, n.end_line AS end_line
    """

    try:
        results = graph_service.fetch_all(query, {"node_id": node_id})
        if not (results and repo_path):
            return None

        row = results[0]
        file_path = row.get("path")
        start_line = row.get("start_line")
        end_line = row.get("end_line")

        # An incomplete location cannot be resolved. Previously a null path
        # was stringified to "None" and missing line numbers collapsed to 0,
        # which produced an empty or wrongly-ranged slice.
        if not file_path or not start_line or not end_line:
            return None

        return service.get_source_from_file(
            str(file_path),
            int(start_line),
            int(end_line),
            repo_path,
        )
    except Exception as e:
        logger.warning(f"Failed to get source for node {node_id}: {e}")

    return None
|
|
@@ -0,0 +1,333 @@
|
|
|
1
|
+
"""Code Graph Builder - Type Definitions."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from collections.abc import ItemsView, KeysView, Sequence
|
|
6
|
+
from dataclasses import dataclass, field
|
|
7
|
+
from enum import StrEnum
|
|
8
|
+
from pathlib import Path
|
|
9
|
+
from typing import TYPE_CHECKING, Callable, NamedTuple, Protocol, TypedDict
|
|
10
|
+
|
|
11
|
+
from .constants import NodeLabel, RelationshipType, SupportedLanguage
|
|
12
|
+
|
|
13
|
+
if TYPE_CHECKING:
|
|
14
|
+
from tree_sitter import Language, Node, Parser, Query
|
|
15
|
+
|
|
16
|
+
from .models import LanguageSpec
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
# Basic type aliases
# PropertyValue: one value of a node/relationship property map.
PropertyValue = str | int | float | bool | list[str] | None
# PropertyDict: property name -> property value.
PropertyDict = dict[str, PropertyValue]

# ResultScalar: a single scalar cell in a query result.
ResultScalar = str | int | float | bool | None
# ResultValue: a result cell, either scalar, list of scalars, or string-keyed map of scalars.
ResultValue = ResultScalar | list[ResultScalar] | dict[str, ResultScalar]
# ResultRow: one query result row, keyed by column name/alias.
ResultRow = dict[str, ResultValue]
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
# Node and relationship types
|
|
29
|
+
class NodeType(StrEnum):
    """Kinds of code entity, as a string-valued enum.

    Being a ``StrEnum``, each member compares equal to its string value
    (e.g. ``NodeType.FUNCTION == "Function"``).
    """

    FUNCTION = "Function"
    METHOD = "Method"
    CLASS = "Class"
    MODULE = "Module"
    INTERFACE = "Interface"
    PACKAGE = "Package"
    ENUM = "Enum"
    TYPE = "Type"
    UNION = "Union"
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
# Type aliases for function registry
# SimpleName / QualifiedName are plain ``str``; the aliases only document intent.
SimpleName = str
QualifiedName = str
# Maps a simple name to every qualified name that shares it.
SimpleNameLookup = dict[SimpleName, set[QualifiedName]]
# Recursive trie node; the string forward reference lets the alias refer to itself.
TrieNode = dict[str, "TrieNode | QualifiedName | NodeType"]
# Maps a qualified name to the kind of entity registered under it.
FunctionRegistry = dict[QualifiedName, NodeType]
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
# AST types (use string literal for forward reference since Node is TYPE_CHECKING only)
# NOTE(review): at runtime this is just the str "Node"; whether type checkers
# accept it as an alias for tree_sitter.Node is not verified here — confirm.
ASTNode = "Node"
|
|
51
|
+
|
|
52
|
+
|
|
53
|
+
# Graph data types
|
|
54
|
+
class GraphMetadata(TypedDict):
    """Summary metadata carried alongside graph data.

    ``exported_at`` is a string; presumably an export timestamp, format
    not visible here — TODO confirm.
    """

    total_nodes: int
    total_relationships: int
    exported_at: str
|
|
58
|
+
|
|
59
|
+
|
|
60
|
+
class NodeData(TypedDict):
    """Serialised form of one graph node: id, labels and property map."""

    node_id: int
    labels: list[str]
    properties: dict[str, PropertyValue]
|
|
64
|
+
|
|
65
|
+
|
|
66
|
+
class RelationshipData(TypedDict):
    """Serialised form of one relationship.

    ``from_id`` and ``to_id`` are ints; presumably the ``node_id`` values of
    the endpoint nodes — TODO confirm.
    """

    from_id: int
    to_id: int
    type: str
    properties: dict[str, PropertyValue]
|
|
71
|
+
|
|
72
|
+
|
|
73
|
+
class GraphData(TypedDict):
    """Whole-graph payload: nodes, relationships and summary metadata.

    Nodes and relationships may be given either as typed records or as raw
    query result rows.
    """

    nodes: list[NodeData] | list[ResultRow]
    relationships: list[RelationshipData] | list[ResultRow]
    metadata: GraphMetadata
|
|
77
|
+
|
|
78
|
+
|
|
79
|
+
class GraphSummary(TypedDict):
    """Aggregate statistics for a graph.

    ``node_labels`` and ``relationship_types`` map a name to an int,
    presumably an occurrence count — TODO confirm.
    """

    total_nodes: int
    total_relationships: int
    node_labels: dict[str, int]
    relationship_types: dict[str, int]
    metadata: GraphMetadata
|
|
85
|
+
|
|
86
|
+
|
|
87
|
+
# Batch types
|
|
88
|
+
class NodeBatchRow(TypedDict):
    """One row of a node batch: an ``id`` value plus the node's property map."""

    id: PropertyValue
    props: PropertyDict
|
|
91
|
+
|
|
92
|
+
|
|
93
|
+
class RelBatchRow(TypedDict):
    """One row of a relationship batch.

    ``from_val`` and ``to_val`` presumably identify the endpoint nodes
    (TODO confirm); ``props`` is the relationship's property map.
    """

    from_val: PropertyValue
    to_val: PropertyValue
    props: PropertyDict
|
|
97
|
+
|
|
98
|
+
|
|
99
|
+
# One element of a batch: a node row, a relationship row, or a plain property map.
BatchParams = NodeBatchRow | RelBatchRow | PropertyDict
|
|
100
|
+
|
|
101
|
+
|
|
102
|
+
class BatchWrapper(TypedDict):
    """Query parameter object holding a sequence of rows under the ``batch`` key."""

    batch: Sequence[BatchParams]
|
|
104
|
+
|
|
105
|
+
|
|
106
|
+
# Language query types
|
|
107
|
+
class LanguageQueries(TypedDict, total=False):
    """Per-language bundle of tree-sitter queries and parsing objects.

    ``total=False`` makes every key optional. The query entries may also be
    ``None``. ``Query``, ``Language`` and ``Parser`` come from ``tree_sitter``
    and are imported for type checking only.
    """

    functions: Query | None
    classes: Query | None
    calls: Query | None
    imports: Query | None
    locals: Query | None
    typedefs: Query | None
    macros: Query | None
    config: LanguageSpec
    language: Language
    parser: Parser
|
|
118
|
+
|
|
119
|
+
|
|
120
|
+
# Function match types
|
|
121
|
+
class FunctionMatch(TypedDict):
    """A function found in a syntax tree, with its names and position.

    ``node`` is a tree-sitter ``Node``; ``parent_class`` is ``None`` or a
    string, presumably the enclosing class name — TODO confirm.
    """

    node: Node
    simple_name: str
    qualified_name: str
    parent_class: str | None
    line_number: int
|
|
127
|
+
|
|
128
|
+
|
|
129
|
+
# Embedding query result
|
|
130
|
+
class EmbeddingQueryResult(TypedDict):
    """Row shape for embedding-related node queries.

    Identifies a node and, where known, its source location; the location
    fields may be ``None``.
    """

    node_id: int
    qualified_name: str
    start_line: int | None
    end_line: int | None
    path: str | None
|
|
136
|
+
|
|
137
|
+
|
|
138
|
+
# Build result
|
|
139
|
+
@dataclass
class BuildResult:
    """Result of building a code graph.

    Attributes:
        project_name: Name of the project that was built
        nodes_created: Count reported for created nodes
        relationships_created: Count reported for created relationships
        functions_found: Count reported for functions
        classes_found: Count reported for classes
        files_processed: Count reported for processed files
        errors: Error messages; optional, defaults to a fresh empty list
    """

    project_name: str
    nodes_created: int
    relationships_created: int
    functions_found: int
    classes_found: int
    files_processed: int
    # default_factory gives every instance its own list, so callers with
    # nothing to report may omit the argument.
    errors: list[str] = field(default_factory=list)

    def __post_init__(self) -> None:
        """Normalise an explicit ``errors=None`` to an empty list."""
        if self.errors is None:
            self.errors = []
|
|
154
|
+
|
|
155
|
+
|
|
156
|
+
# Protocols
|
|
157
|
+
class FunctionRegistryTrieProtocol(Protocol):
    """Structural interface of a function registry.

    Mapping-like access from qualified name to NodeType, plus lookups by
    qualified-name prefix and suffix.
    """

    # Mapping-style access keyed by qualified name.
    def __contains__(self, qualified_name: QualifiedName) -> bool: ...
    def __getitem__(self, qualified_name: QualifiedName) -> NodeType: ...
    def __setitem__(self, qualified_name: QualifiedName, func_type: NodeType) -> None: ...
    def get(self, qualified_name: QualifiedName, default: NodeType | None = None) -> NodeType | None: ...
    def keys(self) -> KeysView[QualifiedName]: ...
    def items(self) -> ItemsView[QualifiedName, NodeType]: ...
    # Pattern lookups: prefix search returns (name, type) pairs, suffix search returns names only.
    def find_with_prefix(self, prefix: str) -> list[tuple[QualifiedName, NodeType]]: ...
    def find_ending_with(self, suffix: str) -> list[QualifiedName]: ...
|
|
166
|
+
|
|
167
|
+
|
|
168
|
+
class ASTCacheProtocol(Protocol):
    """Structural interface of a cache keyed by file path.

    Each entry is a ``(Node, SupportedLanguage)`` pair, presumably the parsed
    root node of the file and its language — TODO confirm.
    """

    def __setitem__(self, key: Path, value: tuple[Node, SupportedLanguage]) -> None: ...
    def __getitem__(self, key: Path) -> tuple[Node, SupportedLanguage]: ...
    def __delitem__(self, key: Path) -> None: ...
    def __contains__(self, key: Path) -> bool: ...
    def items(self) -> ItemsView[Path, tuple[Node, SupportedLanguage]]: ...
|
|
174
|
+
|
|
175
|
+
|
|
176
|
+
class ColumnDescriptor(Protocol):
    """Structural type for one cursor column descriptor; only ``name`` is required."""

    @property
    def name(self) -> str: ...
|
|
179
|
+
|
|
180
|
+
|
|
181
|
+
class CursorProtocol(Protocol):
    """Structural type for a database cursor.

    Covers running a query with optional parameters, reading column
    descriptors, fetching all rows as tuples, and closing the cursor.
    """

    # `params` may be a property map, a sequence of batch rows, or a BatchWrapper.
    def execute(
        self,
        query: str,
        params: PropertyDict | Sequence[BatchParams] | BatchWrapper | None = None,
    ) -> None: ...
    def close(self) -> None: ...
    # Column descriptors of the last result; may be None.
    @property
    def description(self) -> Sequence[ColumnDescriptor] | None: ...
    def fetchall(self) -> list[tuple[PropertyValue, ...]]: ...
|
|
191
|
+
|
|
192
|
+
|
|
193
|
+
# Node identifier type
# Three-item tuple whose first item is a node label (enum member or plain string).
# NOTE(review): the other two items look like (key property name, key value) — confirm against callers.
NodeIdentifier = tuple[NodeLabel | str, str, str | None]
|
|
195
|
+
|
|
196
|
+
|
|
197
|
+
# Language import type
|
|
198
|
+
class LanguageImport(NamedTuple):
    """Describes where a language's grammar is imported from.

    NOTE(review): field meanings are inferred from their names (module path
    and attribute to import for a given language) — confirm against the
    parser loader.
    """

    lang_key: SupportedLanguage
    module_path: str
    attr_name: str
    submodule_name: SupportedLanguage
|
|
203
|
+
|
|
204
|
+
|
|
205
|
+
# Language loader type (use string literal for forward reference since Language is TYPE_CHECKING only)
# NOTE(review): at runtime this is a plain str holding the annotation text,
# not a typing object; it is only meaningful to static type checkers — confirm usage.
LanguageLoader = "Callable[[], Language] | None"
|
|
207
|
+
|
|
208
|
+
|
|
209
|
+
# Graph node for search results
|
|
210
|
+
@dataclass
class GraphNode:
    """A node in the code graph with full information.

    Only the first four fields are required; the location and documentation
    fields default to ``None``.

    Attributes:
        node_id: Unique node identifier
        labels: Node labels (e.g., ["Function", "Method"])
        qualified_name: Fully qualified name
        name: Simple name
        path: File path
        start_line: Start line number
        end_line: End line number
        docstring: Documentation string
        properties: Additional properties
    """

    node_id: int
    labels: list[str]
    qualified_name: str
    name: str
    path: str | None = None
    start_line: int | None = None
    end_line: int | None = None
    docstring: str | None = None
    # default_factory gives every instance its own dict.
    properties: dict[str, PropertyValue] = field(default_factory=dict)
|
|
235
|
+
|
|
236
|
+
|
|
237
|
+
# Semantic search result with graph node
|
|
238
|
+
@dataclass
class SemanticSearchResult:
    """Result from semantic search with graph node information.

    Wraps a full GraphNode rather than flattening its fields.
    NOTE(review): the semantic search tools module in this package defines a
    different, flat dataclass with the same name — take care when importing.

    Attributes:
        node: The graph node
        score: Similarity score (0-1)
        embedding: The embedding vector (optional)
    """

    node: GraphNode
    score: float
    embedding: list[float] | None = None
|
|
251
|
+
|
|
252
|
+
|
|
253
|
+
# Graph service protocol
|
|
254
|
+
class GraphServiceProtocol(Protocol):
    """Protocol for graph database services.

    Structural interface covering node lookup, name search, relationship
    traversal, raw Cypher execution and shutdown. Implementations need not
    inherit from this class.
    """

    def get_node_by_id(self, node_id: int) -> GraphNode | None:
        """Get a node by its ID.

        Args:
            node_id: Node identifier

        Returns:
            GraphNode if found, None otherwise
        """
        ...

    def get_nodes_by_ids(self, node_ids: list[int]) -> list[GraphNode]:
        """Get multiple nodes by their IDs.

        Args:
            node_ids: List of node identifiers

        Returns:
            List of GraphNode objects
        """
        ...

    def search_nodes(
        self,
        query: str,
        label: str | None = None,
        limit: int = 10,
    ) -> list[GraphNode]:
        """Search nodes by name or qualified name.

        Args:
            query: Search query string
            label: Optional node label filter
            limit: Maximum number of results

        Returns:
            List of matching GraphNode objects
        """
        ...

    def get_node_relationships(
        self,
        node_id: int,
        rel_type: str | None = None,
        direction: str = "both",
    ) -> list[tuple[GraphNode, str, str]]:
        """Get relationships for a node.

        Args:
            node_id: Node identifier
            rel_type: Optional relationship type filter
            direction: "out", "in", or "both"

        Returns:
            List of (related_node, relationship_type, direction) tuples
        """
        ...

    def execute_query(
        self,
        query: str,
        params: PropertyDict | None = None,
    ) -> list[ResultRow]:
        """Execute a Cypher query.

        Args:
            query: Cypher query string
            params: Optional query parameters

        Returns:
            List of result rows
        """
        ...

    def close(self) -> None:
        """Close the service connection."""
        ...
|
|
File without changes
|