code-graph-builder 0.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- code_graph_builder/__init__.py +82 -0
- code_graph_builder/builder.py +366 -0
- code_graph_builder/cgb_cli.py +32 -0
- code_graph_builder/cli.py +564 -0
- code_graph_builder/commands_cli.py +1288 -0
- code_graph_builder/config.py +340 -0
- code_graph_builder/constants.py +708 -0
- code_graph_builder/embeddings/__init__.py +40 -0
- code_graph_builder/embeddings/qwen3_embedder.py +573 -0
- code_graph_builder/embeddings/vector_store.py +584 -0
- code_graph_builder/examples/__init__.py +0 -0
- code_graph_builder/examples/example_configuration.py +276 -0
- code_graph_builder/examples/example_kuzu_usage.py +109 -0
- code_graph_builder/examples/example_semantic_search_full.py +347 -0
- code_graph_builder/examples/generate_wiki.py +915 -0
- code_graph_builder/examples/graph_export_example.py +100 -0
- code_graph_builder/examples/rag_example.py +206 -0
- code_graph_builder/examples/test_cli_demo.py +129 -0
- code_graph_builder/examples/test_embedding_api.py +153 -0
- code_graph_builder/examples/test_kuzu_local.py +190 -0
- code_graph_builder/examples/test_rag_redis.py +390 -0
- code_graph_builder/graph_updater.py +605 -0
- code_graph_builder/guidance/__init__.py +1 -0
- code_graph_builder/guidance/agent.py +123 -0
- code_graph_builder/guidance/prompts.py +74 -0
- code_graph_builder/guidance/toolset.py +264 -0
- code_graph_builder/language_spec.py +536 -0
- code_graph_builder/mcp/__init__.py +21 -0
- code_graph_builder/mcp/api_doc_generator.py +764 -0
- code_graph_builder/mcp/file_editor.py +207 -0
- code_graph_builder/mcp/pipeline.py +777 -0
- code_graph_builder/mcp/server.py +161 -0
- code_graph_builder/mcp/tools.py +1800 -0
- code_graph_builder/models.py +115 -0
- code_graph_builder/parser_loader.py +344 -0
- code_graph_builder/parsers/__init__.py +7 -0
- code_graph_builder/parsers/call_processor.py +306 -0
- code_graph_builder/parsers/call_resolver.py +139 -0
- code_graph_builder/parsers/definition_processor.py +796 -0
- code_graph_builder/parsers/factory.py +119 -0
- code_graph_builder/parsers/import_processor.py +293 -0
- code_graph_builder/parsers/structure_processor.py +145 -0
- code_graph_builder/parsers/type_inference.py +143 -0
- code_graph_builder/parsers/utils.py +134 -0
- code_graph_builder/rag/__init__.py +68 -0
- code_graph_builder/rag/camel_agent.py +429 -0
- code_graph_builder/rag/client.py +298 -0
- code_graph_builder/rag/config.py +239 -0
- code_graph_builder/rag/cypher_generator.py +67 -0
- code_graph_builder/rag/llm_backend.py +210 -0
- code_graph_builder/rag/markdown_generator.py +352 -0
- code_graph_builder/rag/prompt_templates.py +440 -0
- code_graph_builder/rag/rag_engine.py +640 -0
- code_graph_builder/rag/review_report.md +172 -0
- code_graph_builder/rag/tests/__init__.py +3 -0
- code_graph_builder/rag/tests/test_camel_agent.py +313 -0
- code_graph_builder/rag/tests/test_client.py +221 -0
- code_graph_builder/rag/tests/test_config.py +177 -0
- code_graph_builder/rag/tests/test_markdown_generator.py +240 -0
- code_graph_builder/rag/tests/test_prompt_templates.py +160 -0
- code_graph_builder/services/__init__.py +39 -0
- code_graph_builder/services/graph_service.py +465 -0
- code_graph_builder/services/kuzu_service.py +665 -0
- code_graph_builder/services/memory_service.py +171 -0
- code_graph_builder/settings.py +75 -0
- code_graph_builder/tests/ACCEPTANCE_CRITERIA_PHASE2.md +401 -0
- code_graph_builder/tests/__init__.py +1 -0
- code_graph_builder/tests/run_acceptance_check.py +378 -0
- code_graph_builder/tests/test_api_find.py +231 -0
- code_graph_builder/tests/test_api_find_integration.py +226 -0
- code_graph_builder/tests/test_basic.py +78 -0
- code_graph_builder/tests/test_c_api_extraction.py +388 -0
- code_graph_builder/tests/test_call_resolution_scenarios.py +504 -0
- code_graph_builder/tests/test_embedder.py +411 -0
- code_graph_builder/tests/test_integration_semantic.py +434 -0
- code_graph_builder/tests/test_mcp_protocol.py +298 -0
- code_graph_builder/tests/test_mcp_user_flow.py +190 -0
- code_graph_builder/tests/test_rag.py +404 -0
- code_graph_builder/tests/test_settings.py +135 -0
- code_graph_builder/tests/test_step1_graph_build.py +264 -0
- code_graph_builder/tests/test_step2_api_docs.py +323 -0
- code_graph_builder/tests/test_step3_embedding.py +278 -0
- code_graph_builder/tests/test_vector_store.py +552 -0
- code_graph_builder/tools/__init__.py +40 -0
- code_graph_builder/tools/graph_query.py +495 -0
- code_graph_builder/tools/semantic_search.py +387 -0
- code_graph_builder/types.py +333 -0
- code_graph_builder/utils/__init__.py +0 -0
- code_graph_builder/utils/path_utils.py +30 -0
- code_graph_builder-0.2.0.dist-info/METADATA +321 -0
- code_graph_builder-0.2.0.dist-info/RECORD +93 -0
- code_graph_builder-0.2.0.dist-info/WHEEL +4 -0
- code_graph_builder-0.2.0.dist-info/entry_points.txt +3 -0
|
@@ -0,0 +1,119 @@
|
|
|
1
|
+
"""Code Graph Builder - Processor Factory."""
|
|
2
|
+
|
|
3
|
+
from pathlib import Path
|
|
4
|
+
|
|
5
|
+
from ..constants import SupportedLanguage
|
|
6
|
+
from ..services import IngestorProtocol
|
|
7
|
+
from ..types import (
|
|
8
|
+
ASTCacheProtocol,
|
|
9
|
+
FunctionRegistryTrieProtocol,
|
|
10
|
+
LanguageQueries,
|
|
11
|
+
SimpleNameLookup,
|
|
12
|
+
)
|
|
13
|
+
from .call_processor import CallProcessor
|
|
14
|
+
from .definition_processor import DefinitionProcessor
|
|
15
|
+
from .import_processor import ImportProcessor
|
|
16
|
+
from .structure_processor import StructureProcessor
|
|
17
|
+
from .type_inference import TypeInferenceEngine
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
class ProcessorFactory:
    """Build the parsing processors on demand and reuse them afterwards.

    Each processor is exposed as a read-only property. The first access
    constructs the processor from the shared collaborators stored on the
    factory; later accesses return the same instance.

    The ``module_qn_to_file_path`` dict created here is handed by reference
    to both the definition processor and the type inference engine.
    """

    def __init__(
        self,
        ingestor: IngestorProtocol,
        repo_path: Path,
        project_name: str,
        queries: dict[SupportedLanguage, LanguageQueries],
        function_registry: FunctionRegistryTrieProtocol,
        simple_name_lookup: SimpleNameLookup,
        ast_cache: ASTCacheProtocol,
        unignore_paths: frozenset[str] | None = None,
        exclude_paths: frozenset[str] | None = None,
    ) -> None:
        """Store the shared collaborators; no processor is created yet."""
        # Collaborators forwarded to the processors when they are built.
        self.ingestor = ingestor
        self.repo_path = repo_path
        self.project_name = project_name
        self.queries = queries
        self.function_registry = function_registry
        self.simple_name_lookup = simple_name_lookup
        self.ast_cache = ast_cache
        self.unignore_paths = unignore_paths
        self.exclude_paths = exclude_paths

        # Shared (by reference) between definition processing and type inference.
        self.module_qn_to_file_path: dict[str, Path] = {}

        # Cache slots, filled on first access of the matching property.
        self._import_processor: ImportProcessor | None = None
        self._structure_processor: StructureProcessor | None = None
        self._definition_processor: DefinitionProcessor | None = None
        self._type_inference: TypeInferenceEngine | None = None
        self._call_processor: CallProcessor | None = None

    @property
    def import_processor(self) -> ImportProcessor:
        """Return the cached ImportProcessor, creating it on first use."""
        existing = self._import_processor
        if existing is not None:
            return existing

        created = ImportProcessor(
            repo_path=self.repo_path,
            project_name=self.project_name,
            ingestor=self.ingestor,
            function_registry=self.function_registry,
        )
        self._import_processor = created
        return created

    @property
    def structure_processor(self) -> StructureProcessor:
        """Return the cached StructureProcessor, creating it on first use."""
        existing = self._structure_processor
        if existing is not None:
            return existing

        created = StructureProcessor(
            ingestor=self.ingestor,
            repo_path=self.repo_path,
            project_name=self.project_name,
            queries=self.queries,
            unignore_paths=self.unignore_paths,
            exclude_paths=self.exclude_paths,
        )
        self._structure_processor = created
        return created

    @property
    def definition_processor(self) -> DefinitionProcessor:
        """Return the cached DefinitionProcessor, creating it on first use.

        Building it also builds the import processor if that has not
        happened yet.
        """
        existing = self._definition_processor
        if existing is not None:
            return existing

        created = DefinitionProcessor(
            ingestor=self.ingestor,
            repo_path=self.repo_path,
            project_name=self.project_name,
            function_registry=self.function_registry,
            simple_name_lookup=self.simple_name_lookup,
            import_processor=self.import_processor,
            module_qn_to_file_path=self.module_qn_to_file_path,
        )
        self._definition_processor = created
        return created

    @property
    def type_inference(self) -> TypeInferenceEngine:
        """Return the cached TypeInferenceEngine, creating it on first use.

        Building it pulls in the import processor and the definition
        processor (for its ``class_inheritance`` table).
        """
        existing = self._type_inference
        if existing is not None:
            return existing

        created = TypeInferenceEngine(
            import_processor=self.import_processor,
            function_registry=self.function_registry,
            repo_path=self.repo_path,
            project_name=self.project_name,
            ast_cache=self.ast_cache,
            queries=self.queries,
            module_qn_to_file_path=self.module_qn_to_file_path,
            class_inheritance=self.definition_processor.class_inheritance,
            simple_name_lookup=self.simple_name_lookup,
        )
        self._type_inference = created
        return created

    @property
    def call_processor(self) -> CallProcessor:
        """Return the cached CallProcessor, creating it on first use.

        Building it pulls in the import processor, the type inference
        engine and the definition processor's ``class_inheritance`` table.
        """
        existing = self._call_processor
        if existing is not None:
            return existing

        created = CallProcessor(
            ingestor=self.ingestor,
            repo_path=self.repo_path,
            project_name=self.project_name,
            function_registry=self.function_registry,
            import_processor=self.import_processor,
            type_inference=self.type_inference,
            class_inheritance=self.definition_processor.class_inheritance,
        )
        self._call_processor = created
        return created
|
|
@@ -0,0 +1,293 @@
|
|
|
1
|
+
"""Code Graph Builder - Import Processor."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from pathlib import Path
|
|
6
|
+
from typing import TYPE_CHECKING
|
|
7
|
+
|
|
8
|
+
from loguru import logger
|
|
9
|
+
from tree_sitter import Node, QueryCursor
|
|
10
|
+
|
|
11
|
+
from .. import constants as cs
|
|
12
|
+
from ..parsers.utils import safe_decode_text
|
|
13
|
+
from ..services import IngestorProtocol
|
|
14
|
+
from ..types import FunctionRegistryTrieProtocol
|
|
15
|
+
|
|
16
|
+
if TYPE_CHECKING:
|
|
17
|
+
from ..types import LanguageQueries
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
class ImportProcessor:
|
|
21
|
+
"""Process import statements in source code."""
|
|
22
|
+
|
|
23
|
+
def __init__(
|
|
24
|
+
self,
|
|
25
|
+
repo_path: Path,
|
|
26
|
+
project_name: str,
|
|
27
|
+
ingestor: IngestorProtocol | None = None,
|
|
28
|
+
function_registry: FunctionRegistryTrieProtocol | None = None,
|
|
29
|
+
) -> None:
|
|
30
|
+
self.repo_path = repo_path
|
|
31
|
+
self.project_name = project_name
|
|
32
|
+
self.ingestor = ingestor
|
|
33
|
+
self.function_registry = function_registry
|
|
34
|
+
self.import_mapping: dict[str, dict[str, str]] = {}
|
|
35
|
+
|
|
36
|
+
def parse_imports(
|
|
37
|
+
self,
|
|
38
|
+
root_node: Node,
|
|
39
|
+
module_qn: str,
|
|
40
|
+
language: cs.SupportedLanguage,
|
|
41
|
+
queries: dict[cs.SupportedLanguage, LanguageQueries],
|
|
42
|
+
) -> None:
|
|
43
|
+
"""Parse imports from a file."""
|
|
44
|
+
if language not in queries:
|
|
45
|
+
return
|
|
46
|
+
|
|
47
|
+
lang_queries = queries[language]
|
|
48
|
+
imports_query = lang_queries.get(cs.QUERY_IMPORTS)
|
|
49
|
+
if not imports_query:
|
|
50
|
+
return
|
|
51
|
+
|
|
52
|
+
self.import_mapping[module_qn] = {}
|
|
53
|
+
|
|
54
|
+
try:
|
|
55
|
+
cursor = QueryCursor(imports_query)
|
|
56
|
+
captures = cursor.captures(root_node)
|
|
57
|
+
|
|
58
|
+
match language:
|
|
59
|
+
case cs.SupportedLanguage.PYTHON:
|
|
60
|
+
self._parse_python_imports(captures, module_qn)
|
|
61
|
+
case cs.SupportedLanguage.JS | cs.SupportedLanguage.TS:
|
|
62
|
+
self._parse_js_ts_imports(captures, module_qn)
|
|
63
|
+
case cs.SupportedLanguage.JAVA:
|
|
64
|
+
self._parse_java_imports(captures, module_qn)
|
|
65
|
+
case cs.SupportedLanguage.RUST:
|
|
66
|
+
self._parse_rust_imports(captures, module_qn)
|
|
67
|
+
case cs.SupportedLanguage.GO:
|
|
68
|
+
self._parse_go_imports(captures, module_qn)
|
|
69
|
+
case cs.SupportedLanguage.C | cs.SupportedLanguage.CPP:
|
|
70
|
+
self._parse_c_cpp_imports(captures, module_qn)
|
|
71
|
+
case _:
|
|
72
|
+
pass
|
|
73
|
+
|
|
74
|
+
logger.debug(f"Parsed {len(self.import_mapping[module_qn])} imports for {module_qn}")
|
|
75
|
+
|
|
76
|
+
if self.ingestor:
|
|
77
|
+
for alias, full_name in self.import_mapping[module_qn].items():
|
|
78
|
+
self.ingestor.ensure_relationship_batch(
|
|
79
|
+
(cs.NodeLabel.MODULE, cs.KEY_QUALIFIED_NAME, module_qn),
|
|
80
|
+
cs.RelationshipType.IMPORTS,
|
|
81
|
+
(cs.NodeLabel.MODULE, cs.KEY_QUALIFIED_NAME, full_name),
|
|
82
|
+
)
|
|
83
|
+
|
|
84
|
+
except Exception as e:
|
|
85
|
+
logger.warning(f"Failed to parse imports for {module_qn}: {e}")
|
|
86
|
+
|
|
87
|
+
def _parse_python_imports(self, captures: dict, module_qn: str) -> None:
|
|
88
|
+
"""Parse Python import statements."""
|
|
89
|
+
import_nodes = captures.get(cs.CAPTURE_IMPORT, [])
|
|
90
|
+
import_from_nodes = captures.get(cs.CAPTURE_IMPORT_FROM, [])
|
|
91
|
+
|
|
92
|
+
for node in import_nodes + import_from_nodes:
|
|
93
|
+
if not isinstance(node, Node):
|
|
94
|
+
continue
|
|
95
|
+
|
|
96
|
+
if node.type == "import_statement":
|
|
97
|
+
self._handle_python_import(node, module_qn)
|
|
98
|
+
elif node.type == "import_from_statement":
|
|
99
|
+
self._handle_python_from_import(node, module_qn)
|
|
100
|
+
|
|
101
|
+
def _handle_python_import(self, node: Node, module_qn: str) -> None:
|
|
102
|
+
"""Handle 'import xxx' or 'import xxx as yyy'."""
|
|
103
|
+
for child in node.named_children:
|
|
104
|
+
if child.type == "dotted_name":
|
|
105
|
+
name = self._get_dotted_name(child)
|
|
106
|
+
if name:
|
|
107
|
+
full_qn = f"{self.project_name}.{name.replace('.', cs.SEPARATOR_DOT)}"
|
|
108
|
+
self.import_mapping[module_qn][name.split(cs.SEPARATOR_DOT)[0]] = full_qn
|
|
109
|
+
|
|
110
|
+
def _handle_python_from_import(self, node: Node, module_qn: str) -> None:
|
|
111
|
+
"""Handle 'from xxx import yyy'."""
|
|
112
|
+
module_node = None
|
|
113
|
+
for child in node.children:
|
|
114
|
+
if child.type == "dotted_name":
|
|
115
|
+
module_node = child
|
|
116
|
+
break
|
|
117
|
+
|
|
118
|
+
if not module_node:
|
|
119
|
+
return
|
|
120
|
+
|
|
121
|
+
module_name = self._get_dotted_name(module_node)
|
|
122
|
+
if not module_name:
|
|
123
|
+
return
|
|
124
|
+
|
|
125
|
+
module_prefix = f"{self.project_name}.{module_name.replace('.', cs.SEPARATOR_DOT)}"
|
|
126
|
+
|
|
127
|
+
for child in node.named_children:
|
|
128
|
+
if child.type == "imported_name" or child.type == "identifier":
|
|
129
|
+
name = safe_decode_text(child)
|
|
130
|
+
if name:
|
|
131
|
+
full_qn = f"{module_prefix}.{name}"
|
|
132
|
+
self.import_mapping[module_qn][name] = full_qn
|
|
133
|
+
|
|
134
|
+
def _parse_js_ts_imports(self, captures: dict, module_qn: str) -> None:
|
|
135
|
+
"""Parse JavaScript/TypeScript imports."""
|
|
136
|
+
import_nodes = captures.get(cs.CAPTURE_IMPORT, [])
|
|
137
|
+
|
|
138
|
+
for node in import_nodes:
|
|
139
|
+
if not isinstance(node, Node):
|
|
140
|
+
continue
|
|
141
|
+
|
|
142
|
+
if node.type == "import_statement":
|
|
143
|
+
self._handle_js_ts_import(node, module_qn)
|
|
144
|
+
|
|
145
|
+
def _handle_js_ts_import(self, node: Node, module_qn: str) -> None:
|
|
146
|
+
"""Handle ES6 import statements."""
|
|
147
|
+
source_node = None
|
|
148
|
+
for child in node.children:
|
|
149
|
+
if child.type == "string":
|
|
150
|
+
source_node = child
|
|
151
|
+
break
|
|
152
|
+
|
|
153
|
+
if not source_node:
|
|
154
|
+
return
|
|
155
|
+
|
|
156
|
+
source = safe_decode_text(source_node)
|
|
157
|
+
if not source:
|
|
158
|
+
return
|
|
159
|
+
|
|
160
|
+
source = source.strip("'\"")
|
|
161
|
+
|
|
162
|
+
for child in node.named_children:
|
|
163
|
+
if child.type == "import_clause":
|
|
164
|
+
self._process_js_import_clause(child, source, module_qn)
|
|
165
|
+
|
|
166
|
+
def _process_js_import_clause(self, node: Node, source: str, module_qn: str) -> None:
|
|
167
|
+
"""Process import clause (default, named, or namespace imports)."""
|
|
168
|
+
name = safe_decode_text(node)
|
|
169
|
+
if name:
|
|
170
|
+
self.import_mapping[module_qn][name] = source
|
|
171
|
+
|
|
172
|
+
def _parse_java_imports(self, captures: dict, module_qn: str) -> None:
|
|
173
|
+
"""Parse Java imports."""
|
|
174
|
+
import_nodes = captures.get(cs.CAPTURE_IMPORT, [])
|
|
175
|
+
|
|
176
|
+
for node in import_nodes:
|
|
177
|
+
if not isinstance(node, Node):
|
|
178
|
+
continue
|
|
179
|
+
|
|
180
|
+
scoped_name = None
|
|
181
|
+
for child in node.named_children:
|
|
182
|
+
if child.type == "scoped_identifier":
|
|
183
|
+
scoped_name = safe_decode_text(child)
|
|
184
|
+
break
|
|
185
|
+
elif child.type == "identifier":
|
|
186
|
+
scoped_name = safe_decode_text(child)
|
|
187
|
+
|
|
188
|
+
if scoped_name:
|
|
189
|
+
parts = scoped_name.split(cs.SEPARATOR_DOT)
|
|
190
|
+
if parts:
|
|
191
|
+
self.import_mapping[module_qn][parts[-1]] = scoped_name.replace(
|
|
192
|
+
cs.SEPARATOR_DOT, "."
|
|
193
|
+
)
|
|
194
|
+
|
|
195
|
+
def _parse_rust_imports(self, captures: dict, module_qn: str) -> None:
|
|
196
|
+
"""Parse Rust use statements."""
|
|
197
|
+
import_nodes = captures.get(cs.CAPTURE_IMPORT, [])
|
|
198
|
+
|
|
199
|
+
for node in import_nodes:
|
|
200
|
+
if not isinstance(node, Node):
|
|
201
|
+
continue
|
|
202
|
+
|
|
203
|
+
if node.type == "use_declaration":
|
|
204
|
+
self._handle_rust_use(node, module_qn)
|
|
205
|
+
|
|
206
|
+
def _handle_rust_use(self, node: Node, module_qn: str) -> None:
|
|
207
|
+
"""Handle Rust use statements."""
|
|
208
|
+
for child in node.named_children:
|
|
209
|
+
if child.type == "scoped_use_list":
|
|
210
|
+
prefix = None
|
|
211
|
+
use_list = None
|
|
212
|
+
for c in child.children:
|
|
213
|
+
if c.type == "identifier" or c.type == "scoped_identifier":
|
|
214
|
+
prefix = safe_decode_text(c)
|
|
215
|
+
elif c.type == "use_list":
|
|
216
|
+
use_list = c
|
|
217
|
+
|
|
218
|
+
if prefix and use_list:
|
|
219
|
+
for item in use_list.named_children:
|
|
220
|
+
name = safe_decode_text(item)
|
|
221
|
+
if name:
|
|
222
|
+
full_qn = f"{prefix}::{name}"
|
|
223
|
+
self.import_mapping[module_qn][name] = full_qn
|
|
224
|
+
elif child.type in ("scoped_identifier", "identifier"):
|
|
225
|
+
name = safe_decode_text(child)
|
|
226
|
+
if name:
|
|
227
|
+
parts = name.split("::")
|
|
228
|
+
if parts:
|
|
229
|
+
self.import_mapping[module_qn][parts[-1]] = name
|
|
230
|
+
|
|
231
|
+
def _parse_go_imports(self, captures: dict, module_qn: str) -> None:
|
|
232
|
+
"""Parse Go imports."""
|
|
233
|
+
import_nodes = captures.get(cs.CAPTURE_IMPORT, [])
|
|
234
|
+
|
|
235
|
+
for node in import_nodes:
|
|
236
|
+
if not isinstance(node, Node):
|
|
237
|
+
continue
|
|
238
|
+
|
|
239
|
+
if node.type == "import_declaration":
|
|
240
|
+
for child in node.named_children:
|
|
241
|
+
if child.type == "import_spec":
|
|
242
|
+
self._handle_go_import_spec(child, module_qn)
|
|
243
|
+
elif child.type == "import_spec_list":
|
|
244
|
+
for spec in child.named_children:
|
|
245
|
+
if spec.type == "import_spec":
|
|
246
|
+
self._handle_go_import_spec(spec, module_qn)
|
|
247
|
+
|
|
248
|
+
def _handle_go_import_spec(self, node: Node, module_qn: str) -> None:
|
|
249
|
+
"""Handle Go import specification."""
|
|
250
|
+
alias = None
|
|
251
|
+
path = None
|
|
252
|
+
|
|
253
|
+
for child in node.named_children:
|
|
254
|
+
if child.type == "package_identifier":
|
|
255
|
+
alias = safe_decode_text(child)
|
|
256
|
+
elif child.type == "interpreted_string_literal":
|
|
257
|
+
path = safe_decode_text(child)
|
|
258
|
+
|
|
259
|
+
if path:
|
|
260
|
+
path = path.strip('"')
|
|
261
|
+
key = alias if alias else path.split("/")[-1]
|
|
262
|
+
self.import_mapping[module_qn][key] = path
|
|
263
|
+
|
|
264
|
+
def _parse_c_cpp_imports(self, captures: dict, module_qn: str) -> None:
|
|
265
|
+
"""Parse C/C++ #include directives."""
|
|
266
|
+
import_nodes = captures.get(cs.CAPTURE_IMPORT, [])
|
|
267
|
+
|
|
268
|
+
for node in import_nodes:
|
|
269
|
+
if not isinstance(node, Node):
|
|
270
|
+
continue
|
|
271
|
+
|
|
272
|
+
if node.type == "preproc_include":
|
|
273
|
+
for child in node.named_children:
|
|
274
|
+
if child.type in ("string_literal", "system_lib_string"):
|
|
275
|
+
header = safe_decode_text(child)
|
|
276
|
+
if header:
|
|
277
|
+
header = header.strip('"<>')
|
|
278
|
+
key = header.replace(".", "_")
|
|
279
|
+
self.import_mapping[module_qn][key] = header
|
|
280
|
+
|
|
281
|
+
def _get_dotted_name(self, node: Node) -> str | None:
|
|
282
|
+
"""Get dotted name from a node."""
|
|
283
|
+
parts = []
|
|
284
|
+
for child in node.children:
|
|
285
|
+
if child.type == "identifier":
|
|
286
|
+
name = safe_decode_text(child)
|
|
287
|
+
if name:
|
|
288
|
+
parts.append(name)
|
|
289
|
+
return cs.SEPARATOR_DOT.join(parts) if parts else None
|
|
290
|
+
|
|
291
|
+
def get_import_mapping(self, module_qn: str) -> dict[str, str]:
|
|
292
|
+
"""Get import mapping for a module."""
|
|
293
|
+
return self.import_mapping.get(module_qn, {})
|
|
@@ -0,0 +1,145 @@
|
|
|
1
|
+
"""Code Graph Builder - Structure Processor."""
|
|
2
|
+
|
|
3
|
+
from pathlib import Path
|
|
4
|
+
|
|
5
|
+
from loguru import logger
|
|
6
|
+
|
|
7
|
+
from .. import constants as cs
|
|
8
|
+
from ..services import IngestorProtocol
|
|
9
|
+
from ..types import LanguageQueries, NodeIdentifier
|
|
10
|
+
from ..utils.path_utils import should_skip_path
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
class StructureProcessor:
    """Processor for identifying project structure (packages, folders, files)."""

    def __init__(
        self,
        ingestor: IngestorProtocol,
        repo_path: Path,
        project_name: str,
        queries: dict[cs.SupportedLanguage, LanguageQueries],
        unignore_paths: frozenset[str] | None = None,
        exclude_paths: frozenset[str] | None = None,
    ) -> None:
        """Store collaborators and start with no known structural elements."""
        self.ingestor = ingestor
        self.repo_path = repo_path
        self.project_name = project_name
        self.queries = queries
        # Repo-relative directory -> package qualified name, or None for a
        # plain folder. Filled by identify_structure().
        self.structural_elements: dict[Path, str | None] = {}
        self.unignore_paths = unignore_paths
        self.exclude_paths = exclude_paths

    def _get_parent_identifier(
        self, parent_rel_path: Path, parent_container_qn: str | None
    ) -> NodeIdentifier:
        """Get parent node identifier for relationship creation.

        Returns the project node when the parent is the repository root, the
        package node when the parent was identified as a package, and the
        folder node (keyed by POSIX path) otherwise.
        """
        if parent_rel_path == Path(cs.PATH_CURRENT_DIR):
            return (cs.NodeLabel.PROJECT, cs.KEY_NAME, self.project_name)
        if parent_container_qn:
            return (cs.NodeLabel.PACKAGE, cs.KEY_QUALIFIED_NAME, parent_container_qn)
        return (cs.NodeLabel.FOLDER, cs.KEY_PATH, parent_rel_path.as_posix())

    def identify_structure(self) -> None:
        """Identify project structure: packages and folders.

        Creates the project node, then walks every non-skipped directory
        under ``repo_path`` in sorted order, so parents are handled before
        their children. A directory holding any language's package-indicator
        file becomes a Package node; any other directory except the
        repository root becomes a Folder node. Each node is linked to its
        parent container.
        """
        # Create project node first
        self.ingestor.ensure_node_batch(
            cs.NodeLabel.PROJECT,
            {
                cs.KEY_NAME: self.project_name,
                cs.KEY_QUALIFIED_NAME: self.project_name,
                cs.KEY_PATH: str(self.repo_path),
            },
        )
        logger.info(f"Created Project node: {self.project_name}")

        directories = {self.repo_path}
        for path in self.repo_path.rglob(cs.GLOB_ALL):
            if path.is_dir() and not should_skip_path(
                path,
                self.repo_path,
                exclude_paths=self.exclude_paths,
                unignore_paths=self.unignore_paths,
            ):
                directories.add(path)

        # The indicator set depends only on the configured languages, so it
        # is built once instead of once per directory.
        package_indicators: set[str] = set()
        for lang_queries in self.queries.values():
            lang_config = lang_queries[cs.QUERY_CONFIG]
            package_indicators.update(lang_config.package_indicators)

        for root in sorted(directories):
            relative_root = root.relative_to(self.repo_path)

            parent_rel_path = relative_root.parent
            parent_container_qn = self.structural_elements.get(parent_rel_path)

            is_package = any(
                (root / indicator).exists() for indicator in package_indicators
            )

            if is_package:
                package_qn = cs.SEPARATOR_DOT.join(
                    [self.project_name] + list(relative_root.parts)
                )
                self.structural_elements[relative_root] = package_qn
                logger.info(f"Identified Package: {package_qn}")
                self.ingestor.ensure_node_batch(
                    cs.NodeLabel.PACKAGE,
                    {
                        cs.KEY_QUALIFIED_NAME: package_qn,
                        cs.KEY_NAME: root.name,
                        cs.KEY_PATH: relative_root.as_posix(),
                    },
                )
                parent_identifier = self._get_parent_identifier(
                    parent_rel_path, parent_container_qn
                )
                self.ingestor.ensure_relationship_batch(
                    parent_identifier,
                    cs.RelationshipType.CONTAINS_PACKAGE,
                    (cs.NodeLabel.PACKAGE, cs.KEY_QUALIFIED_NAME, package_qn),
                )
            elif root != self.repo_path:
                # The root itself is already represented by the project node.
                self.structural_elements[relative_root] = None
                logger.info(f"Identified Folder: {relative_root}")
                self.ingestor.ensure_node_batch(
                    cs.NodeLabel.FOLDER,
                    {cs.KEY_PATH: relative_root.as_posix(), cs.KEY_NAME: root.name},
                )
                parent_identifier = self._get_parent_identifier(
                    parent_rel_path, parent_container_qn
                )
                self.ingestor.ensure_relationship_batch(
                    parent_identifier,
                    cs.RelationshipType.CONTAINS_FOLDER,
                    (cs.NodeLabel.FOLDER, cs.KEY_PATH, relative_root.as_posix()),
                )

    def process_generic_file(self, file_path: Path, file_name: str) -> None:
        """Process a generic file node.

        Creates a File node keyed by its repo-relative POSIX path and links
        it to its containing project, package or folder.

        Args:
            file_path: Path of the file; must be located under ``repo_path``.
            file_name: Name stored on the File node.
        """
        relative_filepath = file_path.relative_to(self.repo_path).as_posix()
        relative_root = file_path.parent.relative_to(self.repo_path)

        parent_container_qn = self.structural_elements.get(relative_root)
        parent_identifier = self._get_parent_identifier(
            relative_root, parent_container_qn
        )

        self.ingestor.ensure_node_batch(
            cs.NodeLabel.FILE,
            {
                cs.KEY_PATH: relative_filepath,
                cs.KEY_NAME: file_name,
                cs.KEY_EXTENSION: file_path.suffix,
            },
        )

        self.ingestor.ensure_relationship_batch(
            parent_identifier,
            cs.RelationshipType.CONTAINS_FILE,
            (cs.NodeLabel.FILE, cs.KEY_PATH, relative_filepath),
        )
|
|
@@ -0,0 +1,143 @@
|
|
|
1
|
+
"""Code Graph Builder - Type Inference."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from pathlib import Path
|
|
6
|
+
from typing import TYPE_CHECKING
|
|
7
|
+
|
|
8
|
+
from tree_sitter import Node
|
|
9
|
+
|
|
10
|
+
from .. import constants as cs
|
|
11
|
+
from ..parsers.utils import safe_decode_text
|
|
12
|
+
from ..types import ASTCacheProtocol, FunctionRegistryTrieProtocol, SimpleNameLookup
|
|
13
|
+
|
|
14
|
+
if TYPE_CHECKING:
|
|
15
|
+
from ..types import LanguageQueries
|
|
16
|
+
from .import_processor import ImportProcessor
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
class TypeInferenceEngine:
    """Infer types from source code.

    Inference is shallow: a variable's type comes from an explicit type
    annotation on its declarator or, failing that, from the syntactic kind
    of its literal initializer. Results are cached per scope.
    """

    # Declaration node types inspected among the direct children of a scope.
    _DECLARATION_NODE_TYPES = (
        "variable_declarator",
        "variable_declaration",
        "lexical_declaration",
    )

    # Literal node type -> inferred type name. Node types not listed here
    # (e.g. call_expression, which would need call resolution) give None.
    _LITERAL_TYPE_NAMES = {
        "string": "str",
        "string_literal": "str",
        "integer": "int",
        "integer_literal": "int",
        "float": "float",
        "floating_point_literal": "float",
        "true": "bool",
        "false": "bool",
        "boolean_literal": "bool",
        "list": "list",
        "list_literal": "list",
        "dictionary": "dict",
        "dict_literal": "dict",
        "tuple": "tuple",
    }

    def __init__(
        self,
        import_processor: ImportProcessor,
        function_registry: FunctionRegistryTrieProtocol,
        repo_path: Path,
        project_name: str,
        ast_cache: ASTCacheProtocol,
        queries: dict[cs.SupportedLanguage, LanguageQueries],
        module_qn_to_file_path: dict[str, Path],
        class_inheritance: dict[str, list[str]],
        simple_name_lookup: SimpleNameLookup,
    ) -> None:
        """Store collaborators and start with an empty inference cache."""
        self.import_processor = import_processor
        self.function_registry = function_registry
        self.repo_path = repo_path
        self.project_name = project_name
        self.ast_cache = ast_cache
        self.queries = queries
        self.module_qn_to_file_path = module_qn_to_file_path
        self.class_inheritance = class_inheritance
        self.simple_name_lookup = simple_name_lookup
        # scope qualified name -> {variable name: inferred type}
        self._variable_types: dict[str, dict[str, str]] = {}

    def infer_variable_type(
        self,
        var_name: str,
        scope_qn: str,
        local_node: Node | None = None,
    ) -> str | None:
        """Infer the type of a variable in a given scope.

        Args:
            var_name: Name of the variable.
            scope_qn: Qualified name of the scope; used as the cache key.
            local_node: Node whose direct children are searched for a
                declaration of ``var_name`` when the cache has no answer.

        Returns:
            The cached or newly inferred type name, or ``None``.
        """
        # Check if we have cached type info
        scope_cache = self._variable_types.get(scope_qn)
        if scope_cache is not None and var_name in scope_cache:
            return scope_cache[var_name]

        # Try to infer from local node
        if local_node is not None:
            inferred = self._infer_from_node(var_name, local_node)
            if inferred:
                self._variable_types.setdefault(scope_qn, {})[var_name] = inferred
                return inferred

        return None

    def _infer_from_node(self, var_name: str, node: Node) -> str | None:
        """Try to infer type from AST node.

        Only declarations that are direct children of ``node`` are examined.
        """
        for child in node.children:
            if child.type in self._DECLARATION_NODE_TYPES:
                type_hint = self._get_type_from_declaration(child, var_name)
                if type_hint:
                    return type_hint
        return None

    def _get_type_from_declaration(self, node: Node, var_name: str) -> str | None:
        """Extract type from a variable declaration.

        ``node`` may be a declarator carrying the name/type/value fields
        itself, or a wrapping declaration whose ``variable_declarator``
        children carry them. In the second case each declarator is tried in
        turn.
        """
        name_node = node.child_by_field_name(cs.FIELD_NAME)
        if name_node is None:
            # Wrapper declarations (e.g. JavaScript ``let a = 1, b = 2``)
            # have no name field of their own; look one level down.
            if node.type != "variable_declarator":
                for child in node.named_children:
                    if child.type == "variable_declarator":
                        found = self._get_type_from_declaration(child, var_name)
                        if found:
                            return found
            return None

        if safe_decode_text(name_node) != var_name:
            return None

        # An explicit type annotation wins over the initializer.
        type_node = node.child_by_field_name(cs.FIELD_TYPE)
        if type_node is not None:
            return safe_decode_text(type_node)

        # Check for initialization value
        value_node = node.child_by_field_name(cs.FIELD_VALUE)
        if value_node is not None:
            return self._infer_from_value(value_node)

        return None

    def _infer_from_value(self, node: Node) -> str | None:
        """Infer type from a value node, based only on its node type."""
        return self._LITERAL_TYPE_NAMES.get(node.type)

    def get_class_for_variable(
        self,
        var_name: str,
        scope_qn: str,
        module_qn: str,
    ) -> str | None:
        """Get the class type for a variable.

        Uses only cached inference results, since no local node is passed.
        The inferred type is resolved, in order, through the module's import
        mapping and then as a class local to ``module_qn`` (if it is present
        in ``class_inheritance``). If neither matches, the bare type name is
        returned.

        Returns:
            The resolved class name, or ``None`` when no type is known.
        """
        var_type = self.infer_variable_type(var_name, scope_qn)
        if not var_type:
            return None

        # Check if it's a class from imports
        import_map = self.import_processor.get_import_mapping(module_qn)
        if var_type in import_map:
            return import_map[var_type]

        # Check if it's a local class
        class_qn = f"{module_qn}.{var_type}"
        if class_qn in self.class_inheritance:
            return class_qn

        return var_type
|