code-graph-builder 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (93)
  1. code_graph_builder/__init__.py +82 -0
  2. code_graph_builder/builder.py +366 -0
  3. code_graph_builder/cgb_cli.py +32 -0
  4. code_graph_builder/cli.py +564 -0
  5. code_graph_builder/commands_cli.py +1288 -0
  6. code_graph_builder/config.py +340 -0
  7. code_graph_builder/constants.py +708 -0
  8. code_graph_builder/embeddings/__init__.py +40 -0
  9. code_graph_builder/embeddings/qwen3_embedder.py +573 -0
  10. code_graph_builder/embeddings/vector_store.py +584 -0
  11. code_graph_builder/examples/__init__.py +0 -0
  12. code_graph_builder/examples/example_configuration.py +276 -0
  13. code_graph_builder/examples/example_kuzu_usage.py +109 -0
  14. code_graph_builder/examples/example_semantic_search_full.py +347 -0
  15. code_graph_builder/examples/generate_wiki.py +915 -0
  16. code_graph_builder/examples/graph_export_example.py +100 -0
  17. code_graph_builder/examples/rag_example.py +206 -0
  18. code_graph_builder/examples/test_cli_demo.py +129 -0
  19. code_graph_builder/examples/test_embedding_api.py +153 -0
  20. code_graph_builder/examples/test_kuzu_local.py +190 -0
  21. code_graph_builder/examples/test_rag_redis.py +390 -0
  22. code_graph_builder/graph_updater.py +605 -0
  23. code_graph_builder/guidance/__init__.py +1 -0
  24. code_graph_builder/guidance/agent.py +123 -0
  25. code_graph_builder/guidance/prompts.py +74 -0
  26. code_graph_builder/guidance/toolset.py +264 -0
  27. code_graph_builder/language_spec.py +536 -0
  28. code_graph_builder/mcp/__init__.py +21 -0
  29. code_graph_builder/mcp/api_doc_generator.py +764 -0
  30. code_graph_builder/mcp/file_editor.py +207 -0
  31. code_graph_builder/mcp/pipeline.py +777 -0
  32. code_graph_builder/mcp/server.py +161 -0
  33. code_graph_builder/mcp/tools.py +1800 -0
  34. code_graph_builder/models.py +115 -0
  35. code_graph_builder/parser_loader.py +344 -0
  36. code_graph_builder/parsers/__init__.py +7 -0
  37. code_graph_builder/parsers/call_processor.py +306 -0
  38. code_graph_builder/parsers/call_resolver.py +139 -0
  39. code_graph_builder/parsers/definition_processor.py +796 -0
  40. code_graph_builder/parsers/factory.py +119 -0
  41. code_graph_builder/parsers/import_processor.py +293 -0
  42. code_graph_builder/parsers/structure_processor.py +145 -0
  43. code_graph_builder/parsers/type_inference.py +143 -0
  44. code_graph_builder/parsers/utils.py +134 -0
  45. code_graph_builder/rag/__init__.py +68 -0
  46. code_graph_builder/rag/camel_agent.py +429 -0
  47. code_graph_builder/rag/client.py +298 -0
  48. code_graph_builder/rag/config.py +239 -0
  49. code_graph_builder/rag/cypher_generator.py +67 -0
  50. code_graph_builder/rag/llm_backend.py +210 -0
  51. code_graph_builder/rag/markdown_generator.py +352 -0
  52. code_graph_builder/rag/prompt_templates.py +440 -0
  53. code_graph_builder/rag/rag_engine.py +640 -0
  54. code_graph_builder/rag/review_report.md +172 -0
  55. code_graph_builder/rag/tests/__init__.py +3 -0
  56. code_graph_builder/rag/tests/test_camel_agent.py +313 -0
  57. code_graph_builder/rag/tests/test_client.py +221 -0
  58. code_graph_builder/rag/tests/test_config.py +177 -0
  59. code_graph_builder/rag/tests/test_markdown_generator.py +240 -0
  60. code_graph_builder/rag/tests/test_prompt_templates.py +160 -0
  61. code_graph_builder/services/__init__.py +39 -0
  62. code_graph_builder/services/graph_service.py +465 -0
  63. code_graph_builder/services/kuzu_service.py +665 -0
  64. code_graph_builder/services/memory_service.py +171 -0
  65. code_graph_builder/settings.py +75 -0
  66. code_graph_builder/tests/ACCEPTANCE_CRITERIA_PHASE2.md +401 -0
  67. code_graph_builder/tests/__init__.py +1 -0
  68. code_graph_builder/tests/run_acceptance_check.py +378 -0
  69. code_graph_builder/tests/test_api_find.py +231 -0
  70. code_graph_builder/tests/test_api_find_integration.py +226 -0
  71. code_graph_builder/tests/test_basic.py +78 -0
  72. code_graph_builder/tests/test_c_api_extraction.py +388 -0
  73. code_graph_builder/tests/test_call_resolution_scenarios.py +504 -0
  74. code_graph_builder/tests/test_embedder.py +411 -0
  75. code_graph_builder/tests/test_integration_semantic.py +434 -0
  76. code_graph_builder/tests/test_mcp_protocol.py +298 -0
  77. code_graph_builder/tests/test_mcp_user_flow.py +190 -0
  78. code_graph_builder/tests/test_rag.py +404 -0
  79. code_graph_builder/tests/test_settings.py +135 -0
  80. code_graph_builder/tests/test_step1_graph_build.py +264 -0
  81. code_graph_builder/tests/test_step2_api_docs.py +323 -0
  82. code_graph_builder/tests/test_step3_embedding.py +278 -0
  83. code_graph_builder/tests/test_vector_store.py +552 -0
  84. code_graph_builder/tools/__init__.py +40 -0
  85. code_graph_builder/tools/graph_query.py +495 -0
  86. code_graph_builder/tools/semantic_search.py +387 -0
  87. code_graph_builder/types.py +333 -0
  88. code_graph_builder/utils/__init__.py +0 -0
  89. code_graph_builder/utils/path_utils.py +30 -0
  90. code_graph_builder-0.2.0.dist-info/METADATA +321 -0
  91. code_graph_builder-0.2.0.dist-info/RECORD +93 -0
  92. code_graph_builder-0.2.0.dist-info/WHEEL +4 -0
  93. code_graph_builder-0.2.0.dist-info/entry_points.txt +3 -0
@@ -0,0 +1,119 @@
1
+ """Code Graph Builder - Processor Factory."""
2
+
3
+ from pathlib import Path
4
+
5
+ from ..constants import SupportedLanguage
6
+ from ..services import IngestorProtocol
7
+ from ..types import (
8
+ ASTCacheProtocol,
9
+ FunctionRegistryTrieProtocol,
10
+ LanguageQueries,
11
+ SimpleNameLookup,
12
+ )
13
+ from .call_processor import CallProcessor
14
+ from .definition_processor import DefinitionProcessor
15
+ from .import_processor import ImportProcessor
16
+ from .structure_processor import StructureProcessor
17
+ from .type_inference import TypeInferenceEngine
18
+
19
+
20
class ProcessorFactory:
    """Create the parsing processors lazily and hand out the same instances.

    Each processor is exposed as a read-only property. The first access builds
    the processor from the dependencies stored on the factory; every later
    access returns that already-built instance.
    """

    def __init__(
        self,
        ingestor: IngestorProtocol,
        repo_path: Path,
        project_name: str,
        queries: dict[SupportedLanguage, LanguageQueries],
        function_registry: FunctionRegistryTrieProtocol,
        simple_name_lookup: SimpleNameLookup,
        ast_cache: ASTCacheProtocol,
        unignore_paths: frozenset[str] | None = None,
        exclude_paths: frozenset[str] | None = None,
    ) -> None:
        """Remember the shared dependencies; no processor is built here."""
        # Processor slots stay empty until the matching property is read.
        self._import_processor: ImportProcessor | None = None
        self._structure_processor: StructureProcessor | None = None
        self._definition_processor: DefinitionProcessor | None = None
        self._type_inference: TypeInferenceEngine | None = None
        self._call_processor: CallProcessor | None = None

        # The very same dict object is passed to the definition processor and
        # to the type inference engine when they are created.
        self.module_qn_to_file_path: dict[str, Path] = {}

        self.ingestor = ingestor
        self.repo_path = repo_path
        self.project_name = project_name
        self.queries = queries
        self.function_registry = function_registry
        self.simple_name_lookup = simple_name_lookup
        self.ast_cache = ast_cache
        self.unignore_paths = unignore_paths
        self.exclude_paths = exclude_paths

    @property
    def import_processor(self) -> ImportProcessor:
        """Return the shared ImportProcessor, building it on first use."""
        existing = self._import_processor
        if existing is not None:
            return existing

        built = ImportProcessor(
            repo_path=self.repo_path,
            project_name=self.project_name,
            ingestor=self.ingestor,
            function_registry=self.function_registry,
        )
        self._import_processor = built
        return built

    @property
    def structure_processor(self) -> StructureProcessor:
        """Return the shared StructureProcessor, building it on first use."""
        existing = self._structure_processor
        if existing is not None:
            return existing

        built = StructureProcessor(
            ingestor=self.ingestor,
            repo_path=self.repo_path,
            project_name=self.project_name,
            queries=self.queries,
            unignore_paths=self.unignore_paths,
            exclude_paths=self.exclude_paths,
        )
        self._structure_processor = built
        return built

    @property
    def definition_processor(self) -> DefinitionProcessor:
        """Return the shared DefinitionProcessor, building it on first use.

        Building it also forces creation of the import processor.
        """
        existing = self._definition_processor
        if existing is not None:
            return existing

        built = DefinitionProcessor(
            ingestor=self.ingestor,
            repo_path=self.repo_path,
            project_name=self.project_name,
            function_registry=self.function_registry,
            simple_name_lookup=self.simple_name_lookup,
            import_processor=self.import_processor,
            module_qn_to_file_path=self.module_qn_to_file_path,
        )
        self._definition_processor = built
        return built

    @property
    def type_inference(self) -> TypeInferenceEngine:
        """Return the shared TypeInferenceEngine, building it on first use.

        Building it also forces creation of the import processor and the
        definition processor (whose ``class_inheritance`` it receives).
        """
        existing = self._type_inference
        if existing is not None:
            return existing

        built = TypeInferenceEngine(
            import_processor=self.import_processor,
            function_registry=self.function_registry,
            repo_path=self.repo_path,
            project_name=self.project_name,
            ast_cache=self.ast_cache,
            queries=self.queries,
            module_qn_to_file_path=self.module_qn_to_file_path,
            class_inheritance=self.definition_processor.class_inheritance,
            simple_name_lookup=self.simple_name_lookup,
        )
        self._type_inference = built
        return built

    @property
    def call_processor(self) -> CallProcessor:
        """Return the shared CallProcessor, building it on first use.

        Building it also forces creation of the import processor, the type
        inference engine and the definition processor.
        """
        existing = self._call_processor
        if existing is not None:
            return existing

        built = CallProcessor(
            ingestor=self.ingestor,
            repo_path=self.repo_path,
            project_name=self.project_name,
            function_registry=self.function_registry,
            import_processor=self.import_processor,
            type_inference=self.type_inference,
            class_inheritance=self.definition_processor.class_inheritance,
        )
        self._call_processor = built
        return built
@@ -0,0 +1,293 @@
1
+ """Code Graph Builder - Import Processor."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from pathlib import Path
6
+ from typing import TYPE_CHECKING
7
+
8
+ from loguru import logger
9
+ from tree_sitter import Node, QueryCursor
10
+
11
+ from .. import constants as cs
12
+ from ..parsers.utils import safe_decode_text
13
+ from ..services import IngestorProtocol
14
+ from ..types import FunctionRegistryTrieProtocol
15
+
16
+ if TYPE_CHECKING:
17
+ from ..types import LanguageQueries
18
+
19
+
20
+ class ImportProcessor:
21
+ """Process import statements in source code."""
22
+
23
+ def __init__(
24
+ self,
25
+ repo_path: Path,
26
+ project_name: str,
27
+ ingestor: IngestorProtocol | None = None,
28
+ function_registry: FunctionRegistryTrieProtocol | None = None,
29
+ ) -> None:
30
+ self.repo_path = repo_path
31
+ self.project_name = project_name
32
+ self.ingestor = ingestor
33
+ self.function_registry = function_registry
34
+ self.import_mapping: dict[str, dict[str, str]] = {}
35
+
36
+ def parse_imports(
37
+ self,
38
+ root_node: Node,
39
+ module_qn: str,
40
+ language: cs.SupportedLanguage,
41
+ queries: dict[cs.SupportedLanguage, LanguageQueries],
42
+ ) -> None:
43
+ """Parse imports from a file."""
44
+ if language not in queries:
45
+ return
46
+
47
+ lang_queries = queries[language]
48
+ imports_query = lang_queries.get(cs.QUERY_IMPORTS)
49
+ if not imports_query:
50
+ return
51
+
52
+ self.import_mapping[module_qn] = {}
53
+
54
+ try:
55
+ cursor = QueryCursor(imports_query)
56
+ captures = cursor.captures(root_node)
57
+
58
+ match language:
59
+ case cs.SupportedLanguage.PYTHON:
60
+ self._parse_python_imports(captures, module_qn)
61
+ case cs.SupportedLanguage.JS | cs.SupportedLanguage.TS:
62
+ self._parse_js_ts_imports(captures, module_qn)
63
+ case cs.SupportedLanguage.JAVA:
64
+ self._parse_java_imports(captures, module_qn)
65
+ case cs.SupportedLanguage.RUST:
66
+ self._parse_rust_imports(captures, module_qn)
67
+ case cs.SupportedLanguage.GO:
68
+ self._parse_go_imports(captures, module_qn)
69
+ case cs.SupportedLanguage.C | cs.SupportedLanguage.CPP:
70
+ self._parse_c_cpp_imports(captures, module_qn)
71
+ case _:
72
+ pass
73
+
74
+ logger.debug(f"Parsed {len(self.import_mapping[module_qn])} imports for {module_qn}")
75
+
76
+ if self.ingestor:
77
+ for alias, full_name in self.import_mapping[module_qn].items():
78
+ self.ingestor.ensure_relationship_batch(
79
+ (cs.NodeLabel.MODULE, cs.KEY_QUALIFIED_NAME, module_qn),
80
+ cs.RelationshipType.IMPORTS,
81
+ (cs.NodeLabel.MODULE, cs.KEY_QUALIFIED_NAME, full_name),
82
+ )
83
+
84
+ except Exception as e:
85
+ logger.warning(f"Failed to parse imports for {module_qn}: {e}")
86
+
87
+ def _parse_python_imports(self, captures: dict, module_qn: str) -> None:
88
+ """Parse Python import statements."""
89
+ import_nodes = captures.get(cs.CAPTURE_IMPORT, [])
90
+ import_from_nodes = captures.get(cs.CAPTURE_IMPORT_FROM, [])
91
+
92
+ for node in import_nodes + import_from_nodes:
93
+ if not isinstance(node, Node):
94
+ continue
95
+
96
+ if node.type == "import_statement":
97
+ self._handle_python_import(node, module_qn)
98
+ elif node.type == "import_from_statement":
99
+ self._handle_python_from_import(node, module_qn)
100
+
101
+ def _handle_python_import(self, node: Node, module_qn: str) -> None:
102
+ """Handle 'import xxx' or 'import xxx as yyy'."""
103
+ for child in node.named_children:
104
+ if child.type == "dotted_name":
105
+ name = self._get_dotted_name(child)
106
+ if name:
107
+ full_qn = f"{self.project_name}.{name.replace('.', cs.SEPARATOR_DOT)}"
108
+ self.import_mapping[module_qn][name.split(cs.SEPARATOR_DOT)[0]] = full_qn
109
+
110
+ def _handle_python_from_import(self, node: Node, module_qn: str) -> None:
111
+ """Handle 'from xxx import yyy'."""
112
+ module_node = None
113
+ for child in node.children:
114
+ if child.type == "dotted_name":
115
+ module_node = child
116
+ break
117
+
118
+ if not module_node:
119
+ return
120
+
121
+ module_name = self._get_dotted_name(module_node)
122
+ if not module_name:
123
+ return
124
+
125
+ module_prefix = f"{self.project_name}.{module_name.replace('.', cs.SEPARATOR_DOT)}"
126
+
127
+ for child in node.named_children:
128
+ if child.type == "imported_name" or child.type == "identifier":
129
+ name = safe_decode_text(child)
130
+ if name:
131
+ full_qn = f"{module_prefix}.{name}"
132
+ self.import_mapping[module_qn][name] = full_qn
133
+
134
+ def _parse_js_ts_imports(self, captures: dict, module_qn: str) -> None:
135
+ """Parse JavaScript/TypeScript imports."""
136
+ import_nodes = captures.get(cs.CAPTURE_IMPORT, [])
137
+
138
+ for node in import_nodes:
139
+ if not isinstance(node, Node):
140
+ continue
141
+
142
+ if node.type == "import_statement":
143
+ self._handle_js_ts_import(node, module_qn)
144
+
145
+ def _handle_js_ts_import(self, node: Node, module_qn: str) -> None:
146
+ """Handle ES6 import statements."""
147
+ source_node = None
148
+ for child in node.children:
149
+ if child.type == "string":
150
+ source_node = child
151
+ break
152
+
153
+ if not source_node:
154
+ return
155
+
156
+ source = safe_decode_text(source_node)
157
+ if not source:
158
+ return
159
+
160
+ source = source.strip("'\"")
161
+
162
+ for child in node.named_children:
163
+ if child.type == "import_clause":
164
+ self._process_js_import_clause(child, source, module_qn)
165
+
166
+ def _process_js_import_clause(self, node: Node, source: str, module_qn: str) -> None:
167
+ """Process import clause (default, named, or namespace imports)."""
168
+ name = safe_decode_text(node)
169
+ if name:
170
+ self.import_mapping[module_qn][name] = source
171
+
172
+ def _parse_java_imports(self, captures: dict, module_qn: str) -> None:
173
+ """Parse Java imports."""
174
+ import_nodes = captures.get(cs.CAPTURE_IMPORT, [])
175
+
176
+ for node in import_nodes:
177
+ if not isinstance(node, Node):
178
+ continue
179
+
180
+ scoped_name = None
181
+ for child in node.named_children:
182
+ if child.type == "scoped_identifier":
183
+ scoped_name = safe_decode_text(child)
184
+ break
185
+ elif child.type == "identifier":
186
+ scoped_name = safe_decode_text(child)
187
+
188
+ if scoped_name:
189
+ parts = scoped_name.split(cs.SEPARATOR_DOT)
190
+ if parts:
191
+ self.import_mapping[module_qn][parts[-1]] = scoped_name.replace(
192
+ cs.SEPARATOR_DOT, "."
193
+ )
194
+
195
+ def _parse_rust_imports(self, captures: dict, module_qn: str) -> None:
196
+ """Parse Rust use statements."""
197
+ import_nodes = captures.get(cs.CAPTURE_IMPORT, [])
198
+
199
+ for node in import_nodes:
200
+ if not isinstance(node, Node):
201
+ continue
202
+
203
+ if node.type == "use_declaration":
204
+ self._handle_rust_use(node, module_qn)
205
+
206
+ def _handle_rust_use(self, node: Node, module_qn: str) -> None:
207
+ """Handle Rust use statements."""
208
+ for child in node.named_children:
209
+ if child.type == "scoped_use_list":
210
+ prefix = None
211
+ use_list = None
212
+ for c in child.children:
213
+ if c.type == "identifier" or c.type == "scoped_identifier":
214
+ prefix = safe_decode_text(c)
215
+ elif c.type == "use_list":
216
+ use_list = c
217
+
218
+ if prefix and use_list:
219
+ for item in use_list.named_children:
220
+ name = safe_decode_text(item)
221
+ if name:
222
+ full_qn = f"{prefix}::{name}"
223
+ self.import_mapping[module_qn][name] = full_qn
224
+ elif child.type in ("scoped_identifier", "identifier"):
225
+ name = safe_decode_text(child)
226
+ if name:
227
+ parts = name.split("::")
228
+ if parts:
229
+ self.import_mapping[module_qn][parts[-1]] = name
230
+
231
+ def _parse_go_imports(self, captures: dict, module_qn: str) -> None:
232
+ """Parse Go imports."""
233
+ import_nodes = captures.get(cs.CAPTURE_IMPORT, [])
234
+
235
+ for node in import_nodes:
236
+ if not isinstance(node, Node):
237
+ continue
238
+
239
+ if node.type == "import_declaration":
240
+ for child in node.named_children:
241
+ if child.type == "import_spec":
242
+ self._handle_go_import_spec(child, module_qn)
243
+ elif child.type == "import_spec_list":
244
+ for spec in child.named_children:
245
+ if spec.type == "import_spec":
246
+ self._handle_go_import_spec(spec, module_qn)
247
+
248
+ def _handle_go_import_spec(self, node: Node, module_qn: str) -> None:
249
+ """Handle Go import specification."""
250
+ alias = None
251
+ path = None
252
+
253
+ for child in node.named_children:
254
+ if child.type == "package_identifier":
255
+ alias = safe_decode_text(child)
256
+ elif child.type == "interpreted_string_literal":
257
+ path = safe_decode_text(child)
258
+
259
+ if path:
260
+ path = path.strip('"')
261
+ key = alias if alias else path.split("/")[-1]
262
+ self.import_mapping[module_qn][key] = path
263
+
264
+ def _parse_c_cpp_imports(self, captures: dict, module_qn: str) -> None:
265
+ """Parse C/C++ #include directives."""
266
+ import_nodes = captures.get(cs.CAPTURE_IMPORT, [])
267
+
268
+ for node in import_nodes:
269
+ if not isinstance(node, Node):
270
+ continue
271
+
272
+ if node.type == "preproc_include":
273
+ for child in node.named_children:
274
+ if child.type in ("string_literal", "system_lib_string"):
275
+ header = safe_decode_text(child)
276
+ if header:
277
+ header = header.strip('"<>')
278
+ key = header.replace(".", "_")
279
+ self.import_mapping[module_qn][key] = header
280
+
281
+ def _get_dotted_name(self, node: Node) -> str | None:
282
+ """Get dotted name from a node."""
283
+ parts = []
284
+ for child in node.children:
285
+ if child.type == "identifier":
286
+ name = safe_decode_text(child)
287
+ if name:
288
+ parts.append(name)
289
+ return cs.SEPARATOR_DOT.join(parts) if parts else None
290
+
291
+ def get_import_mapping(self, module_qn: str) -> dict[str, str]:
292
+ """Get import mapping for a module."""
293
+ return self.import_mapping.get(module_qn, {})
@@ -0,0 +1,145 @@
1
+ """Code Graph Builder - Structure Processor."""
2
+
3
+ from pathlib import Path
4
+
5
+ from loguru import logger
6
+
7
+ from .. import constants as cs
8
+ from ..services import IngestorProtocol
9
+ from ..types import LanguageQueries, NodeIdentifier
10
+ from ..utils.path_utils import should_skip_path
11
+
12
+
13
class StructureProcessor:
    """Processor for identifying project structure (packages, folders, files)."""

    def __init__(
        self,
        ingestor: IngestorProtocol,
        repo_path: Path,
        project_name: str,
        queries: dict[cs.SupportedLanguage, LanguageQueries],
        unignore_paths: frozenset[str] | None = None,
        exclude_paths: frozenset[str] | None = None,
    ) -> None:
        """Store the collaborators and start with no known structure."""
        self.ingestor = ingestor
        self.repo_path = repo_path
        self.project_name = project_name
        self.queries = queries
        # repo-relative directory -> package qualified name, or None when the
        # directory is a plain folder.
        self.structural_elements: dict[Path, str | None] = {}
        self.unignore_paths = unignore_paths
        self.exclude_paths = exclude_paths

    def _get_parent_identifier(
        self, parent_rel_path: Path, parent_container_qn: str | None
    ) -> NodeIdentifier:
        """Get parent node identifier for relationship creation.

        The repository root always maps to the project node; otherwise the
        parent is a package when it has a qualified name and a folder if not.
        """
        if parent_rel_path == Path(cs.PATH_CURRENT_DIR):
            return (cs.NodeLabel.PROJECT, cs.KEY_NAME, self.project_name)
        if parent_container_qn:
            return (cs.NodeLabel.PACKAGE, cs.KEY_QUALIFIED_NAME, parent_container_qn)
        return (cs.NodeLabel.FOLDER, cs.KEY_PATH, parent_rel_path.as_posix())

    def _collect_package_indicators(self) -> set[str]:
        """Return the union of package indicator names of all language configs."""
        indicators: set[str] = set()
        for lang_queries in self.queries.values():
            lang_config = lang_queries[cs.QUERY_CONFIG]
            indicators.update(lang_config.package_indicators)
        return indicators

    def identify_structure(self) -> None:
        """Identify project structure: packages and folders.

        Creates the project node, then walks every non-skipped directory
        below ``repo_path`` in sorted order. A directory containing any
        package indicator becomes a package node, any other directory
        (except the repository root) a folder node; each is linked to its
        parent and remembered in ``structural_elements``.
        """
        # Create project node first
        self.ingestor.ensure_node_batch(
            cs.NodeLabel.PROJECT,
            {
                cs.KEY_NAME: self.project_name,
                cs.KEY_QUALIFIED_NAME: self.project_name,
                cs.KEY_PATH: str(self.repo_path),
            },
        )
        logger.info(f"Created Project node: {self.project_name}")

        directories = {self.repo_path}
        for path in self.repo_path.rglob(cs.GLOB_ALL):
            if path.is_dir() and not should_skip_path(
                path,
                self.repo_path,
                exclude_paths=self.exclude_paths,
                unignore_paths=self.unignore_paths,
            ):
                directories.add(path)

        # The indicator set depends only on the configured languages, so it is
        # computed once instead of once per directory.
        package_indicators = self._collect_package_indicators()

        # Sorted order visits a directory before its children, so the parent's
        # entry in structural_elements exists when a child looks it up.
        for root in sorted(directories):
            relative_root = root.relative_to(self.repo_path)

            parent_rel_path = relative_root.parent
            parent_container_qn = self.structural_elements.get(parent_rel_path)

            is_package = any(
                (root / indicator).exists() for indicator in package_indicators
            )

            if is_package:
                package_qn = cs.SEPARATOR_DOT.join(
                    [self.project_name] + list(relative_root.parts)
                )
                self.structural_elements[relative_root] = package_qn
                logger.info(f"Identified Package: {package_qn}")
                self.ingestor.ensure_node_batch(
                    cs.NodeLabel.PACKAGE,
                    {
                        cs.KEY_QUALIFIED_NAME: package_qn,
                        cs.KEY_NAME: root.name,
                        cs.KEY_PATH: relative_root.as_posix(),
                    },
                )
                parent_identifier = self._get_parent_identifier(
                    parent_rel_path, parent_container_qn
                )
                self.ingestor.ensure_relationship_batch(
                    parent_identifier,
                    cs.RelationshipType.CONTAINS_PACKAGE,
                    (cs.NodeLabel.PACKAGE, cs.KEY_QUALIFIED_NAME, package_qn),
                )
            elif root != self.repo_path:
                # The repository root itself is represented by the project
                # node, so it never becomes a folder.
                self.structural_elements[relative_root] = None
                logger.info(f"Identified Folder: {relative_root}")
                self.ingestor.ensure_node_batch(
                    cs.NodeLabel.FOLDER,
                    {cs.KEY_PATH: relative_root.as_posix(), cs.KEY_NAME: root.name},
                )
                parent_identifier = self._get_parent_identifier(
                    parent_rel_path, parent_container_qn
                )
                self.ingestor.ensure_relationship_batch(
                    parent_identifier,
                    cs.RelationshipType.CONTAINS_FOLDER,
                    (cs.NodeLabel.FOLDER, cs.KEY_PATH, relative_root.as_posix()),
                )

    def process_generic_file(self, file_path: Path, file_name: str) -> None:
        """Process a generic file node.

        Creates a file node keyed by its repo-relative POSIX path and links
        it to the containing project, package or folder.
        """
        relative_filepath = file_path.relative_to(self.repo_path).as_posix()
        relative_root = file_path.parent.relative_to(self.repo_path)

        parent_container_qn = self.structural_elements.get(relative_root)
        parent_identifier = self._get_parent_identifier(
            relative_root, parent_container_qn
        )

        self.ingestor.ensure_node_batch(
            cs.NodeLabel.FILE,
            {
                cs.KEY_PATH: relative_filepath,
                cs.KEY_NAME: file_name,
                cs.KEY_EXTENSION: file_path.suffix,
            },
        )

        self.ingestor.ensure_relationship_batch(
            parent_identifier,
            cs.RelationshipType.CONTAINS_FILE,
            (cs.NodeLabel.FILE, cs.KEY_PATH, relative_filepath),
        )
@@ -0,0 +1,143 @@
1
+ """Code Graph Builder - Type Inference."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from pathlib import Path
6
+ from typing import TYPE_CHECKING
7
+
8
+ from tree_sitter import Node
9
+
10
+ from .. import constants as cs
11
+ from ..parsers.utils import safe_decode_text
12
+ from ..types import ASTCacheProtocol, FunctionRegistryTrieProtocol, SimpleNameLookup
13
+
14
+ if TYPE_CHECKING:
15
+ from ..types import LanguageQueries
16
+ from .import_processor import ImportProcessor
17
+
18
+
19
class TypeInferenceEngine:
    """Best-effort inference of variable types from syntax nodes.

    Inferred types are remembered per scope, so a variable that was resolved
    once is answered from the cache on later lookups.
    """

    def __init__(
        self,
        import_processor: ImportProcessor,
        function_registry: FunctionRegistryTrieProtocol,
        repo_path: Path,
        project_name: str,
        ast_cache: ASTCacheProtocol,
        queries: dict[cs.SupportedLanguage, LanguageQueries],
        module_qn_to_file_path: dict[str, Path],
        class_inheritance: dict[str, list[str]],
        simple_name_lookup: SimpleNameLookup,
    ):
        """Keep references to the collaborators and start with an empty cache."""
        # scope qualified name -> {variable name -> inferred type}
        self._variable_types: dict[str, dict[str, str]] = {}

        self.import_processor = import_processor
        self.function_registry = function_registry
        self.repo_path = repo_path
        self.project_name = project_name
        self.ast_cache = ast_cache
        self.queries = queries
        self.module_qn_to_file_path = module_qn_to_file_path
        self.class_inheritance = class_inheritance
        self.simple_name_lookup = simple_name_lookup

    def infer_variable_type(
        self,
        var_name: str,
        scope_qn: str,
        local_node: Node | None = None,
    ) -> str | None:
        """Return the type of ``var_name`` within ``scope_qn`` if it is known.

        A cached answer wins. Otherwise, when ``local_node`` is given, its
        direct children are searched for a matching declaration and a
        successful result is cached. Returns ``None`` when nothing is found.
        """
        known_in_scope = self._variable_types.get(scope_qn)
        if known_in_scope is not None and var_name in known_in_scope:
            return known_in_scope[var_name]

        if not local_node:
            return None

        found = self._infer_from_node(var_name, local_node)
        if not found:
            return None

        self._variable_types.setdefault(scope_qn, {})[var_name] = found
        return found

    def _infer_from_node(self, var_name: str, node: Node) -> str | None:
        """Search the direct children of ``node`` for a typed declaration."""
        declaration_kinds = (
            "variable_declarator",
            "variable_declaration",
            "lexical_declaration",
        )
        for candidate in node.children:
            if candidate.type not in declaration_kinds:
                continue
            hint = self._get_type_from_declaration(candidate, var_name)
            if hint:
                return hint
        return None

    def _get_type_from_declaration(self, node: Node, var_name: str) -> str | None:
        """Return the type a declaration gives to ``var_name``, if any.

        An explicit type field takes precedence over the type guessed from
        the initial value. Declarations of other names yield ``None``.
        """
        declared = node.child_by_field_name(cs.FIELD_NAME)
        if not declared:
            return None
        if safe_decode_text(declared) != var_name:
            return None

        annotation = node.child_by_field_name(cs.FIELD_TYPE)
        if annotation:
            return safe_decode_text(annotation)

        initializer = node.child_by_field_name(cs.FIELD_VALUE)
        if initializer:
            return self._infer_from_value(initializer)

        return None

    def _infer_from_value(self, node: Node) -> str | None:
        """Map the node type of a literal value to a builtin type name.

        Node types without an entry, including ``call_expression`` (the call
        would have to be resolved first), yield ``None``.
        """
        literal_kinds = {
            **dict.fromkeys(("string", "string_literal"), "str"),
            **dict.fromkeys(("integer", "integer_literal"), "int"),
            **dict.fromkeys(("float", "floating_point_literal"), "float"),
            **dict.fromkeys(("true", "false", "boolean_literal"), "bool"),
            **dict.fromkeys(("list", "list_literal"), "list"),
            **dict.fromkeys(("dictionary", "dict_literal"), "dict"),
            "tuple": "tuple",
        }
        return literal_kinds.get(node.type)

    def get_class_for_variable(
        self,
        var_name: str,
        scope_qn: str,
        module_qn: str,
    ) -> str | None:
        """Resolve the class name behind a variable's cached type.

        The type is looked up first among the imports of ``module_qn``, then
        as a class ``<module_qn>.<type>`` present in ``class_inheritance``.
        When neither matches, the bare type name is returned. ``None`` is
        returned when no type is known for the variable.
        """
        resolved_type = self.infer_variable_type(var_name, scope_qn)
        if not resolved_type:
            return None

        # Imported names take precedence over classes local to the module.
        imported = self.import_processor.get_import_mapping(module_qn)
        if resolved_type in imported:
            return imported[resolved_type]

        local_candidate = f"{module_qn}.{resolved_type}"
        if local_candidate in self.class_inheritance:
            return local_candidate
        return resolved_type