code-graph-builder 0.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- code_graph_builder/__init__.py +82 -0
- code_graph_builder/builder.py +366 -0
- code_graph_builder/cgb_cli.py +32 -0
- code_graph_builder/cli.py +564 -0
- code_graph_builder/commands_cli.py +1288 -0
- code_graph_builder/config.py +340 -0
- code_graph_builder/constants.py +708 -0
- code_graph_builder/embeddings/__init__.py +40 -0
- code_graph_builder/embeddings/qwen3_embedder.py +573 -0
- code_graph_builder/embeddings/vector_store.py +584 -0
- code_graph_builder/examples/__init__.py +0 -0
- code_graph_builder/examples/example_configuration.py +276 -0
- code_graph_builder/examples/example_kuzu_usage.py +109 -0
- code_graph_builder/examples/example_semantic_search_full.py +347 -0
- code_graph_builder/examples/generate_wiki.py +915 -0
- code_graph_builder/examples/graph_export_example.py +100 -0
- code_graph_builder/examples/rag_example.py +206 -0
- code_graph_builder/examples/test_cli_demo.py +129 -0
- code_graph_builder/examples/test_embedding_api.py +153 -0
- code_graph_builder/examples/test_kuzu_local.py +190 -0
- code_graph_builder/examples/test_rag_redis.py +390 -0
- code_graph_builder/graph_updater.py +605 -0
- code_graph_builder/guidance/__init__.py +1 -0
- code_graph_builder/guidance/agent.py +123 -0
- code_graph_builder/guidance/prompts.py +74 -0
- code_graph_builder/guidance/toolset.py +264 -0
- code_graph_builder/language_spec.py +536 -0
- code_graph_builder/mcp/__init__.py +21 -0
- code_graph_builder/mcp/api_doc_generator.py +764 -0
- code_graph_builder/mcp/file_editor.py +207 -0
- code_graph_builder/mcp/pipeline.py +777 -0
- code_graph_builder/mcp/server.py +161 -0
- code_graph_builder/mcp/tools.py +1800 -0
- code_graph_builder/models.py +115 -0
- code_graph_builder/parser_loader.py +344 -0
- code_graph_builder/parsers/__init__.py +7 -0
- code_graph_builder/parsers/call_processor.py +306 -0
- code_graph_builder/parsers/call_resolver.py +139 -0
- code_graph_builder/parsers/definition_processor.py +796 -0
- code_graph_builder/parsers/factory.py +119 -0
- code_graph_builder/parsers/import_processor.py +293 -0
- code_graph_builder/parsers/structure_processor.py +145 -0
- code_graph_builder/parsers/type_inference.py +143 -0
- code_graph_builder/parsers/utils.py +134 -0
- code_graph_builder/rag/__init__.py +68 -0
- code_graph_builder/rag/camel_agent.py +429 -0
- code_graph_builder/rag/client.py +298 -0
- code_graph_builder/rag/config.py +239 -0
- code_graph_builder/rag/cypher_generator.py +67 -0
- code_graph_builder/rag/llm_backend.py +210 -0
- code_graph_builder/rag/markdown_generator.py +352 -0
- code_graph_builder/rag/prompt_templates.py +440 -0
- code_graph_builder/rag/rag_engine.py +640 -0
- code_graph_builder/rag/review_report.md +172 -0
- code_graph_builder/rag/tests/__init__.py +3 -0
- code_graph_builder/rag/tests/test_camel_agent.py +313 -0
- code_graph_builder/rag/tests/test_client.py +221 -0
- code_graph_builder/rag/tests/test_config.py +177 -0
- code_graph_builder/rag/tests/test_markdown_generator.py +240 -0
- code_graph_builder/rag/tests/test_prompt_templates.py +160 -0
- code_graph_builder/services/__init__.py +39 -0
- code_graph_builder/services/graph_service.py +465 -0
- code_graph_builder/services/kuzu_service.py +665 -0
- code_graph_builder/services/memory_service.py +171 -0
- code_graph_builder/settings.py +75 -0
- code_graph_builder/tests/ACCEPTANCE_CRITERIA_PHASE2.md +401 -0
- code_graph_builder/tests/__init__.py +1 -0
- code_graph_builder/tests/run_acceptance_check.py +378 -0
- code_graph_builder/tests/test_api_find.py +231 -0
- code_graph_builder/tests/test_api_find_integration.py +226 -0
- code_graph_builder/tests/test_basic.py +78 -0
- code_graph_builder/tests/test_c_api_extraction.py +388 -0
- code_graph_builder/tests/test_call_resolution_scenarios.py +504 -0
- code_graph_builder/tests/test_embedder.py +411 -0
- code_graph_builder/tests/test_integration_semantic.py +434 -0
- code_graph_builder/tests/test_mcp_protocol.py +298 -0
- code_graph_builder/tests/test_mcp_user_flow.py +190 -0
- code_graph_builder/tests/test_rag.py +404 -0
- code_graph_builder/tests/test_settings.py +135 -0
- code_graph_builder/tests/test_step1_graph_build.py +264 -0
- code_graph_builder/tests/test_step2_api_docs.py +323 -0
- code_graph_builder/tests/test_step3_embedding.py +278 -0
- code_graph_builder/tests/test_vector_store.py +552 -0
- code_graph_builder/tools/__init__.py +40 -0
- code_graph_builder/tools/graph_query.py +495 -0
- code_graph_builder/tools/semantic_search.py +387 -0
- code_graph_builder/types.py +333 -0
- code_graph_builder/utils/__init__.py +0 -0
- code_graph_builder/utils/path_utils.py +30 -0
- code_graph_builder-0.2.0.dist-info/METADATA +321 -0
- code_graph_builder-0.2.0.dist-info/RECORD +93 -0
- code_graph_builder-0.2.0.dist-info/WHEEL +4 -0
- code_graph_builder-0.2.0.dist-info/entry_points.txt +3 -0
|
@@ -0,0 +1,796 @@
|
|
|
1
|
+
"""Definition processor for ingesting code definitions."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from pathlib import Path
|
|
6
|
+
from typing import TYPE_CHECKING
|
|
7
|
+
|
|
8
|
+
from loguru import logger
|
|
9
|
+
from tree_sitter import Node, QueryCursor
|
|
10
|
+
|
|
11
|
+
from .. import constants as cs
|
|
12
|
+
from ..services import IngestorProtocol
|
|
13
|
+
from ..types import LanguageQueries, NodeType, PropertyDict, SimpleNameLookup
|
|
14
|
+
from .utils import safe_decode_text
|
|
15
|
+
|
|
16
|
+
if TYPE_CHECKING:
|
|
17
|
+
from ..types import FunctionRegistryTrieProtocol
|
|
18
|
+
from .import_processor import ImportProcessor
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
class DefinitionProcessor:
|
|
22
|
+
"""Process file definitions (functions, classes, methods)."""
|
|
23
|
+
|
|
24
|
+
# C language storage class specifiers that indicate static (file-local) visibility
|
|
25
|
+
_C_STATIC_SPECIFIER = "storage_class_specifier"
|
|
26
|
+
|
|
27
|
+
def __init__(
|
|
28
|
+
self,
|
|
29
|
+
ingestor: IngestorProtocol,
|
|
30
|
+
repo_path: Path,
|
|
31
|
+
project_name: str,
|
|
32
|
+
function_registry: FunctionRegistryTrieProtocol,
|
|
33
|
+
simple_name_lookup: SimpleNameLookup,
|
|
34
|
+
import_processor: ImportProcessor,
|
|
35
|
+
module_qn_to_file_path: dict[str, Path],
|
|
36
|
+
):
|
|
37
|
+
self.ingestor = ingestor
|
|
38
|
+
self.repo_path = repo_path
|
|
39
|
+
self.project_name = project_name
|
|
40
|
+
self.function_registry = function_registry
|
|
41
|
+
self.simple_name_lookup = simple_name_lookup
|
|
42
|
+
self.import_processor = import_processor
|
|
43
|
+
self.module_qn_to_file_path = module_qn_to_file_path
|
|
44
|
+
self.class_inheritance: dict[str, list[str]] = {}
|
|
45
|
+
# Track function declarations found in header files for visibility resolution
|
|
46
|
+
self._header_declarations: set[str] = set()
|
|
47
|
+
|
|
48
|
+
def process_file(
    self,
    file_path: Path,
    language: cs.SupportedLanguage,
    queries: dict[cs.SupportedLanguage, LanguageQueries],
    structural_elements: dict[Path, str | None],
) -> tuple[Node, cs.SupportedLanguage] | None:
    """Parse one source file and ingest the definitions found in it.

    The file is parsed with the parser registered for ``language``, a
    module node and its containment relationship are emitted, imports are
    handed to the import processor, and functions and classes (plus
    typedefs and macros for C) are ingested.

    Returns ``(root_node, language)`` on success. Returns ``None`` when no
    queries or parser exist for the language, or when any exception is
    raised inside the processing steps (the error is logged).

    Note that ``file_path.relative_to(self.repo_path)`` runs before the
    ``try`` block, so an exception raised there propagates to the caller.
    """
    relative_path = file_path.relative_to(self.repo_path)
    logger.info(f"Processing file: {relative_path}")

    try:
        lang_queries = queries.get(language)
        if not lang_queries:
            logger.warning(f"No queries for language: {language}")
            return None

        parser = lang_queries.get("parser")
        if not parser:
            logger.warning(f"No parser for language: {language}")
            return None

        tree = parser.parse(file_path.read_bytes())
        root_node = tree.root_node

        # Package initialiser files are named after their directory;
        # every other file is named after its suffix-less relative path.
        if file_path.name in (cs.INIT_PY, cs.MOD_RS):
            qn_parts = relative_path.parent.parts
        else:
            qn_parts = relative_path.with_suffix("").parts
        module_qn = cs.SEPARATOR_DOT.join([self.project_name, *qn_parts])

        # Registered before ingestion: _ingest_functions looks the path
        # up again from the module qualified name.
        self.module_qn_to_file_path[module_qn] = file_path

        self._create_module_node(module_qn, file_path.name, str(relative_path))
        self._create_module_relationships(
            module_qn, relative_path, structural_elements
        )

        self.import_processor.parse_imports(root_node, module_qn, language, queries)

        self._ingest_functions(root_node, module_qn, language, queries)
        self._ingest_classes(root_node, module_qn, language, queries)

        # Typedefs and macros are only extracted for C sources.
        if language == cs.SupportedLanguage.C:
            self._ingest_c_typedefs(root_node, module_qn, queries)
            self._ingest_c_macros(root_node, module_qn, queries)

        return (root_node, language)

    except Exception as e:
        logger.error(f"Error processing {file_path}: {e}")
        return None
|
|
110
|
+
|
|
111
|
+
def _create_module_node(self, module_qn: str, name: str, path: str) -> None:
    """Queue a Module node carrying its qualified name, file name and path."""
    module_props = {
        cs.KEY_QUALIFIED_NAME: module_qn,
        cs.KEY_NAME: name,
        cs.KEY_PATH: path,
    }
    self.ingestor.ensure_node_batch(cs.NodeLabel.MODULE, module_props)
|
|
121
|
+
|
|
122
|
+
def _create_module_relationships(
    self,
    module_qn: str,
    relative_path: Path,
    structural_elements: dict[Path, str | None],
) -> None:
    """Link the module to whatever contains it.

    The parent is chosen in this order:

    1. a package, when ``structural_elements`` maps the module's parent
       directory to a non-empty qualified name;
    2. the project itself, when the module sits at the repository root;
    3. otherwise the folder identified by the parent directory's path.
    """
    parent_dir = relative_path.parent
    package_qn = structural_elements.get(parent_dir)

    if package_qn:
        parent_ref = (cs.NodeLabel.PACKAGE, cs.KEY_QUALIFIED_NAME, package_qn)
    elif parent_dir == Path("."):
        parent_ref = (cs.NodeLabel.PROJECT, cs.KEY_NAME, self.project_name)
    else:
        parent_ref = (cs.NodeLabel.FOLDER, cs.KEY_PATH, str(parent_dir))

    self.ingestor.ensure_relationship_batch(
        parent_ref,
        cs.RelationshipType.CONTAINS_MODULE,
        (cs.NodeLabel.MODULE, cs.KEY_QUALIFIED_NAME, module_qn),
    )
|
|
156
|
+
|
|
157
|
+
def _ingest_functions(
    self,
    root_node: Node,
    module_qn: str,
    language: cs.SupportedLanguage,
    queries: dict[cs.SupportedLanguage, LanguageQueries],
) -> None:
    """Ingest the free functions captured under ``root_node``.

    For each captured function that ``_is_method`` does not claim, a
    Function node is queued, the function is recorded in
    ``function_registry`` and ``simple_name_lookup``, and a DEFINES
    relationship from the module is queued. For C sources the node also
    gets a docstring (from the preceding comment), return type,
    parameters, signature and visibility.

    Does nothing when the language has no queries or no "functions"
    query. Any exception raised while iterating is logged at debug level
    and ends the pass for this file.
    """
    lang_queries = queries.get(language)
    func_query = lang_queries.get("functions") if lang_queries else None
    if not func_query:
        return

    # The file path is recovered from the module name so that header
    # files can be told apart for the visibility analysis.
    source_path = self.module_qn_to_file_path.get(module_qn)
    is_header = source_path is not None and source_path.suffix == cs.EXT_H
    is_c_lang = language == cs.SupportedLanguage.C

    try:
        captures = QueryCursor(func_query).captures(root_node)

        for func_node in captures.get(cs.CAPTURE_FUNCTION, []):
            if not isinstance(func_node, Node):
                continue

            # Methods are ingested by the class pass instead.
            if self._is_method(func_node, lang_queries.get("config")):
                continue

            func_name = self._extract_function_name(func_node)
            if not func_name:
                continue

            func_qn = f"{module_qn}.{func_name}"
            func_props: PropertyDict = {
                cs.KEY_QUALIFIED_NAME: func_qn,
                cs.KEY_NAME: func_name,
                # Tree-sitter rows are 0-based; stored lines are 1-based.
                cs.KEY_START_LINE: func_node.start_point[0] + 1,
                cs.KEY_END_LINE: func_node.end_point[0] + 1,
            }

            if is_c_lang:
                # The comment directly above the function is its docstring.
                c_docstring = self._extract_c_comment(func_node)
                if c_docstring:
                    func_props[cs.KEY_DOCSTRING] = c_docstring

                # API interface properties.
                return_type = self._extract_c_return_type(func_node)
                parameters = self._extract_c_parameters(func_node)
                visibility = self._extract_c_visibility(func_node, is_header)
                signature = self._build_c_signature(
                    func_name, return_type, parameters
                )
                func_props[cs.KEY_RETURN_TYPE] = return_type
                func_props[cs.KEY_PARAMETERS] = parameters
                func_props[cs.KEY_SIGNATURE] = signature
                func_props[cs.KEY_VISIBILITY] = visibility

                # Remembered after visibility is computed, so later files
                # can treat the same name as publicly declared.
                if is_header:
                    self._header_declarations.add(func_name)

            logger.info(f" Found function: {func_name}")
            self.ingestor.ensure_node_batch(cs.NodeLabel.FUNCTION, func_props)

            self.function_registry[func_qn] = NodeType.FUNCTION
            if func_name not in self.simple_name_lookup:
                self.simple_name_lookup[func_name] = set()
            self.simple_name_lookup[func_name].add(func_qn)

            self.ingestor.ensure_relationship_batch(
                (cs.NodeLabel.MODULE, cs.KEY_QUALIFIED_NAME, module_qn),
                cs.RelationshipType.DEFINES,
                (cs.NodeLabel.FUNCTION, cs.KEY_QUALIFIED_NAME, func_qn),
            )

    except Exception as e:
        logger.debug(f"Error ingesting functions: {e}")
|
|
243
|
+
|
|
244
|
+
def _ingest_classes(
    self,
    root_node: Node,
    module_qn: str,
    language: cs.SupportedLanguage,
    queries: dict[cs.SupportedLanguage, LanguageQueries],
) -> None:
    """Ingest the classes captured under ``root_node`` and their methods.

    For each named capture a Class node and a DEFINES relationship from
    the module are queued, then the class body is scanned for methods.
    For C sources (struct/union/enum) the node additionally receives a
    docstring from the preceding comment, its kind, its member list and a
    summary signature.

    Does nothing when the language has no queries or no "classes" query.
    Any exception raised while iterating is logged at debug level and
    ends the pass for this file.
    """
    lang_queries = queries.get(language)
    class_query = lang_queries.get("classes") if lang_queries else None
    if not class_query:
        return

    is_c_lang = language == cs.SupportedLanguage.C

    try:
        captures = QueryCursor(class_query).captures(root_node)

        for class_node in captures.get(cs.CAPTURE_CLASS, []):
            if not isinstance(class_node, Node):
                continue

            class_name = self._extract_class_name(class_node)
            if not class_name:
                continue

            class_qn = f"{module_qn}.{class_name}"
            class_props: PropertyDict = {
                cs.KEY_QUALIFIED_NAME: class_qn,
                cs.KEY_NAME: class_name,
                # Tree-sitter rows are 0-based; stored lines are 1-based.
                cs.KEY_START_LINE: class_node.start_point[0] + 1,
                cs.KEY_END_LINE: class_node.end_point[0] + 1,
            }

            if is_c_lang:
                # The comment directly above the type is its docstring.
                c_docstring = self._extract_c_comment(class_node)
                if c_docstring:
                    class_props[cs.KEY_DOCSTRING] = c_docstring

                # Members are stored under the parameters key and folded
                # into a one-line signature.
                kind = self._c_class_kind(class_node)
                members = self._extract_c_members(class_node)
                class_props[cs.KEY_KIND] = kind
                class_props[cs.KEY_PARAMETERS] = members
                class_props[cs.KEY_SIGNATURE] = self._build_c_class_signature(
                    kind, class_name, members
                )

            logger.info(f" Found class: {class_name}")
            self.ingestor.ensure_node_batch(cs.NodeLabel.CLASS, class_props)

            self.ingestor.ensure_relationship_batch(
                (cs.NodeLabel.MODULE, cs.KEY_QUALIFIED_NAME, module_qn),
                cs.RelationshipType.DEFINES,
                (cs.NodeLabel.CLASS, cs.KEY_QUALIFIED_NAME, class_qn),
            )

            self._ingest_class_methods(
                class_node, class_qn, module_qn, language, queries
            )

    except Exception as e:
        logger.debug(f"Error ingesting classes: {e}")
|
|
316
|
+
|
|
317
|
+
def _ingest_class_methods(
    self,
    class_node: Node,
    class_qn: str,
    module_qn: str,
    language: cs.SupportedLanguage,
    queries: dict[cs.SupportedLanguage, LanguageQueries],
) -> None:
    """Ingest the functions found inside a class body as methods.

    The language's "functions" query is run against the class's body
    field. Each named capture becomes a Method node, is recorded in
    ``function_registry`` and ``simple_name_lookup``, and is linked to
    the class with a DEFINES_METHOD relationship.

    ``module_qn`` is accepted but not used by this method. Does nothing
    when queries, the "functions" query or the class body are missing.
    Any exception raised is logged at debug level.
    """
    lang_queries = queries.get(language)
    func_query = lang_queries.get("functions") if lang_queries else None
    if not func_query:
        return

    try:
        body_node = class_node.child_by_field_name(cs.FIELD_BODY)
        if not body_node:
            return

        captures = QueryCursor(func_query).captures(body_node)

        for method_node in captures.get(cs.CAPTURE_FUNCTION, []):
            if not isinstance(method_node, Node):
                continue

            method_name = self._extract_function_name(method_node)
            if not method_name:
                continue

            method_qn = f"{class_qn}.{method_name}"
            method_props: PropertyDict = {
                cs.KEY_QUALIFIED_NAME: method_qn,
                cs.KEY_NAME: method_name,
                # Tree-sitter rows are 0-based; stored lines are 1-based.
                cs.KEY_START_LINE: method_node.start_point[0] + 1,
                cs.KEY_END_LINE: method_node.end_point[0] + 1,
            }

            logger.info(f" Found method: {method_name}")
            self.ingestor.ensure_node_batch(cs.NodeLabel.METHOD, method_props)

            self.function_registry[method_qn] = NodeType.METHOD
            if method_name not in self.simple_name_lookup:
                self.simple_name_lookup[method_name] = set()
            self.simple_name_lookup[method_name].add(method_qn)

            self.ingestor.ensure_relationship_batch(
                (cs.NodeLabel.CLASS, cs.KEY_QUALIFIED_NAME, class_qn),
                cs.RelationshipType.DEFINES_METHOD,
                (cs.NodeLabel.METHOD, cs.KEY_QUALIFIED_NAME, method_qn),
            )

    except Exception as e:
        logger.debug(f"Error ingesting class methods: {e}")
|
|
374
|
+
|
|
375
|
+
# -----------------------------------------------------------------
|
|
376
|
+
# C/C++ comment extraction
|
|
377
|
+
# -----------------------------------------------------------------
|
|
378
|
+
|
|
379
|
+
@staticmethod
|
|
380
|
+
def _extract_c_comment(func_node: Node) -> str | None:
|
|
381
|
+
"""Extract comment block immediately above a C/C++ function node.
|
|
382
|
+
|
|
383
|
+
Handles:
|
|
384
|
+
- Single-line: ``// comment``
|
|
385
|
+
- Multi-line: ``/* comment */``
|
|
386
|
+
- Block of consecutive ``//`` lines
|
|
387
|
+
- Doxygen-style: ``/** ... */`` or ``/// ...``
|
|
388
|
+
|
|
389
|
+
Returns cleaned comment text or *None*.
|
|
390
|
+
"""
|
|
391
|
+
comment_lines: list[str] = []
|
|
392
|
+
|
|
393
|
+
# Walk backwards through previous siblings to collect comment nodes
|
|
394
|
+
current = func_node.prev_named_sibling
|
|
395
|
+
if current is None:
|
|
396
|
+
current = func_node.prev_sibling
|
|
397
|
+
|
|
398
|
+
last_end_line = func_node.start_point[0] # 0-based line number
|
|
399
|
+
|
|
400
|
+
while current is not None:
|
|
401
|
+
if current.type != "comment":
|
|
402
|
+
break
|
|
403
|
+
|
|
404
|
+
# Check adjacency: comment must be within 1 line of the function
|
|
405
|
+
# or the previous comment we already collected.
|
|
406
|
+
if last_end_line - current.end_point[0] > 1:
|
|
407
|
+
break
|
|
408
|
+
|
|
409
|
+
text = safe_decode_text(current)
|
|
410
|
+
if text is None:
|
|
411
|
+
break
|
|
412
|
+
|
|
413
|
+
comment_lines.insert(0, text)
|
|
414
|
+
last_end_line = current.start_point[0]
|
|
415
|
+
|
|
416
|
+
prev = current.prev_named_sibling
|
|
417
|
+
if prev is None:
|
|
418
|
+
prev = current.prev_sibling
|
|
419
|
+
current = prev
|
|
420
|
+
|
|
421
|
+
if not comment_lines:
|
|
422
|
+
return None
|
|
423
|
+
|
|
424
|
+
# Clean comment markers
|
|
425
|
+
cleaned: list[str] = []
|
|
426
|
+
for line in comment_lines:
|
|
427
|
+
line = line.strip()
|
|
428
|
+
# Block comment: /* ... */ or /** ... */
|
|
429
|
+
if line.startswith("/*"):
|
|
430
|
+
line = line[2:]
|
|
431
|
+
if line.startswith("*"): # /** doxygen */
|
|
432
|
+
line = line[1:]
|
|
433
|
+
if line.endswith("*/"):
|
|
434
|
+
line = line[:-2]
|
|
435
|
+
# Line comment: // or ///
|
|
436
|
+
if line.startswith("//"):
|
|
437
|
+
line = line[2:]
|
|
438
|
+
if line.startswith("/"): # /// doxygen
|
|
439
|
+
line = line[1:]
|
|
440
|
+
# Interior block comment lines: * text
|
|
441
|
+
if line.startswith("*"):
|
|
442
|
+
line = line[1:]
|
|
443
|
+
|
|
444
|
+
line = line.strip()
|
|
445
|
+
if line and not all(ch in "-=*#~" for ch in line):
|
|
446
|
+
cleaned.append(line)
|
|
447
|
+
|
|
448
|
+
return "\n".join(cleaned) if cleaned else None
|
|
449
|
+
|
|
450
|
+
# -----------------------------------------------------------------
|
|
451
|
+
# C language API interface extraction helpers
|
|
452
|
+
# -----------------------------------------------------------------
|
|
453
|
+
|
|
454
|
+
def _extract_c_return_type(self, func_node: Node) -> str | None:
    """Return the return type text of a C function node, if any.

    The node's ``type`` field is used when it is present and non-empty.
    Otherwise the direct children are scanned in order and the text of
    every type-specifier child is collected, stopping at the declarator;
    the collected pieces are joined with single spaces.

    Returns *None* when neither approach yields any text.
    """
    type_node = func_node.child_by_field_name(cs.FIELD_TYPE)
    if type_node and type_node.text:
        return safe_decode_text(type_node)

    # Fallback: gather the type specifiers that precede the declarator.
    specifier_types = (
        "primitive_type",
        "sized_type_specifier",
        "type_identifier",
        "struct_specifier",
        "union_specifier",
        "enum_specifier",
    )
    stop_types = (cs.FIELD_DECLARATOR, "function_declarator")

    collected: list[str] = []
    for child in func_node.children:
        child_type = child.type
        if child_type in specifier_types:
            decoded = safe_decode_text(child)
            if decoded:
                collected.append(decoded)
        elif child_type in stop_types:
            break

    return " ".join(collected) if collected else None
|
|
483
|
+
|
|
484
|
+
def _extract_c_parameters(self, func_node: Node) -> list[str]:
    """Return the parameters of a C function as source-text strings.

    Each ``parameter_declaration`` contributes its decoded text (for
    example ``"const char *buf"``); a variadic parameter contributes
    ``"..."``. Returns an empty list when no parameter list is found.
    """
    params_node = self._find_c_parameter_list(func_node)
    if not params_node:
        return []

    collected: list[str] = []
    for entry in params_node.children:
        entry_type = entry.type
        if entry_type == "variadic_parameter":
            collected.append("...")
        elif entry_type == "parameter_declaration":
            decoded = safe_decode_text(entry)
            if decoded:
                collected.append(decoded)
    return collected
|
|
503
|
+
|
|
504
|
+
def _find_c_parameter_list(self, func_node: Node) -> Node | None:
    """Locate the parameter list node within a C function AST node.

    Starting from the node's declarator field, the chain of nested
    declarator fields is followed until a ``function_declarator`` is
    reached; its parameters field is returned. Following the whole chain
    (rather than a single level) lets functions whose declarator is
    wrapped more than once, such as ``int **func(void)``, be resolved.

    Returns *None* when no ``function_declarator`` is found on the chain
    or when it has no parameters field.
    """
    declarator = func_node.child_by_field_name(cs.FIELD_DECLARATOR)
    while declarator is not None:
        if declarator.type == "function_declarator":
            return declarator.child_by_field_name(cs.FIELD_PARAMETERS)
        # Wrapper (e.g. pointer_declarator): step to the declarator it wraps.
        declarator = declarator.child_by_field_name(cs.FIELD_DECLARATOR)
    return None
|
|
516
|
+
|
|
517
|
+
def _extract_c_visibility(self, func_node: Node, is_header: bool) -> str:
|
|
518
|
+
"""Determine C function visibility.
|
|
519
|
+
|
|
520
|
+
Rules:
|
|
521
|
+
- ``static`` keyword → "static" (file-local, private)
|
|
522
|
+
- Declared in a ``.h`` header file → "public"
|
|
523
|
+
- Function name found in a previously processed header → "public"
|
|
524
|
+
- Otherwise → "extern" (external linkage but not declared in a header)
|
|
525
|
+
"""
|
|
526
|
+
# Check for ``static`` storage class specifier
|
|
527
|
+
for child in func_node.children:
|
|
528
|
+
if child.type == self._C_STATIC_SPECIFIER:
|
|
529
|
+
text = safe_decode_text(child)
|
|
530
|
+
if text and "static" in text:
|
|
531
|
+
return "static"
|
|
532
|
+
if is_header:
|
|
533
|
+
return "public"
|
|
534
|
+
# Check if this function was declared in a previously processed header
|
|
535
|
+
func_name = self._extract_function_name(func_node)
|
|
536
|
+
if func_name and func_name in self._header_declarations:
|
|
537
|
+
return "public"
|
|
538
|
+
return "extern"
|
|
539
|
+
|
|
540
|
+
@staticmethod
|
|
541
|
+
def _build_c_signature(
|
|
542
|
+
name: str,
|
|
543
|
+
return_type: str | None,
|
|
544
|
+
parameters: list[str],
|
|
545
|
+
) -> str:
|
|
546
|
+
"""Build a full C function signature string."""
|
|
547
|
+
ret = return_type or "void"
|
|
548
|
+
params = ", ".join(parameters) if parameters else "void"
|
|
549
|
+
return f"{ret} {name}({params})"
|
|
550
|
+
|
|
551
|
+
# -----------------------------------------------------------------
|
|
552
|
+
# C struct/union/enum member extraction
|
|
553
|
+
# -----------------------------------------------------------------
|
|
554
|
+
|
|
555
|
+
@staticmethod
|
|
556
|
+
def _c_class_kind(class_node: Node) -> str:
|
|
557
|
+
"""Return the C type kind: 'struct', 'union', or 'enum'."""
|
|
558
|
+
node_type = class_node.type
|
|
559
|
+
if node_type == "struct_specifier":
|
|
560
|
+
return "struct"
|
|
561
|
+
if node_type == "union_specifier":
|
|
562
|
+
return "union"
|
|
563
|
+
if node_type == "enum_specifier":
|
|
564
|
+
return "enum"
|
|
565
|
+
return "struct"
|
|
566
|
+
|
|
567
|
+
@staticmethod
|
|
568
|
+
def _extract_c_members(class_node: Node) -> list[str]:
|
|
569
|
+
"""Extract member declarations from a C struct/union/enum.
|
|
570
|
+
|
|
571
|
+
For struct/union: returns field declarations like ``["int x", "char *name"]``.
|
|
572
|
+
For enum: returns enumerator names like ``["RED", "GREEN", "BLUE"]``.
|
|
573
|
+
"""
|
|
574
|
+
members: list[str] = []
|
|
575
|
+
body = class_node.child_by_field_name("body")
|
|
576
|
+
if not body:
|
|
577
|
+
return members
|
|
578
|
+
|
|
579
|
+
for child in body.children:
|
|
580
|
+
if child.type == "field_declaration":
|
|
581
|
+
text = safe_decode_text(child)
|
|
582
|
+
if text:
|
|
583
|
+
# Strip trailing semicolons
|
|
584
|
+
members.append(text.rstrip(";").strip())
|
|
585
|
+
elif child.type == "enumerator":
|
|
586
|
+
name_node = child.child_by_field_name("name")
|
|
587
|
+
if name_node:
|
|
588
|
+
text = safe_decode_text(name_node)
|
|
589
|
+
if text:
|
|
590
|
+
members.append(text)
|
|
591
|
+
return members
|
|
592
|
+
|
|
593
|
+
@staticmethod
|
|
594
|
+
def _build_c_class_signature(kind: str, name: str, members: list[str]) -> str:
|
|
595
|
+
"""Build a summary signature for a C struct/union/enum."""
|
|
596
|
+
if not members:
|
|
597
|
+
return f"{kind} {name}"
|
|
598
|
+
member_str = "; ".join(members)
|
|
599
|
+
return f"{kind} {name} {{ {member_str} }}"
|
|
600
|
+
|
|
601
|
+
# -----------------------------------------------------------------
|
|
602
|
+
# C typedef extraction
|
|
603
|
+
# -----------------------------------------------------------------
|
|
604
|
+
|
|
605
|
+
def _ingest_c_typedefs(
    self,
    root_node: Node,
    module_qn: str,
    queries: dict[cs.SupportedLanguage, LanguageQueries],
) -> None:
    """Ingest C typedef declarations as Type nodes.

    Each captured typedef with a resolvable name becomes a Type node of
    kind "typedef" whose signature is the declaration text without its
    trailing semicolon, with the preceding comment (if any) attached as
    docstring, and is linked to the module with a DEFINES relationship.

    Does nothing when the C queries or the "typedefs" query are missing.
    Any exception raised while iterating is logged at debug level.
    """
    lang_queries = queries.get(cs.SupportedLanguage.C)
    typedef_query = lang_queries.get("typedefs") if lang_queries else None
    if not typedef_query:
        return

    try:
        captures = QueryCursor(typedef_query).captures(root_node)

        for td_node in captures.get(cs.CAPTURE_TYPEDEF, []):
            if not isinstance(td_node, Node):
                continue

            td_name = self._extract_c_typedef_name(td_node)
            if not td_name:
                continue

            td_qn = f"{module_qn}.{td_name}"

            # Fall back to a synthetic signature when the text is unavailable.
            td_text = safe_decode_text(td_node)
            if td_text:
                signature = td_text.rstrip(";").strip()
            else:
                signature = f"typedef {td_name}"

            c_docstring = self._extract_c_comment(td_node)

            td_props: PropertyDict = {
                cs.KEY_QUALIFIED_NAME: td_qn,
                cs.KEY_NAME: td_name,
                # Tree-sitter rows are 0-based; stored lines are 1-based.
                cs.KEY_START_LINE: td_node.start_point[0] + 1,
                cs.KEY_END_LINE: td_node.end_point[0] + 1,
                cs.KEY_SIGNATURE: signature,
                cs.KEY_KIND: "typedef",
            }
            if c_docstring:
                td_props[cs.KEY_DOCSTRING] = c_docstring

            logger.info(f" Found typedef: {td_name}")
            self.ingestor.ensure_node_batch(cs.NodeLabel.TYPE, td_props)

            self.ingestor.ensure_relationship_batch(
                (cs.NodeLabel.MODULE, cs.KEY_QUALIFIED_NAME, module_qn),
                cs.RelationshipType.DEFINES,
                (cs.NodeLabel.TYPE, cs.KEY_QUALIFIED_NAME, td_qn),
            )

    except Exception as e:
        logger.debug(f"Error ingesting typedefs: {e}")
|
|
661
|
+
|
|
662
|
+
@staticmethod
def _extract_c_typedef_name(td_node: Node) -> str | None:
    """Extract the name introduced by a typedef.

    The ``type_definition`` node has a ``declarator`` field.  For a plain
    alias (``typedef int my_int;``) that field is the new name itself.  For
    pointer, array and function-pointer typedefs the name is wrapped in one
    or more declarator nodes (``typedef int **pp;``,
    ``typedef int (*handler)(int);``), so the declarator chain is walked
    inward until the name node is reached.

    Args:
        td_node: The ``type_definition`` node to inspect.

    Returns:
        The decoded typedef name, or ``None`` when the node has no
        declarator or no name node can be reached from it.
    """
    # A typedef name is normally a ``type_identifier``; names the grammar
    # treats as built-in (e.g. ``size_t``) show up as ``primitive_type``.
    name_types = ("type_identifier", "primitive_type")

    current = td_node.child_by_field_name("declarator")
    while current is not None:
        if current.type in name_types:
            return safe_decode_text(current)

        nested = current.child_by_field_name("declarator")
        if nested is None:
            # Parenthesized / attributed declarators hold the inner
            # declarator as a plain child without a field name.
            nested = next(
                (
                    child
                    for child in current.named_children
                    if child.type in name_types
                    or child.type.endswith("declarator")
                ),
                None,
            )
        current = nested
    return None
|
|
678
|
+
|
|
679
|
+
# -----------------------------------------------------------------
|
|
680
|
+
# C macro extraction
|
|
681
|
+
# -----------------------------------------------------------------
|
|
682
|
+
|
|
683
|
+
def _ingest_c_macros(
    self,
    root_node: Node,
    module_qn: str,
    queries: dict[cs.SupportedLanguage, LanguageQueries],
) -> None:
    """Register captured C ``#define`` macros as Function nodes of kind ``macro``.

    Runs the C ``"macros"`` query over ``root_node``.  For every captured
    macro that has a name, a Function node is queued on the ingestor and a
    DEFINES relationship is queued from the module to that node.

    Args:
        root_node: Root of the parsed syntax tree to search.
        module_qn: Qualified name of the containing module; used as the
            prefix of each macro's qualified name.
        queries: Per-language query tables.  Nothing happens when the C
            entry or its ``"macros"`` query is missing.

    Any exception raised while querying or ingesting is logged at debug
    level and suppressed; because the ``try`` wraps the whole loop, a
    failure also stops processing of the remaining macros.
    """
    c_queries = queries.get(cs.SupportedLanguage.C)
    if not c_queries:
        return
    macro_query = c_queries.get("macros")
    if not macro_query:
        return

    try:
        found = QueryCursor(macro_query).captures(root_node)

        for macro_node in found.get(cs.CAPTURE_MACRO, []):
            if not isinstance(macro_node, Node):
                continue

            macro_name = self._extract_c_macro_name(macro_node)
            if not macro_name:
                continue

            qualified = f"{module_qn}.{macro_name}"

            # Use the macro's own source text as signature when available.
            raw_text = safe_decode_text(macro_node)
            if raw_text:
                signature = raw_text.strip()
            else:
                signature = f"#define {macro_name}"

            doc_comment = self._extract_c_comment(macro_node)

            props: PropertyDict = {
                cs.KEY_QUALIFIED_NAME: qualified,
                cs.KEY_NAME: macro_name,
                # tree-sitter rows are 0-based; stored lines are 1-based.
                cs.KEY_START_LINE: macro_node.start_point[0] + 1,
                cs.KEY_END_LINE: macro_node.end_point[0] + 1,
                cs.KEY_SIGNATURE: signature,
                cs.KEY_KIND: "macro",
                cs.KEY_VISIBILITY: "public",
            }
            if doc_comment:
                props[cs.KEY_DOCSTRING] = doc_comment

            logger.info(f" Found macro: {macro_name}")
            self.ingestor.ensure_node_batch(cs.NodeLabel.FUNCTION, props)

            self.ingestor.ensure_relationship_batch(
                (cs.NodeLabel.MODULE, cs.KEY_QUALIFIED_NAME, module_qn),
                cs.RelationshipType.DEFINES,
                (cs.NodeLabel.FUNCTION, cs.KEY_QUALIFIED_NAME, qualified),
            )

    except Exception as e:
        logger.debug(f"Error ingesting macros: {e}")
|
|
740
|
+
|
|
741
|
+
@staticmethod
def _extract_c_macro_name(macro_node: Node) -> str | None:
    """Return the decoded text of a macro node's ``name`` field.

    Args:
        macro_node: The macro definition node to inspect.

    Returns:
        The macro name, or ``None`` when the node has no ``name`` field.
    """
    identifier = macro_node.child_by_field_name("name")
    return safe_decode_text(identifier) if identifier else None
|
|
748
|
+
|
|
749
|
+
def _extract_function_name(self, func_node: Node) -> str | None:
    """Extract function name from a function node.

    The node's name field is tried first.  When it is absent, the name is
    taken from the node's declarator field, as in C-style grammars:
    ``function_definition -> declarator -> function_declarator ->
    declarator (name)``.  The function declarator may itself be wrapped,
    e.g. by a ``pointer_declarator`` for ``int *foo(void)``, so known
    wrapper declarators are peeled until the name node is reached.

    Args:
        func_node: The function node to inspect.

    Returns:
        The decoded function name, or ``None`` when no non-empty name node
        can be found.
    """
    # Try standard name field first
    name_node = func_node.child_by_field_name(cs.FIELD_NAME)
    if name_node and name_node.text:
        return safe_decode_text(name_node)

    declarator = func_node.child_by_field_name(cs.FIELD_DECLARATOR)
    if not declarator:
        return None

    # Declarator kinds that merely wrap the real name.  Anything else
    # (identifier, qualified name, operator, ...) is taken as the name
    # itself, which matches the previous behaviour for those nodes.
    wrapper_types = (
        "function_declarator",
        "pointer_declarator",
        "array_declarator",
        "parenthesized_declarator",
        "reference_declarator",
    )
    # Named children that can precede the inner declarator of a wrapper
    # that does not label it with a field.
    skipped_types = ("comment", "ms_call_modifier")

    name_node = declarator
    while name_node is not None and name_node.type in wrapper_types:
        inner = name_node.child_by_field_name(cs.FIELD_DECLARATOR)
        if inner is None:
            # Parenthesized / reference declarators carry the inner
            # declarator as an unlabeled named child.
            inner = next(
                (
                    child
                    for child in name_node.named_children
                    if child.type not in skipped_types
                ),
                None,
            )
        name_node = inner

    if name_node and name_node.text:
        return safe_decode_text(name_node)

    return None
|
|
767
|
+
|
|
768
|
+
def _extract_class_name(self, class_node: Node) -> str | None:
    """Return the decoded text of a class node's name field.

    Args:
        class_node: The class node to inspect.

    Returns:
        The class name, or ``None`` when the name field is missing or its
        text is empty.
    """
    identifier = class_node.child_by_field_name(cs.FIELD_NAME)
    if not identifier or not identifier.text:
        return None
    return safe_decode_text(identifier)
|
|
774
|
+
|
|
775
|
+
def _is_method(self, func_node: Node, lang_config) -> bool:
    """Report whether a function node is nested inside a class node.

    Ancestors of ``func_node`` are visited from the direct parent upward.
    The search stops at the first ancestor whose type is one of
    ``lang_config.module_node_types``, or when the ancestors run out.

    Args:
        func_node: The function node to classify.
        lang_config: Language configuration providing ``module_node_types``
            and ``class_node_types``; a falsy value yields ``False``.

    Returns:
        ``True`` if an ancestor below the module boundary has a type listed
        in ``lang_config.class_node_types``, otherwise ``False``.
    """
    if not lang_config:
        return False

    ancestor = func_node.parent
    if not isinstance(ancestor, Node):
        return False

    while ancestor:
        ancestor_type = ancestor.type
        if ancestor_type in lang_config.module_node_types:
            # Reached the module boundary without meeting a class.
            break
        if ancestor_type in lang_config.class_node_types:
            return True
        ancestor = ancestor.parent
    return False
|
|
789
|
+
|
|
790
|
+
def process_dependencies(self, filepath: Path) -> None:
    """Log that a dependency file is being processed.

    Only an info-level message is emitted; the file is not read.

    Args:
        filepath: Path of the dependency file.
    """
    message = f"Processing dependencies: {filepath}"
    logger.info(message)
|
|
793
|
+
|
|
794
|
+
def process_all_method_overrides(self) -> None:
    """Placeholder for method-override processing; currently does nothing."""
    return None
|