code-graph-builder 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (93) hide show
  1. code_graph_builder/__init__.py +82 -0
  2. code_graph_builder/builder.py +366 -0
  3. code_graph_builder/cgb_cli.py +32 -0
  4. code_graph_builder/cli.py +564 -0
  5. code_graph_builder/commands_cli.py +1288 -0
  6. code_graph_builder/config.py +340 -0
  7. code_graph_builder/constants.py +708 -0
  8. code_graph_builder/embeddings/__init__.py +40 -0
  9. code_graph_builder/embeddings/qwen3_embedder.py +573 -0
  10. code_graph_builder/embeddings/vector_store.py +584 -0
  11. code_graph_builder/examples/__init__.py +0 -0
  12. code_graph_builder/examples/example_configuration.py +276 -0
  13. code_graph_builder/examples/example_kuzu_usage.py +109 -0
  14. code_graph_builder/examples/example_semantic_search_full.py +347 -0
  15. code_graph_builder/examples/generate_wiki.py +915 -0
  16. code_graph_builder/examples/graph_export_example.py +100 -0
  17. code_graph_builder/examples/rag_example.py +206 -0
  18. code_graph_builder/examples/test_cli_demo.py +129 -0
  19. code_graph_builder/examples/test_embedding_api.py +153 -0
  20. code_graph_builder/examples/test_kuzu_local.py +190 -0
  21. code_graph_builder/examples/test_rag_redis.py +390 -0
  22. code_graph_builder/graph_updater.py +605 -0
  23. code_graph_builder/guidance/__init__.py +1 -0
  24. code_graph_builder/guidance/agent.py +123 -0
  25. code_graph_builder/guidance/prompts.py +74 -0
  26. code_graph_builder/guidance/toolset.py +264 -0
  27. code_graph_builder/language_spec.py +536 -0
  28. code_graph_builder/mcp/__init__.py +21 -0
  29. code_graph_builder/mcp/api_doc_generator.py +764 -0
  30. code_graph_builder/mcp/file_editor.py +207 -0
  31. code_graph_builder/mcp/pipeline.py +777 -0
  32. code_graph_builder/mcp/server.py +161 -0
  33. code_graph_builder/mcp/tools.py +1800 -0
  34. code_graph_builder/models.py +115 -0
  35. code_graph_builder/parser_loader.py +344 -0
  36. code_graph_builder/parsers/__init__.py +7 -0
  37. code_graph_builder/parsers/call_processor.py +306 -0
  38. code_graph_builder/parsers/call_resolver.py +139 -0
  39. code_graph_builder/parsers/definition_processor.py +796 -0
  40. code_graph_builder/parsers/factory.py +119 -0
  41. code_graph_builder/parsers/import_processor.py +293 -0
  42. code_graph_builder/parsers/structure_processor.py +145 -0
  43. code_graph_builder/parsers/type_inference.py +143 -0
  44. code_graph_builder/parsers/utils.py +134 -0
  45. code_graph_builder/rag/__init__.py +68 -0
  46. code_graph_builder/rag/camel_agent.py +429 -0
  47. code_graph_builder/rag/client.py +298 -0
  48. code_graph_builder/rag/config.py +239 -0
  49. code_graph_builder/rag/cypher_generator.py +67 -0
  50. code_graph_builder/rag/llm_backend.py +210 -0
  51. code_graph_builder/rag/markdown_generator.py +352 -0
  52. code_graph_builder/rag/prompt_templates.py +440 -0
  53. code_graph_builder/rag/rag_engine.py +640 -0
  54. code_graph_builder/rag/review_report.md +172 -0
  55. code_graph_builder/rag/tests/__init__.py +3 -0
  56. code_graph_builder/rag/tests/test_camel_agent.py +313 -0
  57. code_graph_builder/rag/tests/test_client.py +221 -0
  58. code_graph_builder/rag/tests/test_config.py +177 -0
  59. code_graph_builder/rag/tests/test_markdown_generator.py +240 -0
  60. code_graph_builder/rag/tests/test_prompt_templates.py +160 -0
  61. code_graph_builder/services/__init__.py +39 -0
  62. code_graph_builder/services/graph_service.py +465 -0
  63. code_graph_builder/services/kuzu_service.py +665 -0
  64. code_graph_builder/services/memory_service.py +171 -0
  65. code_graph_builder/settings.py +75 -0
  66. code_graph_builder/tests/ACCEPTANCE_CRITERIA_PHASE2.md +401 -0
  67. code_graph_builder/tests/__init__.py +1 -0
  68. code_graph_builder/tests/run_acceptance_check.py +378 -0
  69. code_graph_builder/tests/test_api_find.py +231 -0
  70. code_graph_builder/tests/test_api_find_integration.py +226 -0
  71. code_graph_builder/tests/test_basic.py +78 -0
  72. code_graph_builder/tests/test_c_api_extraction.py +388 -0
  73. code_graph_builder/tests/test_call_resolution_scenarios.py +504 -0
  74. code_graph_builder/tests/test_embedder.py +411 -0
  75. code_graph_builder/tests/test_integration_semantic.py +434 -0
  76. code_graph_builder/tests/test_mcp_protocol.py +298 -0
  77. code_graph_builder/tests/test_mcp_user_flow.py +190 -0
  78. code_graph_builder/tests/test_rag.py +404 -0
  79. code_graph_builder/tests/test_settings.py +135 -0
  80. code_graph_builder/tests/test_step1_graph_build.py +264 -0
  81. code_graph_builder/tests/test_step2_api_docs.py +323 -0
  82. code_graph_builder/tests/test_step3_embedding.py +278 -0
  83. code_graph_builder/tests/test_vector_store.py +552 -0
  84. code_graph_builder/tools/__init__.py +40 -0
  85. code_graph_builder/tools/graph_query.py +495 -0
  86. code_graph_builder/tools/semantic_search.py +387 -0
  87. code_graph_builder/types.py +333 -0
  88. code_graph_builder/utils/__init__.py +0 -0
  89. code_graph_builder/utils/path_utils.py +30 -0
  90. code_graph_builder-0.2.0.dist-info/METADATA +321 -0
  91. code_graph_builder-0.2.0.dist-info/RECORD +93 -0
  92. code_graph_builder-0.2.0.dist-info/WHEEL +4 -0
  93. code_graph_builder-0.2.0.dist-info/entry_points.txt +3 -0
@@ -0,0 +1,796 @@
1
+ """Definition processor for ingesting code definitions."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from pathlib import Path
6
+ from typing import TYPE_CHECKING
7
+
8
+ from loguru import logger
9
+ from tree_sitter import Node, QueryCursor
10
+
11
+ from .. import constants as cs
12
+ from ..services import IngestorProtocol
13
+ from ..types import LanguageQueries, NodeType, PropertyDict, SimpleNameLookup
14
+ from .utils import safe_decode_text
15
+
16
+ if TYPE_CHECKING:
17
+ from ..types import FunctionRegistryTrieProtocol
18
+ from .import_processor import ImportProcessor
19
+
20
+
21
class DefinitionProcessor:
    """Process file definitions (functions, classes, methods).

    For each parsed file the processor creates the Module node, links it to
    its parent container, delegates import parsing to the import processor,
    and ingests function, class, and method nodes through the configured
    ingestor. C files additionally get typedef and macro nodes.
    """

    # tree-sitter node type that wraps a C storage-class keyword; the node's
    # text is inspected for ``static`` to detect file-local functions.
    _C_STATIC_SPECIFIER = "storage_class_specifier"

    # Node types collected as return-type fragments when a C function node
    # exposes no ``type`` field.
    _C_TYPE_SPECIFIER_TYPES = frozenset(
        {
            "primitive_type",
            "sized_type_specifier",
            "type_identifier",
            "struct_specifier",
            "union_specifier",
            "enum_specifier",
        }
    )

    # Comment lines made up solely of these characters are treated as
    # decorative rulers and dropped from extracted docstrings.
    _C_COMMENT_DECORATION = "-=*#~"

    def __init__(
        self,
        ingestor: IngestorProtocol,
        repo_path: Path,
        project_name: str,
        function_registry: FunctionRegistryTrieProtocol,
        simple_name_lookup: SimpleNameLookup,
        import_processor: ImportProcessor,
        module_qn_to_file_path: dict[str, Path],
    ):
        """Store the collaborators shared by every ``process_file`` call.

        Args:
            ingestor: Sink that receives node and relationship batches.
            repo_path: Repository root; file paths are made relative to it.
            project_name: First component of every qualified name.
            function_registry: Mapping filled with ``qualified name -> NodeType``.
            simple_name_lookup: Mapping filled with ``simple name -> {qualified names}``.
            import_processor: Collaborator used to parse each file's imports.
            module_qn_to_file_path: Mapping filled with ``module qn -> file path``.
        """
        self.ingestor = ingestor
        self.repo_path = repo_path
        self.project_name = project_name
        self.function_registry = function_registry
        self.simple_name_lookup = simple_name_lookup
        self.import_processor = import_processor
        self.module_qn_to_file_path = module_qn_to_file_path
        self.class_inheritance: dict[str, list[str]] = {}
        # Track function declarations found in header files for visibility resolution
        self._header_declarations: set[str] = set()

    def process_file(
        self,
        file_path: Path,
        language: cs.SupportedLanguage,
        queries: dict[cs.SupportedLanguage, LanguageQueries],
        structural_elements: dict[Path, str | None],
    ) -> tuple[Node, cs.SupportedLanguage] | None:
        """Process a single file and extract definitions.

        Args:
            file_path: Path of the file; must be located under ``repo_path``.
            language: Language whose parser and queries are used.
            queries: Per-language query bundles (parser, queries, config).
            structural_elements: Maps a relative directory path to the
                qualified name of the package it represents, if any.

        Returns:
            ``(root_node, language)`` on success, or ``None`` when no
            queries/parser exist for the language or processing fails.

        Raises:
            ValueError: If ``file_path`` is not under ``repo_path`` (raised
                by ``Path.relative_to`` before the guarded section starts).
        """
        relative_path = file_path.relative_to(self.repo_path)
        logger.info(f"Processing file: {relative_path}")

        try:
            lang_queries = queries.get(language)
            if not lang_queries:
                logger.warning(f"No queries for language: {language}")
                return None

            parser = lang_queries.get("parser")
            if not parser:
                logger.warning(f"No parser for language: {language}")
                return None

            source_bytes = file_path.read_bytes()
            tree = parser.parse(source_bytes)
            root_node = tree.root_node

            # Build module qualified name. Package marker files take the name
            # of their directory; every other file drops its suffix.
            # NOTE(review): because the suffix is dropped, ``foo.c`` and
            # ``foo.h`` in one directory share a module_qn - confirm intended.
            if file_path.name in (cs.INIT_PY, cs.MOD_RS):
                qn_parts = relative_path.parent.parts
            else:
                qn_parts = relative_path.with_suffix("").parts
            module_qn = cs.SEPARATOR_DOT.join([self.project_name, *qn_parts])

            self.module_qn_to_file_path[module_qn] = file_path

            # Create module node and relationships
            self._create_module_node(module_qn, file_path.name, str(relative_path))
            self._create_module_relationships(
                module_qn, relative_path, structural_elements
            )

            # Parse imports
            self.import_processor.parse_imports(
                root_node, module_qn, language, queries
            )

            # Ingest functions and classes
            self._ingest_functions(root_node, module_qn, language, queries)
            self._ingest_classes(root_node, module_qn, language, queries)

            # Ingest C-specific constructs: typedefs and macros
            if language == cs.SupportedLanguage.C:
                self._ingest_c_typedefs(root_node, module_qn, queries)
                self._ingest_c_macros(root_node, module_qn, queries)

            return (root_node, language)

        except Exception as e:
            logger.error(f"Error processing {file_path}: {e}")
            return None

    def _create_module_node(self, module_qn: str, name: str, path: str) -> None:
        """Create a module node with its qualified name, file name and path."""
        self.ingestor.ensure_node_batch(
            cs.NodeLabel.MODULE,
            {
                cs.KEY_QUALIFIED_NAME: module_qn,
                cs.KEY_NAME: name,
                cs.KEY_PATH: path,
            },
        )

    def _create_module_relationships(
        self,
        module_qn: str,
        relative_path: Path,
        structural_elements: dict[Path, str | None],
    ) -> None:
        """Link the module to its parent container with CONTAINS_MODULE.

        The parent is, in order of preference: the package registered for the
        module's directory in ``structural_elements``, the directory itself
        as a folder, or the project when the file sits at the repository root.
        """
        parent_rel_path = relative_path.parent
        parent_container_qn = structural_elements.get(parent_rel_path)

        if parent_container_qn:
            parent_spec = (
                cs.NodeLabel.PACKAGE,
                cs.KEY_QUALIFIED_NAME,
                parent_container_qn,
            )
        elif parent_rel_path != Path("."):
            parent_spec = (cs.NodeLabel.FOLDER, cs.KEY_PATH, str(parent_rel_path))
        else:
            parent_spec = (cs.NodeLabel.PROJECT, cs.KEY_NAME, self.project_name)

        self.ingestor.ensure_relationship_batch(
            parent_spec,
            cs.RelationshipType.CONTAINS_MODULE,
            (cs.NodeLabel.MODULE, cs.KEY_QUALIFIED_NAME, module_qn),
        )

    def _register_callable(
        self, qualified_name: str, simple_name: str, node_type: NodeType
    ) -> None:
        """Record a function or method in the registry and simple-name lookup."""
        self.function_registry[qualified_name] = node_type
        if simple_name not in self.simple_name_lookup:
            self.simple_name_lookup[simple_name] = set()
        self.simple_name_lookup[simple_name].add(qualified_name)

    def _link_module_defines(self, module_qn: str, label, qualified_name: str) -> None:
        """Emit a ``DEFINES`` relationship from a module to one of its definitions."""
        self.ingestor.ensure_relationship_batch(
            (cs.NodeLabel.MODULE, cs.KEY_QUALIFIED_NAME, module_qn),
            cs.RelationshipType.DEFINES,
            (label, cs.KEY_QUALIFIED_NAME, qualified_name),
        )

    def _ingest_functions(
        self,
        root_node: Node,
        module_qn: str,
        language: cs.SupportedLanguage,
        queries: dict[cs.SupportedLanguage, LanguageQueries],
    ) -> None:
        """Ingest functions from the AST.

        Every node captured by the language's ``functions`` query that is not
        nested inside a class becomes a Function node named
        ``<module_qn>.<function name>``. For C, the docstring comment, return
        type, parameters, signature and visibility are recorded as well.
        Errors are logged at debug level and end ingestion for this file.
        """
        lang_queries = queries.get(language)
        if not lang_queries:
            return

        func_query = lang_queries.get("functions")
        if not func_query:
            return

        # Determine the file path from module_qn for visibility analysis
        file_path = self.module_qn_to_file_path.get(module_qn)
        is_header = file_path is not None and file_path.suffix == cs.EXT_H
        is_c_lang = language == cs.SupportedLanguage.C
        lang_config = lang_queries.get("config")

        try:
            cursor = QueryCursor(func_query)
            captures = cursor.captures(root_node)

            for func_node in captures.get(cs.CAPTURE_FUNCTION, []):
                if not isinstance(func_node, Node):
                    continue

                # Skip methods (handled by class processing)
                if self._is_method(func_node, lang_config):
                    continue

                func_name = self._extract_function_name(func_node)
                if not func_name:
                    continue

                func_qn = f"{module_qn}.{func_name}"

                func_props: PropertyDict = {
                    cs.KEY_QUALIFIED_NAME: func_qn,
                    cs.KEY_NAME: func_name,
                    cs.KEY_START_LINE: func_node.start_point[0] + 1,
                    cs.KEY_END_LINE: func_node.end_point[0] + 1,
                }

                if is_c_lang:
                    # Extract C/C++ comment as docstring
                    c_docstring = self._extract_c_comment(func_node)
                    if c_docstring:
                        func_props[cs.KEY_DOCSTRING] = c_docstring

                    # Extract API interface properties for C language
                    return_type = self._extract_c_return_type(func_node)
                    parameters = self._extract_c_parameters(func_node)
                    visibility = self._extract_c_visibility(func_node, is_header)
                    signature = self._build_c_signature(
                        func_name, return_type, parameters
                    )

                    func_props[cs.KEY_RETURN_TYPE] = return_type
                    func_props[cs.KEY_PARAMETERS] = parameters
                    func_props[cs.KEY_SIGNATURE] = signature
                    func_props[cs.KEY_VISIBILITY] = visibility

                    # Track header declarations for cross-file visibility
                    if is_header:
                        self._header_declarations.add(func_name)

                logger.info(f"  Found function: {func_name}")
                self.ingestor.ensure_node_batch(cs.NodeLabel.FUNCTION, func_props)
                self._register_callable(func_qn, func_name, NodeType.FUNCTION)
                self._link_module_defines(module_qn, cs.NodeLabel.FUNCTION, func_qn)

        except Exception as e:
            logger.debug(f"Error ingesting functions: {e}")

    def _ingest_classes(
        self,
        root_node: Node,
        module_qn: str,
        language: cs.SupportedLanguage,
        queries: dict[cs.SupportedLanguage, LanguageQueries],
    ) -> None:
        """Ingest classes and their methods from the AST.

        Every named node captured by the language's ``classes`` query becomes
        a Class node named ``<module_qn>.<class name>``. For C, the kind
        (struct/union/enum), members, a summary signature and the preceding
        comment are recorded too. Errors are logged at debug level and end
        ingestion for this file.
        """
        lang_queries = queries.get(language)
        if not lang_queries:
            return

        class_query = lang_queries.get("classes")
        if not class_query:
            return

        is_c_lang = language == cs.SupportedLanguage.C

        try:
            cursor = QueryCursor(class_query)
            captures = cursor.captures(root_node)

            for class_node in captures.get(cs.CAPTURE_CLASS, []):
                if not isinstance(class_node, Node):
                    continue

                class_name = self._extract_class_name(class_node)
                if not class_name:
                    continue

                class_qn = f"{module_qn}.{class_name}"

                class_props: PropertyDict = {
                    cs.KEY_QUALIFIED_NAME: class_qn,
                    cs.KEY_NAME: class_name,
                    cs.KEY_START_LINE: class_node.start_point[0] + 1,
                    cs.KEY_END_LINE: class_node.end_point[0] + 1,
                }

                if is_c_lang:
                    # Extract C/C++ comment as docstring for struct/union/enum
                    c_docstring = self._extract_c_comment(class_node)
                    if c_docstring:
                        class_props[cs.KEY_DOCSTRING] = c_docstring

                    # Extract C struct/union/enum members and build signature
                    kind = self._c_class_kind(class_node)
                    members = self._extract_c_members(class_node)
                    class_props[cs.KEY_KIND] = kind
                    class_props[cs.KEY_PARAMETERS] = members
                    class_props[cs.KEY_SIGNATURE] = self._build_c_class_signature(
                        kind, class_name, members
                    )

                logger.info(f"  Found class: {class_name}")
                self.ingestor.ensure_node_batch(cs.NodeLabel.CLASS, class_props)
                self._link_module_defines(module_qn, cs.NodeLabel.CLASS, class_qn)

                # Process class methods
                self._ingest_class_methods(
                    class_node, class_qn, module_qn, language, queries
                )

        except Exception as e:
            logger.debug(f"Error ingesting classes: {e}")

    def _ingest_class_methods(
        self,
        class_node: Node,
        class_qn: str,
        module_qn: str,
        language: cs.SupportedLanguage,
        queries: dict[cs.SupportedLanguage, LanguageQueries],
    ) -> None:
        """Ingest methods of a class.

        Runs the language's ``functions`` query over the class body and
        registers each named capture as a Method node named
        ``<class_qn>.<method name>`` linked with DEFINES_METHOD.
        """
        # NOTE(review): the query runs over the whole body subtree, so
        # functions nested deeper than direct members may also be captured -
        # confirm against the query definitions.
        lang_queries = queries.get(language)
        if not lang_queries:
            return

        func_query = lang_queries.get("functions")
        if not func_query:
            return

        try:
            body_node = class_node.child_by_field_name(cs.FIELD_BODY)
            if not body_node:
                return

            method_cursor = QueryCursor(func_query)
            captures = method_cursor.captures(body_node)

            for method_node in captures.get(cs.CAPTURE_FUNCTION, []):
                if not isinstance(method_node, Node):
                    continue

                method_name = self._extract_function_name(method_node)
                if not method_name:
                    continue

                method_qn = f"{class_qn}.{method_name}"

                method_props: PropertyDict = {
                    cs.KEY_QUALIFIED_NAME: method_qn,
                    cs.KEY_NAME: method_name,
                    cs.KEY_START_LINE: method_node.start_point[0] + 1,
                    cs.KEY_END_LINE: method_node.end_point[0] + 1,
                }

                logger.info(f"    Found method: {method_name}")
                self.ingestor.ensure_node_batch(cs.NodeLabel.METHOD, method_props)
                self._register_callable(method_qn, method_name, NodeType.METHOD)

                self.ingestor.ensure_relationship_batch(
                    (cs.NodeLabel.CLASS, cs.KEY_QUALIFIED_NAME, class_qn),
                    cs.RelationshipType.DEFINES_METHOD,
                    (cs.NodeLabel.METHOD, cs.KEY_QUALIFIED_NAME, method_qn),
                )

        except Exception as e:
            logger.debug(f"Error ingesting class methods: {e}")

    # -----------------------------------------------------------------
    # C/C++ comment extraction
    # -----------------------------------------------------------------

    @staticmethod
    def _extract_c_comment(func_node: Node) -> str | None:
        """Extract the comment block immediately above a C/C++ definition node.

        Handles:
        - Single-line: ``// comment``
        - Multi-line: ``/* comment */``
        - Block of consecutive ``//`` lines
        - Doxygen-style: ``/** ... */`` or ``/// ...``

        Preceding sibling ``comment`` nodes are collected as long as each one
        ends at most one line above the node (or above the comment collected
        before it).

        Returns cleaned comment text or *None*.
        """
        raw_comments: list[str] = []

        # Walk backwards through previous siblings to collect comment nodes
        current = func_node.prev_named_sibling
        if current is None:
            current = func_node.prev_sibling

        last_end_line = func_node.start_point[0]  # 0-based line number

        while current is not None and current.type == "comment":
            # Check adjacency: comment must be within 1 line of the function
            # or the previous comment we already collected.
            if last_end_line - current.end_point[0] > 1:
                break

            text = safe_decode_text(current)
            if text is None:
                break

            raw_comments.append(text)
            last_end_line = current.start_point[0]

            previous = current.prev_named_sibling
            if previous is None:
                previous = current.prev_sibling
            current = previous

        # Comments were gathered bottom-up; restore source order.
        raw_comments.reverse()

        cleaned = [
            line
            for raw in raw_comments
            for line in DefinitionProcessor._clean_c_comment_text(raw)
        ]
        return "\n".join(cleaned) if cleaned else None

    @staticmethod
    def _clean_c_comment_text(raw: str) -> list[str]:
        """Strip comment markers from the text of one comment node.

        The text is cleaned line by line so that the interior `` * `` markers
        of a multi-line block comment are removed as well. Empty lines and
        lines consisting only of decoration characters are dropped.

        Args:
            raw: Source text of a single ``//`` or ``/* ... */`` comment.

        Returns:
            The remaining content lines, in order (possibly empty).
        """
        text = raw.strip()
        if text.startswith("/*"):
            # Remove the opener first and the closer second so that the
            # degenerate ``/**/`` collapses to an empty string.
            text = text[2:]
            if text.endswith("*/"):
                text = text[:-2]

        decoration = DefinitionProcessor._C_COMMENT_DECORATION
        cleaned: list[str] = []
        for line in text.splitlines():
            line = line.strip()
            # Line comment: // or ///
            if line.startswith("//"):
                line = line[2:]
                if line.startswith("/"):
                    line = line[1:]
            # Doxygen opener remainder (/** -> *) or interior line: * text
            if line.startswith("*"):
                line = line[1:]

            line = line.strip()
            if line and not all(ch in decoration for ch in line):
                cleaned.append(line)
        return cleaned

    # -----------------------------------------------------------------
    # C language API interface extraction helpers
    # -----------------------------------------------------------------

    @staticmethod
    def _declarator_chain(node: Node) -> list[Node]:
        """Return the nested declarators of ``node``, outermost first.

        Follows the ``declarator`` field repeatedly, e.g.
        ``pointer_declarator -> function_declarator -> identifier`` for
        ``int *f(void)``. A ``parenthesized_declarator`` is stepped through
        via its first named child that is not a comment or call modifier.
        The list is empty when ``node`` has no ``declarator`` field.
        """
        chain: list[Node] = []
        current = node.child_by_field_name(cs.FIELD_DECLARATOR)
        while current is not None:
            chain.append(current)
            nested = current.child_by_field_name(cs.FIELD_DECLARATOR)
            if nested is None and current.type == "parenthesized_declarator":
                nested = next(
                    (
                        child
                        for child in current.named_children
                        if child.type not in ("comment", "ms_call_modifier")
                    ),
                    None,
                )
            current = nested
        return chain

    def _extract_c_return_type(self, func_node: Node) -> str | None:
        """Extract the return type from a C function node.

        The text of the node's ``type`` field is returned when present.
        Otherwise the type-specifier children that precede the declarator are
        joined with spaces. Pointer markers live in the declarator and are
        not part of the returned text.
        """
        type_node = func_node.child_by_field_name(cs.FIELD_TYPE)
        if type_node and type_node.text:
            return safe_decode_text(type_node)

        # Fallback: collect all type-specifier children that appear before the
        # declarator (covers ``static inline int func(...)`` patterns).
        parts: list[str] = []
        for child in func_node.children:
            if child.type in self._C_TYPE_SPECIFIER_TYPES:
                text = safe_decode_text(child)
                if text:
                    parts.append(text)
            elif child.type.endswith("declarator"):
                # Reached the declarator: nothing after it belongs to the type.
                break
        return " ".join(parts) if parts else None

    def _extract_c_parameters(self, func_node: Node) -> list[str]:
        """Extract parameter list from a C function node.

        Returns a list of parameter strings like ``["int fd", "const char *buf"]``;
        a variadic parameter is reported as ``"..."``. The list is empty when
        no parameter list can be located.
        """
        params_node = self._find_c_parameter_list(func_node)
        if not params_node:
            return []

        params: list[str] = []
        for child in params_node.children:
            if child.type == "parameter_declaration":
                text = safe_decode_text(child)
                if text:
                    params.append(text)
            elif child.type == "variadic_parameter":
                params.append("...")
        return params

    def _find_c_parameter_list(self, func_node: Node) -> Node | None:
        """Locate the parameter_list node within a C function AST node.

        The innermost ``function_declarator`` on the declarator chain is used,
        so any depth of pointer wrapping (``int **f(...)``) is handled.
        """
        for declarator in reversed(self._declarator_chain(func_node)):
            if declarator.type == "function_declarator":
                return declarator.child_by_field_name(cs.FIELD_PARAMETERS)
        return None

    def _extract_c_visibility(self, func_node: Node, is_header: bool) -> str:
        """Determine C function visibility.

        Rules:
        - ``static`` keyword -> "static" (file-local, private)
        - Declared in a ``.h`` header file -> "public"
        - Function name found in a previously processed header -> "public"
        - Otherwise -> "extern" (external linkage but not declared in a header)

        The third rule only sees headers processed before this call, so the
        result depends on file processing order.
        """
        # Check for ``static`` storage class specifier
        for child in func_node.children:
            if child.type != self._C_STATIC_SPECIFIER:
                continue
            text = safe_decode_text(child)
            if text and "static" in text:
                return "static"

        if is_header:
            return "public"

        # Check if this function was declared in a previously processed header
        func_name = self._extract_function_name(func_node)
        if func_name and func_name in self._header_declarations:
            return "public"
        return "extern"

    @staticmethod
    def _build_c_signature(
        name: str,
        return_type: str | None,
        parameters: list[str],
    ) -> str:
        """Build a full C function signature string.

        A missing return type and an empty parameter list are both rendered
        as ``void``.
        """
        ret = return_type or "void"
        params = ", ".join(parameters) if parameters else "void"
        return f"{ret} {name}({params})"

    # -----------------------------------------------------------------
    # C struct/union/enum member extraction
    # -----------------------------------------------------------------

    @staticmethod
    def _c_class_kind(class_node: Node) -> str:
        """Return the C type kind: 'struct', 'union', or 'enum'.

        Any other node type falls back to 'struct'.
        """
        kinds = {
            "struct_specifier": "struct",
            "union_specifier": "union",
            "enum_specifier": "enum",
        }
        return kinds.get(class_node.type, "struct")

    @staticmethod
    def _extract_c_members(class_node: Node) -> list[str]:
        """Extract member declarations from a C struct/union/enum.

        For struct/union: returns field declarations like ``["int x", "char *name"]``.
        For enum: returns enumerator names like ``["RED", "GREEN", "BLUE"]``.
        Returns an empty list when the node has no ``body`` field.
        """
        members: list[str] = []
        body = class_node.child_by_field_name("body")
        if not body:
            return members

        for child in body.children:
            if child.type == "field_declaration":
                text = safe_decode_text(child)
                if text:
                    # Strip trailing semicolons
                    members.append(text.rstrip(";").strip())
            elif child.type == "enumerator":
                name_node = child.child_by_field_name("name")
                if name_node:
                    text = safe_decode_text(name_node)
                    if text:
                        members.append(text)
        return members

    @staticmethod
    def _build_c_class_signature(kind: str, name: str, members: list[str]) -> str:
        """Build a summary signature for a C struct/union/enum.

        Without members the result is ``"<kind> <name>"``; otherwise the
        members are joined with ``"; "`` inside braces.
        """
        if not members:
            return f"{kind} {name}"
        member_str = "; ".join(members)
        return f"{kind} {name} {{ {member_str} }}"

    # -----------------------------------------------------------------
    # C typedef extraction
    # -----------------------------------------------------------------

    def _ingest_c_typedefs(
        self,
        root_node: Node,
        module_qn: str,
        queries: dict[cs.SupportedLanguage, LanguageQueries],
    ) -> None:
        """Extract typedef declarations and create Type nodes.

        Each capture of the C ``typedefs`` query with a resolvable name
        becomes a Type node with ``kind='typedef'`` whose signature is the
        typedef's source text without the trailing semicolon.
        """
        lang_queries = queries.get(cs.SupportedLanguage.C)
        if not lang_queries:
            return

        typedef_query = lang_queries.get("typedefs")
        if not typedef_query:
            return

        try:
            cursor = QueryCursor(typedef_query)
            captures = cursor.captures(root_node)

            for td_node in captures.get(cs.CAPTURE_TYPEDEF, []):
                if not isinstance(td_node, Node):
                    continue

                td_name = self._extract_c_typedef_name(td_node)
                if not td_name:
                    continue

                td_qn = f"{module_qn}.{td_name}"
                td_text = safe_decode_text(td_node)
                signature = (
                    td_text.rstrip(";").strip() if td_text else f"typedef {td_name}"
                )

                c_docstring = self._extract_c_comment(td_node)

                td_props: PropertyDict = {
                    cs.KEY_QUALIFIED_NAME: td_qn,
                    cs.KEY_NAME: td_name,
                    cs.KEY_START_LINE: td_node.start_point[0] + 1,
                    cs.KEY_END_LINE: td_node.end_point[0] + 1,
                    cs.KEY_SIGNATURE: signature,
                    cs.KEY_KIND: "typedef",
                }
                if c_docstring:
                    td_props[cs.KEY_DOCSTRING] = c_docstring

                logger.info(f"  Found typedef: {td_name}")
                self.ingestor.ensure_node_batch(cs.NodeLabel.TYPE, td_props)
                self._link_module_defines(module_qn, cs.NodeLabel.TYPE, td_qn)

        except Exception as e:
            logger.debug(f"Error ingesting typedefs: {e}")

    @staticmethod
    def _extract_c_typedef_name(td_node: Node) -> str | None:
        """Extract the name introduced by a typedef.

        The ``type_definition`` node has a ``declarator`` field. It is either
        the new type name itself (a ``type_identifier``) or a wrapper around
        it, as in ``typedef int **pp;`` or ``typedef void (*cb)(int);``. In
        the wrapped case the innermost declarator supplies the name.
        """
        chain = DefinitionProcessor._declarator_chain(td_node)
        if not chain:
            return None

        innermost = chain[-1]
        # A lone declarator is only accepted when it is the type name itself.
        if len(chain) == 1 and innermost.type != "type_identifier":
            return None
        return safe_decode_text(innermost)

    # -----------------------------------------------------------------
    # C macro extraction
    # -----------------------------------------------------------------

    def _ingest_c_macros(
        self,
        root_node: Node,
        module_qn: str,
        queries: dict[cs.SupportedLanguage, LanguageQueries],
    ) -> None:
        """Extract #define macro definitions and create Function nodes with kind='macro'.

        Macros are always recorded with ``visibility='public'`` and their
        full source text as signature. They are not added to the function
        registry or the simple-name lookup.
        """
        lang_queries = queries.get(cs.SupportedLanguage.C)
        if not lang_queries:
            return

        macro_query = lang_queries.get("macros")
        if not macro_query:
            return

        try:
            cursor = QueryCursor(macro_query)
            captures = cursor.captures(root_node)

            for macro_node in captures.get(cs.CAPTURE_MACRO, []):
                if not isinstance(macro_node, Node):
                    continue

                macro_name = self._extract_c_macro_name(macro_node)
                if not macro_name:
                    continue

                macro_qn = f"{module_qn}.{macro_name}"
                macro_text = safe_decode_text(macro_node)
                signature = (
                    macro_text.strip() if macro_text else f"#define {macro_name}"
                )

                c_docstring = self._extract_c_comment(macro_node)

                macro_props: PropertyDict = {
                    cs.KEY_QUALIFIED_NAME: macro_qn,
                    cs.KEY_NAME: macro_name,
                    cs.KEY_START_LINE: macro_node.start_point[0] + 1,
                    cs.KEY_END_LINE: macro_node.end_point[0] + 1,
                    cs.KEY_SIGNATURE: signature,
                    cs.KEY_KIND: "macro",
                    cs.KEY_VISIBILITY: "public",
                }
                if c_docstring:
                    macro_props[cs.KEY_DOCSTRING] = c_docstring

                logger.info(f"  Found macro: {macro_name}")
                self.ingestor.ensure_node_batch(cs.NodeLabel.FUNCTION, macro_props)
                self._link_module_defines(module_qn, cs.NodeLabel.FUNCTION, macro_qn)

        except Exception as e:
            logger.debug(f"Error ingesting macros: {e}")

    @staticmethod
    def _extract_c_macro_name(macro_node: Node) -> str | None:
        """Extract the macro name from the ``name`` field of a macro node."""
        name_node = macro_node.child_by_field_name("name")
        if name_node:
            return safe_decode_text(name_node)
        return None

    def _extract_function_name(self, func_node: Node) -> str | None:
        """Extract function name from a function node.

        The ``name`` field is preferred. Nodes that carry a ``declarator``
        field instead (C style) are resolved by following the declarator
        chain to its innermost node, so ``int *f(void)`` yields ``f`` rather
        than the text of the whole pointer declarator.
        """
        # Try standard name field first
        name_node = func_node.child_by_field_name(cs.FIELD_NAME)
        if name_node and name_node.text:
            return safe_decode_text(name_node)

        # For C language: function_definition -> declarator -> ... -> name
        chain = self._declarator_chain(func_node)
        if chain:
            innermost = chain[-1]
            # A function_declarator with no nested declarator carries no name.
            if innermost.type != "function_declarator" and innermost.text:
                return safe_decode_text(innermost)

        return None

    def _extract_class_name(self, class_node: Node) -> str | None:
        """Extract class name from the ``name`` field of a class node."""
        name_node = class_node.child_by_field_name(cs.FIELD_NAME)
        if name_node and name_node.text:
            return safe_decode_text(name_node)
        return None

    def _is_method(self, func_node: Node, lang_config) -> bool:
        """Check if a function node is a method.

        Walks up the ancestors of ``func_node`` and returns True when a node
        whose type is in ``lang_config.class_node_types`` is reached before
        one whose type is in ``lang_config.module_node_types``. Returns False
        when ``lang_config`` is falsy or the node has no parent.
        """
        if not lang_config:
            return False

        current = func_node.parent
        if not isinstance(current, Node):
            return False

        while current and current.type not in lang_config.module_node_types:
            if current.type in lang_config.class_node_types:
                return True
            current = current.parent
        return False

    def process_dependencies(self, filepath: Path) -> None:
        """Process dependency files (currently only logs the path)."""
        logger.info(f"Processing dependencies: {filepath}")

    def process_all_method_overrides(self) -> None:
        """Process all method overrides (currently a no-op placeholder)."""