code-graph-builder 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (93) hide show
  1. code_graph_builder/__init__.py +82 -0
  2. code_graph_builder/builder.py +366 -0
  3. code_graph_builder/cgb_cli.py +32 -0
  4. code_graph_builder/cli.py +564 -0
  5. code_graph_builder/commands_cli.py +1288 -0
  6. code_graph_builder/config.py +340 -0
  7. code_graph_builder/constants.py +708 -0
  8. code_graph_builder/embeddings/__init__.py +40 -0
  9. code_graph_builder/embeddings/qwen3_embedder.py +573 -0
  10. code_graph_builder/embeddings/vector_store.py +584 -0
  11. code_graph_builder/examples/__init__.py +0 -0
  12. code_graph_builder/examples/example_configuration.py +276 -0
  13. code_graph_builder/examples/example_kuzu_usage.py +109 -0
  14. code_graph_builder/examples/example_semantic_search_full.py +347 -0
  15. code_graph_builder/examples/generate_wiki.py +915 -0
  16. code_graph_builder/examples/graph_export_example.py +100 -0
  17. code_graph_builder/examples/rag_example.py +206 -0
  18. code_graph_builder/examples/test_cli_demo.py +129 -0
  19. code_graph_builder/examples/test_embedding_api.py +153 -0
  20. code_graph_builder/examples/test_kuzu_local.py +190 -0
  21. code_graph_builder/examples/test_rag_redis.py +390 -0
  22. code_graph_builder/graph_updater.py +605 -0
  23. code_graph_builder/guidance/__init__.py +1 -0
  24. code_graph_builder/guidance/agent.py +123 -0
  25. code_graph_builder/guidance/prompts.py +74 -0
  26. code_graph_builder/guidance/toolset.py +264 -0
  27. code_graph_builder/language_spec.py +536 -0
  28. code_graph_builder/mcp/__init__.py +21 -0
  29. code_graph_builder/mcp/api_doc_generator.py +764 -0
  30. code_graph_builder/mcp/file_editor.py +207 -0
  31. code_graph_builder/mcp/pipeline.py +777 -0
  32. code_graph_builder/mcp/server.py +161 -0
  33. code_graph_builder/mcp/tools.py +1800 -0
  34. code_graph_builder/models.py +115 -0
  35. code_graph_builder/parser_loader.py +344 -0
  36. code_graph_builder/parsers/__init__.py +7 -0
  37. code_graph_builder/parsers/call_processor.py +306 -0
  38. code_graph_builder/parsers/call_resolver.py +139 -0
  39. code_graph_builder/parsers/definition_processor.py +796 -0
  40. code_graph_builder/parsers/factory.py +119 -0
  41. code_graph_builder/parsers/import_processor.py +293 -0
  42. code_graph_builder/parsers/structure_processor.py +145 -0
  43. code_graph_builder/parsers/type_inference.py +143 -0
  44. code_graph_builder/parsers/utils.py +134 -0
  45. code_graph_builder/rag/__init__.py +68 -0
  46. code_graph_builder/rag/camel_agent.py +429 -0
  47. code_graph_builder/rag/client.py +298 -0
  48. code_graph_builder/rag/config.py +239 -0
  49. code_graph_builder/rag/cypher_generator.py +67 -0
  50. code_graph_builder/rag/llm_backend.py +210 -0
  51. code_graph_builder/rag/markdown_generator.py +352 -0
  52. code_graph_builder/rag/prompt_templates.py +440 -0
  53. code_graph_builder/rag/rag_engine.py +640 -0
  54. code_graph_builder/rag/review_report.md +172 -0
  55. code_graph_builder/rag/tests/__init__.py +3 -0
  56. code_graph_builder/rag/tests/test_camel_agent.py +313 -0
  57. code_graph_builder/rag/tests/test_client.py +221 -0
  58. code_graph_builder/rag/tests/test_config.py +177 -0
  59. code_graph_builder/rag/tests/test_markdown_generator.py +240 -0
  60. code_graph_builder/rag/tests/test_prompt_templates.py +160 -0
  61. code_graph_builder/services/__init__.py +39 -0
  62. code_graph_builder/services/graph_service.py +465 -0
  63. code_graph_builder/services/kuzu_service.py +665 -0
  64. code_graph_builder/services/memory_service.py +171 -0
  65. code_graph_builder/settings.py +75 -0
  66. code_graph_builder/tests/ACCEPTANCE_CRITERIA_PHASE2.md +401 -0
  67. code_graph_builder/tests/__init__.py +1 -0
  68. code_graph_builder/tests/run_acceptance_check.py +378 -0
  69. code_graph_builder/tests/test_api_find.py +231 -0
  70. code_graph_builder/tests/test_api_find_integration.py +226 -0
  71. code_graph_builder/tests/test_basic.py +78 -0
  72. code_graph_builder/tests/test_c_api_extraction.py +388 -0
  73. code_graph_builder/tests/test_call_resolution_scenarios.py +504 -0
  74. code_graph_builder/tests/test_embedder.py +411 -0
  75. code_graph_builder/tests/test_integration_semantic.py +434 -0
  76. code_graph_builder/tests/test_mcp_protocol.py +298 -0
  77. code_graph_builder/tests/test_mcp_user_flow.py +190 -0
  78. code_graph_builder/tests/test_rag.py +404 -0
  79. code_graph_builder/tests/test_settings.py +135 -0
  80. code_graph_builder/tests/test_step1_graph_build.py +264 -0
  81. code_graph_builder/tests/test_step2_api_docs.py +323 -0
  82. code_graph_builder/tests/test_step3_embedding.py +278 -0
  83. code_graph_builder/tests/test_vector_store.py +552 -0
  84. code_graph_builder/tools/__init__.py +40 -0
  85. code_graph_builder/tools/graph_query.py +495 -0
  86. code_graph_builder/tools/semantic_search.py +387 -0
  87. code_graph_builder/types.py +333 -0
  88. code_graph_builder/utils/__init__.py +0 -0
  89. code_graph_builder/utils/path_utils.py +30 -0
  90. code_graph_builder-0.2.0.dist-info/METADATA +321 -0
  91. code_graph_builder-0.2.0.dist-info/RECORD +93 -0
  92. code_graph_builder-0.2.0.dist-info/WHEEL +4 -0
  93. code_graph_builder-0.2.0.dist-info/entry_points.txt +3 -0
@@ -0,0 +1,306 @@
1
+ """Code Graph Builder - Call Processor."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from pathlib import Path
6
+ from typing import TYPE_CHECKING
7
+
8
+ from loguru import logger
9
+ from tree_sitter import Node, QueryCursor
10
+
11
+ from .. import constants as cs
12
+ from ..parsers.utils import safe_decode_text
13
+ from ..services import IngestorProtocol
14
+ from ..types import FunctionRegistryTrieProtocol
15
+
16
+ if TYPE_CHECKING:
17
+ from ..types import LanguageQueries
18
+ from .call_resolver import CallResolver
19
+ from .import_processor import ImportProcessor
20
+ from .type_inference import TypeInferenceEngine
21
+
22
+
23
class CallProcessor:
    """Find call sites in a parsed file and record them as CALLS relationships.

    For every node captured by the language's call query, the processor
    extracts the called name, locates the enclosing function (the caller),
    asks a lazily created ``CallResolver`` for the target's qualified name and,
    when one is found, queues a ``CALLS`` relationship on the ingestor.
    """

    # Node types treated as "class-like" when looking for the class a call
    # site lives in (used as the context for self/this calls).
    _CLASS_CONTEXT_TYPES = (
        "class_definition",
        "class_declaration",
        "class_specifier",
        "struct_specifier",
    )
    # Node types considered when qualifying the *caller* name. Same as above
    # plus ``impl_item``.
    # NOTE(review): the lookup reads the ``cs.FIELD_NAME`` field; if the Rust
    # grammar exposes the implemented type under a different field, impl
    # blocks never contribute a class name here - confirm against the grammar.
    _ENCLOSING_CLASS_TYPES = _CLASS_CONTEXT_TYPES + ("impl_item",)

    def __init__(
        self,
        ingestor: IngestorProtocol,
        repo_path: Path,
        project_name: str,
        function_registry: FunctionRegistryTrieProtocol,
        import_processor: ImportProcessor,
        type_inference: TypeInferenceEngine | None,
        class_inheritance: dict[str, list[str]],
    ):
        """Store collaborators; the call resolver is created on first use.

        Args:
            ingestor: Sink that receives the CALLS relationships.
            repo_path: Repository root; file paths are made relative to it.
            project_name: First component of every module qualified name.
            function_registry: Registry of known function qualified names.
            import_processor: Source of per-module import mappings.
            type_inference: Stored but not read by this class.
            class_inheritance: Stored but not read by this class.
        """
        self.ingestor = ingestor
        self.repo_path = repo_path
        self.project_name = project_name
        self.function_registry = function_registry
        self.import_processor = import_processor
        self.type_inference = type_inference
        self.class_inheritance = class_inheritance
        self._call_resolver: CallResolver | None = None

    def _get_call_resolver(self) -> CallResolver:
        """Return the shared ``CallResolver``, creating it on first access."""
        if self._call_resolver is None:
            # Imported here rather than at module level (the module-level
            # import is TYPE_CHECKING-only).
            from .call_resolver import CallResolver

            self._call_resolver = CallResolver(
                function_registry=self.function_registry,
                import_processor=self.import_processor,
            )
        return self._call_resolver

    def process_calls_in_file(
        self,
        file_path: Path,
        root_node: Node,
        language: cs.SupportedLanguage,
        queries: dict[cs.SupportedLanguage, LanguageQueries],
    ) -> None:
        """Process every call captured by the language's call query in a file.

        Args:
            file_path: Path of the file; must be located under ``repo_path``.
            root_node: Root of the file's syntax tree.
            language: Language of the file, used to select the query set.
            queries: Per-language query collections.

        Raises:
            ValueError: If ``file_path`` is not under ``repo_path`` (raised by
                ``Path.relative_to`` before the best-effort section starts).
        """
        relative_path = file_path.relative_to(self.repo_path)
        logger.debug(f"Processing calls in: {relative_path}")

        # Everything below is best-effort: a failure in one file is logged
        # and must not abort the surrounding run.
        try:
            lang_queries = queries.get(language)
            if not lang_queries:
                return

            call_query = lang_queries.get(cs.QUERY_CALLS)
            if not call_query:
                return

            # Package marker files (cs.INIT_PY / cs.MOD_RS) are named after
            # their directory; every other file after its suffix-less path.
            if file_path.name in (cs.INIT_PY, cs.MOD_RS):
                module_parts = relative_path.parent.parts
            else:
                module_parts = relative_path.with_suffix("").parts
            module_qn = cs.SEPARATOR_DOT.join([self.project_name, *module_parts])

            captures = QueryCursor(call_query).captures(root_node)
            for call_node in captures.get(cs.CAPTURE_CALL, []):
                if not isinstance(call_node, Node):
                    continue
                self._process_call_node(call_node, module_qn, language, root_node)

        except Exception as e:
            logger.warning(f"Failed to process calls in {file_path}: {e}")

    def _process_call_node(
        self,
        call_node: Node,
        module_qn: str,
        language: cs.SupportedLanguage,
        root_node: Node,
    ) -> None:
        """Resolve one call node and queue a CALLS relationship if possible.

        Calls with no extractable name, with no enclosing named function, or
        whose target cannot be resolved are skipped silently. ``root_node`` is
        accepted for interface stability but is not used.
        """
        call_name = self._extract_call_name(call_node, language)
        if not call_name:
            return

        caller_qn = self._find_caller_function(call_node, module_qn, language)
        if not caller_qn:
            return

        class_context = self._get_class_context(call_node)
        target_qn = self._get_call_resolver().resolve_call(
            call_name, module_qn, class_context
        )
        if not target_qn:
            return

        self.ingestor.ensure_relationship_batch(
            (cs.NodeLabel.FUNCTION, cs.KEY_QUALIFIED_NAME, caller_qn),
            cs.RelationshipType.CALLS,
            (cs.NodeLabel.FUNCTION, cs.KEY_QUALIFIED_NAME, target_qn),
        )
        logger.debug(f"Created CALLS: {caller_qn} -> {target_qn}")

    def _extract_call_name(
        self, call_node: Node, language: cs.SupportedLanguage
    ) -> str | None:
        """Return the textual name of the callee of ``call_node``.

        The callee node is taken from the ``cs.FIELD_NAME`` field, falling
        back to the ``"function"`` field. Scoped identifiers, field
        expressions and member expressions are normalised to dotted names;
        any other node type yields its raw source text.
        """
        func_node = call_node.child_by_field_name(
            cs.FIELD_NAME
        ) or call_node.child_by_field_name("function")
        if not func_node:
            return None

        extractors = {
            "scoped_identifier": self._get_scoped_name,
            "field_expression": self._get_field_expression_name,
            "member_expression": self._get_member_expression_name,
        }
        # Plain identifiers and unknown node types both decode to raw text.
        return extractors.get(func_node.type, safe_decode_text)(func_node)

    def _get_scoped_name(self, node: Node) -> str | None:
        """Join the direct ``identifier`` children of ``node`` with dots.

        Only direct children are inspected; nested scoped identifiers and
        non-identifier path segments are not included.
        """
        parts = []
        for child in node.children:
            if child.type != "identifier":
                continue
            name = safe_decode_text(child)
            if name:
                parts.append(name)
        return ".".join(parts) if parts else None

    def _get_field_expression_name(self, node: Node) -> str | None:
        """Get ``obj.field`` from a field expression (C/C++ style)."""
        return self._join_object_and_member(node, cs.FIELD_FIELD)

    def _get_member_expression_name(self, node: Node) -> str | None:
        """Get ``obj.prop`` from a member expression (JavaScript/TypeScript)."""
        return self._join_object_and_member(node, cs.FIELD_PROPERTY)

    def _join_object_and_member(self, node: Node, member_field: str) -> str | None:
        """Build ``"<object>.<member>"`` from an access expression node.

        Returns just the member text when the object is missing or decodes to
        nothing, and the raw text of ``node`` when it has no member child.
        """
        object_node = node.child_by_field_name(cs.FIELD_OBJECT)
        member_node = node.child_by_field_name(member_field)
        if not member_node:
            return safe_decode_text(node)

        member_name = safe_decode_text(member_node)
        if object_node and member_name:
            object_name = safe_decode_text(object_node)
            if object_name:
                return f"{object_name}.{member_name}"
        return member_name

    def _find_caller_function(
        self,
        call_node: Node,
        module_qn: str,
        language: cs.SupportedLanguage,
    ) -> str | None:
        """Return the qualified name of the nearest enclosing named function.

        Unnamed function nodes are skipped and the search continues upwards.
        Returns ``None`` when the call is not inside any named function.
        """
        current = call_node.parent
        while current:
            if self._is_function_node(current, language):
                func_name = self._get_function_name(current, language)
                if func_name:
                    class_name = self._get_enclosing_class_name(current)
                    if class_name:
                        return f"{module_qn}.{class_name}.{func_name}"
                    return f"{module_qn}.{func_name}"
            current = current.parent
        return None

    def _is_function_node(self, node: Node, language: cs.SupportedLanguage) -> bool:
        """Check whether ``node`` is a function-like definition for ``language``."""
        js_like = (
            "function_declaration",
            "function_expression",
            "arrow_function",
            "method_definition",
        )
        func_types = {
            cs.SupportedLanguage.PYTHON: ("function_definition", "lambda"),
            cs.SupportedLanguage.JS: js_like,
            cs.SupportedLanguage.TS: js_like,
            cs.SupportedLanguage.C: ("function_definition",),
            cs.SupportedLanguage.CPP: ("function_definition", "lambda_expression"),
            cs.SupportedLanguage.JAVA: ("method_declaration", "constructor_declaration"),
            cs.SupportedLanguage.RUST: ("function_item", "closure_expression"),
            cs.SupportedLanguage.GO: ("function_declaration", "method_declaration"),
        }
        # Languages without an entry have no recognised function node types.
        return node.type in func_types.get(language, ())

    def _get_function_name(
        self, func_node: Node, language: cs.SupportedLanguage
    ) -> str | None:
        """Return the name of a function definition node, if it has one.

        The ``cs.FIELD_NAME`` child is used when present. Otherwise the
        declarator chain is followed (C/C++ style definitions).
        """
        name_node = func_node.child_by_field_name(cs.FIELD_NAME)
        if name_node:
            return safe_decode_text(name_node)

        declarator = func_node.child_by_field_name(cs.FIELD_DECLARATOR)
        # A definition such as ``int *foo(void) {}`` wraps the function
        # declarator in another declarator. Peel such wrappers so the result
        # is ``foo`` rather than the wrapper's raw text (``*foo(void)``).
        while declarator and declarator.type != "function_declarator":
            inner = declarator.child_by_field_name(cs.FIELD_DECLARATOR)
            if not inner:
                # No nested declarator to descend into: use the raw text.
                return safe_decode_text(declarator)
            declarator = inner

        if declarator:
            name_node = declarator.child_by_field_name(cs.FIELD_DECLARATOR)
            if name_node:
                return safe_decode_text(name_node)
        return None

    def _get_enclosing_class_name(self, node: Node) -> str | None:
        """Get the name of the nearest enclosing class-like node, if any."""
        return self._nearest_named_ancestor(node, self._ENCLOSING_CLASS_TYPES)

    def _get_class_context(self, node: Node) -> str | None:
        """Get the enclosing class name used to resolve self/this calls."""
        return self._nearest_named_ancestor(node, self._CLASS_CONTEXT_TYPES)

    def _nearest_named_ancestor(
        self, node: Node, node_types: tuple[str, ...]
    ) -> str | None:
        """Return the name of the closest ancestor whose type is in ``node_types``.

        Ancestors of a matching type that lack a ``cs.FIELD_NAME`` child are
        skipped and the walk continues towards the root.
        """
        current = node.parent
        while current:
            if current.type in node_types:
                name_node = current.child_by_field_name(cs.FIELD_NAME)
                if name_node:
                    return safe_decode_text(name_node)
            current = current.parent
        return None
@@ -0,0 +1,139 @@
1
+ """Code Graph Builder - Call Resolver."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from typing import TYPE_CHECKING
6
+
7
+ from loguru import logger
8
+
9
+ from .. import constants as cs
10
+ from ..types import FunctionRegistryTrieProtocol
11
+
12
+ if TYPE_CHECKING:
13
+ from .import_processor import ImportProcessor
14
+
15
+
16
class CallResolver:
    """Resolve call names found at call sites to registered qualified names."""

    # Receiver prefixes that mark a call on the current instance.
    _SELF_PREFIXES = ("self.", "this.")

    def __init__(
        self,
        function_registry: FunctionRegistryTrieProtocol,
        import_processor: ImportProcessor,
    ) -> None:
        """Keep references to the function registry and the import processor."""
        self.function_registry = function_registry
        self.import_processor = import_processor

    def resolve_call(
        self,
        call_name: str,
        module_qn: str,
        class_context: str | None = None,
    ) -> str | None:
        """
        Resolve a function call to its fully qualified name.

        Strategies, in order: self/this calls (only when a class context is
        given), dotted names, then for simple names the import mapping, the
        current module and finally a suffix search over the registry.

        Args:
            call_name: The name of the call (e.g., "foo", "module.bar", "self.method")
            module_qn: The qualified name of the current module
            class_context: The class context if inside a class method

        Returns:
            The fully qualified name of the target function, or None if not resolved
        """
        if not call_name:
            return None

        # A self/this call is answered by the class lookup alone; it does not
        # fall through to the other strategies when the method is unknown.
        if class_context and self._is_self_call(call_name):
            return self._resolve_self_call(call_name, class_context, module_qn)

        if cs.SEPARATOR_DOT in call_name:
            return self._resolve_qualified_call(call_name, module_qn)

        return (
            self._resolve_via_imports(call_name, module_qn)
            or self._resolve_same_module(call_name, module_qn)
            or self._resolve_via_registry(call_name, module_qn)
        )

    def _is_self_call(self, call_name: str) -> bool:
        """Check if this is a self/this call."""
        return call_name.startswith(self._SELF_PREFIXES)

    def _resolve_self_call(
        self,
        call_name: str,
        class_context: str,
        module_qn: str | None = None,
    ) -> str | None:
        """Resolve a self/this call to a method of the enclosing class.

        Args:
            call_name: Call text, with or without a ``self.``/``this.`` prefix.
            class_context: Name of the enclosing class.
            module_qn: Qualified name of the current module. When given, the
                module-qualified key ``module.Class.method`` is tried first,
                because the other registry lookups in this class use
                module-qualified names.

        Returns:
            The registered qualified name of the method, or None.
        """
        if self._is_self_call(call_name):
            # Drop the receiver; everything after the first dot is the member.
            method_name = call_name.partition(".")[2]
        else:
            method_name = call_name

        candidates = []
        if module_qn:
            candidates.append(f"{module_qn}.{class_context}.{method_name}")
        # Legacy key: covers a class_context that is already fully qualified.
        candidates.append(f"{class_context}.{method_name}")

        for candidate in candidates:
            if candidate in self.function_registry:
                return candidate
        return None

    def _resolve_qualified_call(self, call_name: str, module_qn: str) -> str | None:
        """Resolve a dotted call like 'module.function'.

        Tried in order: the first segment replaced by its import mapping, the
        name exactly as written, and the name relative to the current module
        (e.g. ``Class.method`` for a class defined in this module).
        """
        parts = call_name.split(cs.SEPARATOR_DOT)

        if len(parts) >= 2:
            import_map = self.import_processor.get_import_mapping(module_qn)
            if parts[0] in import_map:
                # Swap the local alias for what it was imported as.
                rest = cs.SEPARATOR_DOT.join(parts[1:])
                full_qn = f"{import_map[parts[0]]}.{rest}"
                if full_qn in self.function_registry:
                    return full_qn

        if call_name in self.function_registry:
            return call_name

        local_qn = f"{module_qn}.{call_name}"
        if local_qn in self.function_registry:
            return local_qn

        return None

    def _resolve_via_imports(self, call_name: str, module_qn: str) -> str | None:
        """Try to resolve a simple name through the module's import mapping."""
        import_map = self.import_processor.get_import_mapping(module_qn)
        if call_name not in import_map:
            return None

        imported_qn = import_map[call_name]
        return imported_qn if imported_qn in self.function_registry else None

    def _resolve_same_module(self, call_name: str, module_qn: str) -> str | None:
        """Try to resolve a simple name as a function of the current module."""
        full_qn = f"{module_qn}.{call_name}"
        return full_qn if full_qn in self.function_registry else None

    def _resolve_via_registry(self, call_name: str, module_qn: str) -> str | None:
        """Try to find a function in the registry by its simple name.

        Last-resort fallback: returns the first registered qualified name that
        ends with ``.<call_name>``, which may be the wrong function when
        several share that name. ``module_qn`` is currently not used to
        disambiguate.
        """
        # Relies on the registry's private ``_entries`` attribute (assumed to
        # be a mapping keyed by qualified name); registries without it simply
        # yield no match.
        entries = getattr(self.function_registry, "_entries", None)
        if not entries:
            return None

        suffix = f".{call_name}"
        for qn in entries:
            if qn.endswith(suffix):
                logger.debug(f"Resolved {call_name} to {qn} via registry lookup")
                return qn
        return None