code-graph-builder 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (93) hide show
  1. code_graph_builder/__init__.py +82 -0
  2. code_graph_builder/builder.py +366 -0
  3. code_graph_builder/cgb_cli.py +32 -0
  4. code_graph_builder/cli.py +564 -0
  5. code_graph_builder/commands_cli.py +1288 -0
  6. code_graph_builder/config.py +340 -0
  7. code_graph_builder/constants.py +708 -0
  8. code_graph_builder/embeddings/__init__.py +40 -0
  9. code_graph_builder/embeddings/qwen3_embedder.py +573 -0
  10. code_graph_builder/embeddings/vector_store.py +584 -0
  11. code_graph_builder/examples/__init__.py +0 -0
  12. code_graph_builder/examples/example_configuration.py +276 -0
  13. code_graph_builder/examples/example_kuzu_usage.py +109 -0
  14. code_graph_builder/examples/example_semantic_search_full.py +347 -0
  15. code_graph_builder/examples/generate_wiki.py +915 -0
  16. code_graph_builder/examples/graph_export_example.py +100 -0
  17. code_graph_builder/examples/rag_example.py +206 -0
  18. code_graph_builder/examples/test_cli_demo.py +129 -0
  19. code_graph_builder/examples/test_embedding_api.py +153 -0
  20. code_graph_builder/examples/test_kuzu_local.py +190 -0
  21. code_graph_builder/examples/test_rag_redis.py +390 -0
  22. code_graph_builder/graph_updater.py +605 -0
  23. code_graph_builder/guidance/__init__.py +1 -0
  24. code_graph_builder/guidance/agent.py +123 -0
  25. code_graph_builder/guidance/prompts.py +74 -0
  26. code_graph_builder/guidance/toolset.py +264 -0
  27. code_graph_builder/language_spec.py +536 -0
  28. code_graph_builder/mcp/__init__.py +21 -0
  29. code_graph_builder/mcp/api_doc_generator.py +764 -0
  30. code_graph_builder/mcp/file_editor.py +207 -0
  31. code_graph_builder/mcp/pipeline.py +777 -0
  32. code_graph_builder/mcp/server.py +161 -0
  33. code_graph_builder/mcp/tools.py +1800 -0
  34. code_graph_builder/models.py +115 -0
  35. code_graph_builder/parser_loader.py +344 -0
  36. code_graph_builder/parsers/__init__.py +7 -0
  37. code_graph_builder/parsers/call_processor.py +306 -0
  38. code_graph_builder/parsers/call_resolver.py +139 -0
  39. code_graph_builder/parsers/definition_processor.py +796 -0
  40. code_graph_builder/parsers/factory.py +119 -0
  41. code_graph_builder/parsers/import_processor.py +293 -0
  42. code_graph_builder/parsers/structure_processor.py +145 -0
  43. code_graph_builder/parsers/type_inference.py +143 -0
  44. code_graph_builder/parsers/utils.py +134 -0
  45. code_graph_builder/rag/__init__.py +68 -0
  46. code_graph_builder/rag/camel_agent.py +429 -0
  47. code_graph_builder/rag/client.py +298 -0
  48. code_graph_builder/rag/config.py +239 -0
  49. code_graph_builder/rag/cypher_generator.py +67 -0
  50. code_graph_builder/rag/llm_backend.py +210 -0
  51. code_graph_builder/rag/markdown_generator.py +352 -0
  52. code_graph_builder/rag/prompt_templates.py +440 -0
  53. code_graph_builder/rag/rag_engine.py +640 -0
  54. code_graph_builder/rag/review_report.md +172 -0
  55. code_graph_builder/rag/tests/__init__.py +3 -0
  56. code_graph_builder/rag/tests/test_camel_agent.py +313 -0
  57. code_graph_builder/rag/tests/test_client.py +221 -0
  58. code_graph_builder/rag/tests/test_config.py +177 -0
  59. code_graph_builder/rag/tests/test_markdown_generator.py +240 -0
  60. code_graph_builder/rag/tests/test_prompt_templates.py +160 -0
  61. code_graph_builder/services/__init__.py +39 -0
  62. code_graph_builder/services/graph_service.py +465 -0
  63. code_graph_builder/services/kuzu_service.py +665 -0
  64. code_graph_builder/services/memory_service.py +171 -0
  65. code_graph_builder/settings.py +75 -0
  66. code_graph_builder/tests/ACCEPTANCE_CRITERIA_PHASE2.md +401 -0
  67. code_graph_builder/tests/__init__.py +1 -0
  68. code_graph_builder/tests/run_acceptance_check.py +378 -0
  69. code_graph_builder/tests/test_api_find.py +231 -0
  70. code_graph_builder/tests/test_api_find_integration.py +226 -0
  71. code_graph_builder/tests/test_basic.py +78 -0
  72. code_graph_builder/tests/test_c_api_extraction.py +388 -0
  73. code_graph_builder/tests/test_call_resolution_scenarios.py +504 -0
  74. code_graph_builder/tests/test_embedder.py +411 -0
  75. code_graph_builder/tests/test_integration_semantic.py +434 -0
  76. code_graph_builder/tests/test_mcp_protocol.py +298 -0
  77. code_graph_builder/tests/test_mcp_user_flow.py +190 -0
  78. code_graph_builder/tests/test_rag.py +404 -0
  79. code_graph_builder/tests/test_settings.py +135 -0
  80. code_graph_builder/tests/test_step1_graph_build.py +264 -0
  81. code_graph_builder/tests/test_step2_api_docs.py +323 -0
  82. code_graph_builder/tests/test_step3_embedding.py +278 -0
  83. code_graph_builder/tests/test_vector_store.py +552 -0
  84. code_graph_builder/tools/__init__.py +40 -0
  85. code_graph_builder/tools/graph_query.py +495 -0
  86. code_graph_builder/tools/semantic_search.py +387 -0
  87. code_graph_builder/types.py +333 -0
  88. code_graph_builder/utils/__init__.py +0 -0
  89. code_graph_builder/utils/path_utils.py +30 -0
  90. code_graph_builder-0.2.0.dist-info/METADATA +321 -0
  91. code_graph_builder-0.2.0.dist-info/RECORD +93 -0
  92. code_graph_builder-0.2.0.dist-info/WHEEL +4 -0
  93. code_graph_builder-0.2.0.dist-info/entry_points.txt +3 -0
@@ -0,0 +1,605 @@
1
+ """Graph updater for building code knowledge graphs."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import sys
6
+ from collections import OrderedDict, defaultdict
7
+ from collections.abc import Callable, ItemsView, KeysView
8
+ from pathlib import Path
9
+ from typing import TYPE_CHECKING
10
+
11
+ from loguru import logger
12
+ from tree_sitter import Node, Parser
13
+
14
+ from . import constants as cs
15
+ from .language_spec import get_language_spec
16
+ from .parsers.factory import ProcessorFactory
17
+ from .services import IngestorProtocol
18
+ from .types import (
19
+ FunctionRegistry,
20
+ LanguageQueries,
21
+ NodeType,
22
+ PropertyDict,
23
+ QualifiedName,
24
+ SimpleNameLookup,
25
+ TrieNode,
26
+ )
27
+
28
+ if TYPE_CHECKING:
29
+ from .embeddings.qwen3_embedder import BaseEmbedder
30
+ from .embeddings.vector_store import VectorStore
31
+ from .utils.path_utils import should_skip_path
32
+
33
+
34
class FunctionRegistryTrie:
    """Registry of qualified names backed by a flat dict plus a prefix trie.

    ``_entries`` maps each qualified name to its node type and answers exact
    lookups. ``root`` holds the same names split on ``cs.SEPARATOR_DOT`` so
    prefix queries only have to walk the matching subtree. Endpoint nodes
    carry their type and full name under ``cs.TRIE_TYPE_KEY`` and
    ``cs.TRIE_QN_KEY``.
    """

    def __init__(self, simple_name_lookup: SimpleNameLookup | None = None) -> None:
        """Create an empty registry.

        Args:
            simple_name_lookup: Optional mapping from a simple name to the
                qualified names ending in it; consulted first by
                ``find_ending_with``.
        """
        self.root: TrieNode = {}
        self._entries: FunctionRegistry = {}
        self._simple_name_lookup = simple_name_lookup

    def insert(self, qualified_name: QualifiedName, func_type: NodeType) -> None:
        """Register ``qualified_name`` with ``func_type`` in both structures."""
        self._entries[qualified_name] = func_type

        cursor: TrieNode = self.root
        for segment in qualified_name.split(cs.SEPARATOR_DOT):
            nxt = cursor.setdefault(segment, {})
            assert isinstance(nxt, dict)
            cursor = nxt

        # Mark the final node as an endpoint.
        cursor[cs.TRIE_TYPE_KEY] = func_type
        cursor[cs.TRIE_QN_KEY] = qualified_name

    def get(
        self, qualified_name: QualifiedName, default: NodeType | None = None
    ) -> NodeType | None:
        """Return the type stored for ``qualified_name`` or ``default``."""
        return self._entries.get(qualified_name, default)

    def __contains__(self, qualified_name: QualifiedName) -> bool:
        return qualified_name in self._entries

    def __getitem__(self, qualified_name: QualifiedName) -> NodeType:
        return self._entries[qualified_name]

    def __setitem__(self, qualified_name: QualifiedName, func_type: NodeType) -> None:
        self.insert(qualified_name, func_type)

    def __delitem__(self, qualified_name: QualifiedName) -> None:
        """Remove ``qualified_name``; a missing name is silently ignored."""
        if qualified_name in self._entries:
            del self._entries[qualified_name]
            self._cleanup_trie_path(
                qualified_name.split(cs.SEPARATOR_DOT), self.root
            )

    def _cleanup_trie_path(self, parts: list[str], node: TrieNode) -> bool:
        """Strip the endpoint reached via ``parts`` and prune emptied nodes.

        Returns:
            True when ``node`` is left with neither an endpoint marker nor any
            child segment, i.e. the caller may delete it.
        """
        if not parts:
            # Reached the endpoint: drop its markers, keep any children.
            node.pop(cs.TRIE_QN_KEY, None)
            node.pop(cs.TRIE_TYPE_KEY, None)
            return not node

        head, *tail = parts
        if head not in node:
            return False

        branch = node[head]
        assert isinstance(branch, dict)
        if self._cleanup_trie_path(tail, branch):
            del node[head]

        if cs.TRIE_QN_KEY in node:
            # Still an endpoint for a shorter name.
            return False
        # Prunable only when nothing but internal keys remain.
        return all(key.startswith(cs.TRIE_INTERNAL_PREFIX) for key in node)

    def _navigate_to_prefix(self, prefix: str) -> TrieNode | None:
        """Return the trie node for ``prefix`` (the root when empty) or None."""
        cursor: TrieNode = self.root
        segments = prefix.split(cs.SEPARATOR_DOT) if prefix else []
        for segment in segments:
            try:
                nxt = cursor[segment]
            except KeyError:
                return None
            assert isinstance(nxt, dict)
            cursor = nxt
        return cursor

    def _collect_from_subtree(
        self,
        node: TrieNode,
        filter_fn: Callable[[QualifiedName], bool] | None = None,
    ) -> list[tuple[QualifiedName, NodeType]]:
        """Gather ``(qualified_name, type)`` pairs below ``node`` in pre-order.

        Args:
            node: Subtree root to walk.
            filter_fn: Optional predicate on the qualified name; entries for
                which it returns False are left out.
        """

        def walk(current: TrieNode):
            if cs.TRIE_QN_KEY in current:
                qn = current[cs.TRIE_QN_KEY]
                func_type = current[cs.TRIE_TYPE_KEY]
                assert isinstance(qn, str) and isinstance(func_type, NodeType)
                if filter_fn is None or filter_fn(qn):
                    yield qn, func_type

            for key, child in current.items():
                if key.startswith(cs.TRIE_INTERNAL_PREFIX):
                    continue
                assert isinstance(child, dict)
                yield from walk(child)

        return list(walk(node))

    def keys(self) -> KeysView[QualifiedName]:
        return self._entries.keys()

    def items(self) -> ItemsView[QualifiedName, NodeType]:
        return self._entries.items()

    def __len__(self) -> int:
        return len(self._entries)

    def find_with_prefix_and_suffix(
        self, prefix: str, suffix: str
    ) -> list[QualifiedName]:
        """Return names under ``prefix`` whose last segment(s) equal ``suffix``."""
        subtree = self._navigate_to_prefix(prefix)
        if subtree is None:
            return []

        dotted_suffix = f".{suffix}"

        def has_suffix(qn: QualifiedName) -> bool:
            return qn.endswith(dotted_suffix)

        return [qn for qn, _ in self._collect_from_subtree(subtree, has_suffix)]

    def find_ending_with(self, suffix: str) -> list[QualifiedName]:
        """Return every registered name ending in ``.suffix``.

        Uses the simple-name lookup when it has an entry for ``suffix``;
        otherwise scans all registered names.
        """
        lookup = self._simple_name_lookup
        if lookup is not None and suffix in lookup:
            return list(lookup[suffix])

        dotted_suffix = f".{suffix}"
        return [qn for qn in self._entries.keys() if qn.endswith(dotted_suffix)]

    def find_with_prefix(self, prefix: str) -> list[tuple[QualifiedName, NodeType]]:
        """Return all ``(qualified_name, type)`` pairs located under ``prefix``."""
        subtree = self._navigate_to_prefix(prefix)
        if subtree is None:
            return []
        return self._collect_from_subtree(subtree)
163
+
164
+
165
class BoundedASTCache:
    """LRU cache for AST nodes with entry-count and approximate memory limits.

    Entries live in an ``OrderedDict`` whose order is the recency order:
    writes and reads move a key to the end, eviction pops from the front.
    """

    def __init__(
        self,
        max_entries: int = 1000,
        max_memory_mb: int = 500,
    ):
        """Create an empty cache.

        Args:
            max_entries: Maximum number of cached files.
            max_memory_mb: Approximate memory budget in megabytes.
        """
        self.cache: OrderedDict[Path, tuple[Node, cs.SupportedLanguage]] = OrderedDict()
        self.max_entries = max_entries
        self.max_memory_bytes = max_memory_mb * cs.BYTES_PER_MB

    def __setitem__(self, key: Path, value: tuple[Node, cs.SupportedLanguage]) -> None:
        # Delete first so a re-inserted key lands at the most-recent end.
        if key in self.cache:
            del self.cache[key]

        self.cache[key] = value
        self._enforce_limits()

    def __getitem__(self, key: Path) -> tuple[Node, cs.SupportedLanguage]:
        value = self.cache[key]
        self.cache.move_to_end(key)  # a read counts as a use
        return value

    def __delitem__(self, key: Path) -> None:
        # Deleting a missing key is a no-op.
        if key in self.cache:
            del self.cache[key]

    def __contains__(self, key: Path) -> bool:
        return key in self.cache

    def items(self) -> ItemsView[Path, tuple[Node, cs.SupportedLanguage]]:
        return self.cache.items()

    def _enforce_limits(self) -> None:
        """Evict least-recently-used entries until both limits are respected.

        The entry limit is enforced exactly. When the memory estimate is over
        budget, the oldest ~10% of entries (at least one) are dropped.
        """
        while len(self.cache) > self.max_entries:
            self.cache.popitem(last=False)

        if self._should_evict_for_memory():
            entries_to_remove = max(1, len(self.cache) // 10)
            for _ in range(entries_to_remove):
                if self.cache:
                    self.cache.popitem(last=False)

    @staticmethod
    def _estimate_entry_bytes(value: tuple[Node, cs.SupportedLanguage]) -> int:
        """Return a rough lower-bound size estimate for one cached entry.

        ``sys.getsizeof`` is shallow: for the cached tuple it reports only
        the tuple object itself, never the tree it references. To make the
        estimate track the real payload, the byte span covered by the root
        node (``end_byte - start_byte``) is added when those attributes are
        present. NOTE(review): the span is the size of the parsed source, used
        here as a proxy; the in-memory tree is presumably larger.
        """
        size = sys.getsizeof(value)
        root = value[0]
        start = getattr(root, "start_byte", None)
        end = getattr(root, "end_byte", None)
        if isinstance(start, int) and isinstance(end, int) and end > start:
            size += end - start
        return size

    def _should_evict_for_memory(self) -> bool:
        """Return True when the estimated cache size exceeds the memory budget."""
        try:
            cache_size = sum(
                self._estimate_entry_bytes(v) for v in self.cache.values()
            )
            return cache_size > self.max_memory_bytes
        except Exception:
            # Estimation failed: fall back to a fill-ratio heuristic.
            return len(self.cache) > int(self.max_entries * 0.8)
215
+
216
+
217
class GraphUpdater:
    """Main coordinator for building code knowledge graphs.

    ``run`` drives the passes in order: structure identification, per-file
    definition processing, call processing, method-override processing and
    (optionally) semantic embedding generation, then flushes the ingestor.
    """

    def __init__(
        self,
        ingestor: IngestorProtocol,
        repo_path: Path,
        parsers: dict[cs.SupportedLanguage, Parser],
        queries: dict[cs.SupportedLanguage, LanguageQueries],
        unignore_paths: frozenset[str] | None = None,
        exclude_paths: frozenset[str] | None = None,
        embedder: BaseEmbedder | None = None,
        vector_store: VectorStore | None = None,
        embedding_config: dict[str, bool | int | str] | None = None,
    ):
        """Wire up shared state and the processor factory.

        Args:
            ingestor: Sink that receives graph data; flushed at the end of ``run``.
            repo_path: Root of the repository to analyse.
            parsers: Available tree-sitter parsers keyed by language.
            queries: Language queries keyed by language.
            unignore_paths: Forwarded to path filtering and the factory.
            exclude_paths: Forwarded to path filtering and the factory.
            embedder: Optional embedder used in Pass 4.
            vector_store: Optional store that receives the embeddings.
            embedding_config: Optional settings; ``"enabled"`` and
                ``"batch_size"`` are read here.
        """
        self.ingestor = ingestor
        self.repo_path = repo_path
        self.parsers = parsers
        self.queries = queries
        self.project_name = repo_path.resolve().name
        self.simple_name_lookup: SimpleNameLookup = defaultdict(set)
        self.function_registry = FunctionRegistryTrie(
            simple_name_lookup=self.simple_name_lookup
        )
        self.ast_cache = BoundedASTCache()
        self.unignore_paths = unignore_paths
        self.exclude_paths = exclude_paths

        self.embedder = embedder
        self.vector_store = vector_store
        self.embedding_config = embedding_config or {}
        self._embedding_enabled = self.embedding_config.get("enabled", False)

        self.factory = ProcessorFactory(
            ingestor=self.ingestor,
            repo_path=self.repo_path,
            project_name=self.project_name,
            queries=self.queries,
            function_registry=self.function_registry,
            simple_name_lookup=self.simple_name_lookup,
            ast_cache=self.ast_cache,
            unignore_paths=self.unignore_paths,
            exclude_paths=self.exclude_paths,
        )

    def _is_dependency_file(self, file_name: str, filepath: Path) -> bool:
        """Return True for known dependency manifests and ``.csproj`` files."""
        return (
            file_name.lower() in cs.DEPENDENCY_FILES
            or filepath.suffix.lower() == ".csproj"
        )

    def run(self) -> None:
        """Run the graph building process."""
        logger.info(f"Building graph for project: {self.project_name}")

        # Pass 1: Structure
        logger.info("Pass 1: Identifying project structure")
        self.factory.structure_processor.identify_structure()

        # Pass 2: Files
        logger.info("Pass 2: Processing files")
        self._process_files()

        logger.info(f"Found {len(self.function_registry)} functions")

        # Pass 3: Calls
        logger.info("Pass 3: Processing function calls")
        self._process_function_calls()

        # Process method overrides
        self.factory.definition_processor.process_all_method_overrides()

        # Pass 4: Semantic Embeddings (optional)
        if self._embedding_enabled and self.embedder and self.vector_store:
            logger.info("Pass 4: Generating semantic embeddings")
            self._generate_semantic_embeddings()

        logger.info("Analysis complete")
        self.ingestor.flush_all()

    def _process_files(self) -> None:
        """Process all files in the repository."""
        try:
            from .utils.path_utils import should_skip_path
        except ImportError:
            # Fallback if utils not available.
            # NOTE: this fallback accepts ``unignore_paths`` but does not use it.
            def should_skip_path(
                filepath: Path,
                repo_path: Path,
                exclude_paths: frozenset[str] | None = None,
                unignore_paths: frozenset[str] | None = None,
            ) -> bool:
                rel_path = filepath.relative_to(repo_path)
                path_str = str(rel_path)

                # Skip common directories
                skip_dirs = {".git", "__pycache__", "node_modules", "venv", ".venv", ".pytest_cache"}
                if any(part in skip_dirs for part in rel_path.parts):
                    return True

                # Skip excluded paths
                if exclude_paths:
                    for pattern in exclude_paths:
                        if pattern in path_str:
                            return True

                return False

        # Sort files so header files (.h) are processed before source files (.c)
        # to populate header declarations before visibility resolution.
        all_files = sorted(
            self.repo_path.rglob("*"),
            key=lambda p: (0 if p.suffix == cs.EXT_H else 1, str(p)),
        )
        for filepath in all_files:
            if filepath.is_file() and not should_skip_path(
                filepath,
                self.repo_path,
                exclude_paths=self.exclude_paths,
                unignore_paths=self.unignore_paths,
            ):
                lang_config = get_language_spec(filepath.suffix)
                # Fallback: if the mapped language (e.g. CPP for .h) isn't
                # available but C is, use C for header files.
                if (
                    lang_config
                    and isinstance(lang_config.language, cs.SupportedLanguage)
                    and lang_config.language not in self.parsers
                    and filepath.suffix == cs.EXT_H
                    and cs.SupportedLanguage.C in self.parsers
                ):
                    from .language_spec import LANGUAGE_SPECS
                    lang_config = LANGUAGE_SPECS.get(cs.SupportedLanguage.C)
                if (
                    lang_config
                    and isinstance(lang_config.language, cs.SupportedLanguage)
                    and lang_config.language in self.parsers
                ):
                    result = self.factory.definition_processor.process_file(
                        filepath,
                        lang_config.language,
                        self.queries,
                        self.factory.structure_processor.structural_elements,
                    )
                    if result:
                        # Keep the AST for the later call-processing pass.
                        root_node, language = result
                        self.ast_cache[filepath] = (root_node, language)
                elif self._is_dependency_file(filepath.name, filepath):
                    self.factory.definition_processor.process_dependencies(filepath)

                self.factory.structure_processor.process_generic_file(
                    filepath, filepath.name
                )

    def _process_function_calls(self) -> None:
        """Process function calls in all cached ASTs."""
        # Snapshot the items so the cache may change while calls are processed.
        ast_cache_items = list(self.ast_cache.items())
        for file_path, (root_node, language) in ast_cache_items:
            self.factory.call_processor.process_calls_in_file(
                file_path, root_node, language, self.queries
            )

    def _generate_semantic_embeddings(self) -> None:
        """Generate semantic embeddings for functions and classes.

        This is Pass 4 of the graph building process.
        Extracts source code for each function/method/class and generates
        embeddings in batches using the configured embedder. Failures for a
        single entity are logged and skipped; any other failure is logged and
        ends the pass without raising.
        """
        if not self.embedder or not self.vector_store:
            logger.warning("Embedder or vector store not configured, skipping embeddings")
            return

        try:
            from .embeddings.vector_store import VectorRecord

            records_to_store: list[VectorRecord] = []
            texts_to_embed: list[str] = []
            node_info: list[tuple[int, str, PropertyDict]] = []

            # The config may carry the batch size as a string; comparing that
            # to an int below would raise for every entity, so coerce it once.
            raw_batch_size = self.embedding_config.get("batch_size", 32)
            try:
                batch_size = max(1, int(raw_batch_size))
            except (TypeError, ValueError):
                logger.warning(
                    f"Invalid embedding batch_size {raw_batch_size!r}, using 32"
                )
                batch_size = 32

            for qn, node_type in self.function_registry.items():
                if node_type not in (NodeType.FUNCTION, NodeType.METHOD, NodeType.CLASS):
                    continue

                try:
                    source_code = self._extract_source_for_qualified_name(qn)
                    if not source_code:
                        continue

                    node_id = self._get_node_id_for_qualified_name(qn)
                    if node_id is None:
                        continue

                    texts_to_embed.append(source_code)
                    node_info.append((node_id, qn, {"type": str(node_type)}))

                    if len(texts_to_embed) >= batch_size:
                        self._embed_and_store_batch(
                            texts_to_embed, node_info, records_to_store
                        )
                        texts_to_embed = []
                        node_info = []

                except Exception as e:
                    logger.warning(f"Failed to prepare embedding for {qn}: {e}")
                    continue

            # Flush the final, partially filled batch.
            if texts_to_embed:
                self._embed_and_store_batch(texts_to_embed, node_info, records_to_store)

            stats = self.vector_store.get_stats()
            logger.info(f"Generated embeddings for {stats['count']} code entities")

        except Exception as e:
            logger.error(f"Failed to generate semantic embeddings: {e}")

    def _embed_and_store_batch(
        self,
        texts: list[str],
        node_info: list[tuple[int, str, PropertyDict]],
        records: list,
    ) -> None:
        """Embed a batch of texts and store in vector store.

        Args:
            texts: Source code texts to embed
            node_info: Tuples of (node_id, qualified_name, metadata), aligned
                with ``texts``
            records: Accumulated records list; cleared after a successful store
        """
        from .embeddings.vector_store import VectorRecord

        if not self.embedder or not self.vector_store:
            return

        try:
            embeddings = self.embedder.embed_documents(texts, show_progress=False)

            for (node_id, qn, metadata), embedding in zip(node_info, embeddings):
                record = VectorRecord(
                    node_id=node_id,
                    qualified_name=qn,
                    embedding=embedding,
                    metadata=metadata,
                )
                records.append(record)

            self.vector_store.store_embeddings_batch(records)
            records.clear()

        except Exception as e:
            logger.warning(f"Failed to embed batch: {e}")

    def _extract_source_for_qualified_name(self, qualified_name: str) -> str | None:
        """Extract source code for a qualified name.

        The owning file is resolved from the qualified name and must be in the
        AST cache. The text returned is a window starting two lines before the
        first line that looks like a definition of the entity and ending 50
        lines after it; when no such line is found, the first 2000 characters
        of the file are returned instead.

        Args:
            qualified_name: Fully qualified name of the entity

        Returns:
            Source code string or None if not found
        """
        try:
            parts = qualified_name.split(cs.SEPARATOR_DOT)
            if len(parts) < 2:
                return None

            file_path = self._resolve_file_from_qn(parts)
            if not file_path or not file_path.exists():
                return None

            # Only files that were parsed in Pass 2 are considered.
            if file_path not in self.ast_cache:
                return None

            source_code = file_path.read_text(encoding="utf-8", errors="ignore")

            entity_name = parts[-1]
            lines = source_code.split("\n")

            for i, line in enumerate(lines):
                if entity_name in line and self._is_definition_line(line, entity_name):
                    start_line = max(0, i - 2)
                    end_line = min(len(lines), i + 50)
                    return "\n".join(lines[start_line:end_line])

            return source_code[:2000]

        except Exception as e:
            logger.debug(f"Failed to extract source for {qualified_name}: {e}")
            return None

    def _is_definition_line(self, line: str, name: str) -> bool:
        """Check if a line contains a definition for the given name.

        The line must contain one of the definition keywords and ``name`` as a
        whole identifier, so ``def foo_bar`` is not taken for a definition of
        ``foo``.

        Args:
            line: Source code line
            name: Entity name to check

        Returns:
            True if this looks like a definition line
        """
        import re

        if not name:
            return False

        stripped = line.strip()
        keywords = ["def ", "class ", "function ", "fn ", "func "]
        if not any(kw in stripped for kw in keywords):
            return False

        # Lookarounds instead of \b so names with non-word characters still work.
        whole_name = rf"(?<!\w){re.escape(name)}(?!\w)"
        return re.search(whole_name, stripped) is not None

    def _resolve_file_from_qn(self, parts: list[str]) -> Path | None:
        """Resolve file path from qualified name parts.

        Tries progressively shorter prefixes of the name (after the project
        segment) as a path under the repository, first verbatim and then with
        each known source extension.

        Args:
            parts: Parts of the qualified name

        Returns:
            Path object or None if not resolved
        """
        try:
            if parts[0] != self.project_name:
                return None

            relative_parts = parts[1:]

            for i in range(len(relative_parts), 0, -1):
                candidate = self.repo_path.joinpath(*relative_parts[:i])
                if candidate.is_file():
                    return candidate

                for ext in [".py", ".js", ".ts", ".rs", ".go", ".java", ".cpp", ".c"]:
                    candidate_with_ext = candidate.with_suffix(ext)
                    # Require a regular file: a directory named like a source
                    # file cannot be read as source.
                    if candidate_with_ext.is_file():
                        return candidate_with_ext

            return None

        except Exception:
            return None

    def _get_node_id_for_qualified_name(self, qualified_name: str) -> int | None:
        """Get node ID for a qualified name from the ingestor.

        When the ingestor exposes a ``_node_id_cache`` whose tuple keys carry
        the qualified name in position 2, the cached id is returned. Otherwise
        an id in ``[0, 2**31)`` is derived from a SHA-256 digest of the name.
        The built-in ``hash`` is not used because string hashes are randomized
        per process, which would give the same entity a different id on every
        run.

        Args:
            qualified_name: Fully qualified name

        Returns:
            Node ID or None if not found
        """
        import hashlib

        try:
            if hasattr(self.ingestor, "_node_id_cache"):
                cache = self.ingestor._node_id_cache
                for key, node_id in cache.items():
                    if isinstance(key, tuple) and len(key) >= 3:
                        if key[2] == qualified_name:
                            return node_id

            digest = hashlib.sha256(qualified_name.encode("utf-8")).digest()
            return int.from_bytes(digest[:4], "big") % (2**31)

        except Exception:
            return None

    def remove_file_from_state(self, file_path: Path) -> None:
        """Remove a file from the internal state.

        Drops the file's cached AST, every registry entry whose qualified name
        is the file's module name or lies below it, and the matching entries
        in the simple-name lookup.

        Args:
            file_path: File to forget. ``Path.relative_to`` raises
                ``ValueError`` when it is not located under ``repo_path``.
        """
        logger.debug(f"Removing state for: {file_path}")

        if file_path in self.ast_cache:
            del self.ast_cache[file_path]

        relative_path = file_path.relative_to(self.repo_path)
        # A package's __init__ file maps to the package itself, not a module.
        path_parts = (
            relative_path.parent.parts
            if file_path.name == cs.INIT_PY
            else relative_path.with_suffix("").parts
        )
        module_qn_prefix = cs.SEPARATOR_DOT.join([self.project_name, *path_parts])

        qns_to_remove = set()

        # Iterate over a copy: entries are deleted inside the loop.
        for qn in list(self.function_registry.keys()):
            if qn.startswith(f"{module_qn_prefix}.") or qn == module_qn_prefix:
                qns_to_remove.add(qn)
                del self.function_registry[qn]

        # Only existing keys are reassigned, so the dict size is unchanged
        # during iteration.
        for simple_name, qn_set in self.simple_name_lookup.items():
            original_count = len(qn_set)
            new_qn_set = qn_set - qns_to_remove
            if len(new_qn_set) < original_count:
                self.simple_name_lookup[simple_name] = new_qn_set
@@ -0,0 +1 @@
1
+ """Guidance agent: converts design documents into code generation guidance."""