kodit 0.3.2__py3-none-any.whl → 0.3.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of kodit might be problematic. Click here for more details.

Files changed (70):
  1. kodit/_version.py +2 -2
  2. kodit/application/factories/code_indexing_factory.py +56 -29
  3. kodit/application/services/code_indexing_application_service.py +152 -118
  4. kodit/cli.py +14 -41
  5. kodit/domain/entities.py +268 -197
  6. kodit/domain/protocols.py +61 -0
  7. kodit/domain/services/embedding_service.py +1 -1
  8. kodit/domain/services/index_query_service.py +66 -0
  9. kodit/domain/services/index_service.py +282 -0
  10. kodit/domain/value_objects.py +143 -65
  11. kodit/infrastructure/cloning/git/working_copy.py +17 -8
  12. kodit/infrastructure/cloning/metadata.py +37 -67
  13. kodit/infrastructure/embedding/embedding_factory.py +1 -1
  14. kodit/infrastructure/embedding/local_vector_search_repository.py +1 -1
  15. kodit/infrastructure/embedding/vectorchord_vector_search_repository.py +1 -1
  16. kodit/infrastructure/enrichment/null_enrichment_provider.py +4 -10
  17. kodit/infrastructure/git/git_utils.py +1 -63
  18. kodit/infrastructure/ignore/ignore_pattern_provider.py +1 -2
  19. kodit/infrastructure/indexing/auto_indexing_service.py +2 -12
  20. kodit/infrastructure/indexing/fusion_service.py +1 -1
  21. kodit/infrastructure/mappers/__init__.py +1 -0
  22. kodit/infrastructure/mappers/index_mapper.py +344 -0
  23. kodit/infrastructure/slicing/__init__.py +1 -0
  24. kodit/infrastructure/slicing/language_detection_service.py +18 -0
  25. kodit/infrastructure/slicing/slicer.py +894 -0
  26. kodit/infrastructure/sqlalchemy/embedding_repository.py +1 -1
  27. kodit/infrastructure/sqlalchemy/entities.py +203 -0
  28. kodit/infrastructure/sqlalchemy/index_repository.py +579 -0
  29. kodit/mcp.py +0 -7
  30. kodit/migrations/env.py +1 -1
  31. kodit/migrations/versions/4073b33f9436_add_file_processing_flag.py +36 -0
  32. kodit/migrations/versions/4552eb3f23ce_add_summary.py +4 -4
  33. kodit/migrations/versions/7c3bbc2ab32b_add_embeddings_table.py +24 -16
  34. kodit/migrations/versions/85155663351e_initial.py +64 -48
  35. kodit/migrations/versions/c3f5137d30f5_index_all_the_things.py +20 -14
  36. kodit/utils/__init__.py +1 -0
  37. kodit/utils/path_utils.py +54 -0
  38. {kodit-0.3.2.dist-info → kodit-0.3.4.dist-info}/METADATA +9 -4
  39. kodit-0.3.4.dist-info/RECORD +89 -0
  40. kodit/domain/enums.py +0 -9
  41. kodit/domain/repositories.py +0 -128
  42. kodit/domain/services/ignore_service.py +0 -45
  43. kodit/domain/services/indexing_service.py +0 -204
  44. kodit/domain/services/snippet_extraction_service.py +0 -89
  45. kodit/domain/services/snippet_service.py +0 -215
  46. kodit/domain/services/source_service.py +0 -85
  47. kodit/infrastructure/cloning/folder/__init__.py +0 -1
  48. kodit/infrastructure/cloning/folder/factory.py +0 -128
  49. kodit/infrastructure/cloning/folder/working_copy.py +0 -38
  50. kodit/infrastructure/cloning/git/factory.py +0 -153
  51. kodit/infrastructure/indexing/index_repository.py +0 -286
  52. kodit/infrastructure/indexing/snippet_domain_service_factory.py +0 -37
  53. kodit/infrastructure/snippet_extraction/__init__.py +0 -1
  54. kodit/infrastructure/snippet_extraction/language_detection_service.py +0 -39
  55. kodit/infrastructure/snippet_extraction/languages/csharp.scm +0 -12
  56. kodit/infrastructure/snippet_extraction/languages/go.scm +0 -26
  57. kodit/infrastructure/snippet_extraction/languages/java.scm +0 -12
  58. kodit/infrastructure/snippet_extraction/languages/javascript.scm +0 -24
  59. kodit/infrastructure/snippet_extraction/languages/python.scm +0 -22
  60. kodit/infrastructure/snippet_extraction/languages/typescript.scm +0 -25
  61. kodit/infrastructure/snippet_extraction/snippet_extraction_factory.py +0 -67
  62. kodit/infrastructure/snippet_extraction/snippet_query_provider.py +0 -45
  63. kodit/infrastructure/snippet_extraction/tree_sitter_snippet_extractor.py +0 -182
  64. kodit/infrastructure/sqlalchemy/file_repository.py +0 -78
  65. kodit/infrastructure/sqlalchemy/repository.py +0 -133
  66. kodit/infrastructure/sqlalchemy/snippet_repository.py +0 -259
  67. kodit-0.3.2.dist-info/RECORD +0 -103
  68. {kodit-0.3.2.dist-info → kodit-0.3.4.dist-info}/WHEEL +0 -0
  69. {kodit-0.3.2.dist-info → kodit-0.3.4.dist-info}/entry_points.txt +0 -0
  70. {kodit-0.3.2.dist-info → kodit-0.3.4.dist-info}/licenses/LICENSE +0 -0
@@ -0,0 +1,894 @@
1
+ """Complete self-contained analyzer for kodit-slicer.
2
+
3
+ This module combines all necessary functionality without external dependencies
4
+ on the legacy domain/application/infrastructure layers.
5
+ """
6
+
7
+ from collections import defaultdict
8
+ from collections.abc import Generator
9
+ from dataclasses import dataclass, field
10
+ from pathlib import Path
11
+ from typing import Any, ClassVar
12
+
13
+ from tree_sitter import Node, Parser, Tree
14
+ from tree_sitter_language_pack import get_language
15
+
16
+ from kodit.domain.entities import File, Snippet
17
+
18
+
19
@dataclass
class FunctionInfo:
    """Record describing one function definition found in a parsed file."""

    # Path of the file in which the definition was found.
    file: Path
    # Syntax-tree node of the definition itself.
    node: Node
    # (start_byte, end_byte) offsets of the definition within its file.
    span: tuple[int, int]
    # Lookup key for the definition, formatted as "<file stem>.<name>".
    qualified_name: str
27
+
28
+
29
@dataclass
class AnalyzerState:
    """Mutable working state shared by every step of one analysis run."""

    # Parser used to turn file contents into syntax trees.
    parser: Parser
    # Paths of the files under analysis.
    files: list[Path] = field(default_factory=list)
    # Parsed syntax tree for each file.
    asts: dict[Path, Tree] = field(default_factory=dict)
    # Qualified function name -> information about its definition.
    def_index: dict[str, FunctionInfo] = field(default_factory=dict)
    # Caller qualified name -> qualified names it calls.
    call_graph: dict[str, set[str]] = field(
        default_factory=lambda: defaultdict(set)
    )
    # Callee qualified name -> qualified names that call it.
    reverse_calls: dict[str, set[str]] = field(
        default_factory=lambda: defaultdict(set)
    )
    # File -> {imported name: qualified target}.
    imports: dict[Path, dict[str, str]] = field(
        default_factory=lambda: defaultdict(dict)
    )
42
+
43
+
44
class LanguageConfig:
    """Per-language lookup table of syntax-tree node types.

    Each entry in ``CONFIGS`` maps a lower-case language name to:

    * ``function_nodes`` / ``method_nodes``: node types treated as definitions
    * ``call_node``: node type treated as a call
    * ``import_nodes``: node types treated as imports
    * ``extension``: file extension associated with the language
    * ``name_field``: field holding the definition name, or ``None`` to fall
      back to the first identifier child
    """

    CONFIGS: ClassVar[dict[str, dict[str, Any]]] = {
        "python": {
            "function_nodes": ["function_definition"],
            "method_nodes": [],
            "call_node": "call",
            "import_nodes": ["import_statement", "import_from_statement"],
            "extension": ".py",
            "name_field": None,  # Use identifier child
        },
        "java": {
            "function_nodes": ["method_declaration"],
            "method_nodes": [],
            "call_node": "method_invocation",
            "import_nodes": ["import_declaration"],
            "extension": ".java",
            "name_field": None,
        },
        "c": {
            "function_nodes": ["function_definition"],
            "method_nodes": [],
            "call_node": "call_expression",
            "import_nodes": ["preproc_include"],
            "extension": ".c",
            "name_field": "declarator",
        },
        "cpp": {
            "function_nodes": ["function_definition"],
            "method_nodes": [],
            "call_node": "call_expression",
            "import_nodes": ["preproc_include", "using_declaration"],
            "extension": ".cpp",
            "name_field": "declarator",
        },
        "rust": {
            "function_nodes": ["function_item"],
            "method_nodes": [],
            "call_node": "call_expression",
            "import_nodes": ["use_declaration", "extern_crate_declaration"],
            "extension": ".rs",
            "name_field": "name",
        },
        "go": {
            "function_nodes": ["function_declaration"],
            "method_nodes": ["method_declaration"],
            "call_node": "call_expression",
            "import_nodes": ["import_declaration"],
            "extension": ".go",
            "name_field": None,
        },
        "javascript": {
            "function_nodes": [
                "function_declaration",
                "function_expression",
                "arrow_function",
            ],
            "method_nodes": [],
            "call_node": "call_expression",
            "import_nodes": ["import_statement", "import_declaration"],
            "extension": ".js",
            "name_field": None,
        },
        "csharp": {
            "function_nodes": ["method_declaration"],
            "method_nodes": ["constructor_declaration"],
            "call_node": "invocation_expression",
            "import_nodes": ["using_directive"],
            "extension": ".cs",
            "name_field": None,
        },
        "html": {
            "function_nodes": ["script_element", "style_element"],
            "method_nodes": ["element"],  # Elements with id/class attributes
            "call_node": "attribute",
            "import_nodes": ["script_element", "element"],  # script and link elements
            "extension": ".html",
            "name_field": None,
        },
        "css": {
            "function_nodes": ["rule_set", "keyframes_statement"],
            "method_nodes": ["media_statement"],
            "call_node": "call_expression",
            "import_nodes": ["import_statement"],
            "extension": ".css",
            "name_field": None,
        },
    }

    # Alternate spellings share the very same config dict as their canonical
    # language, so a change to one is visible through every alias.
    CONFIGS.update(
        {
            "c++": CONFIGS["cpp"],
            "typescript": CONFIGS["javascript"],
            "ts": CONFIGS["javascript"],
            "js": CONFIGS["javascript"],
            "c#": CONFIGS["csharp"],
            "cs": CONFIGS["csharp"],
        }
    )
141
+
142
+
143
class Slicer:
    """Slicer that extracts dependency-aware code snippets from files.

    For every definition found in the supplied files a snippet is produced
    that contains the definition itself, the definitions it (transitively)
    calls, and up to two example call sites. All analysis state lives in a
    per-call ``AnalyzerState``; the slicer itself holds no state.
    """

    def __init__(self) -> None:
        """Initialize an empty slicer."""

    def extract_snippets(
        self, files: list[File], language: str = "python"
    ) -> list[Snippet]:
        """Extract code snippets from a list of files.

        Args:
            files: List of domain File objects to analyze
            language: Programming language for analysis (case-insensitive)

        Returns:
            List of extracted code snippets as domain entities. The list is
            empty when ``language`` has no entry in ``LanguageConfig.CONFIGS``.

        Raises:
            ValueError: If no files provided
            FileNotFoundError: If any file doesn't exist
            RuntimeError: If the tree-sitter parser cannot be loaded

        """
        if not files:
            raise ValueError("No files provided")

        language = language.lower()

        # An unsupported language is not an error: there is simply nothing
        # this slicer can extract.
        if language not in LanguageConfig.CONFIGS:
            return []

        config = LanguageConfig.CONFIGS[language]

        # Initialize tree-sitter
        tree_sitter_name = self._get_tree_sitter_language_name(language)
        try:
            ts_language = get_language(tree_sitter_name)  # type: ignore[arg-type]
            parser = Parser(ts_language)
        except Exception as e:
            raise RuntimeError(f"Failed to load {language} parser: {e}") from e

        # Create mapping from Paths to File objects and extract paths
        path_to_file_map: dict[Path, File] = {}
        file_paths: list[Path] = []

        for file in files:
            file_path = file.as_path()
            path_to_file_map[file_path] = file
            file_paths.append(file_path)

            # Validate file exists
            if not file_path.exists():
                raise FileNotFoundError(f"File not found: {file_path}")

        # Initialize state
        state = AnalyzerState(parser=parser)
        state.files = file_paths
        file_contents: dict[Path, str] = {}

        # Parse all files. Reading is best-effort: a file that cannot be read
        # is left out of the analysis instead of aborting the whole run.
        for file_path in file_paths:
            try:
                source_code = file_path.read_bytes()
            except OSError:
                continue
            state.asts[file_path] = state.parser.parse(source_code)

        # Build indexes
        self._build_definition_and_import_indexes(state, config, language)
        self._build_call_graph(state, config)
        self._build_reverse_call_graph(state)

        # Extract snippets for all functions. Every name comes straight from
        # def_index, so _get_snippet cannot report a missing function here.
        # The snippet text is deliberately NOT searched for "not found":
        # doing so would discard any function whose source contains that
        # phrase (for example in an error message).
        snippets = []
        for qualified_name in state.def_index:
            snippet_content = self._get_snippet(
                qualified_name,
                state,
                file_contents,
                {"max_depth": 2, "max_functions": 8},
            )
            snippet = self._create_snippet_entity(
                qualified_name, snippet_content, language, state, path_to_file_map
            )
            snippets.append(snippet)

        return snippets

    def _get_tree_sitter_language_name(self, language: str) -> str:
        """Map user language names to tree-sitter language names.

        Names without a mapping are returned unchanged.
        """
        mapping = {
            "c++": "cpp",
            "c": "c",
            "cpp": "cpp",
            "java": "java",
            "rust": "rust",
            "python": "python",
            "go": "go",
            "javascript": "javascript",
            "typescript": "typescript",
            "js": "javascript",
            "ts": "typescript",
            "csharp": "c_sharp",
            "c#": "c_sharp",
            "cs": "c_sharp",
            "html": "html",
            "css": "css",
        }
        return mapping.get(language, language)

    def _build_definition_and_import_indexes(
        self, state: AnalyzerState, config: dict[str, Any], language: str
    ) -> None:
        """Build definition and import indexes.

        Fills ``state.def_index`` with every named definition and
        ``state.imports`` with the import map of each parsed file.
        """
        for file_path, tree in state.asts.items():
            file_imports: dict[str, str] = {}

            # One walk serves both indexes. The two checks are independent
            # because some node types (e.g. HTML script elements) are listed
            # both as definitions and as imports.
            for node in self._walk_tree(tree.root_node):
                if self._is_function_definition(node, config):
                    qualified_name = self._qualify_name(
                        node, file_path, config, language
                    )
                    if qualified_name:
                        # A later definition with the same qualified name
                        # replaces the earlier one.
                        state.def_index[qualified_name] = FunctionInfo(
                            file=file_path,
                            node=node,
                            span=(node.start_byte, node.end_byte),
                            qualified_name=qualified_name,
                        )

                if self._is_import_statement(node, config):
                    file_imports.update(self._extract_imports(node))

            state.imports[file_path] = file_imports

    def _build_call_graph(self, state: AnalyzerState, config: dict[str, Any]) -> None:
        """Build call graph from function definitions."""
        for qualified_name, func_info in state.def_index.items():
            calls = self._find_function_calls(
                func_info.node, func_info.file, state, config
            )
            state.call_graph[qualified_name] = calls

    def _build_reverse_call_graph(self, state: AnalyzerState) -> None:
        """Build reverse call graph (callee -> callers) from the call graph."""
        for caller, callees in state.call_graph.items():
            for callee in callees:
                state.reverse_calls[callee].add(caller)

    def _walk_tree(self, node: Node) -> Generator[Node, None, None]:
        """Walk the AST tree, yielding all nodes in pre-order.

        The traversal is iterative, so arbitrarily deep trees cannot exhaust
        the Python call stack.
        """
        cursor = node.walk()
        # True while the cursor sits on a node that has not been yielded yet;
        # False right after climbing back up to an already-visited parent.
        descending = True

        while True:
            if descending:
                current_node = cursor.node
                if current_node is not None:
                    yield current_node
                if cursor.goto_first_child():
                    continue

            if cursor.goto_next_sibling():
                descending = True
            elif cursor.goto_parent():
                descending = False
            else:
                # Back at the starting node with nothing left to visit.
                return

    def _is_function_definition(self, node: Node, config: dict[str, Any]) -> bool:
        """Check if node is a function or method definition."""
        return node.type in (config["function_nodes"] + config["method_nodes"])

    def _is_import_statement(self, node: Node, config: dict[str, Any]) -> bool:
        """Check if node is an import statement."""
        return node.type in config["import_nodes"]

    def _extract_function_name(
        self, node: Node, config: dict[str, Any], language: str
    ) -> str | None:
        """Extract function name from a function definition node.

        Dispatches to a language-specific extractor and falls back to the
        first ``identifier`` child. Returns ``None`` when no name is found.
        """
        if language == "html":
            return self._extract_html_element_name(node)
        if language == "css":
            return self._extract_css_rule_name(node)
        if language == "go" and node.type == "method_declaration":
            return self._extract_go_method_name(node)
        if language in ["c", "cpp"] and config["name_field"]:
            return self._extract_c_cpp_function_name(node, config)
        if language == "rust" and config["name_field"]:
            return self._extract_rust_function_name(node, config)
        return self._extract_default_function_name(node)

    def _extract_go_method_name(self, node: Node) -> str | None:
        """Extract method name from Go method declaration."""
        for child in node.children:
            if child.type == "field_identifier" and child.text is not None:
                return child.text.decode("utf-8")
        return None

    def _extract_c_cpp_function_name(
        self, node: Node, config: dict[str, Any]
    ) -> str | None:
        """Extract function name from C/C++ function definition."""
        declarator = node.child_by_field_name(config["name_field"])
        if not declarator:
            return None

        if declarator.type == "function_declarator":
            for child in declarator.children:
                if child.type == "identifier" and child.text is not None:
                    return child.text.decode("utf-8")
        elif declarator.type == "identifier" and declarator.text is not None:
            return declarator.text.decode("utf-8")
        return None

    def _extract_rust_function_name(
        self, node: Node, config: dict[str, Any]
    ) -> str | None:
        """Extract function name from Rust function definition."""
        name_node = node.child_by_field_name(config["name_field"])
        if name_node and name_node.type == "identifier" and name_node.text is not None:
            return name_node.text.decode("utf-8")
        return None

    def _extract_html_element_name(self, node: Node) -> str | None:
        """Extract meaningful name from HTML element."""
        if node.type == "script_element":
            return "script"
        if node.type == "style_element":
            return "style"
        if node.type == "element":
            return self._extract_html_element_info(node)
        return None

    def _extract_html_element_info(self, node: Node) -> str | None:
        """Extract element info with ID or class.

        Preference order: ``tag#id``, then ``tag.first-class``, then the bare
        tag name. ``element`` stands in for a missing tag name.
        """
        for child in node.children:
            if child.type == "start_tag":
                tag_name = self._get_tag_name(child)
                element_id = self._get_element_id(child)
                class_name = self._get_element_class(child)

                if element_id:
                    return f"{tag_name or 'element'}#{element_id}"
                if class_name:
                    return f"{tag_name or 'element'}.{class_name}"
                if tag_name:
                    return tag_name
        return None

    def _get_tag_name(self, start_tag: Node) -> str | None:
        """Get tag name from start_tag node."""
        for child in start_tag.children:
            if child.type == "tag_name" and child.text:
                return child.text.decode("utf-8")
        return None

    def _get_element_id(self, start_tag: Node) -> str | None:
        """Get element ID from start_tag node."""
        return self._get_attribute_value(start_tag, "id")

    def _get_element_class(self, start_tag: Node) -> str | None:
        """Get first class name from start_tag node."""
        class_value = self._get_attribute_value(start_tag, "class")
        return class_value.split()[0] if class_value else None

    def _get_attribute_value(self, start_tag: Node, attr_name: str) -> str | None:
        """Get attribute value from start_tag node."""
        for child in start_tag.children:
            if child.type == "attribute":
                name = self._get_attr_name(child)
                if name == attr_name:
                    return self._get_attr_value(child)
        return None

    def _get_attr_name(self, attr_node: Node) -> str | None:
        """Get attribute name."""
        for child in attr_node.children:
            if child.type == "attribute_name" and child.text:
                return child.text.decode("utf-8")
        return None

    def _get_attr_value(self, attr_node: Node) -> str | None:
        """Get attribute value (only quoted values are recognised)."""
        for child in attr_node.children:
            if child.type == "quoted_attribute_value":
                for val_child in child.children:
                    if val_child.type == "attribute_value" and val_child.text:
                        return val_child.text.decode("utf-8")
        return None

    def _extract_css_rule_name(self, node: Node) -> str | None:
        """Extract meaningful name from CSS rule."""
        if node.type == "rule_set":
            return self._extract_css_selector(node)
        if node.type == "keyframes_statement":
            return self._extract_keyframes_name(node)
        if node.type == "media_statement":
            return "@media"
        return None

    def _extract_css_selector(self, rule_node: Node) -> str | None:
        """Extract CSS selector from rule_set."""
        for child in rule_node.children:
            if child.type == "selectors":
                selector_parts = []
                for selector_child in child.children:
                    part = self._get_selector_part(selector_child)
                    if part:
                        selector_parts.append(part)
                if selector_parts:
                    return "".join(selector_parts[:2])  # First couple selectors
        return None

    def _get_selector_part(self, selector_node: Node) -> str | None:
        """Get a single selector part."""
        if selector_node.type == "class_selector":
            return self._extract_class_selector(selector_node)
        if selector_node.type == "id_selector":
            return self._extract_id_selector(selector_node)
        if selector_node.type == "type_selector" and selector_node.text:
            return selector_node.text.decode("utf-8")
        return None

    def _extract_class_selector(self, node: Node) -> str | None:
        """Extract class selector name."""
        for child in node.children:
            if child.type == "class_name":
                for name_child in child.children:
                    if name_child.type == "identifier" and name_child.text:
                        return f".{name_child.text.decode('utf-8')}"
        return None

    def _extract_id_selector(self, node: Node) -> str | None:
        """Extract ID selector name."""
        for child in node.children:
            if child.type == "id_name":
                for name_child in child.children:
                    if name_child.type == "identifier" and name_child.text:
                        return f"#{name_child.text.decode('utf-8')}"
        return None

    def _extract_keyframes_name(self, node: Node) -> str | None:
        """Extract keyframes animation name."""
        for child in node.children:
            if child.type == "keyframes_name" and child.text:
                return f"@keyframes-{child.text.decode('utf-8')}"
        return None

    def _extract_default_function_name(self, node: Node) -> str | None:
        """Extract function name using default identifier search."""
        for child in node.children:
            if child.type == "identifier" and child.text is not None:
                return child.text.decode("utf-8")
        return None

    def _qualify_name(
        self, node: Node, file_path: Path, config: dict[str, Any], language: str
    ) -> str | None:
        """Create qualified name ``<file stem>.<name>`` for a function node."""
        function_name = self._extract_function_name(node, config, language)
        if not function_name:
            return None

        module_name = file_path.stem
        return f"{module_name}.{function_name}"

    def _get_file_content(self, file_path: Path, file_contents: dict[Path, str]) -> str:
        """Get cached file content.

        On a read or decode failure a ``# Error reading file: ...``
        placeholder is cached and returned instead of raising.
        """
        if file_path not in file_contents:
            try:
                # newline="" disables newline translation. Spans are byte
                # offsets into the raw file, so "\r\n" must survive decoding
                # or every offset after the first line ending would be wrong.
                with file_path.open(encoding="utf-8", newline="") as f:
                    file_contents[file_path] = f.read()
            except (UnicodeDecodeError, OSError) as e:
                file_contents[file_path] = f"# Error reading file: {e}"
        return file_contents[file_path]

    def _get_snippet(
        self,
        function_name: str,
        state: AnalyzerState,
        file_contents: dict[Path, str],
        snippet_config: dict[str, Any] | None = None,
    ) -> str:
        """Generate a smart snippet for a function with its dependencies.

        Args:
            function_name: Qualified name of the target function
            state: Analysis state holding the indexes and call graphs
            file_contents: Cache of file text, filled on demand
            snippet_config: Optional ``max_depth`` (default 2),
                ``max_functions`` (default 8) and ``include_usage``
                (default True) settings

        Returns:
            The snippet text, or an ``Error: Function '...' not found``
            message when ``function_name`` is not in the definition index.

        """
        if snippet_config is None:
            snippet_config = {}

        max_depth = snippet_config.get("max_depth", 2)
        max_functions = snippet_config.get("max_functions", 8)
        include_usage = snippet_config.get("include_usage", True)

        if function_name not in state.def_index:
            return f"Error: Function '{function_name}' not found"

        # Find dependencies
        dependencies = self._find_dependencies(
            function_name, state, max_depth, max_functions
        )

        # Sort dependencies topologically
        sorted_deps = self._topological_sort(dependencies, state)

        # Build snippet
        snippet_lines = []

        # Add imports
        imports = self._get_minimal_imports({function_name}.union(dependencies))
        if imports:
            snippet_lines.extend(imports)
            snippet_lines.append("")

        # Add target function
        target_source = self._extract_function_source(
            function_name, state, file_contents
        )
        snippet_lines.append(target_source)

        # Add dependencies
        if dependencies:
            snippet_lines.append("")
            snippet_lines.append("# === DEPENDENCIES ===")
            for dep in sorted_deps:
                snippet_lines.append("")
                dep_source = self._extract_function_source(dep, state, file_contents)
                snippet_lines.append(dep_source)

        # Add usage examples
        if include_usage:
            callers = state.reverse_calls.get(function_name, set())
            if callers:
                snippet_lines.append("")
                snippet_lines.append("# === USAGE EXAMPLES ===")
                # Sorted so the same two examples are chosen on every run,
                # independent of set iteration order.
                for caller in sorted(callers)[:2]:  # Show up to 2 examples
                    call_line = self._find_function_call_line(
                        caller, function_name, state, file_contents
                    )
                    if call_line and not call_line.startswith("#"):
                        snippet_lines.append(f"# From {caller}:")
                        snippet_lines.append(f" {call_line}")
                        snippet_lines.append("")

        return "\n".join(snippet_lines)

    def _create_snippet_entity(
        self,
        qualified_name: str,
        snippet_content: str,
        language: str,
        state: AnalyzerState,
        path_to_file_map: dict[Path, File],
    ) -> Snippet:
        """Create a Snippet domain entity from extracted content."""
        # Determine all files that this snippet derives from
        derives_from_files = self._find_source_files_for_snippet(
            qualified_name, snippet_content, state, path_to_file_map
        )

        # Create the snippet entity
        snippet = Snippet(derives_from=derives_from_files)

        # Add the original content
        snippet.add_original_content(snippet_content, language)

        return snippet

    def _find_source_files_for_snippet(
        self,
        qualified_name: str,
        snippet_content: str,
        state: AnalyzerState,
        path_to_file_map: dict[Path, File],
    ) -> list[File]:
        """Find all source files that a snippet derives from.

        The primary function's file comes first, followed by the files of
        the dependencies named in the snippet, without duplicates.
        """
        source_files: list[File] = []
        source_file_paths: set[Path] = set()

        # Add the primary function's file
        if qualified_name in state.def_index:
            primary_file_path = state.def_index[qualified_name].file
            if (
                primary_file_path in path_to_file_map
                and primary_file_path not in source_file_paths
            ):
                source_files.append(path_to_file_map[primary_file_path])
                source_file_paths.add(primary_file_path)

        # Find all dependencies mentioned in the snippet and add their source
        # files. Sorted so the resulting file order is the same on every run.
        dependencies = self._extract_dependency_names_from_snippet(
            snippet_content, state
        )
        for dep_name in sorted(dependencies):
            if dep_name in state.def_index:
                dep_file_path = state.def_index[dep_name].file
                if (
                    dep_file_path in path_to_file_map
                    and dep_file_path not in source_file_paths
                ):
                    source_files.append(path_to_file_map[dep_file_path])
                    source_file_paths.add(dep_file_path)

        return source_files

    def _extract_dependency_names_from_snippet(
        self, snippet_content: str, state: AnalyzerState
    ) -> set[str]:
        """Extract dependency function names from snippet content.

        Only lines between the DEPENDENCIES and USAGE EXAMPLES markers are
        considered, and only ``def`` / ``async def`` headers are recognised,
        so dependencies written in other syntaxes are not detected here.
        """
        dependencies: set[str] = set()
        in_dependencies_section = False

        for original_line in snippet_content.split("\n"):
            line = original_line.strip()
            if line == "# === DEPENDENCIES ===":
                in_dependencies_section = True
                continue
            if line == "# === USAGE EXAMPLES ===":
                in_dependencies_section = False
                continue
            if not in_dependencies_section:
                continue

            # Treat "async def name(...)" exactly like "def name(...)".
            header = line.removeprefix("async ").lstrip()
            if not header.startswith("def "):
                continue

            func_def_start = len("def ")
            func_def_end = header.find("(", func_def_start)
            if func_def_end > func_def_start:
                func_name = header[func_def_start:func_def_end].strip()
                # The snippet only carries the bare name; map it back to
                # every qualified name in the index that ends with it.
                dependencies.update(
                    self._get_qualified_names_for_function(func_name, state)
                )

        return dependencies

    def _get_qualified_names_for_function(
        self, func_name: str, state: AnalyzerState
    ) -> list[str]:
        """Get possible qualified names for a function name."""
        # This is a simple implementation - in practice you might want more
        # sophisticated matching
        return [
            qualified
            for qualified in state.def_index
            if qualified.endswith(f".{func_name}")
        ]

    # Helper methods

    def _extract_imports(self, node: Node) -> dict[str, str]:
        """Extract imports from import node.

        Handles ``import_statement`` nodes (each dotted name maps to itself)
        and ``import_from_statement`` nodes (each imported name maps to
        ``<module>.<name>``). Any other node type yields an empty dict.
        """
        imports = {}
        if node.type == "import_statement":
            for child in node.children:
                if child.type == "dotted_name" and child.text is not None:
                    module_name = child.text.decode("utf-8")
                    imports[module_name] = module_name
        elif node.type == "import_from_statement":
            module_node = node.child_by_field_name("module_name")
            if module_node and module_node.text is not None:
                module_name = module_node.text.decode("utf-8")
                for child in node.children:
                    if child.type == "import_list":
                        for import_child in child.children:
                            if (
                                import_child.type == "dotted_name"
                                and import_child.text is not None
                            ):
                                imported_name = import_child.text.decode("utf-8")
                                imports[imported_name] = (
                                    f"{module_name}.{imported_name}"
                                )
        return imports

    def _find_function_calls(
        self, node: Node, file_path: Path, state: AnalyzerState, config: dict[str, Any]
    ) -> set[str]:
        """Find function calls in a node.

        Returns the resolved names of every call inside ``node`` whose
        callee could be named and resolved.
        """
        calls = set()
        call_node_type = config["call_node"]

        for child in self._walk_tree(node):
            if child.type == call_node_type:
                function_node = child.child_by_field_name("function")
                if function_node:
                    call_name = self._extract_call_name(function_node)
                    if call_name:
                        resolved = self._resolve_call(call_name, file_path, state)
                        if resolved:
                            calls.add(resolved)
        return calls

    def _extract_call_name(self, node: Node) -> str | None:
        """Extract function name from call node.

        Returns the identifier text, or ``object.attribute`` for attribute
        access; any other callee shape yields ``None``.
        """
        if node.type == "identifier" and node.text is not None:
            return node.text.decode("utf-8")
        if node.type == "attribute":
            object_node = node.child_by_field_name("object")
            attribute_node = node.child_by_field_name("attribute")
            if (
                object_node
                and attribute_node
                and object_node.text is not None
                and attribute_node.text is not None
            ):
                obj_name = object_node.text.decode("utf-8")
                attr_name = attribute_node.text.decode("utf-8")
                return f"{obj_name}.{attr_name}"
        return None

    def _resolve_call(
        self, call_name: str, file_path: Path, state: AnalyzerState
    ) -> str | None:
        """Resolve a function call to qualified name.

        Resolution order: a definition in the calling file, then the file's
        import map, then ``call_name`` taken as an already qualified name.
        Returns ``None`` when nothing matches.
        """
        module_name = file_path.stem
        local_qualified = f"{module_name}.{call_name}"

        if local_qualified in state.def_index:
            return local_qualified

        # Check imports
        if file_path in state.imports:
            imports = state.imports[file_path]
            if call_name in imports:
                imported = imports[call_name]
                if imported in state.def_index:
                    return imported
                # The index is keyed by "<file stem>.<name>", whereas the
                # import map records the full dotted module path. Retry with
                # only the last module component so "pkg.mod.func" (or a
                # relative ".mod.func") can match "mod.func".
                stem_qualified = ".".join(imported.split(".")[-2:])
                if stem_qualified in state.def_index:
                    return stem_qualified
                # Not an indexed definition; hand back the import target.
                return imported

        # Check if already qualified
        if call_name in state.def_index:
            return call_name

        return None

    def _find_dependencies(
        self, target: str, state: AnalyzerState, max_depth: int, max_functions: int
    ) -> set[str]:
        """Find relevant dependencies for a function.

        Breadth-first search over the call graph from ``target``, limited to
        ``max_depth`` levels and stopping once ``max_functions`` dependencies
        have been collected. ``target`` itself is never part of the result.
        """
        visited: set[str] = set()
        to_visit: list[tuple[str, int]] = [(target, 0)]
        next_index = 0  # read position; avoids O(n) pops from the list front
        dependencies: set[str] = set()

        while next_index < len(to_visit) and len(dependencies) < max_functions:
            current, depth = to_visit[next_index]
            next_index += 1
            if current in visited or depth > max_depth:
                continue
            visited.add(current)

            if current != target:
                dependencies.add(current)

            # Add direct dependencies. Callees are visited in sorted order so
            # that hitting the max_functions cut-off keeps the same functions
            # on every run, independent of set iteration order.
            to_visit.extend(
                (callee, depth + 1)
                for callee in sorted(state.call_graph.get(current, set()))
                if callee not in visited and callee in state.def_index
            )

        return dependencies

    def _topological_sort(self, functions: set[str], state: AnalyzerState) -> list[str]:
        """Sort functions in dependency order.

        Callers come before the functions they call. Functions that are part
        of a call cycle are appended at the end. Ties are broken by name, so
        the result is the same on every run.
        """
        if not functions:
            return []

        ordered_functions = sorted(functions)

        # Build subgraph restricted to the requested functions
        in_degree: dict[str, int] = dict.fromkeys(ordered_functions, 0)
        graph: dict[str, list[str]] = defaultdict(list)

        for func in ordered_functions:
            for callee in sorted(state.call_graph.get(func, set())):
                if callee in functions:
                    graph[func].append(callee)
                    in_degree[callee] += 1

        # Find roots (functions that nothing else in the set calls)
        queue = [func for func in ordered_functions if in_degree[func] == 0]
        next_index = 0
        result: list[str] = []
        emitted: set[str] = set()

        while next_index < len(queue):
            current = queue[next_index]
            next_index += 1
            result.append(current)
            emitted.add(current)
            for neighbor in graph[current]:
                in_degree[neighbor] -= 1
                if in_degree[neighbor] == 0:
                    queue.append(neighbor)

        # Add any remaining (cycles)
        result.extend(func for func in ordered_functions if func not in emitted)

        return result

    def _get_minimal_imports(self, _functions: set[str]) -> list[str]:
        """Get minimal imports needed for functions."""
        # For now, we'll skip imports to simplify the refactoring
        return []

    def _extract_function_source(
        self, qualified_name: str, state: AnalyzerState, file_contents: dict[Path, str]
    ) -> str:
        """Extract complete function source code.

        Returns a ``# Function ... not found`` comment when the name is not
        in the definition index.
        """
        if qualified_name not in state.def_index:
            return f"# Function {qualified_name} not found"

        func_info = state.def_index[qualified_name]
        file_content = self._get_file_content(func_info.file, file_contents)

        # The span holds byte offsets, so slice the UTF-8 bytes, not the str.
        start_byte, end_byte = func_info.span
        source_bytes = file_content.encode("utf-8")
        return source_bytes[start_byte:end_byte].decode("utf-8")

    def _find_function_call_line(
        self,
        caller_qualified_name: str,
        target_name: str,
        state: AnalyzerState,
        file_contents: dict[Path, str],
    ) -> str:
        """Find the actual line where a function calls another.

        Returns the first stripped line of the caller's source that contains
        both the target's simple name and an opening parenthesis, or a
        ``# calls <target>`` placeholder when no such line exists.
        """
        if caller_qualified_name not in state.def_index:
            return f"# calls {target_name}"

        caller_info = state.def_index[caller_qualified_name]
        file_content = self._get_file_content(caller_info.file, file_contents)
        source_bytes = file_content.encode("utf-8")

        # Extract the caller function source
        start_byte, end_byte = caller_info.span
        function_source = source_bytes[start_byte:end_byte].decode("utf-8")

        # Look for lines that contain the target function call
        lines = function_source.split("\n")
        target_simple_name = target_name.split(".")[-1]  # Get just the function name

        for line in lines:
            if target_simple_name in line and "(" in line:
                # Clean up the line (remove leading/trailing whitespace)
                clean_line = line.strip()
                if clean_line:
                    return clean_line

        return f"# calls {target_name}"