kodit 0.3.3__py3-none-any.whl → 0.3.4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of kodit has been flagged as potentially problematic.
- kodit/_version.py +2 -2
- kodit/application/factories/code_indexing_factory.py +2 -24
- kodit/application/services/code_indexing_application_service.py +10 -2
- kodit/domain/services/index_service.py +25 -66
- kodit/domain/value_objects.py +10 -22
- kodit/infrastructure/slicing/__init__.py +1 -0
- kodit/infrastructure/slicing/language_detection_service.py +18 -0
- kodit/infrastructure/slicing/slicer.py +894 -0
- kodit/infrastructure/sqlalchemy/index_repository.py +29 -0
- kodit/migrations/versions/4073b33f9436_add_file_processing_flag.py +6 -4
- kodit/migrations/versions/4552eb3f23ce_add_summary.py +4 -4
- kodit/migrations/versions/7c3bbc2ab32b_add_embeddings_table.py +24 -16
- kodit/migrations/versions/85155663351e_initial.py +64 -48
- kodit/migrations/versions/c3f5137d30f5_index_all_the_things.py +20 -14
- {kodit-0.3.3.dist-info → kodit-0.3.4.dist-info}/METADATA +9 -4
- {kodit-0.3.3.dist-info → kodit-0.3.4.dist-info}/RECORD +19 -29
- kodit/infrastructure/snippet_extraction/__init__.py +0 -1
- kodit/infrastructure/snippet_extraction/factories.py +0 -13
- kodit/infrastructure/snippet_extraction/language_detection_service.py +0 -39
- kodit/infrastructure/snippet_extraction/languages/csharp.scm +0 -12
- kodit/infrastructure/snippet_extraction/languages/go.scm +0 -26
- kodit/infrastructure/snippet_extraction/languages/java.scm +0 -12
- kodit/infrastructure/snippet_extraction/languages/javascript.scm +0 -24
- kodit/infrastructure/snippet_extraction/languages/python.scm +0 -22
- kodit/infrastructure/snippet_extraction/languages/typescript.scm +0 -25
- kodit/infrastructure/snippet_extraction/snippet_extraction_factory.py +0 -67
- kodit/infrastructure/snippet_extraction/snippet_query_provider.py +0 -44
- kodit/infrastructure/snippet_extraction/tree_sitter_snippet_extractor.py +0 -182
- kodit/infrastructure/sqlalchemy/file_repository.py +0 -78
- {kodit-0.3.3.dist-info → kodit-0.3.4.dist-info}/WHEEL +0 -0
- {kodit-0.3.3.dist-info → kodit-0.3.4.dist-info}/entry_points.txt +0 -0
- {kodit-0.3.3.dist-info → kodit-0.3.4.dist-info}/licenses/LICENSE +0 -0
kodit/infrastructure/slicing/slicer.py
@@ -0,0 +1,894 @@
"""Complete self-contained analyzer for kodit-slicer.

This module combines all necessary functionality without external dependencies
on the legacy domain/application/infrastructure layers.
"""

from collections import defaultdict
from collections.abc import Generator
from dataclasses import dataclass, field
from pathlib import Path
from typing import Any, ClassVar

from tree_sitter import Node, Parser, Tree
from tree_sitter_language_pack import get_language

from kodit.domain.entities import File, Snippet

@dataclass
class FunctionInfo:
    """Information about a function definition."""

    file: Path
    node: Node
    span: tuple[int, int]
    qualified_name: str

@dataclass
class AnalyzerState:
    """Central state for the dependency analysis."""

    parser: Parser
    files: list[Path] = field(default_factory=list)
    asts: dict[Path, Tree] = field(default_factory=dict)
    def_index: dict[str, FunctionInfo] = field(default_factory=dict)
    call_graph: dict[str, set[str]] = field(default_factory=lambda: defaultdict(set))
    reverse_calls: dict[str, set[str]] = field(default_factory=lambda: defaultdict(set))
    imports: dict[Path, dict[str, str]] = field(
        default_factory=lambda: defaultdict(dict)
    )

class LanguageConfig:
    """Language-specific configuration."""

    CONFIGS: ClassVar[dict[str, dict[str, Any]]] = {
        "python": {
            "function_nodes": ["function_definition"],
            "method_nodes": [],
            "call_node": "call",
            "import_nodes": ["import_statement", "import_from_statement"],
            "extension": ".py",
            "name_field": None,  # Use identifier child
        },
        "java": {
            "function_nodes": ["method_declaration"],
            "method_nodes": [],
            "call_node": "method_invocation",
            "import_nodes": ["import_declaration"],
            "extension": ".java",
            "name_field": None,
        },
        "c": {
            "function_nodes": ["function_definition"],
            "method_nodes": [],
            "call_node": "call_expression",
            "import_nodes": ["preproc_include"],
            "extension": ".c",
            "name_field": "declarator",
        },
        "cpp": {
            "function_nodes": ["function_definition"],
            "method_nodes": [],
            "call_node": "call_expression",
            "import_nodes": ["preproc_include", "using_declaration"],
            "extension": ".cpp",
            "name_field": "declarator",
        },
        "rust": {
            "function_nodes": ["function_item"],
            "method_nodes": [],
            "call_node": "call_expression",
            "import_nodes": ["use_declaration", "extern_crate_declaration"],
            "extension": ".rs",
            "name_field": "name",
        },
        "go": {
            "function_nodes": ["function_declaration"],
            "method_nodes": ["method_declaration"],
            "call_node": "call_expression",
            "import_nodes": ["import_declaration"],
            "extension": ".go",
            "name_field": None,
        },
        "javascript": {
            "function_nodes": [
                "function_declaration",
                "function_expression",
                "arrow_function",
            ],
            "method_nodes": [],
            "call_node": "call_expression",
            "import_nodes": ["import_statement", "import_declaration"],
            "extension": ".js",
            "name_field": None,
        },
        "csharp": {
            "function_nodes": ["method_declaration"],
            "method_nodes": ["constructor_declaration"],
            "call_node": "invocation_expression",
            "import_nodes": ["using_directive"],
            "extension": ".cs",
            "name_field": None,
        },
        "html": {
            "function_nodes": ["script_element", "style_element"],
            "method_nodes": ["element"],  # Elements with id/class attributes
            "call_node": "attribute",
            "import_nodes": ["script_element", "element"],  # script and link elements
            "extension": ".html",
            "name_field": None,
        },
        "css": {
            "function_nodes": ["rule_set", "keyframes_statement"],
            "method_nodes": ["media_statement"],
            "call_node": "call_expression",
            "import_nodes": ["import_statement"],
            "extension": ".css",
            "name_field": None,
        },
    }

    # Aliases
    CONFIGS["c++"] = CONFIGS["cpp"]
    CONFIGS["typescript"] = CONFIGS["javascript"]
    CONFIGS["ts"] = CONFIGS["javascript"]
    CONFIGS["js"] = CONFIGS["javascript"]
    CONFIGS["c#"] = CONFIGS["csharp"]
    CONFIGS["cs"] = CONFIGS["csharp"]

class Slicer:
    """Slicer that extracts code snippets from files."""

    def __init__(self) -> None:
        """Initialize an empty slicer."""

    def extract_snippets(
        self, files: list[File], language: str = "python"
    ) -> list[Snippet]:
        """Extract code snippets from a list of files.

        Args:
            files: List of domain File objects to analyze
            language: Programming language for analysis

        Returns:
            List of extracted code snippets as domain entities

        Raises:
            ValueError: If no files provided or language unsupported
            FileNotFoundError: If any file doesn't exist

        """
        if not files:
            raise ValueError("No files provided")

        language = language.lower()

        # Get language configuration
        if language not in LanguageConfig.CONFIGS:
            return []

        config = LanguageConfig.CONFIGS[language]

        # Initialize tree-sitter
        tree_sitter_name = self._get_tree_sitter_language_name(language)
        try:
            ts_language = get_language(tree_sitter_name)  # type: ignore[arg-type]
            parser = Parser(ts_language)
        except Exception as e:
            raise RuntimeError(f"Failed to load {language} parser: {e}") from e

        # Create mapping from Paths to File objects and extract paths
        path_to_file_map: dict[Path, File] = {}
        file_paths: list[Path] = []

        for file in files:
            file_path = file.as_path()
            path_to_file_map[file_path] = file
            file_paths.append(file_path)

            # Validate file exists
            if not file_path.exists():
                raise FileNotFoundError(f"File not found: {file_path}")

        # Initialize state
        state = AnalyzerState(parser=parser)
        state.files = file_paths
        file_contents: dict[Path, str] = {}

        # Parse all files
        for file_path in file_paths:
            try:
                with file_path.open("rb") as f:
                    source_code = f.read()
                tree = state.parser.parse(source_code)
                state.asts[file_path] = tree
            except OSError:
                # Skip files that can't be parsed
                pass

        # Build indexes
        self._build_definition_and_import_indexes(state, config, language)
        self._build_call_graph(state, config)
        self._build_reverse_call_graph(state)

        # Extract snippets for all functions
        snippets = []
        for qualified_name in state.def_index:
            snippet_content = self._get_snippet(
                qualified_name,
                state,
                file_contents,
                {"max_depth": 2, "max_functions": 8},
            )
            if "not found" not in snippet_content:
                snippet = self._create_snippet_entity(
                    qualified_name, snippet_content, language, state, path_to_file_map
                )
                snippets.append(snippet)

        return snippets

    def _get_tree_sitter_language_name(self, language: str) -> str:
        """Map user language names to tree-sitter language names."""
        mapping = {
            "c++": "cpp",
            "c": "c",
            "cpp": "cpp",
            "java": "java",
            "rust": "rust",
            "python": "python",
            "go": "go",
            "javascript": "javascript",
            "typescript": "typescript",
            "js": "javascript",
            "ts": "typescript",
            "csharp": "c_sharp",
            "c#": "c_sharp",
            "cs": "c_sharp",
            "html": "html",
            "css": "css",
        }
        return mapping.get(language, language)

    def _build_definition_and_import_indexes(
        self, state: AnalyzerState, config: dict[str, Any], language: str
    ) -> None:
        """Build definition and import indexes."""
        for file_path, tree in state.asts.items():
            # Build definition index
            for node in self._walk_tree(tree.root_node):
                if self._is_function_definition(node, config):
                    qualified_name = self._qualify_name(
                        node, file_path, config, language
                    )
                    if qualified_name:
                        span = (node.start_byte, node.end_byte)
                        state.def_index[qualified_name] = FunctionInfo(
                            file=file_path,
                            node=node,
                            span=span,
                            qualified_name=qualified_name,
                        )

            # Build import map
            file_imports = {}
            for node in self._walk_tree(tree.root_node):
                if self._is_import_statement(node, config):
                    imports = self._extract_imports(node)
                    file_imports.update(imports)
            state.imports[file_path] = file_imports

    def _build_call_graph(self, state: AnalyzerState, config: dict[str, Any]) -> None:
        """Build call graph from function definitions."""
        for qualified_name, func_info in state.def_index.items():
            calls = self._find_function_calls(
                func_info.node, func_info.file, state, config
            )
            state.call_graph[qualified_name] = calls

    def _build_reverse_call_graph(self, state: AnalyzerState) -> None:
        """Build reverse call graph."""
        for caller, callees in state.call_graph.items():
            for callee in callees:
                state.reverse_calls[callee].add(caller)

    def _walk_tree(self, node: Node) -> Generator[Node, None, None]:
        """Walk the AST tree, yielding all nodes."""
        cursor = node.walk()

        def _walk_recursive() -> Generator[Node, None, None]:
            current_node = cursor.node
            if current_node is not None:
                yield current_node
            if cursor.goto_first_child():
                yield from _walk_recursive()
                while cursor.goto_next_sibling():
                    yield from _walk_recursive()
                cursor.goto_parent()

        yield from _walk_recursive()

    def _is_function_definition(self, node: Node, config: dict[str, Any]) -> bool:
        """Check if node is a function definition."""
        return node.type in (config["function_nodes"] + config["method_nodes"])

    def _is_import_statement(self, node: Node, config: dict[str, Any]) -> bool:
        """Check if node is an import statement."""
        return node.type in config["import_nodes"]

    def _extract_function_name(
        self, node: Node, config: dict[str, Any], language: str
    ) -> str | None:
        """Extract function name from a function definition node."""
        if language == "html":
            return self._extract_html_element_name(node)
        if language == "css":
            return self._extract_css_rule_name(node)
        if language == "go" and node.type == "method_declaration":
            return self._extract_go_method_name(node)
        if language in ["c", "cpp"] and config["name_field"]:
            return self._extract_c_cpp_function_name(node, config)
        if language == "rust" and config["name_field"]:
            return self._extract_rust_function_name(node, config)
        return self._extract_default_function_name(node)

    def _extract_go_method_name(self, node: Node) -> str | None:
        """Extract method name from Go method declaration."""
        for child in node.children:
            if child.type == "field_identifier" and child.text is not None:
                return child.text.decode("utf-8")
        return None

    def _extract_c_cpp_function_name(
        self, node: Node, config: dict[str, Any]
    ) -> str | None:
        """Extract function name from C/C++ function definition."""
        declarator = node.child_by_field_name(config["name_field"])
        if not declarator:
            return None

        if declarator.type == "function_declarator":
            for child in declarator.children:
                if child.type == "identifier" and child.text is not None:
                    return child.text.decode("utf-8")
        elif declarator.type == "identifier" and declarator.text is not None:
            return declarator.text.decode("utf-8")
        return None

    def _extract_rust_function_name(
        self, node: Node, config: dict[str, Any]
    ) -> str | None:
        """Extract function name from Rust function definition."""
        name_node = node.child_by_field_name(config["name_field"])
        if name_node and name_node.type == "identifier" and name_node.text is not None:
            return name_node.text.decode("utf-8")
        return None

    def _extract_html_element_name(self, node: Node) -> str | None:
        """Extract meaningful name from HTML element."""
        if node.type == "script_element":
            return "script"
        if node.type == "style_element":
            return "style"
        if node.type == "element":
            return self._extract_html_element_info(node)
        return None

    def _extract_html_element_info(self, node: Node) -> str | None:
        """Extract element info with ID or class."""
        for child in node.children:
            if child.type == "start_tag":
                tag_name = self._get_tag_name(child)
                element_id = self._get_element_id(child)
                class_name = self._get_element_class(child)

                if element_id:
                    return f"{tag_name or 'element'}#{element_id}"
                if class_name:
                    return f"{tag_name or 'element'}.{class_name}"
                if tag_name:
                    return tag_name
        return None

    def _get_tag_name(self, start_tag: Node) -> str | None:
        """Get tag name from start_tag node."""
        for child in start_tag.children:
            if child.type == "tag_name" and child.text:
                return child.text.decode("utf-8")
        return None

    def _get_element_id(self, start_tag: Node) -> str | None:
        """Get element ID from start_tag node."""
        return self._get_attribute_value(start_tag, "id")

    def _get_element_class(self, start_tag: Node) -> str | None:
        """Get first class name from start_tag node."""
        class_value = self._get_attribute_value(start_tag, "class")
        return class_value.split()[0] if class_value else None

    def _get_attribute_value(self, start_tag: Node, attr_name: str) -> str | None:
        """Get attribute value from start_tag node."""
        for child in start_tag.children:
            if child.type == "attribute":
                name = self._get_attr_name(child)
                if name == attr_name:
                    return self._get_attr_value(child)
        return None

    def _get_attr_name(self, attr_node: Node) -> str | None:
        """Get attribute name."""
        for child in attr_node.children:
            if child.type == "attribute_name" and child.text:
                return child.text.decode("utf-8")
        return None

    def _get_attr_value(self, attr_node: Node) -> str | None:
        """Get attribute value."""
        for child in attr_node.children:
            if child.type == "quoted_attribute_value":
                for val_child in child.children:
                    if val_child.type == "attribute_value" and val_child.text:
                        return val_child.text.decode("utf-8")
        return None

    def _extract_css_rule_name(self, node: Node) -> str | None:
        """Extract meaningful name from CSS rule."""
        if node.type == "rule_set":
            return self._extract_css_selector(node)
        if node.type == "keyframes_statement":
            return self._extract_keyframes_name(node)
        if node.type == "media_statement":
            return "@media"
        return None

    def _extract_css_selector(self, rule_node: Node) -> str | None:
        """Extract CSS selector from rule_set."""
        for child in rule_node.children:
            if child.type == "selectors":
                selector_parts = []
                for selector_child in child.children:
                    part = self._get_selector_part(selector_child)
                    if part:
                        selector_parts.append(part)
                if selector_parts:
                    return "".join(selector_parts[:2])  # First couple selectors
        return None

    def _get_selector_part(self, selector_node: Node) -> str | None:
        """Get a single selector part."""
        if selector_node.type == "class_selector":
            return self._extract_class_selector(selector_node)
        if selector_node.type == "id_selector":
            return self._extract_id_selector(selector_node)
        if selector_node.type == "type_selector" and selector_node.text:
            return selector_node.text.decode("utf-8")
        return None

    def _extract_class_selector(self, node: Node) -> str | None:
        """Extract class selector name."""
        for child in node.children:
            if child.type == "class_name":
                for name_child in child.children:
                    if name_child.type == "identifier" and name_child.text:
                        return f".{name_child.text.decode('utf-8')}"
        return None

    def _extract_id_selector(self, node: Node) -> str | None:
        """Extract ID selector name."""
        for child in node.children:
            if child.type == "id_name":
                for name_child in child.children:
                    if name_child.type == "identifier" and name_child.text:
                        return f"#{name_child.text.decode('utf-8')}"
        return None

    def _extract_keyframes_name(self, node: Node) -> str | None:
        """Extract keyframes animation name."""
        for child in node.children:
            if child.type == "keyframes_name" and child.text:
                return f"@keyframes-{child.text.decode('utf-8')}"
        return None

    def _extract_default_function_name(self, node: Node) -> str | None:
        """Extract function name using default identifier search."""
        for child in node.children:
            if child.type == "identifier" and child.text is not None:
                return child.text.decode("utf-8")
        return None

    def _qualify_name(
        self, node: Node, file_path: Path, config: dict[str, Any], language: str
    ) -> str | None:
        """Create qualified name for a function node."""
        function_name = self._extract_function_name(node, config, language)
        if not function_name:
            return None

        module_name = file_path.stem
        return f"{module_name}.{function_name}"

    def _get_file_content(self, file_path: Path, file_contents: dict[Path, str]) -> str:
        """Get cached file content."""
        if file_path not in file_contents:
            try:
                with file_path.open(encoding="utf-8") as f:
                    file_contents[file_path] = f.read()
            except UnicodeDecodeError as e:
                file_contents[file_path] = f"# Error reading file: {e}"
            except OSError as e:
                file_contents[file_path] = f"# Error reading file: {e}"
        return file_contents[file_path]

    def _get_snippet(
        self,
        function_name: str,
        state: AnalyzerState,
        file_contents: dict[Path, str],
        snippet_config: dict[str, Any] | None = None,
    ) -> str:
        """Generate a smart snippet for a function with its dependencies."""
        if snippet_config is None:
            snippet_config = {}

        max_depth = snippet_config.get("max_depth", 2)
        max_functions = snippet_config.get("max_functions", 8)
        include_usage = snippet_config.get("include_usage", True)

        if function_name not in state.def_index:
            return f"Error: Function '{function_name}' not found"

        # Find dependencies
        dependencies = self._find_dependencies(
            function_name, state, max_depth, max_functions
        )

        # Sort dependencies topologically
        sorted_deps = self._topological_sort(dependencies, state)

        # Build snippet
        snippet_lines = []

        # Add imports
        imports = self._get_minimal_imports({function_name}.union(dependencies))
        if imports:
            snippet_lines.extend(imports)
            snippet_lines.append("")

        # Add target function
        target_source = self._extract_function_source(
            function_name, state, file_contents
        )
        snippet_lines.append(target_source)

        # Add dependencies
        if dependencies:
            snippet_lines.append("")
            snippet_lines.append("# === DEPENDENCIES ===")
            for dep in sorted_deps:
                snippet_lines.append("")
                dep_source = self._extract_function_source(dep, state, file_contents)
                snippet_lines.append(dep_source)

        # Add usage examples
        if include_usage:
            callers = state.reverse_calls.get(function_name, set())
            if callers:
                snippet_lines.append("")
                snippet_lines.append("# === USAGE EXAMPLES ===")
                for caller in list(callers)[:2]:  # Show up to 2 examples
                    call_line = self._find_function_call_line(
                        caller, function_name, state, file_contents
                    )
                    if call_line and not call_line.startswith("#"):
                        snippet_lines.append(f"# From {caller}:")
                        snippet_lines.append(f"  {call_line}")
                        snippet_lines.append("")

        return "\n".join(snippet_lines)

    def _create_snippet_entity(
        self,
        qualified_name: str,
        snippet_content: str,
        language: str,
        state: AnalyzerState,
        path_to_file_map: dict[Path, File],
    ) -> Snippet:
        """Create a Snippet domain entity from extracted content."""
        # Determine all files that this snippet derives from
        derives_from_files = self._find_source_files_for_snippet(
            qualified_name, snippet_content, state, path_to_file_map
        )

        # Create the snippet entity
        snippet = Snippet(derives_from=derives_from_files)

        # Add the original content
        snippet.add_original_content(snippet_content, language)

        return snippet

    def _find_source_files_for_snippet(
        self,
        qualified_name: str,
        snippet_content: str,
        state: AnalyzerState,
        path_to_file_map: dict[Path, File],
    ) -> list[File]:
        """Find all source files that a snippet derives from."""
        source_files: list[File] = []
        source_file_paths: set[Path] = set()

        # Add the primary function's file
        if qualified_name in state.def_index:
            primary_file_path = state.def_index[qualified_name].file
            if (
                primary_file_path in path_to_file_map
                and primary_file_path not in source_file_paths
            ):
                source_files.append(path_to_file_map[primary_file_path])
                source_file_paths.add(primary_file_path)

        # Find all dependencies mentioned in the snippet and add their source files
        dependencies = self._extract_dependency_names_from_snippet(
            snippet_content, state
        )
        for dep_name in dependencies:
            if dep_name in state.def_index:
                dep_file_path = state.def_index[dep_name].file
                if (
                    dep_file_path in path_to_file_map
                    and dep_file_path not in source_file_paths
                ):
                    source_files.append(path_to_file_map[dep_file_path])
                    source_file_paths.add(dep_file_path)

        return source_files

    def _extract_dependency_names_from_snippet(
        self, snippet_content: str, state: AnalyzerState
    ) -> set[str]:
        """Extract dependency function names from snippet content."""
        dependencies: set[str] = set()

        # Look for the DEPENDENCIES section and extract function names
        lines = snippet_content.split("\n")
        in_dependencies_section = False

        for original_line in lines:
            line = original_line.strip()
            if line == "# === DEPENDENCIES ===":
                in_dependencies_section = True
                continue
            if line == "# === USAGE EXAMPLES ===":
                in_dependencies_section = False
                continue

            if in_dependencies_section and line.startswith("def "):
                # Extract function name from "def function_name(...)" pattern
                func_def_start = line.find("def ") + 4
                func_def_end = line.find("(", func_def_start)
                if func_def_end > func_def_start:
                    func_name = line[func_def_start:func_def_end].strip()
                    # Try to find the qualified name (module.function_name format)
                    # We need to search through the state.def_index to find matches
                    for qualified_name in self._get_qualified_names_for_function(
                        func_name, state
                    ):
                        dependencies.add(qualified_name)

        return dependencies

    def _get_qualified_names_for_function(
        self, func_name: str, state: AnalyzerState
    ) -> list[str]:
        """Get possible qualified names for a function name."""
        # This is a simple implementation - in practice you might want more
        # sophisticated matching
        return [
            qualified
            for qualified in state.def_index
            if qualified.endswith(f".{func_name}")
        ]

    # Helper methods

    def _extract_imports(self, node: Node) -> dict[str, str]:
        """Extract imports from import node."""
        imports = {}
        if node.type == "import_statement":
            for child in node.children:
                if child.type == "dotted_name" and child.text is not None:
                    module_name = child.text.decode("utf-8")
                    imports[module_name] = module_name
        elif node.type == "import_from_statement":
            module_node = node.child_by_field_name("module_name")
            if module_node and module_node.text is not None:
                module_name = module_node.text.decode("utf-8")
                for child in node.children:
                    if child.type == "import_list":
                        for import_child in child.children:
                            if (
                                import_child.type == "dotted_name"
                                and import_child.text is not None
                            ):
                                imported_name = import_child.text.decode("utf-8")
                                imports[imported_name] = (
                                    f"{module_name}.{imported_name}"
                                )
        return imports

    def _find_function_calls(
        self, node: Node, file_path: Path, state: AnalyzerState, config: dict[str, Any]
    ) -> set[str]:
        """Find function calls in a node."""
        calls = set()
        call_node_type = config["call_node"]

        for child in self._walk_tree(node):
            if child.type == call_node_type:
                function_node = child.child_by_field_name("function")
                if function_node:
                    call_name = self._extract_call_name(function_node)
                    if call_name:
                        resolved = self._resolve_call(call_name, file_path, state)
                        if resolved:
                            calls.add(resolved)
        return calls

    def _extract_call_name(self, node: Node) -> str | None:
        """Extract function name from call node."""
        if node.type == "identifier" and node.text is not None:
            return node.text.decode("utf-8")
        if node.type == "attribute":
            object_node = node.child_by_field_name("object")
            attribute_node = node.child_by_field_name("attribute")
            if (
                object_node
                and attribute_node
                and object_node.text is not None
                and attribute_node.text is not None
            ):
                obj_name = object_node.text.decode("utf-8")
                attr_name = attribute_node.text.decode("utf-8")
                return f"{obj_name}.{attr_name}"
        return None

    def _resolve_call(
        self, call_name: str, file_path: Path, state: AnalyzerState
    ) -> str | None:
        """Resolve a function call to qualified name."""
        module_name = file_path.stem
        local_qualified = f"{module_name}.{call_name}"

        if local_qualified in state.def_index:
            return local_qualified

        # Check imports
        if file_path in state.imports:
            imports = state.imports[file_path]
            if call_name in imports:
                return imports[call_name]

        # Check if already qualified
        if call_name in state.def_index:
            return call_name

        return None

    def _find_dependencies(
        self, target: str, state: AnalyzerState, max_depth: int, max_functions: int
    ) -> set[str]:
        """Find relevant dependencies for a function."""
        visited: set[str] = set()
        to_visit = [(target, 0)]
        dependencies: set[str] = set()

        while to_visit and len(dependencies) < max_functions:
            current, depth = to_visit.pop(0)
            if current in visited or depth > max_depth:
                continue
            visited.add(current)

            if current != target:
                dependencies.add(current)

            # Add direct dependencies
            to_visit.extend(
                (callee, depth + 1)
                for callee in state.call_graph.get(current, set())
                if callee not in visited and callee in state.def_index
            )

        return dependencies

    def _topological_sort(self, functions: set[str], state: AnalyzerState) -> list[str]:
        """Sort functions in dependency order."""
        if not functions:
            return []

        # Build subgraph
        in_degree: dict[str, int] = defaultdict(int)
        graph: dict[str, set[str]] = defaultdict(set)

        for func in functions:
            for callee in state.call_graph.get(func, set()):
                if callee in functions:
                    graph[func].add(callee)
                    in_degree[callee] += 1

        # Find roots
        queue = [f for f in functions if in_degree[f] == 0]
        result = []

        while queue:
            current = queue.pop(0)
            result.append(current)
            for neighbor in graph[current]:
                in_degree[neighbor] -= 1
                if in_degree[neighbor] == 0:
                    queue.append(neighbor)

        # Add any remaining (cycles)
        for func in functions:
            if func not in result:
                result.append(func)

        return result

    def _get_minimal_imports(self, _functions: set[str]) -> list[str]:
        """Get minimal imports needed for functions."""
        # For now, we'll skip imports to simplify the refactoring
        return []

    def _extract_function_source(
        self, qualified_name: str, state: AnalyzerState, file_contents: dict[Path, str]
    ) -> str:
        """Extract complete function source code."""
        if qualified_name not in state.def_index:
            return f"# Function {qualified_name} not found"

        func_info = state.def_index[qualified_name]
        file_content = self._get_file_content(func_info.file, file_contents)

        # Extract function source using byte positions
        start_byte, end_byte = func_info.span
        source_bytes = file_content.encode("utf-8")
        return source_bytes[start_byte:end_byte].decode("utf-8")

    def _find_function_call_line(
        self,
        caller_qualified_name: str,
        target_name: str,
        state: AnalyzerState,
        file_contents: dict[Path, str],
    ) -> str:
        """Find the actual line where a function calls another."""
        if caller_qualified_name not in state.def_index:
            return f"# calls {target_name}"

        caller_info = state.def_index[caller_qualified_name]
        file_content = self._get_file_content(caller_info.file, file_contents)
        source_bytes = file_content.encode("utf-8")

        # Extract the caller function source
        start_byte, end_byte = caller_info.span
        function_source = source_bytes[start_byte:end_byte].decode("utf-8")

        # Look for lines that contain the target function call
        lines = function_source.split("\n")
        target_simple_name = target_name.split(".")[-1]  # Get just the function name

        for line in lines:
            if target_simple_name in line and "(" in line:
                # Clean up the line (remove leading/trailing whitespace)
                clean_line = line.strip()
                if clean_line:
                    return clean_line

        return f"# calls {target_name}"
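
For orientation, a minimal usage sketch of the new slicer (not part of the released diff). It assumes `files` is a list of kodit.domain.entities.File objects whose as_path() resolves to readable Python sources; how those File objects are constructed is outside this diff.

    # Hypothetical driver code; `files` must be prepared by the caller.
    from kodit.infrastructure.slicing.slicer import Slicer

    slicer = Slicer()
    snippets = slicer.extract_snippets(files, language="python")
    # Each Snippet records the source Files it derives from and carries the
    # extracted function together with its dependencies and usage examples.
    print(f"extracted {len(snippets)} snippets")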