agentcrew-ai 0.8.12__py3-none-any.whl → 0.9.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- AgentCrew/__init__.py +1 -1
- AgentCrew/app.py +34 -633
- AgentCrew/main.py +55 -3
- AgentCrew/main_docker.py +1 -30
- AgentCrew/modules/agents/local_agent.py +26 -1
- AgentCrew/modules/chat/message/command_processor.py +33 -8
- AgentCrew/modules/chat/message/handler.py +5 -1
- AgentCrew/modules/code_analysis/__init__.py +8 -0
- AgentCrew/modules/code_analysis/parsers/__init__.py +67 -0
- AgentCrew/modules/code_analysis/parsers/base.py +93 -0
- AgentCrew/modules/code_analysis/parsers/cpp_parser.py +127 -0
- AgentCrew/modules/code_analysis/parsers/csharp_parser.py +162 -0
- AgentCrew/modules/code_analysis/parsers/generic_parser.py +63 -0
- AgentCrew/modules/code_analysis/parsers/go_parser.py +154 -0
- AgentCrew/modules/code_analysis/parsers/java_parser.py +103 -0
- AgentCrew/modules/code_analysis/parsers/javascript_parser.py +268 -0
- AgentCrew/modules/code_analysis/parsers/kotlin_parser.py +84 -0
- AgentCrew/modules/code_analysis/parsers/php_parser.py +107 -0
- AgentCrew/modules/code_analysis/parsers/python_parser.py +60 -0
- AgentCrew/modules/code_analysis/parsers/ruby_parser.py +46 -0
- AgentCrew/modules/code_analysis/parsers/rust_parser.py +72 -0
- AgentCrew/modules/code_analysis/service.py +231 -897
- AgentCrew/modules/command_execution/constants.py +2 -2
- AgentCrew/modules/console/completers.py +1 -1
- AgentCrew/modules/console/confirmation_handler.py +4 -4
- AgentCrew/modules/console/console_ui.py +17 -3
- AgentCrew/modules/console/conversation_browser/__init__.py +9 -0
- AgentCrew/modules/console/conversation_browser/browser.py +84 -0
- AgentCrew/modules/console/conversation_browser/browser_input_handler.py +279 -0
- AgentCrew/modules/console/conversation_browser/browser_ui.py +643 -0
- AgentCrew/modules/console/conversation_handler.py +34 -1
- AgentCrew/modules/console/diff_display.py +22 -51
- AgentCrew/modules/console/display_handlers.py +142 -26
- AgentCrew/modules/console/tool_display.py +4 -6
- AgentCrew/modules/file_editing/service.py +8 -8
- AgentCrew/modules/file_editing/tool.py +65 -67
- AgentCrew/modules/gui/components/command_handler.py +137 -29
- AgentCrew/modules/gui/components/tool_handlers.py +0 -2
- AgentCrew/modules/gui/themes/README.md +30 -14
- AgentCrew/modules/gui/themes/__init__.py +2 -1
- AgentCrew/modules/gui/themes/atom_light.yaml +1287 -0
- AgentCrew/modules/gui/themes/catppuccin.yaml +1276 -0
- AgentCrew/modules/gui/themes/dracula.yaml +1262 -0
- AgentCrew/modules/gui/themes/nord.yaml +1267 -0
- AgentCrew/modules/gui/themes/saigontech.yaml +1268 -0
- AgentCrew/modules/gui/themes/style_provider.py +76 -264
- AgentCrew/modules/gui/themes/theme_loader.py +379 -0
- AgentCrew/modules/gui/themes/unicorn.yaml +1276 -0
- AgentCrew/modules/gui/widgets/configs/global_settings.py +3 -4
- AgentCrew/modules/gui/widgets/diff_widget.py +30 -61
- AgentCrew/modules/llm/constants.py +18 -9
- AgentCrew/modules/memory/context_persistent.py +1 -0
- AgentCrew/modules/memory/tool.py +1 -1
- AgentCrew/setup.py +470 -0
- {agentcrew_ai-0.8.12.dist-info → agentcrew_ai-0.9.0.dist-info}/METADATA +1 -1
- {agentcrew_ai-0.8.12.dist-info → agentcrew_ai-0.9.0.dist-info}/RECORD +60 -41
- {agentcrew_ai-0.8.12.dist-info → agentcrew_ai-0.9.0.dist-info}/WHEEL +1 -1
- AgentCrew/modules/gui/themes/atom_light.py +0 -1365
- AgentCrew/modules/gui/themes/catppuccin.py +0 -1404
- AgentCrew/modules/gui/themes/dracula.py +0 -1372
- AgentCrew/modules/gui/themes/nord.py +0 -1365
- AgentCrew/modules/gui/themes/saigontech.py +0 -1359
- AgentCrew/modules/gui/themes/unicorn.py +0 -1372
- {agentcrew_ai-0.8.12.dist-info → agentcrew_ai-0.9.0.dist-info}/entry_points.txt +0 -0
- {agentcrew_ai-0.8.12.dist-info → agentcrew_ai-0.9.0.dist-info}/licenses/LICENSE +0 -0
- {agentcrew_ai-0.8.12.dist-info → agentcrew_ai-0.9.0.dist-info}/top_level.txt +0 -0
|
@@ -4,21 +4,23 @@ import subprocess
|
|
|
4
4
|
import json
|
|
5
5
|
import asyncio
|
|
6
6
|
from typing import Any, Dict, List, Optional, TYPE_CHECKING
|
|
7
|
+
from loguru import logger
|
|
7
8
|
|
|
8
9
|
from tree_sitter_language_pack import get_parser
|
|
9
10
|
from tree_sitter import Parser
|
|
10
11
|
|
|
12
|
+
from .parsers import get_parser_for_language, BaseLanguageParser
|
|
13
|
+
|
|
11
14
|
if TYPE_CHECKING:
|
|
12
15
|
from AgentCrew.modules.llm.base import BaseLLMService
|
|
13
16
|
|
|
14
|
-
MAX_ITEMS_OUT =
|
|
15
|
-
MAX_FILES_TO_ANALYZE =
|
|
17
|
+
MAX_ITEMS_OUT = 40
|
|
18
|
+
MAX_FILES_TO_ANALYZE = 600
|
|
16
19
|
|
|
17
20
|
|
|
18
21
|
class CodeAnalysisService:
|
|
19
22
|
"""Service for analyzing code structure using tree-sitter."""
|
|
20
23
|
|
|
21
|
-
# Map of file extensions to language names
|
|
22
24
|
LANGUAGE_MAP = {
|
|
23
25
|
".py": "python",
|
|
24
26
|
".js": "javascript",
|
|
@@ -47,7 +49,6 @@ class CodeAnalysisService:
|
|
|
47
49
|
".toml": "config",
|
|
48
50
|
".yaml": "config",
|
|
49
51
|
".yml": "config",
|
|
50
|
-
# Add more languages as needed
|
|
51
52
|
}
|
|
52
53
|
|
|
53
54
|
def __init__(self, llm_service: Optional["BaseLLMService"] = None):
|
|
@@ -72,7 +73,7 @@ class CodeAnalysisService:
|
|
|
72
73
|
elif self.llm_service.provider_name == "github_copilot":
|
|
73
74
|
self.llm_service.model = "gpt-5-mini"
|
|
74
75
|
try:
|
|
75
|
-
self.
|
|
76
|
+
self._tree_sitter_parser_cache = {
|
|
76
77
|
"python": get_parser("python"),
|
|
77
78
|
"javascript": get_parser("javascript"),
|
|
78
79
|
"typescript": get_parser("typescript"),
|
|
@@ -85,15 +86,17 @@ class CodeAnalysisService:
|
|
|
85
86
|
"c-sharp": get_parser("csharp"),
|
|
86
87
|
"kotlin": get_parser("kotlin"),
|
|
87
88
|
}
|
|
88
|
-
|
|
89
|
+
self._language_parser_cache: Dict[str, BaseLanguageParser] = {}
|
|
90
|
+
|
|
89
91
|
self.class_types = {
|
|
90
92
|
"class_definition",
|
|
91
93
|
"class_declaration",
|
|
92
94
|
"class_specifier",
|
|
93
95
|
"struct_specifier",
|
|
96
|
+
"struct_declaration",
|
|
94
97
|
"struct_item",
|
|
95
98
|
"interface_declaration",
|
|
96
|
-
"object_declaration",
|
|
99
|
+
"object_declaration",
|
|
97
100
|
}
|
|
98
101
|
|
|
99
102
|
self.function_types = {
|
|
@@ -106,7 +109,7 @@ class CodeAnalysisService:
|
|
|
106
109
|
"fn_item",
|
|
107
110
|
"method",
|
|
108
111
|
"singleton_method",
|
|
109
|
-
"primary_constructor",
|
|
112
|
+
"primary_constructor",
|
|
110
113
|
}
|
|
111
114
|
except Exception as e:
|
|
112
115
|
raise RuntimeError(f"Failed to initialize languages: {e}")
|
|
@@ -116,15 +119,17 @@ class CodeAnalysisService:
|
|
|
116
119
|
ext = os.path.splitext(file_path)[1].lower()
|
|
117
120
|
return self.LANGUAGE_MAP.get(ext, "unknown")
|
|
118
121
|
|
|
119
|
-
def
|
|
122
|
+
def _get_tree_sitter_parser(self, language: str) -> Parser:
|
|
120
123
|
"""Get the appropriate tree-sitter parser for a language."""
|
|
121
|
-
if language not in self.
|
|
124
|
+
if language not in self._tree_sitter_parser_cache:
|
|
122
125
|
raise ValueError(f"Unsupported language: {language}")
|
|
123
|
-
return self.
|
|
126
|
+
return self._tree_sitter_parser_cache[language]
|
|
124
127
|
|
|
125
|
-
def
|
|
126
|
-
"""
|
|
127
|
-
|
|
128
|
+
def _get_language_parser(self, language: str) -> BaseLanguageParser:
|
|
129
|
+
"""Get the appropriate language parser for processing nodes."""
|
|
130
|
+
if language not in self._language_parser_cache:
|
|
131
|
+
self._language_parser_cache[language] = get_parser_for_language(language)
|
|
132
|
+
return self._language_parser_cache[language]
|
|
128
133
|
|
|
129
134
|
def _analyze_file(self, file_path: str) -> Optional[Dict[str, Any]]:
|
|
130
135
|
"""Analyze a single file using tree-sitter."""
|
|
@@ -138,772 +143,22 @@ class CodeAnalysisService:
|
|
|
138
143
|
"error": f"Unsupported file type: {os.path.splitext(file_path)[1]}"
|
|
139
144
|
}
|
|
140
145
|
|
|
141
|
-
|
|
142
|
-
if isinstance(
|
|
143
|
-
return
|
|
146
|
+
tree_sitter_parser = self._get_tree_sitter_parser(language)
|
|
147
|
+
if isinstance(tree_sitter_parser, dict) and "error" in tree_sitter_parser:
|
|
148
|
+
return tree_sitter_parser
|
|
144
149
|
|
|
145
|
-
tree =
|
|
150
|
+
tree = tree_sitter_parser.parse(source_code)
|
|
146
151
|
root_node = tree.root_node
|
|
147
152
|
|
|
148
|
-
# Check if we got a valid root node
|
|
149
153
|
if not root_node:
|
|
150
154
|
return {"error": "Failed to parse file - no root node"}
|
|
151
155
|
|
|
156
|
+
language_parser = self._get_language_parser(language)
|
|
157
|
+
|
|
152
158
|
def process_node(node) -> Optional[Dict[str, Any]]:
|
|
153
159
|
if not node:
|
|
154
160
|
return None
|
|
155
|
-
|
|
156
|
-
result = {
|
|
157
|
-
"type": node.type,
|
|
158
|
-
"start_line": node.start_point[0] + 1,
|
|
159
|
-
"end_line": node.end_point[0] + 1,
|
|
160
|
-
}
|
|
161
|
-
|
|
162
|
-
# Process child nodes based on language-specific patterns
|
|
163
|
-
if language == "python":
|
|
164
|
-
if node.type in ["class_definition", "function_definition"]:
|
|
165
|
-
for child in node.children:
|
|
166
|
-
if child.type == "identifier":
|
|
167
|
-
result["name"] = self._extract_node_text(
|
|
168
|
-
child, source_code
|
|
169
|
-
)
|
|
170
|
-
elif child.type == "parameters":
|
|
171
|
-
params = []
|
|
172
|
-
for param in child.children:
|
|
173
|
-
if (
|
|
174
|
-
"parameter" in param.type
|
|
175
|
-
or param.type == "identifier"
|
|
176
|
-
):
|
|
177
|
-
params.append(
|
|
178
|
-
self._extract_node_text(param, source_code)
|
|
179
|
-
)
|
|
180
|
-
if params:
|
|
181
|
-
result["parameters"] = params
|
|
182
|
-
elif node.type == "assignment":
|
|
183
|
-
# Handle global variable assignments
|
|
184
|
-
for child in node.children:
|
|
185
|
-
if child.type == "identifier":
|
|
186
|
-
result["type"] = "variable_declaration"
|
|
187
|
-
result["name"] = self._extract_node_text(
|
|
188
|
-
child, source_code
|
|
189
|
-
)
|
|
190
|
-
return result
|
|
191
|
-
# Break after first identifier to avoid capturing right-hand side
|
|
192
|
-
break
|
|
193
|
-
elif language == "javascript" or language == "typescript":
|
|
194
|
-
if (
|
|
195
|
-
node.type
|
|
196
|
-
in [
|
|
197
|
-
"class_declaration",
|
|
198
|
-
"method_definition",
|
|
199
|
-
"class",
|
|
200
|
-
"method_declaration",
|
|
201
|
-
"function_declaration",
|
|
202
|
-
"interface_declaration",
|
|
203
|
-
"export_statement", # Handle exported items
|
|
204
|
-
"arrow_function", # Add support for arrow functions
|
|
205
|
-
"lexical_declaration", # Add support for const/let declarations with arrow functions
|
|
206
|
-
]
|
|
207
|
-
):
|
|
208
|
-
# Handle export statements by looking at their children
|
|
209
|
-
if node.type == "export_statement":
|
|
210
|
-
# Process the declaration that's being exported
|
|
211
|
-
for child in node.children:
|
|
212
|
-
if child.type in [
|
|
213
|
-
"class_declaration",
|
|
214
|
-
"function_declaration",
|
|
215
|
-
"interface_declaration",
|
|
216
|
-
"variable_statement",
|
|
217
|
-
"lexical_declaration",
|
|
218
|
-
"method_definition",
|
|
219
|
-
]:
|
|
220
|
-
# Recursively process the exported declaration
|
|
221
|
-
exported_result = process_node(child)
|
|
222
|
-
|
|
223
|
-
if exported_result:
|
|
224
|
-
# Mark as exported
|
|
225
|
-
exported_result["exported"] = True
|
|
226
|
-
# Return the exported item's result
|
|
227
|
-
return exported_result
|
|
228
|
-
|
|
229
|
-
# Handle arrow functions - extract name from parent variable declarator
|
|
230
|
-
elif node.type == "arrow_function":
|
|
231
|
-
parent = node.parent
|
|
232
|
-
if parent and parent.type == "variable_declarator":
|
|
233
|
-
for sibling in parent.children:
|
|
234
|
-
if sibling.type == "identifier":
|
|
235
|
-
result["type"] = "arrow_function"
|
|
236
|
-
result["name"] = self._extract_node_text(
|
|
237
|
-
sibling, source_code
|
|
238
|
-
)
|
|
239
|
-
|
|
240
|
-
# Process arrow function parameters
|
|
241
|
-
for child in node.children:
|
|
242
|
-
if child.type == "formal_parameters":
|
|
243
|
-
params = []
|
|
244
|
-
for param in child.children:
|
|
245
|
-
if param.type in [
|
|
246
|
-
"required_parameter",
|
|
247
|
-
"optional_parameter",
|
|
248
|
-
"identifier",
|
|
249
|
-
]:
|
|
250
|
-
param_text = self._extract_node_text(
|
|
251
|
-
param, source_code
|
|
252
|
-
)
|
|
253
|
-
params.append(param_text)
|
|
254
|
-
|
|
255
|
-
if params:
|
|
256
|
-
result["parameters"] = params
|
|
257
|
-
|
|
258
|
-
# Handle lexical declarations with arrow functions (const/let)
|
|
259
|
-
elif node.type == "lexical_declaration":
|
|
260
|
-
for child in node.children:
|
|
261
|
-
if child.type == "variable_declarator":
|
|
262
|
-
# Find the identifier (name)
|
|
263
|
-
var_name = None
|
|
264
|
-
has_arrow_function = False
|
|
265
|
-
for declarator_child in child.children:
|
|
266
|
-
if declarator_child.type == "identifier":
|
|
267
|
-
var_name = self._extract_node_text(
|
|
268
|
-
declarator_child, source_code
|
|
269
|
-
)
|
|
270
|
-
elif declarator_child.type == "arrow_function":
|
|
271
|
-
has_arrow_function = True
|
|
272
|
-
|
|
273
|
-
if var_name and has_arrow_function:
|
|
274
|
-
result["type"] = "arrow_function"
|
|
275
|
-
result["name"] = var_name
|
|
276
|
-
# Recursively process the arrow function to get parameters
|
|
277
|
-
for declarator_child in child.children:
|
|
278
|
-
if (
|
|
279
|
-
declarator_child.type
|
|
280
|
-
== "arrow_function"
|
|
281
|
-
):
|
|
282
|
-
arrow_result = process_node(
|
|
283
|
-
declarator_child
|
|
284
|
-
)
|
|
285
|
-
if (
|
|
286
|
-
arrow_result
|
|
287
|
-
and "parameters" in arrow_result
|
|
288
|
-
):
|
|
289
|
-
result["parameters"] = arrow_result[
|
|
290
|
-
"parameters"
|
|
291
|
-
]
|
|
292
|
-
else:
|
|
293
|
-
result["type"] = "variable_declaration"
|
|
294
|
-
result["name"] = var_name
|
|
295
|
-
result["first_line"] = (
|
|
296
|
-
self._extract_node_text(node, source_code)
|
|
297
|
-
.split("\n")[0]
|
|
298
|
-
.strip("{")
|
|
299
|
-
)
|
|
300
|
-
|
|
301
|
-
# Handle regular declarations
|
|
302
|
-
elif node.type in [
|
|
303
|
-
"class",
|
|
304
|
-
"class_declaration",
|
|
305
|
-
"function_declaration",
|
|
306
|
-
"method_declaration",
|
|
307
|
-
"interface_declaration",
|
|
308
|
-
"method_definition",
|
|
309
|
-
]:
|
|
310
|
-
for child in node.children:
|
|
311
|
-
if (
|
|
312
|
-
child.type == "identifier"
|
|
313
|
-
or child.type == "type_identifier"
|
|
314
|
-
or child.type == "property_identifier"
|
|
315
|
-
):
|
|
316
|
-
result["name"] = self._extract_node_text(
|
|
317
|
-
child, source_code
|
|
318
|
-
)
|
|
319
|
-
# Process function parameters for function declarations
|
|
320
|
-
elif (
|
|
321
|
-
child.type == "formal_parameters"
|
|
322
|
-
and node.type
|
|
323
|
-
in [
|
|
324
|
-
"function_declaration",
|
|
325
|
-
"method_declaration",
|
|
326
|
-
"method_definition",
|
|
327
|
-
]
|
|
328
|
-
):
|
|
329
|
-
params = []
|
|
330
|
-
for param in child.children:
|
|
331
|
-
if param.type in [
|
|
332
|
-
"required_parameter",
|
|
333
|
-
"optional_parameter",
|
|
334
|
-
"identifier",
|
|
335
|
-
]:
|
|
336
|
-
param_name = None
|
|
337
|
-
param_type = None
|
|
338
|
-
|
|
339
|
-
# For simple identifiers
|
|
340
|
-
if param.type == "identifier":
|
|
341
|
-
param_name = self._extract_node_text(
|
|
342
|
-
param, source_code
|
|
343
|
-
)
|
|
344
|
-
params.append(param_name)
|
|
345
|
-
continue
|
|
346
|
-
|
|
347
|
-
# For parameters with type annotations
|
|
348
|
-
for param_child in param.children:
|
|
349
|
-
if (
|
|
350
|
-
param_child.type == "identifier"
|
|
351
|
-
or param_child.type
|
|
352
|
-
== "object_pattern"
|
|
353
|
-
):
|
|
354
|
-
param_name = (
|
|
355
|
-
self._extract_node_text(
|
|
356
|
-
param_child, source_code
|
|
357
|
-
)
|
|
358
|
-
)
|
|
359
|
-
elif (
|
|
360
|
-
param_child.type
|
|
361
|
-
== "type_annotation"
|
|
362
|
-
):
|
|
363
|
-
# Extract the type from type annotation
|
|
364
|
-
for (
|
|
365
|
-
type_child
|
|
366
|
-
) in param_child.children:
|
|
367
|
-
if (
|
|
368
|
-
type_child.type != ":"
|
|
369
|
-
): # Skip the colon
|
|
370
|
-
param_type = (
|
|
371
|
-
self._extract_node_text(
|
|
372
|
-
type_child,
|
|
373
|
-
source_code,
|
|
374
|
-
)
|
|
375
|
-
)
|
|
376
|
-
|
|
377
|
-
if param_name:
|
|
378
|
-
if param_type:
|
|
379
|
-
params.append(
|
|
380
|
-
f"{param_name}: {param_type}"
|
|
381
|
-
)
|
|
382
|
-
else:
|
|
383
|
-
params.append(param_name)
|
|
384
|
-
|
|
385
|
-
if params:
|
|
386
|
-
result["parameters"] = params
|
|
387
|
-
|
|
388
|
-
elif node.type in [
|
|
389
|
-
"variable_statement",
|
|
390
|
-
"property_declaration",
|
|
391
|
-
"variable_declaration",
|
|
392
|
-
]:
|
|
393
|
-
# Handle variable declarations and property declarations
|
|
394
|
-
for child in node.children:
|
|
395
|
-
if child.type == "variable_declaration_list":
|
|
396
|
-
for declarator in child.children:
|
|
397
|
-
if declarator.type == "variable_declarator":
|
|
398
|
-
var_name = None
|
|
399
|
-
has_arrow_function = False
|
|
400
|
-
|
|
401
|
-
for declarator_child in declarator.children:
|
|
402
|
-
if declarator_child.type == "identifier":
|
|
403
|
-
var_name = self._extract_node_text(
|
|
404
|
-
declarator_child, source_code
|
|
405
|
-
)
|
|
406
|
-
elif (
|
|
407
|
-
declarator_child.type
|
|
408
|
-
== "arrow_function"
|
|
409
|
-
):
|
|
410
|
-
has_arrow_function = True
|
|
411
|
-
|
|
412
|
-
if var_name:
|
|
413
|
-
if has_arrow_function:
|
|
414
|
-
result["type"] = "arrow_function"
|
|
415
|
-
result["name"] = var_name
|
|
416
|
-
# Find parameters
|
|
417
|
-
for (
|
|
418
|
-
declarator_child
|
|
419
|
-
) in declarator.children:
|
|
420
|
-
if (
|
|
421
|
-
declarator_child.type
|
|
422
|
-
== "arrow_function"
|
|
423
|
-
):
|
|
424
|
-
arrow_result = process_node(
|
|
425
|
-
declarator_child
|
|
426
|
-
)
|
|
427
|
-
if (
|
|
428
|
-
arrow_result
|
|
429
|
-
and "parameters"
|
|
430
|
-
in arrow_result
|
|
431
|
-
):
|
|
432
|
-
result["parameters"] = (
|
|
433
|
-
arrow_result[
|
|
434
|
-
"parameters"
|
|
435
|
-
]
|
|
436
|
-
)
|
|
437
|
-
else:
|
|
438
|
-
result["type"] = "variable_declaration"
|
|
439
|
-
result["name"] = var_name
|
|
440
|
-
|
|
441
|
-
return result
|
|
442
|
-
elif child.type == "identifier":
|
|
443
|
-
result["type"] = "variable_declaration"
|
|
444
|
-
result["name"] = self._extract_node_text(
|
|
445
|
-
child, source_code
|
|
446
|
-
)
|
|
447
|
-
return result
|
|
448
|
-
|
|
449
|
-
elif language == "java":
|
|
450
|
-
if node.type in ["class_declaration", "interface_declaration"]:
|
|
451
|
-
# Handle class and interface declarations
|
|
452
|
-
for child in node.children:
|
|
453
|
-
if child.type == "identifier":
|
|
454
|
-
result["name"] = self._extract_node_text(
|
|
455
|
-
child, source_code
|
|
456
|
-
)
|
|
457
|
-
elif child.type in ["class_body", "interface_body"]:
|
|
458
|
-
result["children"] = [
|
|
459
|
-
process_node(c) for c in child.children
|
|
460
|
-
]
|
|
461
|
-
|
|
462
|
-
elif node.type == "method_declaration":
|
|
463
|
-
# Handle method declarations
|
|
464
|
-
method_name = None
|
|
465
|
-
parameters = []
|
|
466
|
-
return_type = None
|
|
467
|
-
|
|
468
|
-
for child in node.children:
|
|
469
|
-
if child.type == "identifier":
|
|
470
|
-
method_name = self._extract_node_text(
|
|
471
|
-
child, source_code
|
|
472
|
-
)
|
|
473
|
-
result["name"] = method_name
|
|
474
|
-
elif child.type == "formal_parameters":
|
|
475
|
-
for param in child.children:
|
|
476
|
-
if param.type == "parameter":
|
|
477
|
-
param_name = self._extract_node_text(
|
|
478
|
-
param.child_by_field_name("name"),
|
|
479
|
-
source_code,
|
|
480
|
-
)
|
|
481
|
-
param_type = self._extract_node_text(
|
|
482
|
-
param.child_by_field_name("type"),
|
|
483
|
-
source_code,
|
|
484
|
-
)
|
|
485
|
-
parameters.append(f"{param_type} {param_name}")
|
|
486
|
-
result["parameters"] = parameters
|
|
487
|
-
elif child.type == "type":
|
|
488
|
-
return_type = self._extract_node_text(
|
|
489
|
-
child, source_code
|
|
490
|
-
)
|
|
491
|
-
result["return_type"] = return_type
|
|
492
|
-
|
|
493
|
-
elif node.type == "field_declaration":
|
|
494
|
-
# Handle field declarations
|
|
495
|
-
for child in node.children:
|
|
496
|
-
if child.type == "variable_declarator":
|
|
497
|
-
var_name = self._extract_node_text(
|
|
498
|
-
child.child_by_field_name("name"), source_code
|
|
499
|
-
)
|
|
500
|
-
var_type = self._extract_node_text(
|
|
501
|
-
child.child_by_field_name("type"), source_code
|
|
502
|
-
)
|
|
503
|
-
result["name"] = var_name
|
|
504
|
-
result["variable_type"] = var_type
|
|
505
|
-
result["type"] = "field_declaration"
|
|
506
|
-
|
|
507
|
-
elif node.type == "annotation":
|
|
508
|
-
# Handle annotations
|
|
509
|
-
annotation_name = self._extract_node_text(node, source_code)
|
|
510
|
-
result["name"] = annotation_name
|
|
511
|
-
result["type"] = "annotation"
|
|
512
|
-
|
|
513
|
-
elif node.type == "lambda_expression":
|
|
514
|
-
# Handle lambda expressions
|
|
515
|
-
result["type"] = "lambda_expression"
|
|
516
|
-
# Additional processing for lambda parameters and body can be added here
|
|
517
|
-
|
|
518
|
-
# Recursively process children for nested classes or other constructs
|
|
519
|
-
children = [process_node(child) for child in node.children]
|
|
520
|
-
if children:
|
|
521
|
-
result["children"] = children
|
|
522
|
-
|
|
523
|
-
return result
|
|
524
|
-
|
|
525
|
-
elif language == "cpp":
|
|
526
|
-
if node.type in [
|
|
527
|
-
"class_specifier",
|
|
528
|
-
"function_definition",
|
|
529
|
-
"struct_specifier",
|
|
530
|
-
]:
|
|
531
|
-
for child in node.children:
|
|
532
|
-
if child.type == "identifier":
|
|
533
|
-
result["name"] = self._extract_node_text(
|
|
534
|
-
child, source_code
|
|
535
|
-
)
|
|
536
|
-
return result
|
|
537
|
-
return result
|
|
538
|
-
elif node.type in ["declaration", "variable_declaration"]:
|
|
539
|
-
# Handle C++ global variables and declarations
|
|
540
|
-
for child in node.children:
|
|
541
|
-
if (
|
|
542
|
-
child.type == "init_declarator"
|
|
543
|
-
or child.type == "declarator"
|
|
544
|
-
):
|
|
545
|
-
for subchild in child.children:
|
|
546
|
-
if subchild.type == "identifier":
|
|
547
|
-
result["type"] = "variable_declaration"
|
|
548
|
-
result["name"] = self._extract_node_text(
|
|
549
|
-
subchild, source_code
|
|
550
|
-
)
|
|
551
|
-
return result
|
|
552
|
-
return result
|
|
553
|
-
|
|
554
|
-
elif language == "ruby":
|
|
555
|
-
if node.type in ["class", "method", "singleton_method", "module"]:
|
|
556
|
-
for child in node.children:
|
|
557
|
-
if child.type == "identifier":
|
|
558
|
-
result["name"] = self._extract_node_text(
|
|
559
|
-
child, source_code
|
|
560
|
-
)
|
|
561
|
-
return result
|
|
562
|
-
return result
|
|
563
|
-
elif node.type == "assignment" or node.type == "global_variable":
|
|
564
|
-
# Handle Ruby global variables and assignments
|
|
565
|
-
for child in node.children:
|
|
566
|
-
if (
|
|
567
|
-
child.type == "identifier"
|
|
568
|
-
or child.type == "global_variable"
|
|
569
|
-
):
|
|
570
|
-
result["type"] = "variable_declaration"
|
|
571
|
-
result["name"] = self._extract_node_text(
|
|
572
|
-
child, source_code
|
|
573
|
-
)
|
|
574
|
-
return result
|
|
575
|
-
return result
|
|
576
|
-
|
|
577
|
-
elif language == "go":
|
|
578
|
-
if node.type in [
|
|
579
|
-
"type_declaration",
|
|
580
|
-
"function_declaration",
|
|
581
|
-
"method_declaration",
|
|
582
|
-
"interface_declaration",
|
|
583
|
-
]:
|
|
584
|
-
for child in node.children:
|
|
585
|
-
if (
|
|
586
|
-
child.type == "identifier"
|
|
587
|
-
or child.type == "field_identifier"
|
|
588
|
-
):
|
|
589
|
-
result["name"] = self._extract_node_text(
|
|
590
|
-
child, source_code
|
|
591
|
-
)
|
|
592
|
-
result["first_line"] = (
|
|
593
|
-
self._extract_node_text(node, source_code)
|
|
594
|
-
.split("\n")[0]
|
|
595
|
-
.strip("{")
|
|
596
|
-
)
|
|
597
|
-
return result
|
|
598
|
-
return result
|
|
599
|
-
elif (
|
|
600
|
-
node.type == "var_declaration"
|
|
601
|
-
or node.type == "const_declaration"
|
|
602
|
-
):
|
|
603
|
-
# Handle Go variable and constant declarations
|
|
604
|
-
for child in node.children:
|
|
605
|
-
if child.type == "var_spec" or child.type == "const_spec":
|
|
606
|
-
for subchild in child.children:
|
|
607
|
-
if subchild.type == "identifier":
|
|
608
|
-
result["type"] = "variable_declaration"
|
|
609
|
-
result["name"] = self._extract_node_text(
|
|
610
|
-
subchild, source_code
|
|
611
|
-
)
|
|
612
|
-
return result
|
|
613
|
-
return result
|
|
614
|
-
|
|
615
|
-
elif language == "rust":
|
|
616
|
-
if node.type in [
|
|
617
|
-
"struct_item",
|
|
618
|
-
"impl_item",
|
|
619
|
-
"fn_item",
|
|
620
|
-
"trait_item",
|
|
621
|
-
]:
|
|
622
|
-
for child in node.children:
|
|
623
|
-
if child.type == "identifier":
|
|
624
|
-
result["name"] = self._extract_node_text(
|
|
625
|
-
child, source_code
|
|
626
|
-
)
|
|
627
|
-
return result
|
|
628
|
-
return result
|
|
629
|
-
elif node.type in ["static_item", "const_item", "let_declaration"]:
|
|
630
|
-
# Handle Rust static items, constants, and let declarations
|
|
631
|
-
for child in node.children:
|
|
632
|
-
if child.type == "identifier":
|
|
633
|
-
result["type"] = "variable_declaration"
|
|
634
|
-
result["name"] = self._extract_node_text(
|
|
635
|
-
child, source_code
|
|
636
|
-
)
|
|
637
|
-
return result
|
|
638
|
-
elif child.type == "pattern" and child.children:
|
|
639
|
-
result["name"] = self._extract_node_text(
|
|
640
|
-
child.children[0], source_code
|
|
641
|
-
)
|
|
642
|
-
return result
|
|
643
|
-
|
|
644
|
-
elif language == "php":
|
|
645
|
-
if node.type in [
|
|
646
|
-
"class_declaration",
|
|
647
|
-
"method_declaration",
|
|
648
|
-
"function_definition",
|
|
649
|
-
"interface_declaration",
|
|
650
|
-
"trait_declaration",
|
|
651
|
-
]:
|
|
652
|
-
for child in node.children:
|
|
653
|
-
if child.type == "name":
|
|
654
|
-
result["name"] = self._extract_node_text(
|
|
655
|
-
child, source_code
|
|
656
|
-
)
|
|
657
|
-
return result
|
|
658
|
-
return result
|
|
659
|
-
elif (
|
|
660
|
-
node.type == "property_declaration"
|
|
661
|
-
or node.type == "const_declaration"
|
|
662
|
-
):
|
|
663
|
-
# Handle PHP class properties and constants
|
|
664
|
-
for child in node.children:
|
|
665
|
-
if (
|
|
666
|
-
child.type == "property_element"
|
|
667
|
-
or child.type == "const_element"
|
|
668
|
-
):
|
|
669
|
-
for subchild in child.children:
|
|
670
|
-
if (
|
|
671
|
-
subchild.type == "variable_name"
|
|
672
|
-
or subchild.type == "name"
|
|
673
|
-
):
|
|
674
|
-
result["type"] = "variable_declaration"
|
|
675
|
-
result["name"] = self._extract_node_text(
|
|
676
|
-
subchild, source_code
|
|
677
|
-
)
|
|
678
|
-
return result
|
|
679
|
-
|
|
680
|
-
elif language == "c-sharp":
|
|
681
|
-
if node.type == "class_declaration":
|
|
682
|
-
# Create a more comprehensive class result
|
|
683
|
-
class_name = None
|
|
684
|
-
base_class_name = None
|
|
685
|
-
|
|
686
|
-
# Extract class name and base class name
|
|
687
|
-
for child in node.children:
|
|
688
|
-
if child.type == "identifier":
|
|
689
|
-
class_name = self._extract_node_text(child, source_code)
|
|
690
|
-
result["name"] = class_name
|
|
691
|
-
elif child.type == "base_list":
|
|
692
|
-
# Extract base class if present
|
|
693
|
-
if (
|
|
694
|
-
len(child.children) > 1
|
|
695
|
-
): # Check if there's a base class
|
|
696
|
-
base_class_name = self._extract_node_text(
|
|
697
|
-
child.children[1], source_code
|
|
698
|
-
)
|
|
699
|
-
result["base_class"] = base_class_name
|
|
700
|
-
|
|
701
|
-
# DO NOT return early here to ensure methods are processed
|
|
702
|
-
|
|
703
|
-
elif node.type == "method_declaration":
|
|
704
|
-
method_name = None
|
|
705
|
-
parameters = []
|
|
706
|
-
access_modifiers = []
|
|
707
|
-
|
|
708
|
-
for child in node.children:
|
|
709
|
-
if child.type == "identifier":
|
|
710
|
-
method_name = self._extract_node_text(
|
|
711
|
-
child, source_code
|
|
712
|
-
)
|
|
713
|
-
result["name"] = method_name
|
|
714
|
-
elif child.type == "parameter_list":
|
|
715
|
-
# Extract parameter information
|
|
716
|
-
for param in child.children:
|
|
717
|
-
if param.type == "parameter":
|
|
718
|
-
param_type = ""
|
|
719
|
-
param_name = None
|
|
720
|
-
|
|
721
|
-
# Get type and name fields from parameter
|
|
722
|
-
type_node = param.child_by_field_name("type")
|
|
723
|
-
name_node = param.child_by_field_name("name")
|
|
724
|
-
|
|
725
|
-
if type_node:
|
|
726
|
-
param_type = self._extract_node_text(
|
|
727
|
-
type_node, source_code
|
|
728
|
-
)
|
|
729
|
-
if name_node:
|
|
730
|
-
param_name = self._extract_node_text(
|
|
731
|
-
name_node, source_code
|
|
732
|
-
)
|
|
733
|
-
|
|
734
|
-
if param_name:
|
|
735
|
-
parameters.append(
|
|
736
|
-
param_type + " " + param_name
|
|
737
|
-
)
|
|
738
|
-
|
|
739
|
-
# Add parameters to result
|
|
740
|
-
if parameters:
|
|
741
|
-
result["parameters"] = parameters
|
|
742
|
-
elif child.type == "modifier":
|
|
743
|
-
# Capture access modifiers
|
|
744
|
-
modifier = self._extract_node_text(child, source_code)
|
|
745
|
-
access_modifiers.append(modifier)
|
|
746
|
-
|
|
747
|
-
# Add access modifiers to result
|
|
748
|
-
if access_modifiers:
|
|
749
|
-
result["modifiers"] = access_modifiers
|
|
750
|
-
|
|
751
|
-
# DO NOT return early here
|
|
752
|
-
|
|
753
|
-
elif node.type in ["property_declaration", "field_declaration"]:
|
|
754
|
-
# Improved handling for properties and fields
|
|
755
|
-
property_name = None
|
|
756
|
-
property_type = None
|
|
757
|
-
|
|
758
|
-
for child in node.children:
|
|
759
|
-
if child.type == "variable_declaration":
|
|
760
|
-
for subchild in child.children:
|
|
761
|
-
if subchild.type == "identifier":
|
|
762
|
-
result["type"] = "variable_declaration"
|
|
763
|
-
result["name"] = self._extract_node_text(
|
|
764
|
-
subchild, source_code
|
|
765
|
-
)
|
|
766
|
-
# Look for the type of the variable
|
|
767
|
-
elif subchild.type == "predefined_type" or (
|
|
768
|
-
subchild.type == "identifier"
|
|
769
|
-
and subchild != child
|
|
770
|
-
):
|
|
771
|
-
result["variable_type"] = (
|
|
772
|
-
self._extract_node_text(
|
|
773
|
-
subchild, source_code
|
|
774
|
-
)
|
|
775
|
-
)
|
|
776
|
-
# Check for property name directly in property_declaration
|
|
777
|
-
elif child.type == "identifier":
|
|
778
|
-
property_name = self._extract_node_text(
|
|
779
|
-
child, source_code
|
|
780
|
-
)
|
|
781
|
-
result["name"] = property_name
|
|
782
|
-
result["type"] = "property_declaration"
|
|
783
|
-
# Check for property type
|
|
784
|
-
elif child.type == "predefined_type" or (
|
|
785
|
-
child.type == "identifier" and child != property_name
|
|
786
|
-
):
|
|
787
|
-
if (
|
|
788
|
-
not property_name
|
|
789
|
-
or self._extract_node_text(child, source_code)
|
|
790
|
-
!= property_name
|
|
791
|
-
):
|
|
792
|
-
property_type = self._extract_node_text(
|
|
793
|
-
child, source_code
|
|
794
|
-
)
|
|
795
|
-
result["property_type"] = property_type
|
|
796
|
-
|
|
797
|
-
elif language == "kotlin":
|
|
798
|
-
if node.type in ["class_declaration", "function_declaration"]:
|
|
799
|
-
for child in node.children:
|
|
800
|
-
if child.type == "simple_identifier":
|
|
801
|
-
result["name"] = self._extract_node_text(
|
|
802
|
-
child, source_code
|
|
803
|
-
)
|
|
804
|
-
return result
|
|
805
|
-
return result
|
|
806
|
-
elif node.type in ["property_declaration", "variable_declaration"]:
|
|
807
|
-
# Handle Kotlin properties and variables
|
|
808
|
-
for child in node.children:
|
|
809
|
-
if child.type == "simple_identifier":
|
|
810
|
-
result["type"] = "variable_declaration"
|
|
811
|
-
result["name"] = self._extract_node_text(
|
|
812
|
-
child, source_code
|
|
813
|
-
)
|
|
814
|
-
return result
|
|
815
|
-
break # Only capture the first identifier
|
|
816
|
-
return result
|
|
817
|
-
else:
|
|
818
|
-
if node.type in [
|
|
819
|
-
"type_declaration",
|
|
820
|
-
"function_declaration",
|
|
821
|
-
"method_declaration",
|
|
822
|
-
"interface_declaration",
|
|
823
|
-
]:
|
|
824
|
-
for child in node.children:
|
|
825
|
-
if (
|
|
826
|
-
child.type == "identifier"
|
|
827
|
-
or child.type == "field_identifier"
|
|
828
|
-
):
|
|
829
|
-
result["name"] = self._extract_node_text(
|
|
830
|
-
child, source_code
|
|
831
|
-
)
|
|
832
|
-
result["first_line"] = (
|
|
833
|
-
self._extract_node_text(node, source_code)
|
|
834
|
-
.split("\n")[0]
|
|
835
|
-
.strip("{")
|
|
836
|
-
)
|
|
837
|
-
return result
|
|
838
|
-
return result
|
|
839
|
-
elif (
|
|
840
|
-
node.type == "var_declaration"
|
|
841
|
-
or node.type == "const_declaration"
|
|
842
|
-
):
|
|
843
|
-
# Handle Go variable and constant declarations
|
|
844
|
-
for child in node.children:
|
|
845
|
-
if child.type == "var_spec" or child.type == "const_spec":
|
|
846
|
-
for subchild in child.children:
|
|
847
|
-
if subchild.type == "identifier":
|
|
848
|
-
result["type"] = "variable_declaration"
|
|
849
|
-
result["name"] = self._extract_node_text(
|
|
850
|
-
subchild, source_code
|
|
851
|
-
)
|
|
852
|
-
return result
|
|
853
|
-
return result
|
|
854
|
-
|
|
855
|
-
# Recursively process children
|
|
856
|
-
children = []
|
|
857
|
-
# if file_path.endswith("models/wishlist.js"):
|
|
858
|
-
# print(f"{file_path} {language}")
|
|
859
|
-
# print(
|
|
860
|
-
# f"{node.type} ({self._extract_node_text(node, source_code) if node.type == 'identifier' else ''})"
|
|
861
|
-
# )
|
|
862
|
-
# print(self._extract_node_text(node, source_code))
|
|
863
|
-
# print("=============")
|
|
864
|
-
for child in node.children:
|
|
865
|
-
child_result = process_node(child)
|
|
866
|
-
if child_result and (
|
|
867
|
-
child_result.get("type")
|
|
868
|
-
in [
|
|
869
|
-
"class_definition",
|
|
870
|
-
"function_definition",
|
|
871
|
-
"class_declaration",
|
|
872
|
-
"method_definition",
|
|
873
|
-
"function_declaration",
|
|
874
|
-
"interface_declaration",
|
|
875
|
-
"method_declaration",
|
|
876
|
-
"constructor_declaration",
|
|
877
|
-
"class_specifier",
|
|
878
|
-
"struct_specifier",
|
|
879
|
-
"class",
|
|
880
|
-
"method",
|
|
881
|
-
"singleton_method",
|
|
882
|
-
"module",
|
|
883
|
-
"type_declaration",
|
|
884
|
-
"method_declaration",
|
|
885
|
-
"interface_declaration",
|
|
886
|
-
"struct_item",
|
|
887
|
-
"impl_item",
|
|
888
|
-
"fn_item",
|
|
889
|
-
"trait_item",
|
|
890
|
-
"trait_declaration",
|
|
891
|
-
"property_declaration",
|
|
892
|
-
"object_definition",
|
|
893
|
-
"trait_definition",
|
|
894
|
-
"def_definition",
|
|
895
|
-
"function_definition",
|
|
896
|
-
"class_definition",
|
|
897
|
-
"variable_declaration",
|
|
898
|
-
"arrow_function",
|
|
899
|
-
]
|
|
900
|
-
or "children" in child_result
|
|
901
|
-
):
|
|
902
|
-
children.append(child_result)
|
|
903
|
-
|
|
904
|
-
if children:
|
|
905
|
-
result["children"] = children
|
|
906
|
-
return result
|
|
161
|
+
return language_parser.process_node(node, source_code, process_node)
|
|
907
162
|
|
|
908
163
|
return process_node(root_node)
|
|
909
164
|
|
|
@@ -914,11 +169,9 @@ class CodeAnalysisService:
|
|
|
914
169
|
"""Recursively count nodes of specific types in the tree structure."""
|
|
915
170
|
count = 0
|
|
916
171
|
|
|
917
|
-
# Count current node if it matches
|
|
918
172
|
if structure.get("type") in node_types:
|
|
919
173
|
count += 1
|
|
920
174
|
|
|
921
|
-
# Recursively count in children
|
|
922
175
|
for child in structure.get("children", []):
|
|
923
176
|
count += self._count_nodes(child, node_types)
|
|
924
177
|
|
|
@@ -939,26 +192,39 @@ class CodeAnalysisService:
|
|
|
939
192
|
if not self.llm_service:
|
|
940
193
|
return files[:max_files]
|
|
941
194
|
|
|
942
|
-
prompt = f"""You are analyzing a code repository with {len(files)} files.
|
|
195
|
+
prompt = f"""You are analyzing a code repository with {len(files)} files.
|
|
943
196
|
The analysis system can only process {max_files} files at a time.
|
|
944
197
|
|
|
945
|
-
|
|
946
|
-
|
|
947
|
-
|
|
198
|
+
Generate glob patterns to EXCLUDE less important files. The goal is to keep around {max_files} most important files after exclusion.
|
|
199
|
+
|
|
200
|
+
Files to EXCLUDE (generate patterns for these):
|
|
201
|
+
1. Test files
|
|
202
|
+
2. Generated/build files
|
|
203
|
+
3. Vendor/dependency files
|
|
204
|
+
4. Documentation files (e.g., **/docs/**, **/*.md)
|
|
205
|
+
5. Configuration duplicates and environment files
|
|
206
|
+
6. Migration files
|
|
207
|
+
7. Static assets (images, fonts, etc.)
|
|
208
|
+
8. Example/sample files
|
|
209
|
+
|
|
210
|
+
Files to KEEP (NEVER exclude):
|
|
211
|
+
1. Core application logic (main entry points, core modules)
|
|
212
|
+
2. Business features logic and domain models
|
|
948
213
|
3. API endpoints and controllers
|
|
949
214
|
4. Service/utility classes
|
|
950
|
-
5.
|
|
951
|
-
6. Test files are lower priority unless they reveal architecture
|
|
952
|
-
7. Generated files, lock files, and vendor files should be excluded
|
|
215
|
+
5. Key configuration files that define app structure
|
|
953
216
|
|
|
954
|
-
Here is the complete list of files
|
|
217
|
+
Here is the complete list of files:
|
|
955
218
|
{chr(10).join(files)}
|
|
956
219
|
|
|
957
|
-
|
|
958
|
-
|
|
220
|
+
Current file count: {len(files)}
|
|
221
|
+
Target file count: ~{max_files}
|
|
222
|
+
Files to exclude: ~{max(0, len(files) - max_files)}
|
|
223
|
+
|
|
224
|
+
Return ONLY a JSON array of glob patterns to exclude. Be strategic - use broad patterns when possible.
|
|
959
225
|
|
|
960
226
|
Example response format:
|
|
961
|
-
["
|
|
227
|
+
["**/tests/**", "**/test_*", "**/*.test.*", "**/docs/**", "**/migrations/**", "**/__pycache__/**"]"""
|
|
962
228
|
|
|
963
229
|
try:
|
|
964
230
|
loop = asyncio.get_event_loop()
|
|
@@ -980,14 +246,26 @@ Example response format:
|
|
|
980
246
|
response = response[:-3]
|
|
981
247
|
response = response.strip()
|
|
982
248
|
|
|
983
|
-
|
|
249
|
+
exclude_patterns = json.loads(response)
|
|
984
250
|
|
|
985
|
-
if isinstance(
|
|
986
|
-
|
|
987
|
-
|
|
988
|
-
|
|
989
|
-
|
|
990
|
-
|
|
251
|
+
if isinstance(exclude_patterns, list):
|
|
252
|
+
filtered_files = []
|
|
253
|
+
for file_path in files:
|
|
254
|
+
excluded = False
|
|
255
|
+
for pattern in exclude_patterns:
|
|
256
|
+
if fnmatch.fnmatch(file_path, pattern):
|
|
257
|
+
excluded = True
|
|
258
|
+
break
|
|
259
|
+
if not excluded:
|
|
260
|
+
filtered_files.append(file_path)
|
|
261
|
+
|
|
262
|
+
logger.info(
|
|
263
|
+
f"LLM exclusion patterns reduced files from {len(files)} to {len(filtered_files)}"
|
|
264
|
+
)
|
|
265
|
+
|
|
266
|
+
return filtered_files[:max_files]
|
|
267
|
+
except Exception as e:
|
|
268
|
+
logger.warning(f"Cannot extract exclusion patterns from LLM response: {e}")
|
|
991
269
|
|
|
992
270
|
return files[:max_files]
|
|
993
271
|
|
|
@@ -1088,7 +366,7 @@ Example response format:
|
|
|
1088
366
|
return {"error": f"Error analyzing directory: {str(e)}"}
|
|
1089
367
|
|
|
1090
368
|
def _generate_text_map(self, analysis_results: List[Dict[str, Any]]) -> str:
|
|
1091
|
-
"""Generate a
|
|
369
|
+
"""Generate a hierarchical text representation of the code structure analysis."""
|
|
1092
370
|
|
|
1093
371
|
def format_node(
|
|
1094
372
|
node: Dict[str, Any], prefix: str = "", is_last: bool = True
|
|
@@ -1098,10 +376,9 @@ Example response format:
|
|
|
1098
376
|
node_type = node.get("type", "")
|
|
1099
377
|
node_name = node.get("name", "")
|
|
1100
378
|
node_lines = (
|
|
1101
|
-
f" //
|
|
379
|
+
f" //L: {node.get('start_line', '')}-{node.get('end_line', '')}"
|
|
1102
380
|
)
|
|
1103
381
|
|
|
1104
|
-
# Handle decorated functions - extract the actual function definition
|
|
1105
382
|
if node_type == "decorated_definition" and "children" in node:
|
|
1106
383
|
for child in node.get("children", []):
|
|
1107
384
|
if child.get("type") in {
|
|
@@ -1111,7 +388,6 @@ Example response format:
|
|
|
1111
388
|
}:
|
|
1112
389
|
return format_node(child, prefix, is_last)
|
|
1113
390
|
|
|
1114
|
-
# Handle class body, block nodes, and wrapper functions
|
|
1115
391
|
if not node_name and node_type in {
|
|
1116
392
|
"class_body",
|
|
1117
393
|
"block",
|
|
@@ -1125,8 +401,7 @@ Example response format:
|
|
|
1125
401
|
elif not node_name:
|
|
1126
402
|
return lines
|
|
1127
403
|
|
|
1128
|
-
branch = "
|
|
1129
|
-
# Format node information based on type
|
|
404
|
+
branch = " "
|
|
1130
405
|
if node_type in {
|
|
1131
406
|
"class_definition",
|
|
1132
407
|
"class_declaration",
|
|
@@ -1134,6 +409,7 @@ Example response format:
|
|
|
1134
409
|
"class",
|
|
1135
410
|
"interface_declaration",
|
|
1136
411
|
"struct_specifier",
|
|
412
|
+
"struct_declaration",
|
|
1137
413
|
"struct_item",
|
|
1138
414
|
"trait_item",
|
|
1139
415
|
"trait_declaration",
|
|
@@ -1159,7 +435,6 @@ Example response format:
|
|
|
1159
435
|
"arrow_function",
|
|
1160
436
|
"lexical_declaration",
|
|
1161
437
|
}:
|
|
1162
|
-
# Handle parameters
|
|
1163
438
|
if "first_line" in node:
|
|
1164
439
|
node_info = node["first_line"] + node_lines
|
|
1165
440
|
else:
|
|
@@ -1168,7 +443,6 @@ Example response format:
|
|
|
1168
443
|
if "parameters" in node and node["parameters"]:
|
|
1169
444
|
params = node["parameters"]
|
|
1170
445
|
elif "children" in node:
|
|
1171
|
-
# Try to extract parameters from children for languages that structure them differently
|
|
1172
446
|
for child in node["children"]:
|
|
1173
447
|
if child.get("type") in {
|
|
1174
448
|
"parameter_list",
|
|
@@ -1198,11 +472,10 @@ Example response format:
|
|
|
1198
472
|
|
|
1199
473
|
lines.append(f"{prefix}{branch}{node_info}")
|
|
1200
474
|
|
|
1201
|
-
# Process children
|
|
1202
475
|
if "children" in node:
|
|
1203
|
-
new_prefix = prefix +
|
|
476
|
+
new_prefix = prefix + " "
|
|
1204
477
|
child_lines = process_children(node["children"], new_prefix, is_last)
|
|
1205
|
-
if child_lines:
|
|
478
|
+
if child_lines:
|
|
1206
479
|
lines.extend(child_lines)
|
|
1207
480
|
|
|
1208
481
|
return lines
|
|
@@ -1223,20 +496,19 @@ Example response format:
|
|
|
1223
496
|
"call_expression",
|
|
1224
497
|
"lexical_declaration",
|
|
1225
498
|
"decorated_definition",
|
|
1226
|
-
# Class-related nodes
|
|
1227
499
|
"class_definition",
|
|
1228
500
|
"class_declaration",
|
|
1229
501
|
"class_specifier",
|
|
1230
502
|
"class",
|
|
1231
503
|
"interface_declaration",
|
|
1232
504
|
"struct_specifier",
|
|
505
|
+
"struct_declaration",
|
|
1233
506
|
"struct_item",
|
|
1234
507
|
"trait_item",
|
|
1235
508
|
"trait_declaration",
|
|
1236
509
|
"module",
|
|
1237
510
|
"type_declaration",
|
|
1238
|
-
"impl_item",
|
|
1239
|
-
# Method-related nodes
|
|
511
|
+
"impl_item",
|
|
1240
512
|
"function_definition",
|
|
1241
513
|
"function_declaration",
|
|
1242
514
|
"method_definition",
|
|
@@ -1251,13 +523,11 @@ Example response format:
|
|
|
1251
523
|
"public_method_definition",
|
|
1252
524
|
"private_method_definition",
|
|
1253
525
|
"protected_method_definition",
|
|
1254
|
-
# Container nodes that might have methods
|
|
1255
526
|
"class_body",
|
|
1256
527
|
"block",
|
|
1257
528
|
"declaration_list",
|
|
1258
529
|
"body",
|
|
1259
|
-
"impl_block",
|
|
1260
|
-
# Property and field nodes
|
|
530
|
+
"impl_block",
|
|
1261
531
|
"property_declaration",
|
|
1262
532
|
"field_declaration",
|
|
1263
533
|
"variable_declaration",
|
|
@@ -1268,100 +538,123 @@ Example response format:
|
|
|
1268
538
|
for i, child in enumerate(significant_children):
|
|
1269
539
|
is_last_child = i == len(significant_children) - 1
|
|
1270
540
|
child_lines = format_node(child, prefix, is_last_child)
|
|
1271
|
-
if child_lines:
|
|
541
|
+
if child_lines:
|
|
1272
542
|
lines.extend(child_lines)
|
|
1273
543
|
if i >= MAX_ITEMS_OUT:
|
|
1274
544
|
lines.append(
|
|
1275
|
-
f"...({len(significant_children) - MAX_ITEMS_OUT} more items)"
|
|
545
|
+
f"{prefix} ...({len(significant_children) - MAX_ITEMS_OUT} more items)"
|
|
1276
546
|
)
|
|
1277
547
|
break
|
|
1278
548
|
|
|
1279
549
|
return lines
|
|
1280
550
|
|
|
1281
|
-
|
|
1282
|
-
|
|
551
|
+
def get_file_code_content(
|
|
552
|
+
result: Dict[str, Any], file_indent: str
|
|
553
|
+
) -> List[str]:
|
|
554
|
+
"""Generate code structure content for a single file."""
|
|
555
|
+
lines = []
|
|
556
|
+
structure = result.get("structure")
|
|
557
|
+
if not structure:
|
|
558
|
+
return lines
|
|
559
|
+
|
|
560
|
+
if not structure.get("children"):
|
|
561
|
+
if structure.get("type"):
|
|
562
|
+
return [f"{file_indent} {structure['type']}"]
|
|
563
|
+
return lines
|
|
564
|
+
|
|
565
|
+
significant_nodes = [
|
|
566
|
+
child
|
|
567
|
+
for child in structure["children"]
|
|
568
|
+
if child.get("type")
|
|
569
|
+
in {
|
|
570
|
+
"arrow_function",
|
|
571
|
+
"lexical_declaration",
|
|
572
|
+
"call_expression",
|
|
573
|
+
"decorated_definition",
|
|
574
|
+
"class_definition",
|
|
575
|
+
"class_declaration",
|
|
576
|
+
"class_specifier",
|
|
577
|
+
"class",
|
|
578
|
+
"interface_declaration",
|
|
579
|
+
"struct_specifier",
|
|
580
|
+
"struct_declaration",
|
|
581
|
+
"struct_item",
|
|
582
|
+
"trait_item",
|
|
583
|
+
"trait_declaration",
|
|
584
|
+
"module",
|
|
585
|
+
"type_declaration",
|
|
586
|
+
"impl_item",
|
|
587
|
+
"function_definition",
|
|
588
|
+
"function_declaration",
|
|
589
|
+
"method_definition",
|
|
590
|
+
"method_declaration",
|
|
591
|
+
"fn_item",
|
|
592
|
+
"method",
|
|
593
|
+
"singleton_method",
|
|
594
|
+
"constructor_declaration",
|
|
595
|
+
"member_function_definition",
|
|
596
|
+
"constructor",
|
|
597
|
+
"destructor",
|
|
598
|
+
"public_method_definition",
|
|
599
|
+
"private_method_definition",
|
|
600
|
+
"protected_method_definition",
|
|
601
|
+
"property_declaration",
|
|
602
|
+
"field_declaration",
|
|
603
|
+
"variable_declaration",
|
|
604
|
+
"const_declaration",
|
|
605
|
+
"namespace_declaration",
|
|
606
|
+
}
|
|
607
|
+
]
|
|
608
|
+
|
|
609
|
+
for i, node in enumerate(significant_nodes):
|
|
610
|
+
is_last = i == len(significant_nodes) - 1
|
|
611
|
+
node_lines = format_node(node, file_indent, is_last)
|
|
612
|
+
if node_lines:
|
|
613
|
+
lines.extend(node_lines)
|
|
614
|
+
if i >= MAX_ITEMS_OUT:
|
|
615
|
+
lines.append(
|
|
616
|
+
f"{file_indent} ...({len(significant_nodes) - MAX_ITEMS_OUT} more items)"
|
|
617
|
+
)
|
|
618
|
+
break
|
|
619
|
+
return lines
|
|
1283
620
|
|
|
1284
|
-
# Sort analysis results by path
|
|
1285
621
|
sorted_results = sorted(analysis_results, key=lambda x: x["path"])
|
|
1286
622
|
|
|
623
|
+
results_by_path = {result["path"]: result for result in sorted_results}
|
|
624
|
+
|
|
625
|
+
tree: Dict[str, Any] = {}
|
|
1287
626
|
for result in sorted_results:
|
|
1288
|
-
|
|
1289
|
-
|
|
1290
|
-
|
|
1291
|
-
):
|
|
1292
|
-
if
|
|
1293
|
-
|
|
1294
|
-
|
|
1295
|
-
|
|
1296
|
-
|
|
1297
|
-
|
|
1298
|
-
|
|
1299
|
-
|
|
1300
|
-
# Format the structure
|
|
1301
|
-
structure = result["structure"]
|
|
1302
|
-
if "children" in structure:
|
|
1303
|
-
significant_nodes = [
|
|
1304
|
-
child
|
|
1305
|
-
for child in structure["children"]
|
|
1306
|
-
if child.get("type")
|
|
1307
|
-
in {
|
|
1308
|
-
"arrow_function",
|
|
1309
|
-
"lexical_declaration",
|
|
1310
|
-
"call_expression",
|
|
1311
|
-
"decorated_definition",
|
|
1312
|
-
# Class-related nodes
|
|
1313
|
-
"class_definition",
|
|
1314
|
-
"class_declaration",
|
|
1315
|
-
"class_specifier",
|
|
1316
|
-
"class",
|
|
1317
|
-
"interface_declaration",
|
|
1318
|
-
"struct_specifier",
|
|
1319
|
-
"struct_item",
|
|
1320
|
-
"trait_item",
|
|
1321
|
-
"trait_declaration",
|
|
1322
|
-
"module",
|
|
1323
|
-
"type_declaration",
|
|
1324
|
-
"impl_item", # Rust implementations
|
|
1325
|
-
# Method-related nodes
|
|
1326
|
-
"function_definition",
|
|
1327
|
-
"function_declaration",
|
|
1328
|
-
"method_definition",
|
|
1329
|
-
"method_declaration",
|
|
1330
|
-
"fn_item",
|
|
1331
|
-
"method",
|
|
1332
|
-
"singleton_method",
|
|
1333
|
-
"constructor_declaration",
|
|
1334
|
-
"member_function_definition",
|
|
1335
|
-
"constructor",
|
|
1336
|
-
"destructor",
|
|
1337
|
-
"public_method_definition",
|
|
1338
|
-
"private_method_definition",
|
|
1339
|
-
"protected_method_definition",
|
|
1340
|
-
# Property and field nodes
|
|
1341
|
-
"property_declaration",
|
|
1342
|
-
"field_declaration",
|
|
1343
|
-
"variable_declaration",
|
|
1344
|
-
"const_declaration",
|
|
1345
|
-
"namespace_declaration",
|
|
1346
|
-
}
|
|
1347
|
-
]
|
|
627
|
+
path = result["path"].replace("\\", "/")
|
|
628
|
+
parts = path.split("/")
|
|
629
|
+
current = tree
|
|
630
|
+
for i, part in enumerate(parts):
|
|
631
|
+
if i == len(parts) - 1:
|
|
632
|
+
current[part] = {"__is_file__": True, "__path__": result["path"]}
|
|
633
|
+
else:
|
|
634
|
+
if part not in current:
|
|
635
|
+
current[part] = {}
|
|
636
|
+
current = current[part]
|
|
637
|
+
|
|
638
|
+
output_lines = []
|
|
1348
639
|
|
|
1349
|
-
|
|
1350
|
-
|
|
1351
|
-
|
|
1352
|
-
|
|
1353
|
-
|
|
1354
|
-
|
|
1355
|
-
|
|
1356
|
-
|
|
640
|
+
def format_tree(node: Dict[str, Any], indent: str = "") -> None:
|
|
641
|
+
items = sorted(node.keys())
|
|
642
|
+
for name in items:
|
|
643
|
+
child = node[name]
|
|
644
|
+
if isinstance(child, dict) and child.get("__is_file__"):
|
|
645
|
+
output_lines.append(f"{indent}{name}")
|
|
646
|
+
file_path = child["__path__"]
|
|
647
|
+
if file_path in results_by_path:
|
|
648
|
+
file_content = get_file_code_content(
|
|
649
|
+
results_by_path[file_path], indent
|
|
1357
650
|
)
|
|
1358
|
-
|
|
1359
|
-
|
|
1360
|
-
|
|
1361
|
-
|
|
1362
|
-
|
|
1363
|
-
|
|
1364
|
-
|
|
651
|
+
output_lines.extend(file_content)
|
|
652
|
+
elif isinstance(child, dict):
|
|
653
|
+
output_lines.append(f"{indent}{name}/")
|
|
654
|
+
format_tree(child, indent + " ")
|
|
655
|
+
|
|
656
|
+
format_tree(tree)
|
|
657
|
+
|
|
1365
658
|
return (
|
|
1366
659
|
"\n".join(output_lines)
|
|
1367
660
|
if output_lines
|
|
@@ -1385,15 +678,12 @@ Example response format:
|
|
|
1385
678
|
Returns:
|
|
1386
679
|
Dictionary with file content (key: "file", value: file content string)
|
|
1387
680
|
"""
|
|
1388
|
-
# Read the whole file
|
|
1389
681
|
with open(file_path, "rb") as file:
|
|
1390
682
|
content = file.read()
|
|
1391
683
|
|
|
1392
684
|
decoded_content = content.decode("utf-8")
|
|
1393
685
|
|
|
1394
|
-
# If line range is specified, extract those lines
|
|
1395
686
|
if start_line is not None and end_line is not None:
|
|
1396
|
-
# Validate line range
|
|
1397
687
|
if start_line < 1:
|
|
1398
688
|
raise ValueError("start_line must be >= 1")
|
|
1399
689
|
if end_line < start_line:
|
|
@@ -1402,7 +692,6 @@ Example response format:
|
|
|
1402
692
|
lines = decoded_content.split("\n")
|
|
1403
693
|
total_lines = len(lines)
|
|
1404
694
|
|
|
1405
|
-
# Validate bounds
|
|
1406
695
|
if start_line > total_lines:
|
|
1407
696
|
raise ValueError(
|
|
1408
697
|
f"start_line {start_line} exceeds file length ({total_lines} lines)"
|
|
@@ -1410,13 +699,55 @@ Example response format:
|
|
|
1410
699
|
if end_line > total_lines:
|
|
1411
700
|
end_line = total_lines
|
|
1412
701
|
|
|
1413
|
-
# Extract the line range (convert to 0-indexed)
|
|
1414
702
|
selected_lines = lines[start_line - 1 : end_line]
|
|
1415
703
|
return {"file": "\n".join(selected_lines)}
|
|
1416
704
|
|
|
1417
|
-
# Return the whole file
|
|
1418
705
|
return {"file": decoded_content}
|
|
1419
706
|
|
|
707
|
+
def _build_file_tree(self, file_paths: List[str]) -> Dict[str, Any]:
|
|
708
|
+
"""Build a hierarchical tree structure from flat file paths.
|
|
709
|
+
|
|
710
|
+
Args:
|
|
711
|
+
file_paths: List of relative file paths
|
|
712
|
+
|
|
713
|
+
Returns:
|
|
714
|
+
Nested dictionary representing the file tree
|
|
715
|
+
"""
|
|
716
|
+
tree: Dict[str, Any] = {}
|
|
717
|
+
for path in sorted(file_paths):
|
|
718
|
+
parts = path.replace("\\", "/").split("/")
|
|
719
|
+
current = tree
|
|
720
|
+
for i, part in enumerate(parts):
|
|
721
|
+
if i == len(parts) - 1:
|
|
722
|
+
current[part] = None
|
|
723
|
+
else:
|
|
724
|
+
if part not in current:
|
|
725
|
+
current[part] = {}
|
|
726
|
+
current = current[part]
|
|
727
|
+
return tree
|
|
728
|
+
|
|
729
|
+
def _format_file_tree(self, tree: Dict[str, Any], indent: str = "") -> List[str]:
|
|
730
|
+
"""Format a file tree dictionary into indented lines.
|
|
731
|
+
|
|
732
|
+
Args:
|
|
733
|
+
tree: Nested dictionary representing file tree
|
|
734
|
+
indent: Current indentation string
|
|
735
|
+
|
|
736
|
+
Returns:
|
|
737
|
+
List of formatted lines
|
|
738
|
+
"""
|
|
739
|
+
lines = []
|
|
740
|
+
items = sorted(tree.keys())
|
|
741
|
+
for name in items:
|
|
742
|
+
subtree = tree[name]
|
|
743
|
+
if subtree is None:
|
|
744
|
+
lines.append(f"{indent}{name}")
|
|
745
|
+
else:
|
|
746
|
+
lines.append(f"{indent}{name}/")
|
|
747
|
+
child_lines = self._format_file_tree(subtree, indent + " ")
|
|
748
|
+
lines.extend(child_lines)
|
|
749
|
+
return lines
|
|
750
|
+
|
|
1420
751
|
def _format_analysis_results(
|
|
1421
752
|
self,
|
|
1422
753
|
analysis_results: List[Dict[str, Any]],
|
|
@@ -1482,11 +813,14 @@ Example response format:
|
|
|
1482
813
|
f"The following {non_analyzed_count} files were not analyzed due to the {MAX_FILES_TO_ANALYZE} file limit:"
|
|
1483
814
|
)
|
|
1484
815
|
max_non_analyzed_to_show = int(MAX_FILES_TO_ANALYZE / 2)
|
|
1485
|
-
|
|
1486
|
-
|
|
816
|
+
non_analyzed_tree = self._build_file_tree(
|
|
817
|
+
sorted(non_analyzed_files)[:max_non_analyzed_to_show]
|
|
818
|
+
)
|
|
819
|
+
non_analyzed_tree_lines = self._format_file_tree(non_analyzed_tree)
|
|
820
|
+
sections.extend(non_analyzed_tree_lines)
|
|
1487
821
|
if len(non_analyzed_files) > max_non_analyzed_to_show:
|
|
1488
822
|
sections.append(
|
|
1489
|
-
f"
|
|
823
|
+
f"...and {len(non_analyzed_files) - max_non_analyzed_to_show} more files."
|
|
1490
824
|
)
|
|
1491
825
|
|
|
1492
826
|
return "\n".join(sections)
|