agentcrew-ai 0.8.12__py3-none-any.whl → 0.8.13__py3-none-any.whl

This diff covers publicly available package versions released to one of the supported registries. It reflects the changes between those versions as they appear in their respective public registries and is provided for informational purposes only.
Files changed (38)
  1. AgentCrew/__init__.py +1 -1
  2. AgentCrew/main.py +55 -3
  3. AgentCrew/modules/agents/local_agent.py +25 -0
  4. AgentCrew/modules/code_analysis/__init__.py +8 -0
  5. AgentCrew/modules/code_analysis/parsers/__init__.py +67 -0
  6. AgentCrew/modules/code_analysis/parsers/base.py +93 -0
  7. AgentCrew/modules/code_analysis/parsers/cpp_parser.py +127 -0
  8. AgentCrew/modules/code_analysis/parsers/csharp_parser.py +162 -0
  9. AgentCrew/modules/code_analysis/parsers/generic_parser.py +63 -0
  10. AgentCrew/modules/code_analysis/parsers/go_parser.py +154 -0
  11. AgentCrew/modules/code_analysis/parsers/java_parser.py +103 -0
  12. AgentCrew/modules/code_analysis/parsers/javascript_parser.py +268 -0
  13. AgentCrew/modules/code_analysis/parsers/kotlin_parser.py +84 -0
  14. AgentCrew/modules/code_analysis/parsers/php_parser.py +107 -0
  15. AgentCrew/modules/code_analysis/parsers/python_parser.py +60 -0
  16. AgentCrew/modules/code_analysis/parsers/ruby_parser.py +46 -0
  17. AgentCrew/modules/code_analysis/parsers/rust_parser.py +72 -0
  18. AgentCrew/modules/code_analysis/service.py +231 -897
  19. AgentCrew/modules/command_execution/constants.py +2 -2
  20. AgentCrew/modules/console/confirmation_handler.py +4 -4
  21. AgentCrew/modules/console/console_ui.py +20 -1
  22. AgentCrew/modules/console/conversation_browser.py +557 -0
  23. AgentCrew/modules/console/diff_display.py +22 -51
  24. AgentCrew/modules/console/display_handlers.py +22 -22
  25. AgentCrew/modules/console/tool_display.py +4 -6
  26. AgentCrew/modules/file_editing/service.py +8 -8
  27. AgentCrew/modules/file_editing/tool.py +65 -67
  28. AgentCrew/modules/gui/components/tool_handlers.py +0 -2
  29. AgentCrew/modules/gui/widgets/diff_widget.py +30 -61
  30. AgentCrew/modules/llm/constants.py +5 -5
  31. AgentCrew/modules/memory/context_persistent.py +1 -0
  32. AgentCrew/modules/memory/tool.py +1 -1
  33. {agentcrew_ai-0.8.12.dist-info → agentcrew_ai-0.8.13.dist-info}/METADATA +1 -1
  34. {agentcrew_ai-0.8.12.dist-info → agentcrew_ai-0.8.13.dist-info}/RECORD +38 -24
  35. {agentcrew_ai-0.8.12.dist-info → agentcrew_ai-0.8.13.dist-info}/WHEEL +1 -1
  36. {agentcrew_ai-0.8.12.dist-info → agentcrew_ai-0.8.13.dist-info}/entry_points.txt +0 -0
  37. {agentcrew_ai-0.8.12.dist-info → agentcrew_ai-0.8.13.dist-info}/licenses/LICENSE +0 -0
  38. {agentcrew_ai-0.8.12.dist-info → agentcrew_ai-0.8.13.dist-info}/top_level.txt +0 -0
@@ -4,21 +4,23 @@ import subprocess
  import json
  import asyncio
  from typing import Any, Dict, List, Optional, TYPE_CHECKING
+ from loguru import logger

  from tree_sitter_language_pack import get_parser
  from tree_sitter import Parser

+ from .parsers import get_parser_for_language, BaseLanguageParser
+
  if TYPE_CHECKING:
  from AgentCrew.modules.llm.base import BaseLLMService

- MAX_ITEMS_OUT = 20
- MAX_FILES_TO_ANALYZE = 400
+ MAX_ITEMS_OUT = 40
+ MAX_FILES_TO_ANALYZE = 600


  class CodeAnalysisService:
  """Service for analyzing code structure using tree-sitter."""

- # Map of file extensions to language names
  LANGUAGE_MAP = {
  ".py": "python",
  ".js": "javascript",
@@ -47,7 +49,6 @@ class CodeAnalysisService:
  ".toml": "config",
  ".yaml": "config",
  ".yml": "config",
- # Add more languages as needed
  }

  def __init__(self, llm_service: Optional["BaseLLMService"] = None):
@@ -72,7 +73,7 @@
  elif self.llm_service.provider_name == "github_copilot":
  self.llm_service.model = "gpt-5-mini"
  try:
- self._parser_cache = {
+ self._tree_sitter_parser_cache = {
  "python": get_parser("python"),
  "javascript": get_parser("javascript"),
  "typescript": get_parser("typescript"),
@@ -85,15 +86,17 @@
  "c-sharp": get_parser("csharp"),
  "kotlin": get_parser("kotlin"),
  }
- # Define node types for different categories
+ self._language_parser_cache: Dict[str, BaseLanguageParser] = {}
+
  self.class_types = {
  "class_definition",
  "class_declaration",
  "class_specifier",
  "struct_specifier",
+ "struct_declaration",
  "struct_item",
  "interface_declaration",
- "object_declaration", # Kotlin object declarations
+ "object_declaration",
  }

  self.function_types = {
@@ -106,7 +109,7 @@
  "fn_item",
  "method",
  "singleton_method",
- "primary_constructor", # Kotlin primary constructors
+ "primary_constructor",
  }
  except Exception as e:
  raise RuntimeError(f"Failed to initialize languages: {e}")
@@ -116,15 +119,17 @@
  ext = os.path.splitext(file_path)[1].lower()
  return self.LANGUAGE_MAP.get(ext, "unknown")

- def _get_language_parser(self, language: str) -> Parser:
+ def _get_tree_sitter_parser(self, language: str) -> Parser:
  """Get the appropriate tree-sitter parser for a language."""
- if language not in self._parser_cache:
+ if language not in self._tree_sitter_parser_cache:
  raise ValueError(f"Unsupported language: {language}")
- return self._parser_cache[language]
+ return self._tree_sitter_parser_cache[language]

- def _extract_node_text(self, node, source_code: bytes) -> str:
- """Extract text from a node."""
- return source_code[node.start_byte : node.end_byte].decode("utf-8")
+ def _get_language_parser(self, language: str) -> BaseLanguageParser:
+ """Get the appropriate language parser for processing nodes."""
+ if language not in self._language_parser_cache:
+ self._language_parser_cache[language] = get_parser_for_language(language)
+ return self._language_parser_cache[language]

  def _analyze_file(self, file_path: str) -> Optional[Dict[str, Any]]:
  """Analyze a single file using tree-sitter."""
@@ -138,772 +143,22 @@
  "error": f"Unsupported file type: {os.path.splitext(file_path)[1]}"
  }

- parser = self._get_language_parser(language)
- if isinstance(parser, dict) and "error" in parser:
- return parser
+ tree_sitter_parser = self._get_tree_sitter_parser(language)
+ if isinstance(tree_sitter_parser, dict) and "error" in tree_sitter_parser:
+ return tree_sitter_parser

- tree = parser.parse(source_code)
+ tree = tree_sitter_parser.parse(source_code)
  root_node = tree.root_node

- # Check if we got a valid root node
  if not root_node:
  return {"error": "Failed to parse file - no root node"}

+ language_parser = self._get_language_parser(language)
+
  def process_node(node) -> Optional[Dict[str, Any]]:
  if not node:
  return None
-
156
- result = {
157
- "type": node.type,
158
- "start_line": node.start_point[0] + 1,
159
- "end_line": node.end_point[0] + 1,
160
- }
161
-
162
- # Process child nodes based on language-specific patterns
163
- if language == "python":
164
- if node.type in ["class_definition", "function_definition"]:
165
- for child in node.children:
166
- if child.type == "identifier":
167
- result["name"] = self._extract_node_text(
168
- child, source_code
169
- )
170
- elif child.type == "parameters":
171
- params = []
172
- for param in child.children:
173
- if (
174
- "parameter" in param.type
175
- or param.type == "identifier"
176
- ):
177
- params.append(
178
- self._extract_node_text(param, source_code)
179
- )
180
- if params:
181
- result["parameters"] = params
182
- elif node.type == "assignment":
183
- # Handle global variable assignments
184
- for child in node.children:
185
- if child.type == "identifier":
186
- result["type"] = "variable_declaration"
187
- result["name"] = self._extract_node_text(
188
- child, source_code
189
- )
190
- return result
191
- # Break after first identifier to avoid capturing right-hand side
192
- break
193
- elif language == "javascript" or language == "typescript":
194
- if (
195
- node.type
196
- in [
197
- "class_declaration",
198
- "method_definition",
199
- "class",
200
- "method_declaration",
201
- "function_declaration",
202
- "interface_declaration",
203
- "export_statement", # Handle exported items
204
- "arrow_function", # Add support for arrow functions
205
- "lexical_declaration", # Add support for const/let declarations with arrow functions
206
- ]
207
- ):
208
- # Handle export statements by looking at their children
209
- if node.type == "export_statement":
210
- # Process the declaration that's being exported
211
- for child in node.children:
212
- if child.type in [
213
- "class_declaration",
214
- "function_declaration",
215
- "interface_declaration",
216
- "variable_statement",
217
- "lexical_declaration",
218
- "method_definition",
219
- ]:
220
- # Recursively process the exported declaration
221
- exported_result = process_node(child)
222
-
223
- if exported_result:
224
- # Mark as exported
225
- exported_result["exported"] = True
226
- # Return the exported item's result
227
- return exported_result
228
-
229
- # Handle arrow functions - extract name from parent variable declarator
230
- elif node.type == "arrow_function":
231
- parent = node.parent
232
- if parent and parent.type == "variable_declarator":
233
- for sibling in parent.children:
234
- if sibling.type == "identifier":
235
- result["type"] = "arrow_function"
236
- result["name"] = self._extract_node_text(
237
- sibling, source_code
238
- )
239
-
240
- # Process arrow function parameters
241
- for child in node.children:
242
- if child.type == "formal_parameters":
243
- params = []
244
- for param in child.children:
245
- if param.type in [
246
- "required_parameter",
247
- "optional_parameter",
248
- "identifier",
249
- ]:
250
- param_text = self._extract_node_text(
251
- param, source_code
252
- )
253
- params.append(param_text)
254
-
255
- if params:
256
- result["parameters"] = params
257
-
258
- # Handle lexical declarations with arrow functions (const/let)
259
- elif node.type == "lexical_declaration":
260
- for child in node.children:
261
- if child.type == "variable_declarator":
262
- # Find the identifier (name)
263
- var_name = None
264
- has_arrow_function = False
265
- for declarator_child in child.children:
266
- if declarator_child.type == "identifier":
267
- var_name = self._extract_node_text(
268
- declarator_child, source_code
269
- )
270
- elif declarator_child.type == "arrow_function":
271
- has_arrow_function = True
272
-
273
- if var_name and has_arrow_function:
274
- result["type"] = "arrow_function"
275
- result["name"] = var_name
276
- # Recursively process the arrow function to get parameters
277
- for declarator_child in child.children:
278
- if (
279
- declarator_child.type
280
- == "arrow_function"
281
- ):
282
- arrow_result = process_node(
283
- declarator_child
284
- )
285
- if (
286
- arrow_result
287
- and "parameters" in arrow_result
288
- ):
289
- result["parameters"] = arrow_result[
290
- "parameters"
291
- ]
292
- else:
293
- result["type"] = "variable_declaration"
294
- result["name"] = var_name
295
- result["first_line"] = (
296
- self._extract_node_text(node, source_code)
297
- .split("\n")[0]
298
- .strip("{")
299
- )
300
-
301
- # Handle regular declarations
302
- elif node.type in [
303
- "class",
304
- "class_declaration",
305
- "function_declaration",
306
- "method_declaration",
307
- "interface_declaration",
308
- "method_definition",
309
- ]:
310
- for child in node.children:
311
- if (
312
- child.type == "identifier"
313
- or child.type == "type_identifier"
314
- or child.type == "property_identifier"
315
- ):
316
- result["name"] = self._extract_node_text(
317
- child, source_code
318
- )
319
- # Process function parameters for function declarations
320
- elif (
321
- child.type == "formal_parameters"
322
- and node.type
323
- in [
324
- "function_declaration",
325
- "method_declaration",
326
- "method_definition",
327
- ]
328
- ):
329
- params = []
330
- for param in child.children:
331
- if param.type in [
332
- "required_parameter",
333
- "optional_parameter",
334
- "identifier",
335
- ]:
336
- param_name = None
337
- param_type = None
338
-
339
- # For simple identifiers
340
- if param.type == "identifier":
341
- param_name = self._extract_node_text(
342
- param, source_code
343
- )
344
- params.append(param_name)
345
- continue
346
-
347
- # For parameters with type annotations
348
- for param_child in param.children:
349
- if (
350
- param_child.type == "identifier"
351
- or param_child.type
352
- == "object_pattern"
353
- ):
354
- param_name = (
355
- self._extract_node_text(
356
- param_child, source_code
357
- )
358
- )
359
- elif (
360
- param_child.type
361
- == "type_annotation"
362
- ):
363
- # Extract the type from type annotation
364
- for (
365
- type_child
366
- ) in param_child.children:
367
- if (
368
- type_child.type != ":"
369
- ): # Skip the colon
370
- param_type = (
371
- self._extract_node_text(
372
- type_child,
373
- source_code,
374
- )
375
- )
376
-
377
- if param_name:
378
- if param_type:
379
- params.append(
380
- f"{param_name}: {param_type}"
381
- )
382
- else:
383
- params.append(param_name)
384
-
385
- if params:
386
- result["parameters"] = params
387
-
388
- elif node.type in [
389
- "variable_statement",
390
- "property_declaration",
391
- "variable_declaration",
392
- ]:
393
- # Handle variable declarations and property declarations
394
- for child in node.children:
395
- if child.type == "variable_declaration_list":
396
- for declarator in child.children:
397
- if declarator.type == "variable_declarator":
398
- var_name = None
399
- has_arrow_function = False
400
-
401
- for declarator_child in declarator.children:
402
- if declarator_child.type == "identifier":
403
- var_name = self._extract_node_text(
404
- declarator_child, source_code
405
- )
406
- elif (
407
- declarator_child.type
408
- == "arrow_function"
409
- ):
410
- has_arrow_function = True
411
-
412
- if var_name:
413
- if has_arrow_function:
414
- result["type"] = "arrow_function"
415
- result["name"] = var_name
416
- # Find parameters
417
- for (
418
- declarator_child
419
- ) in declarator.children:
420
- if (
421
- declarator_child.type
422
- == "arrow_function"
423
- ):
424
- arrow_result = process_node(
425
- declarator_child
426
- )
427
- if (
428
- arrow_result
429
- and "parameters"
430
- in arrow_result
431
- ):
432
- result["parameters"] = (
433
- arrow_result[
434
- "parameters"
435
- ]
436
- )
437
- else:
438
- result["type"] = "variable_declaration"
439
- result["name"] = var_name
440
-
441
- return result
442
- elif child.type == "identifier":
443
- result["type"] = "variable_declaration"
444
- result["name"] = self._extract_node_text(
445
- child, source_code
446
- )
447
- return result
448
-
449
- elif language == "java":
450
- if node.type in ["class_declaration", "interface_declaration"]:
451
- # Handle class and interface declarations
452
- for child in node.children:
453
- if child.type == "identifier":
454
- result["name"] = self._extract_node_text(
455
- child, source_code
456
- )
457
- elif child.type in ["class_body", "interface_body"]:
458
- result["children"] = [
459
- process_node(c) for c in child.children
460
- ]
461
-
462
- elif node.type == "method_declaration":
463
- # Handle method declarations
464
- method_name = None
465
- parameters = []
466
- return_type = None
467
-
468
- for child in node.children:
469
- if child.type == "identifier":
470
- method_name = self._extract_node_text(
471
- child, source_code
472
- )
473
- result["name"] = method_name
474
- elif child.type == "formal_parameters":
475
- for param in child.children:
476
- if param.type == "parameter":
477
- param_name = self._extract_node_text(
478
- param.child_by_field_name("name"),
479
- source_code,
480
- )
481
- param_type = self._extract_node_text(
482
- param.child_by_field_name("type"),
483
- source_code,
484
- )
485
- parameters.append(f"{param_type} {param_name}")
486
- result["parameters"] = parameters
487
- elif child.type == "type":
488
- return_type = self._extract_node_text(
489
- child, source_code
490
- )
491
- result["return_type"] = return_type
492
-
493
- elif node.type == "field_declaration":
494
- # Handle field declarations
495
- for child in node.children:
496
- if child.type == "variable_declarator":
497
- var_name = self._extract_node_text(
498
- child.child_by_field_name("name"), source_code
499
- )
500
- var_type = self._extract_node_text(
501
- child.child_by_field_name("type"), source_code
502
- )
503
- result["name"] = var_name
504
- result["variable_type"] = var_type
505
- result["type"] = "field_declaration"
506
-
507
- elif node.type == "annotation":
508
- # Handle annotations
509
- annotation_name = self._extract_node_text(node, source_code)
510
- result["name"] = annotation_name
511
- result["type"] = "annotation"
512
-
513
- elif node.type == "lambda_expression":
514
- # Handle lambda expressions
515
- result["type"] = "lambda_expression"
516
- # Additional processing for lambda parameters and body can be added here
517
-
518
- # Recursively process children for nested classes or other constructs
519
- children = [process_node(child) for child in node.children]
520
- if children:
521
- result["children"] = children
522
-
523
- return result
524
-
525
- elif language == "cpp":
526
- if node.type in [
527
- "class_specifier",
528
- "function_definition",
529
- "struct_specifier",
530
- ]:
531
- for child in node.children:
532
- if child.type == "identifier":
533
- result["name"] = self._extract_node_text(
534
- child, source_code
535
- )
536
- return result
537
- return result
538
- elif node.type in ["declaration", "variable_declaration"]:
539
- # Handle C++ global variables and declarations
540
- for child in node.children:
541
- if (
542
- child.type == "init_declarator"
543
- or child.type == "declarator"
544
- ):
545
- for subchild in child.children:
546
- if subchild.type == "identifier":
547
- result["type"] = "variable_declaration"
548
- result["name"] = self._extract_node_text(
549
- subchild, source_code
550
- )
551
- return result
552
- return result
553
-
554
- elif language == "ruby":
555
- if node.type in ["class", "method", "singleton_method", "module"]:
556
- for child in node.children:
557
- if child.type == "identifier":
558
- result["name"] = self._extract_node_text(
559
- child, source_code
560
- )
561
- return result
562
- return result
563
- elif node.type == "assignment" or node.type == "global_variable":
564
- # Handle Ruby global variables and assignments
565
- for child in node.children:
566
- if (
567
- child.type == "identifier"
568
- or child.type == "global_variable"
569
- ):
570
- result["type"] = "variable_declaration"
571
- result["name"] = self._extract_node_text(
572
- child, source_code
573
- )
574
- return result
575
- return result
576
-
577
- elif language == "go":
578
- if node.type in [
579
- "type_declaration",
580
- "function_declaration",
581
- "method_declaration",
582
- "interface_declaration",
583
- ]:
584
- for child in node.children:
585
- if (
586
- child.type == "identifier"
587
- or child.type == "field_identifier"
588
- ):
589
- result["name"] = self._extract_node_text(
590
- child, source_code
591
- )
592
- result["first_line"] = (
593
- self._extract_node_text(node, source_code)
594
- .split("\n")[0]
595
- .strip("{")
596
- )
597
- return result
598
- return result
599
- elif (
600
- node.type == "var_declaration"
601
- or node.type == "const_declaration"
602
- ):
603
- # Handle Go variable and constant declarations
604
- for child in node.children:
605
- if child.type == "var_spec" or child.type == "const_spec":
606
- for subchild in child.children:
607
- if subchild.type == "identifier":
608
- result["type"] = "variable_declaration"
609
- result["name"] = self._extract_node_text(
610
- subchild, source_code
611
- )
612
- return result
613
- return result
614
-
615
- elif language == "rust":
616
- if node.type in [
617
- "struct_item",
618
- "impl_item",
619
- "fn_item",
620
- "trait_item",
621
- ]:
622
- for child in node.children:
623
- if child.type == "identifier":
624
- result["name"] = self._extract_node_text(
625
- child, source_code
626
- )
627
- return result
628
- return result
629
- elif node.type in ["static_item", "const_item", "let_declaration"]:
630
- # Handle Rust static items, constants, and let declarations
631
- for child in node.children:
632
- if child.type == "identifier":
633
- result["type"] = "variable_declaration"
634
- result["name"] = self._extract_node_text(
635
- child, source_code
636
- )
637
- return result
638
- elif child.type == "pattern" and child.children:
639
- result["name"] = self._extract_node_text(
640
- child.children[0], source_code
641
- )
642
- return result
643
-
644
- elif language == "php":
645
- if node.type in [
646
- "class_declaration",
647
- "method_declaration",
648
- "function_definition",
649
- "interface_declaration",
650
- "trait_declaration",
651
- ]:
652
- for child in node.children:
653
- if child.type == "name":
654
- result["name"] = self._extract_node_text(
655
- child, source_code
656
- )
657
- return result
658
- return result
659
- elif (
660
- node.type == "property_declaration"
661
- or node.type == "const_declaration"
662
- ):
663
- # Handle PHP class properties and constants
664
- for child in node.children:
665
- if (
666
- child.type == "property_element"
667
- or child.type == "const_element"
668
- ):
669
- for subchild in child.children:
670
- if (
671
- subchild.type == "variable_name"
672
- or subchild.type == "name"
673
- ):
674
- result["type"] = "variable_declaration"
675
- result["name"] = self._extract_node_text(
676
- subchild, source_code
677
- )
678
- return result
679
-
680
- elif language == "c-sharp":
681
- if node.type == "class_declaration":
682
- # Create a more comprehensive class result
683
- class_name = None
684
- base_class_name = None
685
-
686
- # Extract class name and base class name
687
- for child in node.children:
688
- if child.type == "identifier":
689
- class_name = self._extract_node_text(child, source_code)
690
- result["name"] = class_name
691
- elif child.type == "base_list":
692
- # Extract base class if present
693
- if (
694
- len(child.children) > 1
695
- ): # Check if there's a base class
696
- base_class_name = self._extract_node_text(
697
- child.children[1], source_code
698
- )
699
- result["base_class"] = base_class_name
700
-
701
- # DO NOT return early here to ensure methods are processed
702
-
703
- elif node.type == "method_declaration":
704
- method_name = None
705
- parameters = []
706
- access_modifiers = []
707
-
708
- for child in node.children:
709
- if child.type == "identifier":
710
- method_name = self._extract_node_text(
711
- child, source_code
712
- )
713
- result["name"] = method_name
714
- elif child.type == "parameter_list":
715
- # Extract parameter information
716
- for param in child.children:
717
- if param.type == "parameter":
718
- param_type = ""
719
- param_name = None
720
-
721
- # Get type and name fields from parameter
722
- type_node = param.child_by_field_name("type")
723
- name_node = param.child_by_field_name("name")
724
-
725
- if type_node:
726
- param_type = self._extract_node_text(
727
- type_node, source_code
728
- )
729
- if name_node:
730
- param_name = self._extract_node_text(
731
- name_node, source_code
732
- )
733
-
734
- if param_name:
735
- parameters.append(
736
- param_type + " " + param_name
737
- )
738
-
739
- # Add parameters to result
740
- if parameters:
741
- result["parameters"] = parameters
742
- elif child.type == "modifier":
743
- # Capture access modifiers
744
- modifier = self._extract_node_text(child, source_code)
745
- access_modifiers.append(modifier)
746
-
747
- # Add access modifiers to result
748
- if access_modifiers:
749
- result["modifiers"] = access_modifiers
750
-
751
- # DO NOT return early here
752
-
753
- elif node.type in ["property_declaration", "field_declaration"]:
754
- # Improved handling for properties and fields
755
- property_name = None
756
- property_type = None
757
-
758
- for child in node.children:
759
- if child.type == "variable_declaration":
760
- for subchild in child.children:
761
- if subchild.type == "identifier":
762
- result["type"] = "variable_declaration"
763
- result["name"] = self._extract_node_text(
764
- subchild, source_code
765
- )
766
- # Look for the type of the variable
767
- elif subchild.type == "predefined_type" or (
768
- subchild.type == "identifier"
769
- and subchild != child
770
- ):
771
- result["variable_type"] = (
772
- self._extract_node_text(
773
- subchild, source_code
774
- )
775
- )
776
- # Check for property name directly in property_declaration
777
- elif child.type == "identifier":
778
- property_name = self._extract_node_text(
779
- child, source_code
780
- )
781
- result["name"] = property_name
782
- result["type"] = "property_declaration"
783
- # Check for property type
784
- elif child.type == "predefined_type" or (
785
- child.type == "identifier" and child != property_name
786
- ):
787
- if (
788
- not property_name
789
- or self._extract_node_text(child, source_code)
790
- != property_name
791
- ):
792
- property_type = self._extract_node_text(
793
- child, source_code
794
- )
795
- result["property_type"] = property_type
796
-
797
- elif language == "kotlin":
798
- if node.type in ["class_declaration", "function_declaration"]:
799
- for child in node.children:
800
- if child.type == "simple_identifier":
801
- result["name"] = self._extract_node_text(
802
- child, source_code
803
- )
804
- return result
805
- return result
806
- elif node.type in ["property_declaration", "variable_declaration"]:
807
- # Handle Kotlin properties and variables
808
- for child in node.children:
809
- if child.type == "simple_identifier":
810
- result["type"] = "variable_declaration"
811
- result["name"] = self._extract_node_text(
812
- child, source_code
813
- )
814
- return result
815
- break # Only capture the first identifier
816
- return result
817
- else:
818
- if node.type in [
819
- "type_declaration",
820
- "function_declaration",
821
- "method_declaration",
822
- "interface_declaration",
823
- ]:
824
- for child in node.children:
825
- if (
826
- child.type == "identifier"
827
- or child.type == "field_identifier"
828
- ):
829
- result["name"] = self._extract_node_text(
830
- child, source_code
831
- )
832
- result["first_line"] = (
833
- self._extract_node_text(node, source_code)
834
- .split("\n")[0]
835
- .strip("{")
836
- )
837
- return result
838
- return result
839
- elif (
840
- node.type == "var_declaration"
841
- or node.type == "const_declaration"
842
- ):
843
- # Handle Go variable and constant declarations
844
- for child in node.children:
845
- if child.type == "var_spec" or child.type == "const_spec":
846
- for subchild in child.children:
847
- if subchild.type == "identifier":
848
- result["type"] = "variable_declaration"
849
- result["name"] = self._extract_node_text(
850
- subchild, source_code
851
- )
852
- return result
853
- return result
854
-
855
- # Recursively process children
856
- children = []
857
- # if file_path.endswith("models/wishlist.js"):
858
- # print(f"{file_path} {language}")
859
- # print(
860
- # f"{node.type} ({self._extract_node_text(node, source_code) if node.type == 'identifier' else ''})"
861
- # )
862
- # print(self._extract_node_text(node, source_code))
863
- # print("=============")
864
- for child in node.children:
865
- child_result = process_node(child)
866
- if child_result and (
867
- child_result.get("type")
868
- in [
869
- "class_definition",
870
- "function_definition",
871
- "class_declaration",
872
- "method_definition",
873
- "function_declaration",
874
- "interface_declaration",
875
- "method_declaration",
876
- "constructor_declaration",
877
- "class_specifier",
878
- "struct_specifier",
879
- "class",
880
- "method",
881
- "singleton_method",
882
- "module",
883
- "type_declaration",
884
- "method_declaration",
885
- "interface_declaration",
886
- "struct_item",
887
- "impl_item",
888
- "fn_item",
889
- "trait_item",
890
- "trait_declaration",
891
- "property_declaration",
892
- "object_definition",
893
- "trait_definition",
894
- "def_definition",
895
- "function_definition",
896
- "class_definition",
897
- "variable_declaration",
898
- "arrow_function",
899
- ]
900
- or "children" in child_result
901
- ):
902
- children.append(child_result)
903
-
904
- if children:
905
- result["children"] = children
906
- return result
+ return language_parser.process_node(node, source_code, process_node)

  return process_node(root_node)
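
The per-language node handling that used to live inline in process_node now comes from the new AgentCrew/modules/code_analysis/parsers package, whose files are listed above but not shown in this diff. Judging only from the call sites in service.py (get_parser_for_language(language) returns an object whose process_node(node, source_code, process_node) is invoked per tree-sitter node and must produce the same result dict shape as before), the contract is roughly the sketch below; every name other than get_parser_for_language, BaseLanguageParser, and process_node is an illustrative assumption, not the package's actual code.

# Hypothetical sketch of the contract implied by the call sites above.
# Only get_parser_for_language, BaseLanguageParser, and process_node appear
# in the diff; everything else here is assumed for illustration.
from typing import Any, Callable, Dict, Optional

NodeResult = Optional[Dict[str, Any]]


class BaseLanguageParser:
    """Turns a tree-sitter node into the result dict service.py expects."""

    def extract_text(self, node: Any, source_code: bytes) -> str:
        # Same job as the _extract_node_text helper removed from service.py.
        return source_code[node.start_byte : node.end_byte].decode("utf-8")

    def process_node(
        self, node: Any, source_code: bytes, recurse: Callable[[Any], NodeResult]
    ) -> NodeResult:
        result: Dict[str, Any] = {
            "type": node.type,
            "start_line": node.start_point[0] + 1,
            "end_line": node.end_point[0] + 1,
        }
        children = [c for c in (recurse(child) for child in node.children) if c]
        if children:
            result["children"] = children
        return result


class PythonParser(BaseLanguageParser):
    """Example subclass attaching names, as the old inline Python branch did."""

    def process_node(self, node, source_code, recurse):
        result = super().process_node(node, source_code, recurse)
        if node.type in {"class_definition", "function_definition"}:
            for child in node.children:
                if child.type == "identifier":
                    result["name"] = self.extract_text(child, source_code)
        return result


_PARSERS: Dict[str, BaseLanguageParser] = {"python": PythonParser()}


def get_parser_for_language(language: str) -> BaseLanguageParser:
    # Unmapped languages fall back to generic structural extraction.
    return _PARSERS.get(language, BaseLanguageParser())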
 
@@ -914,11 +169,9 @@
  """Recursively count nodes of specific types in the tree structure."""
  count = 0

- # Count current node if it matches
  if structure.get("type") in node_types:
  count += 1

- # Recursively count in children
  for child in structure.get("children", []):
  count += self._count_nodes(child, node_types)
@@ -939,26 +192,39 @@
  if not self.llm_service:
  return files[:max_files]

- prompt = f"""You are analyzing a code repository with {len(files)} files.
+ prompt = f"""You are analyzing a code repository with {len(files)} files.
  The analysis system can only process {max_files} files at a time.

- Please select the {max_files} most important files to analyze based on these criteria:
- 1. Core application logic files (main entry points, core modules)
- 2. Business logic and domain models
+ Generate glob patterns to EXCLUDE less important files. The goal is to keep around {max_files} most important files after exclusion.
+
+ Files to EXCLUDE (generate patterns for these):
+ 1. Test files
+ 2. Generated/build files
+ 3. Vendor/dependency files
+ 4. Documentation files (e.g., **/docs/**, **/*.md)
+ 5. Configuration duplicates and environment files
+ 6. Migration files
+ 7. Static assets (images, fonts, etc.)
+ 8. Example/sample files
+
+ Files to KEEP (NEVER exclude):
+ 1. Core application logic (main entry points, core modules)
+ 2. Business features logic and domain models
  3. API endpoints and controllers
  4. Service/utility classes
- 5. Configuration files that define app structure
- 6. Test files are lower priority unless they reveal architecture
- 7. Generated files, lock files, and vendor files should be excluded
+ 5. Key configuration files that define app structure

- Here is the complete list of files in the repository:
+ Here is the complete list of files:
  {chr(10).join(files)}

- Return your selection as a JSON array of file paths. Only return the JSON array, nothing else.
- Select exactly {max_files} files from the list above.
+ Current file count: {len(files)}
+ Target file count: ~{max_files}
+ Files to exclude: ~{max(0, len(files) - max_files)}
+
+ Return ONLY a JSON array of glob patterns to exclude. Be strategic - use broad patterns when possible.

  Example response format:
- ["src/main.py", "src/app.py", "src/models/user.py"]"""
+ ["**/tests/**", "**/test_*", "**/*.test.*", "**/docs/**", "**/migrations/**", "**/__pycache__/**"]"""

  try:
  loop = asyncio.get_event_loop()
@@ -980,14 +246,26 @@ Example response format:
  response = response[:-3]
  response = response.strip()

- selected_files = json.loads(response)
+ exclude_patterns = json.loads(response)

- if isinstance(selected_files, list):
- valid_files = [f for f in selected_files if f in files]
- if len(valid_files) >= max_files * 0.5:
- return valid_files[:max_files]
- except Exception:
- pass
+ if isinstance(exclude_patterns, list):
+ filtered_files = []
+ for file_path in files:
+ excluded = False
+ for pattern in exclude_patterns:
+ if fnmatch.fnmatch(file_path, pattern):
+ excluded = True
+ break
+ if not excluded:
+ filtered_files.append(file_path)
+
+ logger.info(
+ f"LLM exclusion patterns reduced files from {len(files)} to {len(filtered_files)}"
+ )
+
+ return filtered_files[:max_files]
+ except Exception as e:
+ logger.warning(f"Cannot extract exclusion patterns from LLM response: {e}")

  return files[:max_files]
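
The selection step above now asks the LLM for glob patterns to exclude and filters the file list locally with fnmatch instead of trusting the model to return an exact file list. A small standalone sketch of that filtering, with made-up paths and patterns, behaves as follows; note that fnmatch gives ** no special recursive meaning (it is simply two * wildcards), but since * already matches "/", broad patterns of this shape still match nested paths.

# Standalone sketch of the exclusion filtering above; the file list and
# patterns are made up for illustration.
import fnmatch
from typing import List


def apply_exclusions(files: List[str], exclude_patterns: List[str]) -> List[str]:
    kept: List[str] = []
    for file_path in files:
        # fnmatch's "*" matches "/" as well, so "**/tests/**" effectively means
        # "anything containing /tests/"; a top-level "tests/..." path would need
        # its own pattern such as "tests/**".
        if any(fnmatch.fnmatch(file_path, p) for p in exclude_patterns):
            continue
        kept.append(file_path)
    return kept


files = [
    "src/main.py",
    "src/models/user.py",
    "src/app/tests/helpers.py",
    "tests/test_user.py",
    "docs/index.md",
]
patterns = ["**/tests/**", "**/test_*", "**/*.md"]
print(apply_exclusions(files, patterns))
# ['src/main.py', 'src/models/user.py']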
 
@@ -1088,7 +366,7 @@ Example response format:
  return {"error": f"Error analyzing directory: {str(e)}"}

  def _generate_text_map(self, analysis_results: List[Dict[str, Any]]) -> str:
- """Generate a compact text representation of the code structure analysis."""
+ """Generate a hierarchical text representation of the code structure analysis."""

  def format_node(
  node: Dict[str, Any], prefix: str = "", is_last: bool = True
@@ -1098,10 +376,9 @@ Example response format:
  node_type = node.get("type", "")
  node_name = node.get("name", "")
  node_lines = (
- f" //Lines:{node.get('start_line', '')}-{node.get('end_line', '')}"
+ f" //L: {node.get('start_line', '')}-{node.get('end_line', '')}"
  )

- # Handle decorated functions - extract the actual function definition
  if node_type == "decorated_definition" and "children" in node:
  for child in node.get("children", []):
  if child.get("type") in {
@@ -1111,7 +388,6 @@ Example response format:
  }:
  return format_node(child, prefix, is_last)

- # Handle class body, block nodes, and wrapper functions
  if not node_name and node_type in {
  "class_body",
  "block",
@@ -1125,8 +401,7 @@ Example response format:
  elif not node_name:
  return lines

- branch = "└── " if is_last else "├── "
- # Format node information based on type
+ branch = " "
  if node_type in {
  "class_definition",
  "class_declaration",
@@ -1134,6 +409,7 @@ Example response format:
  "class",
  "interface_declaration",
  "struct_specifier",
+ "struct_declaration",
  "struct_item",
  "trait_item",
  "trait_declaration",
@@ -1159,7 +435,6 @@ Example response format:
  "arrow_function",
  "lexical_declaration",
  }:
- # Handle parameters
  if "first_line" in node:
  node_info = node["first_line"] + node_lines
  else:
@@ -1168,7 +443,6 @@ Example response format:
  if "parameters" in node and node["parameters"]:
  params = node["parameters"]
  elif "children" in node:
- # Try to extract parameters from children for languages that structure them differently
  for child in node["children"]:
  if child.get("type") in {
  "parameter_list",
@@ -1198,11 +472,10 @@ Example response format:

  lines.append(f"{prefix}{branch}{node_info}")

- # Process children
  if "children" in node:
- new_prefix = prefix + (" " if is_last else "│ ")
+ new_prefix = prefix + " "
  child_lines = process_children(node["children"], new_prefix, is_last)
- if child_lines: # Only add child lines if there are any
+ if child_lines:
  lines.extend(child_lines)

  return lines
@@ -1223,20 +496,19 @@ Example response format:
  "call_expression",
  "lexical_declaration",
  "decorated_definition",
- # Class-related nodes
  "class_definition",
  "class_declaration",
  "class_specifier",
  "class",
  "interface_declaration",
  "struct_specifier",
+ "struct_declaration",
  "struct_item",
  "trait_item",
  "trait_declaration",
  "module",
  "type_declaration",
- "impl_item", # Rust implementations
- # Method-related nodes
+ "impl_item",
  "function_definition",
  "function_declaration",
  "method_definition",
@@ -1251,13 +523,11 @@ Example response format:
  "public_method_definition",
  "private_method_definition",
  "protected_method_definition",
- # Container nodes that might have methods
  "class_body",
  "block",
  "declaration_list",
  "body",
- "impl_block", # Rust implementation blocks
- # Property and field nodes
+ "impl_block",
  "property_declaration",
  "field_declaration",
  "variable_declaration",
@@ -1268,100 +538,123 @@ Example response format:
1268
538
  for i, child in enumerate(significant_children):
1269
539
  is_last_child = i == len(significant_children) - 1
1270
540
  child_lines = format_node(child, prefix, is_last_child)
1271
- if child_lines: # Only add child lines if there are any
541
+ if child_lines:
1272
542
  lines.extend(child_lines)
1273
543
  if i >= MAX_ITEMS_OUT:
1274
544
  lines.append(
1275
- f"...({len(significant_children) - MAX_ITEMS_OUT} more items)"
545
+ f"{prefix} ...({len(significant_children) - MAX_ITEMS_OUT} more items)"
1276
546
  )
1277
547
  break
1278
548
 
1279
549
  return lines
1280
550
 
1281
- # Process each file
1282
- output_lines = []
551
+ def get_file_code_content(
552
+ result: Dict[str, Any], file_indent: str
553
+ ) -> List[str]:
554
+ """Generate code structure content for a single file."""
555
+ lines = []
556
+ structure = result.get("structure")
557
+ if not structure:
558
+ return lines
559
+
560
+ if not structure.get("children"):
561
+ if structure.get("type"):
562
+ return [f"{file_indent} {structure['type']}"]
563
+ return lines
564
+
565
+ significant_nodes = [
566
+ child
567
+ for child in structure["children"]
568
+ if child.get("type")
569
+ in {
570
+ "arrow_function",
571
+ "lexical_declaration",
572
+ "call_expression",
573
+ "decorated_definition",
574
+ "class_definition",
575
+ "class_declaration",
576
+ "class_specifier",
577
+ "class",
578
+ "interface_declaration",
579
+ "struct_specifier",
580
+ "struct_declaration",
581
+ "struct_item",
582
+ "trait_item",
583
+ "trait_declaration",
584
+ "module",
585
+ "type_declaration",
586
+ "impl_item",
587
+ "function_definition",
588
+ "function_declaration",
589
+ "method_definition",
590
+ "method_declaration",
591
+ "fn_item",
592
+ "method",
593
+ "singleton_method",
594
+ "constructor_declaration",
595
+ "member_function_definition",
596
+ "constructor",
597
+ "destructor",
598
+ "public_method_definition",
599
+ "private_method_definition",
600
+ "protected_method_definition",
601
+ "property_declaration",
602
+ "field_declaration",
603
+ "variable_declaration",
604
+ "const_declaration",
605
+ "namespace_declaration",
606
+ }
607
+ ]
608
+
609
+ for i, node in enumerate(significant_nodes):
610
+ is_last = i == len(significant_nodes) - 1
611
+ node_lines = format_node(node, file_indent, is_last)
612
+ if node_lines:
613
+ lines.extend(node_lines)
614
+ if i >= MAX_ITEMS_OUT:
615
+ lines.append(
616
+ f"{file_indent} ...({len(significant_nodes) - MAX_ITEMS_OUT} more items)"
617
+ )
618
+ break
619
+ return lines
1283
620
 
1284
- # Sort analysis results by path
1285
621
  sorted_results = sorted(analysis_results, key=lambda x: x["path"])
1286
622
 
623
+ results_by_path = {result["path"]: result for result in sorted_results}
624
+
625
+ tree: Dict[str, Any] = {}
1287
626
  for result in sorted_results:
1288
- # Skip files with no significant structure
1289
- if not result.get("structure") or not result.get("structure", {}).get(
1290
- "children"
1291
- ):
1292
- if not result.get("structure"):
1293
- output_lines.append(
1294
- f"\n{result['path']}: {result['structure']['type']}"
1295
- )
1296
- continue
1297
-
1298
- # Add file header
1299
- output_lines.append(f"\n{result['path']}")
1300
- # Format the structure
1301
- structure = result["structure"]
1302
- if "children" in structure:
1303
- significant_nodes = [
1304
- child
1305
- for child in structure["children"]
1306
- if child.get("type")
1307
- in {
1308
- "arrow_function",
1309
- "lexical_declaration",
1310
- "call_expression",
1311
- "decorated_definition",
1312
- # Class-related nodes
1313
- "class_definition",
1314
- "class_declaration",
1315
- "class_specifier",
1316
- "class",
1317
- "interface_declaration",
1318
- "struct_specifier",
1319
- "struct_item",
1320
- "trait_item",
1321
- "trait_declaration",
1322
- "module",
1323
- "type_declaration",
1324
- "impl_item", # Rust implementations
1325
- # Method-related nodes
1326
- "function_definition",
1327
- "function_declaration",
1328
- "method_definition",
1329
- "method_declaration",
1330
- "fn_item",
1331
- "method",
1332
- "singleton_method",
1333
- "constructor_declaration",
1334
- "member_function_definition",
1335
- "constructor",
1336
- "destructor",
1337
- "public_method_definition",
1338
- "private_method_definition",
1339
- "protected_method_definition",
1340
- # Property and field nodes
1341
- "property_declaration",
1342
- "field_declaration",
1343
- "variable_declaration",
1344
- "const_declaration",
1345
- "namespace_declaration",
1346
- }
1347
- ]
627
+ path = result["path"].replace("\\", "/")
628
+ parts = path.split("/")
629
+ current = tree
630
+ for i, part in enumerate(parts):
631
+ if i == len(parts) - 1:
632
+ current[part] = {"__is_file__": True, "__path__": result["path"]}
633
+ else:
634
+ if part not in current:
635
+ current[part] = {}
636
+ current = current[part]
637
+
638
+ output_lines = []
1348
639
 
1349
- for i, node in enumerate(significant_nodes):
1350
- is_last = i == len(significant_nodes) - 1
1351
- node_lines = format_node(node, "", is_last)
1352
- if node_lines: # Only add node lines if there are any
1353
- output_lines.extend(node_lines)
1354
- if i >= MAX_ITEMS_OUT:
1355
- output_lines.append(
1356
- f"...({len(significant_nodes) - MAX_ITEMS_OUT} more items)"
640
+ def format_tree(node: Dict[str, Any], indent: str = "") -> None:
641
+ items = sorted(node.keys())
642
+ for name in items:
643
+ child = node[name]
644
+ if isinstance(child, dict) and child.get("__is_file__"):
645
+ output_lines.append(f"{indent}{name}")
646
+ file_path = child["__path__"]
647
+ if file_path in results_by_path:
648
+ file_content = get_file_code_content(
649
+ results_by_path[file_path], indent
1357
650
  )
1358
- break
1359
- # else:
1360
- # output_lines.append(
1361
- # self.get_file_content(result["path"]).get("file")
1362
- # )
1363
- #
1364
- # Return the formatted text
651
+ output_lines.extend(file_content)
652
+ elif isinstance(child, dict):
653
+ output_lines.append(f"{indent}{name}/")
654
+ format_tree(child, indent + " ")
655
+
656
+ format_tree(tree)
657
+
1365
658
  return (
1366
659
  "\n".join(output_lines)
1367
660
  if output_lines
@@ -1385,15 +678,12 @@ Example response format:
  Returns:
  Dictionary with file content (key: "file", value: file content string)
  """
- # Read the whole file
  with open(file_path, "rb") as file:
  content = file.read()

  decoded_content = content.decode("utf-8")

- # If line range is specified, extract those lines
  if start_line is not None and end_line is not None:
- # Validate line range
  if start_line < 1:
  raise ValueError("start_line must be >= 1")
  if end_line < start_line:
@@ -1402,7 +692,6 @@ Example response format:
  lines = decoded_content.split("\n")
  total_lines = len(lines)

- # Validate bounds
  if start_line > total_lines:
  raise ValueError(
  f"start_line {start_line} exceeds file length ({total_lines} lines)"
@@ -1410,13 +699,55 @@ Example response format:
  if end_line > total_lines:
  end_line = total_lines

- # Extract the line range (convert to 0-indexed)
  selected_lines = lines[start_line - 1 : end_line]
  return {"file": "\n".join(selected_lines)}

- # Return the whole file
  return {"file": decoded_content}

+ def _build_file_tree(self, file_paths: List[str]) -> Dict[str, Any]:
+ """Build a hierarchical tree structure from flat file paths.
+
+ Args:
+ file_paths: List of relative file paths
+
+ Returns:
+ Nested dictionary representing the file tree
+ """
+ tree: Dict[str, Any] = {}
+ for path in sorted(file_paths):
+ parts = path.replace("\\", "/").split("/")
+ current = tree
+ for i, part in enumerate(parts):
+ if i == len(parts) - 1:
+ current[part] = None
+ else:
+ if part not in current:
+ current[part] = {}
+ current = current[part]
+ return tree
+
+ def _format_file_tree(self, tree: Dict[str, Any], indent: str = "") -> List[str]:
+ """Format a file tree dictionary into indented lines.
+
+ Args:
+ tree: Nested dictionary representing file tree
+ indent: Current indentation string
+
+ Returns:
+ List of formatted lines
+ """
+ lines = []
+ items = sorted(tree.keys())
+ for name in items:
+ subtree = tree[name]
+ if subtree is None:
+ lines.append(f"{indent}{name}")
+ else:
+ lines.append(f"{indent}{name}/")
+ child_lines = self._format_file_tree(subtree, indent + " ")
+ lines.extend(child_lines)
+ return lines
+
  def _format_analysis_results(
  self,
  analysis_results: List[Dict[str, Any]],
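
The two helpers added above are self-contained, so their behaviour is easy to illustrate. A module-level adaptation of the same logic (outside the class, with an assumed two-space indent step and made-up paths) produces the indented listing shown in the trailing comments.

# Module-level adaptation of the _build_file_tree/_format_file_tree pair added
# above, applied to made-up paths; the two-space indent step is an assumption.
from typing import Any, Dict, List


def build_file_tree(file_paths: List[str]) -> Dict[str, Any]:
    tree: Dict[str, Any] = {}
    for path in sorted(file_paths):
        parts = path.replace("\\", "/").split("/")
        current = tree
        for i, part in enumerate(parts):
            if i == len(parts) - 1:
                current[part] = None  # leaf entry: a file
            else:
                current = current.setdefault(part, {})  # branch: a directory
    return tree


def format_file_tree(tree: Dict[str, Any], indent: str = "") -> List[str]:
    lines: List[str] = []
    for name in sorted(tree.keys()):
        subtree = tree[name]
        if subtree is None:
            lines.append(f"{indent}{name}")
        else:
            lines.append(f"{indent}{name}/")
            lines.extend(format_file_tree(subtree, indent + "  "))
    return lines


paths = ["README.md", "src/main.py", "src/models/user.py"]
print("\n".join(format_file_tree(build_file_tree(paths))))
# README.md
# src/
#   main.py
#   models/
#     user.py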
@@ -1482,11 +813,14 @@ Example response format:
  f"The following {non_analyzed_count} files were not analyzed due to the {MAX_FILES_TO_ANALYZE} file limit:"
  )
  max_non_analyzed_to_show = int(MAX_FILES_TO_ANALYZE / 2)
- for file_path in sorted(non_analyzed_files[:max_non_analyzed_to_show]):
- sections.append(f" {file_path}")
+ non_analyzed_tree = self._build_file_tree(
+ sorted(non_analyzed_files)[:max_non_analyzed_to_show]
+ )
+ non_analyzed_tree_lines = self._format_file_tree(non_analyzed_tree)
+ sections.extend(non_analyzed_tree_lines)
  if len(non_analyzed_files) > max_non_analyzed_to_show:
  sections.append(
- f" ...and {len(non_analyzed_files) - max_non_analyzed_to_show} more files."
+ f"...and {len(non_analyzed_files) - max_non_analyzed_to_show} more files."
  )

  return "\n".join(sections)