claude-mpm 4.1.8__py3-none-any.whl → 4.1.11__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (111) hide show
  1. claude_mpm/VERSION +1 -1
  2. claude_mpm/agents/INSTRUCTIONS.md +26 -1
  3. claude_mpm/agents/agents_metadata.py +57 -0
  4. claude_mpm/agents/templates/.claude-mpm/memories/README.md +17 -0
  5. claude_mpm/agents/templates/.claude-mpm/memories/engineer_memories.md +3 -0
  6. claude_mpm/agents/templates/agent-manager.json +263 -17
  7. claude_mpm/agents/templates/agentic_coder_optimizer.json +222 -0
  8. claude_mpm/agents/templates/code_analyzer.json +18 -8
  9. claude_mpm/agents/templates/engineer.json +1 -1
  10. claude_mpm/agents/templates/logs/prompts/agent_engineer_20250826_014258_728.md +39 -0
  11. claude_mpm/agents/templates/qa.json +1 -1
  12. claude_mpm/agents/templates/research.json +1 -1
  13. claude_mpm/cli/__init__.py +15 -0
  14. claude_mpm/cli/commands/__init__.py +6 -0
  15. claude_mpm/cli/commands/analyze.py +548 -0
  16. claude_mpm/cli/commands/analyze_code.py +524 -0
  17. claude_mpm/cli/commands/configure.py +78 -28
  18. claude_mpm/cli/commands/configure_tui.py +62 -60
  19. claude_mpm/cli/commands/dashboard.py +288 -0
  20. claude_mpm/cli/commands/debug.py +1386 -0
  21. claude_mpm/cli/commands/mpm_init.py +427 -0
  22. claude_mpm/cli/commands/mpm_init_handler.py +83 -0
  23. claude_mpm/cli/parsers/analyze_code_parser.py +170 -0
  24. claude_mpm/cli/parsers/analyze_parser.py +135 -0
  25. claude_mpm/cli/parsers/base_parser.py +44 -0
  26. claude_mpm/cli/parsers/dashboard_parser.py +113 -0
  27. claude_mpm/cli/parsers/debug_parser.py +319 -0
  28. claude_mpm/cli/parsers/mpm_init_parser.py +122 -0
  29. claude_mpm/constants.py +13 -1
  30. claude_mpm/core/framework_loader.py +148 -6
  31. claude_mpm/core/log_manager.py +16 -13
  32. claude_mpm/core/logger.py +1 -1
  33. claude_mpm/core/unified_agent_registry.py +1 -1
  34. claude_mpm/dashboard/.claude-mpm/socketio-instances.json +1 -0
  35. claude_mpm/dashboard/analysis_runner.py +455 -0
  36. claude_mpm/dashboard/static/built/components/activity-tree.js +2 -0
  37. claude_mpm/dashboard/static/built/components/agent-inference.js +1 -1
  38. claude_mpm/dashboard/static/built/components/code-tree.js +2 -0
  39. claude_mpm/dashboard/static/built/components/code-viewer.js +2 -0
  40. claude_mpm/dashboard/static/built/components/event-viewer.js +1 -1
  41. claude_mpm/dashboard/static/built/components/file-tool-tracker.js +1 -1
  42. claude_mpm/dashboard/static/built/components/module-viewer.js +1 -1
  43. claude_mpm/dashboard/static/built/components/session-manager.js +1 -1
  44. claude_mpm/dashboard/static/built/components/working-directory.js +1 -1
  45. claude_mpm/dashboard/static/built/dashboard.js +1 -1
  46. claude_mpm/dashboard/static/built/socket-client.js +1 -1
  47. claude_mpm/dashboard/static/css/activity.css +549 -0
  48. claude_mpm/dashboard/static/css/code-tree.css +1175 -0
  49. claude_mpm/dashboard/static/css/dashboard.css +245 -0
  50. claude_mpm/dashboard/static/dist/components/activity-tree.js +2 -0
  51. claude_mpm/dashboard/static/dist/components/code-tree.js +2 -0
  52. claude_mpm/dashboard/static/dist/components/code-viewer.js +2 -0
  53. claude_mpm/dashboard/static/dist/components/event-viewer.js +1 -1
  54. claude_mpm/dashboard/static/dist/components/session-manager.js +1 -1
  55. claude_mpm/dashboard/static/dist/components/working-directory.js +1 -1
  56. claude_mpm/dashboard/static/dist/dashboard.js +1 -1
  57. claude_mpm/dashboard/static/dist/socket-client.js +1 -1
  58. claude_mpm/dashboard/static/js/components/activity-tree.js +1338 -0
  59. claude_mpm/dashboard/static/js/components/code-tree.js +2535 -0
  60. claude_mpm/dashboard/static/js/components/code-viewer.js +480 -0
  61. claude_mpm/dashboard/static/js/components/event-viewer.js +59 -9
  62. claude_mpm/dashboard/static/js/components/session-manager.js +40 -4
  63. claude_mpm/dashboard/static/js/components/socket-manager.js +12 -0
  64. claude_mpm/dashboard/static/js/components/ui-state-manager.js +4 -0
  65. claude_mpm/dashboard/static/js/components/working-directory.js +17 -1
  66. claude_mpm/dashboard/static/js/dashboard.js +51 -0
  67. claude_mpm/dashboard/static/js/socket-client.js +465 -29
  68. claude_mpm/dashboard/templates/index.html +182 -4
  69. claude_mpm/hooks/claude_hooks/hook_handler.py +182 -5
  70. claude_mpm/hooks/claude_hooks/installer.py +386 -113
  71. claude_mpm/scripts/claude-hook-handler.sh +161 -0
  72. claude_mpm/scripts/socketio_daemon.py +121 -8
  73. claude_mpm/services/agents/deployment/agent_lifecycle_manager_refactored.py +2 -2
  74. claude_mpm/services/agents/deployment/agent_record_service.py +1 -2
  75. claude_mpm/services/agents/memory/memory_format_service.py +1 -3
  76. claude_mpm/services/cli/agent_cleanup_service.py +1 -5
  77. claude_mpm/services/cli/agent_dependency_service.py +1 -1
  78. claude_mpm/services/cli/agent_validation_service.py +3 -4
  79. claude_mpm/services/cli/dashboard_launcher.py +2 -3
  80. claude_mpm/services/cli/startup_checker.py +0 -11
  81. claude_mpm/services/core/cache_manager.py +1 -3
  82. claude_mpm/services/core/path_resolver.py +1 -4
  83. claude_mpm/services/core/service_container.py +2 -2
  84. claude_mpm/services/diagnostics/checks/instructions_check.py +1 -2
  85. claude_mpm/services/infrastructure/monitoring/__init__.py +11 -11
  86. claude_mpm/services/infrastructure/monitoring.py +11 -11
  87. claude_mpm/services/project/architecture_analyzer.py +1 -1
  88. claude_mpm/services/project/dependency_analyzer.py +4 -4
  89. claude_mpm/services/project/language_analyzer.py +3 -3
  90. claude_mpm/services/project/metrics_collector.py +3 -6
  91. claude_mpm/services/socketio/event_normalizer.py +64 -0
  92. claude_mpm/services/socketio/handlers/__init__.py +2 -0
  93. claude_mpm/services/socketio/handlers/code_analysis.py +672 -0
  94. claude_mpm/services/socketio/handlers/registry.py +2 -0
  95. claude_mpm/services/socketio/server/connection_manager.py +6 -4
  96. claude_mpm/services/socketio/server/core.py +100 -11
  97. claude_mpm/services/socketio/server/main.py +8 -2
  98. claude_mpm/services/visualization/__init__.py +19 -0
  99. claude_mpm/services/visualization/mermaid_generator.py +938 -0
  100. claude_mpm/tools/__main__.py +208 -0
  101. claude_mpm/tools/code_tree_analyzer.py +1596 -0
  102. claude_mpm/tools/code_tree_builder.py +631 -0
  103. claude_mpm/tools/code_tree_events.py +416 -0
  104. claude_mpm/tools/socketio_debug.py +671 -0
  105. {claude_mpm-4.1.8.dist-info → claude_mpm-4.1.11.dist-info}/METADATA +2 -1
  106. {claude_mpm-4.1.8.dist-info → claude_mpm-4.1.11.dist-info}/RECORD +110 -74
  107. claude_mpm/agents/schema/agent_schema.json +0 -314
  108. {claude_mpm-4.1.8.dist-info → claude_mpm-4.1.11.dist-info}/WHEEL +0 -0
  109. {claude_mpm-4.1.8.dist-info → claude_mpm-4.1.11.dist-info}/entry_points.txt +0 -0
  110. {claude_mpm-4.1.8.dist-info → claude_mpm-4.1.11.dist-info}/licenses/LICENSE +0 -0
  111. {claude_mpm-4.1.8.dist-info → claude_mpm-4.1.11.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,1596 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ Code Tree Analyzer
4
+ ==================
5
+
6
+ WHY: Analyzes source code using AST to extract structure and metrics,
7
+ supporting multiple languages and emitting incremental events for visualization.
8
+
9
+ DESIGN DECISIONS:
10
+ - Use Python's ast module for Python files
11
+ - Use tree-sitter for multi-language support
12
+ - Extract comprehensive metadata (complexity, docstrings, etc.)
13
+ - Cache parsed results to avoid re-processing
14
+ - Support incremental processing with checkpoints
15
+ """
16
+
17
import ast
import hashlib
import json
import time
from dataclasses import dataclass, field
from pathlib import Path
from typing import Any, Dict, List, Optional
24
+
25
+ try:
26
+ import pathspec
27
+
28
+ PATHSPEC_AVAILABLE = True
29
+ except ImportError:
30
+ PATHSPEC_AVAILABLE = False
31
+ pathspec = None
32
+
33
+ try:
34
+ import tree_sitter
35
+ import tree_sitter_javascript
36
+ import tree_sitter_python
37
+ import tree_sitter_typescript
38
+
39
+ TREE_SITTER_AVAILABLE = True
40
+ except ImportError:
41
+ TREE_SITTER_AVAILABLE = False
42
+ tree_sitter = None
43
+
44
+ from ..core.logging_config import get_logger
45
+ from .code_tree_events import CodeNodeEvent, CodeTreeEventEmitter
46
+
47
+
48
class GitignoreManager:
    """Manages .gitignore pattern matching for file filtering.

    WHY: Properly respecting .gitignore patterns ensures we don't analyze
    or display files that should be ignored in the repository.

    Matching uses the ``pathspec`` library when available and falls back to
    a simplified matcher otherwise. Parsed .gitignore contents and compiled
    PathSpec objects are cached per path/directory.
    """

    # Default patterns that should always be ignored
    DEFAULT_PATTERNS = [
        ".git/",
        "__pycache__/",
        "*.pyc",
        "*.pyo",
        ".DS_Store",
        ".pytest_cache/",
        ".mypy_cache/",
        "dist/",
        "build/",
        "*.egg-info/",
        ".coverage",
        ".tox/",
        "htmlcov/",
        ".idea/",
        ".vscode/",
        "*.swp",
        "*.swo",
        "*~",
        "Thumbs.db",
        "node_modules/",
        ".venv/",
        "venv/",
        "env/",
        ".env",
        "*.log",
        ".ipynb_checkpoints/",
        "__MACOSX/",
        ".Spotlight-V100/",
        ".Trashes/",
        "desktop.ini",
    ]

    # Additional patterns to hide dotfiles (when enabled)
    DOTFILE_PATTERNS = [
        ".*",  # All dotfiles
        ".*/",  # All dot directories
    ]

    # Important files/directories to always show
    DOTFILE_EXCEPTIONS = {
        # Removed .gitignore from exceptions - it should be hidden by default
        ".env.example",
        ".env.sample",
        ".gitlab-ci.yml",
        ".travis.yml",
        ".dockerignore",
        ".editorconfig",
        ".eslintrc",
        ".prettierrc"
        # Removed .github from exceptions - it should be hidden by default
    }

    def __init__(self, show_hidden_files: bool = False):
        """Initialize the GitignoreManager.

        Args:
            show_hidden_files: Whether to show hidden files/directories
        """
        self.logger = get_logger(__name__)
        self._pathspec_cache: Dict[str, Any] = {}
        self._gitignore_cache: Dict[str, List[str]] = {}
        self._use_pathspec = PATHSPEC_AVAILABLE
        self.show_hidden_files = show_hidden_files

        if not self._use_pathspec:
            self.logger.warning(
                "pathspec library not available - using basic pattern matching"
            )

    def get_ignore_patterns(self, working_dir: Path) -> List[str]:
        """Get all ignore patterns for a directory.

        Args:
            working_dir: The working directory to search for .gitignore files

        Returns:
            Combined list of ignore patterns from all sources
        """
        # Always include default patterns
        patterns = self.DEFAULT_PATTERNS.copy()

        # Don't add dotfile patterns here - handle them separately in should_ignore
        # This prevents exceptions from being overridden by the .* pattern

        # Find and parse .gitignore files
        gitignore_files = self._find_gitignore_files(working_dir)
        for gitignore_file in gitignore_files:
            patterns.extend(self._parse_gitignore(gitignore_file))

        return patterns

    def should_ignore(self, path: Path, working_dir: Path) -> bool:
        """Check if a path should be ignored based on patterns.

        Args:
            path: The path to check
            working_dir: The working directory (for relative path calculation)

        Returns:
            True if the path should be ignored
        """
        filename = path.name

        # 1. ALWAYS hide system files regardless of settings
        ALWAYS_HIDE = {'.DS_Store', 'Thumbs.db', '.pyc', '.pyo', '.pyd'}
        if filename in ALWAYS_HIDE or filename.endswith(('.pyc', '.pyo', '.pyd')):
            return True

        # 2. Check dotfiles BEFORE pattern matching so DOTFILE_EXCEPTIONS
        # cannot be overridden by a .* pattern from a .gitignore file.
        if filename.startswith('.'):
            if self.show_hidden_files:
                return False  # Show the dotfile
            # Hide all dotfiles except those in the exceptions list
            return filename not in self.DOTFILE_EXCEPTIONS

        # Get or create PathSpec for this working directory
        pathspec_obj = self._get_pathspec(working_dir)

        if pathspec_obj:
            # Use pathspec for accurate gitwildmatch semantics
            try:
                rel_path = path.relative_to(working_dir)
                rel_path_str = str(rel_path)

                # For directories, also check with trailing slash so that
                # "dir/" patterns match.
                if path.is_dir():
                    return pathspec_obj.match_file(rel_path_str) or pathspec_obj.match_file(rel_path_str + '/')
                return pathspec_obj.match_file(rel_path_str)
            except ValueError:
                # Path is outside working directory
                return False
        else:
            # Fallback to basic pattern matching
            return self._basic_should_ignore(path, working_dir)

    def _get_pathspec(self, working_dir: Path) -> Optional[Any]:
        """Get or create a PathSpec object for the working directory.

        Args:
            working_dir: The working directory

        Returns:
            PathSpec object or None if not available
        """
        if not self._use_pathspec:
            return None

        cache_key = str(working_dir)
        if cache_key not in self._pathspec_cache:
            patterns = self.get_ignore_patterns(working_dir)
            try:
                self._pathspec_cache[cache_key] = pathspec.PathSpec.from_lines(
                    "gitwildmatch", patterns
                )
            except Exception as e:
                self.logger.warning(f"Failed to create PathSpec: {e}")
                return None

        return self._pathspec_cache[cache_key]

    def _find_gitignore_files(self, working_dir: Path) -> List[Path]:
        """Find all .gitignore files that apply to working_dir.

        Collects the .gitignore in working_dir itself, then walks up the
        parent chain gathering ancestor .gitignore files, stopping at the
        repository root (the first directory containing ``.git``).

        Args:
            working_dir: The directory to search

        Returns:
            List of .gitignore file paths
        """
        gitignore_files = []

        # Check for .gitignore in working directory
        main_gitignore = working_dir / ".gitignore"
        if main_gitignore.exists():
            gitignore_files.append(main_gitignore)

        current = working_dir
        while current != current.parent:
            # Stop at the repository root BEFORE collecting the parent's
            # .gitignore: files above the repo root do not apply to it.
            if (current / ".git").exists():
                break

            parent_gitignore = current.parent / ".gitignore"
            if parent_gitignore.exists():
                gitignore_files.append(parent_gitignore)

            current = current.parent

        return gitignore_files

    def _parse_gitignore(self, gitignore_path: Path) -> List[str]:
        """Parse a .gitignore file and return patterns.

        Args:
            gitignore_path: Path to .gitignore file

        Returns:
            List of patterns from the file (comments and blanks skipped)
        """
        cache_key = str(gitignore_path)

        # Check cache
        if cache_key in self._gitignore_cache:
            return self._gitignore_cache[cache_key]

        patterns = []
        try:
            with open(gitignore_path, encoding="utf-8") as f:
                for line in f:
                    line = line.strip()
                    # Skip empty lines and comments
                    if line and not line.startswith("#"):
                        patterns.append(line)

            self._gitignore_cache[cache_key] = patterns
        except Exception as e:
            self.logger.warning(f"Failed to parse {gitignore_path}: {e}")

        return patterns

    def _basic_should_ignore(self, path: Path, working_dir: Path) -> bool:
        """Basic pattern matching fallback when pathspec is not available.

        Args:
            path: The path to check
            working_dir: The working directory

        Returns:
            True if the path should be ignored
        """
        path_str = str(path)
        path_name = path.name

        # 1. ALWAYS hide system files regardless of settings
        ALWAYS_HIDE = {'.DS_Store', 'Thumbs.db', '.pyc', '.pyo', '.pyd'}
        if path_name in ALWAYS_HIDE or path_name.endswith(('.pyc', '.pyo', '.pyd')):
            return True

        # 2. Check dotfiles BEFORE pattern matching
        if path_name.startswith('.'):
            if self.show_hidden_files:
                return False  # Show the dotfile
            # Hide unless the name is in the exceptions list
            return path_name not in self.DOTFILE_EXCEPTIONS

        patterns = self.get_ignore_patterns(working_dir)

        for pattern in patterns:
            # Skip dotfile patterns since we already handled them above
            if pattern in [".*", ".*/"]:
                continue

            # Simple pattern matching (approximation of gitwildmatch)
            if pattern.endswith("/"):
                # Directory pattern
                if path.is_dir() and path_name == pattern[:-1]:
                    return True
            elif pattern.startswith("*."):
                # Extension pattern
                if path_name.endswith(pattern[1:]):
                    return True
            elif "*" in pattern:
                # Wildcard pattern (simplified)
                import fnmatch

                if fnmatch.fnmatch(path_name, pattern):
                    return True
            elif pattern in path_str:
                # Substring match
                return True
            elif path_name == pattern:
                # Exact match
                return True

        return False

    def clear_cache(self):
        """Clear all caches."""
        self._pathspec_cache.clear()
        self._gitignore_cache.clear()
345
+
346
+
347
@dataclass
class CodeNode:
    """Represents a node in the code tree.

    One instance per discovered entity (class, function, method, import).
    Mutable container fields use ``field(default_factory=...)`` so every
    instance gets its own list/dict instead of relying on a
    ``__post_init__`` None-replacement dance.
    """

    file_path: str  # absolute or relative path of the source file
    node_type: str  # "class" | "function" | "method" | "import"
    name: str
    line_start: int  # 1-based first line of the definition
    line_end: int  # 1-based last line of the definition
    complexity: int = 0  # cyclomatic complexity (0 when not computed)
    has_docstring: bool = False
    decorators: List[str] = field(default_factory=list)
    parent: Optional[str] = None  # enclosing class/scope name, if any
    children: List["CodeNode"] = field(default_factory=list)
    language: str = "python"
    signature: str = ""  # human-readable signature, e.g. "f(a, b)"
    metrics: Dict[str, Any] = field(default_factory=dict)
372
+
373
+
374
class PythonAnalyzer:
    """Analyzes Python source code using AST.

    WHY: Python's built-in AST module provides rich structural information
    that we can leverage for detailed analysis.
    """

    def __init__(self, emitter: Optional[CodeTreeEventEmitter] = None):
        self.logger = get_logger(__name__)
        self.emitter = emitter

    def analyze_file(self, file_path: Path) -> List[CodeNode]:
        """Analyze a Python file and extract code structure.

        Args:
            file_path: Path to Python file

        Returns:
            List of code nodes found in the file; empty when the file
            cannot be read or parsed (the error is logged and emitted).
        """
        nodes: List[CodeNode] = []

        try:
            with open(file_path, encoding="utf-8") as f:
                source = f.read()

            tree = ast.parse(source, filename=str(file_path))
            nodes = self._extract_nodes(tree, file_path, source)

        except SyntaxError as e:
            self.logger.warning(f"Syntax error in {file_path}: {e}")
            if self.emitter:
                self.emitter.emit_error(str(file_path), f"Syntax error: {e}")
        except Exception as e:
            self.logger.error(f"Error analyzing {file_path}: {e}")
            if self.emitter:
                self.emitter.emit_error(str(file_path), str(e))

        return nodes

    def _extract_nodes(
        self, tree: ast.AST, file_path: Path, source: str
    ) -> List[CodeNode]:
        """Extract code nodes from AST tree.

        Imports are collected first, then classes/functions/methods via a
        visitor. Nested classes and functions nested inside functions are
        not descended into by the visitor's class handler.

        Args:
            tree: AST tree
            file_path: Source file path
            source: Source code text (kept for interface stability; the
                AST already carries the line information we need)

        Returns:
            List of extracted code nodes (imports first)
        """
        nodes: List[CodeNode] = []
        emitter = self.emitter  # captured by the visitor closure below

        class NodeVisitor(ast.NodeVisitor):
            def __init__(self, parent_name: Optional[str] = None):
                self.parent_name = parent_name
                self.current_class = None
                self.emitter = emitter

            def visit_ClassDef(self, node):
                """Record a class node and its direct methods."""
                class_node = CodeNode(
                    file_path=str(file_path),
                    node_type="class",
                    name=node.name,
                    line_start=node.lineno,
                    line_end=node.end_lineno or node.lineno,
                    has_docstring=bool(ast.get_docstring(node)),
                    decorators=[self._decorator_name(d) for d in node.decorator_list],
                    parent=self.parent_name,
                    complexity=self._calculate_complexity(node),
                    signature=self._get_class_signature(node),
                )

                nodes.append(class_node)

                # Stream the node out incrementally if an emitter is wired up
                if self.emitter:
                    self.emitter.emit_node(
                        CodeNodeEvent(
                            file_path=str(file_path),
                            node_type="class",
                            name=node.name,
                            line_start=node.lineno,
                            line_end=node.end_lineno or node.lineno,
                            complexity=class_node.complexity,
                            has_docstring=class_node.has_docstring,
                            decorators=class_node.decorators,
                            parent=self.parent_name,
                            children_count=len(node.body),
                        )
                    )

                # Visit direct (async) methods, tagging them with this class
                old_class = self.current_class
                self.current_class = node.name
                for child in node.body:
                    if isinstance(child, (ast.FunctionDef, ast.AsyncFunctionDef)):
                        self.visit_FunctionDef(child, is_method=True)
                self.current_class = old_class

            def visit_FunctionDef(self, node, is_method=False):
                """Record a function or method node."""
                node_type = "method" if is_method else "function"
                parent = self.current_class if is_method else self.parent_name

                func_node = CodeNode(
                    file_path=str(file_path),
                    node_type=node_type,
                    name=node.name,
                    line_start=node.lineno,
                    line_end=node.end_lineno or node.lineno,
                    has_docstring=bool(ast.get_docstring(node)),
                    decorators=[self._decorator_name(d) for d in node.decorator_list],
                    parent=parent,
                    complexity=self._calculate_complexity(node),
                    signature=self._get_function_signature(node),
                )

                nodes.append(func_node)

                if self.emitter:
                    self.emitter.emit_node(
                        CodeNodeEvent(
                            file_path=str(file_path),
                            node_type=node_type,
                            name=node.name,
                            line_start=node.lineno,
                            line_end=node.end_lineno or node.lineno,
                            complexity=func_node.complexity,
                            has_docstring=func_node.has_docstring,
                            decorators=func_node.decorators,
                            parent=parent,
                            children_count=0,
                        )
                    )

            def visit_AsyncFunctionDef(self, node):
                # Async functions are handled identically to sync ones
                self.visit_FunctionDef(node)

            def _decorator_name(self, decorator):
                """Extract decorator name from AST node."""
                if isinstance(decorator, ast.Name):
                    return decorator.id
                if isinstance(decorator, ast.Call):
                    if isinstance(decorator.func, ast.Name):
                        return decorator.func.id
                    if isinstance(decorator.func, ast.Attribute):
                        return decorator.func.attr
                return "unknown"

            def _calculate_complexity(self, node):
                """Calculate cyclomatic complexity: 1 + branches + extra bool operands."""
                complexity = 1  # Base complexity

                for child in ast.walk(node):
                    if isinstance(
                        child, (ast.If, ast.While, ast.For, ast.ExceptHandler)
                    ):
                        complexity += 1
                    elif isinstance(child, ast.BoolOp):
                        complexity += len(child.values) - 1

                return complexity

            def _get_function_signature(self, node):
                """Extract function signature like ``name(a, b)`` (positional args only)."""
                args = [arg.arg for arg in node.args.args]
                return f"{node.name}({', '.join(args)})"

            def _get_class_signature(self, node):
                """Extract class signature like ``class Name(Base)`` (simple-name bases only)."""
                bases = [base.id for base in node.bases if isinstance(base, ast.Name)]
                base_str = f"({', '.join(bases)})" if bases else ""
                return f"class {node.name}{base_str}"

        # Extract imports first so they lead the node list
        for node in ast.walk(tree):
            if isinstance(node, ast.Import):
                for alias in node.names:
                    import_node = CodeNode(
                        file_path=str(file_path),
                        node_type="import",
                        name=alias.name,
                        line_start=node.lineno,
                        line_end=node.end_lineno or node.lineno,
                        signature=f"import {alias.name}",
                    )
                    nodes.append(import_node)

            elif isinstance(node, ast.ImportFrom):
                module = node.module or ""
                for alias in node.names:
                    import_node = CodeNode(
                        file_path=str(file_path),
                        node_type="import",
                        name=f"{module}.{alias.name}",
                        line_start=node.lineno,
                        line_end=node.end_lineno or node.lineno,
                        signature=f"from {module} import {alias.name}",
                    )
                    nodes.append(import_node)

        # Visit all top-level definitions
        visitor = NodeVisitor()
        visitor.visit(tree)

        return nodes
592
+
593
+
594
class MultiLanguageAnalyzer:
    """Analyzes multiple programming languages using tree-sitter.

    WHY: Tree-sitter provides consistent parsing across multiple languages,
    allowing us to support JavaScript, TypeScript, and other languages.
    """

    # language name -> grammar package module name
    LANGUAGE_PARSERS = {
        "python": "tree_sitter_python",
        "javascript": "tree_sitter_javascript",
        "typescript": "tree_sitter_typescript",
    }

    def __init__(self, emitter: Optional[CodeTreeEventEmitter] = None):
        self.logger = get_logger(__name__)
        self.emitter = emitter
        self.parsers = {}
        self._init_parsers()

    def _init_parsers(self):
        """Initialize tree-sitter parsers for supported languages.

        Parsers that fail to load are skipped silently (debug log only);
        analysis then falls back to basic file discovery for that language.
        """
        if not TREE_SITTER_AVAILABLE:
            self.logger.warning(
                "tree-sitter not available - multi-language support disabled"
            )
            return

        for lang, module_name in self.LANGUAGE_PARSERS.items():
            try:
                # Dynamic import of language module
                module = __import__(module_name)

                # Most grammar packages expose language(); tree_sitter_typescript
                # instead exposes language_typescript()/language_tsx(), so fall
                # back to the language-suffixed accessor when needed.
                if hasattr(module, "language"):
                    lang_obj = tree_sitter.Language(module.language())
                else:
                    lang_obj = tree_sitter.Language(
                        getattr(module, f"language_{lang}")()
                    )

                parser = tree_sitter.Parser()
                # Different tree-sitter versions have different APIs
                if hasattr(parser, "set_language"):
                    parser.set_language(lang_obj)
                else:
                    # Newer API takes the language in the constructor
                    parser = tree_sitter.Parser(lang_obj)
                self.parsers[lang] = parser
            except (ImportError, AttributeError) as e:
                # Silently skip unavailable parsers - will fall back to basic file discovery
                self.logger.debug(f"Language parser not available for {lang}: {e}")

    def analyze_file(self, file_path: Path, language: str) -> List[CodeNode]:
        """Analyze a file using tree-sitter.

        Args:
            file_path: Path to source file
            language: Programming language

        Returns:
            List of code nodes found in the file (empty when no parser
            is available or parsing fails)
        """
        if language not in self.parsers:
            # No parser available - return empty list to fall back to basic discovery
            self.logger.debug(
                f"No parser available for language: {language}, using basic file discovery"
            )
            return []

        nodes = []

        try:
            with open(file_path, "rb") as f:
                source = f.read()

            parser = self.parsers[language]
            tree = parser.parse(source)

            # Extract nodes based on language
            if language in {"javascript", "typescript"}:
                nodes = self._extract_js_nodes(tree, file_path, source)
            else:
                nodes = self._extract_generic_nodes(tree, file_path, source, language)

        except Exception as e:
            self.logger.error(f"Error analyzing {file_path}: {e}")
            if self.emitter:
                self.emitter.emit_error(str(file_path), str(e))

        return nodes

    def _extract_js_nodes(self, tree, file_path: Path, source: bytes) -> List[CodeNode]:
        """Extract class/function/method nodes from JavaScript/TypeScript trees.

        Anonymous constructs (e.g. unnamed arrow functions) carry no "name"
        field and are skipped.
        """
        nodes = []

        def walk_tree(node, parent_name=None):
            if node.type == "class_declaration":
                name_node = node.child_by_field_name("name")
                if name_node:
                    class_node = CodeNode(
                        file_path=str(file_path),
                        node_type="class",
                        name=source[name_node.start_byte : name_node.end_byte].decode(
                            "utf-8"
                        ),
                        # tree-sitter points are 0-based; convert to 1-based lines
                        line_start=node.start_point[0] + 1,
                        line_end=node.end_point[0] + 1,
                        parent=parent_name,
                        language="javascript",
                    )
                    nodes.append(class_node)

                    if self.emitter:
                        self.emitter.emit_node(
                            CodeNodeEvent(
                                file_path=str(file_path),
                                node_type="class",
                                name=class_node.name,
                                line_start=class_node.line_start,
                                line_end=class_node.line_end,
                                parent=parent_name,
                                language="javascript",
                            )
                        )

            elif node.type in (
                "function_declaration",
                "arrow_function",
                "method_definition",
            ):
                name_node = node.child_by_field_name("name")
                if name_node:
                    func_name = source[
                        name_node.start_byte : name_node.end_byte
                    ].decode("utf-8")
                    func_node = CodeNode(
                        file_path=str(file_path),
                        node_type=(
                            "function" if node.type != "method_definition" else "method"
                        ),
                        name=func_name,
                        line_start=node.start_point[0] + 1,
                        line_end=node.end_point[0] + 1,
                        parent=parent_name,
                        language="javascript",
                    )
                    nodes.append(func_node)

                    if self.emitter:
                        self.emitter.emit_node(
                            CodeNodeEvent(
                                file_path=str(file_path),
                                node_type=func_node.node_type,
                                name=func_name,
                                line_start=func_node.line_start,
                                line_end=func_node.line_end,
                                parent=parent_name,
                                language="javascript",
                            )
                        )

            # Recursively walk children
            for child in node.children:
                walk_tree(child, parent_name)

        walk_tree(tree.root_node)
        return nodes

    def _extract_generic_nodes(
        self, tree, file_path: Path, source: bytes, language: str
    ) -> List[CodeNode]:
        """Generic node extraction for other languages.

        Heuristic: any tree-sitter node whose type mentions "class"/"struct"
        or "function"/"method" becomes a node, named after its type and
        start line since we don't know the grammar's name field.
        """
        nodes = []

        def walk_tree(node):
            if "class" in node.type or "struct" in node.type:
                nodes.append(
                    CodeNode(
                        file_path=str(file_path),
                        node_type="class",
                        name=f"{node.type}_{node.start_point[0]}",
                        line_start=node.start_point[0] + 1,
                        line_end=node.end_point[0] + 1,
                        language=language,
                    )
                )
            elif "function" in node.type or "method" in node.type:
                nodes.append(
                    CodeNode(
                        file_path=str(file_path),
                        node_type="function",
                        name=f"{node.type}_{node.start_point[0]}",
                        line_start=node.start_point[0] + 1,
                        line_end=node.end_point[0] + 1,
                        language=language,
                    )
                )

            for child in node.children:
                walk_tree(child)

        walk_tree(tree.root_node)
        return nodes
793
+
794
+
795
+ class CodeTreeAnalyzer:
796
+ """Main analyzer that coordinates language-specific analyzers.
797
+
798
+ WHY: Provides a unified interface for analyzing codebases with multiple
799
+ languages, handling caching and incremental processing.
800
+ """
801
+
802
    # Define code file extensions at class level for directory filtering.
    # NOTE: this is broader than LANGUAGE_MAP — it includes config/markup/doc
    # extensions that are listed in the tree but not AST-analyzed.
    CODE_EXTENSIONS = {
        '.py', '.js', '.ts', '.tsx', '.jsx', '.java', '.cpp', '.c', '.h', '.hpp',
        '.cs', '.go', '.rs', '.rb', '.php', '.swift', '.kt', '.scala', '.r',
        '.m', '.mm', '.sh', '.bash', '.zsh', '.fish', '.ps1', '.bat', '.cmd',
        '.sql', '.html', '.css', '.scss', '.sass', '.less', '.xml', '.json',
        '.yaml', '.yml', '.toml', '.ini', '.cfg', '.conf', '.md', '.rst', '.txt'
    }

    # File extensions to language mapping (languages with real analyzers)
    LANGUAGE_MAP = {
        ".py": "python",
        ".js": "javascript",
        ".jsx": "javascript",
        ".ts": "typescript",
        ".tsx": "typescript",
        ".mjs": "javascript",
        ".cjs": "javascript",
    }
821
+
822
+ def __init__(
823
+ self,
824
+ emit_events: bool = True,
825
+ cache_dir: Optional[Path] = None,
826
+ emitter: Optional[CodeTreeEventEmitter] = None,
827
+ show_hidden_files: bool = False,
828
+ ):
829
+ """Initialize the code tree analyzer.
830
+
831
+ Args:
832
+ emit_events: Whether to emit Socket.IO events
833
+ cache_dir: Directory for caching analysis results
834
+ emitter: Optional event emitter to use (creates one if not provided)
835
+ show_hidden_files: Whether to show hidden files/directories (default False - hide dotfiles)
836
+ """
837
+ self.logger = get_logger(__name__)
838
+ self.emit_events = emit_events
839
+ self.cache_dir = cache_dir or Path.home() / ".claude-mpm" / "code-cache"
840
+ self.show_hidden_files = show_hidden_files
841
+
842
+ # Initialize gitignore manager with hidden files setting (default False)
843
+ self.gitignore_manager = GitignoreManager(show_hidden_files=show_hidden_files)
844
+ self._last_working_dir = None
845
+
846
+ # Use provided emitter or create one
847
+ if emitter:
848
+ self.emitter = emitter
849
+ elif emit_events:
850
+ self.emitter = CodeTreeEventEmitter(use_stdout=True)
851
+ else:
852
+ self.emitter = None
853
+
854
+ # Initialize language analyzers
855
+ self.python_analyzer = PythonAnalyzer(self.emitter)
856
+ self.multi_lang_analyzer = MultiLanguageAnalyzer(self.emitter)
857
+
858
+ # For JavaScript/TypeScript
859
+ self.javascript_analyzer = self.multi_lang_analyzer
860
+ self.generic_analyzer = self.multi_lang_analyzer
861
+
862
+ # Cache for processed files
863
+ self.cache = {}
864
+ self._load_cache()
865
+
866
    def analyze_directory(
        self,
        directory: Path,
        languages: Optional[List[str]] = None,
        ignore_patterns: Optional[List[str]] = None,
        max_depth: Optional[int] = None,
    ) -> Dict[str, Any]:
        """Analyze a directory and build code tree.

        Recursively globs files per LANGUAGE_MAP extension, filters them
        through the gitignore manager and caller-supplied patterns, then
        runs the per-language analyzer on each file (with an MD5-keyed
        result cache) and assembles a hierarchical tree plus statistics.

        Args:
            directory: Directory to analyze
            languages: Languages to include (None for all)
            ignore_patterns: Substring patterns to ignore (matched against
                the full path string)
            max_depth: Maximum directory depth
                NOTE(review): a value of 0 is falsy and therefore means
                "no limit" here — confirm that is intended.

        Returns:
            Dictionary containing the code tree and statistics
            ({"tree": ..., "nodes": ..., "stats": ...})
        """
        if self.emitter:
            self.emitter.start()

        start_time = time.time()
        all_nodes = []
        files_processed = 0
        total_files = 0

        # Collect files to process
        files_to_process = []
        for ext, lang in self.LANGUAGE_MAP.items():
            if languages and lang not in languages:
                continue

            for file_path in directory.rglob(f"*{ext}"):
                # Use gitignore manager for filtering with directory as working dir
                if self.gitignore_manager.should_ignore(file_path, directory):
                    continue

                # Also check additional patterns (simple substring match)
                if ignore_patterns and any(
                    p in str(file_path) for p in ignore_patterns
                ):
                    continue

                # Check max depth (depth 0 == directly inside `directory`)
                if max_depth:
                    depth = len(file_path.relative_to(directory).parts) - 1
                    if depth > max_depth:
                        continue

                files_to_process.append((file_path, lang))

        total_files = len(files_to_process)

        # Process files
        for file_path, language in files_to_process:
            # Check cache — keyed on path AND content hash, so edits
            # invalidate the entry automatically.
            file_hash = self._get_file_hash(file_path)
            cache_key = f"{file_path}:{file_hash}"

            if cache_key in self.cache:
                nodes = self.cache[cache_key]
                self.logger.debug(f"Using cached results for {file_path}")
            else:
                # Emit file start event
                if self.emitter:
                    self.emitter.emit_file_start(str(file_path), language)

                file_start = time.time()

                # Analyze based on language: Python has a dedicated
                # analyzer; everything else uses the multi-language one.
                if language == "python":
                    nodes = self.python_analyzer.analyze_file(file_path)
                else:
                    nodes = self.multi_lang_analyzer.analyze_file(file_path, language)

                # If no nodes found and we have a valid language, emit basic file info
                if not nodes and language != "unknown":
                    self.logger.debug(
                        f"No AST nodes found for {file_path}, using basic discovery"
                    )

                # Cache results (even empty lists, so re-runs skip re-parsing)
                self.cache[cache_key] = nodes

                # Emit file complete event
                if self.emitter:
                    self.emitter.emit_file_complete(
                        str(file_path), len(nodes), time.time() - file_start
                    )

            all_nodes.extend(nodes)
            files_processed += 1

            # Emit progress every 10 files to avoid flooding the socket
            if self.emitter and files_processed % 10 == 0:
                self.emitter.emit_progress(
                    files_processed, total_files, f"Processing {file_path.name}"
                )

        # Build tree structure
        tree = self._build_tree(all_nodes, directory)

        # Calculate statistics over the flat node list
        duration = time.time() - start_time
        stats = {
            "files_processed": files_processed,
            "total_nodes": len(all_nodes),
            "duration": duration,
            "classes": sum(1 for n in all_nodes if n.node_type == "class"),
            "functions": sum(
                1 for n in all_nodes if n.node_type in ("function", "method")
            ),
            "imports": sum(1 for n in all_nodes if n.node_type == "import"),
            "languages": list(
                {n.language for n in all_nodes if hasattr(n, "language")}
            ),
            "avg_complexity": (
                sum(n.complexity for n in all_nodes) / len(all_nodes)
                if all_nodes
                else 0
            ),
        }

        # Save cache (persists across runs)
        self._save_cache()

        # Stop emitter
        if self.emitter:
            self.emitter.stop()

        return {"tree": tree, "nodes": all_nodes, "stats": stats}
997
+
998
+ def _should_ignore(self, file_path: Path, patterns: Optional[List[str]]) -> bool:
999
+ """Check if file should be ignored.
1000
+
1001
+ Uses GitignoreManager for proper pattern matching.
1002
+ """
1003
+ # Get the working directory (use parent for files, self for directories)
1004
+ if file_path.is_file():
1005
+ working_dir = file_path.parent
1006
+ else:
1007
+ # For directories during discovery, use the parent
1008
+ working_dir = (
1009
+ file_path.parent if file_path.parent != file_path else Path.cwd()
1010
+ )
1011
+
1012
+ # Use gitignore manager for checking
1013
+ if self.gitignore_manager.should_ignore(file_path, working_dir):
1014
+ return True
1015
+
1016
+ # Also check any additional patterns provided
1017
+ if patterns:
1018
+ path_str = str(file_path)
1019
+ return any(pattern in path_str for pattern in patterns)
1020
+
1021
+ return False
1022
+
1023
+ def _get_file_hash(self, file_path: Path) -> str:
1024
+ """Get hash of file contents for caching."""
1025
+ hasher = hashlib.md5()
1026
+ with open(file_path, "rb") as f:
1027
+ hasher.update(f.read())
1028
+ return hasher.hexdigest()
1029
+
1030
+ def _build_tree(self, nodes: List[CodeNode], root_dir: Path) -> Dict[str, Any]:
1031
+ """Build hierarchical tree structure from flat nodes list."""
1032
+ tree = {
1033
+ "name": root_dir.name,
1034
+ "type": "directory",
1035
+ "path": str(root_dir),
1036
+ "children": [],
1037
+ }
1038
+
1039
+ # Group nodes by file
1040
+ files_map = {}
1041
+ for node in nodes:
1042
+ if node.file_path not in files_map:
1043
+ files_map[node.file_path] = {
1044
+ "name": Path(node.file_path).name,
1045
+ "type": "file",
1046
+ "path": node.file_path,
1047
+ "children": [],
1048
+ }
1049
+
1050
+ # Add node to file
1051
+ node_dict = {
1052
+ "name": node.name,
1053
+ "type": node.node_type,
1054
+ "line_start": node.line_start,
1055
+ "line_end": node.line_end,
1056
+ "complexity": node.complexity,
1057
+ "has_docstring": node.has_docstring,
1058
+ "decorators": node.decorators,
1059
+ "signature": node.signature,
1060
+ }
1061
+ files_map[node.file_path]["children"].append(node_dict)
1062
+
1063
+ # Build directory structure
1064
+ for file_path, file_node in files_map.items():
1065
+ rel_path = Path(file_path).relative_to(root_dir)
1066
+ parts = rel_path.parts
1067
+
1068
+ current = tree
1069
+ for part in parts[:-1]:
1070
+ # Find or create directory
1071
+ dir_node = None
1072
+ for child in current["children"]:
1073
+ if child["type"] == "directory" and child["name"] == part:
1074
+ dir_node = child
1075
+ break
1076
+
1077
+ if not dir_node:
1078
+ dir_node = {"name": part, "type": "directory", "children": []}
1079
+ current["children"].append(dir_node)
1080
+
1081
+ current = dir_node
1082
+
1083
+ # Add file to current directory
1084
+ current["children"].append(file_node)
1085
+
1086
+ return tree
1087
+
1088
+ def _load_cache(self):
1089
+ """Load cache from disk."""
1090
+ cache_file = self.cache_dir / "code_tree_cache.json"
1091
+ if cache_file.exists():
1092
+ try:
1093
+ with open(cache_file) as f:
1094
+ cache_data = json.load(f)
1095
+ # Reconstruct CodeNode objects
1096
+ for key, nodes_data in cache_data.items():
1097
+ self.cache[key] = [
1098
+ CodeNode(**node_data) for node_data in nodes_data
1099
+ ]
1100
+ self.logger.info(f"Loaded cache with {len(self.cache)} entries")
1101
+ except Exception as e:
1102
+ self.logger.warning(f"Failed to load cache: {e}")
1103
+
1104
+ def _save_cache(self):
1105
+ """Save cache to disk."""
1106
+ self.cache_dir.mkdir(parents=True, exist_ok=True)
1107
+ cache_file = self.cache_dir / "code_tree_cache.json"
1108
+
1109
+ try:
1110
+ # Convert CodeNode objects to dictionaries
1111
+ cache_data = {}
1112
+ for key, nodes in self.cache.items():
1113
+ cache_data[key] = [
1114
+ {
1115
+ "file_path": n.file_path,
1116
+ "node_type": n.node_type,
1117
+ "name": n.name,
1118
+ "line_start": n.line_start,
1119
+ "line_end": n.line_end,
1120
+ "complexity": n.complexity,
1121
+ "has_docstring": n.has_docstring,
1122
+ "decorators": n.decorators,
1123
+ "parent": n.parent,
1124
+ "language": n.language,
1125
+ "signature": n.signature,
1126
+ }
1127
+ for n in nodes
1128
+ ]
1129
+
1130
+ with open(cache_file, "w") as f:
1131
+ json.dump(cache_data, f, indent=2)
1132
+
1133
+ self.logger.info(f"Saved cache with {len(self.cache)} entries")
1134
+ except Exception as e:
1135
+ self.logger.warning(f"Failed to save cache: {e}")
1136
+
1137
+ def has_code_files(self, directory: Path, depth: int = 5, current_depth: int = 0) -> bool:
1138
+ """Check if directory contains code files up to 5 levels deep.
1139
+
1140
+ Args:
1141
+ directory: Directory to check
1142
+ depth: Maximum depth to search
1143
+ current_depth: Current recursion depth
1144
+
1145
+ Returns:
1146
+ True if directory contains code files within depth levels
1147
+ """
1148
+ if current_depth >= depth:
1149
+ return False
1150
+
1151
+ # Skip checking these directories entirely
1152
+ SKIP_DIRS = {'node_modules', '__pycache__', '.git', '.venv', 'venv', 'dist', 'build',
1153
+ '.tox', 'htmlcov', '.pytest_cache', '.mypy_cache', 'coverage',
1154
+ '.idea', '.vscode', 'env', '.coverage', '__MACOSX', '.ipynb_checkpoints'}
1155
+ if directory.name in SKIP_DIRS:
1156
+ return False
1157
+
1158
+ try:
1159
+ for item in directory.iterdir():
1160
+ # Skip hidden items in scan
1161
+ if item.name.startswith('.'):
1162
+ continue
1163
+
1164
+ if item.is_file():
1165
+ # Check if it's a code file
1166
+ ext = item.suffix.lower()
1167
+ if ext in self.CODE_EXTENSIONS:
1168
+ return True
1169
+ elif item.is_dir() and current_depth < depth - 1:
1170
+ if self.has_code_files(item, depth, current_depth + 1):
1171
+ return True
1172
+ except (PermissionError, OSError):
1173
+ pass
1174
+
1175
+ return False
1176
+
1177
    def discover_top_level(
        self, directory: Path, ignore_patterns: Optional[List[str]] = None
    ) -> Dict[str, Any]:
        """Discover only top-level directories and files for lazy loading.

        Lists the immediate children of *directory* (no recursion):
        directories are included only when they contain code files within
        5 levels (see has_code_files) and are marked ``discovered: False``
        for later expansion; files are included when their extension is
        supported or they are one of a few special dotfiles, and marked
        ``analyzed: False``.  Progress/filter INFO events are emitted on
        the way when an emitter is configured.

        Args:
            directory: Root directory to discover
            ignore_patterns: Substring patterns to ignore

        Returns:
            Dictionary with top-level structure
        """
        # CRITICAL FIX: Use the directory parameter as the base for relative paths
        # NOT the current working directory. This ensures we only show items
        # within the requested directory, not parent directories.
        working_dir = Path(directory).absolute()

        # Emit discovery start event
        if self.emitter:
            from datetime import datetime
            self.emitter.emit('info', {
                'type': 'discovery.start',
                'action': 'scanning_directory',
                'path': str(directory),
                'message': f'Starting discovery of {directory.name}',
                'timestamp': datetime.now().isoformat()
            })

        result = {
            "path": str(directory),
            "name": directory.name,
            "type": "directory",
            "children": [],
        }

        try:
            # Clear gitignore cache if working directory changed
            if self._last_working_dir != directory:
                self.gitignore_manager.clear_cache()
                self._last_working_dir = directory

            # Get immediate children only (no recursion)
            # NOTE(review): these counters are defined inside the try; if
            # clear_cache() above raised PermissionError, the completion
            # event below would hit a NameError — confirm acceptable.
            files_count = 0
            dirs_count = 0
            ignored_count = 0

            for item in directory.iterdir():
                # Use gitignore manager for filtering with the directory as working dir
                if self.gitignore_manager.should_ignore(item, directory):
                    if self.emitter:
                        from datetime import datetime
                        self.emitter.emit('info', {
                            'type': 'filter.gitignore',
                            'path': str(item),
                            'reason': 'gitignore pattern',
                            'message': f'Ignored by gitignore: {item.name}',
                            'timestamp': datetime.now().isoformat()
                        })
                    ignored_count += 1
                    continue

                # Also check additional patterns if provided (substring match)
                if ignore_patterns and any(p in str(item) for p in ignore_patterns):
                    if self.emitter:
                        from datetime import datetime
                        self.emitter.emit('info', {
                            'type': 'filter.pattern',
                            'path': str(item),
                            'reason': 'custom pattern',
                            'message': f'Ignored by pattern: {item.name}',
                            'timestamp': datetime.now().isoformat()
                        })
                    ignored_count += 1
                    continue

                if item.is_dir():
                    # Only include directories that contain code files (5-level deep scan)
                    if not self.has_code_files(item, depth=5):
                        if self.emitter:
                            from datetime import datetime
                            self.emitter.emit('info', {
                                'type': 'filter.no_code',
                                'path': str(item.name),
                                'reason': 'no code files',
                                'message': f'Skipped directory without code: {item.name}',
                                'timestamp': datetime.now().isoformat()
                            })
                        ignored_count += 1
                        continue

                    # Directory - just mark as unexplored
                    # CRITICAL FIX: Use relative path from working directory
                    # This prevents the frontend from showing parent directories
                    try:
                        relative_path = item.relative_to(working_dir)
                        path_str = str(relative_path)
                    except ValueError:
                        # If somehow the item is outside working_dir, skip it
                        self.logger.warning(f"Directory outside working dir: {item}")
                        continue

                    # Emit directory found event
                    if self.emitter:
                        from datetime import datetime
                        self.emitter.emit('info', {
                            'type': 'discovery.directory',
                            'path': str(item),
                            'message': f'Found directory: {item.name}',
                            'timestamp': datetime.now().isoformat()
                        })
                    dirs_count += 1

                    child = {
                        "path": path_str,
                        "name": item.name,
                        "type": "directory",
                        "discovered": False,
                        "children": [],
                    }
                    result["children"].append(child)

                    if self.emitter:
                        self.emitter.emit_directory_discovered(path_str, [])

                elif item.is_file():
                    # Check if it's a supported code file or a special file we want to show
                    if item.suffix in self.supported_extensions or item.name in ['.gitignore', '.env.example', '.env.sample']:
                        # File - mark for lazy analysis
                        language = self._get_language(item)

                        # CRITICAL FIX: Use relative path from working directory
                        # This prevents the frontend from showing parent directories
                        try:
                            relative_path = item.relative_to(working_dir)
                            path_str = str(relative_path)
                        except ValueError:
                            # If somehow the item is outside working_dir, skip it
                            self.logger.warning(f"File outside working dir: {item}")
                            continue

                        # Emit file found event
                        if self.emitter:
                            from datetime import datetime
                            self.emitter.emit('info', {
                                'type': 'discovery.file',
                                'path': str(item),
                                'language': language,
                                'size': item.stat().st_size,
                                'message': f'Found file: {item.name} ({language})',
                                'timestamp': datetime.now().isoformat()
                            })
                        files_count += 1

                        child = {
                            "path": path_str,
                            "name": item.name,
                            "type": "file",
                            "language": language,
                            "size": item.stat().st_size,
                            "analyzed": False,
                        }
                        result["children"].append(child)

                        if self.emitter:
                            self.emitter.emit_file_discovered(
                                path_str, language, item.stat().st_size
                            )

        except PermissionError as e:
            self.logger.warning(f"Permission denied accessing {directory}: {e}")
            if self.emitter:
                self.emitter.emit_error(str(directory), f"Permission denied: {e}")

        # Emit discovery complete event with stats
        if self.emitter:
            from datetime import datetime
            self.emitter.emit('info', {
                'type': 'discovery.complete',
                'path': str(directory),
                'stats': {
                    'files': files_count,
                    'directories': dirs_count,
                    'ignored': ignored_count
                },
                'message': f'Discovery complete: {files_count} files, {dirs_count} directories, {ignored_count} ignored',
                'timestamp': datetime.now().isoformat()
            })

        return result
1366
+
1367
+ def discover_directory(
1368
+ self, dir_path: str, ignore_patterns: Optional[List[str]] = None
1369
+ ) -> Dict[str, Any]:
1370
+ """Discover contents of a specific directory for lazy loading.
1371
+
1372
+ Args:
1373
+ dir_path: Directory path to discover
1374
+ ignore_patterns: Patterns to ignore
1375
+
1376
+ Returns:
1377
+ Dictionary with directory contents
1378
+ """
1379
+ directory = Path(dir_path)
1380
+ if not directory.exists() or not directory.is_dir():
1381
+ return {"error": f"Invalid directory: {dir_path}"}
1382
+
1383
+ # Clear cache if working directory changed
1384
+ if self._last_working_dir != directory.parent:
1385
+ self.gitignore_manager.clear_cache()
1386
+ self._last_working_dir = directory.parent
1387
+
1388
+ # The discover_top_level method will emit all the INFO events
1389
+ return self.discover_top_level(directory, ignore_patterns)
1390
+
1391
    def analyze_file(self, file_path: str) -> Dict[str, Any]:
        """Analyze a specific file and return its AST structure.

        Uses a content-hash cache: a hit returns cached nodes directly; a
        miss dispatches to the language-appropriate analyzer, filters out
        internal helpers (see _is_internal_node), caches the raw nodes,
        and emits detailed INFO events along the way.

        Args:
            file_path: Path to file to analyze

        Returns:
            Dictionary with file analysis results
            ({"path": ..., "language": ..., "nodes": [...]})
            or ``{"error": ...}`` for an invalid path.
        """
        path = Path(file_path)
        if not path.exists() or not path.is_file():
            return {"error": f"Invalid file: {file_path}"}

        # Get language first (needed for return statement)
        language = self._get_language(path)

        # Emit analysis start event
        if self.emitter:
            from datetime import datetime
            self.emitter.emit('info', {
                'type': 'analysis.start',
                'file': str(path),
                'language': language,
                'message': f'Analyzing: {path.name}',
                'timestamp': datetime.now().isoformat()
            })

        # Check cache (key includes content hash, so edits invalidate it)
        file_hash = self._get_file_hash(path)
        cache_key = f"{file_path}:{file_hash}"

        if cache_key in self.cache:
            nodes = self.cache[cache_key]
            if self.emitter:
                from datetime import datetime
                self.emitter.emit('info', {
                    'type': 'cache.hit',
                    'file': str(path),
                    'message': f'Using cached analysis for {path.name}',
                    'timestamp': datetime.now().isoformat()
                })
        else:
            # Analyze file
            if self.emitter:
                from datetime import datetime
                self.emitter.emit('info', {
                    'type': 'cache.miss',
                    'file': str(path),
                    'message': f'Cache miss, analyzing fresh: {path.name}',
                    'timestamp': datetime.now().isoformat()
                })

            # Dispatch to the language-appropriate analyzer.
            if language == "python":
                analyzer = self.python_analyzer
            elif language == "javascript" or language == "typescript":
                analyzer = self.javascript_analyzer
            else:
                analyzer = self.generic_analyzer

            start_time = time.time()

            # Emit parsing event
            if self.emitter:
                from datetime import datetime
                self.emitter.emit('info', {
                    'type': 'analysis.parse',
                    'file': str(path),
                    'message': f'Parsing file content: {path.name}',
                    'timestamp': datetime.now().isoformat()
                })

            nodes = analyzer.analyze_file(path) if analyzer else []
            duration = time.time() - start_time

            # Cache results (the unfiltered node list)
            self.cache[cache_key] = nodes

            # Filter internal functions before emitting
            filtered_nodes = []
            classes_count = 0
            functions_count = 0
            methods_count = 0

            for node in nodes:
                # Only include main structural elements
                if not self._is_internal_node(node):
                    # Emit found element event
                    if self.emitter:
                        from datetime import datetime
                        self.emitter.emit('info', {
                            'type': f'analysis.{node.node_type}',
                            'name': node.name,
                            'file': str(path),
                            'line_start': node.line_start,
                            'complexity': node.complexity,
                            'message': f'Found {node.node_type}: {node.name}',
                            'timestamp': datetime.now().isoformat()
                        })

                    # Count node types
                    if node.node_type == 'class':
                        classes_count += 1
                    elif node.node_type == 'function':
                        functions_count += 1
                    elif node.node_type == 'method':
                        methods_count += 1

                    filtered_nodes.append(
                        {
                            "name": node.name,
                            "type": node.node_type,
                            "line_start": node.line_start,
                            "line_end": node.line_end,
                            "complexity": node.complexity,
                            "has_docstring": node.has_docstring,
                            "signature": node.signature,
                        }
                    )

            # Emit analysis complete event with stats
            if self.emitter:
                from datetime import datetime
                self.emitter.emit('info', {
                    'type': 'analysis.complete',
                    'file': str(path),
                    'stats': {
                        'classes': classes_count,
                        'functions': functions_count,
                        'methods': methods_count,
                        'total_nodes': len(filtered_nodes)
                    },
                    'duration': duration,
                    'message': f'Analysis complete: {classes_count} classes, {functions_count} functions, {methods_count} methods',
                    'timestamp': datetime.now().isoformat()
                })

                self.emitter.emit_file_analyzed(file_path, filtered_nodes, duration)

        return {
            "path": file_path,
            "language": language,
            "nodes": (
                # filtered_nodes only exists on the cache-miss path; on a
                # cache hit the same filtering is re-applied inline to the
                # cached nodes via the locals() check below.
                filtered_nodes
                if "filtered_nodes" in locals()
                else [
                    {
                        "name": n.name,
                        "type": n.node_type,
                        "line_start": n.line_start,
                        "line_end": n.line_end,
                        "complexity": n.complexity,
                        "has_docstring": n.has_docstring,
                        "signature": n.signature,
                    }
                    for n in nodes
                    if not self._is_internal_node(n)
                ]
            ),
        }
1550
+
1551
+ def _is_internal_node(self, node: CodeNode) -> bool:
1552
+ """Check if node is an internal function that should be filtered."""
1553
+ # Filter patterns for internal functions
1554
+ internal_patterns = [
1555
+ "handle", # Event handlers
1556
+ "on_", # Event callbacks
1557
+ "_", # Private methods
1558
+ "get_", # Simple getters
1559
+ "set_", # Simple setters
1560
+ "__", # Python magic methods
1561
+ ]
1562
+
1563
+ name_lower = node.name.lower()
1564
+
1565
+ # Don't filter classes or important public methods
1566
+ if node.node_type == "class":
1567
+ return False
1568
+
1569
+ # Check patterns
1570
+ for pattern in internal_patterns:
1571
+ if name_lower.startswith(pattern):
1572
+ # Exception: include __init__ methods
1573
+ if node.name == "__init__":
1574
+ return False
1575
+ return True
1576
+
1577
+ return False
1578
+
1579
+ @property
1580
+ def supported_extensions(self):
1581
+ """Get list of supported file extensions."""
1582
+ return {".py", ".js", ".jsx", ".ts", ".tsx", ".mjs", ".cjs"}
1583
+
1584
+ def _get_language(self, file_path: Path) -> str:
1585
+ """Determine language from file extension."""
1586
+ ext = file_path.suffix.lower()
1587
+ language_map = {
1588
+ ".py": "python",
1589
+ ".js": "javascript",
1590
+ ".jsx": "javascript",
1591
+ ".ts": "typescript",
1592
+ ".tsx": "typescript",
1593
+ ".mjs": "javascript",
1594
+ ".cjs": "javascript",
1595
+ }
1596
+ return language_map.get(ext, "unknown")