claude-mpm 4.1.8__py3-none-any.whl → 4.1.11__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- claude_mpm/VERSION +1 -1
- claude_mpm/agents/INSTRUCTIONS.md +26 -1
- claude_mpm/agents/agents_metadata.py +57 -0
- claude_mpm/agents/templates/.claude-mpm/memories/README.md +17 -0
- claude_mpm/agents/templates/.claude-mpm/memories/engineer_memories.md +3 -0
- claude_mpm/agents/templates/agent-manager.json +263 -17
- claude_mpm/agents/templates/agentic_coder_optimizer.json +222 -0
- claude_mpm/agents/templates/code_analyzer.json +18 -8
- claude_mpm/agents/templates/engineer.json +1 -1
- claude_mpm/agents/templates/logs/prompts/agent_engineer_20250826_014258_728.md +39 -0
- claude_mpm/agents/templates/qa.json +1 -1
- claude_mpm/agents/templates/research.json +1 -1
- claude_mpm/cli/__init__.py +15 -0
- claude_mpm/cli/commands/__init__.py +6 -0
- claude_mpm/cli/commands/analyze.py +548 -0
- claude_mpm/cli/commands/analyze_code.py +524 -0
- claude_mpm/cli/commands/configure.py +78 -28
- claude_mpm/cli/commands/configure_tui.py +62 -60
- claude_mpm/cli/commands/dashboard.py +288 -0
- claude_mpm/cli/commands/debug.py +1386 -0
- claude_mpm/cli/commands/mpm_init.py +427 -0
- claude_mpm/cli/commands/mpm_init_handler.py +83 -0
- claude_mpm/cli/parsers/analyze_code_parser.py +170 -0
- claude_mpm/cli/parsers/analyze_parser.py +135 -0
- claude_mpm/cli/parsers/base_parser.py +44 -0
- claude_mpm/cli/parsers/dashboard_parser.py +113 -0
- claude_mpm/cli/parsers/debug_parser.py +319 -0
- claude_mpm/cli/parsers/mpm_init_parser.py +122 -0
- claude_mpm/constants.py +13 -1
- claude_mpm/core/framework_loader.py +148 -6
- claude_mpm/core/log_manager.py +16 -13
- claude_mpm/core/logger.py +1 -1
- claude_mpm/core/unified_agent_registry.py +1 -1
- claude_mpm/dashboard/.claude-mpm/socketio-instances.json +1 -0
- claude_mpm/dashboard/analysis_runner.py +455 -0
- claude_mpm/dashboard/static/built/components/activity-tree.js +2 -0
- claude_mpm/dashboard/static/built/components/agent-inference.js +1 -1
- claude_mpm/dashboard/static/built/components/code-tree.js +2 -0
- claude_mpm/dashboard/static/built/components/code-viewer.js +2 -0
- claude_mpm/dashboard/static/built/components/event-viewer.js +1 -1
- claude_mpm/dashboard/static/built/components/file-tool-tracker.js +1 -1
- claude_mpm/dashboard/static/built/components/module-viewer.js +1 -1
- claude_mpm/dashboard/static/built/components/session-manager.js +1 -1
- claude_mpm/dashboard/static/built/components/working-directory.js +1 -1
- claude_mpm/dashboard/static/built/dashboard.js +1 -1
- claude_mpm/dashboard/static/built/socket-client.js +1 -1
- claude_mpm/dashboard/static/css/activity.css +549 -0
- claude_mpm/dashboard/static/css/code-tree.css +1175 -0
- claude_mpm/dashboard/static/css/dashboard.css +245 -0
- claude_mpm/dashboard/static/dist/components/activity-tree.js +2 -0
- claude_mpm/dashboard/static/dist/components/code-tree.js +2 -0
- claude_mpm/dashboard/static/dist/components/code-viewer.js +2 -0
- claude_mpm/dashboard/static/dist/components/event-viewer.js +1 -1
- claude_mpm/dashboard/static/dist/components/session-manager.js +1 -1
- claude_mpm/dashboard/static/dist/components/working-directory.js +1 -1
- claude_mpm/dashboard/static/dist/dashboard.js +1 -1
- claude_mpm/dashboard/static/dist/socket-client.js +1 -1
- claude_mpm/dashboard/static/js/components/activity-tree.js +1338 -0
- claude_mpm/dashboard/static/js/components/code-tree.js +2535 -0
- claude_mpm/dashboard/static/js/components/code-viewer.js +480 -0
- claude_mpm/dashboard/static/js/components/event-viewer.js +59 -9
- claude_mpm/dashboard/static/js/components/session-manager.js +40 -4
- claude_mpm/dashboard/static/js/components/socket-manager.js +12 -0
- claude_mpm/dashboard/static/js/components/ui-state-manager.js +4 -0
- claude_mpm/dashboard/static/js/components/working-directory.js +17 -1
- claude_mpm/dashboard/static/js/dashboard.js +51 -0
- claude_mpm/dashboard/static/js/socket-client.js +465 -29
- claude_mpm/dashboard/templates/index.html +182 -4
- claude_mpm/hooks/claude_hooks/hook_handler.py +182 -5
- claude_mpm/hooks/claude_hooks/installer.py +386 -113
- claude_mpm/scripts/claude-hook-handler.sh +161 -0
- claude_mpm/scripts/socketio_daemon.py +121 -8
- claude_mpm/services/agents/deployment/agent_lifecycle_manager_refactored.py +2 -2
- claude_mpm/services/agents/deployment/agent_record_service.py +1 -2
- claude_mpm/services/agents/memory/memory_format_service.py +1 -3
- claude_mpm/services/cli/agent_cleanup_service.py +1 -5
- claude_mpm/services/cli/agent_dependency_service.py +1 -1
- claude_mpm/services/cli/agent_validation_service.py +3 -4
- claude_mpm/services/cli/dashboard_launcher.py +2 -3
- claude_mpm/services/cli/startup_checker.py +0 -11
- claude_mpm/services/core/cache_manager.py +1 -3
- claude_mpm/services/core/path_resolver.py +1 -4
- claude_mpm/services/core/service_container.py +2 -2
- claude_mpm/services/diagnostics/checks/instructions_check.py +1 -2
- claude_mpm/services/infrastructure/monitoring/__init__.py +11 -11
- claude_mpm/services/infrastructure/monitoring.py +11 -11
- claude_mpm/services/project/architecture_analyzer.py +1 -1
- claude_mpm/services/project/dependency_analyzer.py +4 -4
- claude_mpm/services/project/language_analyzer.py +3 -3
- claude_mpm/services/project/metrics_collector.py +3 -6
- claude_mpm/services/socketio/event_normalizer.py +64 -0
- claude_mpm/services/socketio/handlers/__init__.py +2 -0
- claude_mpm/services/socketio/handlers/code_analysis.py +672 -0
- claude_mpm/services/socketio/handlers/registry.py +2 -0
- claude_mpm/services/socketio/server/connection_manager.py +6 -4
- claude_mpm/services/socketio/server/core.py +100 -11
- claude_mpm/services/socketio/server/main.py +8 -2
- claude_mpm/services/visualization/__init__.py +19 -0
- claude_mpm/services/visualization/mermaid_generator.py +938 -0
- claude_mpm/tools/__main__.py +208 -0
- claude_mpm/tools/code_tree_analyzer.py +1596 -0
- claude_mpm/tools/code_tree_builder.py +631 -0
- claude_mpm/tools/code_tree_events.py +416 -0
- claude_mpm/tools/socketio_debug.py +671 -0
- {claude_mpm-4.1.8.dist-info → claude_mpm-4.1.11.dist-info}/METADATA +2 -1
- {claude_mpm-4.1.8.dist-info → claude_mpm-4.1.11.dist-info}/RECORD +110 -74
- claude_mpm/agents/schema/agent_schema.json +0 -314
- {claude_mpm-4.1.8.dist-info → claude_mpm-4.1.11.dist-info}/WHEEL +0 -0
- {claude_mpm-4.1.8.dist-info → claude_mpm-4.1.11.dist-info}/entry_points.txt +0 -0
- {claude_mpm-4.1.8.dist-info → claude_mpm-4.1.11.dist-info}/licenses/LICENSE +0 -0
- {claude_mpm-4.1.8.dist-info → claude_mpm-4.1.11.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,1596 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
"""
|
|
3
|
+
Code Tree Analyzer
|
|
4
|
+
==================
|
|
5
|
+
|
|
6
|
+
WHY: Analyzes source code using AST to extract structure and metrics,
|
|
7
|
+
supporting multiple languages and emitting incremental events for visualization.
|
|
8
|
+
|
|
9
|
+
DESIGN DECISIONS:
|
|
10
|
+
- Use Python's ast module for Python files
|
|
11
|
+
- Use tree-sitter for multi-language support
|
|
12
|
+
- Extract comprehensive metadata (complexity, docstrings, etc.)
|
|
13
|
+
- Cache parsed results to avoid re-processing
|
|
14
|
+
- Support incremental processing with checkpoints
|
|
15
|
+
"""
|
|
16
|
+
|
|
17
|
+
import ast
import fnmatch
import hashlib
import json
import time
from dataclasses import dataclass, field
from pathlib import Path
from typing import Any, Dict, List, Optional
|
|
24
|
+
|
|
25
|
+
try:
|
|
26
|
+
import pathspec
|
|
27
|
+
|
|
28
|
+
PATHSPEC_AVAILABLE = True
|
|
29
|
+
except ImportError:
|
|
30
|
+
PATHSPEC_AVAILABLE = False
|
|
31
|
+
pathspec = None
|
|
32
|
+
|
|
33
|
+
try:
|
|
34
|
+
import tree_sitter
|
|
35
|
+
import tree_sitter_javascript
|
|
36
|
+
import tree_sitter_python
|
|
37
|
+
import tree_sitter_typescript
|
|
38
|
+
|
|
39
|
+
TREE_SITTER_AVAILABLE = True
|
|
40
|
+
except ImportError:
|
|
41
|
+
TREE_SITTER_AVAILABLE = False
|
|
42
|
+
tree_sitter = None
|
|
43
|
+
|
|
44
|
+
from ..core.logging_config import get_logger
|
|
45
|
+
from .code_tree_events import CodeNodeEvent, CodeTreeEventEmitter
|
|
46
|
+
|
|
47
|
+
|
|
48
|
+
class GitignoreManager:
    """Manages .gitignore pattern matching for file filtering.

    WHY: Properly respecting .gitignore patterns ensures we don't analyze
    or display files that should be ignored in the repository.

    Matching strategy (in order, see should_ignore):
      1. unconditional system/bytecode files,
      2. dotfile policy (show_hidden_files / DOTFILE_EXCEPTIONS),
      3. .gitignore patterns via `pathspec` when available, otherwise a
         simplified fallback matcher.
    """

    # Default patterns that should always be ignored
    DEFAULT_PATTERNS = [
        ".git/",
        "__pycache__/",
        "*.pyc",
        "*.pyo",
        ".DS_Store",
        ".pytest_cache/",
        ".mypy_cache/",
        "dist/",
        "build/",
        "*.egg-info/",
        ".coverage",
        ".tox/",
        "htmlcov/",
        ".idea/",
        ".vscode/",
        "*.swp",
        "*.swo",
        "*~",
        "Thumbs.db",
        "node_modules/",
        ".venv/",
        "venv/",
        "env/",
        ".env",
        "*.log",
        ".ipynb_checkpoints/",
        "__MACOSX/",
        ".Spotlight-V100/",
        ".Trashes/",
        "desktop.ini",
    ]

    # Additional patterns to hide dotfiles (when enabled)
    DOTFILE_PATTERNS = [
        ".*",  # All dotfiles
        ".*/",  # All dot directories
    ]

    # Important files/directories to always show
    DOTFILE_EXCEPTIONS = {
        # Removed .gitignore from exceptions - it should be hidden by default
        ".env.example",
        ".env.sample",
        ".gitlab-ci.yml",
        ".travis.yml",
        ".dockerignore",
        ".editorconfig",
        ".eslintrc",
        ".prettierrc",
        # Removed .github from exceptions - it should be hidden by default
    }

    # System/OS noise hidden unconditionally, regardless of show_hidden_files
    # or .gitignore contents. (Previously duplicated as a local in two
    # methods; hoisted here for consistency.)
    ALWAYS_HIDE = frozenset({".DS_Store", "Thumbs.db"})
    # Compiled-bytecode suffixes hidden unconditionally.
    ALWAYS_HIDE_SUFFIXES = (".pyc", ".pyo", ".pyd")

    def __init__(self, show_hidden_files: bool = False):
        """Initialize the GitignoreManager.

        Args:
            show_hidden_files: Whether to show hidden files/directories
        """
        self.logger = get_logger(__name__)
        self._pathspec_cache: Dict[str, Any] = {}
        self._gitignore_cache: Dict[str, List[str]] = {}
        self._use_pathspec = PATHSPEC_AVAILABLE
        self.show_hidden_files = show_hidden_files

        if not self._use_pathspec:
            self.logger.warning(
                "pathspec library not available - using basic pattern matching"
            )

    def get_ignore_patterns(self, working_dir: Path) -> List[str]:
        """Get all ignore patterns for a directory.

        Args:
            working_dir: The working directory to search for .gitignore files

        Returns:
            Combined list of ignore patterns from all sources
        """
        # Always include default patterns
        patterns = self.DEFAULT_PATTERNS.copy()

        # Dotfile patterns are deliberately NOT added here - they are handled
        # separately in should_ignore so DOTFILE_EXCEPTIONS can take effect.

        # NOTE(review): patterns from ancestor .gitignore files are matched
        # relative to working_dir rather than to the directory containing
        # them - an approximation of git's actual semantics.
        for gitignore_file in self._find_gitignore_files(working_dir):
            patterns.extend(self._parse_gitignore(gitignore_file))

        return patterns

    def should_ignore(self, path: Path, working_dir: Path) -> bool:
        """Check if a path should be ignored based on patterns.

        Args:
            path: The path to check
            working_dir: The working directory (for relative path calculation)

        Returns:
            True if the path should be ignored
        """
        filename = path.name

        # 1. ALWAYS hide system/bytecode files regardless of settings
        if filename in self.ALWAYS_HIDE or filename.endswith(
            self.ALWAYS_HIDE_SUFFIXES
        ):
            return True

        # 2. Dotfiles are decided here, BEFORE gitignore matching, so that
        # DOTFILE_EXCEPTIONS cannot be overridden by a broad ".*" pattern.
        if filename.startswith('.'):
            if self.show_hidden_files:
                return False  # Show the dotfile
            # Hide all dotfiles except those explicitly allow-listed
            return filename not in self.DOTFILE_EXCEPTIONS

        # 3. Gitignore matching: prefer pathspec, fall back to basic matcher
        pathspec_obj = self._get_pathspec(working_dir)

        if pathspec_obj:
            try:
                rel_path_str = str(path.relative_to(working_dir))

                # Directories must also be tested with a trailing slash,
                # since gitignore directory patterns end in "/".
                if path.is_dir():
                    return pathspec_obj.match_file(
                        rel_path_str
                    ) or pathspec_obj.match_file(rel_path_str + '/')
                return pathspec_obj.match_file(rel_path_str)
            except ValueError:
                # Path is outside working directory - nothing to match
                return False

        return self._basic_should_ignore(path, working_dir)

    def _get_pathspec(self, working_dir: Path) -> Optional[Any]:
        """Get or create a cached PathSpec object for the working directory.

        Args:
            working_dir: The working directory

        Returns:
            PathSpec object, or None if pathspec is unavailable or the
            spec could not be built
        """
        if not self._use_pathspec:
            return None

        cache_key = str(working_dir)
        if cache_key not in self._pathspec_cache:
            patterns = self.get_ignore_patterns(working_dir)
            try:
                self._pathspec_cache[cache_key] = pathspec.PathSpec.from_lines(
                    "gitwildmatch", patterns
                )
            except Exception as e:
                self.logger.warning(f"Failed to create PathSpec: {e}")
                return None

        return self._pathspec_cache[cache_key]

    def _find_gitignore_files(self, working_dir: Path) -> List[Path]:
        """Find all .gitignore files that apply to a directory.

        Collects working_dir's own .gitignore plus those of ancestor
        directories, stopping at the repository root (the first directory
        containing ".git").

        Args:
            working_dir: The directory to search

        Returns:
            List of .gitignore file paths
        """
        gitignore_files = []

        # Check for .gitignore in working directory
        main_gitignore = working_dir / ".gitignore"
        if main_gitignore.exists():
            gitignore_files.append(main_gitignore)

        # Walk up the tree collecting ancestor .gitignore files.
        current = working_dir
        while current != current.parent:
            # Stop at the repository root BEFORE looking at its parent.
            # (Fix: previously the parent's .gitignore was appended first and
            # the root check ran afterwards, so patterns from directories
            # OUTSIDE the repository leaked in when `current` was the root.)
            if (current / ".git").exists():
                break

            parent_gitignore = current.parent / ".gitignore"
            if parent_gitignore.exists():
                gitignore_files.append(parent_gitignore)

            current = current.parent

        return gitignore_files

    def _parse_gitignore(self, gitignore_path: Path) -> List[str]:
        """Parse a .gitignore file and return patterns.

        Results are cached per file path; unreadable files yield an empty
        pattern list and a warning.

        Args:
            gitignore_path: Path to .gitignore file

        Returns:
            List of patterns from the file
        """
        cache_key = str(gitignore_path)

        # Check cache
        if cache_key in self._gitignore_cache:
            return self._gitignore_cache[cache_key]

        patterns = []
        try:
            with open(gitignore_path, encoding="utf-8") as f:
                for line in f:
                    line = line.strip()
                    # Skip empty lines and comments
                    if line and not line.startswith("#"):
                        patterns.append(line)

            self._gitignore_cache[cache_key] = patterns
        except Exception as e:
            self.logger.warning(f"Failed to parse {gitignore_path}: {e}")

        return patterns

    def _basic_should_ignore(self, path: Path, working_dir: Path) -> bool:
        """Basic pattern matching fallback when pathspec is not available.

        Args:
            path: The path to check
            working_dir: The working directory

        Returns:
            True if the path should be ignored
        """
        path_str = str(path)
        path_name = path.name

        # 1. ALWAYS hide system/bytecode files (repeated here so this method
        # is safe to call directly, not only via should_ignore)
        if path_name in self.ALWAYS_HIDE or path_name.endswith(
            self.ALWAYS_HIDE_SUFFIXES
        ):
            return True

        # 2. Dotfiles: decided solely by show_hidden_files / exceptions
        if path_name.startswith('.'):
            if self.show_hidden_files:
                return False  # Show the dotfile
            return path_name not in self.DOTFILE_EXCEPTIONS

        for pattern in self.get_ignore_patterns(working_dir):
            # Dotfile patterns were already handled above
            if pattern in (".*", ".*/"):
                continue

            if pattern.endswith("/"):
                # Directory pattern
                if path.is_dir() and path_name == pattern[:-1]:
                    return True
            elif pattern.startswith("*."):
                # Extension pattern
                if path_name.endswith(pattern[1:]):
                    return True
            elif "*" in pattern:
                # Wildcard pattern (simplified)
                if fnmatch.fnmatch(path_name, pattern):
                    return True
            elif pattern in path_str:
                # Substring match (coarse: may over-match on parent names)
                return True
            elif path_name == pattern:
                # Exact match
                return True

        return False

    def clear_cache(self):
        """Clear all caches (parsed .gitignore files and compiled specs)."""
        self._pathspec_cache.clear()
        self._gitignore_cache.clear()
|
|
345
|
+
|
|
346
|
+
|
|
347
|
+
@dataclass
class CodeNode:
    """Represents a node in the code tree.

    One instance per structural element discovered in a source file
    (class, function, method, or import statement).
    """

    file_path: str  # path of the source file this node came from
    node_type: str  # e.g. "class", "function", "method", "import"
    name: str  # identifier of the element
    line_start: int  # 1-based first line of the element
    line_end: int  # 1-based last line of the element
    complexity: int = 0  # cyclomatic complexity (0 when not computed)
    has_docstring: bool = False
    # field(default_factory=...) replaces the previous `= None` sentinels so
    # each instance gets its own fresh container (no shared mutable default).
    decorators: List[str] = field(default_factory=list)
    parent: Optional[str] = None  # enclosing class name, if any
    children: List["CodeNode"] = field(default_factory=list)
    language: str = "python"
    signature: str = ""  # human-readable signature string
    metrics: Dict[str, Any] = field(default_factory=dict)

    def __post_init__(self):
        # Preserve the original contract: callers may still pass None
        # explicitly for these fields and receive an empty container.
        if self.decorators is None:
            self.decorators = []
        if self.children is None:
            self.children = []
        if self.metrics is None:
            self.metrics = {}
|
|
372
|
+
|
|
373
|
+
|
|
374
|
+
class PythonAnalyzer:
    """Analyzes Python source code using AST.

    WHY: Python's built-in AST module provides rich structural information
    that we can leverage for detailed analysis.
    """

    def __init__(self, emitter: Optional[CodeTreeEventEmitter] = None):
        """Initialize the analyzer.

        Args:
            emitter: Optional emitter used to stream node/error events
                incrementally while files are analyzed.
        """
        self.logger = get_logger(__name__)
        self.emitter = emitter

    def analyze_file(self, file_path: Path) -> List[CodeNode]:
        """Analyze a Python file and extract code structure.

        Args:
            file_path: Path to Python file

        Returns:
            List of code nodes found in the file. Parse failures are not
            raised: they are logged (and forwarded through the emitter when
            one is configured) and an empty list is returned.
        """
        nodes = []

        try:
            with open(file_path, encoding="utf-8") as f:
                source = f.read()

            tree = ast.parse(source, filename=str(file_path))
            nodes = self._extract_nodes(tree, file_path, source)

        except SyntaxError as e:
            self.logger.warning(f"Syntax error in {file_path}: {e}")
            if self.emitter:
                self.emitter.emit_error(str(file_path), f"Syntax error: {e}")
        except Exception as e:
            self.logger.error(f"Error analyzing {file_path}: {e}")
            if self.emitter:
                self.emitter.emit_error(str(file_path), str(e))

        return nodes

    def _extract_nodes(
        self, tree: ast.AST, file_path: Path, source: str
    ) -> List[CodeNode]:
        """Extract code nodes from AST tree.

        Args:
            tree: AST tree
            file_path: Source file path
            source: Source code text (currently unused; kept for interface
                stability - the previous dead `source.splitlines()` call was
                removed)

        Returns:
            List of extracted code nodes (classes, functions, methods,
            imports)
        """
        nodes = []
        # Capture the emitter in a local so the visitor below can emit events
        # without needing a reference wired onto it after construction
        # (previously done via `visitor.emitter = self.emitter`).
        emitter = self.emitter

        class NodeVisitor(ast.NodeVisitor):
            """Collects class/function/method nodes, emitting events live."""

            def __init__(self, parent_name: Optional[str] = None):
                self.parent_name = parent_name
                self.current_class = None  # name of the class being visited

            def visit_ClassDef(self, node):
                # Extract class information
                class_node = CodeNode(
                    file_path=str(file_path),
                    node_type="class",
                    name=node.name,
                    line_start=node.lineno,
                    line_end=node.end_lineno or node.lineno,
                    has_docstring=bool(ast.get_docstring(node)),
                    decorators=[self._decorator_name(d) for d in node.decorator_list],
                    parent=self.parent_name,
                    complexity=self._calculate_complexity(node),
                    signature=self._get_class_signature(node),
                )

                nodes.append(class_node)

                # Emit event if emitter is available
                if emitter:
                    emitter.emit_node(
                        CodeNodeEvent(
                            file_path=str(file_path),
                            node_type="class",
                            name=node.name,
                            line_start=node.lineno,
                            line_end=node.end_lineno or node.lineno,
                            complexity=class_node.complexity,
                            has_docstring=class_node.has_docstring,
                            decorators=class_node.decorators,
                            parent=self.parent_name,
                            children_count=len(node.body),
                        )
                    )

                # Visit methods directly so they are attributed to this class.
                # NOTE: nested classes are intentionally not recursed into.
                old_class = self.current_class
                self.current_class = node.name
                for child in node.body:
                    if isinstance(child, (ast.FunctionDef, ast.AsyncFunctionDef)):
                        self.visit_FunctionDef(child, is_method=True)
                self.current_class = old_class

            def visit_FunctionDef(self, node, is_method=False):
                # Determine node type and owning scope
                node_type = "method" if is_method else "function"
                parent = self.current_class if is_method else self.parent_name

                # Extract function information
                func_node = CodeNode(
                    file_path=str(file_path),
                    node_type=node_type,
                    name=node.name,
                    line_start=node.lineno,
                    line_end=node.end_lineno or node.lineno,
                    has_docstring=bool(ast.get_docstring(node)),
                    decorators=[self._decorator_name(d) for d in node.decorator_list],
                    parent=parent,
                    complexity=self._calculate_complexity(node),
                    signature=self._get_function_signature(node),
                )

                nodes.append(func_node)

                # Emit event if emitter is available
                if emitter:
                    emitter.emit_node(
                        CodeNodeEvent(
                            file_path=str(file_path),
                            node_type=node_type,
                            name=node.name,
                            line_start=node.lineno,
                            line_end=node.end_lineno or node.lineno,
                            complexity=func_node.complexity,
                            has_docstring=func_node.has_docstring,
                            decorators=func_node.decorators,
                            parent=parent,
                            children_count=0,
                        )
                    )

            def visit_AsyncFunctionDef(self, node):
                # Async defs share the extraction logic with sync defs.
                self.visit_FunctionDef(node)

            def _decorator_name(self, decorator):
                """Extract decorator name from AST node."""
                if isinstance(decorator, ast.Name):
                    return decorator.id
                if isinstance(decorator, ast.Call):
                    if isinstance(decorator.func, ast.Name):
                        return decorator.func.id
                    if isinstance(decorator.func, ast.Attribute):
                        return decorator.func.attr
                return "unknown"

            def _calculate_complexity(self, node):
                """Calculate cyclomatic complexity of a node (1 + branches)."""
                complexity = 1  # Base complexity

                for child in ast.walk(node):
                    if isinstance(
                        child, (ast.If, ast.While, ast.For, ast.ExceptHandler)
                    ):
                        complexity += 1
                    elif isinstance(child, ast.BoolOp):
                        # and/or chains add one path per extra operand
                        complexity += len(child.values) - 1

                return complexity

            def _get_function_signature(self, node):
                """Extract function signature (positional arg names only)."""
                args = [arg.arg for arg in node.args.args]
                return f"{node.name}({', '.join(args)})"

            def _get_class_signature(self, node):
                """Extract class signature including simple-name bases."""
                bases = [
                    base.id for base in node.bases if isinstance(base, ast.Name)
                ]
                base_str = f"({', '.join(bases)})" if bases else ""
                return f"class {node.name}{base_str}"

        # Extract imports anywhere in the tree (module level and nested)
        for stmt in ast.walk(tree):
            if isinstance(stmt, ast.Import):
                for alias in stmt.names:
                    nodes.append(
                        CodeNode(
                            file_path=str(file_path),
                            node_type="import",
                            name=alias.name,
                            line_start=stmt.lineno,
                            line_end=stmt.end_lineno or stmt.lineno,
                            signature=f"import {alias.name}",
                        )
                    )

            elif isinstance(stmt, ast.ImportFrom):
                module = stmt.module or ""
                for alias in stmt.names:
                    nodes.append(
                        CodeNode(
                            file_path=str(file_path),
                            node_type="import",
                            name=f"{module}.{alias.name}",
                            line_start=stmt.lineno,
                            line_end=stmt.end_lineno or stmt.lineno,
                            signature=f"from {module} import {alias.name}",
                        )
                    )

        # Visit all top-level definitions
        NodeVisitor().visit(tree)

        return nodes
|
|
592
|
+
|
|
593
|
+
|
|
594
|
+
class MultiLanguageAnalyzer:
    """Analyzes multiple programming languages using tree-sitter.

    WHY: Tree-sitter provides consistent parsing across multiple languages,
    allowing us to support JavaScript, TypeScript, and other languages.
    """

    # Maps each supported language name to the tree-sitter grammar module
    # that provides its parser; the module is imported dynamically in
    # _init_parsers, and languages whose module is missing are skipped.
    LANGUAGE_PARSERS = {
        "python": "tree_sitter_python",
        "javascript": "tree_sitter_javascript",
        "typescript": "tree_sitter_typescript",
    }
|
|
606
|
+
|
|
607
|
+
    def __init__(self, emitter: Optional[CodeTreeEventEmitter] = None):
        """Initialize the multi-language analyzer.

        Args:
            emitter: Optional event emitter used to stream errors/nodes
                incrementally while files are analyzed.
        """
        self.logger = get_logger(__name__)
        self.emitter = emitter
        # Map of language name -> configured tree_sitter.Parser; populated
        # by _init_parsers() and consulted by analyze_file().
        self.parsers = {}
        self._init_parsers()
|
|
612
|
+
|
|
613
|
+
    def _init_parsers(self):
        """Initialize tree-sitter parsers for supported languages.

        Populates ``self.parsers`` with one parser per entry in
        LANGUAGE_PARSERS whose grammar module can be imported. Languages
        with missing grammars are skipped quietly so callers can fall back
        to basic file discovery.
        """
        if not TREE_SITTER_AVAILABLE:
            # Core tree-sitter package missing: leave self.parsers empty so
            # analyze_file() returns [] for every language.
            self.logger.warning(
                "tree-sitter not available - multi-language support disabled"
            )
            return

        for lang, module_name in self.LANGUAGE_PARSERS.items():
            try:
                # Dynamic import of language module
                module = __import__(module_name)
                parser = tree_sitter.Parser()
                # Different tree-sitter versions have different APIs:
                # older releases expose Parser.set_language(); newer ones
                # take the Language object in the Parser constructor.
                if hasattr(parser, "set_language"):
                    parser.set_language(tree_sitter.Language(module.language()))
                else:
                    # Newer API: rebuild the parser with the language bound.
                    lang_obj = tree_sitter.Language(module.language())
                    parser = tree_sitter.Parser(lang_obj)
                self.parsers[lang] = parser
            except (ImportError, AttributeError) as e:
                # Silently skip unavailable parsers - will fall back to basic file discovery
                self.logger.debug(f"Language parser not available for {lang}: {e}")
|
|
637
|
+
|
|
638
|
+
def analyze_file(self, file_path: Path, language: str) -> List[CodeNode]:
|
|
639
|
+
"""Analyze a file using tree-sitter.
|
|
640
|
+
|
|
641
|
+
Args:
|
|
642
|
+
file_path: Path to source file
|
|
643
|
+
language: Programming language
|
|
644
|
+
|
|
645
|
+
Returns:
|
|
646
|
+
List of code nodes found in the file
|
|
647
|
+
"""
|
|
648
|
+
if language not in self.parsers:
|
|
649
|
+
# No parser available - return empty list to fall back to basic discovery
|
|
650
|
+
self.logger.debug(
|
|
651
|
+
f"No parser available for language: {language}, using basic file discovery"
|
|
652
|
+
)
|
|
653
|
+
return []
|
|
654
|
+
|
|
655
|
+
nodes = []
|
|
656
|
+
|
|
657
|
+
try:
|
|
658
|
+
with open(file_path, "rb") as f:
|
|
659
|
+
source = f.read()
|
|
660
|
+
|
|
661
|
+
parser = self.parsers[language]
|
|
662
|
+
tree = parser.parse(source)
|
|
663
|
+
|
|
664
|
+
# Extract nodes based on language
|
|
665
|
+
if language in {"javascript", "typescript"}:
|
|
666
|
+
nodes = self._extract_js_nodes(tree, file_path, source)
|
|
667
|
+
else:
|
|
668
|
+
nodes = self._extract_generic_nodes(tree, file_path, source, language)
|
|
669
|
+
|
|
670
|
+
except Exception as e:
|
|
671
|
+
self.logger.error(f"Error analyzing {file_path}: {e}")
|
|
672
|
+
if self.emitter:
|
|
673
|
+
self.emitter.emit_error(str(file_path), str(e))
|
|
674
|
+
|
|
675
|
+
return nodes
|
|
676
|
+
|
|
677
|
+
def _extract_js_nodes(self, tree, file_path: Path, source: bytes) -> List[CodeNode]:
    """Extract classes, functions, and methods from a JS/TS syntax tree.

    Walks the tree-sitter parse tree in pre-order. ``class_declaration``
    nodes are recorded as ``class``; ``function_declaration`` /
    ``arrow_function`` / ``method_definition`` nodes as ``function`` or
    ``method``.

    Args:
        tree: Parsed tree-sitter tree for the file.
        file_path: Path of the analyzed source file.
        source: Raw file bytes (used to slice out identifier text).

    Returns:
        Flat list of CodeNode entries in order of appearance.
    """
    nodes = []

    def decoded_name(ast_node):
        """Return the UTF-8 identifier of *ast_node*, or None if unnamed."""
        name_node = ast_node.child_by_field_name("name")
        if not name_node:
            return None
        return source[name_node.start_byte : name_node.end_byte].decode("utf-8")

    def walk_tree(node, parent_name=None):
        # Parent passed down to children; updated when entering a named class
        # so members declared inside it record the class as their parent.
        # BUGFIX: previously parent_name was threaded through the recursion
        # but never assigned, so every extracted node's ``parent`` was None.
        child_parent = parent_name

        if node.type == "class_declaration":
            name = decoded_name(node)
            if name:
                class_node = CodeNode(
                    file_path=str(file_path),
                    node_type="class",
                    name=name,
                    line_start=node.start_point[0] + 1,
                    line_end=node.end_point[0] + 1,
                    parent=parent_name,
                    language="javascript",
                )
                nodes.append(class_node)
                child_parent = name

                if self.emitter:
                    self.emitter.emit_node(
                        CodeNodeEvent(
                            file_path=str(file_path),
                            node_type="class",
                            name=class_node.name,
                            line_start=class_node.line_start,
                            line_end=class_node.line_end,
                            parent=parent_name,
                            language="javascript",
                        )
                    )

        elif node.type in (
            "function_declaration",
            "arrow_function",
            "method_definition",
        ):
            func_name = decoded_name(node)
            if func_name:
                func_node = CodeNode(
                    file_path=str(file_path),
                    node_type=(
                        "function" if node.type != "method_definition" else "method"
                    ),
                    name=func_name,
                    line_start=node.start_point[0] + 1,
                    line_end=node.end_point[0] + 1,
                    parent=parent_name,
                    language="javascript",
                )
                nodes.append(func_node)

                if self.emitter:
                    self.emitter.emit_node(
                        CodeNodeEvent(
                            file_path=str(file_path),
                            node_type=func_node.node_type,
                            name=func_name,
                            line_start=func_node.line_start,
                            line_end=func_node.line_end,
                            parent=parent_name,
                            language="javascript",
                        )
                    )

        # Recursively walk children
        for child in node.children:
            walk_tree(child, child_parent)

    walk_tree(tree.root_node)
    return nodes
|
|
755
|
+
|
|
756
|
+
def _extract_generic_nodes(
    self, tree, file_path: Path, source: bytes, language: str
) -> List[CodeNode]:
    """Generic node extraction for other languages.

    Matches node types by substring ("class"/"struct" -> class,
    "function"/"method" -> function) and synthesizes a name from the node
    type plus its starting row. Can be enhanced per language.
    """
    nodes: List[CodeNode] = []

    def record(ast_node, kind):
        """Append a CodeNode for *ast_node* classified as *kind*."""
        nodes.append(
            CodeNode(
                file_path=str(file_path),
                node_type=kind,
                name=f"{ast_node.type}_{ast_node.start_point[0]}",
                line_start=ast_node.start_point[0] + 1,
                line_end=ast_node.end_point[0] + 1,
                language=language,
            )
        )

    # Explicit stack, pushed in reverse so the pop order reproduces the
    # pre-order traversal a recursive walk would give.
    pending = [tree.root_node]
    while pending:
        current = pending.pop()
        kind = current.type
        if "class" in kind or "struct" in kind:
            record(current, "class")
        elif "function" in kind or "method" in kind:
            record(current, "function")
        pending.extend(reversed(current.children))

    return nodes
|
|
793
|
+
|
|
794
|
+
|
|
795
|
+
class CodeTreeAnalyzer:
|
|
796
|
+
"""Main analyzer that coordinates language-specific analyzers.
|
|
797
|
+
|
|
798
|
+
WHY: Provides a unified interface for analyzing codebases with multiple
|
|
799
|
+
languages, handling caching and incremental processing.
|
|
800
|
+
"""
|
|
801
|
+
|
|
802
|
+
# Define code file extensions at class level for directory filtering.
# NOTE: this set is broader than what the analyzers can parse — it also
# includes config/doc/data extensions (.json, .yaml, .md, ...) and is used
# by has_code_files() to decide whether a directory is worth showing.
CODE_EXTENSIONS = {
    '.py', '.js', '.ts', '.tsx', '.jsx', '.java', '.cpp', '.c', '.h', '.hpp',
    '.cs', '.go', '.rs', '.rb', '.php', '.swift', '.kt', '.scala', '.r',
    '.m', '.mm', '.sh', '.bash', '.zsh', '.fish', '.ps1', '.bat', '.cmd',
    '.sql', '.html', '.css', '.scss', '.sass', '.less', '.xml', '.json',
    '.yaml', '.yml', '.toml', '.ini', '.cfg', '.conf', '.md', '.rst', '.txt'
}

# File extensions to language mapping.
# Only these extensions are actually collected by analyze_directory();
# Python goes to PythonAnalyzer, JS/TS to the tree-sitter based analyzer.
LANGUAGE_MAP = {
    ".py": "python",
    ".js": "javascript",
    ".jsx": "javascript",
    ".ts": "typescript",
    ".tsx": "typescript",
    ".mjs": "javascript",
    ".cjs": "javascript",
}
|
|
821
|
+
|
|
822
|
+
def __init__(
    self,
    emit_events: bool = True,
    cache_dir: Optional[Path] = None,
    emitter: Optional[CodeTreeEventEmitter] = None,
    show_hidden_files: bool = False,
):
    """Initialize the code tree analyzer.

    Args:
        emit_events: Whether to emit Socket.IO events
        cache_dir: Directory for caching analysis results
        emitter: Optional event emitter to use (creates one if not provided)
        show_hidden_files: Whether to show hidden files/directories (default False - hide dotfiles)
    """
    self.logger = get_logger(__name__)
    self.emit_events = emit_events
    self.cache_dir = cache_dir or Path.home() / ".claude-mpm" / "code-cache"
    self.show_hidden_files = show_hidden_files

    # Gitignore filtering honours the hidden-files setting (default False)
    self.gitignore_manager = GitignoreManager(show_hidden_files=show_hidden_files)
    self._last_working_dir = None

    # Prefer a caller-supplied emitter; otherwise create one only when
    # event emission is requested.
    self.emitter = emitter or (
        CodeTreeEventEmitter(use_stdout=True) if emit_events else None
    )

    # Language analyzers: Python has a dedicated analyzer, everything else
    # (including JS/TS) is routed through the multi-language analyzer.
    self.python_analyzer = PythonAnalyzer(self.emitter)
    self.multi_lang_analyzer = MultiLanguageAnalyzer(self.emitter)
    self.javascript_analyzer = self.multi_lang_analyzer
    self.generic_analyzer = self.multi_lang_analyzer

    # Per-file result cache, warmed from disk
    self.cache = {}
    self._load_cache()
|
|
865
|
+
|
|
866
|
+
def analyze_directory(
    self,
    directory: Path,
    languages: Optional[List[str]] = None,
    ignore_patterns: Optional[List[str]] = None,
    max_depth: Optional[int] = None,
) -> Dict[str, Any]:
    """Analyze a directory and build code tree.

    Args:
        directory: Directory to analyze
        languages: Languages to include (None for all)
        ignore_patterns: Patterns to ignore (substring match on full path)
        max_depth: Maximum directory depth

    Returns:
        Dictionary containing the code tree and statistics
        ({"tree": ..., "nodes": ..., "stats": ...})
    """
    if self.emitter:
        self.emitter.start()

    start_time = time.time()
    all_nodes = []
    files_processed = 0
    total_files = 0

    # Phase 1: collect files to process. Only extensions in LANGUAGE_MAP
    # are considered; gitignore rules, caller-supplied substring patterns,
    # and the optional depth limit all prune the candidate list.
    files_to_process = []
    for ext, lang in self.LANGUAGE_MAP.items():
        if languages and lang not in languages:
            continue

        for file_path in directory.rglob(f"*{ext}"):
            # Use gitignore manager for filtering with directory as working dir
            if self.gitignore_manager.should_ignore(file_path, directory):
                continue

            # Also check additional patterns
            if ignore_patterns and any(
                p in str(file_path) for p in ignore_patterns
            ):
                continue

            # Check max depth (0 = directly inside `directory`)
            if max_depth:
                depth = len(file_path.relative_to(directory).parts) - 1
                if depth > max_depth:
                    continue

            files_to_process.append((file_path, lang))

    total_files = len(files_to_process)

    # Phase 2: analyze each file, reusing cached results when the file's
    # content hash matches a previous run.
    for file_path, language in files_to_process:
        # Cache key couples path and content hash, so edits invalidate it
        file_hash = self._get_file_hash(file_path)
        cache_key = f"{file_path}:{file_hash}"

        if cache_key in self.cache:
            nodes = self.cache[cache_key]
            self.logger.debug(f"Using cached results for {file_path}")
        else:
            # Emit file start event
            if self.emitter:
                self.emitter.emit_file_start(str(file_path), language)

            file_start = time.time()

            # Analyze based on language
            if language == "python":
                nodes = self.python_analyzer.analyze_file(file_path)
            else:
                nodes = self.multi_lang_analyzer.analyze_file(file_path, language)

            # If no nodes found and we have a valid language, emit basic file info
            if not nodes and language != "unknown":
                self.logger.debug(
                    f"No AST nodes found for {file_path}, using basic discovery"
                )

            # Cache results (persisted to disk at the end via _save_cache)
            self.cache[cache_key] = nodes

            # Emit file complete event
            if self.emitter:
                self.emitter.emit_file_complete(
                    str(file_path), len(nodes), time.time() - file_start
                )

        all_nodes.extend(nodes)
        files_processed += 1

        # Emit progress every 10 files to keep event volume bounded
        if self.emitter and files_processed % 10 == 0:
            self.emitter.emit_progress(
                files_processed, total_files, f"Processing {file_path.name}"
            )

    # Build tree structure from the flat node list
    tree = self._build_tree(all_nodes, directory)

    # Calculate statistics
    duration = time.time() - start_time
    stats = {
        "files_processed": files_processed,
        "total_nodes": len(all_nodes),
        "duration": duration,
        "classes": sum(1 for n in all_nodes if n.node_type == "class"),
        "functions": sum(
            1 for n in all_nodes if n.node_type in ("function", "method")
        ),
        "imports": sum(1 for n in all_nodes if n.node_type == "import"),
        "languages": list(
            {n.language for n in all_nodes if hasattr(n, "language")}
        ),
        "avg_complexity": (
            sum(n.complexity for n in all_nodes) / len(all_nodes)
            if all_nodes
            else 0
        ),
    }

    # Save cache so subsequent runs can skip unchanged files
    self._save_cache()

    # Stop emitter
    if self.emitter:
        self.emitter.stop()

    return {"tree": tree, "nodes": all_nodes, "stats": stats}
|
|
997
|
+
|
|
998
|
+
def _should_ignore(self, file_path: Path, patterns: Optional[List[str]]) -> bool:
    """Check if file should be ignored.

    Uses GitignoreManager for proper pattern matching, then falls back to
    simple substring matching against any caller-supplied *patterns*.
    """
    # Pick the reference directory for gitignore resolution: a file is
    # checked against its containing directory; anything else against its
    # parent, falling back to the cwd at the filesystem root.
    if file_path.is_file():
        base_dir = file_path.parent
    else:
        base_dir = file_path.parent if file_path.parent != file_path else Path.cwd()

    if self.gitignore_manager.should_ignore(file_path, base_dir):
        return True

    # Extra ad-hoc patterns are plain substring matches on the full path
    if not patterns:
        return False
    path_text = str(file_path)
    return any(pattern in path_text for pattern in patterns)
|
|
1022
|
+
|
|
1023
|
+
def _get_file_hash(self, file_path: Path) -> str:
    """Return the MD5 hex digest of *file_path*'s contents.

    Used purely as a cache-invalidation fingerprint, not for security.
    """
    return hashlib.md5(file_path.read_bytes()).hexdigest()
|
|
1029
|
+
|
|
1030
|
+
def _build_tree(self, nodes: List[CodeNode], root_dir: Path) -> Dict[str, Any]:
|
|
1031
|
+
"""Build hierarchical tree structure from flat nodes list."""
|
|
1032
|
+
tree = {
|
|
1033
|
+
"name": root_dir.name,
|
|
1034
|
+
"type": "directory",
|
|
1035
|
+
"path": str(root_dir),
|
|
1036
|
+
"children": [],
|
|
1037
|
+
}
|
|
1038
|
+
|
|
1039
|
+
# Group nodes by file
|
|
1040
|
+
files_map = {}
|
|
1041
|
+
for node in nodes:
|
|
1042
|
+
if node.file_path not in files_map:
|
|
1043
|
+
files_map[node.file_path] = {
|
|
1044
|
+
"name": Path(node.file_path).name,
|
|
1045
|
+
"type": "file",
|
|
1046
|
+
"path": node.file_path,
|
|
1047
|
+
"children": [],
|
|
1048
|
+
}
|
|
1049
|
+
|
|
1050
|
+
# Add node to file
|
|
1051
|
+
node_dict = {
|
|
1052
|
+
"name": node.name,
|
|
1053
|
+
"type": node.node_type,
|
|
1054
|
+
"line_start": node.line_start,
|
|
1055
|
+
"line_end": node.line_end,
|
|
1056
|
+
"complexity": node.complexity,
|
|
1057
|
+
"has_docstring": node.has_docstring,
|
|
1058
|
+
"decorators": node.decorators,
|
|
1059
|
+
"signature": node.signature,
|
|
1060
|
+
}
|
|
1061
|
+
files_map[node.file_path]["children"].append(node_dict)
|
|
1062
|
+
|
|
1063
|
+
# Build directory structure
|
|
1064
|
+
for file_path, file_node in files_map.items():
|
|
1065
|
+
rel_path = Path(file_path).relative_to(root_dir)
|
|
1066
|
+
parts = rel_path.parts
|
|
1067
|
+
|
|
1068
|
+
current = tree
|
|
1069
|
+
for part in parts[:-1]:
|
|
1070
|
+
# Find or create directory
|
|
1071
|
+
dir_node = None
|
|
1072
|
+
for child in current["children"]:
|
|
1073
|
+
if child["type"] == "directory" and child["name"] == part:
|
|
1074
|
+
dir_node = child
|
|
1075
|
+
break
|
|
1076
|
+
|
|
1077
|
+
if not dir_node:
|
|
1078
|
+
dir_node = {"name": part, "type": "directory", "children": []}
|
|
1079
|
+
current["children"].append(dir_node)
|
|
1080
|
+
|
|
1081
|
+
current = dir_node
|
|
1082
|
+
|
|
1083
|
+
# Add file to current directory
|
|
1084
|
+
current["children"].append(file_node)
|
|
1085
|
+
|
|
1086
|
+
return tree
|
|
1087
|
+
|
|
1088
|
+
def _load_cache(self):
    """Load cached analysis results from disk, tolerating absence/corruption."""
    cache_file = self.cache_dir / "code_tree_cache.json"
    if not cache_file.exists():
        return
    try:
        with open(cache_file) as f:
            cache_data = json.load(f)
        # Rehydrate CodeNode objects from their serialized dict form
        for key, nodes_data in cache_data.items():
            self.cache[key] = [CodeNode(**node_data) for node_data in nodes_data]
        self.logger.info(f"Loaded cache with {len(self.cache)} entries")
    except Exception as e:
        # A bad cache file is never fatal — we just re-analyze from scratch
        self.logger.warning(f"Failed to load cache: {e}")
|
|
1103
|
+
|
|
1104
|
+
def _save_cache(self):
    """Persist the in-memory analysis cache to disk as JSON."""
    self.cache_dir.mkdir(parents=True, exist_ok=True)
    cache_file = self.cache_dir / "code_tree_cache.json"

    def serialize(n):
        """Flatten one CodeNode into a JSON-safe dict."""
        return {
            "file_path": n.file_path,
            "node_type": n.node_type,
            "name": n.name,
            "line_start": n.line_start,
            "line_end": n.line_end,
            "complexity": n.complexity,
            "has_docstring": n.has_docstring,
            "decorators": n.decorators,
            "parent": n.parent,
            "language": n.language,
            "signature": n.signature,
        }

    try:
        cache_data = {
            key: [serialize(n) for n in entries]
            for key, entries in self.cache.items()
        }
        with open(cache_file, "w") as f:
            json.dump(cache_data, f, indent=2)
        self.logger.info(f"Saved cache with {len(self.cache)} entries")
    except Exception as e:
        # Failure to persist is logged, never raised — the in-memory cache
        # remains valid for the rest of this run.
        self.logger.warning(f"Failed to save cache: {e}")
|
|
1136
|
+
|
|
1137
|
+
def has_code_files(self, directory: Path, depth: int = 5, current_depth: int = 0) -> bool:
    """Check whether *directory* contains any code files.

    Args:
        directory: Directory to check
        depth: Maximum depth to search (default 5 levels)
        current_depth: Current recursion depth (internal)

    Returns:
        True if a file whose extension is in CODE_EXTENSIONS exists within
        *depth* levels; hidden entries and well-known tool/build directories
        are never descended into.
    """
    if current_depth >= depth:
        return False

    # Directories whose contents never count as project code
    skip_dirs = {'node_modules', '__pycache__', '.git', '.venv', 'venv', 'dist', 'build',
                 '.tox', 'htmlcov', '.pytest_cache', '.mypy_cache', 'coverage',
                 '.idea', '.vscode', 'env', '.coverage', '__MACOSX', '.ipynb_checkpoints'}
    if directory.name in skip_dirs:
        return False

    try:
        for entry in directory.iterdir():
            # Hidden files/dirs are skipped during this scan
            if entry.name.startswith('.'):
                continue

            if entry.is_file():
                if entry.suffix.lower() in self.CODE_EXTENSIONS:
                    return True
                continue

            # Recurse only while another level still fits under the limit
            if entry.is_dir() and current_depth < depth - 1:
                if self.has_code_files(entry, depth, current_depth + 1):
                    return True
    except (PermissionError, OSError):
        # Unreadable directories simply count as "no code here"
        pass

    return False
|
|
1176
|
+
|
|
1177
|
+
def discover_top_level(
    self, directory: Path, ignore_patterns: Optional[List[str]] = None
) -> Dict[str, Any]:
    """Discover only top-level directories and files for lazy loading.

    Emits fine-grained 'info' events (discovery.start / filter.* /
    discovery.directory / discovery.file / discovery.complete) when an
    emitter is configured, and returns a one-level tree whose directory
    children are marked ``discovered: False`` and file children
    ``analyzed: False`` so the UI can expand them lazily.

    Args:
        directory: Root directory to discover
        ignore_patterns: Patterns to ignore (substring match on full path)

    Returns:
        Dictionary with top-level structure
    """
    # CRITICAL FIX: Use the directory parameter as the base for relative paths
    # NOT the current working directory. This ensures we only show items
    # within the requested directory, not parent directories.
    working_dir = Path(directory).absolute()

    # Emit discovery start event
    if self.emitter:
        from datetime import datetime
        self.emitter.emit('info', {
            'type': 'discovery.start',
            'action': 'scanning_directory',
            'path': str(directory),
            'message': f'Starting discovery of {directory.name}',
            'timestamp': datetime.now().isoformat()
        })

    result = {
        "path": str(directory),
        "name": directory.name,
        "type": "directory",
        "children": [],
    }

    try:
        # Clear gitignore cache if the working directory changed since the
        # previous discovery call
        if self._last_working_dir != directory:
            self.gitignore_manager.clear_cache()
            self._last_working_dir = directory

        # Get immediate children only (no recursion)
        files_count = 0
        dirs_count = 0
        ignored_count = 0

        for item in directory.iterdir():
            # Use gitignore manager for filtering with the directory as working dir
            if self.gitignore_manager.should_ignore(item, directory):
                if self.emitter:
                    from datetime import datetime
                    self.emitter.emit('info', {
                        'type': 'filter.gitignore',
                        'path': str(item),
                        'reason': 'gitignore pattern',
                        'message': f'Ignored by gitignore: {item.name}',
                        'timestamp': datetime.now().isoformat()
                    })
                ignored_count += 1
                continue

            # Also check additional patterns if provided
            if ignore_patterns and any(p in str(item) for p in ignore_patterns):
                if self.emitter:
                    from datetime import datetime
                    self.emitter.emit('info', {
                        'type': 'filter.pattern',
                        'path': str(item),
                        'reason': 'custom pattern',
                        'message': f'Ignored by pattern: {item.name}',
                        'timestamp': datetime.now().isoformat()
                    })
                ignored_count += 1
                continue

            if item.is_dir():
                # Only include directories that contain code files (5-level deep scan)
                if not self.has_code_files(item, depth=5):
                    if self.emitter:
                        from datetime import datetime
                        self.emitter.emit('info', {
                            'type': 'filter.no_code',
                            'path': str(item.name),
                            'reason': 'no code files',
                            'message': f'Skipped directory without code: {item.name}',
                            'timestamp': datetime.now().isoformat()
                        })
                    ignored_count += 1
                    continue

                # Directory - just mark as unexplored
                # CRITICAL FIX: Use relative path from working directory
                # This prevents the frontend from showing parent directories
                try:
                    relative_path = item.relative_to(working_dir)
                    path_str = str(relative_path)
                except ValueError:
                    # If somehow the item is outside working_dir, skip it
                    self.logger.warning(f"Directory outside working dir: {item}")
                    continue

                # Emit directory found event
                if self.emitter:
                    from datetime import datetime
                    self.emitter.emit('info', {
                        'type': 'discovery.directory',
                        'path': str(item),
                        'message': f'Found directory: {item.name}',
                        'timestamp': datetime.now().isoformat()
                    })
                dirs_count += 1

                child = {
                    "path": path_str,
                    "name": item.name,
                    "type": "directory",
                    "discovered": False,
                    "children": [],
                }
                result["children"].append(child)

                if self.emitter:
                    self.emitter.emit_directory_discovered(path_str, [])

            elif item.is_file():
                # Check if it's a supported code file or a special file we want to show
                # NOTE(review): self.supported_extensions is not assigned in the
                # __init__ shown in this module — confirm it is set elsewhere
                # (e.g. a subclass or later assignment); otherwise this line
                # raises AttributeError for every file.
                if item.suffix in self.supported_extensions or item.name in ['.gitignore', '.env.example', '.env.sample']:
                    # File - mark for lazy analysis
                    language = self._get_language(item)

                    # CRITICAL FIX: Use relative path from working directory
                    # This prevents the frontend from showing parent directories
                    try:
                        relative_path = item.relative_to(working_dir)
                        path_str = str(relative_path)
                    except ValueError:
                        # If somehow the item is outside working_dir, skip it
                        self.logger.warning(f"File outside working dir: {item}")
                        continue

                    # Emit file found event
                    if self.emitter:
                        from datetime import datetime
                        self.emitter.emit('info', {
                            'type': 'discovery.file',
                            'path': str(item),
                            'language': language,
                            'size': item.stat().st_size,
                            'message': f'Found file: {item.name} ({language})',
                            'timestamp': datetime.now().isoformat()
                        })
                    files_count += 1

                    child = {
                        "path": path_str,
                        "name": item.name,
                        "type": "file",
                        "language": language,
                        "size": item.stat().st_size,
                        "analyzed": False,
                    }
                    result["children"].append(child)

                    if self.emitter:
                        self.emitter.emit_file_discovered(
                            path_str, language, item.stat().st_size
                        )

    except PermissionError as e:
        self.logger.warning(f"Permission denied accessing {directory}: {e}")
        if self.emitter:
            self.emitter.emit_error(str(directory), f"Permission denied: {e}")

    # Emit discovery complete event with stats
    if self.emitter:
        from datetime import datetime
        self.emitter.emit('info', {
            'type': 'discovery.complete',
            'path': str(directory),
            'stats': {
                'files': files_count,
                'directories': dirs_count,
                'ignored': ignored_count
            },
            'message': f'Discovery complete: {files_count} files, {dirs_count} directories, {ignored_count} ignored',
            'timestamp': datetime.now().isoformat()
        })

    return result
|
|
1366
|
+
|
|
1367
|
+
def discover_directory(
    self, dir_path: str, ignore_patterns: Optional[List[str]] = None
) -> Dict[str, Any]:
    """Discover contents of a specific directory for lazy loading.

    Args:
        dir_path: Directory path to discover
        ignore_patterns: Patterns to ignore

    Returns:
        Dictionary with directory contents, or {"error": ...} for an
        invalid path.
    """
    directory = Path(dir_path)
    if not (directory.exists() and directory.is_dir()):
        return {"error": f"Invalid directory: {dir_path}"}

    # Clear gitignore cache when we move to a different parent directory.
    # NOTE(review): this tracks directory.parent while discover_top_level
    # tracks the directory itself, so the cache is typically cleared twice
    # per call — harmless, but worth confirming/unifying.
    parent = directory.parent
    if self._last_working_dir != parent:
        self.gitignore_manager.clear_cache()
        self._last_working_dir = parent

    # discover_top_level emits all the INFO events
    return self.discover_top_level(directory, ignore_patterns)
|
|
1390
|
+
|
|
1391
|
+
def analyze_file(self, file_path: str) -> Dict[str, Any]:
    """Analyze a specific file and return its AST structure.

    Two paths exist:
      * cache hit  — reuse the stored nodes; filtering happens lazily in
        the return expression.
      * cache miss — run the language-appropriate analyzer, filter internal
        nodes eagerly (building ``filtered_nodes``), emit per-node and
        summary events, and store the raw nodes in the cache.

    Args:
        file_path: Path to file to analyze

    Returns:
        Dictionary with file analysis results
        ({"path", "language", "nodes"} or {"error": ...}).
    """
    path = Path(file_path)
    if not path.exists() or not path.is_file():
        return {"error": f"Invalid file: {file_path}"}

    # Get language first (needed for return statement)
    language = self._get_language(path)

    # Emit analysis start event
    if self.emitter:
        from datetime import datetime
        self.emitter.emit('info', {
            'type': 'analysis.start',
            'file': str(path),
            'language': language,
            'message': f'Analyzing: {path.name}',
            'timestamp': datetime.now().isoformat()
        })

    # Check cache — key couples path and content hash so edits invalidate it
    file_hash = self._get_file_hash(path)
    cache_key = f"{file_path}:{file_hash}"

    if cache_key in self.cache:
        nodes = self.cache[cache_key]
        if self.emitter:
            from datetime import datetime
            self.emitter.emit('info', {
                'type': 'cache.hit',
                'file': str(path),
                'message': f'Using cached analysis for {path.name}',
                'timestamp': datetime.now().isoformat()
            })
    else:
        # Analyze file
        if self.emitter:
            from datetime import datetime
            self.emitter.emit('info', {
                'type': 'cache.miss',
                'file': str(path),
                'message': f'Cache miss, analyzing fresh: {path.name}',
                'timestamp': datetime.now().isoformat()
            })

        # Route to the language-appropriate analyzer
        if language == "python":
            analyzer = self.python_analyzer
        elif language == "javascript" or language == "typescript":
            analyzer = self.javascript_analyzer
        else:
            analyzer = self.generic_analyzer

        start_time = time.time()

        # Emit parsing event
        if self.emitter:
            from datetime import datetime
            self.emitter.emit('info', {
                'type': 'analysis.parse',
                'file': str(path),
                'message': f'Parsing file content: {path.name}',
                'timestamp': datetime.now().isoformat()
            })

        nodes = analyzer.analyze_file(path) if analyzer else []
        duration = time.time() - start_time

        # Cache results (raw, unfiltered nodes)
        self.cache[cache_key] = nodes

        # Filter internal functions before emitting.
        # NOTE: this whole section runs only on the cache-miss path, which
        # is why the return statement below guards on
        # `"filtered_nodes" in locals()`.
        filtered_nodes = []
        classes_count = 0
        functions_count = 0
        methods_count = 0

        for node in nodes:
            # Only include main structural elements
            if not self._is_internal_node(node):
                # Emit found element event
                if self.emitter:
                    from datetime import datetime
                    self.emitter.emit('info', {
                        'type': f'analysis.{node.node_type}',
                        'name': node.name,
                        'file': str(path),
                        'line_start': node.line_start,
                        'complexity': node.complexity,
                        'message': f'Found {node.node_type}: {node.name}',
                        'timestamp': datetime.now().isoformat()
                    })

                # Count node types
                if node.node_type == 'class':
                    classes_count += 1
                elif node.node_type == 'function':
                    functions_count += 1
                elif node.node_type == 'method':
                    methods_count += 1

                filtered_nodes.append(
                    {
                        "name": node.name,
                        "type": node.node_type,
                        "line_start": node.line_start,
                        "line_end": node.line_end,
                        "complexity": node.complexity,
                        "has_docstring": node.has_docstring,
                        "signature": node.signature,
                    }
                )

        # Emit analysis complete event with stats (cache-miss path only —
        # emit_file_analyzed is intentionally not fired on cache hits)
        if self.emitter:
            from datetime import datetime
            self.emitter.emit('info', {
                'type': 'analysis.complete',
                'file': str(path),
                'stats': {
                    'classes': classes_count,
                    'functions': functions_count,
                    'methods': methods_count,
                    'total_nodes': len(filtered_nodes)
                },
                'duration': duration,
                'message': f'Analysis complete: {classes_count} classes, {functions_count} functions, {methods_count} methods',
                'timestamp': datetime.now().isoformat()
            })

            self.emitter.emit_file_analyzed(file_path, filtered_nodes, duration)

    # On a cache hit `filtered_nodes` was never built, so the same
    # filtering is applied inline to the cached nodes here.
    return {
        "path": file_path,
        "language": language,
        "nodes": (
            filtered_nodes
            if "filtered_nodes" in locals()
            else [
                {
                    "name": n.name,
                    "type": n.node_type,
                    "line_start": n.line_start,
                    "line_end": n.line_end,
                    "complexity": n.complexity,
                    "has_docstring": n.has_docstring,
                    "signature": n.signature,
                }
                for n in nodes
                if not self._is_internal_node(n)
            ]
        ),
    }
|
|
1550
|
+
|
|
1551
|
+
def _is_internal_node(self, node: CodeNode) -> bool:
    """Decide whether *node* is internal plumbing to hide from analysis output.

    Classes and ``__init__`` are always surfaced; any other node whose
    lowercased name starts with a known "internal" prefix (event handlers,
    callbacks, private members, trivial accessors, dunders) is filtered.
    """
    # Classes are structural elements and are never filtered.
    if node.node_type == "class":
        return False

    # __init__ is the one underscore-prefixed name we always keep.
    if node.name == "__init__":
        return False

    # Prefixes that mark a node as internal rather than public API.
    internal_prefixes = (
        "handle",  # event handlers
        "on_",     # event callbacks
        "_",       # private members (also covers "__" dunders)
        "get_",    # simple getters
        "set_",    # simple setters
    )
    return node.name.lower().startswith(internal_prefixes)
|
|
1578
|
+
|
|
1579
|
+
@property
def supported_extensions(self):
    """Set of file extensions this analyzer can process (Python + JS/TS family)."""
    return {ext for ext in (".py", ".js", ".jsx", ".ts", ".tsx", ".mjs", ".cjs")}
|
|
1583
|
+
|
|
1584
|
+
def _get_language(self, file_path: Path) -> str:
|
|
1585
|
+
"""Determine language from file extension."""
|
|
1586
|
+
ext = file_path.suffix.lower()
|
|
1587
|
+
language_map = {
|
|
1588
|
+
".py": "python",
|
|
1589
|
+
".js": "javascript",
|
|
1590
|
+
".jsx": "javascript",
|
|
1591
|
+
".ts": "typescript",
|
|
1592
|
+
".tsx": "typescript",
|
|
1593
|
+
".mjs": "javascript",
|
|
1594
|
+
".cjs": "javascript",
|
|
1595
|
+
}
|
|
1596
|
+
return language_map.get(ext, "unknown")
|