claude-mpm 4.1.10__py3-none-any.whl → 4.1.11__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (48) hide show
  1. claude_mpm/VERSION +1 -1
  2. claude_mpm/cli/__init__.py +11 -0
  3. claude_mpm/cli/commands/analyze.py +2 -1
  4. claude_mpm/cli/commands/configure.py +9 -8
  5. claude_mpm/cli/commands/configure_tui.py +3 -1
  6. claude_mpm/cli/commands/dashboard.py +288 -0
  7. claude_mpm/cli/commands/debug.py +0 -1
  8. claude_mpm/cli/commands/mpm_init.py +427 -0
  9. claude_mpm/cli/commands/mpm_init_handler.py +83 -0
  10. claude_mpm/cli/parsers/base_parser.py +15 -0
  11. claude_mpm/cli/parsers/dashboard_parser.py +113 -0
  12. claude_mpm/cli/parsers/mpm_init_parser.py +122 -0
  13. claude_mpm/constants.py +10 -0
  14. claude_mpm/dashboard/analysis_runner.py +52 -25
  15. claude_mpm/dashboard/static/built/components/activity-tree.js +1 -1
  16. claude_mpm/dashboard/static/built/components/code-tree.js +2 -0
  17. claude_mpm/dashboard/static/built/components/code-viewer.js +2 -0
  18. claude_mpm/dashboard/static/built/components/event-viewer.js +1 -1
  19. claude_mpm/dashboard/static/built/dashboard.js +1 -1
  20. claude_mpm/dashboard/static/built/socket-client.js +1 -1
  21. claude_mpm/dashboard/static/css/code-tree.css +330 -1
  22. claude_mpm/dashboard/static/dist/components/activity-tree.js +1 -1
  23. claude_mpm/dashboard/static/dist/components/code-tree.js +1 -1
  24. claude_mpm/dashboard/static/dist/components/event-viewer.js +1 -1
  25. claude_mpm/dashboard/static/dist/dashboard.js +1 -1
  26. claude_mpm/dashboard/static/dist/socket-client.js +1 -1
  27. claude_mpm/dashboard/static/js/components/activity-tree.js +212 -13
  28. claude_mpm/dashboard/static/js/components/code-tree.js +1999 -821
  29. claude_mpm/dashboard/static/js/components/event-viewer.js +58 -19
  30. claude_mpm/dashboard/static/js/dashboard.js +15 -3
  31. claude_mpm/dashboard/static/js/socket-client.js +74 -32
  32. claude_mpm/dashboard/templates/index.html +9 -11
  33. claude_mpm/services/agents/memory/memory_format_service.py +3 -1
  34. claude_mpm/services/cli/agent_cleanup_service.py +1 -4
  35. claude_mpm/services/cli/startup_checker.py +0 -1
  36. claude_mpm/services/core/cache_manager.py +0 -1
  37. claude_mpm/services/socketio/event_normalizer.py +64 -0
  38. claude_mpm/services/socketio/handlers/code_analysis.py +502 -0
  39. claude_mpm/services/socketio/server/connection_manager.py +3 -1
  40. claude_mpm/tools/code_tree_analyzer.py +843 -25
  41. claude_mpm/tools/code_tree_builder.py +0 -1
  42. claude_mpm/tools/code_tree_events.py +113 -15
  43. {claude_mpm-4.1.10.dist-info → claude_mpm-4.1.11.dist-info}/METADATA +2 -1
  44. {claude_mpm-4.1.10.dist-info → claude_mpm-4.1.11.dist-info}/RECORD +48 -41
  45. {claude_mpm-4.1.10.dist-info → claude_mpm-4.1.11.dist-info}/WHEEL +0 -0
  46. {claude_mpm-4.1.10.dist-info → claude_mpm-4.1.11.dist-info}/entry_points.txt +0 -0
  47. {claude_mpm-4.1.10.dist-info → claude_mpm-4.1.11.dist-info}/licenses/LICENSE +0 -0
  48. {claude_mpm-4.1.10.dist-info → claude_mpm-4.1.11.dist-info}/top_level.txt +0 -0
@@ -22,6 +22,14 @@ from dataclasses import dataclass
22
22
  from pathlib import Path
23
23
  from typing import Any, Dict, List, Optional
24
24
 
25
+ try:
26
+ import pathspec
27
+
28
+ PATHSPEC_AVAILABLE = True
29
+ except ImportError:
30
+ PATHSPEC_AVAILABLE = False
31
+ pathspec = None
32
+
25
33
  try:
26
34
  import tree_sitter
27
35
  import tree_sitter_javascript
@@ -37,6 +45,305 @@ from ..core.logging_config import get_logger
37
45
  from .code_tree_events import CodeNodeEvent, CodeTreeEventEmitter
38
46
 
39
47
 
48
class GitignoreManager:
    """Manages .gitignore pattern matching for file filtering.

    WHY: Properly respecting .gitignore patterns ensures we don't analyze
    or display files that should be ignored in the repository.

    Matching is delegated to the optional ``pathspec`` library when it could
    be imported; otherwise a simplified ``fnmatch``-based fallback is used.
    Parsed .gitignore files and compiled specs are cached per instance until
    ``clear_cache`` is called.
    """

    # Default patterns that should always be ignored
    DEFAULT_PATTERNS = [
        ".git/",
        "__pycache__/",
        "*.pyc",
        "*.pyo",
        ".DS_Store",
        ".pytest_cache/",
        ".mypy_cache/",
        "dist/",
        "build/",
        "*.egg-info/",
        ".coverage",
        ".tox/",
        "htmlcov/",
        ".idea/",
        ".vscode/",
        "*.swp",
        "*.swo",
        "*~",
        "Thumbs.db",
        "node_modules/",
        ".venv/",
        "venv/",
        "env/",
        ".env",
        "*.log",
        ".ipynb_checkpoints/",
        "__MACOSX/",
        ".Spotlight-V100/",
        ".Trashes/",
        "desktop.ini",
    ]

    # Additional patterns to hide dotfiles (when enabled)
    DOTFILE_PATTERNS = [
        ".*",  # All dotfiles
        ".*/",  # All dot directories
    ]

    # Dotfiles that stay visible even when hidden files are not shown.
    # .gitignore and .github are deliberately absent: hidden by default.
    DOTFILE_EXCEPTIONS = {
        ".env.example",
        ".env.sample",
        ".gitlab-ci.yml",
        ".travis.yml",
        ".dockerignore",
        ".editorconfig",
        ".eslintrc",
        ".prettierrc",
    }

    # System artefacts hidden regardless of any setting.
    ALWAYS_HIDE = {".DS_Store", "Thumbs.db", ".pyc", ".pyo", ".pyd"}
    ALWAYS_HIDE_SUFFIXES = (".pyc", ".pyo", ".pyd")

    def __init__(self, show_hidden_files: bool = False):
        """Initialize the GitignoreManager.

        Args:
            show_hidden_files: Whether to show hidden files/directories
        """
        self.logger = get_logger(__name__)
        self._pathspec_cache: Dict[str, Any] = {}
        self._gitignore_cache: Dict[str, List[str]] = {}
        self._use_pathspec = PATHSPEC_AVAILABLE
        self.show_hidden_files = show_hidden_files

        if not self._use_pathspec:
            self.logger.warning(
                "pathspec library not available - using basic pattern matching"
            )

    def get_ignore_patterns(self, working_dir: Path) -> List[str]:
        """Get all ignore patterns for a directory.

        Args:
            working_dir: The working directory to search for .gitignore files

        Returns:
            DEFAULT_PATTERNS followed by the patterns of every .gitignore
            found for ``working_dir`` (nearest file first).
        """
        patterns = self.DEFAULT_PATTERNS.copy()

        # DOTFILE_PATTERNS are intentionally not added: dotfiles are decided
        # by name in _name_verdict so that DOTFILE_EXCEPTIONS cannot be
        # overridden by a blanket ".*" pattern.
        for gitignore_file in self._find_gitignore_files(working_dir):
            patterns.extend(self._parse_gitignore(gitignore_file))

        return patterns

    def _name_verdict(self, filename: str) -> Optional[bool]:
        """Decide from the bare file name alone, when that is possible.

        Args:
            filename: Final path component of the candidate

        Returns:
            True (ignore) or False (show) when the name settles the question,
            None when pattern matching is still required.
        """
        # 1. ALWAYS hide system files regardless of settings
        if filename in self.ALWAYS_HIDE or filename.endswith(
            self.ALWAYS_HIDE_SUFFIXES
        ):
            return True

        # 2. Dotfiles are decided here and never reach pattern matching
        if filename.startswith("."):
            if self.show_hidden_files:
                return False  # Show every dotfile
            # Hidden unless explicitly whitelisted
            return filename not in self.DOTFILE_EXCEPTIONS

        return None

    def should_ignore(self, path: Path, working_dir: Path) -> bool:
        """Check if a path should be ignored based on patterns.

        Args:
            path: The path to check
            working_dir: The working directory (for relative path calculation)

        Returns:
            True if the path should be ignored
        """
        verdict = self._name_verdict(path.name)
        if verdict is not None:
            return verdict

        pathspec_obj = self._get_pathspec(working_dir)
        if pathspec_obj is None:
            # Fallback to basic pattern matching
            return self._basic_should_ignore(path, working_dir)

        try:
            rel_path_str = str(path.relative_to(working_dir))
        except ValueError:
            # Path is outside working directory
            return False

        if pathspec_obj.match_file(rel_path_str):
            return True
        # Directory-only patterns ("build/") need the trailing slash to match
        return path.is_dir() and pathspec_obj.match_file(rel_path_str + "/")

    def _get_pathspec(self, working_dir: Path) -> Optional[Any]:
        """Get or create a PathSpec object for the working directory.

        Args:
            working_dir: The working directory

        Returns:
            PathSpec object, or None if pathspec is unavailable or the
            patterns could not be compiled (a warning is logged)
        """
        if not self._use_pathspec:
            return None

        cache_key = str(working_dir)
        if cache_key not in self._pathspec_cache:
            patterns = self.get_ignore_patterns(working_dir)
            try:
                self._pathspec_cache[cache_key] = pathspec.PathSpec.from_lines(
                    "gitwildmatch", patterns
                )
            except Exception as e:
                self.logger.warning(f"Failed to create PathSpec: {e}")
                return None

        return self._pathspec_cache[cache_key]

    def _find_gitignore_files(self, working_dir: Path) -> List[Path]:
        """Find the .gitignore files that apply to ``working_dir``.

        Walks from ``working_dir`` towards the filesystem root and stops at
        the first directory containing ``.git`` (the repository root). The
        repository-root check happens before moving to the parent, so a
        .gitignore located above the repository is never picked up.

        Args:
            working_dir: The directory to start from

        Returns:
            List of .gitignore file paths, nearest directory first
        """
        gitignore_files = []
        current = working_dir

        while True:
            candidate = current / ".gitignore"
            if candidate.exists():
                gitignore_files.append(candidate)

            # Stop at the repository root or at the filesystem root
            if (current / ".git").exists() or current == current.parent:
                break

            current = current.parent

        return gitignore_files

    def _parse_gitignore(self, gitignore_path: Path) -> List[str]:
        """Parse a .gitignore file and return patterns.

        Args:
            gitignore_path: Path to .gitignore file

        Returns:
            Non-empty, non-comment lines with surrounding whitespace removed.
            On a read error a warning is logged and whatever was collected so
            far is returned without being cached.
        """
        cache_key = str(gitignore_path)
        if cache_key in self._gitignore_cache:
            return self._gitignore_cache[cache_key]

        patterns = []
        try:
            with open(gitignore_path, encoding="utf-8") as f:
                for line in f:
                    line = line.strip()
                    # Skip empty lines and comments
                    if line and not line.startswith("#"):
                        patterns.append(line)

            self._gitignore_cache[cache_key] = patterns
        except Exception as e:
            self.logger.warning(f"Failed to parse {gitignore_path}: {e}")

        return patterns

    def _basic_should_ignore(self, path: Path, working_dir: Path) -> bool:
        """Basic pattern matching fallback when pathspec is not available.

        Patterns are matched against the components of ``path`` relative to
        ``working_dir`` rather than against the absolute path string, so the
        location of the project on disk cannot trigger a match. This is a
        simplification of gitignore semantics: negation ("!") patterns are
        skipped and "*" is handled by ``fnmatch`` (it may cross "/").

        Args:
            path: The path to check
            working_dir: The working directory

        Returns:
            True if the path should be ignored
        """
        import fnmatch

        path_name = path.name
        verdict = self._name_verdict(path_name)
        if verdict is not None:
            return verdict

        try:
            rel_parts = path.relative_to(working_dir).parts
        except ValueError:
            # Outside the working directory: only its own name can be matched
            rel_parts = (path_name,)

        is_dir = path.is_dir()
        parent_parts = rel_parts[:-1]
        rel_posix = "/".join(rel_parts)

        for pattern in self.get_ignore_patterns(working_dir):
            # Dotfiles were handled above; negations cannot be honoured here
            if pattern in (".*", ".*/") or pattern.startswith("!"):
                continue

            dir_only = pattern.endswith("/")
            core = pattern.strip("/")
            if not core:
                continue

            if "/" in core:
                # Pattern names a location relative to the working directory
                if rel_posix.startswith(core + "/"):
                    return True
                if (is_dir or not dir_only) and fnmatch.fnmatchcase(
                    rel_posix, core
                ):
                    return True
                continue

            # Name pattern: matches at any level. A directory-only pattern
            # may match the path itself only when the path is a directory;
            # the parent components are directories by construction.
            candidates = parent_parts if (dir_only and not is_dir) else rel_parts
            if any(fnmatch.fnmatchcase(part, core) for part in candidates):
                return True

        return False

    def clear_cache(self):
        """Clear all caches."""
        self._pathspec_cache.clear()
        self._gitignore_cache.clear()
345
+
346
+
40
347
  @dataclass
41
348
  class CodeNode:
42
349
  """Represents a node in the code tree."""
@@ -325,7 +632,8 @@ class MultiLanguageAnalyzer:
325
632
  parser = tree_sitter.Parser(lang_obj)
326
633
  self.parsers[lang] = parser
327
634
  except (ImportError, AttributeError) as e:
328
- self.logger.warning(f"Language parser not available for {lang}: {e}")
635
+ # Silently skip unavailable parsers - will fall back to basic file discovery
636
+ self.logger.debug(f"Language parser not available for {lang}: {e}")
329
637
 
330
638
  def analyze_file(self, file_path: Path, language: str) -> List[CodeNode]:
331
639
  """Analyze a file using tree-sitter.
@@ -338,7 +646,10 @@ class MultiLanguageAnalyzer:
338
646
  List of code nodes found in the file
339
647
  """
340
648
  if language not in self.parsers:
341
- self.logger.warning(f"No parser available for language: {language}")
649
+ # No parser available - return empty list to fall back to basic discovery
650
+ self.logger.debug(
651
+ f"No parser available for language: {language}, using basic file discovery"
652
+ )
342
653
  return []
343
654
 
344
655
  nodes = []
@@ -488,6 +799,15 @@ class CodeTreeAnalyzer:
488
799
  languages, handling caching and incremental processing.
489
800
  """
490
801
 
802
+ # Define code file extensions at class level for directory filtering
803
+ CODE_EXTENSIONS = {
804
+ '.py', '.js', '.ts', '.tsx', '.jsx', '.java', '.cpp', '.c', '.h', '.hpp',
805
+ '.cs', '.go', '.rs', '.rb', '.php', '.swift', '.kt', '.scala', '.r',
806
+ '.m', '.mm', '.sh', '.bash', '.zsh', '.fish', '.ps1', '.bat', '.cmd',
807
+ '.sql', '.html', '.css', '.scss', '.sass', '.less', '.xml', '.json',
808
+ '.yaml', '.yml', '.toml', '.ini', '.cfg', '.conf', '.md', '.rst', '.txt'
809
+ }
810
+
491
811
  # File extensions to language mapping
492
812
  LANGUAGE_MAP = {
493
813
  ".py": "python",
@@ -499,24 +819,46 @@ class CodeTreeAnalyzer:
499
819
  ".cjs": "javascript",
500
820
  }
501
821
 
502
- def __init__(self, emit_events: bool = True, cache_dir: Optional[Path] = None):
822
+ def __init__(
823
+ self,
824
+ emit_events: bool = True,
825
+ cache_dir: Optional[Path] = None,
826
+ emitter: Optional[CodeTreeEventEmitter] = None,
827
+ show_hidden_files: bool = False,
828
+ ):
503
829
  """Initialize the code tree analyzer.
504
830
 
505
831
  Args:
506
832
  emit_events: Whether to emit Socket.IO events
507
833
  cache_dir: Directory for caching analysis results
834
+ emitter: Optional event emitter to use (creates one if not provided)
835
+ show_hidden_files: Whether to show hidden files/directories (default False - hide dotfiles)
508
836
  """
509
837
  self.logger = get_logger(__name__)
510
838
  self.emit_events = emit_events
511
839
  self.cache_dir = cache_dir or Path.home() / ".claude-mpm" / "code-cache"
840
+ self.show_hidden_files = show_hidden_files
841
+
842
+ # Initialize gitignore manager with hidden files setting (default False)
843
+ self.gitignore_manager = GitignoreManager(show_hidden_files=show_hidden_files)
844
+ self._last_working_dir = None
512
845
 
513
- # Initialize event emitter - use stdout mode for subprocess communication
514
- self.emitter = CodeTreeEventEmitter(use_stdout=True) if emit_events else None
846
+ # Use provided emitter or create one
847
+ if emitter:
848
+ self.emitter = emitter
849
+ elif emit_events:
850
+ self.emitter = CodeTreeEventEmitter(use_stdout=True)
851
+ else:
852
+ self.emitter = None
515
853
 
516
854
  # Initialize language analyzers
517
855
  self.python_analyzer = PythonAnalyzer(self.emitter)
518
856
  self.multi_lang_analyzer = MultiLanguageAnalyzer(self.emitter)
519
857
 
858
+ # For JavaScript/TypeScript
859
+ self.javascript_analyzer = self.multi_lang_analyzer
860
+ self.generic_analyzer = self.multi_lang_analyzer
861
+
520
862
  # Cache for processed files
521
863
  self.cache = {}
522
864
  self._load_cache()
@@ -554,8 +896,14 @@ class CodeTreeAnalyzer:
554
896
  continue
555
897
 
556
898
  for file_path in directory.rglob(f"*{ext}"):
557
- # Apply ignore patterns
558
- if self._should_ignore(file_path, ignore_patterns):
899
+ # Use gitignore manager for filtering with directory as working dir
900
+ if self.gitignore_manager.should_ignore(file_path, directory):
901
+ continue
902
+
903
+ # Also check additional patterns
904
+ if ignore_patterns and any(
905
+ p in str(file_path) for p in ignore_patterns
906
+ ):
559
907
  continue
560
908
 
561
909
  # Check max depth
@@ -590,6 +938,12 @@ class CodeTreeAnalyzer:
590
938
  else:
591
939
  nodes = self.multi_lang_analyzer.analyze_file(file_path, language)
592
940
 
941
+ # If no nodes found and we have a valid language, emit basic file info
942
+ if not nodes and language != "unknown":
943
+ self.logger.debug(
944
+ f"No AST nodes found for {file_path}, using basic discovery"
945
+ )
946
+
593
947
  # Cache results
594
948
  self.cache[cache_key] = nodes
595
949
 
@@ -642,26 +996,29 @@ class CodeTreeAnalyzer:
642
996
  return {"tree": tree, "nodes": all_nodes, "stats": stats}
643
997
 
644
998
def _should_ignore(self, file_path: Path, patterns: Optional[List[str]]) -> bool:
    """Tell whether ``file_path`` is filtered out of the analysis.

    The gitignore manager is consulted first, using the path's parent as the
    working directory. If it does not ignore the path, each entry of
    ``patterns`` is tested as a plain substring of ``str(file_path)``.

    Args:
        file_path: Path being considered
        patterns: Extra substrings to reject, or None/empty for none

    Returns:
        True if the path should be ignored
    """
    # The parent serves as working directory; a non-file path that is its own
    # parent (a filesystem root) falls back to the current directory.
    base_dir = file_path.parent
    if base_dir == file_path and not file_path.is_file():
        base_dir = Path.cwd()

    if self.gitignore_manager.should_ignore(file_path, base_dir):
        return True

    if not patterns:
        return False

    as_text = str(file_path)
    return any(pattern in as_text for pattern in patterns)
665
1022
 
666
1023
  def _get_file_hash(self, file_path: Path) -> str:
667
1024
  """Get hash of file contents for caching."""
@@ -776,3 +1133,464 @@ class CodeTreeAnalyzer:
776
1133
  self.logger.info(f"Saved cache with {len(self.cache)} entries")
777
1134
  except Exception as e:
778
1135
  self.logger.warning(f"Failed to save cache: {e}")
1136
+
1137
def has_code_files(self, directory: Path, depth: int = 5, current_depth: int = 0) -> bool:
    """Report whether ``directory`` holds a code file within ``depth`` levels.

    Entries whose name starts with "." are not inspected, directories with a
    well-known tooling/build name are never searched, and any OS error while
    listing a directory makes that directory count as empty.

    Args:
        directory: Directory to check
        depth: Maximum number of levels to search (default 5)
        current_depth: Level of ``directory`` within the search (0 = start)

    Returns:
        True if a file whose lower-cased suffix is in CODE_EXTENSIONS exists
        within the searched levels
    """
    # Directory names that are rejected outright
    excluded_names = {'node_modules', '__pycache__', '.git', '.venv', 'venv', 'dist', 'build',
                      '.tox', 'htmlcov', '.pytest_cache', '.mypy_cache', 'coverage',
                      '.idea', '.vscode', 'env', '.coverage', '__MACOSX', '.ipynb_checkpoints'}
    if current_depth >= depth or directory.name in excluded_names:
        return False

    # Sub-directories are only entered while another level is still allowed
    may_descend = current_depth < depth - 1

    try:
        for entry in directory.iterdir():
            if entry.name.startswith('.'):
                continue

            if entry.is_file():
                if entry.suffix.lower() in self.CODE_EXTENSIONS:
                    return True
                continue

            if (
                may_descend
                and entry.is_dir()
                and self.has_code_files(entry, depth, current_depth + 1)
            ):
                return True
    except OSError:
        # Unreadable directory (PermissionError included): treat as no code
        return False

    return False
1176
+
1177
def discover_top_level(
    self, directory: Path, ignore_patterns: Optional[List[str]] = None
) -> Dict[str, Any]:
    """Discover only top-level directories and files for lazy loading.

    Lists the immediate children of ``directory`` (no recursion into the
    result), skipping entries rejected by the gitignore manager, entries
    whose path contains one of ``ignore_patterns``, directories without code
    files and files that are neither a supported code file nor one of a few
    special dotfiles. Progress is reported as 'info' events when an emitter
    is configured.

    Args:
        directory: Root directory to discover (absolute or relative)
        ignore_patterns: Extra substrings; a child whose path contains one
            of them is skipped

    Returns:
        Dictionary with "path", "name", "type" and "children". Child paths
        are relative to ``directory``. If the directory cannot be listed
        because of a PermissionError, the children collected so far are
        returned.
    """
    from datetime import datetime

    directory = Path(directory)
    # Children are listed from, and relativised against, the same absolute
    # base. Listing the possibly-relative ``directory`` while relativising
    # against the absolute path made every child fail relative_to().
    working_dir = directory.absolute()

    def emit_info(payload: Dict[str, Any]) -> None:
        """Send an 'info' event stamped with the current time, if emitting."""
        if self.emitter:
            payload['timestamp'] = datetime.now().isoformat()
            self.emitter.emit('info', payload)

    emit_info({
        'type': 'discovery.start',
        'action': 'scanning_directory',
        'path': str(directory),
        'message': f'Starting discovery of {directory.name}',
    })

    result = {
        "path": str(directory),
        "name": directory.name,
        "type": "directory",
        "children": [],
    }

    files_count = 0
    dirs_count = 0
    ignored_count = 0
    # Dotfiles listed even though their suffix is not a code extension
    special_files = ['.gitignore', '.env.example', '.env.sample']
    supported = self.supported_extensions

    try:
        # Clear cache if working directory changed
        if self._last_working_dir != directory:
            self.gitignore_manager.clear_cache()
            self._last_working_dir = directory

        # Get immediate children only (no recursion)
        for item in working_dir.iterdir():
            if self.gitignore_manager.should_ignore(item, working_dir):
                emit_info({
                    'type': 'filter.gitignore',
                    'path': str(item),
                    'reason': 'gitignore pattern',
                    'message': f'Ignored by gitignore: {item.name}',
                })
                ignored_count += 1
                continue

            # Also check additional patterns if provided
            if ignore_patterns and any(p in str(item) for p in ignore_patterns):
                emit_info({
                    'type': 'filter.pattern',
                    'path': str(item),
                    'reason': 'custom pattern',
                    'message': f'Ignored by pattern: {item.name}',
                })
                ignored_count += 1
                continue

            # Relative path keeps the frontend from showing parent directories
            path_str = str(item.relative_to(working_dir))

            if item.is_dir():
                # Only include directories that contain code files
                # (5-level deep scan)
                if not self.has_code_files(item, depth=5):
                    emit_info({
                        'type': 'filter.no_code',
                        'path': str(item.name),
                        'reason': 'no code files',
                        'message': f'Skipped directory without code: {item.name}',
                    })
                    ignored_count += 1
                    continue

                emit_info({
                    'type': 'discovery.directory',
                    'path': str(item),
                    'message': f'Found directory: {item.name}',
                })
                dirs_count += 1

                # Directory - just mark as unexplored
                result["children"].append({
                    "path": path_str,
                    "name": item.name,
                    "type": "directory",
                    "discovered": False,
                    "children": [],
                })

                if self.emitter:
                    self.emitter.emit_directory_discovered(path_str, [])

            elif item.is_file():
                if item.suffix not in supported and item.name not in special_files:
                    continue

                # File - mark for lazy analysis
                language = self._get_language(item)

                # stat() once; the file may vanish between listing and stat
                try:
                    size = item.stat().st_size
                except OSError as e:
                    self.logger.warning(f"Could not stat {item}: {e}")
                    continue

                emit_info({
                    'type': 'discovery.file',
                    'path': str(item),
                    'language': language,
                    'size': size,
                    'message': f'Found file: {item.name} ({language})',
                })
                files_count += 1

                result["children"].append({
                    "path": path_str,
                    "name": item.name,
                    "type": "file",
                    "language": language,
                    "size": size,
                    "analyzed": False,
                })

                if self.emitter:
                    self.emitter.emit_file_discovered(path_str, language, size)

    except PermissionError as e:
        self.logger.warning(f"Permission denied accessing {directory}: {e}")
        if self.emitter:
            self.emitter.emit_error(str(directory), f"Permission denied: {e}")

    emit_info({
        'type': 'discovery.complete',
        'path': str(directory),
        'stats': {
            'files': files_count,
            'directories': dirs_count,
            'ignored': ignored_count,
        },
        'message': f'Discovery complete: {files_count} files, {dirs_count} directories, {ignored_count} ignored',
    })

    return result
1366
+
1367
def discover_directory(
    self, dir_path: str, ignore_patterns: Optional[List[str]] = None
) -> Dict[str, Any]:
    """Discover contents of a specific directory for lazy loading.

    Validates ``dir_path`` and delegates to ``discover_top_level``, which
    emits all the INFO events and owns the gitignore cache invalidation.
    Cache handling is deliberately not repeated here: recording the parent
    directory as the last working directory made ``discover_top_level`` see
    a change on every call and clear the cache each time.

    Args:
        dir_path: Directory path to discover
        ignore_patterns: Patterns to ignore

    Returns:
        Dictionary with directory contents, or ``{"error": ...}`` when
        ``dir_path`` is not an existing directory
    """
    directory = Path(dir_path)
    # is_dir() is already False for paths that do not exist
    if not directory.is_dir():
        return {"error": f"Invalid directory: {dir_path}"}

    return self.discover_top_level(directory, ignore_patterns)
1390
+
1391
def analyze_file(self, file_path: str) -> Dict[str, Any]:
    """Analyze a specific file and return its AST structure.

    Results are cached under ``"<file_path>:<file hash>"``. Nodes for which
    ``_is_internal_node`` is true are left out of the returned structure and
    of the emitted events. Progress is reported as 'info' events when an
    emitter is configured.

    Args:
        file_path: Path to file to analyze

    Returns:
        ``{"path", "language", "nodes"}`` where each node is a dict with
        name, type, line_start, line_end, complexity, has_docstring and
        signature; or ``{"error": ...}`` when ``file_path`` is not an
        existing file
    """
    from datetime import datetime

    path = Path(file_path)
    # is_file() is already False for paths that do not exist
    if not path.is_file():
        return {"error": f"Invalid file: {file_path}"}

    language = self._get_language(path)

    def emit_info(payload: Dict[str, Any]) -> None:
        """Send an 'info' event stamped with the current time, if emitting."""
        if self.emitter:
            payload['timestamp'] = datetime.now().isoformat()
            self.emitter.emit('info', payload)

    emit_info({
        'type': 'analysis.start',
        'file': str(path),
        'language': language,
        'message': f'Analyzing: {path.name}',
    })

    file_hash = self._get_file_hash(path)
    cache_key = f"{file_path}:{file_hash}"

    # A cache hit performs no parsing; without this default the completion
    # event below raised UnboundLocalError on every cache hit.
    duration = 0.0

    if cache_key in self.cache:
        nodes = self.cache[cache_key]
        emit_info({
            'type': 'cache.hit',
            'file': str(path),
            'message': f'Using cached analysis for {path.name}',
        })
    else:
        emit_info({
            'type': 'cache.miss',
            'file': str(path),
            'message': f'Cache miss, analyzing fresh: {path.name}',
        })

        start_time = time.time()

        emit_info({
            'type': 'analysis.parse',
            'file': str(path),
            'message': f'Parsing file content: {path.name}',
        })

        if language == "python":
            analyzer = self.python_analyzer
            nodes = analyzer.analyze_file(path) if analyzer else []
        else:
            if language in ("javascript", "typescript"):
                analyzer = self.javascript_analyzer
            else:
                analyzer = self.generic_analyzer
            # The multi-language analyzer's analyze_file requires the
            # language as its second argument.
            nodes = analyzer.analyze_file(path, language) if analyzer else []

        duration = time.time() - start_time

        # Cache results
        self.cache[cache_key] = nodes

    # Keep only the main structural elements
    filtered_nodes = []
    counts = {'class': 0, 'function': 0, 'method': 0}

    for node in nodes:
        if self._is_internal_node(node):
            continue

        emit_info({
            'type': f'analysis.{node.node_type}',
            'name': node.name,
            'file': str(path),
            'line_start': node.line_start,
            'complexity': node.complexity,
            'message': f'Found {node.node_type}: {node.name}',
        })

        if node.node_type in counts:
            counts[node.node_type] += 1

        filtered_nodes.append(
            {
                "name": node.name,
                "type": node.node_type,
                "line_start": node.line_start,
                "line_end": node.line_end,
                "complexity": node.complexity,
                "has_docstring": node.has_docstring,
                "signature": node.signature,
            }
        )

    classes_count = counts['class']
    functions_count = counts['function']
    methods_count = counts['method']

    if self.emitter:
        emit_info({
            'type': 'analysis.complete',
            'file': str(path),
            'stats': {
                'classes': classes_count,
                'functions': functions_count,
                'methods': methods_count,
                'total_nodes': len(filtered_nodes),
            },
            'duration': duration,
            'message': f'Analysis complete: {classes_count} classes, {functions_count} functions, {methods_count} methods',
        })

        self.emitter.emit_file_analyzed(file_path, filtered_nodes, duration)

    return {
        "path": file_path,
        "language": language,
        "nodes": filtered_nodes,
    }
1550
+
1551
def _is_internal_node(self, node: CodeNode) -> bool:
    """Tell whether ``node`` is filtered out as an internal function.

    Classes are never filtered and neither is ``__init__``. Any other node
    is filtered when its lower-cased name starts with one of the prefixes
    below.
    """
    lowered = node.name.lower()

    if node.node_type == "class":
        return False

    internal_prefixes = (
        "handle",  # Event handlers
        "on_",  # Event callbacks
        "_",  # Private methods
        "get_",  # Simple getters
        "set_",  # Simple setters
        "__",  # Python magic methods
    )
    if not lowered.startswith(internal_prefixes):
        return False

    # __init__ is the one prefixed name that is still reported
    return node.name != "__init__"
1578
+
1579
@property
def supported_extensions(self):
    """Return the set of file suffixes treated as supported code files."""
    python_suffixes = (".py",)
    script_suffixes = (".js", ".jsx", ".ts", ".tsx", ".mjs", ".cjs")
    return set(python_suffixes + script_suffixes)
1583
+
1584
def _get_language(self, file_path: Path) -> str:
    """Map the lower-cased suffix of ``file_path`` to a language name.

    Returns "python", "javascript" or "typescript", and "unknown" for any
    other suffix.
    """
    suffix = file_path.suffix.lower()
    if suffix == ".py":
        return "python"
    if suffix in (".js", ".jsx", ".mjs", ".cjs"):
        return "javascript"
    if suffix in (".ts", ".tsx"):
        return "typescript"
    return "unknown"