claude-mpm 4.1.10__py3-none-any.whl → 4.1.12__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (56)
  1. claude_mpm/VERSION +1 -1
  2. claude_mpm/agents/INSTRUCTIONS.md +8 -0
  3. claude_mpm/cli/__init__.py +11 -0
  4. claude_mpm/cli/commands/analyze.py +2 -1
  5. claude_mpm/cli/commands/configure.py +9 -8
  6. claude_mpm/cli/commands/configure_tui.py +3 -1
  7. claude_mpm/cli/commands/dashboard.py +288 -0
  8. claude_mpm/cli/commands/debug.py +0 -1
  9. claude_mpm/cli/commands/mpm_init.py +442 -0
  10. claude_mpm/cli/commands/mpm_init_handler.py +84 -0
  11. claude_mpm/cli/parsers/base_parser.py +15 -0
  12. claude_mpm/cli/parsers/dashboard_parser.py +113 -0
  13. claude_mpm/cli/parsers/mpm_init_parser.py +128 -0
  14. claude_mpm/constants.py +10 -0
  15. claude_mpm/core/config.py +18 -0
  16. claude_mpm/core/instruction_reinforcement_hook.py +266 -0
  17. claude_mpm/core/pm_hook_interceptor.py +105 -8
  18. claude_mpm/dashboard/analysis_runner.py +52 -25
  19. claude_mpm/dashboard/static/built/components/activity-tree.js +1 -1
  20. claude_mpm/dashboard/static/built/components/code-tree.js +2 -0
  21. claude_mpm/dashboard/static/built/components/code-viewer.js +2 -0
  22. claude_mpm/dashboard/static/built/components/event-viewer.js +1 -1
  23. claude_mpm/dashboard/static/built/dashboard.js +1 -1
  24. claude_mpm/dashboard/static/built/socket-client.js +1 -1
  25. claude_mpm/dashboard/static/css/code-tree.css +330 -1
  26. claude_mpm/dashboard/static/dist/components/activity-tree.js +1 -1
  27. claude_mpm/dashboard/static/dist/components/code-tree.js +2593 -2
  28. claude_mpm/dashboard/static/dist/components/event-viewer.js +1 -1
  29. claude_mpm/dashboard/static/dist/dashboard.js +1 -1
  30. claude_mpm/dashboard/static/dist/socket-client.js +1 -1
  31. claude_mpm/dashboard/static/js/components/activity-tree.js +212 -13
  32. claude_mpm/dashboard/static/js/components/build-tracker.js +15 -13
  33. claude_mpm/dashboard/static/js/components/code-tree.js +2503 -917
  34. claude_mpm/dashboard/static/js/components/event-viewer.js +58 -19
  35. claude_mpm/dashboard/static/js/dashboard.js +46 -44
  36. claude_mpm/dashboard/static/js/socket-client.js +74 -32
  37. claude_mpm/dashboard/templates/index.html +25 -20
  38. claude_mpm/services/agents/deployment/agent_template_builder.py +11 -7
  39. claude_mpm/services/agents/memory/memory_format_service.py +3 -1
  40. claude_mpm/services/cli/agent_cleanup_service.py +1 -4
  41. claude_mpm/services/cli/socketio_manager.py +39 -8
  42. claude_mpm/services/cli/startup_checker.py +0 -1
  43. claude_mpm/services/core/cache_manager.py +0 -1
  44. claude_mpm/services/infrastructure/monitoring.py +1 -1
  45. claude_mpm/services/socketio/event_normalizer.py +64 -0
  46. claude_mpm/services/socketio/handlers/code_analysis.py +449 -0
  47. claude_mpm/services/socketio/server/connection_manager.py +3 -1
  48. claude_mpm/tools/code_tree_analyzer.py +930 -24
  49. claude_mpm/tools/code_tree_builder.py +0 -1
  50. claude_mpm/tools/code_tree_events.py +113 -15
  51. {claude_mpm-4.1.10.dist-info → claude_mpm-4.1.12.dist-info}/METADATA +2 -1
  52. {claude_mpm-4.1.10.dist-info → claude_mpm-4.1.12.dist-info}/RECORD +56 -48
  53. {claude_mpm-4.1.10.dist-info → claude_mpm-4.1.12.dist-info}/WHEEL +0 -0
  54. {claude_mpm-4.1.10.dist-info → claude_mpm-4.1.12.dist-info}/entry_points.txt +0 -0
  55. {claude_mpm-4.1.10.dist-info → claude_mpm-4.1.12.dist-info}/licenses/LICENSE +0 -0
  56. {claude_mpm-4.1.10.dist-info → claude_mpm-4.1.12.dist-info}/top_level.txt +0 -0
@@ -22,6 +22,14 @@ from dataclasses import dataclass
 from pathlib import Path
 from typing import Any, Dict, List, Optional

+try:
+    import pathspec
+
+    PATHSPEC_AVAILABLE = True
+except ImportError:
+    PATHSPEC_AVAILABLE = False
+    pathspec = None
+
 try:
     import tree_sitter
     import tree_sitter_javascript
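
The guarded import above makes pathspec an optional dependency: when it is missing, PATHSPEC_AVAILABLE stays False and the analyzer degrades to basic string matching instead of failing. A minimal sketch of the same pattern, using pathspec's gitwildmatch dialect (build_spec is an illustrative helper, not part of the package):

```python
try:
    import pathspec  # optional dependency, same guard as in the diff above
    PATHSPEC_AVAILABLE = True
except ImportError:
    PATHSPEC_AVAILABLE = False
    pathspec = None


def build_spec(patterns):
    """Compile gitignore-style patterns, or return None to signal 'use a fallback'."""
    if not PATHSPEC_AVAILABLE:
        return None
    return pathspec.PathSpec.from_lines("gitwildmatch", patterns)


spec = build_spec(["*.pyc", "node_modules/"])
if spec:
    print(spec.match_file("pkg/module.pyc"))  # True
    print(spec.match_file("src/app.py"))      # False
```
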
@@ -37,6 +45,293 @@ from ..core.logging_config import get_logger
 from .code_tree_events import CodeNodeEvent, CodeTreeEventEmitter


+class GitignoreManager:
+    """Manages .gitignore pattern matching for file filtering.
+
+    WHY: Properly respecting .gitignore patterns ensures we don't analyze
+    or display files that should be ignored in the repository.
+    """
+
+    # Default patterns that should always be ignored
+    DEFAULT_PATTERNS = [
+        ".git/",
+        "__pycache__/",
+        "*.pyc",
+        "*.pyo",
+        ".DS_Store",
+        ".pytest_cache/",
+        ".mypy_cache/",
+        "dist/",
+        "build/",
+        "*.egg-info/",
+        ".coverage",
+        ".tox/",
+        "htmlcov/",
+        ".idea/",
+        ".vscode/",
+        "*.swp",
+        "*.swo",
+        "*~",
+        "Thumbs.db",
+        "node_modules/",
+        ".venv/",
+        "venv/",
+        "env/",
+        ".env",
+        "*.log",
+        ".ipynb_checkpoints/",
+        "__MACOSX/",
+        ".Spotlight-V100/",
+        ".Trashes/",
+        "desktop.ini",
+    ]
+
+    # Additional patterns to hide dotfiles (when enabled)
+    DOTFILE_PATTERNS = [
+        ".*",  # All dotfiles
+        ".*/",  # All dot directories
+    ]
+
+    # Important files/directories to always show
+    DOTFILE_EXCEPTIONS = {
+        # Removed .gitignore from exceptions - it should be hidden by default
+        ".env.example",
+        ".env.sample",
+        ".gitlab-ci.yml",
+        ".travis.yml",
+        ".dockerignore",
+        ".editorconfig",
+        ".eslintrc",
+        ".prettierrc",
+        # Removed .github from exceptions - it should be hidden by default
+    }
+
+    def __init__(self):
+        """Initialize the GitignoreManager."""
+        self.logger = get_logger(__name__)
+        self._pathspec_cache: Dict[str, Any] = {}
+        self._gitignore_cache: Dict[str, List[str]] = {}
+        self._use_pathspec = PATHSPEC_AVAILABLE
+
+        if not self._use_pathspec:
+            self.logger.warning(
+                "pathspec library not available - using basic pattern matching"
+            )
+
+    def get_ignore_patterns(self, working_dir: Path) -> List[str]:
+        """Get all ignore patterns for a directory.
+
+        Args:
+            working_dir: The working directory to search for .gitignore files
+
+        Returns:
+            Combined list of ignore patterns from all sources
+        """
+        # Always include default patterns
+        patterns = self.DEFAULT_PATTERNS.copy()
+
+        # Don't add dotfile patterns here - handle them separately in should_ignore
+        # This prevents exceptions from being overridden by the .* pattern
+
+        # Find and parse .gitignore files
+        gitignore_files = self._find_gitignore_files(working_dir)
+        for gitignore_file in gitignore_files:
+            patterns.extend(self._parse_gitignore(gitignore_file))
+
+        return patterns
+
+    def should_ignore(self, path: Path, working_dir: Path) -> bool:
+        """Check if a path should be ignored based on patterns.
+
+        Args:
+            path: The path to check
+            working_dir: The working directory (for relative path calculation)
+
+        Returns:
+            True if the path should be ignored
+        """
+        # Get the filename
+        filename = path.name
+
+        # 1. ALWAYS hide system files regardless of settings
+        ALWAYS_HIDE = {".DS_Store", "Thumbs.db", ".pyc", ".pyo", ".pyd"}
+        if filename in ALWAYS_HIDE or filename.endswith((".pyc", ".pyo", ".pyd")):
+            return True
+
+        # 2. Check dotfiles - ALWAYS filter them out (except exceptions)
+        if filename.startswith("."):
+            # Hide all dotfiles except those in the exceptions list
+            # This means: return True (ignore) if NOT in exceptions
+            return filename not in self.DOTFILE_EXCEPTIONS
+
+        # Get or create PathSpec for this working directory
+        pathspec_obj = self._get_pathspec(working_dir)
+
+        if pathspec_obj:
+            # Use pathspec for accurate matching
+            try:
+                rel_path = path.relative_to(working_dir)
+                rel_path_str = str(rel_path)
+
+                # For directories, also check with trailing slash
+                if path.is_dir():
+                    return pathspec_obj.match_file(
+                        rel_path_str
+                    ) or pathspec_obj.match_file(rel_path_str + "/")
+                return pathspec_obj.match_file(rel_path_str)
+            except ValueError:
+                # Path is outside working directory
+                return False
+        else:
+            # Fallback to basic pattern matching
+            return self._basic_should_ignore(path, working_dir)
+
+    def _get_pathspec(self, working_dir: Path) -> Optional[Any]:
+        """Get or create a PathSpec object for the working directory.
+
+        Args:
+            working_dir: The working directory
+
+        Returns:
+            PathSpec object or None if not available
+        """
+        if not self._use_pathspec:
+            return None
+
+        cache_key = str(working_dir)
+        if cache_key not in self._pathspec_cache:
+            patterns = self.get_ignore_patterns(working_dir)
+            try:
+                self._pathspec_cache[cache_key] = pathspec.PathSpec.from_lines(
+                    "gitwildmatch", patterns
+                )
+            except Exception as e:
+                self.logger.warning(f"Failed to create PathSpec: {e}")
+                return None
+
+        return self._pathspec_cache[cache_key]
+
+    def _find_gitignore_files(self, working_dir: Path) -> List[Path]:
+        """Find all .gitignore files in the directory tree.
+
+        Args:
+            working_dir: The directory to search
+
+        Returns:
+            List of .gitignore file paths
+        """
+        gitignore_files = []
+
+        # Check for .gitignore in working directory
+        main_gitignore = working_dir / ".gitignore"
+        if main_gitignore.exists():
+            gitignore_files.append(main_gitignore)
+
+        # Also check parent directories up to repository root
+        current = working_dir
+        while current != current.parent:
+            parent_gitignore = current.parent / ".gitignore"
+            if parent_gitignore.exists():
+                gitignore_files.append(parent_gitignore)
+
+            # Stop if we find a .git directory (repository root)
+            if (current / ".git").exists():
+                break
+
+            current = current.parent
+
+        return gitignore_files
+
+    def _parse_gitignore(self, gitignore_path: Path) -> List[str]:
+        """Parse a .gitignore file and return patterns.
+
+        Args:
+            gitignore_path: Path to .gitignore file
+
+        Returns:
+            List of patterns from the file
+        """
+        cache_key = str(gitignore_path)
+
+        # Check cache
+        if cache_key in self._gitignore_cache:
+            return self._gitignore_cache[cache_key]
+
+        patterns = []
+        try:
+            with open(gitignore_path, encoding="utf-8") as f:
+                for line in f:
+                    line = line.strip()
+                    # Skip empty lines and comments
+                    if line and not line.startswith("#"):
+                        patterns.append(line)
+
+            self._gitignore_cache[cache_key] = patterns
+        except Exception as e:
+            self.logger.warning(f"Failed to parse {gitignore_path}: {e}")
+
+        return patterns
+
+    def _basic_should_ignore(self, path: Path, working_dir: Path) -> bool:
+        """Basic pattern matching fallback when pathspec is not available.
+
+        Args:
+            path: The path to check
+            working_dir: The working directory
+
+        Returns:
+            True if the path should be ignored
+        """
+        path_str = str(path)
+        path_name = path.name
+
+        # 1. ALWAYS hide system files regardless of settings
+        ALWAYS_HIDE = {".DS_Store", "Thumbs.db", ".pyc", ".pyo", ".pyd"}
+        if path_name in ALWAYS_HIDE or path_name.endswith((".pyc", ".pyo", ".pyd")):
+            return True
+
+        # 2. Check dotfiles - ALWAYS filter them out (except exceptions)
+        if path_name.startswith("."):
+            # Only show if in exceptions list
+            return path_name not in self.DOTFILE_EXCEPTIONS
+
+        patterns = self.get_ignore_patterns(working_dir)
+
+        for pattern in patterns:
+            # Skip dotfile patterns since we already handled them above
+            if pattern in [".*", ".*/"]:
+                continue
+
+            # Simple pattern matching
+            if pattern.endswith("/"):
+                # Directory pattern
+                if path.is_dir() and path_name == pattern[:-1]:
+                    return True
+            elif pattern.startswith("*."):
+                # Extension pattern
+                if path_name.endswith(pattern[1:]):
+                    return True
+            elif "*" in pattern:
+                # Wildcard pattern (simplified)
+                import fnmatch
+
+                if fnmatch.fnmatch(path_name, pattern):
+                    return True
+            elif pattern in path_str:
+                # Substring match
+                return True
+            elif path_name == pattern:
+                # Exact match
+                return True
+
+        return False
+
+    def clear_cache(self):
+        """Clear all caches."""
+        self._pathspec_cache.clear()
+        self._gitignore_cache.clear()
+
+
 @dataclass
 class CodeNode:
     """Represents a node in the code tree."""
@@ -325,7 +620,8 @@ class MultiLanguageAnalyzer:
                 parser = tree_sitter.Parser(lang_obj)
                 self.parsers[lang] = parser
             except (ImportError, AttributeError) as e:
-                self.logger.warning(f"Language parser not available for {lang}: {e}")
+                # Silently skip unavailable parsers - will fall back to basic file discovery
+                self.logger.debug(f"Language parser not available for {lang}: {e}")

     def analyze_file(self, file_path: Path, language: str) -> List[CodeNode]:
         """Analyze a file using tree-sitter.
@@ -338,7 +634,10 @@ class MultiLanguageAnalyzer:
             List of code nodes found in the file
         """
         if language not in self.parsers:
-            self.logger.warning(f"No parser available for language: {language}")
+            # No parser available - return empty list to fall back to basic discovery
+            self.logger.debug(
+                f"No parser available for language: {language}, using basic file discovery"
+            )
             return []

         nodes = []
@@ -488,6 +787,55 @@ class CodeTreeAnalyzer:
     languages, handling caching and incremental processing.
     """

+    # Define code file extensions at class level for directory filtering
+    CODE_EXTENSIONS = {
+        ".py",
+        ".js",
+        ".ts",
+        ".tsx",
+        ".jsx",
+        ".java",
+        ".cpp",
+        ".c",
+        ".h",
+        ".hpp",
+        ".cs",
+        ".go",
+        ".rs",
+        ".rb",
+        ".php",
+        ".swift",
+        ".kt",
+        ".scala",
+        ".r",
+        ".m",
+        ".mm",
+        ".sh",
+        ".bash",
+        ".zsh",
+        ".fish",
+        ".ps1",
+        ".bat",
+        ".cmd",
+        ".sql",
+        ".html",
+        ".css",
+        ".scss",
+        ".sass",
+        ".less",
+        ".xml",
+        ".json",
+        ".yaml",
+        ".yml",
+        ".toml",
+        ".ini",
+        ".cfg",
+        ".conf",
+        ".md",
+        ".rst",
+        ".txt",
+    }
+
     # File extensions to language mapping
     LANGUAGE_MAP = {
         ".py": "python",
@@ -499,24 +847,43 @@ class CodeTreeAnalyzer:
         ".cjs": "javascript",
     }

-    def __init__(self, emit_events: bool = True, cache_dir: Optional[Path] = None):
+    def __init__(
+        self,
+        emit_events: bool = True,
+        cache_dir: Optional[Path] = None,
+        emitter: Optional[CodeTreeEventEmitter] = None,
+    ):
         """Initialize the code tree analyzer.

         Args:
             emit_events: Whether to emit Socket.IO events
             cache_dir: Directory for caching analysis results
+            emitter: Optional event emitter to use (creates one if not provided)
         """
         self.logger = get_logger(__name__)
         self.emit_events = emit_events
         self.cache_dir = cache_dir or Path.home() / ".claude-mpm" / "code-cache"

-        # Initialize event emitter - use stdout mode for subprocess communication
-        self.emitter = CodeTreeEventEmitter(use_stdout=True) if emit_events else None
+        # Initialize gitignore manager (always filters dotfiles)
+        self.gitignore_manager = GitignoreManager()
+        self._last_working_dir = None
+
+        # Use provided emitter or create one
+        if emitter:
+            self.emitter = emitter
+        elif emit_events:
+            self.emitter = CodeTreeEventEmitter(use_stdout=True)
+        else:
+            self.emitter = None

         # Initialize language analyzers
         self.python_analyzer = PythonAnalyzer(self.emitter)
         self.multi_lang_analyzer = MultiLanguageAnalyzer(self.emitter)

+        # For JavaScript/TypeScript
+        self.javascript_analyzer = self.multi_lang_analyzer
+        self.generic_analyzer = self.multi_lang_analyzer
+
         # Cache for processed files
         self.cache = {}
         self._load_cache()
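
The constructor change is a dependency-injection hook: callers such as the new Socket.IO code-analysis handler can pass in an existing CodeTreeEventEmitter instead of the analyzer always creating its own stdout-based one. A sketch of the three construction modes implied by the diff (the cache path is illustrative):

```python
from pathlib import Path

from claude_mpm.tools.code_tree_analyzer import CodeTreeAnalyzer
from claude_mpm.tools.code_tree_events import CodeTreeEventEmitter

# 1. Default behaviour: create a stdout emitter for subprocess communication.
analyzer = CodeTreeAnalyzer(emit_events=True)

# 2. Silent analysis (tests, batch jobs): no emitter at all.
quiet = CodeTreeAnalyzer(emit_events=False, cache_dir=Path("/tmp/mpm-code-cache"))

# 3. Injected emitter: reuse one owned by the caller, e.g. a dashboard handler.
shared = CodeTreeEventEmitter(use_stdout=True)
wired = CodeTreeAnalyzer(emitter=shared)
```
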
@@ -554,8 +921,14 @@ class CodeTreeAnalyzer:
                 continue

            for file_path in directory.rglob(f"*{ext}"):
-                # Apply ignore patterns
-                if self._should_ignore(file_path, ignore_patterns):
+                # Use gitignore manager for filtering with directory as working dir
+                if self.gitignore_manager.should_ignore(file_path, directory):
+                    continue
+
+                # Also check additional patterns
+                if ignore_patterns and any(
+                    p in str(file_path) for p in ignore_patterns
+                ):
                     continue

                 # Check max depth
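
Discovery now filters in two passes: GitignoreManager first, then any caller-supplied ignore_patterns, which are treated as plain substrings of the path rather than globs. A small sketch of that second check (the pattern list is illustrative):

```python
from pathlib import Path

ignore_patterns = ["generated", "legacy"]  # illustrative caller-supplied patterns
file_path = Path("src/generated/schema.py")

# A file is skipped if any pattern occurs anywhere in its path string.
skipped = any(p in str(file_path) for p in ignore_patterns)
print(skipped)  # True
```
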
@@ -590,6 +963,12 @@ class CodeTreeAnalyzer:
         else:
             nodes = self.multi_lang_analyzer.analyze_file(file_path, language)

+        # If no nodes found and we have a valid language, emit basic file info
+        if not nodes and language != "unknown":
+            self.logger.debug(
+                f"No AST nodes found for {file_path}, using basic discovery"
+            )
+
         # Cache results
         self.cache[cache_key] = nodes
 
@@ -642,26 +1021,29 @@ class CodeTreeAnalyzer:
         return {"tree": tree, "nodes": all_nodes, "stats": stats}

     def _should_ignore(self, file_path: Path, patterns: Optional[List[str]]) -> bool:
-        """Check if file should be ignored."""
-        if not patterns:
-            patterns = []
+        """Check if file should be ignored.

-        # Default ignore patterns
-        default_ignores = [
-            "__pycache__",
-            ".git",
-            "node_modules",
-            ".venv",
-            "venv",
-            "dist",
-            "build",
-            ".pytest_cache",
-            ".mypy_cache",
-        ]
+        Uses GitignoreManager for proper pattern matching.
+        """
+        # Get the working directory (use parent for files, self for directories)
+        if file_path.is_file():
+            working_dir = file_path.parent
+        else:
+            # For directories during discovery, use the parent
+            working_dir = (
+                file_path.parent if file_path.parent != file_path else Path.cwd()
+            )
+
+        # Use gitignore manager for checking
+        if self.gitignore_manager.should_ignore(file_path, working_dir):
+            return True

-        all_patterns = default_ignores + patterns
+        # Also check any additional patterns provided
+        if patterns:
+            path_str = str(file_path)
+            return any(pattern in path_str for pattern in patterns)

-        return any(pattern in str(file_path) for pattern in all_patterns)
+        return False

     def _get_file_hash(self, file_path: Path) -> str:
         """Get hash of file contents for caching."""
@@ -776,3 +1158,527 @@ class CodeTreeAnalyzer:
             self.logger.info(f"Saved cache with {len(self.cache)} entries")
         except Exception as e:
             self.logger.warning(f"Failed to save cache: {e}")
+
+    def has_code_files(
+        self, directory: Path, depth: int = 5, current_depth: int = 0
+    ) -> bool:
+        """Check if directory contains code files up to 5 levels deep.
+
+        Args:
+            directory: Directory to check
+            depth: Maximum depth to search
+            current_depth: Current recursion depth
+
+        Returns:
+            True if directory contains code files within depth levels
+        """
+        if current_depth >= depth:
+            return False
+
+        # Skip checking these directories entirely
+        SKIP_DIRS = {
+            "node_modules",
+            "__pycache__",
+            ".git",
+            ".venv",
+            "venv",
+            "dist",
+            "build",
+            ".tox",
+            "htmlcov",
+            ".pytest_cache",
+            ".mypy_cache",
+            "coverage",
+            ".idea",
+            ".vscode",
+            "env",
+            ".coverage",
+            "__MACOSX",
+            ".ipynb_checkpoints",
+        }
+        if directory.name in SKIP_DIRS:
+            return False
+
+        try:
+            for item in directory.iterdir():
+                # Skip hidden items in scan
+                if item.name.startswith("."):
+                    continue
+
+                if item.is_file():
+                    # Check if it's a code file
+                    ext = item.suffix.lower()
+                    if ext in self.CODE_EXTENSIONS:
+                        return True
+                elif item.is_dir() and current_depth < depth - 1:
+                    if self.has_code_files(item, depth, current_depth + 1):
+                        return True
+        except (PermissionError, OSError):
+            pass
+
+        return False
+
+    def discover_top_level(
+        self, directory: Path, ignore_patterns: Optional[List[str]] = None
+    ) -> Dict[str, Any]:
+        """Discover only top-level directories and files for lazy loading.
+
+        Args:
+            directory: Root directory to discover
+            ignore_patterns: Patterns to ignore
+
+        Returns:
+            Dictionary with top-level structure
+        """
+        # CRITICAL FIX: Use the directory parameter as the base for relative paths
+        # NOT the current working directory. This ensures we only show items
+        # within the requested directory, not parent directories.
+        working_dir = Path(directory).absolute()
+
+        # Emit discovery start event
+        if self.emitter:
+            from datetime import datetime
+
+            self.emitter.emit(
+                "info",
+                {
+                    "type": "discovery.start",
+                    "action": "scanning_directory",
+                    "path": str(directory),
+                    "message": f"Starting discovery of {directory.name}",
+                    "timestamp": datetime.now().isoformat(),
+                },
+            )
+
+        result = {
+            "path": str(directory),
+            "name": directory.name,
+            "type": "directory",
+            "children": [],
+        }
+
+        try:
+            # Clear cache if working directory changed
+            if self._last_working_dir != directory:
+                self.gitignore_manager.clear_cache()
+                self._last_working_dir = directory
+
+            # Get immediate children only (no recursion)
+            files_count = 0
+            dirs_count = 0
+            ignored_count = 0
+
+            for item in directory.iterdir():
+                # Use gitignore manager for filtering with the directory as working dir
+                if self.gitignore_manager.should_ignore(item, directory):
+                    if self.emitter:
+                        from datetime import datetime
+
+                        self.emitter.emit(
+                            "info",
+                            {
+                                "type": "filter.gitignore",
+                                "path": str(item),
+                                "reason": "gitignore pattern",
+                                "message": f"Ignored by gitignore: {item.name}",
+                                "timestamp": datetime.now().isoformat(),
+                            },
+                        )
+                    ignored_count += 1
+                    continue
+
+                # Also check additional patterns if provided
+                if ignore_patterns and any(p in str(item) for p in ignore_patterns):
+                    if self.emitter:
+                        from datetime import datetime
+
+                        self.emitter.emit(
+                            "info",
+                            {
+                                "type": "filter.pattern",
+                                "path": str(item),
+                                "reason": "custom pattern",
+                                "message": f"Ignored by pattern: {item.name}",
+                                "timestamp": datetime.now().isoformat(),
+                            },
+                        )
+                    ignored_count += 1
+                    continue
+
+                if item.is_dir():
+                    # Only include directories that contain code files (5-level deep scan)
+                    if not self.has_code_files(item, depth=5):
+                        if self.emitter:
+                            from datetime import datetime
+
+                            self.emitter.emit(
+                                "info",
+                                {
+                                    "type": "filter.no_code",
+                                    "path": str(item.name),
+                                    "reason": "no code files",
+                                    "message": f"Skipped directory without code: {item.name}",
+                                    "timestamp": datetime.now().isoformat(),
+                                },
+                            )
+                        ignored_count += 1
+                        continue
+
+                    # Directory - return just the item name
+                    # The frontend will construct the full path by combining parent path with child name
+                    path_str = item.name
+
+                    # Emit directory found event
+                    if self.emitter:
+                        from datetime import datetime
+
+                        self.emitter.emit(
+                            "info",
+                            {
+                                "type": "discovery.directory",
+                                "path": str(item),
+                                "message": f"Found directory: {item.name}",
+                                "timestamp": datetime.now().isoformat(),
+                            },
+                        )
+                    dirs_count += 1
+
+                    child = {
+                        "path": path_str,
+                        "name": item.name,
+                        "type": "directory",
+                        "discovered": False,
+                        "children": [],
+                    }
+                    result["children"].append(child)
+
+                    if self.emitter:
+                        self.emitter.emit_directory_discovered(path_str, [])
+
+                elif item.is_file():
+                    # Check if it's a supported code file or a special file we want to show
+                    if item.suffix in self.supported_extensions or item.name in [
+                        ".gitignore",
+                        ".env.example",
+                        ".env.sample",
+                    ]:
+                        # File - mark for lazy analysis
+                        language = self._get_language(item)
+
+                        # File path should be just the item name
+                        # The frontend will construct the full path by combining parent path with child name
+                        path_str = item.name
+
+                        # Emit file found event
+                        if self.emitter:
+                            from datetime import datetime
+
+                            self.emitter.emit(
+                                "info",
+                                {
+                                    "type": "discovery.file",
+                                    "path": str(item),
+                                    "language": language,
+                                    "size": item.stat().st_size,
+                                    "message": f"Found file: {item.name} ({language})",
+                                    "timestamp": datetime.now().isoformat(),
+                                },
+                            )
+                        files_count += 1
+
+                        child = {
+                            "path": path_str,
+                            "name": item.name,
+                            "type": "file",
+                            "language": language,
+                            "size": item.stat().st_size,
+                            "analyzed": False,
+                        }
+                        result["children"].append(child)
+
+                        if self.emitter:
+                            self.emitter.emit_file_discovered(
+                                path_str, language, item.stat().st_size
+                            )
+
+        except PermissionError as e:
+            self.logger.warning(f"Permission denied accessing {directory}: {e}")
+            if self.emitter:
+                self.emitter.emit_error(str(directory), f"Permission denied: {e}")
+
+        # Emit discovery complete event with stats
+        if self.emitter:
+            from datetime import datetime
+
+            self.emitter.emit(
+                "info",
+                {
+                    "type": "discovery.complete",
+                    "path": str(directory),
+                    "stats": {
+                        "files": files_count,
+                        "directories": dirs_count,
+                        "ignored": ignored_count,
+                    },
+                    "message": f"Discovery complete: {files_count} files, {dirs_count} directories, {ignored_count} ignored",
+                    "timestamp": datetime.now().isoformat(),
+                },
+            )
+
+        return result
+
+    def discover_directory(
+        self, dir_path: str, ignore_patterns: Optional[List[str]] = None
+    ) -> Dict[str, Any]:
+        """Discover contents of a specific directory for lazy loading.
+
+        Args:
+            dir_path: Directory path to discover
+            ignore_patterns: Patterns to ignore
+
+        Returns:
+            Dictionary with directory contents
+        """
+        directory = Path(dir_path)
+        if not directory.exists() or not directory.is_dir():
+            return {"error": f"Invalid directory: {dir_path}"}
+
+        # Clear cache if working directory changed
+        if self._last_working_dir != directory.parent:
+            self.gitignore_manager.clear_cache()
+            self._last_working_dir = directory.parent
+
+        # The discover_top_level method will emit all the INFO events
+        result = self.discover_top_level(directory, ignore_patterns)
+        return result
+
+    def analyze_file(self, file_path: str) -> Dict[str, Any]:
+        """Analyze a specific file and return its AST structure.
+
+        Args:
+            file_path: Path to file to analyze
+
+        Returns:
+            Dictionary with file analysis results
+        """
+        path = Path(file_path)
+        if not path.exists() or not path.is_file():
+            return {"error": f"Invalid file: {file_path}"}
+
+        # Get language first (needed for return statement)
+        language = self._get_language(path)
+
+        # Emit analysis start event
+        if self.emitter:
+            from datetime import datetime
+
+            self.emitter.emit(
+                "info",
+                {
+                    "type": "analysis.start",
+                    "file": str(path),
+                    "language": language,
+                    "message": f"Analyzing: {path.name}",
+                    "timestamp": datetime.now().isoformat(),
+                },
+            )
+
+        # Check cache
+        file_hash = self._get_file_hash(path)
+        cache_key = f"{file_path}:{file_hash}"
+
+        if cache_key in self.cache:
+            nodes = self.cache[cache_key]
+            if self.emitter:
+                from datetime import datetime
+
+                self.emitter.emit(
+                    "info",
+                    {
+                        "type": "cache.hit",
+                        "file": str(path),
+                        "message": f"Using cached analysis for {path.name}",
+                        "timestamp": datetime.now().isoformat(),
+                    },
+                )
+        else:
+            # Analyze file
+            if self.emitter:
+                from datetime import datetime
+
+                self.emitter.emit(
+                    "info",
+                    {
+                        "type": "cache.miss",
+                        "file": str(path),
+                        "message": f"Cache miss, analyzing fresh: {path.name}",
+                        "timestamp": datetime.now().isoformat(),
+                    },
+                )
+
+            if language == "python":
+                analyzer = self.python_analyzer
+            elif language == "javascript" or language == "typescript":
+                analyzer = self.javascript_analyzer
+            else:
+                analyzer = self.generic_analyzer
+
+            start_time = time.time()
+
+            # Emit parsing event
+            if self.emitter:
+                from datetime import datetime
+
+                self.emitter.emit(
+                    "info",
+                    {
+                        "type": "analysis.parse",
+                        "file": str(path),
+                        "message": f"Parsing file content: {path.name}",
+                        "timestamp": datetime.now().isoformat(),
+                    },
+                )

+            nodes = analyzer.analyze_file(path) if analyzer else []
+            duration = time.time() - start_time
+
+            # Cache results
+            self.cache[cache_key] = nodes
+
+            # Filter internal functions before emitting
+            filtered_nodes = []
+            classes_count = 0
+            functions_count = 0
+            methods_count = 0
+
+            for node in nodes:
+                # Only include main structural elements
+                if not self._is_internal_node(node):
+                    # Emit found element event
+                    if self.emitter:
+                        from datetime import datetime
+
+                        self.emitter.emit(
+                            "info",
+                            {
+                                "type": f"analysis.{node.node_type}",
+                                "name": node.name,
+                                "file": str(path),
+                                "line_start": node.line_start,
+                                "complexity": node.complexity,
+                                "message": f"Found {node.node_type}: {node.name}",
+                                "timestamp": datetime.now().isoformat(),
+                            },
+                        )
+
+                    # Count node types
+                    if node.node_type == "class":
+                        classes_count += 1
+                    elif node.node_type == "function":
+                        functions_count += 1
+                    elif node.node_type == "method":
+                        methods_count += 1
+
+                    filtered_nodes.append(
+                        {
+                            "name": node.name,
+                            "type": node.node_type,
+                            "line_start": node.line_start,
+                            "line_end": node.line_end,
+                            "complexity": node.complexity,
+                            "has_docstring": node.has_docstring,
+                            "signature": node.signature,
+                        }
+                    )
+
+            # Emit analysis complete event with stats
+            if self.emitter:
+                from datetime import datetime
+
+                self.emitter.emit(
+                    "info",
+                    {
+                        "type": "analysis.complete",
+                        "file": str(path),
+                        "stats": {
+                            "classes": classes_count,
+                            "functions": functions_count,
+                            "methods": methods_count,
+                            "total_nodes": len(filtered_nodes),
+                        },
+                        "duration": duration,
+                        "message": f"Analysis complete: {classes_count} classes, {functions_count} functions, {methods_count} methods",
+                        "timestamp": datetime.now().isoformat(),
+                    },
+                )
+
+                self.emitter.emit_file_analyzed(file_path, filtered_nodes, duration)
+
+        return {
+            "path": file_path,
+            "language": language,
+            "nodes": (
+                filtered_nodes
+                if "filtered_nodes" in locals()
+                else [
+                    {
+                        "name": n.name,
+                        "type": n.node_type,
+                        "line_start": n.line_start,
+                        "line_end": n.line_end,
+                        "complexity": n.complexity,
+                        "has_docstring": n.has_docstring,
+                        "signature": n.signature,
+                    }
+                    for n in nodes
+                    if not self._is_internal_node(n)
+                ]
+            ),
+        }
+
+    def _is_internal_node(self, node: CodeNode) -> bool:
+        """Check if node is an internal function that should be filtered."""
+        # Filter patterns for internal functions
+        internal_patterns = [
+            "handle",  # Event handlers
+            "on_",  # Event callbacks
+            "_",  # Private methods
+            "get_",  # Simple getters
+            "set_",  # Simple setters
+            "__",  # Python magic methods
+        ]
+
+        name_lower = node.name.lower()
+
+        # Don't filter classes or important public methods
+        if node.node_type == "class":
+            return False
+
+        # Check patterns
+        for pattern in internal_patterns:
+            if name_lower.startswith(pattern):
+                # Exception: include __init__ methods
+                if node.name == "__init__":
+                    return False
+                return True
+
+        return False
+
+    @property
+    def supported_extensions(self):
+        """Get list of supported file extensions."""
+        return {".py", ".js", ".jsx", ".ts", ".tsx", ".mjs", ".cjs"}
+
+    def _get_language(self, file_path: Path) -> str:
+        """Determine language from file extension."""
+        ext = file_path.suffix.lower()
+        language_map = {
+            ".py": "python",
+            ".js": "javascript",
+            ".jsx": "javascript",
+            ".ts": "typescript",
+            ".tsx": "typescript",
+            ".mjs": "javascript",
+            ".cjs": "javascript",
+        }
+        return language_map.get(ext, "unknown")
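
Taken together, these methods give the dashboard a lazy-loading workflow: discover_top_level / discover_directory return one shallow level at a time (child paths are just names, which the frontend joins to the parent path), and analyze_file parses a single file on demand, filtering helper functions via _is_internal_node. A rough end-to-end sketch, assuming the methods added in this diff (the project path is illustrative):

```python
from pathlib import Path

from claude_mpm.tools.code_tree_analyzer import CodeTreeAnalyzer

analyzer = CodeTreeAnalyzer(emit_events=False)
project = Path("/tmp/example-project")  # illustrative

# 1. Shallow scan of the root: directories arrive with discovered=False.
top = analyzer.discover_top_level(project)
for child in top["children"]:
    print(child["type"], child["name"])

# 2. Expand a single directory when the user opens it in the tree.
subtree = analyzer.discover_directory(str(project / "src"))

# 3. Analyze one file on demand; private/getter/setter helpers are filtered out.
report = analyzer.analyze_file(str(project / "src" / "app.py"))
if "error" not in report:
    print(report["language"], len(report["nodes"]))
```
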