mcp-vector-search 0.12.6__py3-none-any.whl → 1.1.22__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (92)
  1. mcp_vector_search/__init__.py +3 -3
  2. mcp_vector_search/analysis/__init__.py +111 -0
  3. mcp_vector_search/analysis/baseline/__init__.py +68 -0
  4. mcp_vector_search/analysis/baseline/comparator.py +462 -0
  5. mcp_vector_search/analysis/baseline/manager.py +621 -0
  6. mcp_vector_search/analysis/collectors/__init__.py +74 -0
  7. mcp_vector_search/analysis/collectors/base.py +164 -0
  8. mcp_vector_search/analysis/collectors/cohesion.py +463 -0
  9. mcp_vector_search/analysis/collectors/complexity.py +743 -0
  10. mcp_vector_search/analysis/collectors/coupling.py +1162 -0
  11. mcp_vector_search/analysis/collectors/halstead.py +514 -0
  12. mcp_vector_search/analysis/collectors/smells.py +325 -0
  13. mcp_vector_search/analysis/debt.py +516 -0
  14. mcp_vector_search/analysis/interpretation.py +685 -0
  15. mcp_vector_search/analysis/metrics.py +414 -0
  16. mcp_vector_search/analysis/reporters/__init__.py +7 -0
  17. mcp_vector_search/analysis/reporters/console.py +646 -0
  18. mcp_vector_search/analysis/reporters/markdown.py +480 -0
  19. mcp_vector_search/analysis/reporters/sarif.py +377 -0
  20. mcp_vector_search/analysis/storage/__init__.py +93 -0
  21. mcp_vector_search/analysis/storage/metrics_store.py +762 -0
  22. mcp_vector_search/analysis/storage/schema.py +245 -0
  23. mcp_vector_search/analysis/storage/trend_tracker.py +560 -0
  24. mcp_vector_search/analysis/trends.py +308 -0
  25. mcp_vector_search/analysis/visualizer/__init__.py +90 -0
  26. mcp_vector_search/analysis/visualizer/d3_data.py +534 -0
  27. mcp_vector_search/analysis/visualizer/exporter.py +484 -0
  28. mcp_vector_search/analysis/visualizer/html_report.py +2895 -0
  29. mcp_vector_search/analysis/visualizer/schemas.py +525 -0
  30. mcp_vector_search/cli/commands/analyze.py +1062 -0
  31. mcp_vector_search/cli/commands/chat.py +1455 -0
  32. mcp_vector_search/cli/commands/index.py +621 -5
  33. mcp_vector_search/cli/commands/index_background.py +467 -0
  34. mcp_vector_search/cli/commands/init.py +13 -0
  35. mcp_vector_search/cli/commands/install.py +597 -335
  36. mcp_vector_search/cli/commands/install_old.py +8 -4
  37. mcp_vector_search/cli/commands/mcp.py +78 -6
  38. mcp_vector_search/cli/commands/reset.py +68 -26
  39. mcp_vector_search/cli/commands/search.py +224 -8
  40. mcp_vector_search/cli/commands/setup.py +1184 -0
  41. mcp_vector_search/cli/commands/status.py +339 -5
  42. mcp_vector_search/cli/commands/uninstall.py +276 -357
  43. mcp_vector_search/cli/commands/visualize/__init__.py +39 -0
  44. mcp_vector_search/cli/commands/visualize/cli.py +292 -0
  45. mcp_vector_search/cli/commands/visualize/exporters/__init__.py +12 -0
  46. mcp_vector_search/cli/commands/visualize/exporters/html_exporter.py +33 -0
  47. mcp_vector_search/cli/commands/visualize/exporters/json_exporter.py +33 -0
  48. mcp_vector_search/cli/commands/visualize/graph_builder.py +647 -0
  49. mcp_vector_search/cli/commands/visualize/layout_engine.py +469 -0
  50. mcp_vector_search/cli/commands/visualize/server.py +600 -0
  51. mcp_vector_search/cli/commands/visualize/state_manager.py +428 -0
  52. mcp_vector_search/cli/commands/visualize/templates/__init__.py +16 -0
  53. mcp_vector_search/cli/commands/visualize/templates/base.py +234 -0
  54. mcp_vector_search/cli/commands/visualize/templates/scripts.py +4542 -0
  55. mcp_vector_search/cli/commands/visualize/templates/styles.py +2522 -0
  56. mcp_vector_search/cli/didyoumean.py +27 -2
  57. mcp_vector_search/cli/main.py +127 -160
  58. mcp_vector_search/cli/output.py +158 -13
  59. mcp_vector_search/config/__init__.py +4 -0
  60. mcp_vector_search/config/default_thresholds.yaml +52 -0
  61. mcp_vector_search/config/settings.py +12 -0
  62. mcp_vector_search/config/thresholds.py +273 -0
  63. mcp_vector_search/core/__init__.py +16 -0
  64. mcp_vector_search/core/auto_indexer.py +3 -3
  65. mcp_vector_search/core/boilerplate.py +186 -0
  66. mcp_vector_search/core/config_utils.py +394 -0
  67. mcp_vector_search/core/database.py +406 -94
  68. mcp_vector_search/core/embeddings.py +24 -0
  69. mcp_vector_search/core/exceptions.py +11 -0
  70. mcp_vector_search/core/git.py +380 -0
  71. mcp_vector_search/core/git_hooks.py +4 -4
  72. mcp_vector_search/core/indexer.py +632 -54
  73. mcp_vector_search/core/llm_client.py +756 -0
  74. mcp_vector_search/core/models.py +91 -1
  75. mcp_vector_search/core/project.py +17 -0
  76. mcp_vector_search/core/relationships.py +473 -0
  77. mcp_vector_search/core/scheduler.py +11 -11
  78. mcp_vector_search/core/search.py +179 -29
  79. mcp_vector_search/mcp/server.py +819 -9
  80. mcp_vector_search/parsers/python.py +285 -5
  81. mcp_vector_search/utils/__init__.py +2 -0
  82. mcp_vector_search/utils/gitignore.py +0 -3
  83. mcp_vector_search/utils/gitignore_updater.py +212 -0
  84. mcp_vector_search/utils/monorepo.py +66 -4
  85. mcp_vector_search/utils/timing.py +10 -6
  86. {mcp_vector_search-0.12.6.dist-info → mcp_vector_search-1.1.22.dist-info}/METADATA +184 -53
  87. mcp_vector_search-1.1.22.dist-info/RECORD +120 -0
  88. {mcp_vector_search-0.12.6.dist-info → mcp_vector_search-1.1.22.dist-info}/WHEEL +1 -1
  89. {mcp_vector_search-0.12.6.dist-info → mcp_vector_search-1.1.22.dist-info}/entry_points.txt +1 -0
  90. mcp_vector_search/cli/commands/visualize.py +0 -1467
  91. mcp_vector_search-0.12.6.dist-info/RECORD +0 -68
  92. {mcp_vector_search-0.12.6.dist-info → mcp_vector_search-1.1.22.dist-info}/licenses/LICENSE +0 -0
@@ -181,18 +181,148 @@ class PythonParser(BaseParser):
181
181
 
182
182
  return chunks
183
183
 
184
+ def _extract_class_skeleton(self, node, lines: list[str], file_path: Path) -> str:
185
+ """Extract class skeleton with method signatures only (no method bodies).
186
+
187
+ This reduces redundancy since method chunks contain full implementations.
188
+ """
189
+ skeleton_lines = []
190
+
191
+ # Find the class body block
192
+ class_block = None
193
+ for child in node.children:
194
+ if child.type == "block":
195
+ class_block = child
196
+ break
197
+
198
+ if not class_block:
199
+ # No block found, return full class content
200
+ start_line = node.start_point[0] + 1
201
+ end_line = node.end_point[0] + 1
202
+ return self._get_line_range(lines, start_line, end_line)
203
+
204
+ # Add class definition line(s) and decorators (everything before the block)
205
+ # but NOT the block's opening line (to avoid duplicating the docstring)
206
+ class_start = node.start_point[0]
207
+ block_start = class_block.start_point[0]
208
+
209
+ for line_idx in range(class_start, block_start):
210
+ if line_idx < len(lines):
211
+ line = lines[line_idx].rstrip()
212
+ # Add the line, ensuring we get the colon on the class definition
213
+ skeleton_lines.append(line)
214
+
215
+ # Add the colon line if it wasn't already added
216
+ if skeleton_lines and not skeleton_lines[-1].rstrip().endswith(":"):
217
+ # The class definition might span multiple lines
218
+ # Find and add up to the colon
219
+ for line_idx in range(class_start, block_start + 1):
220
+ if line_idx < len(lines):
221
+ line = lines[line_idx].rstrip()
222
+ if line not in [s.rstrip() for s in skeleton_lines]:
223
+ skeleton_lines.append(line)
224
+ if line.endswith(":"):
225
+ break
226
+
227
+ # Process class body - add class variables and method signatures
228
+ indent = " " # Standard Python indent
229
+ docstring_added = False
230
+
231
+ for stmt in class_block.children:
232
+ if stmt.type == "expression_statement":
233
+ # Check if it's a docstring (first statement after class def)
234
+ for expr_child in stmt.children:
235
+ if expr_child.type == "string":
236
+ # Add docstring only once
237
+ if not docstring_added:
238
+ doc_start = stmt.start_point[0]
239
+ doc_end = stmt.end_point[0]
240
+ for line_idx in range(doc_start, doc_end + 1):
241
+ if line_idx < len(lines):
242
+ skeleton_lines.append(lines[line_idx].rstrip())
243
+ docstring_added = True
244
+ break
245
+ else:
246
+ # Not a docstring - could be a class variable assignment
247
+ # Add it to the skeleton
248
+ stmt_start = stmt.start_point[0]
249
+ stmt_end = stmt.end_point[0]
250
+ for line_idx in range(stmt_start, stmt_end + 1):
251
+ if line_idx < len(lines):
252
+ skeleton_lines.append(lines[line_idx].rstrip())
253
+
254
+ elif stmt.type in ("assignment", "annotated_assignment"):
255
+ # Class variable - add it
256
+ stmt_start = stmt.start_point[0]
257
+ stmt_end = stmt.end_point[0]
258
+ for line_idx in range(stmt_start, stmt_end + 1):
259
+ if line_idx < len(lines):
260
+ skeleton_lines.append(lines[line_idx].rstrip())
261
+
262
+ elif stmt.type == "function_definition":
263
+ # Method - add only the signature (no body)
264
+ _ = self._get_node_name(stmt) # Not used, but validates method
265
+
266
+ # Add decorators
267
+ for deco_child in stmt.children:
268
+ if deco_child.type == "decorator":
269
+ deco_line = deco_child.start_point[0]
270
+ if deco_line < len(lines):
271
+ skeleton_lines.append(lines[deco_line].rstrip())
272
+
273
+ # Add the def line (with parameters and return type)
274
+ def_line_start = stmt.start_point[0]
275
+
276
+ # Find where the actual body starts (after the colon)
277
+ # We want everything up to and including the colon
278
+ for child in stmt.children:
279
+ if child.type == "block":
280
+ # The block starts after the colon
281
+ # Get lines up to the colon
282
+ block_line = child.start_point[0]
283
+ for line_idx in range(def_line_start, block_line + 1):
284
+ if line_idx < len(lines):
285
+ line = lines[line_idx].rstrip()
286
+ skeleton_lines.append(line)
287
+ # Stop if we've added the colon line
288
+ if ":" in line:
289
+ break
290
+
291
+ # Check if there's a docstring in the method
292
+ for block_child in child.children:
293
+ if block_child.type == "expression_statement":
294
+ for expr_child in block_child.children:
295
+ if expr_child.type == "string":
296
+ # Add method docstring
297
+ doc_start = block_child.start_point[0]
298
+ doc_end = block_child.end_point[0]
299
+ for line_idx in range(doc_start, doc_end + 1):
300
+ if line_idx < len(lines):
301
+ skeleton_lines.append(
302
+ lines[line_idx].rstrip()
303
+ )
304
+ break
305
+ break
306
+
307
+ # Add placeholder for method body
308
+ skeleton_lines.append(f"{indent}{indent}...")
309
+ skeleton_lines.append("") # Blank line between methods
310
+ break
311
+
312
+ return "\n".join(skeleton_lines)
313
+
184
314
  def _extract_class(
185
315
  self, node, lines: list[str], file_path: Path
186
316
  ) -> list[CodeChunk]:
187
- """Extract class definition as a chunk."""
317
+ """Extract class definition as a chunk (skeleton only, no method bodies)."""
188
318
  chunks = []
189
319
 
190
320
  class_name = self._get_node_name(node)
191
321
  start_line = node.start_point[0] + 1
192
322
  end_line = node.end_point[0] + 1
193
323
 
194
- # Get class content
195
- content = self._get_line_range(lines, start_line, end_line)
324
+ # Get class skeleton (without method bodies)
325
+ content = self._extract_class_skeleton(node, lines, file_path)
196
326
 
197
327
  # Extract docstring if present
198
328
  docstring = self._extract_docstring(node, lines)
@@ -339,11 +469,16 @@ class PythonParser(BaseParser):
339
469
  class_content = self._get_line_range(lines, start_line, end_line)
340
470
 
341
471
  if class_content.strip(): # Only add if content is not empty
472
+ # Extract class skeleton (method signatures only)
473
+ skeleton_content = self._extract_class_skeleton_regex(
474
+ class_content, start_line, lines
475
+ )
476
+
342
477
  # Extract class docstring
343
- docstring = self._extract_docstring_regex(class_content)
478
+ docstring = self._extract_docstring_regex(skeleton_content)
344
479
 
345
480
  chunk = self._create_chunk(
346
- content=class_content,
481
+ content=skeleton_content,
347
482
  file_path=file_path,
348
483
  start_line=start_line,
349
484
  end_line=end_line,
@@ -397,6 +532,151 @@ class PythonParser(BaseParser):
397
532
  """Find the end line of a class using indentation."""
398
533
  return self._find_function_end(lines, start_line)
399
534
 
535
+ def _extract_class_skeleton_regex(
536
+ self, class_content: str, start_line: int, all_lines: list[str]
537
+ ) -> str:
538
+ """Extract class skeleton using regex (fallback when tree-sitter unavailable).
539
+
540
+ Returns class with method signatures only, no method bodies.
541
+ """
542
+ lines = class_content.splitlines()
543
+ skeleton_lines = []
544
+ i = 0
545
+
546
+ # Get class definition line(s)
547
+ while i < len(lines):
548
+ line = lines[i]
549
+ skeleton_lines.append(line)
550
+ # Stop at the colon that ends the class definition
551
+ if line.rstrip().endswith(":"):
552
+ i += 1
553
+ break
554
+ i += 1
555
+
556
+ # Track indentation level
557
+ class_indent = None
558
+ if skeleton_lines:
559
+ first_line = skeleton_lines[0]
560
+ class_indent = len(first_line) - len(first_line.lstrip())
561
+
562
+ # Process class body
563
+ in_method = False
564
+ method_indent = None
565
+
566
+ while i < len(lines):
567
+ line = lines[i]
568
+ stripped = line.strip()
569
+
570
+ if not stripped:
571
+ # Keep blank lines if not in a method body
572
+ if not in_method:
573
+ skeleton_lines.append(line)
574
+ i += 1
575
+ continue
576
+
577
+ # Calculate indentation
578
+ current_indent = len(line) - len(line.lstrip())
579
+
580
+ # Check if we're back at class level or beyond
581
+ if class_indent is not None and current_indent <= class_indent and stripped:
582
+ # End of class
583
+ break
584
+
585
+ # Check if this is a method definition
586
+ if re.match(r"^\s*(async\s+)?def\s+\w+", line):
587
+ in_method = True
588
+ method_indent = current_indent
589
+
590
+ # Add any decorators before this method
591
+ # (look backwards for @ lines)
592
+ j = i - 1
593
+ decorator_lines = []
594
+ while j >= 0:
595
+ prev_line = lines[j]
596
+ if prev_line.strip().startswith("@"):
597
+ decorator_lines.insert(0, prev_line)
598
+ j -= 1
599
+ elif prev_line.strip():
600
+ break
601
+ else:
602
+ j -= 1
603
+
604
+ # Remove decorators if we already added them
605
+ if decorator_lines:
606
+ # Check if they're not already in skeleton_lines
607
+ for dec in decorator_lines:
608
+ if dec not in skeleton_lines[-len(decorator_lines) :]:
609
+ skeleton_lines.append(dec)
610
+
611
+ # Add method signature line
612
+ skeleton_lines.append(line)
613
+
614
+ # Check if there's a docstring on next lines
615
+ j = i + 1
616
+ while j < len(lines):
617
+ next_line = lines[j]
618
+ next_stripped = next_line.strip()
619
+
620
+ if not next_stripped:
621
+ j += 1
622
+ continue
623
+
624
+ # Check for docstring
625
+ if next_stripped.startswith('"""') or next_stripped.startswith(
626
+ "'''"
627
+ ):
628
+ quote_type = next_stripped[:3]
629
+ # Add docstring
630
+ skeleton_lines.append(next_line)
631
+ if not (
632
+ next_stripped.endswith(quote_type)
633
+ and len(next_stripped) > 6
634
+ ):
635
+ # Multi-line docstring
636
+ j += 1
637
+ while j < len(lines):
638
+ doc_line = lines[j]
639
+ skeleton_lines.append(doc_line)
640
+ if doc_line.strip().endswith(quote_type):
641
+ j += 1
642
+ break
643
+ j += 1
644
+ else:
645
+ j += 1
646
+ break
647
+ else:
648
+ break
649
+
650
+ # Add placeholder for method body
651
+ if method_indent is not None:
652
+ skeleton_lines.append(" " * (method_indent + 4) + "...")
653
+ else:
654
+ skeleton_lines.append(" ...")
655
+
656
+ i += 1
657
+ continue
658
+
659
+ # Check if we're still in a method
660
+ if in_method:
661
+ if method_indent is not None and current_indent <= method_indent:
662
+ # End of method
663
+ in_method = False
664
+ # Don't skip this line, process it in next iteration
665
+ continue
666
+ else:
667
+ # Inside method body - skip it
668
+ i += 1
669
+ continue
670
+
671
+ # Class-level statement (not a method)
672
+ # This could be a class variable, docstring, etc.
673
+ if current_indent > (class_indent or 0):
674
+ skeleton_lines.append(line)
675
+
676
+ i += 1
677
+
678
+ return "\n".join(skeleton_lines)
679
+
400
680
  def _extract_docstring_regex(self, content: str) -> str | None:
401
681
  """Extract docstring using regex patterns."""
402
682
  # Look for triple-quoted strings at the beginning of the content
@@ -6,6 +6,7 @@ from .gitignore import (
6
6
  create_gitignore_parser,
7
7
  is_path_gitignored,
8
8
  )
9
+ from .gitignore_updater import ensure_gitignore_entry
9
10
  from .timing import (
10
11
  PerformanceProfiler,
11
12
  SearchProfiler,
@@ -24,6 +25,7 @@ __all__ = [
24
25
  "GitignorePattern",
25
26
  "create_gitignore_parser",
26
27
  "is_path_gitignored",
28
+ "ensure_gitignore_entry",
27
29
  # Timing utilities
28
30
  "PerformanceProfiler",
29
31
  "TimingResult",
@@ -65,9 +65,6 @@ class GitignorePattern:
65
65
  parent = "/".join(path_parts[:i])
66
66
  if fnmatch.fnmatch(parent, pattern):
67
67
  return True
68
- # If no parent matches and this is not a directory, don't exclude
69
- if not is_directory:
70
- return False
71
68
 
72
69
  # Try exact match first
73
70
  if fnmatch.fnmatch(path, pattern):
@@ -0,0 +1,212 @@
1
+ """Gitignore file update utilities for automatic .gitignore entry management."""
2
+
3
+ from pathlib import Path
4
+
5
+ from loguru import logger
6
+
7
+
8
+ def ensure_gitignore_entry(
9
+ project_root: Path,
10
+ pattern: str = ".mcp-vector-search/",
11
+ comment: str | None = "MCP Vector Search index directory",
12
+ create_if_missing: bool = True,
13
+ ) -> bool:
14
+ """Ensure a pattern exists in .gitignore file.
15
+
16
+ This function safely adds a pattern to .gitignore if it doesn't already exist.
17
+ It handles various edge cases including:
18
+ - Non-existent .gitignore files (creates if in git repo)
19
+ - Empty .gitignore files
20
+ - Existing patterns in various formats
21
+ - Negation patterns (conflict detection)
22
+ - Permission errors
23
+ - Encoding issues
24
+
25
+ Design Decision: Non-Blocking Operation
26
+ ----------------------------------------
27
+ This function is designed to be non-critical and non-blocking. It will:
28
+ - NEVER raise exceptions (returns False on errors)
29
+ - Log warnings for failures instead of blocking
30
+ - Allow project initialization to continue even if gitignore update fails
31
+
32
+ Rationale: .gitignore updates are a quality-of-life improvement, not a
33
+ requirement for mcp-vector-search functionality. Users can manually add
34
+ the entry if automatic update fails.
35
+
36
+ Pattern Detection Strategy
37
+ --------------------------
38
+ The function checks for semantic equivalents of the pattern:
39
+ - `.mcp-vector-search/` (exact match)
40
+ - `.mcp-vector-search` (without trailing slash)
41
+ - `.mcp-vector-search/*` (with wildcard)
42
+ - `/.mcp-vector-search/` (root-relative)
43
+
44
+ All are treated as equivalent to avoid duplicate entries.
45
+
46
+ Edge Cases Handled
47
+ ------------------
48
+ 1. .gitignore does not exist -> Create (if in git repo)
49
+ 2. .gitignore is empty -> Add pattern
50
+ 3. Pattern already exists -> Skip (log debug)
51
+ 4. Similar pattern exists -> Skip (log debug)
52
+ 5. Negation pattern exists -> Warn and skip (respects user intent)
53
+ 6. Not a git repository -> Skip (no .gitignore needed)
54
+ 7. Permission denied -> Warn and skip (log manual instructions)
55
+ 8. Encoding errors -> Try fallback encoding
56
+ 9. Missing parent directory -> Should not occur (project_root exists)
57
+ 10. Concurrent modification -> Safe (append operation is atomic-ish)
58
+
59
+ Args:
60
+ project_root: Project root directory (must exist)
61
+ pattern: Pattern to add to .gitignore (default: .mcp-vector-search/)
62
+ comment: Optional comment to add before the pattern
63
+ create_if_missing: Create .gitignore if it doesn't exist (default: True)
64
+
65
+ Returns:
66
+ True if pattern was added or already exists, False on error
67
+
68
+ Performance:
69
+ - Time Complexity: O(n) where n = lines in .gitignore (typically <1000)
70
+ - Space Complexity: O(n) for reading file into memory
71
+ - Expected Runtime: <10ms for typical .gitignore files
72
+
73
+ Notes:
74
+ - Only creates .gitignore in git repositories (checks for .git directory)
75
+ - Preserves existing file structure and encoding (UTF-8)
76
+ - Handles negation patterns gracefully (warns but doesn't override)
77
+ - Non-blocking: logs warnings instead of raising exceptions
78
+
79
+ Examples:
80
+ >>> # Basic usage during project initialization
81
+ >>> ensure_gitignore_entry(Path("/path/to/project"))
82
+ True
83
+
84
+ >>> # Custom pattern with custom comment
85
+ >>> ensure_gitignore_entry(
86
+ ... Path("/path/to/project"),
87
+ ... pattern=".custom-dir/",
88
+ ... comment="Custom tool directory"
89
+ ... )
90
+ True
91
+
92
+ >>> # Don't create .gitignore if missing
93
+ >>> ensure_gitignore_entry(
94
+ ... Path("/path/to/project"),
95
+ ... create_if_missing=False
96
+ ... )
97
+ False
98
+ """
99
+ gitignore_path = project_root / ".gitignore"
100
+
101
+ # Edge Case 1: Check if this is a git repository
102
+ # Only create/modify .gitignore in git repositories to avoid polluting non-git projects
103
+ git_dir = project_root / ".git"
104
+ if not git_dir.exists():
105
+ logger.debug(
106
+ "Not a git repository (no .git directory), skipping .gitignore update"
107
+ )
108
+ return False
109
+
110
+ try:
111
+ # Edge Case 2: Handle non-existent .gitignore
112
+ if not gitignore_path.exists():
113
+ if not create_if_missing:
114
+ logger.debug(".gitignore does not exist and create_if_missing=False")
115
+ return False
116
+
117
+ # Create new .gitignore with the pattern
118
+ content = f"# {comment}\n{pattern}\n" if comment else f"{pattern}\n"
119
+ gitignore_path.write_text(content, encoding="utf-8")
120
+ logger.info(f"Created .gitignore with {pattern} entry")
121
+ return True
122
+
123
+ # Read existing content with UTF-8 encoding
124
+ try:
125
+ content = gitignore_path.read_text(encoding="utf-8")
126
+ except UnicodeDecodeError:
127
+ # Edge Case 8: Fallback to more lenient encoding
128
+ logger.debug("UTF-8 decode failed, trying with error replacement")
129
+ try:
130
+ content = gitignore_path.read_text(encoding="utf-8", errors="replace")
131
+ except Exception as e:
132
+ logger.warning(
133
+ f"Failed to read .gitignore due to encoding error: {e}. "
134
+ f"Please manually add '{pattern}' to your .gitignore"
135
+ )
136
+ return False
137
+
138
+ # Edge Case 3: Handle empty .gitignore
139
+ stripped_content = content.strip()
140
+ if not stripped_content:
141
+ content = f"# {comment}\n{pattern}\n" if comment else f"{pattern}\n"
142
+ gitignore_path.write_text(content, encoding="utf-8")
143
+ logger.info(f"Added {pattern} to empty .gitignore")
144
+ return True
145
+
146
+ # Check for existing patterns (Edge Cases 4, 5, 6)
147
+ lines = content.split("\n")
148
+ normalized_pattern = pattern.rstrip("/").lstrip("/")
149
+
150
+ for line in lines:
151
+ # Skip comments and empty lines
152
+ stripped_line = line.strip()
153
+ if not stripped_line or stripped_line.startswith("#"):
154
+ continue
155
+
156
+ # Edge Case 6: Check for negation pattern (conflict)
157
+ # Negation patterns indicate explicit user intent to track the directory
158
+ if stripped_line.startswith("!") and normalized_pattern in stripped_line:
159
+ logger.warning(
160
+ f".gitignore contains negation pattern: {stripped_line}. "
161
+ "This indicates you want to track .mcp-vector-search/ in git. "
162
+ "Skipping automatic entry to respect your configuration."
163
+ )
164
+ return False
165
+
166
+ # Normalize line for comparison
167
+ normalized_line = stripped_line.rstrip("/").lstrip("/")
168
+
169
+ # Edge Cases 4 & 5: Check for exact or similar matches
170
+ # These patterns are semantically equivalent for .gitignore:
171
+ # - .mcp-vector-search/
172
+ # - .mcp-vector-search
173
+ # - .mcp-vector-search/*
174
+ # - /.mcp-vector-search/
175
+ if (
176
+ normalized_line == normalized_pattern
177
+ or normalized_line == normalized_pattern + "/*"
178
+ ):
179
+ logger.debug(f"Pattern already exists in .gitignore: {stripped_line}")
180
+ return True
181
+
182
+ # Pattern doesn't exist, add it
183
+ # Preserve file structure: ensure proper newline handling
184
+ if not content.endswith("\n"):
185
+ content += "\n"
186
+
187
+ # Add blank line before comment for visual separation
188
+ content += "\n"
189
+
190
+ if comment:
191
+ content += f"# {comment}\n"
192
+ content += f"{pattern}\n"
193
+
194
+ # Write back to file
195
+ gitignore_path.write_text(content, encoding="utf-8")
196
+ logger.info(f"Added {pattern} to .gitignore")
197
+ return True
198
+
199
+ except PermissionError:
200
+ # Edge Case 7: Handle read-only .gitignore or protected directory
201
+ logger.warning(
202
+ f"Cannot update .gitignore: Permission denied. "
203
+ f"Please manually add '{pattern}' to your .gitignore file at {gitignore_path}"
204
+ )
205
+ return False
206
+ except Exception as e:
207
+ # Catch-all for unexpected errors (don't block initialization)
208
+ logger.warning(
209
+ f"Failed to update .gitignore: {e}. "
210
+ f"Please manually add '{pattern}' to your .gitignore"
211
+ )
212
+ return False
@@ -6,6 +6,34 @@ from typing import NamedTuple
6
6
 
7
7
  from loguru import logger
8
8
 
9
# Directories to exclude from subproject detection
# These are typically test/example/docs directories, not actual subprojects
# Entries are compared against individual path components by exact name.
EXCLUDED_SUBPROJECT_DIRS = {
    # Tests, examples and documentation
    "tests",
    "test",
    "examples",
    "example",
    "docs",
    "doc",
    # Helper scripts, tooling and benchmarks
    "scripts",
    "tools",
    "benchmarks",
    "benchmark",
    # Installed dependencies
    "node_modules",
    # Version-control and hosting metadata
    ".git",
    ".github",
    ".gitlab",
    # Build output
    "build",
    "dist",
    # Tool caches
    "__pycache__",
    ".pytest_cache",
    ".mypy_cache",
    ".ruff_cache",
    # Coverage reports
    "coverage",
    ".coverage",
    "htmlcov",
}
36
+
9
37
 
10
38
  class Subproject(NamedTuple):
11
39
  """Represents a subproject in a monorepo."""
@@ -27,6 +55,23 @@ class MonorepoDetector:
27
55
  self.project_root = project_root
28
56
  self._subprojects: list[Subproject] | None = None
29
57
 
58
def _is_excluded_path(self, path: Path) -> bool:
    """Decide whether a path must be ignored during subproject detection.

    Args:
        path: Path to check. It is made relative to ``self.project_root``
            before its components are inspected.

    Returns:
        True if any component of the path below the project root is listed
        in ``EXCLUDED_SUBPROJECT_DIRS``, or if the path does not lie under
        the project root at all; False otherwise.
    """
    try:
        components = path.relative_to(self.project_root).parts
    except ValueError:
        # Path is not located under the project root
        return True

    # Excluded as soon as one component appears in the exclusion set
    return not EXCLUDED_SUBPROJECT_DIRS.isdisjoint(components)
74
+
30
75
  def is_monorepo(self) -> bool:
31
76
  """Check if project is a monorepo.
32
77
 
@@ -162,6 +207,13 @@ class MonorepoDetector:
162
207
  if base_path.exists():
163
208
  for subdir in base_path.iterdir():
164
209
  if subdir.is_dir() and not subdir.name.startswith("."):
210
+ # Skip excluded directories
211
+ if self._is_excluded_path(subdir):
212
+ logger.debug(
213
+ f"Skipping excluded nx workspace path: {subdir.relative_to(self.project_root)}"
214
+ )
215
+ continue
216
+
165
217
  package_json = subdir / "package.json"
166
218
  name = self._get_package_name(package_json) or subdir.name
167
219
  relative = str(subdir.relative_to(self.project_root))
@@ -179,14 +231,17 @@ class MonorepoDetector:
179
231
 
180
232
  # Only search up to 3 levels deep
181
233
  for package_json in self.project_root.rglob("package.json"):
182
- # Skip node_modules
183
- if "node_modules" in package_json.parts:
184
- continue
185
-
186
234
  # Skip root package.json
187
235
  if package_json.parent == self.project_root:
188
236
  continue
189
237
 
238
+ # Skip excluded directories (tests, examples, docs, etc.)
239
+ if self._is_excluded_path(package_json.parent):
240
+ logger.debug(
241
+ f"Skipping excluded path: {package_json.relative_to(self.project_root)}"
242
+ )
243
+ continue
244
+
190
245
  # Check depth
191
246
  relative_parts = package_json.relative_to(self.project_root).parts
192
247
  if len(relative_parts) > 4: # Too deep
@@ -223,6 +278,13 @@ class MonorepoDetector:
223
278
  if path.name.startswith("."):
224
279
  continue
225
280
 
281
+ # Skip excluded directories (tests, examples, docs, etc.)
282
+ if self._is_excluded_path(path):
283
+ logger.debug(
284
+ f"Skipping excluded workspace path: {path.relative_to(self.project_root)}"
285
+ )
286
+ continue
287
+
226
288
  # Try to get name from package.json
227
289
  package_json = path / "package.json"
228
290
  name = self._get_package_name(package_json) or path.name
@@ -142,12 +142,16 @@ class PerformanceProfiler:
142
142
  "min": min(durations),
143
143
  "max": max(durations),
144
144
  "std_dev": statistics.stdev(durations) if len(durations) > 1 else 0.0,
145
- "p95": statistics.quantiles(durations, n=20)[18]
146
- if len(durations) >= 20
147
- else max(durations),
148
- "p99": statistics.quantiles(durations, n=100)[98]
149
- if len(durations) >= 100
150
- else max(durations),
145
+ "p95": (
146
+ statistics.quantiles(durations, n=20)[18]
147
+ if len(durations) >= 20
148
+ else max(durations)
149
+ ),
150
+ "p99": (
151
+ statistics.quantiles(durations, n=100)[98]
152
+ if len(durations) >= 100
153
+ else max(durations)
154
+ ),
151
155
  }
152
156
 
153
157
  def get_operation_breakdown(self) -> dict[str, dict[str, Any]]: