mcp-vector-search 0.12.6__py3-none-any.whl → 1.1.22__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- mcp_vector_search/__init__.py +3 -3
- mcp_vector_search/analysis/__init__.py +111 -0
- mcp_vector_search/analysis/baseline/__init__.py +68 -0
- mcp_vector_search/analysis/baseline/comparator.py +462 -0
- mcp_vector_search/analysis/baseline/manager.py +621 -0
- mcp_vector_search/analysis/collectors/__init__.py +74 -0
- mcp_vector_search/analysis/collectors/base.py +164 -0
- mcp_vector_search/analysis/collectors/cohesion.py +463 -0
- mcp_vector_search/analysis/collectors/complexity.py +743 -0
- mcp_vector_search/analysis/collectors/coupling.py +1162 -0
- mcp_vector_search/analysis/collectors/halstead.py +514 -0
- mcp_vector_search/analysis/collectors/smells.py +325 -0
- mcp_vector_search/analysis/debt.py +516 -0
- mcp_vector_search/analysis/interpretation.py +685 -0
- mcp_vector_search/analysis/metrics.py +414 -0
- mcp_vector_search/analysis/reporters/__init__.py +7 -0
- mcp_vector_search/analysis/reporters/console.py +646 -0
- mcp_vector_search/analysis/reporters/markdown.py +480 -0
- mcp_vector_search/analysis/reporters/sarif.py +377 -0
- mcp_vector_search/analysis/storage/__init__.py +93 -0
- mcp_vector_search/analysis/storage/metrics_store.py +762 -0
- mcp_vector_search/analysis/storage/schema.py +245 -0
- mcp_vector_search/analysis/storage/trend_tracker.py +560 -0
- mcp_vector_search/analysis/trends.py +308 -0
- mcp_vector_search/analysis/visualizer/__init__.py +90 -0
- mcp_vector_search/analysis/visualizer/d3_data.py +534 -0
- mcp_vector_search/analysis/visualizer/exporter.py +484 -0
- mcp_vector_search/analysis/visualizer/html_report.py +2895 -0
- mcp_vector_search/analysis/visualizer/schemas.py +525 -0
- mcp_vector_search/cli/commands/analyze.py +1062 -0
- mcp_vector_search/cli/commands/chat.py +1455 -0
- mcp_vector_search/cli/commands/index.py +621 -5
- mcp_vector_search/cli/commands/index_background.py +467 -0
- mcp_vector_search/cli/commands/init.py +13 -0
- mcp_vector_search/cli/commands/install.py +597 -335
- mcp_vector_search/cli/commands/install_old.py +8 -4
- mcp_vector_search/cli/commands/mcp.py +78 -6
- mcp_vector_search/cli/commands/reset.py +68 -26
- mcp_vector_search/cli/commands/search.py +224 -8
- mcp_vector_search/cli/commands/setup.py +1184 -0
- mcp_vector_search/cli/commands/status.py +339 -5
- mcp_vector_search/cli/commands/uninstall.py +276 -357
- mcp_vector_search/cli/commands/visualize/__init__.py +39 -0
- mcp_vector_search/cli/commands/visualize/cli.py +292 -0
- mcp_vector_search/cli/commands/visualize/exporters/__init__.py +12 -0
- mcp_vector_search/cli/commands/visualize/exporters/html_exporter.py +33 -0
- mcp_vector_search/cli/commands/visualize/exporters/json_exporter.py +33 -0
- mcp_vector_search/cli/commands/visualize/graph_builder.py +647 -0
- mcp_vector_search/cli/commands/visualize/layout_engine.py +469 -0
- mcp_vector_search/cli/commands/visualize/server.py +600 -0
- mcp_vector_search/cli/commands/visualize/state_manager.py +428 -0
- mcp_vector_search/cli/commands/visualize/templates/__init__.py +16 -0
- mcp_vector_search/cli/commands/visualize/templates/base.py +234 -0
- mcp_vector_search/cli/commands/visualize/templates/scripts.py +4542 -0
- mcp_vector_search/cli/commands/visualize/templates/styles.py +2522 -0
- mcp_vector_search/cli/didyoumean.py +27 -2
- mcp_vector_search/cli/main.py +127 -160
- mcp_vector_search/cli/output.py +158 -13
- mcp_vector_search/config/__init__.py +4 -0
- mcp_vector_search/config/default_thresholds.yaml +52 -0
- mcp_vector_search/config/settings.py +12 -0
- mcp_vector_search/config/thresholds.py +273 -0
- mcp_vector_search/core/__init__.py +16 -0
- mcp_vector_search/core/auto_indexer.py +3 -3
- mcp_vector_search/core/boilerplate.py +186 -0
- mcp_vector_search/core/config_utils.py +394 -0
- mcp_vector_search/core/database.py +406 -94
- mcp_vector_search/core/embeddings.py +24 -0
- mcp_vector_search/core/exceptions.py +11 -0
- mcp_vector_search/core/git.py +380 -0
- mcp_vector_search/core/git_hooks.py +4 -4
- mcp_vector_search/core/indexer.py +632 -54
- mcp_vector_search/core/llm_client.py +756 -0
- mcp_vector_search/core/models.py +91 -1
- mcp_vector_search/core/project.py +17 -0
- mcp_vector_search/core/relationships.py +473 -0
- mcp_vector_search/core/scheduler.py +11 -11
- mcp_vector_search/core/search.py +179 -29
- mcp_vector_search/mcp/server.py +819 -9
- mcp_vector_search/parsers/python.py +285 -5
- mcp_vector_search/utils/__init__.py +2 -0
- mcp_vector_search/utils/gitignore.py +0 -3
- mcp_vector_search/utils/gitignore_updater.py +212 -0
- mcp_vector_search/utils/monorepo.py +66 -4
- mcp_vector_search/utils/timing.py +10 -6
- {mcp_vector_search-0.12.6.dist-info → mcp_vector_search-1.1.22.dist-info}/METADATA +184 -53
- mcp_vector_search-1.1.22.dist-info/RECORD +120 -0
- {mcp_vector_search-0.12.6.dist-info → mcp_vector_search-1.1.22.dist-info}/WHEEL +1 -1
- {mcp_vector_search-0.12.6.dist-info → mcp_vector_search-1.1.22.dist-info}/entry_points.txt +1 -0
- mcp_vector_search/cli/commands/visualize.py +0 -1467
- mcp_vector_search-0.12.6.dist-info/RECORD +0 -68
- {mcp_vector_search-0.12.6.dist-info → mcp_vector_search-1.1.22.dist-info}/licenses/LICENSE +0 -0
|
@@ -181,18 +181,148 @@ class PythonParser(BaseParser):
|
|
|
181
181
|
|
|
182
182
|
return chunks
|
|
183
183
|
|
|
184
|
+
def _extract_class_skeleton(self, node, lines: list[str], file_path: Path) -> str:
|
|
185
|
+
"""Extract class skeleton with method signatures only (no method bodies).
|
|
186
|
+
|
|
187
|
+
This reduces redundancy since method chunks contain full implementations.
|
|
188
|
+
"""
|
|
189
|
+
skeleton_lines = []
|
|
190
|
+
|
|
191
|
+
# Find the class body block
|
|
192
|
+
class_block = None
|
|
193
|
+
for child in node.children:
|
|
194
|
+
if child.type == "block":
|
|
195
|
+
class_block = child
|
|
196
|
+
break
|
|
197
|
+
|
|
198
|
+
if not class_block:
|
|
199
|
+
# No block found, return full class content
|
|
200
|
+
start_line = node.start_point[0] + 1
|
|
201
|
+
end_line = node.end_point[0] + 1
|
|
202
|
+
return self._get_line_range(lines, start_line, end_line)
|
|
203
|
+
|
|
204
|
+
# Add class definition line(s) and decorators (everything before the block)
|
|
205
|
+
# but NOT the block's opening line (to avoid duplicating the docstring)
|
|
206
|
+
class_start = node.start_point[0]
|
|
207
|
+
block_start = class_block.start_point[0]
|
|
208
|
+
|
|
209
|
+
for line_idx in range(class_start, block_start):
|
|
210
|
+
if line_idx < len(lines):
|
|
211
|
+
line = lines[line_idx].rstrip()
|
|
212
|
+
# Add the line, ensuring we get the colon on the class definition
|
|
213
|
+
skeleton_lines.append(line)
|
|
214
|
+
|
|
215
|
+
# Add the colon line if it wasn't already added
|
|
216
|
+
if skeleton_lines and not skeleton_lines[-1].rstrip().endswith(":"):
|
|
217
|
+
# The class definition might span multiple lines
|
|
218
|
+
# Find and add up to the colon
|
|
219
|
+
for line_idx in range(class_start, block_start + 1):
|
|
220
|
+
if line_idx < len(lines):
|
|
221
|
+
line = lines[line_idx].rstrip()
|
|
222
|
+
if line not in [s.rstrip() for s in skeleton_lines]:
|
|
223
|
+
skeleton_lines.append(line)
|
|
224
|
+
if line.endswith(":"):
|
|
225
|
+
break
|
|
226
|
+
|
|
227
|
+
# Process class body - add class variables and method signatures
|
|
228
|
+
indent = " " # Standard Python indent
|
|
229
|
+
docstring_added = False
|
|
230
|
+
|
|
231
|
+
for stmt in class_block.children:
|
|
232
|
+
if stmt.type == "expression_statement":
|
|
233
|
+
# Check if it's a docstring (first statement after class def)
|
|
234
|
+
for expr_child in stmt.children:
|
|
235
|
+
if expr_child.type == "string":
|
|
236
|
+
# Add docstring only once
|
|
237
|
+
if not docstring_added:
|
|
238
|
+
doc_start = stmt.start_point[0]
|
|
239
|
+
doc_end = stmt.end_point[0]
|
|
240
|
+
for line_idx in range(doc_start, doc_end + 1):
|
|
241
|
+
if line_idx < len(lines):
|
|
242
|
+
skeleton_lines.append(lines[line_idx].rstrip())
|
|
243
|
+
docstring_added = True
|
|
244
|
+
break
|
|
245
|
+
else:
|
|
246
|
+
# Not a docstring - could be a class variable assignment
|
|
247
|
+
# Add it to the skeleton
|
|
248
|
+
stmt_start = stmt.start_point[0]
|
|
249
|
+
stmt_end = stmt.end_point[0]
|
|
250
|
+
for line_idx in range(stmt_start, stmt_end + 1):
|
|
251
|
+
if line_idx < len(lines):
|
|
252
|
+
skeleton_lines.append(lines[line_idx].rstrip())
|
|
253
|
+
|
|
254
|
+
elif stmt.type in ("assignment", "annotated_assignment"):
|
|
255
|
+
# Class variable - add it
|
|
256
|
+
stmt_start = stmt.start_point[0]
|
|
257
|
+
stmt_end = stmt.end_point[0]
|
|
258
|
+
for line_idx in range(stmt_start, stmt_end + 1):
|
|
259
|
+
if line_idx < len(lines):
|
|
260
|
+
skeleton_lines.append(lines[line_idx].rstrip())
|
|
261
|
+
|
|
262
|
+
elif stmt.type == "function_definition":
|
|
263
|
+
# Method - add only the signature (no body)
|
|
264
|
+
_ = self._get_node_name(stmt) # Not used, but validates method
|
|
265
|
+
|
|
266
|
+
# Add decorators
|
|
267
|
+
for deco_child in stmt.children:
|
|
268
|
+
if deco_child.type == "decorator":
|
|
269
|
+
deco_line = deco_child.start_point[0]
|
|
270
|
+
if deco_line < len(lines):
|
|
271
|
+
skeleton_lines.append(lines[deco_line].rstrip())
|
|
272
|
+
|
|
273
|
+
# Add the def line (with parameters and return type)
|
|
274
|
+
def_line_start = stmt.start_point[0]
|
|
275
|
+
|
|
276
|
+
# Find where the actual body starts (after the colon)
|
|
277
|
+
# We want everything up to and including the colon
|
|
278
|
+
for child in stmt.children:
|
|
279
|
+
if child.type == "block":
|
|
280
|
+
# The block starts after the colon
|
|
281
|
+
# Get lines up to the colon
|
|
282
|
+
block_line = child.start_point[0]
|
|
283
|
+
for line_idx in range(def_line_start, block_line + 1):
|
|
284
|
+
if line_idx < len(lines):
|
|
285
|
+
line = lines[line_idx].rstrip()
|
|
286
|
+
skeleton_lines.append(line)
|
|
287
|
+
# Stop if we've added the colon line
|
|
288
|
+
if ":" in line:
|
|
289
|
+
break
|
|
290
|
+
|
|
291
|
+
# Check if there's a docstring in the method
|
|
292
|
+
for block_child in child.children:
|
|
293
|
+
if block_child.type == "expression_statement":
|
|
294
|
+
for expr_child in block_child.children:
|
|
295
|
+
if expr_child.type == "string":
|
|
296
|
+
# Add method docstring
|
|
297
|
+
doc_start = block_child.start_point[0]
|
|
298
|
+
doc_end = block_child.end_point[0]
|
|
299
|
+
for line_idx in range(doc_start, doc_end + 1):
|
|
300
|
+
if line_idx < len(lines):
|
|
301
|
+
skeleton_lines.append(
|
|
302
|
+
lines[line_idx].rstrip()
|
|
303
|
+
)
|
|
304
|
+
break
|
|
305
|
+
break
|
|
306
|
+
|
|
307
|
+
# Add placeholder for method body
|
|
308
|
+
skeleton_lines.append(f"{indent}{indent}...")
|
|
309
|
+
skeleton_lines.append("") # Blank line between methods
|
|
310
|
+
break
|
|
311
|
+
|
|
312
|
+
return "\n".join(skeleton_lines)
|
|
313
|
+
|
|
184
314
|
def _extract_class(
|
|
185
315
|
self, node, lines: list[str], file_path: Path
|
|
186
316
|
) -> list[CodeChunk]:
|
|
187
|
-
"""Extract class definition as a chunk."""
|
|
317
|
+
"""Extract class definition as a chunk (skeleton only, no method bodies)."""
|
|
188
318
|
chunks = []
|
|
189
319
|
|
|
190
320
|
class_name = self._get_node_name(node)
|
|
191
321
|
start_line = node.start_point[0] + 1
|
|
192
322
|
end_line = node.end_point[0] + 1
|
|
193
323
|
|
|
194
|
-
# Get class
|
|
195
|
-
content = self.
|
|
324
|
+
# Get class skeleton (without method bodies)
|
|
325
|
+
content = self._extract_class_skeleton(node, lines, file_path)
|
|
196
326
|
|
|
197
327
|
# Extract docstring if present
|
|
198
328
|
docstring = self._extract_docstring(node, lines)
|
|
@@ -339,11 +469,16 @@ class PythonParser(BaseParser):
|
|
|
339
469
|
class_content = self._get_line_range(lines, start_line, end_line)
|
|
340
470
|
|
|
341
471
|
if class_content.strip(): # Only add if content is not empty
|
|
472
|
+
# Extract class skeleton (method signatures only)
|
|
473
|
+
skeleton_content = self._extract_class_skeleton_regex(
|
|
474
|
+
class_content, start_line, lines
|
|
475
|
+
)
|
|
476
|
+
|
|
342
477
|
# Extract class docstring
|
|
343
|
-
docstring = self._extract_docstring_regex(
|
|
478
|
+
docstring = self._extract_docstring_regex(skeleton_content)
|
|
344
479
|
|
|
345
480
|
chunk = self._create_chunk(
|
|
346
|
-
content=
|
|
481
|
+
content=skeleton_content,
|
|
347
482
|
file_path=file_path,
|
|
348
483
|
start_line=start_line,
|
|
349
484
|
end_line=end_line,
|
|
@@ -397,6 +532,151 @@ class PythonParser(BaseParser):
|
|
|
397
532
|
"""Find the end line of a class using indentation."""
|
|
398
533
|
return self._find_function_end(lines, start_line)
|
|
399
534
|
|
|
535
|
+
def _extract_class_skeleton_regex(
|
|
536
|
+
self, class_content: str, start_line: int, all_lines: list[str]
|
|
537
|
+
) -> str:
|
|
538
|
+
"""Extract class skeleton using regex (fallback when tree-sitter unavailable).
|
|
539
|
+
|
|
540
|
+
Returns class with method signatures only, no method bodies.
|
|
541
|
+
"""
|
|
542
|
+
lines = class_content.splitlines()
|
|
543
|
+
skeleton_lines = []
|
|
544
|
+
i = 0
|
|
545
|
+
|
|
546
|
+
# Get class definition line(s)
|
|
547
|
+
while i < len(lines):
|
|
548
|
+
line = lines[i]
|
|
549
|
+
skeleton_lines.append(line)
|
|
550
|
+
# Stop at the colon that ends the class definition
|
|
551
|
+
if line.rstrip().endswith(":"):
|
|
552
|
+
i += 1
|
|
553
|
+
break
|
|
554
|
+
i += 1
|
|
555
|
+
|
|
556
|
+
# Track indentation level
|
|
557
|
+
class_indent = None
|
|
558
|
+
if skeleton_lines:
|
|
559
|
+
first_line = skeleton_lines[0]
|
|
560
|
+
class_indent = len(first_line) - len(first_line.lstrip())
|
|
561
|
+
|
|
562
|
+
# Process class body
|
|
563
|
+
in_method = False
|
|
564
|
+
method_indent = None
|
|
565
|
+
|
|
566
|
+
while i < len(lines):
|
|
567
|
+
line = lines[i]
|
|
568
|
+
stripped = line.strip()
|
|
569
|
+
|
|
570
|
+
if not stripped:
|
|
571
|
+
# Keep blank lines if not in a method body
|
|
572
|
+
if not in_method:
|
|
573
|
+
skeleton_lines.append(line)
|
|
574
|
+
i += 1
|
|
575
|
+
continue
|
|
576
|
+
|
|
577
|
+
# Calculate indentation
|
|
578
|
+
current_indent = len(line) - len(line.lstrip())
|
|
579
|
+
|
|
580
|
+
# Check if we're back at class level or beyond
|
|
581
|
+
if class_indent is not None and current_indent <= class_indent and stripped:
|
|
582
|
+
# End of class
|
|
583
|
+
break
|
|
584
|
+
|
|
585
|
+
# Check if this is a method definition
|
|
586
|
+
if re.match(r"^\s*(async\s+)?def\s+\w+", line):
|
|
587
|
+
in_method = True
|
|
588
|
+
method_indent = current_indent
|
|
589
|
+
|
|
590
|
+
# Add any decorators before this method
|
|
591
|
+
# (look backwards for @ lines)
|
|
592
|
+
j = i - 1
|
|
593
|
+
decorator_lines = []
|
|
594
|
+
while j >= 0:
|
|
595
|
+
prev_line = lines[j]
|
|
596
|
+
if prev_line.strip().startswith("@"):
|
|
597
|
+
decorator_lines.insert(0, prev_line)
|
|
598
|
+
j -= 1
|
|
599
|
+
elif prev_line.strip():
|
|
600
|
+
break
|
|
601
|
+
else:
|
|
602
|
+
j -= 1
|
|
603
|
+
|
|
604
|
+
# Remove decorators if we already added them
|
|
605
|
+
if decorator_lines:
|
|
606
|
+
# Check if they're not already in skeleton_lines
|
|
607
|
+
for dec in decorator_lines:
|
|
608
|
+
if dec not in skeleton_lines[-len(decorator_lines) :]:
|
|
609
|
+
skeleton_lines.append(dec)
|
|
610
|
+
|
|
611
|
+
# Add method signature line
|
|
612
|
+
skeleton_lines.append(line)
|
|
613
|
+
|
|
614
|
+
# Check if there's a docstring on next lines
|
|
615
|
+
j = i + 1
|
|
616
|
+
while j < len(lines):
|
|
617
|
+
next_line = lines[j]
|
|
618
|
+
next_stripped = next_line.strip()
|
|
619
|
+
|
|
620
|
+
if not next_stripped:
|
|
621
|
+
j += 1
|
|
622
|
+
continue
|
|
623
|
+
|
|
624
|
+
# Check for docstring
|
|
625
|
+
if next_stripped.startswith('"""') or next_stripped.startswith(
|
|
626
|
+
"'''"
|
|
627
|
+
):
|
|
628
|
+
quote_type = next_stripped[:3]
|
|
629
|
+
# Add docstring
|
|
630
|
+
skeleton_lines.append(next_line)
|
|
631
|
+
if not (
|
|
632
|
+
next_stripped.endswith(quote_type)
|
|
633
|
+
and len(next_stripped) > 6
|
|
634
|
+
):
|
|
635
|
+
# Multi-line docstring
|
|
636
|
+
j += 1
|
|
637
|
+
while j < len(lines):
|
|
638
|
+
doc_line = lines[j]
|
|
639
|
+
skeleton_lines.append(doc_line)
|
|
640
|
+
if doc_line.strip().endswith(quote_type):
|
|
641
|
+
j += 1
|
|
642
|
+
break
|
|
643
|
+
j += 1
|
|
644
|
+
else:
|
|
645
|
+
j += 1
|
|
646
|
+
break
|
|
647
|
+
else:
|
|
648
|
+
break
|
|
649
|
+
|
|
650
|
+
# Add placeholder for method body
|
|
651
|
+
if method_indent is not None:
|
|
652
|
+
skeleton_lines.append(" " * (method_indent + 4) + "...")
|
|
653
|
+
else:
|
|
654
|
+
skeleton_lines.append(" ...")
|
|
655
|
+
|
|
656
|
+
i += 1
|
|
657
|
+
continue
|
|
658
|
+
|
|
659
|
+
# Check if we're still in a method
|
|
660
|
+
if in_method:
|
|
661
|
+
if method_indent is not None and current_indent <= method_indent:
|
|
662
|
+
# End of method
|
|
663
|
+
in_method = False
|
|
664
|
+
# Don't skip this line, process it in next iteration
|
|
665
|
+
continue
|
|
666
|
+
else:
|
|
667
|
+
# Inside method body - skip it
|
|
668
|
+
i += 1
|
|
669
|
+
continue
|
|
670
|
+
|
|
671
|
+
# Class-level statement (not a method)
|
|
672
|
+
# This could be a class variable, docstring, etc.
|
|
673
|
+
if current_indent > (class_indent or 0):
|
|
674
|
+
skeleton_lines.append(line)
|
|
675
|
+
|
|
676
|
+
i += 1
|
|
677
|
+
|
|
678
|
+
return "\n".join(skeleton_lines)
|
|
679
|
+
|
|
400
680
|
def _extract_docstring_regex(self, content: str) -> str | None:
|
|
401
681
|
"""Extract docstring using regex patterns."""
|
|
402
682
|
# Look for triple-quoted strings at the beginning of the content
|
|
@@ -6,6 +6,7 @@ from .gitignore import (
|
|
|
6
6
|
create_gitignore_parser,
|
|
7
7
|
is_path_gitignored,
|
|
8
8
|
)
|
|
9
|
+
from .gitignore_updater import ensure_gitignore_entry
|
|
9
10
|
from .timing import (
|
|
10
11
|
PerformanceProfiler,
|
|
11
12
|
SearchProfiler,
|
|
@@ -24,6 +25,7 @@ __all__ = [
|
|
|
24
25
|
"GitignorePattern",
|
|
25
26
|
"create_gitignore_parser",
|
|
26
27
|
"is_path_gitignored",
|
|
28
|
+
"ensure_gitignore_entry",
|
|
27
29
|
# Timing utilities
|
|
28
30
|
"PerformanceProfiler",
|
|
29
31
|
"TimingResult",
|
|
@@ -65,9 +65,6 @@ class GitignorePattern:
|
|
|
65
65
|
parent = "/".join(path_parts[:i])
|
|
66
66
|
if fnmatch.fnmatch(parent, pattern):
|
|
67
67
|
return True
|
|
68
|
-
# If no parent matches and this is not a directory, don't exclude
|
|
69
|
-
if not is_directory:
|
|
70
|
-
return False
|
|
71
68
|
|
|
72
69
|
# Try exact match first
|
|
73
70
|
if fnmatch.fnmatch(path, pattern):
|
|
@@ -0,0 +1,212 @@
|
|
|
1
|
+
"""Gitignore file update utilities for automatic .gitignore entry management."""
|
|
2
|
+
|
|
3
|
+
from pathlib import Path
|
|
4
|
+
|
|
5
|
+
from loguru import logger
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
def ensure_gitignore_entry(
    project_root: Path,
    pattern: str = ".mcp-vector-search/",
    comment: str | None = "MCP Vector Search index directory",
    create_if_missing: bool = True,
) -> bool:
    """Ensure a pattern exists in the project's .gitignore file.

    The function is best-effort and non-blocking: every failure is logged as
    a warning and reported through the return value instead of an exception,
    so project initialization can continue and the user can add the entry
    manually.

    Behaviour:
        - No ``.git`` entry in ``project_root``: nothing is touched.
        - ``.gitignore`` missing: created with the entry (unless
          ``create_if_missing`` is False).
        - ``.gitignore`` empty or whitespace-only: replaced by the entry.
        - An equivalent line already exists (``name``, ``name/``, ``/name/``,
          ``name/*``, ``name/**``): nothing is written.
        - A negation line (``!...``) mentioning the pattern exists: nothing is
          written, to respect the user's explicit intent to track it.
        - Otherwise the entry is *appended*; existing bytes are never
          rewritten, so undecodable bytes and the original line endings of
          the file are left intact.

    Args:
        project_root: Project root directory (must exist).
        pattern: Pattern to add to .gitignore.
        comment: Optional comment line written above the pattern.
        create_if_missing: Create .gitignore if it doesn't exist.

    Returns:
        True if the pattern was added or an equivalent already exists;
        False if the update was skipped or failed.
    """
    gitignore_path = project_root / ".gitignore"
    entry = f"# {comment}\n{pattern}\n" if comment else f"{pattern}\n"

    try:
        # Only create/modify .gitignore in git repositories to avoid
        # polluting non-git projects. The check lives inside the try block so
        # that a filesystem error here cannot escape either.
        if not (project_root / ".git").exists():
            logger.debug(
                "Not a git repository (no .git directory), skipping .gitignore update"
            )
            return False

        if not gitignore_path.exists():
            if not create_if_missing:
                logger.debug(".gitignore does not exist and create_if_missing=False")
                return False

            gitignore_path.write_text(entry, encoding="utf-8")
            logger.info(f"Created .gitignore with {pattern} entry")
            return True

        # The decoded text is used for detection only; it is never written
        # back, so a lossy fallback decode cannot damage the file.
        try:
            content = gitignore_path.read_text(encoding="utf-8")
        except UnicodeDecodeError:
            logger.debug("UTF-8 decode failed, trying with error replacement")
            try:
                content = gitignore_path.read_text(encoding="utf-8", errors="replace")
            except Exception as e:
                logger.warning(
                    f"Failed to read .gitignore due to encoding error: {e}. "
                    f"Please manually add '{pattern}' to your .gitignore"
                )
                return False

        if not content.strip():
            # Nothing but whitespace to preserve: write the entry directly.
            gitignore_path.write_text(entry, encoding="utf-8")
            logger.info(f"Added {pattern} to empty .gitignore")
            return True

        normalized_pattern = pattern.strip("/")
        equivalents = {
            normalized_pattern,
            f"{normalized_pattern}/*",
            f"{normalized_pattern}/**",
        }

        for line in content.split("\n"):
            stripped_line = line.strip()
            if not stripped_line or stripped_line.startswith("#"):
                continue  # Skip comments and empty lines

            # A negation pattern signals explicit intent to track the path.
            if stripped_line.startswith("!") and normalized_pattern in stripped_line:
                logger.warning(
                    f".gitignore contains negation pattern: {stripped_line}. "
                    f"This indicates you want to track {pattern} in git. "
                    "Skipping automatic entry to respect your configuration."
                )
                return False

            if stripped_line.strip("/") in equivalents:
                logger.debug(f"Pattern already exists in .gitignore: {stripped_line}")
                return True

        # Append only: terminate an unterminated last line, then leave one
        # blank line before the new entry for visual separation.
        separator = "" if content.endswith("\n") else "\n"
        with gitignore_path.open("a", encoding="utf-8") as handle:
            handle.write(f"{separator}\n{entry}")
        logger.info(f"Added {pattern} to .gitignore")
        return True

    except PermissionError:
        # Read-only .gitignore or protected directory.
        logger.warning(
            f"Cannot update .gitignore: Permission denied. "
            f"Please manually add '{pattern}' to your .gitignore file at {gitignore_path}"
        )
        return False
    except Exception as e:
        # Catch-all for unexpected errors (don't block initialization).
        logger.warning(
            f"Failed to update .gitignore: {e}. "
            f"Please manually add '{pattern}' to your .gitignore"
        )
        return False
|
|
@@ -6,6 +6,34 @@ from typing import NamedTuple
|
|
|
6
6
|
|
|
7
7
|
from loguru import logger
|
|
8
8
|
|
|
9
|
+
# Directory names that are never treated as monorepo subprojects.
# They usually hold tests, samples, documentation, tooling or generated
# artifacts rather than real packages.
EXCLUDED_SUBPROJECT_DIRS = {
    # Tests, samples and documentation
    "test",
    "tests",
    "example",
    "examples",
    "doc",
    "docs",
    # Tooling and benchmarks
    "scripts",
    "tools",
    "benchmark",
    "benchmarks",
    # Dependencies and VCS / CI metadata
    "node_modules",
    ".git",
    ".github",
    ".gitlab",
    # Build output
    "build",
    "dist",
    # Caches
    "__pycache__",
    ".pytest_cache",
    ".mypy_cache",
    ".ruff_cache",
    # Coverage reports
    "coverage",
    ".coverage",
    "htmlcov",
}
|
|
36
|
+
|
|
9
37
|
|
|
10
38
|
class Subproject(NamedTuple):
|
|
11
39
|
"""Represents a subproject in a monorepo."""
|
|
@@ -27,6 +55,23 @@ class MonorepoDetector:
|
|
|
27
55
|
self.project_root = project_root
|
|
28
56
|
self._subprojects: list[Subproject] | None = None
|
|
29
57
|
|
|
58
|
+
def _is_excluded_path(self, path: Path) -> bool:
    """Tell whether *path* must be ignored during subproject detection.

    Args:
        path: Path to check. It is resolved against ``self.project_root``
            with ``Path.relative_to``, so it has to be located under the
            project root.

    Returns:
        True if any component of the path below the project root is listed
        in ``EXCLUDED_SUBPROJECT_DIRS``, or if the path cannot be expressed
        relative to the project root; False otherwise.
    """
    try:
        parts = path.relative_to(self.project_root).parts
    except ValueError:
        # Not under the project root: exclude it.
        return True
    # Excluded as soon as one component is a known non-project directory.
    return not EXCLUDED_SUBPROJECT_DIRS.isdisjoint(parts)
|
|
74
|
+
|
|
30
75
|
def is_monorepo(self) -> bool:
|
|
31
76
|
"""Check if project is a monorepo.
|
|
32
77
|
|
|
@@ -162,6 +207,13 @@ class MonorepoDetector:
|
|
|
162
207
|
if base_path.exists():
|
|
163
208
|
for subdir in base_path.iterdir():
|
|
164
209
|
if subdir.is_dir() and not subdir.name.startswith("."):
|
|
210
|
+
# Skip excluded directories
|
|
211
|
+
if self._is_excluded_path(subdir):
|
|
212
|
+
logger.debug(
|
|
213
|
+
f"Skipping excluded nx workspace path: {subdir.relative_to(self.project_root)}"
|
|
214
|
+
)
|
|
215
|
+
continue
|
|
216
|
+
|
|
165
217
|
package_json = subdir / "package.json"
|
|
166
218
|
name = self._get_package_name(package_json) or subdir.name
|
|
167
219
|
relative = str(subdir.relative_to(self.project_root))
|
|
@@ -179,14 +231,17 @@ class MonorepoDetector:
|
|
|
179
231
|
|
|
180
232
|
# Only search up to 3 levels deep
|
|
181
233
|
for package_json in self.project_root.rglob("package.json"):
|
|
182
|
-
# Skip node_modules
|
|
183
|
-
if "node_modules" in package_json.parts:
|
|
184
|
-
continue
|
|
185
|
-
|
|
186
234
|
# Skip root package.json
|
|
187
235
|
if package_json.parent == self.project_root:
|
|
188
236
|
continue
|
|
189
237
|
|
|
238
|
+
# Skip excluded directories (tests, examples, docs, etc.)
|
|
239
|
+
if self._is_excluded_path(package_json.parent):
|
|
240
|
+
logger.debug(
|
|
241
|
+
f"Skipping excluded path: {package_json.relative_to(self.project_root)}"
|
|
242
|
+
)
|
|
243
|
+
continue
|
|
244
|
+
|
|
190
245
|
# Check depth
|
|
191
246
|
relative_parts = package_json.relative_to(self.project_root).parts
|
|
192
247
|
if len(relative_parts) > 4: # Too deep
|
|
@@ -223,6 +278,13 @@ class MonorepoDetector:
|
|
|
223
278
|
if path.name.startswith("."):
|
|
224
279
|
continue
|
|
225
280
|
|
|
281
|
+
# Skip excluded directories (tests, examples, docs, etc.)
|
|
282
|
+
if self._is_excluded_path(path):
|
|
283
|
+
logger.debug(
|
|
284
|
+
f"Skipping excluded workspace path: {path.relative_to(self.project_root)}"
|
|
285
|
+
)
|
|
286
|
+
continue
|
|
287
|
+
|
|
226
288
|
# Try to get name from package.json
|
|
227
289
|
package_json = path / "package.json"
|
|
228
290
|
name = self._get_package_name(package_json) or path.name
|
|
@@ -142,12 +142,16 @@ class PerformanceProfiler:
|
|
|
142
142
|
"min": min(durations),
|
|
143
143
|
"max": max(durations),
|
|
144
144
|
"std_dev": statistics.stdev(durations) if len(durations) > 1 else 0.0,
|
|
145
|
-
"p95":
|
|
146
|
-
|
|
147
|
-
|
|
148
|
-
|
|
149
|
-
|
|
150
|
-
|
|
145
|
+
"p95": (
|
|
146
|
+
statistics.quantiles(durations, n=20)[18]
|
|
147
|
+
if len(durations) >= 20
|
|
148
|
+
else max(durations)
|
|
149
|
+
),
|
|
150
|
+
"p99": (
|
|
151
|
+
statistics.quantiles(durations, n=100)[98]
|
|
152
|
+
if len(durations) >= 100
|
|
153
|
+
else max(durations)
|
|
154
|
+
),
|
|
151
155
|
}
|
|
152
156
|
|
|
153
157
|
def get_operation_breakdown(self) -> dict[str, dict[str, Any]]:
|