mcp-vector-search 0.8.7__py3-none-any.whl → 0.9.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Note: this version of mcp-vector-search has been flagged as a potentially problematic release.

@@ -1,7 +1,7 @@
  """MCP Vector Search - CLI-first semantic code search with MCP integration."""
 
- __version__ = "0.8.7"
- __build__ = "36"
+ __version__ = "0.9.1"
+ __build__ = "38"
  __author__ = "Robert Matsuoka"
  __email__ = "bobmatnyc@gmail.com"
 
@@ -332,6 +332,8 @@ async def _run_batch_indexing(
      )
      error_log_path = indexer.project_root / ".mcp-vector-search" / "indexing_errors.log"
      if error_log_path.exists():
+         # Prune log to keep only last 1000 errors
+         _prune_error_log(error_log_path, max_lines=1000)
          console.print(
              f"[dim] → See details in: {error_log_path}[/dim]"
          )
@@ -692,5 +694,28 @@ def health_cmd(
      health_main(project_root=project_root, repair=repair)
 
 
+ def _prune_error_log(log_path: Path, max_lines: int = 1000) -> None:
+     """Prune error log to keep only the most recent N lines.
+
+     Args:
+         log_path: Path to the error log file
+         max_lines: Maximum number of lines to keep (default: 1000)
+     """
+     try:
+         with open(log_path, 'r') as f:
+             lines = f.readlines()
+
+         if len(lines) > max_lines:
+             # Keep only the last max_lines lines
+             pruned_lines = lines[-max_lines:]
+
+             with open(log_path, 'w') as f:
+                 f.writelines(pruned_lines)
+
+             logger.debug(f"Pruned error log from {len(lines)} to {len(pruned_lines)} lines")
+     except Exception as e:
+         logger.warning(f"Failed to prune error log: {e}")
+
+
  if __name__ == "__main__":
      index_app()
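
For reference, the new _prune_error_log helper rewrites the log file in place, so repeated indexing runs cannot grow it without bound. A minimal sketch of how it behaves, assuming the function is in scope and using a throwaway log file (both are illustrative, not part of the package):

    from pathlib import Path

    log = Path("indexing_errors.log")
    log.write_text("".join(f"error {i}\n" for i in range(1500)))

    _prune_error_log(log, max_lines=1000)

    # The oldest 500 lines are dropped; the newest 1000 remain.
    assert len(log.read_text().splitlines()) == 1000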
@@ -72,48 +72,116 @@ async def _export_chunks(output: Path, file_filter: str | None) -> None:
 
      # Get all chunks with metadata
      console.print("[cyan]Fetching chunks from database...[/cyan]")
+     chunks = await database.get_all_chunks()
 
-     # Query all chunks (we'll use a dummy search to get all)
-     stats = await database.get_stats()
-
-     if stats.total_chunks == 0:
+     if len(chunks) == 0:
          console.print("[yellow]No chunks found in index. Run 'mcp-vector-search index' first.[/yellow]")
          raise typer.Exit(1)
 
+     console.print(f"[green]✓[/green] Retrieved {len(chunks)} chunks")
+
+     # Apply file filter if specified
+     if file_filter:
+         from fnmatch import fnmatch
+         chunks = [c for c in chunks if fnmatch(str(c.file_path), file_filter)]
+         console.print(f"[cyan]Filtered to {len(chunks)} chunks matching '{file_filter}'[/cyan]")
+
+     # Collect subprojects for monorepo support
+     subprojects = {}
+     for chunk in chunks:
+         if chunk.subproject_name and chunk.subproject_name not in subprojects:
+             subprojects[chunk.subproject_name] = {
+                 "name": chunk.subproject_name,
+                 "path": chunk.subproject_path,
+                 "color": _get_subproject_color(chunk.subproject_name, len(subprojects)),
+             }
+
      # Build graph data structure
      nodes = []
      links = []
+     chunk_id_map = {}  # Map chunk IDs to array indices
+
+     # Add subproject root nodes for monorepos
+     if subprojects:
+         console.print(f"[cyan]Detected monorepo with {len(subprojects)} subprojects[/cyan]")
+         for sp_name, sp_data in subprojects.items():
+             node = {
+                 "id": f"subproject_{sp_name}",
+                 "name": sp_name,
+                 "type": "subproject",
+                 "file_path": sp_data["path"] or "",
+                 "start_line": 0,
+                 "end_line": 0,
+                 "complexity": 0,
+                 "color": sp_data["color"],
+                 "depth": 0,
+             }
+             nodes.append(node)
+
+     # Add chunk nodes
+     for chunk in chunks:
+         node = {
+             "id": chunk.chunk_id or chunk.id,
+             "name": chunk.function_name or chunk.class_name or f"L{chunk.start_line}",
+             "type": chunk.chunk_type,
+             "file_path": str(chunk.file_path),
+             "start_line": chunk.start_line,
+             "end_line": chunk.end_line,
+             "complexity": chunk.complexity_score,
+             "parent_id": chunk.parent_chunk_id,
+             "depth": chunk.chunk_depth,
+         }
 
-     # We need to query the database to get actual chunk data
-     # Since there's no "get all chunks" method, we'll work with the stats
-     # In a real implementation, you would add a method to get all chunks
+         # Add subproject info for monorepos
+         if chunk.subproject_name:
+             node["subproject"] = chunk.subproject_name
+             node["color"] = subprojects[chunk.subproject_name]["color"]
 
-     console.print(f"[yellow]Note: Full chunk export requires database enhancement.[/yellow]")
-     console.print(f"[cyan]Creating placeholder graph with {stats.total_chunks} chunks...[/cyan]")
+         nodes.append(node)
+         chunk_id_map[node["id"]] = len(nodes) - 1
+
+     # Build hierarchical links from parent-child relationships
+     for chunk in chunks:
+         chunk_id = chunk.chunk_id or chunk.id
+
+         # Link to subproject root if in monorepo
+         if chunk.subproject_name and not chunk.parent_chunk_id:
+             links.append({
+                 "source": f"subproject_{chunk.subproject_name}",
+                 "target": chunk_id,
+             })
+
+         # Link to parent chunk
+         if chunk.parent_chunk_id and chunk.parent_chunk_id in chunk_id_map:
+             links.append({
+                 "source": chunk.parent_chunk_id,
+                 "target": chunk_id,
+             })
 
-     # Create sample graph structure
+     # Parse inter-project dependencies for monorepos
+     if subprojects:
+         console.print("[cyan]Parsing inter-project dependencies...[/cyan]")
+         dep_links = _parse_project_dependencies(
+             project_manager.project_root,
+             subprojects
+         )
+         links.extend(dep_links)
+         if dep_links:
+             console.print(f"[green]✓[/green] Found {len(dep_links)} inter-project dependencies")
+
+     # Get stats
+     stats = await database.get_stats()
+
+     # Build final graph data
      graph_data = {
-         "nodes": [
-             {
-                 "id": f"chunk_{i}",
-                 "name": f"Chunk {i}",
-                 "type": "code",
-                 "file_path": "example.py",
-                 "start_line": i * 10,
-                 "end_line": (i + 1) * 10,
-                 "complexity": 1.0 + (i % 5),
-             }
-             for i in range(min(stats.total_chunks, 50))  # Limit to 50 for demo
-         ],
-         "links": [
-             {"source": f"chunk_{i}", "target": f"chunk_{i+1}"}
-             for i in range(min(stats.total_chunks - 1, 49))
-         ],
+         "nodes": nodes,
+         "links": links,
          "metadata": {
-             "total_chunks": stats.total_chunks,
+             "total_chunks": len(chunks),
              "total_files": stats.total_files,
              "languages": stats.languages,
-             "export_note": "This is a placeholder. Full export requires database enhancement.",
+             "is_monorepo": len(subprojects) > 0,
+             "subprojects": list(subprojects.keys()) if subprojects else [],
          },
      }
 
@@ -129,7 +197,8 @@ async def _export_chunks(output: Path, file_filter: str | None) -> None:
      Panel.fit(
          f"[green]✓[/green] Exported graph data to [cyan]{output}[/cyan]\n\n"
          f"Nodes: {len(graph_data['nodes'])}\n"
-         f"Links: {len(graph_data['links'])}\n\n"
+         f"Links: {len(graph_data['links'])}\n"
+         f"{'Subprojects: ' + str(len(subprojects)) if subprojects else ''}\n\n"
          f"[dim]Next: Run 'mcp-vector-search visualize serve' to view[/dim]",
          title="Export Complete",
          border_style="green",
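
Taken together, the rewritten exporter now serializes real chunk data instead of the old placeholder graph. A sketch of the JSON shape it produces for a hypothetical one-chunk monorepo export (all values are illustrative):

    graph_data = {
        "nodes": [
            {"id": "subproject_api", "name": "api", "type": "subproject",
             "file_path": "packages/api", "start_line": 0, "end_line": 0,
             "complexity": 0, "color": "#238636", "depth": 0},
            {"id": "a1b2c3", "name": "handle_request", "type": "function",
             "file_path": "packages/api/src/server.py", "start_line": 10,
             "end_line": 42, "complexity": 3.0, "parent_id": None, "depth": 1,
             "subproject": "api", "color": "#238636"},
        ],
        "links": [{"source": "subproject_api", "target": "a1b2c3"}],
        "metadata": {"total_chunks": 1, "total_files": 1,
                     "languages": ["python"], "is_monorepo": True,
                     "subprojects": ["api"]},
    }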
@@ -142,6 +211,69 @@ async def _export_chunks(output: Path, file_filter: str | None) -> None:
          raise typer.Exit(1)
 
 
+ def _get_subproject_color(subproject_name: str, index: int) -> str:
+     """Get a consistent color for a subproject."""
+     # Color palette for subprojects (GitHub-style colors)
+     colors = [
+         "#238636",  # Green
+         "#1f6feb",  # Blue
+         "#d29922",  # Yellow
+         "#8957e5",  # Purple
+         "#da3633",  # Red
+         "#bf8700",  # Orange
+         "#1a7f37",  # Dark green
+         "#0969da",  # Dark blue
+     ]
+     return colors[index % len(colors)]
+
+
+ def _parse_project_dependencies(project_root: Path, subprojects: dict) -> list[dict]:
+     """Parse package.json files to find inter-project dependencies.
+
+     Args:
+         project_root: Root directory of the monorepo
+         subprojects: Dictionary of subproject information
+
+     Returns:
+         List of dependency links between subprojects
+     """
+     dependency_links = []
+
+     for sp_name, sp_data in subprojects.items():
+         package_json = project_root / sp_data["path"] / "package.json"
+
+         if not package_json.exists():
+             continue
+
+         try:
+             with open(package_json) as f:
+                 package_data = json.load(f)
+
+             # Check all dependency types
+             all_deps = {}
+             for dep_type in ["dependencies", "devDependencies", "peerDependencies"]:
+                 if dep_type in package_data:
+                     all_deps.update(package_data[dep_type])
+
+             # Find dependencies on other subprojects
+             for dep_name in all_deps.keys():
+                 # Check if this dependency is another subproject
+                 for other_sp_name in subprojects.keys():
+                     if other_sp_name != sp_name and dep_name == other_sp_name:
+                         # Found inter-project dependency
+                         dependency_links.append({
+                             "source": f"subproject_{sp_name}",
+                             "target": f"subproject_{other_sp_name}",
+                             "type": "dependency",
+                         })
+
+         except Exception as e:
+             logger.debug(f"Failed to parse {package_json}: {e}")
+             continue
+
+     return dependency_links
+
+
  @app.command()
  def serve(
      port: int = typer.Option(8080, "--port", "-p", help="Port for visualization server"),
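
_parse_project_dependencies links two subprojects only when a dependency name in one package.json equals another subproject's name exactly. A usage sketch under an assumed layout where packages/ui/package.json lists "core" among its dependencies (paths and names are hypothetical):

    from pathlib import Path

    subprojects = {
        "core": {"name": "core", "path": "packages/core", "color": "#238636"},
        "ui": {"name": "ui", "path": "packages/ui", "color": "#1f6feb"},
    }

    links = _parse_project_dependencies(Path("/repo"), subprojects)
    # Expected under these assumptions:
    # [{"source": "subproject_ui", "target": "subproject_core", "type": "dependency"}]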
@@ -344,6 +476,7 @@ def _create_visualization_html(html_file: Path) -> None:
      .node.function circle { fill: #d29922; }
      .node.method circle { fill: #8957e5; }
      .node.code circle { fill: #6e7681; }
+     .node.subproject circle { fill: #da3633; stroke-width: 3px; }
 
      .node text {
          font-size: 11px;
@@ -359,6 +492,13 @@ def _create_visualization_html(html_file: Path) -> None:
          stroke-width: 1.5px;
      }
 
+     .link.dependency {
+         stroke: #d29922;
+         stroke-opacity: 0.8;
+         stroke-width: 2px;
+         stroke-dasharray: 5,5;
+     }
+
      .tooltip {
          position: absolute;
          padding: 12px;
@@ -391,6 +531,9 @@ def _create_visualization_html(html_file: Path) -> None:
 
      <h3>Legend</h3>
      <div class="legend">
+         <div class="legend-item">
+             <span class="legend-color" style="background: #da3633;"></span> Subproject
+         </div>
          <div class="legend-item">
              <span class="legend-color" style="background: #238636;"></span> Module
          </div>
@@ -408,6 +551,11 @@ def _create_visualization_html(html_file: Path) -> None:
      </div>
      </div>
 
+     <div id="subprojects-legend" style="display: none;">
+         <h3>Subprojects</h3>
+         <div class="legend" id="subprojects-list"></div>
+     </div>
+
      <div class="stats" id="stats"></div>
      </div>
@@ -439,10 +587,17 @@ def _create_visualization_html(html_file: Path) -> None:
      allNodes = data.nodes;
      allLinks = data.links;
 
-     // Find root nodes (nodes without parents or depth 0/1)
-     const rootNodes = allNodes.filter(n =>
-         !n.parent_id || n.depth === 0 || n.depth === 1 || n.type === 'module'
-     );
+     // Find root nodes
+     let rootNodes;
+     if (data.metadata && data.metadata.is_monorepo) {
+         // In monorepos, subproject nodes are roots
+         rootNodes = allNodes.filter(n => n.type === 'subproject');
+     } else {
+         // Regular projects: nodes without parents or depth 0/1
+         rootNodes = allNodes.filter(n =>
+             !n.parent_id || n.depth === 0 || n.depth === 1 || n.type === 'module'
+         );
+     }
 
      // Start with only root nodes visible
      visibleNodes = new Set(rootNodes.map(n => n.id));
@@ -470,7 +625,7 @@ def _create_visualization_html(html_file: Path) -> None:
          .selectAll("line")
          .data(visibleLinks)
          .join("line")
-         .attr("class", "link");
+         .attr("class", d => d.type === "dependency" ? "link dependency" : "link");
 
      const node = g.append("g")
          .selectAll("g")
@@ -484,9 +639,13 @@ def _create_visualization_html(html_file: Path) -> None:
 
      // Add circles with expand indicator
      node.append("circle")
-         .attr("r", d => d.complexity ? Math.min(8 + d.complexity * 2, 25) : 12)
+         .attr("r", d => {
+             if (d.type === 'subproject') return 20;
+             return d.complexity ? Math.min(8 + d.complexity * 2, 25) : 12;
+         })
          .attr("stroke", d => hasChildren(d) ? "#ffffff" : "none")
-         .attr("stroke-width", d => hasChildren(d) ? 2 : 0);
+         .attr("stroke-width", d => hasChildren(d) ? 2 : 0)
+         .style("fill", d => d.color || null);  // Use custom color if available
 
      // Add expand/collapse indicator
      node.filter(d => hasChildren(d))
@@ -620,7 +779,27 @@ def _create_visualization_html(html_file: Path) -> None:
          <div>Nodes: ${data.nodes.length}</div>
          <div>Links: ${data.links.length}</div>
          ${data.metadata ? `<div>Files: ${data.metadata.total_files || 'N/A'}</div>` : ''}
+         ${data.metadata && data.metadata.is_monorepo ? `<div>Monorepo: ${data.metadata.subprojects.length} subprojects</div>` : ''}
      `);
+
+     // Show subproject legend if monorepo
+     if (data.metadata && data.metadata.is_monorepo && data.metadata.subprojects.length > 0) {
+         const subprojectsLegend = d3.select("#subprojects-legend");
+         const subprojectsList = d3.select("#subprojects-list");
+
+         subprojectsLegend.style("display", "block");
+
+         // Get subproject nodes with colors
+         const subprojectNodes = allNodes.filter(n => n.type === 'subproject');
+
+         subprojectsList.html(
+             subprojectNodes.map(sp =>
+                 `<div class="legend-item">
+                     <span class="legend-color" style="background: ${sp.color};"></span> ${sp.name}
+                 </div>`
+             ).join('')
+         );
+     }
  }
 
  // Auto-load graph data on page load
@@ -98,6 +98,15 @@ class VectorDatabase(ABC):
          """Reset the database (delete all data)."""
          ...
 
+     @abstractmethod
+     async def get_all_chunks(self) -> list[CodeChunk]:
+         """Get all chunks from the database.
+
+         Returns:
+             List of all code chunks with metadata
+         """
+         ...
+
      @abstractmethod
      async def health_check(self) -> bool:
          """Check database health and integrity.
@@ -467,6 +476,59 @@ class ChromaVectorDatabase(VectorDatabase):
              logger.error(f"Failed to reset database: {e}")
              raise DatabaseError(f"Failed to reset database: {e}") from e
 
+     async def get_all_chunks(self) -> list[CodeChunk]:
+         """Get all chunks from the database.
+
+         Returns:
+             List of all code chunks with metadata
+         """
+         if not self._collection:
+             raise DatabaseNotInitializedError("Database not initialized")
+
+         try:
+             # Get all documents from collection
+             results = self._collection.get(
+                 include=["metadatas", "documents"]
+             )
+
+             chunks = []
+             if results and results.get("ids"):
+                 for i, chunk_id in enumerate(results["ids"]):
+                     metadata = results["metadatas"][i]
+                     content = results["documents"][i]
+
+                     chunk = CodeChunk(
+                         content=content,
+                         file_path=Path(metadata["file_path"]),
+                         start_line=metadata["start_line"],
+                         end_line=metadata["end_line"],
+                         language=metadata["language"],
+                         chunk_type=metadata.get("chunk_type", "code"),
+                         function_name=metadata.get("function_name"),
+                         class_name=metadata.get("class_name"),
+                         docstring=metadata.get("docstring"),
+                         imports=metadata.get("imports", []),
+                         complexity_score=metadata.get("complexity_score", 0.0),
+                         chunk_id=metadata.get("chunk_id"),
+                         parent_chunk_id=metadata.get("parent_chunk_id"),
+                         child_chunk_ids=metadata.get("child_chunk_ids", []),
+                         chunk_depth=metadata.get("chunk_depth", 0),
+                         decorators=metadata.get("decorators", []),
+                         parameters=metadata.get("parameters", []),
+                         return_type=metadata.get("return_type"),
+                         type_annotations=metadata.get("type_annotations", {}),
+                         subproject_name=metadata.get("subproject_name"),
+                         subproject_path=metadata.get("subproject_path"),
+                     )
+                     chunks.append(chunk)
+
+             logger.debug(f"Retrieved {len(chunks)} chunks from database")
+             return chunks
+
+         except Exception as e:
+             logger.error(f"Failed to get all chunks: {e}")
+             raise DatabaseError(f"Failed to get all chunks: {e}") from e
+
      def _create_searchable_text(self, chunk: CodeChunk) -> str:
          """Create optimized searchable text from code chunk."""
          parts = [chunk.content]
@@ -914,6 +976,57 @@ class PooledChromaVectorDatabase(VectorDatabase):
              logger.error(f"Failed to reset database: {e}")
              raise DatabaseError(f"Failed to reset database: {e}") from e
 
+     async def get_all_chunks(self) -> list[CodeChunk]:
+         """Get all chunks from the database using pooled connection.
+
+         Returns:
+             List of all code chunks with metadata
+         """
+         try:
+             async with self._pool.get_connection() as conn:
+                 # Get all documents from collection
+                 results = conn.collection.get(
+                     include=["metadatas", "documents"]
+                 )
+
+                 chunks = []
+                 if results and results.get("ids"):
+                     for i, chunk_id in enumerate(results["ids"]):
+                         metadata = results["metadatas"][i]
+                         content = results["documents"][i]
+
+                         chunk = CodeChunk(
+                             content=content,
+                             file_path=Path(metadata["file_path"]),
+                             start_line=metadata["start_line"],
+                             end_line=metadata["end_line"],
+                             language=metadata["language"],
+                             chunk_type=metadata.get("chunk_type", "code"),
+                             function_name=metadata.get("function_name"),
+                             class_name=metadata.get("class_name"),
+                             docstring=metadata.get("docstring"),
+                             imports=metadata.get("imports", []),
+                             complexity_score=metadata.get("complexity_score", 0.0),
+                             chunk_id=metadata.get("chunk_id"),
+                             parent_chunk_id=metadata.get("parent_chunk_id"),
+                             child_chunk_ids=metadata.get("child_chunk_ids", []),
+                             chunk_depth=metadata.get("chunk_depth", 0),
+                             decorators=metadata.get("decorators", []),
+                             parameters=metadata.get("parameters", []),
+                             return_type=metadata.get("return_type"),
+                             type_annotations=metadata.get("type_annotations", {}),
+                             subproject_name=metadata.get("subproject_name"),
+                             subproject_path=metadata.get("subproject_path"),
+                         )
+                         chunks.append(chunk)
+
+                 logger.debug(f"Retrieved {len(chunks)} chunks from database")
+                 return chunks
+
+         except Exception as e:
+             logger.error(f"Failed to get all chunks: {e}")
+             raise DatabaseError(f"Failed to get all chunks: {e}") from e
+
      def _build_where_clause(self, filters: dict[str, Any]) -> dict[str, Any] | None:
          """Build ChromaDB where clause from filters."""
          if not filters:
@@ -13,6 +13,7 @@ from .. import __version__
  from ..config.defaults import DEFAULT_IGNORE_PATTERNS
  from ..parsers.registry import get_parser_registry
  from ..utils.gitignore import create_gitignore_parser
+ from ..utils.monorepo import MonorepoDetector
  from .database import VectorDatabase
  from .exceptions import ParsingError
  from .models import CodeChunk
@@ -72,6 +73,14 @@ class SemanticIndexer:
              logger.warning(f"Failed to load gitignore patterns: {e}")
              self.gitignore_parser = None
 
+         # Initialize monorepo detector
+         self.monorepo_detector = MonorepoDetector(project_root)
+         if self.monorepo_detector.is_monorepo():
+             subprojects = self.monorepo_detector.detect_subprojects()
+             logger.info(f"Detected monorepo with {len(subprojects)} subprojects")
+             for sp in subprojects:
+                 logger.debug(f" - {sp.name} ({sp.relative_path})")
+
      async def index_project(
          self,
          force_reindex: bool = False,
@@ -519,7 +528,7 @@ class SemanticIndexer:
              file_path: Path to the file to parse
 
          Returns:
-             List of code chunks
+             List of code chunks with subproject information
          """
          try:
              # Get appropriate parser
@@ -531,6 +540,13 @@ class SemanticIndexer:
              # Filter out empty chunks
              valid_chunks = [chunk for chunk in chunks if chunk.content.strip()]
 
+             # Assign subproject information for monorepos
+             subproject = self.monorepo_detector.get_subproject_for_file(file_path)
+             if subproject:
+                 for chunk in valid_chunks:
+                     chunk.subproject_name = subproject.name
+                     chunk.subproject_path = subproject.relative_path
+
              return valid_chunks
 
          except Exception as e:
@@ -37,6 +37,10 @@ class CodeChunk:
      return_type: str | None = None
      type_annotations: dict[str, str] = None
 
+     # Enhancement 5: Monorepo support
+     subproject_name: str | None = None  # "ewtn-plus-foundation"
+     subproject_path: str | None = None  # Relative path from root
+
      def __post_init__(self) -> None:
          """Initialize default values and generate chunk ID."""
          if self.imports is None:
@@ -93,6 +97,8 @@ class CodeChunk:
              "parameters": self.parameters,
              "return_type": self.return_type,
              "type_annotations": self.type_annotations,
+             "subproject_name": self.subproject_name,
+             "subproject_path": self.subproject_path,
          }
 
      @classmethod
@@ -118,6 +124,8 @@ class CodeChunk:
              parameters=data.get("parameters", []),
              return_type=data.get("return_type"),
              type_annotations=data.get("type_annotations", {}),
+             subproject_name=data.get("subproject_name"),
+             subproject_path=data.get("subproject_path"),
          )
 
 
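The new fields survive serialization in both directions. A round-trip sketch with illustrative values (the constructor keywords mirror those used in get_all_chunks above):

    from pathlib import Path

    chunk = CodeChunk(
        content="def handler(): ...",
        file_path=Path("packages/api/src/server.py"),
        start_line=10,
        end_line=12,
        language="python",
        subproject_name="api",
        subproject_path="packages/api",
    )
    restored = CodeChunk.from_dict(chunk.to_dict())
    assert restored.subproject_name == "api"
    assert restored.subproject_path == "packages/api"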
@@ -0,0 +1,277 @@
+ """Monorepo detection and subproject identification."""
+
+ import json
+ from pathlib import Path
+ from typing import NamedTuple
+
+ from loguru import logger
+
+
+ class Subproject(NamedTuple):
+     """Represents a subproject in a monorepo."""
+
+     name: str  # "ewtn-plus-foundation"
+     path: Path  # Absolute path to subproject
+     relative_path: str  # Relative to monorepo root
+
+
+ class MonorepoDetector:
+     """Detects monorepo structure and identifies subprojects."""
+
+     def __init__(self, project_root: Path):
+         """Initialize monorepo detector.
+
+         Args:
+             project_root: Root directory of the project
+         """
+         self.project_root = project_root
+         self._subprojects: list[Subproject] | None = None
+
+     def is_monorepo(self) -> bool:
+         """Check if project is a monorepo.
+
+         Returns:
+             True if monorepo structure detected
+         """
+         return bool(self.detect_subprojects())
+
+     def detect_subprojects(self) -> list[Subproject]:
+         """Detect all subprojects in the monorepo.
+
+         Returns:
+             List of detected subprojects
+         """
+         if self._subprojects is not None:
+             return self._subprojects
+
+         subprojects = []
+
+         # Try package.json workspaces (npm/yarn/pnpm)
+         subprojects.extend(self._detect_npm_workspaces())
+
+         # Try lerna.json
+         if not subprojects:
+             subprojects.extend(self._detect_lerna_packages())
+
+         # Try pnpm-workspace.yaml
+         if not subprojects:
+             subprojects.extend(self._detect_pnpm_workspaces())
+
+         # Try nx workspace
+         if not subprojects:
+             subprojects.extend(self._detect_nx_workspace())
+
+         # Fallback: Look for multiple package.json files
+         if not subprojects:
+             subprojects.extend(self._detect_by_package_json())
+
+         self._subprojects = subprojects
+         logger.debug(f"Detected {len(subprojects)} subprojects in {self.project_root}")
+
+         return subprojects
+
+     def _detect_npm_workspaces(self) -> list[Subproject]:
+         """Detect npm/yarn/pnpm workspaces from package.json.
+
+         Returns:
+             List of subprojects from workspaces
+         """
+         package_json = self.project_root / "package.json"
+         if not package_json.exists():
+             return []
+
+         try:
+             with open(package_json) as f:
+                 data = json.load(f)
+
+             workspaces = data.get("workspaces", [])
+
+             # Handle both array and object format
+             if isinstance(workspaces, dict):
+                 workspaces = workspaces.get("packages", [])
+
+             return self._expand_workspace_patterns(workspaces)
+
+         except Exception as e:
+             logger.debug(f"Failed to parse package.json workspaces: {e}")
+             return []
+
+     def _detect_lerna_packages(self) -> list[Subproject]:
+         """Detect lerna packages from lerna.json.
+
+         Returns:
+             List of subprojects from lerna
+         """
+         lerna_json = self.project_root / "lerna.json"
+         if not lerna_json.exists():
+             return []
+
+         try:
+             with open(lerna_json) as f:
+                 data = json.load(f)
+
+             packages = data.get("packages", ["packages/*"])
+             return self._expand_workspace_patterns(packages)
+
+         except Exception as e:
+             logger.debug(f"Failed to parse lerna.json: {e}")
+             return []
+
+     def _detect_pnpm_workspaces(self) -> list[Subproject]:
+         """Detect pnpm workspaces from pnpm-workspace.yaml.
+
+         Returns:
+             List of subprojects from pnpm
+         """
+         pnpm_workspace = self.project_root / "pnpm-workspace.yaml"
+         if not pnpm_workspace.exists():
+             return []
+
+         try:
+             import yaml
+
+             with open(pnpm_workspace) as f:
+                 data = yaml.safe_load(f)
+
+             packages = data.get("packages", [])
+             return self._expand_workspace_patterns(packages)
+
+         except ImportError:
+             logger.debug("pyyaml not installed, skipping pnpm-workspace.yaml detection")
+             return []
+         except Exception as e:
+             logger.debug(f"Failed to parse pnpm-workspace.yaml: {e}")
+             return []
+
+     def _detect_nx_workspace(self) -> list[Subproject]:
+         """Detect nx workspace projects.
+
+         Returns:
+             List of subprojects from nx workspace
+         """
+         nx_json = self.project_root / "nx.json"
+         workspace_json = self.project_root / "workspace.json"
+
+         if not (nx_json.exists() or workspace_json.exists()):
+             return []
+
+         # Nx projects are typically in apps/ and libs/
+         subprojects = []
+         for base_dir in ["apps", "libs", "packages"]:
+             base_path = self.project_root / base_dir
+             if base_path.exists():
+                 for subdir in base_path.iterdir():
+                     if subdir.is_dir() and not subdir.name.startswith("."):
+                         package_json = subdir / "package.json"
+                         name = self._get_package_name(package_json) or subdir.name
+                         relative = str(subdir.relative_to(self.project_root))
+                         subprojects.append(Subproject(name, subdir, relative))
+
+         return subprojects
+
+     def _detect_by_package_json(self) -> list[Subproject]:
+         """Fallback: Find all directories with package.json.
+
+         Returns:
+             List of subprojects by package.json presence
+         """
+         subprojects = []
+
+         # Only search up to 3 levels deep
+         for package_json in self.project_root.rglob("package.json"):
+             # Skip node_modules
+             if "node_modules" in package_json.parts:
+                 continue
+
+             # Skip root package.json
+             if package_json.parent == self.project_root:
+                 continue
+
+             # Check depth
+             relative_parts = package_json.relative_to(self.project_root).parts
+             if len(relative_parts) > 4:  # Too deep
+                 continue
+
+             subdir = package_json.parent
+             name = self._get_package_name(package_json) or subdir.name
+             relative = str(subdir.relative_to(self.project_root))
+             subprojects.append(Subproject(name, subdir, relative))
+
+         return subprojects
+
+     def _expand_workspace_patterns(self, patterns: list[str]) -> list[Subproject]:
+         """Expand workspace glob patterns to actual directories.
+
+         Args:
+             patterns: List of glob patterns (e.g., ["packages/*", "apps/*"])
+
+         Returns:
+             List of subprojects matching patterns
+         """
+         subprojects = []
+
+         for pattern in patterns:
+             # Remove negation patterns (e.g., "!packages/excluded")
+             if pattern.startswith("!"):
+                 continue
+
+             # Expand glob pattern
+             for path in self.project_root.glob(pattern):
+                 if not path.is_dir():
+                     continue
+
+                 if path.name.startswith("."):
+                     continue
+
+                 # Try to get name from package.json
+                 package_json = path / "package.json"
+                 name = self._get_package_name(package_json) or path.name
+                 relative = str(path.relative_to(self.project_root))
+
+                 subprojects.append(Subproject(name, path, relative))
+
+         return subprojects
+
+     def _get_package_name(self, package_json: Path) -> str | None:
+         """Get package name from package.json.
+
+         Args:
+             package_json: Path to package.json file
+
+         Returns:
+             Package name or None
+         """
+         if not package_json.exists():
+             return None
+
+         try:
+             with open(package_json) as f:
+                 data = json.load(f)
+             return data.get("name")
+         except Exception:
+             return None
+
+     def get_subproject_for_file(self, file_path: Path) -> Subproject | None:
+         """Determine which subproject a file belongs to.
+
+         Args:
+             file_path: Path to file
+
+         Returns:
+             Subproject containing the file, or None
+         """
+         subprojects = self.detect_subprojects()
+
+         if not subprojects:
+             return None
+
+         # Find the most specific (deepest) subproject containing this file
+         matching_subprojects = [
+             sp for sp in subprojects if file_path.is_relative_to(sp.path)
+         ]
+
+         if not matching_subprojects:
+             return None
+
+         # Return the deepest match (longest path)
+         return max(matching_subprojects, key=lambda sp: len(sp.path.parts))
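
A usage sketch for the new module (the repository path is illustrative; detection tries npm/yarn workspaces, then lerna, pnpm, nx, and finally a package.json scan, in that order):

    from pathlib import Path
    from mcp_vector_search.utils.monorepo import MonorepoDetector

    detector = MonorepoDetector(Path("/repo"))
    if detector.is_monorepo():
        for sp in detector.detect_subprojects():
            print(f"{sp.name} -> {sp.relative_path}")
        owner = detector.get_subproject_for_file(
            Path("/repo/packages/api/src/server.py")
        )
        print(owner.name if owner else "not in a subproject")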
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: mcp-vector-search
- Version: 0.8.7
+ Version: 0.9.1
  Summary: CLI-first semantic code search with MCP integration
  Project-URL: Homepage, https://github.com/bobmatnyc/mcp-vector-search
  Project-URL: Documentation, https://mcp-vector-search.readthedocs.io
@@ -1,4 +1,4 @@
- mcp_vector_search/__init__.py,sha256=kjU0gRV7u0d23c46XZySbs-fn3sxnWAP3tg6H9X9iws,299
+ mcp_vector_search/__init__.py,sha256=wmIPI3OtTC0MnYzNIIhOfn-WL_Wne538UHx6NYSGYos,299
  mcp_vector_search/py.typed,sha256=lCKeV9Qcn9sGtbRsgg-LJO2ZwWRuknnnlmomq3bJFH0,43
  mcp_vector_search/cli/__init__.py,sha256=TNB7CaOASz8u3yHWLbNmo8-GtHF0qwUjVKWAuNphKgo,40
  mcp_vector_search/cli/didyoumean.py,sha256=F_ss-EX4F9RgnMsEhdTwLpyNCah9SqnBZc2tBtzASck,15918
@@ -12,14 +12,14 @@ mcp_vector_search/cli/commands/__init__.py,sha256=vQls-YKZ54YEwmf7g1dL0T2SS9D4pd
  mcp_vector_search/cli/commands/auto_index.py,sha256=imVVbxWRlA128NPdK9BetNNl3ELrsdq-hqcsLqyAmoM,12712
  mcp_vector_search/cli/commands/config.py,sha256=mKE8gUgAOqCM__4yzEEu9HJPbx9X15lN264zkDJBRxg,12399
  mcp_vector_search/cli/commands/demo.py,sha256=MVfEkYmA2abRFwAbk-lpa6P14_SLJBHZAuHb9d6d02U,10630
- mcp_vector_search/cli/commands/index.py,sha256=DOJa2zLuL10qJ8QHpMWsTUm53vj5ERGSGAMub1-P7lI,22385
+ mcp_vector_search/cli/commands/index.py,sha256=5GhJZzbzCBZYfMfZPjs9cf6RsSdgAAX2MkiTXm1i0K4,23258
  mcp_vector_search/cli/commands/init.py,sha256=2kdjtIPPeutKUXs65-6W1VQPF_BQrbV6_U3TCE7U5mw,23242
  mcp_vector_search/cli/commands/install.py,sha256=phk7Eb7UOU5IsRfJyaDPdOfdUWli9gyA4cHjhgXcNEI,24609
  mcp_vector_search/cli/commands/mcp.py,sha256=Mk4g43R9yRiJVMxsDFUsZldKqY0yi2coQmhAqIMPklo,38958
  mcp_vector_search/cli/commands/reset.py,sha256=bsIT6zjDf6gsvIkVaRaUClYzlTyNe--8t0NWkBY0ldU,13724
  mcp_vector_search/cli/commands/search.py,sha256=yyou7wO9qZ_w2oiKdyOrk2WUxvkFpc-Up8hpflxYlyw,24802
  mcp_vector_search/cli/commands/status.py,sha256=sa_0QHioCmPF5A7obqV2ls-9kmX_JYo7nq3XUe1dmrg,19630
- mcp_vector_search/cli/commands/visualize.py,sha256=tipe_QLjkZboqEz8SfIx5mjYrAenqrKsQPnXkgG7GBg,21398
+ mcp_vector_search/cli/commands/visualize.py,sha256=JY1MSWW5ybpzwzbTWFeMlydyL_UgtoeQq1RUweL4Y44,28336
  mcp_vector_search/cli/commands/watch.py,sha256=2pyWRoo4fIppFnyQ4sW4IBLHmpb_IwnTjRnzHkVBPcQ,8927
  mcp_vector_search/config/__init__.py,sha256=r_qAQkU5gc0EQ2pv8EQARACe4klhrR_WRJqCb9lfGc0,54
  mcp_vector_search/config/constants.py,sha256=afXR6SvLLd8QYY4MG4s1vq-hCJiQsE5PhnE-XG9lvb4,1092
@@ -28,13 +28,13 @@ mcp_vector_search/config/settings.py,sha256=m8o8j-tvWcuzrnNL6YWbi2fFbcB3lZY1kMNi
  mcp_vector_search/core/__init__.py,sha256=bWKtKmmaFs7gG5XPCbrx77UYIVeO1FF8wIJxpj1dLNw,48
  mcp_vector_search/core/auto_indexer.py,sha256=0S4lZXaUgqEytMSA2FxQsh5hN7V1mbSLYVzEf_dslYQ,10307
  mcp_vector_search/core/connection_pool.py,sha256=Yo-gUQQbHawtuvh6OcJiAlbbvWQGQBd31QZOvs498fg,11224
- mcp_vector_search/core/database.py,sha256=wmrnlyWvNH1jr4Rx_b6OjxAeUQ-33G3Vj6v_lji2Eik,37705
+ mcp_vector_search/core/database.py,sha256=HMyQ3J9DTgE8VpafoorRePtolzkX1W9wAZ3U8RvyDK4,42931
  mcp_vector_search/core/embeddings.py,sha256=wSMUNxZcuGPMxxQ1AbKqA1a3-0c6AiOqmuuI7OqTyaQ,10578
  mcp_vector_search/core/exceptions.py,sha256=3bCjT8wmrLz_0e_Tayr90049zNTKYFWZa19kl0saKz8,1597
  mcp_vector_search/core/factory.py,sha256=tM6Ft-V9buF7nn9xbRMU1ngji-BJOKt6BhtfQhFLmF4,10384
  mcp_vector_search/core/git_hooks.py,sha256=xOfPpzgKoNTwM-vbhAihUucgudBQk45bCAVR5zJOFlQ,10878
- mcp_vector_search/core/indexer.py,sha256=IpCzP50wLOttWuUI-NE0qwYq-LlbDo5lrGfVMFMzwAM,29089
- mcp_vector_search/core/models.py,sha256=f9T2vZxhOUun1nGgdhNLGQGojZewFUi9W_rvYf-IfAo,8838
+ mcp_vector_search/core/indexer.py,sha256=rB4XJ2iRyk4qWuM5ykUBfbSPSdJYVNLSXNZ7qPrY9BE,29912
+ mcp_vector_search/core/models.py,sha256=vWEP7JtIv9cG4eQRkUB0TW5Xo6KChzafngsj-rWnF34,9228
  mcp_vector_search/core/project.py,sha256=l81uc5B4CB8VXDbcHzF-_CagxIERDh23tH0iNqTePTs,10403
  mcp_vector_search/core/scheduler.py,sha256=PBSlu-ieDYCXOMGYY7QKv9UReFEDPHNmwnUv_xb4vxg,11761
  mcp_vector_search/core/search.py,sha256=9OC8-KwWdbw4y4QPQ-VXfz0encVHTJWYLtah3_chqG8,33682
@@ -55,10 +55,11 @@ mcp_vector_search/parsers/text.py,sha256=jvMdFspbmrrOR1GSGzf2gvBDCXz1cPN_xemoDK4
  mcp_vector_search/parsers/utils.py,sha256=10vT-GJSeDUoGSIslz8zq4RyavFiMtizCmcnn9cbQqE,8103
  mcp_vector_search/utils/__init__.py,sha256=Eq6lY-oPMfCt-GpPUbg9QbmTHuQVmTaVDBMU2183KVw,887
  mcp_vector_search/utils/gitignore.py,sha256=GiHQu9kv9PRLsWuNS8kbpXsTaBdhlsSHTu1NrZ8Ug5Y,8162
+ mcp_vector_search/utils/monorepo.py,sha256=leTYx4ffN4IO0wDg7OWYfXMWMPp2Q_uEHl5WQFNk5Hs,8657
  mcp_vector_search/utils/timing.py,sha256=THC7mfbTYnUpnnDcblgQacYMzbEkfFoIShx6plmhCgg,11285
  mcp_vector_search/utils/version.py,sha256=d7fS-CLemxb8UzZ9j18zH0Y0Ud097ljKKYYOPulnGPE,1138
- mcp_vector_search-0.8.7.dist-info/METADATA,sha256=IABufdfY297Aq8rtvzJZIXHTM3J4nY8FeIPGAvAN8iQ,19120
- mcp_vector_search-0.8.7.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
- mcp_vector_search-0.8.7.dist-info/entry_points.txt,sha256=y3Ygtc_JiBchNEIL-tPABo7EbzBExGAxwGdkkeP5D2I,86
- mcp_vector_search-0.8.7.dist-info/licenses/LICENSE,sha256=FqZUgGJH_tZKZLQsMCpXaLawRyLmyFKRVfMwYyEcyTs,1072
- mcp_vector_search-0.8.7.dist-info/RECORD,,
+ mcp_vector_search-0.9.1.dist-info/METADATA,sha256=AjybXcW7c9FHyG5OGsQDx0tPNvxAtxeHShq78CrX42o,19120
+ mcp_vector_search-0.9.1.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
+ mcp_vector_search-0.9.1.dist-info/entry_points.txt,sha256=y3Ygtc_JiBchNEIL-tPABo7EbzBExGAxwGdkkeP5D2I,86
+ mcp_vector_search-0.9.1.dist-info/licenses/LICENSE,sha256=FqZUgGJH_tZKZLQsMCpXaLawRyLmyFKRVfMwYyEcyTs,1072
+ mcp_vector_search-0.9.1.dist-info/RECORD,,