mcp-code-indexer 4.0.2__py3-none-any.whl → 4.2.0__py3-none-any.whl

This diff compares the contents of two package versions publicly released to a supported registry. It is provided for informational purposes only and reflects the packages exactly as they appear in that registry.
Files changed (26)
  1. mcp_code_indexer/database/models.py +125 -1
  2. mcp_code_indexer/main.py +60 -0
  3. mcp_code_indexer/migrations/006_vector_mode.sql +189 -0
  4. mcp_code_indexer/server/mcp_server.py +3 -0
  5. mcp_code_indexer/vector_mode/__init__.py +36 -0
  6. mcp_code_indexer/vector_mode/chunking/__init__.py +19 -0
  7. mcp_code_indexer/vector_mode/chunking/ast_chunker.py +403 -0
  8. mcp_code_indexer/vector_mode/chunking/chunk_optimizer.py +500 -0
  9. mcp_code_indexer/vector_mode/chunking/language_handlers.py +478 -0
  10. mcp_code_indexer/vector_mode/config.py +167 -0
  11. mcp_code_indexer/vector_mode/daemon.py +335 -0
  12. mcp_code_indexer/vector_mode/monitoring/__init__.py +19 -0
  13. mcp_code_indexer/vector_mode/monitoring/change_detector.py +312 -0
  14. mcp_code_indexer/vector_mode/monitoring/file_watcher.py +445 -0
  15. mcp_code_indexer/vector_mode/monitoring/merkle_tree.py +418 -0
  16. mcp_code_indexer/vector_mode/providers/__init__.py +17 -0
  17. mcp_code_indexer/vector_mode/providers/turbopuffer_client.py +217 -0
  18. mcp_code_indexer/vector_mode/providers/voyage_client.py +119 -0
  19. mcp_code_indexer/vector_mode/security/__init__.py +11 -0
  20. mcp_code_indexer/vector_mode/security/patterns.py +297 -0
  21. mcp_code_indexer/vector_mode/security/redactor.py +368 -0
  22. {mcp_code_indexer-4.0.2.dist-info → mcp_code_indexer-4.2.0.dist-info}/METADATA +66 -5
  23. {mcp_code_indexer-4.0.2.dist-info → mcp_code_indexer-4.2.0.dist-info}/RECORD +26 -8
  24. {mcp_code_indexer-4.0.2.dist-info → mcp_code_indexer-4.2.0.dist-info}/LICENSE +0 -0
  25. {mcp_code_indexer-4.0.2.dist-info → mcp_code_indexer-4.2.0.dist-info}/WHEEL +0 -0
  26. {mcp_code_indexer-4.0.2.dist-info → mcp_code_indexer-4.2.0.dist-info}/entry_points.txt +0 -0
mcp_code_indexer/database/models.py CHANGED
@@ -7,7 +7,8 @@ the database operations.
 """
 
 from datetime import datetime
-from typing import List, Optional
+from typing import List, Optional, Dict, Any
+from enum import Enum
 
 from pydantic import BaseModel, Field
 
@@ -185,6 +186,129 @@ class WordFrequencyResult(BaseModel):
     total_unique_terms: int = Field(..., description="Number of unique terms found")
 
 
+# Vector Mode Models
+
+class ChunkType(str, Enum):
+    """Types of code chunks for semantic analysis."""
+    FUNCTION = "function"
+    CLASS = "class"
+    METHOD = "method"
+    IMPORT = "import"
+    DOCSTRING = "docstring"
+    COMMENT = "comment"
+    VARIABLE = "variable"
+    INTERFACE = "interface"
+    TYPE_DEFINITION = "type_definition"
+    MODULE = "module"
+    NAMESPACE = "namespace"
+    GENERIC = "generic"
+
+class NodeType(str, Enum):
+    """Types of nodes in Merkle tree."""
+    FILE = "file"
+    DIRECTORY = "directory"
+    PROJECT = "project"
+
+class SyncStatus(str, Enum):
+    """Vector index synchronization status."""
+    PENDING = "pending"
+    IN_PROGRESS = "in_progress"
+    COMPLETED = "completed"
+    FAILED = "failed"
+    PAUSED = "paused"
+
+class CodeChunk(BaseModel):
+    """
+    Represents a semantic chunk of code extracted from a file.
+
+    Used for embedding generation and vector search operations.
+    """
+
+    id: Optional[int] = Field(None, description="Database ID")
+    file_id: int = Field(..., description="Reference to FileDescription")
+    project_id: str = Field(..., description="Reference to project")
+    chunk_type: ChunkType = Field(..., description="Type of code chunk")
+    name: Optional[str] = Field(None, description="Name of function/class/etc")
+    start_line: int = Field(..., description="Starting line number")
+    end_line: int = Field(..., description="Ending line number")
+    content_hash: str = Field(..., description="SHA-256 hash of chunk content")
+    embedding_id: Optional[str] = Field(None, description="Vector database ID")
+    redacted: bool = Field(default=False, description="Whether content was redacted")
+    metadata: Dict[str, Any] = Field(default_factory=dict, description="Additional metadata")
+    created: datetime = Field(default_factory=datetime.utcnow, description="Creation timestamp")
+    last_modified: datetime = Field(default_factory=datetime.utcnow, description="Last update timestamp")
+
+class MerkleNode(BaseModel):
+    """
+    Represents a node in the Merkle tree for change detection.
+
+    Used to efficiently detect file system changes without scanning entire directory trees.
+    """
+
+    id: Optional[int] = Field(None, description="Database ID")
+    project_id: str = Field(..., description="Reference to project")
+    path: str = Field(..., description="File/directory path relative to project root")
+    hash: str = Field(..., description="SHA-256 hash of content or children")
+    node_type: NodeType = Field(..., description="Type of filesystem node")
+    parent_path: Optional[str] = Field(None, description="Path to parent directory")
+    children_hash: Optional[str] = Field(None, description="Combined hash of children")
+    last_modified: datetime = Field(default_factory=datetime.utcnow, description="Last update timestamp")
+
+class IndexMeta(BaseModel):
+    """
+    Metadata about vector indexing progress and status for a project.
+
+    Tracks indexing state, statistics, and synchronization status.
+    """
+
+    id: Optional[int] = Field(None, description="Database ID")
+    project_id: str = Field(..., description="Reference to project", unique=True)
+    total_chunks: int = Field(default=0, description="Total number of chunks")
+    indexed_chunks: int = Field(default=0, description="Number of chunks with embeddings")
+    total_files: int = Field(default=0, description="Total number of files")
+    indexed_files: int = Field(default=0, description="Number of files processed")
+    last_sync: Optional[datetime] = Field(None, description="Last successful sync timestamp")
+    sync_status: SyncStatus = Field(default=SyncStatus.PENDING, description="Current sync status")
+    error_message: Optional[str] = Field(None, description="Last error message")
+    queue_depth: int = Field(default=0, description="Number of pending tasks")
+    processing_rate: float = Field(default=0.0, description="Files per second processing rate")
+    estimated_completion: Optional[datetime] = Field(None, description="Estimated completion time")
+    metadata: Dict[str, Any] = Field(default_factory=dict, description="Additional metadata")
+    created: datetime = Field(default_factory=datetime.utcnow, description="Creation timestamp")
+    last_modified: datetime = Field(default_factory=datetime.utcnow, description="Last update timestamp")
+
+class VectorSearchResult(BaseModel):
+    """
+    Represents a vector search result with similarity scoring.
+    """
+
+    file_path: str = Field(..., description="Path to the matching file")
+    chunk_name: Optional[str] = Field(None, description="Name of the code chunk")
+    chunk_type: ChunkType = Field(..., description="Type of code chunk")
+    code_snippet: str = Field(..., description="Original code content")
+    start_line: int = Field(..., description="Starting line number")
+    end_line: int = Field(..., description="Ending line number")
+    similarity_score: float = Field(..., description="Cosine similarity score")
+    project_id: str = Field(..., description="Project identifier")
+    metadata: Dict[str, Any] = Field(default_factory=dict, description="Additional metadata")
+
+class VectorIndexStatus(BaseModel):
+    """
+    Current status of vector indexing for a project.
+    """
+
+    is_indexing: bool = Field(..., description="Whether indexing is currently active")
+    indexed_files: int = Field(..., description="Number of files indexed")
+    total_files: int = Field(..., description="Total number of files")
+    indexed_chunks: int = Field(..., description="Number of chunks indexed")
+    total_chunks: int = Field(..., description="Total number of chunks")
+    last_sync: Optional[datetime] = Field(None, description="Last sync timestamp")
+    sync_status: SyncStatus = Field(..., description="Current sync status")
+    queue_depth: int = Field(..., description="Number of pending tasks")
+    processing_rate: float = Field(..., description="Processing rate")
+    estimated_completion: Optional[datetime] = Field(None, description="Estimated completion time")
+    error_message: Optional[str] = Field(None, description="Last error message")
+
 # Enable forward references for recursive models
 FolderNode.model_rebuild()
 CodebaseOverview.model_rebuild()
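
Note: the classes above are plain Pydantic models and can be constructed directly; a minimal sketch (import path taken from the file list above, placeholder values):

from mcp_code_indexer.database.models import ChunkType, CodeChunk, SyncStatus

chunk = CodeChunk(
    file_id=1,                          # placeholder FileDescription id
    project_id="example-project",       # placeholder project id
    chunk_type=ChunkType.FUNCTION,
    name="parse_arguments",
    start_line=151,
    end_line=169,
    content_hash="0" * 64,              # placeholder SHA-256 hex digest
)
print(chunk.chunk_type.value, chunk.redacted, chunk.metadata)  # function False {}
print(SyncStatus.PENDING.value)  # "pending", the initial sync_status in index_meta
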
mcp_code_indexer/main.py CHANGED
@@ -151,6 +151,19 @@ def parse_arguments() -> argparse.Namespace:
         help="Allowed CORS origins for HTTP transport (default: allow all)",
     )
 
+    # Vector mode options
+    parser.add_argument(
+        "--vector",
+        action="store_true",
+        help="Enable vector mode with semantic search capabilities (requires vector extras)",
+    )
+
+    parser.add_argument(
+        "--vector-config",
+        type=str,
+        help="Path to vector mode configuration file",
+    )
+
     return parser.parse_args()
 
 
@@ -996,6 +1009,52 @@ async def main() -> None:
     )
 
     try:
+        # Handle vector mode initialization
+        vector_daemon_task = None
+        if args.vector:
+            try:
+                from .vector_mode import is_vector_mode_available, check_api_keys
+                from .vector_mode.config import load_vector_config
+                from .vector_mode.daemon import start_vector_daemon
+
+                # Check if vector mode is available
+                if not is_vector_mode_available():
+                    logger.error("Vector mode requires additional dependencies. Install with: pip install mcp-code-indexer[vector]")
+                    sys.exit(1)
+
+                # Check API keys
+                api_keys = check_api_keys()
+                if not all(api_keys.values()):
+                    missing = [k for k, v in api_keys.items() if not v]
+                    logger.error(f"Missing API keys for vector mode: {', '.join(missing)}")
+                    sys.exit(1)
+
+                # Load vector configuration
+                vector_config_path = Path(args.vector_config).expanduser() if args.vector_config else None
+                vector_config = load_vector_config(vector_config_path)
+
+                logger.info(
+                    "Vector mode enabled",
+                    extra={
+                        "structured_data": {
+                            "embedding_model": vector_config.embedding_model,
+                            "batch_size": vector_config.batch_size,
+                            "daemon_enabled": vector_config.daemon_enabled,
+                        }
+                    }
+                )
+
+                # Start vector daemon in background
+                if vector_config.daemon_enabled:
+                    vector_daemon_task = asyncio.create_task(
+                        start_vector_daemon(vector_config_path, db_path, cache_dir)
+                    )
+                    logger.info("Vector daemon started")
+
+            except Exception as e:
+                logger.error(f"Failed to initialize vector mode: {e}")
+                sys.exit(1)
+
         # Import and run the MCP server
         from .server.mcp_server import MCPCodeIndexServer
 
@@ -1028,6 +1087,7 @@ async def main() -> None:
             db_path=db_path,
             cache_dir=cache_dir,
             transport=transport,
+            vector_mode=args.vector,
         )
 
         # Set server instance in transport after server creation
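
Note: with these flags, vector mode is opted into at startup (`--vector`, optionally `--vector-config PATH`), presumably via the package's console script after installing the `[vector]` extra named in the error message above. A pre-flight sketch that mirrors the same gate main() applies; the function and attribute names are taken from the imports and the logging call in this hunk:

from mcp_code_indexer.vector_mode import is_vector_mode_available, check_api_keys
from mcp_code_indexer.vector_mode.config import load_vector_config

# Same checks main() performs before starting the daemon.
if not is_vector_mode_available():
    raise SystemExit("Install the vector extras: pip install mcp-code-indexer[vector]")

missing = [name for name, present in check_api_keys().items() if not present]
if missing:
    raise SystemExit(f"Missing API keys for vector mode: {', '.join(missing)}")

config = load_vector_config(None)  # None, as in main() when --vector-config is not given
print(config.embedding_model, config.batch_size, config.daemon_enabled)
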
mcp_code_indexer/migrations/006_vector_mode.sql ADDED
@@ -0,0 +1,189 @@
+-- Migration 006: Add vector mode tables and indexes
+-- This migration adds support for semantic search capabilities with embeddings
+-- Includes code chunks, Merkle tree nodes, and indexing metadata
+
+-- Ensure WAL mode is enabled for safe migrations
+PRAGMA journal_mode=WAL;
+
+-- Temporarily disable foreign key constraints for migration
+PRAGMA foreign_keys=OFF;
+
+-- Start transaction for atomic migration
+BEGIN TRANSACTION;
+
+-- Create code_chunks table for storing semantic code chunks
+CREATE TABLE code_chunks (
+    id INTEGER PRIMARY KEY AUTOINCREMENT,
+    file_id INTEGER NOT NULL,
+    project_id TEXT NOT NULL,
+    chunk_type TEXT NOT NULL DEFAULT 'generic', -- function, class, method, import, etc.
+    name TEXT, -- Name of function/class/etc, can be NULL for generic chunks
+    start_line INTEGER NOT NULL,
+    end_line INTEGER NOT NULL,
+    content_hash TEXT NOT NULL, -- SHA-256 hash of chunk content
+    embedding_id TEXT, -- ID in vector database (Turbopuffer)
+    redacted BOOLEAN DEFAULT FALSE, -- Whether content was redacted for security
+    metadata TEXT DEFAULT '{}', -- JSON metadata about the chunk
+    created DATETIME DEFAULT CURRENT_TIMESTAMP,
+    last_modified DATETIME DEFAULT CURRENT_TIMESTAMP,
+    FOREIGN KEY (file_id) REFERENCES file_descriptions(id) ON DELETE CASCADE,
+    FOREIGN KEY (project_id) REFERENCES projects(id) ON DELETE CASCADE
+);
+
+-- Create indexes for code_chunks table
+CREATE INDEX idx_code_chunks_file_id ON code_chunks(file_id);
+CREATE INDEX idx_code_chunks_project_id ON code_chunks(project_id);
+CREATE INDEX idx_code_chunks_chunk_type ON code_chunks(chunk_type);
+CREATE INDEX idx_code_chunks_content_hash ON code_chunks(content_hash);
+CREATE INDEX idx_code_chunks_embedding_id ON code_chunks(embedding_id);
+CREATE INDEX idx_code_chunks_last_modified ON code_chunks(last_modified);
+CREATE INDEX idx_code_chunks_redacted ON code_chunks(redacted);
+
+-- Create merkle_nodes table for efficient change detection
+CREATE TABLE merkle_nodes (
+    id INTEGER PRIMARY KEY AUTOINCREMENT,
+    project_id TEXT NOT NULL,
+    path TEXT NOT NULL, -- File/directory path relative to project root
+    hash TEXT NOT NULL, -- SHA-256 hash of content or children
+    node_type TEXT NOT NULL DEFAULT 'file', -- file, directory, project
+    parent_path TEXT, -- Path to parent directory, NULL for root
+    children_hash TEXT, -- Combined hash of children for directories
+    last_modified DATETIME DEFAULT CURRENT_TIMESTAMP,
+    UNIQUE(project_id, path),
+    FOREIGN KEY (project_id) REFERENCES projects(id) ON DELETE CASCADE
+);
+
+-- Create indexes for merkle_nodes table
+CREATE INDEX idx_merkle_nodes_project_id ON merkle_nodes(project_id);
+CREATE INDEX idx_merkle_nodes_path ON merkle_nodes(path);
+CREATE INDEX idx_merkle_nodes_hash ON merkle_nodes(hash);
+CREATE INDEX idx_merkle_nodes_node_type ON merkle_nodes(node_type);
+CREATE INDEX idx_merkle_nodes_parent_path ON merkle_nodes(parent_path);
+CREATE INDEX idx_merkle_nodes_last_modified ON merkle_nodes(last_modified);
+
+-- Create index_meta table for tracking vector indexing progress
+CREATE TABLE index_meta (
+    id INTEGER PRIMARY KEY AUTOINCREMENT,
+    project_id TEXT NOT NULL UNIQUE,
+    total_chunks INTEGER DEFAULT 0,
+    indexed_chunks INTEGER DEFAULT 0,
+    total_files INTEGER DEFAULT 0,
+    indexed_files INTEGER DEFAULT 0,
+    last_sync DATETIME,
+    sync_status TEXT DEFAULT 'pending', -- pending, in_progress, completed, failed, paused
+    error_message TEXT,
+    queue_depth INTEGER DEFAULT 0,
+    processing_rate REAL DEFAULT 0.0, -- Files per second
+    estimated_completion DATETIME,
+    metadata TEXT DEFAULT '{}', -- JSON metadata
+    created DATETIME DEFAULT CURRENT_TIMESTAMP,
+    last_modified DATETIME DEFAULT CURRENT_TIMESTAMP,
+    FOREIGN KEY (project_id) REFERENCES projects(id) ON DELETE CASCADE
+);
+
+-- Create indexes for index_meta table
+CREATE INDEX idx_index_meta_project_id ON index_meta(project_id);
+CREATE INDEX idx_index_meta_sync_status ON index_meta(sync_status);
+CREATE INDEX idx_index_meta_last_sync ON index_meta(last_sync);
+CREATE INDEX idx_index_meta_last_modified ON index_meta(last_modified);
+
+-- Add vector_mode column to projects table to track which projects use vector search
+ALTER TABLE projects ADD COLUMN vector_mode BOOLEAN DEFAULT FALSE;
+CREATE INDEX idx_projects_vector_mode ON projects(vector_mode);
+
+-- Create triggers to maintain consistency between file_descriptions and code_chunks
+CREATE TRIGGER code_chunks_cleanup_on_file_delete
+AFTER DELETE ON file_descriptions
+BEGIN
+    DELETE FROM code_chunks WHERE file_id = OLD.id;
+END;
+
+-- Create triggers to update index_meta when chunks are added/removed
+CREATE TRIGGER update_index_meta_on_chunk_insert
+AFTER INSERT ON code_chunks
+BEGIN
+    INSERT OR REPLACE INTO index_meta (
+        project_id, total_chunks, indexed_chunks, total_files, indexed_files, last_modified
+    )
+    SELECT
+        NEW.project_id,
+        COUNT(*) as total_chunks,
+        COUNT(embedding_id) as indexed_chunks,
+        (SELECT COUNT(DISTINCT file_id) FROM code_chunks WHERE project_id = NEW.project_id) as total_files,
+        (SELECT COUNT(DISTINCT file_id) FROM code_chunks WHERE project_id = NEW.project_id AND embedding_id IS NOT NULL) as indexed_files,
+        CURRENT_TIMESTAMP
+    FROM code_chunks
+    WHERE project_id = NEW.project_id;
+END;
+
+CREATE TRIGGER update_index_meta_on_chunk_update
+AFTER UPDATE ON code_chunks
+BEGIN
+    UPDATE index_meta SET
+        indexed_chunks = (
+            SELECT COUNT(*) FROM code_chunks
+            WHERE project_id = NEW.project_id AND embedding_id IS NOT NULL
+        ),
+        indexed_files = (
+            SELECT COUNT(DISTINCT file_id) FROM code_chunks
+            WHERE project_id = NEW.project_id AND embedding_id IS NOT NULL
+        ),
+        last_modified = CURRENT_TIMESTAMP
+    WHERE project_id = NEW.project_id;
+END;
+
+CREATE TRIGGER update_index_meta_on_chunk_delete
+AFTER DELETE ON code_chunks
+BEGIN
+    UPDATE index_meta SET
+        total_chunks = (
+            SELECT COUNT(*) FROM code_chunks
+            WHERE project_id = OLD.project_id
+        ),
+        indexed_chunks = (
+            SELECT COUNT(*) FROM code_chunks
+            WHERE project_id = OLD.project_id AND embedding_id IS NOT NULL
+        ),
+        total_files = (
+            SELECT COUNT(DISTINCT file_id) FROM code_chunks
+            WHERE project_id = OLD.project_id
+        ),
+        indexed_files = (
+            SELECT COUNT(DISTINCT file_id) FROM code_chunks
+            WHERE project_id = OLD.project_id AND embedding_id IS NOT NULL
+        ),
+        last_modified = CURRENT_TIMESTAMP
+    WHERE project_id = OLD.project_id;
+END;
+
+-- Create view for vector search results with file information
+CREATE VIEW vector_search_view AS
+SELECT
+    cc.id as chunk_id,
+    cc.file_id,
+    fd.file_path,
+    cc.chunk_type,
+    cc.name as chunk_name,
+    cc.start_line,
+    cc.end_line,
+    cc.content_hash,
+    cc.embedding_id,
+    cc.redacted,
+    cc.metadata as chunk_metadata,
+    cc.project_id,
+    p.name as project_name,
+    fd.description as file_description,
+    cc.created as chunk_created,
+    cc.last_modified as chunk_modified,
+    fd.last_modified as file_modified
+FROM code_chunks cc
+JOIN file_descriptions fd ON cc.file_id = fd.id
+JOIN projects p ON cc.project_id = p.id
+WHERE cc.embedding_id IS NOT NULL
+    AND fd.to_be_cleaned IS NULL;
+
+-- Re-enable foreign key constraints
+PRAGMA foreign_keys=ON;
+
+-- Commit the migration
+COMMIT;
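
Note: once migration 006 has been applied, sync progress and indexed chunks can be inspected straight from SQLite; a read-only sketch (the database path is the default shown in mcp_server.py below, the tables and view are those created above):

import sqlite3
from pathlib import Path

# Default location used by MCPCodeIndexServer when no db_path is given.
db = sqlite3.connect(Path.home() / ".mcp-code-index" / "tracker.db")

# Per-project sync state maintained by the triggers above.
for row in db.execute(
    "SELECT project_id, sync_status, indexed_chunks, total_chunks FROM index_meta"
):
    print(row)

# Chunks that already have embeddings, via the convenience view.
for row in db.execute(
    "SELECT file_path, chunk_name, chunk_type, start_line, end_line "
    "FROM vector_search_view LIMIT 5"
):
    print(row)

db.close()
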
mcp_code_indexer/server/mcp_server.py CHANGED
@@ -63,6 +63,7 @@ class MCPCodeIndexServer:
         retry_max_wait: float = 2.0,
         retry_jitter: float = 0.2,
         transport: Optional[Any] = None,
+        vector_mode: bool = False,
     ):
         """
         Initialize the MCP Code Index Server.
@@ -80,10 +81,12 @@ class MCPCodeIndexServer:
             retry_max_wait: Maximum wait time between retries in seconds
             retry_jitter: Maximum jitter to add to retry delays in seconds
             transport: Optional transport instance (if None, uses default stdio)
+            vector_mode: Enable vector search capabilities and tools
         """
         self.token_limit = token_limit
         self.db_path = db_path or Path.home() / ".mcp-code-index" / "tracker.db"
         self.cache_dir = cache_dir or Path.home() / ".mcp-code-index" / "cache"
+        self.vector_mode = vector_mode
 
         # Store database configuration
         self.db_config = {
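
Note: the new keyword is simply stored on the instance here; a hedged construction sketch, assuming the constructor parameters omitted below all have defaults (this diff does not show them):

from mcp_code_indexer.server.mcp_server import MCPCodeIndexServer

# Assumption: token_limit and the other parameters not passed here have defaults.
server = MCPCodeIndexServer(vector_mode=True)
print(server.vector_mode)  # True; db_path/cache_dir fall back to ~/.mcp-code-index/
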
mcp_code_indexer/vector_mode/__init__.py ADDED
@@ -0,0 +1,36 @@
+"""
+Vector Mode for MCP Code Indexer.
+
+This package provides semantic search capabilities using embeddings and vector databases.
+Includes automated file monitoring, AST-based code chunking, and secure embedding generation.
+"""
+
+from typing import Optional
+from pathlib import Path
+import os
+
+__version__ = "1.0.0"
+
+def is_vector_mode_available() -> bool:
+    """Check if vector mode dependencies are available."""
+    try:
+        import voyage
+        import turbopuffer
+        import tree_sitter
+        import watchdog
+        return True
+    except ImportError:
+        return False
+
+def get_vector_config_path() -> Path:
+    """Get path to vector mode configuration."""
+    config_dir = Path.home() / ".mcp-code-index" / "vector"
+    config_dir.mkdir(parents=True, exist_ok=True)
+    return config_dir / "config.yaml"
+
+def check_api_keys() -> dict[str, bool]:
+    """Check availability of required API keys."""
+    return {
+        "voyage": os.getenv("VOYAGE_API_KEY") is not None,
+        "turbopuffer": os.getenv("TURBOPUFFER_API_KEY") is not None,
+    }
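
Note: these helpers are importable at package level; a quick sketch of their return values (results depend on the local environment and on whether the vector extras are installed):

from mcp_code_indexer.vector_mode import check_api_keys, get_vector_config_path

# Ensures ~/.mcp-code-index/vector/ exists and returns the config.yaml path inside it.
print(get_vector_config_path())

# Maps provider name to whether its environment variable is set.
print(check_api_keys())  # e.g. {"voyage": False, "turbopuffer": False}
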
mcp_code_indexer/vector_mode/chunking/__init__.py ADDED
@@ -0,0 +1,19 @@
+"""
+AST-based code chunking for vector mode.
+
+Provides semantic code chunking using Tree-sitter parsers to extract
+meaningful code units for embedding generation.
+"""
+
+from .ast_chunker import ASTChunker, CodeChunk
+from .language_handlers import LanguageHandler, get_language_handler
+from .chunk_optimizer import ChunkOptimizer, OptimizedChunk
+
+__all__ = [
+    "ASTChunker",
+    "CodeChunk",
+    "LanguageHandler",
+    "get_language_handler",
+    "ChunkOptimizer",
+    "OptimizedChunk",
+]