mcp-code-indexer 4.0.2__tar.gz → 4.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {mcp_code_indexer-4.0.2 → mcp_code_indexer-4.1.0}/PKG-INFO +61 -3
- {mcp_code_indexer-4.0.2 → mcp_code_indexer-4.1.0}/README.md +59 -2
- {mcp_code_indexer-4.0.2 → mcp_code_indexer-4.1.0}/pyproject.toml +10 -1
- {mcp_code_indexer-4.0.2 → mcp_code_indexer-4.1.0}/src/mcp_code_indexer/database/models.py +125 -1
- {mcp_code_indexer-4.0.2 → mcp_code_indexer-4.1.0}/src/mcp_code_indexer/main.py +60 -0
- mcp_code_indexer-4.1.0/src/mcp_code_indexer/migrations/006_vector_mode.sql +189 -0
- {mcp_code_indexer-4.0.2 → mcp_code_indexer-4.1.0}/src/mcp_code_indexer/server/mcp_server.py +3 -0
- mcp_code_indexer-4.1.0/src/mcp_code_indexer/vector_mode/__init__.py +36 -0
- mcp_code_indexer-4.1.0/src/mcp_code_indexer/vector_mode/chunking/__init__.py +19 -0
- mcp_code_indexer-4.1.0/src/mcp_code_indexer/vector_mode/chunking/ast_chunker.py +403 -0
- mcp_code_indexer-4.1.0/src/mcp_code_indexer/vector_mode/chunking/chunk_optimizer.py +500 -0
- mcp_code_indexer-4.1.0/src/mcp_code_indexer/vector_mode/chunking/language_handlers.py +478 -0
- mcp_code_indexer-4.1.0/src/mcp_code_indexer/vector_mode/config.py +155 -0
- mcp_code_indexer-4.1.0/src/mcp_code_indexer/vector_mode/daemon.py +335 -0
- mcp_code_indexer-4.1.0/src/mcp_code_indexer/vector_mode/monitoring/__init__.py +19 -0
- mcp_code_indexer-4.1.0/src/mcp_code_indexer/vector_mode/monitoring/change_detector.py +312 -0
- mcp_code_indexer-4.1.0/src/mcp_code_indexer/vector_mode/monitoring/file_watcher.py +445 -0
- mcp_code_indexer-4.1.0/src/mcp_code_indexer/vector_mode/monitoring/merkle_tree.py +418 -0
- mcp_code_indexer-4.1.0/src/mcp_code_indexer/vector_mode/providers/__init__.py +72 -0
- mcp_code_indexer-4.1.0/src/mcp_code_indexer/vector_mode/providers/base_provider.py +230 -0
- mcp_code_indexer-4.1.0/src/mcp_code_indexer/vector_mode/providers/turbopuffer_client.py +338 -0
- mcp_code_indexer-4.1.0/src/mcp_code_indexer/vector_mode/providers/voyage_client.py +212 -0
- mcp_code_indexer-4.1.0/src/mcp_code_indexer/vector_mode/security/__init__.py +11 -0
- mcp_code_indexer-4.1.0/src/mcp_code_indexer/vector_mode/security/patterns.py +297 -0
- mcp_code_indexer-4.1.0/src/mcp_code_indexer/vector_mode/security/redactor.py +368 -0
- {mcp_code_indexer-4.0.2 → mcp_code_indexer-4.1.0}/LICENSE +0 -0
- {mcp_code_indexer-4.0.2 → mcp_code_indexer-4.1.0}/src/mcp_code_indexer/__init__.py +0 -0
- {mcp_code_indexer-4.0.2 → mcp_code_indexer-4.1.0}/src/mcp_code_indexer/__main__.py +0 -0
- {mcp_code_indexer-4.0.2 → mcp_code_indexer-4.1.0}/src/mcp_code_indexer/ask_handler.py +0 -0
- {mcp_code_indexer-4.0.2 → mcp_code_indexer-4.1.0}/src/mcp_code_indexer/claude_api_handler.py +0 -0
- {mcp_code_indexer-4.0.2 → mcp_code_indexer-4.1.0}/src/mcp_code_indexer/cleanup_manager.py +0 -0
- {mcp_code_indexer-4.0.2 → mcp_code_indexer-4.1.0}/src/mcp_code_indexer/commands/__init__.py +0 -0
- {mcp_code_indexer-4.0.2 → mcp_code_indexer-4.1.0}/src/mcp_code_indexer/commands/makelocal.py +0 -0
- {mcp_code_indexer-4.0.2 → mcp_code_indexer-4.1.0}/src/mcp_code_indexer/data/stop_words_english.txt +0 -0
- {mcp_code_indexer-4.0.2 → mcp_code_indexer-4.1.0}/src/mcp_code_indexer/database/__init__.py +0 -0
- {mcp_code_indexer-4.0.2 → mcp_code_indexer-4.1.0}/src/mcp_code_indexer/database/connection_health.py +0 -0
- {mcp_code_indexer-4.0.2 → mcp_code_indexer-4.1.0}/src/mcp_code_indexer/database/database.py +0 -0
- {mcp_code_indexer-4.0.2 → mcp_code_indexer-4.1.0}/src/mcp_code_indexer/database/database_factory.py +0 -0
- {mcp_code_indexer-4.0.2 → mcp_code_indexer-4.1.0}/src/mcp_code_indexer/database/exceptions.py +0 -0
- {mcp_code_indexer-4.0.2 → mcp_code_indexer-4.1.0}/src/mcp_code_indexer/database/path_resolver.py +0 -0
- {mcp_code_indexer-4.0.2 → mcp_code_indexer-4.1.0}/src/mcp_code_indexer/database/retry_executor.py +0 -0
- {mcp_code_indexer-4.0.2 → mcp_code_indexer-4.1.0}/src/mcp_code_indexer/deepask_handler.py +0 -0
- {mcp_code_indexer-4.0.2 → mcp_code_indexer-4.1.0}/src/mcp_code_indexer/error_handler.py +0 -0
- {mcp_code_indexer-4.0.2 → mcp_code_indexer-4.1.0}/src/mcp_code_indexer/file_scanner.py +0 -0
- {mcp_code_indexer-4.0.2 → mcp_code_indexer-4.1.0}/src/mcp_code_indexer/git_hook_handler.py +0 -0
- {mcp_code_indexer-4.0.2 → mcp_code_indexer-4.1.0}/src/mcp_code_indexer/logging_config.py +0 -0
- {mcp_code_indexer-4.0.2 → mcp_code_indexer-4.1.0}/src/mcp_code_indexer/middleware/__init__.py +0 -0
- {mcp_code_indexer-4.0.2 → mcp_code_indexer-4.1.0}/src/mcp_code_indexer/middleware/auth.py +0 -0
- {mcp_code_indexer-4.0.2 → mcp_code_indexer-4.1.0}/src/mcp_code_indexer/middleware/error_middleware.py +0 -0
- {mcp_code_indexer-4.0.2 → mcp_code_indexer-4.1.0}/src/mcp_code_indexer/middleware/logging.py +0 -0
- {mcp_code_indexer-4.0.2 → mcp_code_indexer-4.1.0}/src/mcp_code_indexer/middleware/security.py +0 -0
- {mcp_code_indexer-4.0.2 → mcp_code_indexer-4.1.0}/src/mcp_code_indexer/migrations/001_initial.sql +0 -0
- {mcp_code_indexer-4.0.2 → mcp_code_indexer-4.1.0}/src/mcp_code_indexer/migrations/002_performance_indexes.sql +0 -0
- {mcp_code_indexer-4.0.2 → mcp_code_indexer-4.1.0}/src/mcp_code_indexer/migrations/003_project_overviews.sql +0 -0
- {mcp_code_indexer-4.0.2 → mcp_code_indexer-4.1.0}/src/mcp_code_indexer/migrations/004_remove_branch_dependency.sql +0 -0
- {mcp_code_indexer-4.0.2 → mcp_code_indexer-4.1.0}/src/mcp_code_indexer/migrations/005_remove_git_remotes.sql +0 -0
- {mcp_code_indexer-4.0.2 → mcp_code_indexer-4.1.0}/src/mcp_code_indexer/query_preprocessor.py +0 -0
- {mcp_code_indexer-4.0.2 → mcp_code_indexer-4.1.0}/src/mcp_code_indexer/server/__init__.py +0 -0
- {mcp_code_indexer-4.0.2 → mcp_code_indexer-4.1.0}/src/mcp_code_indexer/tiktoken_cache/9b5ad71b2ce5302211f9c61530b329a4922fc6a4 +0 -0
- {mcp_code_indexer-4.0.2 → mcp_code_indexer-4.1.0}/src/mcp_code_indexer/token_counter.py +0 -0
- {mcp_code_indexer-4.0.2 → mcp_code_indexer-4.1.0}/src/mcp_code_indexer/tools/__init__.py +0 -0
- {mcp_code_indexer-4.0.2 → mcp_code_indexer-4.1.0}/src/mcp_code_indexer/transport/__init__.py +0 -0
- {mcp_code_indexer-4.0.2 → mcp_code_indexer-4.1.0}/src/mcp_code_indexer/transport/base.py +0 -0
- {mcp_code_indexer-4.0.2 → mcp_code_indexer-4.1.0}/src/mcp_code_indexer/transport/http_transport.py +0 -0
- {mcp_code_indexer-4.0.2 → mcp_code_indexer-4.1.0}/src/mcp_code_indexer/transport/stdio_transport.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.3
|
|
2
2
|
Name: mcp-code-indexer
|
|
3
|
-
Version: 4.0.2
|
|
3
|
+
Version: 4.1.0
|
|
4
4
|
Summary: MCP server that tracks file descriptions across codebases, enabling AI agents to efficiently navigate and understand code through searchable summaries and token-aware overviews.
|
|
5
5
|
License: MIT
|
|
6
6
|
Keywords: mcp,model-context-protocol,code-indexer,ai-tools,codebase-navigation,file-descriptions,llm-tools
|
|
@@ -23,6 +23,7 @@ Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
|
|
|
23
23
|
Classifier: Topic :: Software Development
|
|
24
24
|
Classifier: Topic :: Software Development :: Libraries :: Python Modules
|
|
25
25
|
Classifier: Typing :: Typed
|
|
26
|
+
Provides-Extra: vector
|
|
26
27
|
Requires-Dist: aiofiles (==23.2.0)
|
|
27
28
|
Requires-Dist: aiohttp (>=3.8.0)
|
|
28
29
|
Requires-Dist: aiosqlite (==0.19.0)
|
|
@@ -43,8 +44,8 @@ Description-Content-Type: text/markdown
|
|
|
43
44
|
|
|
44
45
|
# MCP Code Indexer 🚀
|
|
45
46
|
|
|
46
|
-
[PyPI version](https://badge.fury.io/py/mcp-code-indexer)
|
|
48
|
+
[PyPI](https://pypi.org/project/mcp-code-indexer/)
|
|
48
49
|
[License: MIT](https://opensource.org/licenses/MIT)
|
|
49
50
|
|
|
50
51
|
A production-ready **Model Context Protocol (MCP) server** that revolutionizes how AI agents navigate and understand codebases. Built for high-concurrency environments with advanced database resilience, the server provides instant access to intelligent descriptions, semantic search, and context-aware recommendations while maintaining 800+ writes/sec throughput.
|
|
@@ -197,6 +198,63 @@ The git hook integration provides intelligent automation:
|
|
|
197
198
|
|
|
198
199
|
**Learn More**: See [Git Hook Setup Guide](docs/git-hook-setup.md) for complete configuration options and troubleshooting.
|
|
199
200
|
|
|
201
|
+
## 🧠 Vector Mode (BETA)
|
|
202
|
+
|
|
203
|
+
🚀 **NEW Feature**: Semantic code search with vector embeddings! Experience AI-powered code discovery that understands context and meaning, not just keywords.
|
|
204
|
+
|
|
205
|
+
### 🎯 What is Vector Mode?
|
|
206
|
+
|
|
207
|
+
Vector Mode transforms how you search and understand codebases by using AI embeddings:
|
|
208
|
+
|
|
209
|
+
- **🔍 Semantic Search**: Find code by meaning, not just text matching
|
|
210
|
+
- **⚡ Real-time Indexing**: Automatic embedding generation as code changes
|
|
211
|
+
- **🛡️ Secure by Default**: Comprehensive secret redaction before API calls
|
|
212
|
+
- **🌐 Multi-language**: Python, JavaScript, TypeScript with AST-based chunking
|
|
213
|
+
- **📊 Smart Chunking**: Context-aware code segmentation for optimal embeddings
|
|
214
|
+
|
|
215
|
+
### 🚀 Quick Start
|
|
216
|
+
|
|
217
|
+
```bash
|
|
218
|
+
# Install vector mode dependencies
|
|
219
|
+
pip install mcp-code-indexer[vector]
|
|
220
|
+
|
|
221
|
+
# Set required API keys
|
|
222
|
+
export VOYAGE_API_KEY="pa-your-voyage-api-key"
|
|
223
|
+
export TURBOPUFFER_API_KEY="your-turbopuffer-api-key"
|
|
224
|
+
|
|
225
|
+
# Start with vector mode enabled
|
|
226
|
+
mcp-code-indexer --vector
|
|
227
|
+
|
|
228
|
+
# The daemon automatically starts and begins indexing your projects
|
|
229
|
+
```
|
|
230
|
+
|
|
231
|
+
### 💡 Key Features
|
|
232
|
+
|
|
233
|
+
- **🔐 Secret Redaction**: 20+ pattern types automatically detected and redacted
|
|
234
|
+
- **🌳 Merkle Trees**: Efficient change detection without full directory scans
|
|
235
|
+
- **🎛️ Circuit Breakers**: Resilient API integration with automatic retry logic
|
|
236
|
+
- **📈 Production Ready**: Built for high-concurrency with comprehensive monitoring
|
|
237
|
+
|
|
238
|
+
### 🔧 Advanced Configuration
|
|
239
|
+
|
|
240
|
+
```bash
|
|
241
|
+
# Custom configuration
|
|
242
|
+
mcp-code-indexer --vector --vector-config /path/to/config.yaml
|
|
243
|
+
|
|
244
|
+
# HTTP mode with vector search
|
|
245
|
+
mcp-code-indexer --vector --http --port 8080
|
|
246
|
+
```
|
|
247
|
+
|
|
248
|
+
### 🛠️ Architecture
|
|
249
|
+
|
|
250
|
+
Vector Mode adds powerful new MCP tools:
|
|
251
|
+
- `vector_search` - Semantic code search across projects
|
|
252
|
+
- `similarity_search` - Find similar code patterns
|
|
253
|
+
- `dependency_search` - Discover code relationships
|
|
254
|
+
- `vector_status` - Monitor indexing progress
|
|
255
|
+
|
|
256
|
+
**Status**: Currently in BETA - foundations implemented, full pipeline in development.
|
|
257
|
+
|
|
200
258
|
## 🔧 Development Setup
|
|
201
259
|
|
|
202
260
|
### 👨‍💻 For Contributors
|
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
# MCP Code Indexer 🚀
|
|
2
2
|
|
|
3
|
-
[PyPI version](https://badge.fury.io/py/mcp-code-indexer)
|
|
4
|
+
[PyPI](https://pypi.org/project/mcp-code-indexer/)
|
|
5
5
|
[License: MIT](https://opensource.org/licenses/MIT)
|
|
6
6
|
|
|
7
7
|
A production-ready **Model Context Protocol (MCP) server** that revolutionizes how AI agents navigate and understand codebases. Built for high-concurrency environments with advanced database resilience, the server provides instant access to intelligent descriptions, semantic search, and context-aware recommendations while maintaining 800+ writes/sec throughput.
|
|
@@ -154,6 +154,63 @@ The git hook integration provides intelligent automation:
|
|
|
154
154
|
|
|
155
155
|
**Learn More**: See [Git Hook Setup Guide](docs/git-hook-setup.md) for complete configuration options and troubleshooting.
|
|
156
156
|
|
|
157
|
+
## 🧠 Vector Mode (BETA)
|
|
158
|
+
|
|
159
|
+
🚀 **NEW Feature**: Semantic code search with vector embeddings! Experience AI-powered code discovery that understands context and meaning, not just keywords.
|
|
160
|
+
|
|
161
|
+
### 🎯 What is Vector Mode?
|
|
162
|
+
|
|
163
|
+
Vector Mode transforms how you search and understand codebases by using AI embeddings:
|
|
164
|
+
|
|
165
|
+
- **🔍 Semantic Search**: Find code by meaning, not just text matching
|
|
166
|
+
- **⚡ Real-time Indexing**: Automatic embedding generation as code changes
|
|
167
|
+
- **🛡️ Secure by Default**: Comprehensive secret redaction before API calls
|
|
168
|
+
- **🌐 Multi-language**: Python, JavaScript, TypeScript with AST-based chunking
|
|
169
|
+
- **📊 Smart Chunking**: Context-aware code segmentation for optimal embeddings
|
|
170
|
+
|
|
171
|
+
### 🚀 Quick Start
|
|
172
|
+
|
|
173
|
+
```bash
|
|
174
|
+
# Install vector mode dependencies
|
|
175
|
+
pip install mcp-code-indexer[vector]
|
|
176
|
+
|
|
177
|
+
# Set required API keys
|
|
178
|
+
export VOYAGE_API_KEY="pa-your-voyage-api-key"
|
|
179
|
+
export TURBOPUFFER_API_KEY="your-turbopuffer-api-key"
|
|
180
|
+
|
|
181
|
+
# Start with vector mode enabled
|
|
182
|
+
mcp-code-indexer --vector
|
|
183
|
+
|
|
184
|
+
# The daemon automatically starts and begins indexing your projects
|
|
185
|
+
```
|
|
186
|
+
|
|
187
|
+
### 💡 Key Features
|
|
188
|
+
|
|
189
|
+
- **🔐 Secret Redaction**: 20+ pattern types automatically detected and redacted
|
|
190
|
+
- **🌳 Merkle Trees**: Efficient change detection without full directory scans
|
|
191
|
+
- **🎛️ Circuit Breakers**: Resilient API integration with automatic retry logic
|
|
192
|
+
- **📈 Production Ready**: Built for high-concurrency with comprehensive monitoring
|
|
193
|
+
|
|
194
|
+
### 🔧 Advanced Configuration
|
|
195
|
+
|
|
196
|
+
```bash
|
|
197
|
+
# Custom configuration
|
|
198
|
+
mcp-code-indexer --vector --vector-config /path/to/config.yaml
|
|
199
|
+
|
|
200
|
+
# HTTP mode with vector search
|
|
201
|
+
mcp-code-indexer --vector --http --port 8080
|
|
202
|
+
```
|
|
203
|
+
|
|
204
|
+
### 🛠️ Architecture
|
|
205
|
+
|
|
206
|
+
Vector Mode adds powerful new MCP tools:
|
|
207
|
+
- `vector_search` - Semantic code search across projects
|
|
208
|
+
- `similarity_search` - Find similar code patterns
|
|
209
|
+
- `dependency_search` - Discover code relationships
|
|
210
|
+
- `vector_status` - Monitor indexing progress
|
|
211
|
+
|
|
212
|
+
**Status**: Currently in BETA - foundations implemented, full pipeline in development.
|
|
213
|
+
|
|
157
214
|
## 🔧 Development Setup
|
|
158
215
|
|
|
159
216
|
### 👨‍💻 For Contributors
|
|
@@ -4,7 +4,7 @@ build-backend = "poetry.core.masonry.api"
|
|
|
4
4
|
|
|
5
5
|
[tool.poetry]
|
|
6
6
|
name = "mcp-code-indexer"
|
|
7
|
-
version = "4.0.2"
|
|
7
|
+
version = "4.1.0"
|
|
8
8
|
description = "MCP server that tracks file descriptions across codebases, enabling AI agents to efficiently navigate and understand code through searchable summaries and token-aware overviews."
|
|
9
9
|
authors = ["MCP Code Indexer Contributors"]
|
|
10
10
|
maintainers = ["MCP Code Indexer Contributors"]
|
|
@@ -59,6 +59,15 @@ fastapi = ">=0.104.0"
|
|
|
59
59
|
uvicorn = ">=0.24.0"
|
|
60
60
|
python-multipart = ">=0.0.6"
|
|
61
61
|
|
|
62
|
+
[tool.poetry.extras]
|
|
63
|
+
vector = [
|
|
64
|
+
"voyageai",
|
|
65
|
+
"turbopuffer",
|
|
66
|
+
"tree-sitter",
|
|
67
|
+
"watchdog",
|
|
68
|
+
"pyyaml"
|
|
69
|
+
]
|
|
70
|
+
|
|
62
71
|
[tool.poetry.group.dev.dependencies]
|
|
63
72
|
pytest = ">=8.0.0"
|
|
64
73
|
pytest-asyncio = ">=0.21.0"
|
|
@@ -7,7 +7,8 @@ the database operations.
|
|
|
7
7
|
"""
|
|
8
8
|
|
|
9
9
|
from datetime import datetime
|
|
10
|
-
from typing import List, Optional
|
|
10
|
+
from typing import List, Optional, Dict, Any
|
|
11
|
+
from enum import Enum
|
|
11
12
|
|
|
12
13
|
from pydantic import BaseModel, Field
|
|
13
14
|
|
|
@@ -185,6 +186,129 @@ class WordFrequencyResult(BaseModel):
|
|
|
185
186
|
total_unique_terms: int = Field(..., description="Number of unique terms found")
|
|
186
187
|
|
|
187
188
|
|
|
189
|
+
# Vector Mode Models
|
|
190
|
+
|
|
191
|
+
class ChunkType(str, Enum):
    """
    Kinds of code chunk produced for semantic analysis.

    Members are plain strings (the enum mixes in ``str``), so each member
    compares equal to its lowercase value.
    """

    # Callable and type-like definitions
    FUNCTION = "function"
    CLASS = "class"
    METHOD = "method"

    # Non-executable or supporting source elements
    IMPORT = "import"
    DOCSTRING = "docstring"
    COMMENT = "comment"
    VARIABLE = "variable"

    # Type-system constructs
    INTERFACE = "interface"
    TYPE_DEFINITION = "type_definition"

    # Containers
    MODULE = "module"
    NAMESPACE = "namespace"

    # Fallback kind
    GENERIC = "generic"
|
|
205
|
+
|
|
206
|
+
class NodeType(str, Enum):
    """
    Kinds of node stored in the Merkle tree.

    Members are plain strings (the enum mixes in ``str``).
    """

    FILE = "file"
    DIRECTORY = "directory"
    PROJECT = "project"
|
|
211
|
+
|
|
212
|
+
class SyncStatus(str, Enum):
    """
    Synchronization state of a vector index.

    Members are plain strings (the enum mixes in ``str``).
    """

    PENDING = "pending"
    IN_PROGRESS = "in_progress"
    COMPLETED = "completed"
    FAILED = "failed"
    PAUSED = "paused"
|
|
219
|
+
|
|
220
|
+
class CodeChunk(BaseModel):
    """
    One semantic chunk of code taken from a single file.

    Chunks are the unit used for embedding generation and vector search.
    """

    # Identity and ownership
    id: Optional[int] = Field(default=None, description="Database ID")
    file_id: int = Field(..., description="Reference to FileDescription")
    project_id: str = Field(..., description="Reference to project")

    # What the chunk is and where it sits in the file
    chunk_type: ChunkType = Field(..., description="Type of code chunk")
    name: Optional[str] = Field(
        default=None, description="Name of function/class/etc"
    )
    start_line: int = Field(..., description="Starting line number")
    end_line: int = Field(..., description="Ending line number")

    # Content fingerprint and embedding bookkeeping
    content_hash: str = Field(..., description="SHA-256 hash of chunk content")
    embedding_id: Optional[str] = Field(
        default=None, description="Vector database ID"
    )
    redacted: bool = Field(
        default=False, description="Whether content was redacted"
    )
    metadata: Dict[str, Any] = Field(
        default_factory=dict, description="Additional metadata"
    )

    # Timestamps default to the current UTC time when the model is created
    created: datetime = Field(
        default_factory=datetime.utcnow, description="Creation timestamp"
    )
    last_modified: datetime = Field(
        default_factory=datetime.utcnow, description="Last update timestamp"
    )
|
|
240
|
+
|
|
241
|
+
class MerkleNode(BaseModel):
    """
    A single node of the Merkle tree used for change detection.

    The tree lets file system changes be detected without scanning entire
    directory trees.
    """

    id: Optional[int] = Field(default=None, description="Database ID")
    project_id: str = Field(..., description="Reference to project")
    path: str = Field(
        ..., description="File/directory path relative to project root"
    )
    hash: str = Field(..., description="SHA-256 hash of content or children")
    node_type: NodeType = Field(..., description="Type of filesystem node")
    parent_path: Optional[str] = Field(
        default=None, description="Path to parent directory"
    )
    children_hash: Optional[str] = Field(
        default=None, description="Combined hash of children"
    )
    # Defaults to the current UTC time when the model is created
    last_modified: datetime = Field(
        default_factory=datetime.utcnow, description="Last update timestamp"
    )
|
|
256
|
+
|
|
257
|
+
class IndexMeta(BaseModel):
    """
    Metadata about vector indexing progress and status for a project.

    Tracks indexing state, statistics, and synchronization status.
    """

    id: Optional[int] = Field(None, description="Database ID")
    # "unique" is schema metadata only; pydantic does not enforce it. It is
    # passed via json_schema_extra because arbitrary extra keyword arguments
    # to Field() are deprecated in pydantic v2.
    project_id: str = Field(
        ...,
        description="Reference to project",
        json_schema_extra={"unique": True},
    )

    # Progress counters
    total_chunks: int = Field(default=0, description="Total number of chunks")
    indexed_chunks: int = Field(
        default=0, description="Number of chunks with embeddings"
    )
    total_files: int = Field(default=0, description="Total number of files")
    indexed_files: int = Field(default=0, description="Number of files processed")

    # Synchronization state
    last_sync: Optional[datetime] = Field(
        None, description="Last successful sync timestamp"
    )
    sync_status: SyncStatus = Field(
        default=SyncStatus.PENDING, description="Current sync status"
    )
    error_message: Optional[str] = Field(None, description="Last error message")

    # Throughput and queue statistics
    queue_depth: int = Field(default=0, description="Number of pending tasks")
    processing_rate: float = Field(
        default=0.0, description="Files per second processing rate"
    )
    estimated_completion: Optional[datetime] = Field(
        None, description="Estimated completion time"
    )

    metadata: Dict[str, Any] = Field(
        default_factory=dict, description="Additional metadata"
    )
    # Timestamps default to the current UTC time when the model is created
    created: datetime = Field(
        default_factory=datetime.utcnow, description="Creation timestamp"
    )
    last_modified: datetime = Field(
        default_factory=datetime.utcnow, description="Last update timestamp"
    )
|
|
279
|
+
|
|
280
|
+
class VectorSearchResult(BaseModel):
    """
    A single hit returned by vector search, including its similarity score.
    """

    # Location of the match
    file_path: str = Field(..., description="Path to the matching file")
    chunk_name: Optional[str] = Field(
        default=None, description="Name of the code chunk"
    )
    chunk_type: ChunkType = Field(..., description="Type of code chunk")

    # Matched content and its line span
    code_snippet: str = Field(..., description="Original code content")
    start_line: int = Field(..., description="Starting line number")
    end_line: int = Field(..., description="Ending line number")

    # Ranking and context
    similarity_score: float = Field(..., description="Cosine similarity score")
    project_id: str = Field(..., description="Project identifier")
    metadata: Dict[str, Any] = Field(
        default_factory=dict, description="Additional metadata"
    )
|
|
294
|
+
|
|
295
|
+
class VectorIndexStatus(BaseModel):
    """
    Snapshot of the vector indexing status for a project.
    """

    is_indexing: bool = Field(
        ..., description="Whether indexing is currently active"
    )

    # Progress counters (all required)
    indexed_files: int = Field(..., description="Number of files indexed")
    total_files: int = Field(..., description="Total number of files")
    indexed_chunks: int = Field(..., description="Number of chunks indexed")
    total_chunks: int = Field(..., description="Total number of chunks")

    # Synchronization state and throughput
    last_sync: Optional[datetime] = Field(
        default=None, description="Last sync timestamp"
    )
    sync_status: SyncStatus = Field(..., description="Current sync status")
    queue_depth: int = Field(..., description="Number of pending tasks")
    processing_rate: float = Field(..., description="Processing rate")
    estimated_completion: Optional[datetime] = Field(
        default=None, description="Estimated completion time"
    )
    error_message: Optional[str] = Field(
        default=None, description="Last error message"
    )
|
|
311
|
+
|
|
188
312
|
# Enable forward references for recursive models
|
|
189
313
|
FolderNode.model_rebuild()
|
|
190
314
|
CodebaseOverview.model_rebuild()
|
|
@@ -151,6 +151,19 @@ def parse_arguments() -> argparse.Namespace:
|
|
|
151
151
|
help="Allowed CORS origins for HTTP transport (default: allow all)",
|
|
152
152
|
)
|
|
153
153
|
|
|
154
|
+
# Vector mode options
|
|
155
|
+
parser.add_argument(
|
|
156
|
+
"--vector",
|
|
157
|
+
action="store_true",
|
|
158
|
+
help="Enable vector mode with semantic search capabilities (requires vector extras)",
|
|
159
|
+
)
|
|
160
|
+
|
|
161
|
+
parser.add_argument(
|
|
162
|
+
"--vector-config",
|
|
163
|
+
type=str,
|
|
164
|
+
help="Path to vector mode configuration file",
|
|
165
|
+
)
|
|
166
|
+
|
|
154
167
|
return parser.parse_args()
|
|
155
168
|
|
|
156
169
|
|
|
@@ -996,6 +1009,52 @@ async def main() -> None:
|
|
|
996
1009
|
)
|
|
997
1010
|
|
|
998
1011
|
try:
|
|
1012
|
+
# Handle vector mode initialization
|
|
1013
|
+
vector_daemon_task = None
|
|
1014
|
+
if args.vector:
|
|
1015
|
+
try:
|
|
1016
|
+
from .vector_mode import is_vector_mode_available, check_api_keys
|
|
1017
|
+
from .vector_mode.config import load_vector_config
|
|
1018
|
+
from .vector_mode.daemon import start_vector_daemon
|
|
1019
|
+
|
|
1020
|
+
# Check if vector mode is available
|
|
1021
|
+
if not is_vector_mode_available():
|
|
1022
|
+
logger.error("Vector mode requires additional dependencies. Install with: pip install mcp-code-indexer[vector]")
|
|
1023
|
+
sys.exit(1)
|
|
1024
|
+
|
|
1025
|
+
# Check API keys
|
|
1026
|
+
api_keys = check_api_keys()
|
|
1027
|
+
if not all(api_keys.values()):
|
|
1028
|
+
missing = [k for k, v in api_keys.items() if not v]
|
|
1029
|
+
logger.error(f"Missing API keys for vector mode: {', '.join(missing)}")
|
|
1030
|
+
sys.exit(1)
|
|
1031
|
+
|
|
1032
|
+
# Load vector configuration
|
|
1033
|
+
vector_config_path = Path(args.vector_config).expanduser() if args.vector_config else None
|
|
1034
|
+
vector_config = load_vector_config(vector_config_path)
|
|
1035
|
+
|
|
1036
|
+
logger.info(
|
|
1037
|
+
"Vector mode enabled",
|
|
1038
|
+
extra={
|
|
1039
|
+
"structured_data": {
|
|
1040
|
+
"embedding_model": vector_config.embedding_model,
|
|
1041
|
+
"batch_size": vector_config.batch_size,
|
|
1042
|
+
"daemon_enabled": vector_config.daemon_enabled,
|
|
1043
|
+
}
|
|
1044
|
+
}
|
|
1045
|
+
)
|
|
1046
|
+
|
|
1047
|
+
# Start vector daemon in background
|
|
1048
|
+
if vector_config.daemon_enabled:
|
|
1049
|
+
vector_daemon_task = asyncio.create_task(
|
|
1050
|
+
start_vector_daemon(vector_config_path, db_path, cache_dir)
|
|
1051
|
+
)
|
|
1052
|
+
logger.info("Vector daemon started")
|
|
1053
|
+
|
|
1054
|
+
except Exception as e:
|
|
1055
|
+
logger.error(f"Failed to initialize vector mode: {e}")
|
|
1056
|
+
sys.exit(1)
|
|
1057
|
+
|
|
999
1058
|
# Import and run the MCP server
|
|
1000
1059
|
from .server.mcp_server import MCPCodeIndexServer
|
|
1001
1060
|
|
|
@@ -1028,6 +1087,7 @@ async def main() -> None:
|
|
|
1028
1087
|
db_path=db_path,
|
|
1029
1088
|
cache_dir=cache_dir,
|
|
1030
1089
|
transport=transport,
|
|
1090
|
+
vector_mode=args.vector,
|
|
1031
1091
|
)
|
|
1032
1092
|
|
|
1033
1093
|
# Set server instance in transport after server creation
|
|
@@ -0,0 +1,189 @@
|
|
|
1
|
+
-- Migration 006: Add vector mode tables and indexes
-- This migration adds support for semantic search capabilities with embeddings
-- Includes code chunks, Merkle tree nodes, and indexing metadata

-- Ensure WAL mode is enabled for safe migrations
PRAGMA journal_mode=WAL;

-- Temporarily disable foreign key constraints for migration
-- (must be issued outside a transaction to take effect)
PRAGMA foreign_keys=OFF;

-- Start transaction for atomic migration
BEGIN TRANSACTION;

-- Create code_chunks table for storing semantic code chunks
CREATE TABLE code_chunks (
    id INTEGER PRIMARY KEY AUTOINCREMENT,
    file_id INTEGER NOT NULL,
    project_id TEXT NOT NULL,
    chunk_type TEXT NOT NULL DEFAULT 'generic', -- function, class, method, import, etc.
    name TEXT, -- Name of function/class/etc, can be NULL for generic chunks
    start_line INTEGER NOT NULL,
    end_line INTEGER NOT NULL,
    content_hash TEXT NOT NULL, -- SHA-256 hash of chunk content
    embedding_id TEXT, -- ID in vector database (Turbopuffer)
    redacted BOOLEAN DEFAULT FALSE, -- Whether content was redacted for security
    metadata TEXT DEFAULT '{}', -- JSON metadata about the chunk
    created DATETIME DEFAULT CURRENT_TIMESTAMP,
    last_modified DATETIME DEFAULT CURRENT_TIMESTAMP,
    FOREIGN KEY (file_id) REFERENCES file_descriptions(id) ON DELETE CASCADE,
    FOREIGN KEY (project_id) REFERENCES projects(id) ON DELETE CASCADE
);

-- Create indexes for code_chunks table
CREATE INDEX idx_code_chunks_file_id ON code_chunks(file_id);
CREATE INDEX idx_code_chunks_project_id ON code_chunks(project_id);
CREATE INDEX idx_code_chunks_chunk_type ON code_chunks(chunk_type);
CREATE INDEX idx_code_chunks_content_hash ON code_chunks(content_hash);
CREATE INDEX idx_code_chunks_embedding_id ON code_chunks(embedding_id);
CREATE INDEX idx_code_chunks_last_modified ON code_chunks(last_modified);
CREATE INDEX idx_code_chunks_redacted ON code_chunks(redacted);

-- Create merkle_nodes table for efficient change detection
CREATE TABLE merkle_nodes (
    id INTEGER PRIMARY KEY AUTOINCREMENT,
    project_id TEXT NOT NULL,
    path TEXT NOT NULL, -- File/directory path relative to project root
    hash TEXT NOT NULL, -- SHA-256 hash of content or children
    node_type TEXT NOT NULL DEFAULT 'file', -- file, directory, project
    parent_path TEXT, -- Path to parent directory, NULL for root
    children_hash TEXT, -- Combined hash of children for directories
    last_modified DATETIME DEFAULT CURRENT_TIMESTAMP,
    UNIQUE(project_id, path),
    FOREIGN KEY (project_id) REFERENCES projects(id) ON DELETE CASCADE
);

-- Create indexes for merkle_nodes table
CREATE INDEX idx_merkle_nodes_project_id ON merkle_nodes(project_id);
CREATE INDEX idx_merkle_nodes_path ON merkle_nodes(path);
CREATE INDEX idx_merkle_nodes_hash ON merkle_nodes(hash);
CREATE INDEX idx_merkle_nodes_node_type ON merkle_nodes(node_type);
CREATE INDEX idx_merkle_nodes_parent_path ON merkle_nodes(parent_path);
CREATE INDEX idx_merkle_nodes_last_modified ON merkle_nodes(last_modified);

-- Create index_meta table for tracking vector indexing progress
CREATE TABLE index_meta (
    id INTEGER PRIMARY KEY AUTOINCREMENT,
    project_id TEXT NOT NULL UNIQUE,
    total_chunks INTEGER DEFAULT 0,
    indexed_chunks INTEGER DEFAULT 0,
    total_files INTEGER DEFAULT 0,
    indexed_files INTEGER DEFAULT 0,
    last_sync DATETIME,
    sync_status TEXT DEFAULT 'pending', -- pending, in_progress, completed, failed, paused
    error_message TEXT,
    queue_depth INTEGER DEFAULT 0,
    processing_rate REAL DEFAULT 0.0, -- Files per second
    estimated_completion DATETIME,
    metadata TEXT DEFAULT '{}', -- JSON metadata
    created DATETIME DEFAULT CURRENT_TIMESTAMP,
    last_modified DATETIME DEFAULT CURRENT_TIMESTAMP,
    FOREIGN KEY (project_id) REFERENCES projects(id) ON DELETE CASCADE
);

-- Create indexes for index_meta table
CREATE INDEX idx_index_meta_project_id ON index_meta(project_id);
CREATE INDEX idx_index_meta_sync_status ON index_meta(sync_status);
CREATE INDEX idx_index_meta_last_sync ON index_meta(last_sync);
CREATE INDEX idx_index_meta_last_modified ON index_meta(last_modified);

-- Add vector_mode column to projects table to track which projects use vector search
ALTER TABLE projects ADD COLUMN vector_mode BOOLEAN DEFAULT FALSE;
CREATE INDEX idx_projects_vector_mode ON projects(vector_mode);

-- Create triggers to maintain consistency between file_descriptions and code_chunks
CREATE TRIGGER code_chunks_cleanup_on_file_delete
    AFTER DELETE ON file_descriptions
BEGIN
    DELETE FROM code_chunks WHERE file_id = OLD.id;
END;

-- Create triggers to update index_meta when chunks are added/removed
CREATE TRIGGER update_index_meta_on_chunk_insert
    AFTER INSERT ON code_chunks
BEGIN
    -- Create the project's row on first use. An existing row is left in place so
    -- that sync_status, last_sync, error_message, queue_depth, created, etc. are
    -- preserved; INSERT OR REPLACE would delete the row and reset those columns
    -- to their defaults on every chunk insert.
    INSERT OR IGNORE INTO index_meta (project_id) VALUES (NEW.project_id);

    -- Refresh only the derived counters.
    UPDATE index_meta SET
        total_chunks = (
            SELECT COUNT(*) FROM code_chunks
            WHERE project_id = NEW.project_id
        ),
        indexed_chunks = (
            SELECT COUNT(*) FROM code_chunks
            WHERE project_id = NEW.project_id AND embedding_id IS NOT NULL
        ),
        total_files = (
            SELECT COUNT(DISTINCT file_id) FROM code_chunks
            WHERE project_id = NEW.project_id
        ),
        indexed_files = (
            SELECT COUNT(DISTINCT file_id) FROM code_chunks
            WHERE project_id = NEW.project_id AND embedding_id IS NOT NULL
        ),
        last_modified = CURRENT_TIMESTAMP
    WHERE project_id = NEW.project_id;
END;

CREATE TRIGGER update_index_meta_on_chunk_update
    AFTER UPDATE ON code_chunks
BEGIN
    UPDATE index_meta SET
        indexed_chunks = (
            SELECT COUNT(*) FROM code_chunks
            WHERE project_id = NEW.project_id AND embedding_id IS NOT NULL
        ),
        indexed_files = (
            SELECT COUNT(DISTINCT file_id) FROM code_chunks
            WHERE project_id = NEW.project_id AND embedding_id IS NOT NULL
        ),
        last_modified = CURRENT_TIMESTAMP
    WHERE project_id = NEW.project_id;
END;

CREATE TRIGGER update_index_meta_on_chunk_delete
    AFTER DELETE ON code_chunks
BEGIN
    UPDATE index_meta SET
        total_chunks = (
            SELECT COUNT(*) FROM code_chunks
            WHERE project_id = OLD.project_id
        ),
        indexed_chunks = (
            SELECT COUNT(*) FROM code_chunks
            WHERE project_id = OLD.project_id AND embedding_id IS NOT NULL
        ),
        total_files = (
            SELECT COUNT(DISTINCT file_id) FROM code_chunks
            WHERE project_id = OLD.project_id
        ),
        indexed_files = (
            SELECT COUNT(DISTINCT file_id) FROM code_chunks
            WHERE project_id = OLD.project_id AND embedding_id IS NOT NULL
        ),
        last_modified = CURRENT_TIMESTAMP
    WHERE project_id = OLD.project_id;
END;

-- Create view for vector search results with file information
CREATE VIEW vector_search_view AS
SELECT
    cc.id as chunk_id,
    cc.file_id,
    fd.file_path,
    cc.chunk_type,
    cc.name as chunk_name,
    cc.start_line,
    cc.end_line,
    cc.content_hash,
    cc.embedding_id,
    cc.redacted,
    cc.metadata as chunk_metadata,
    cc.project_id,
    p.name as project_name,
    fd.description as file_description,
    cc.created as chunk_created,
    cc.last_modified as chunk_modified,
    fd.last_modified as file_modified
FROM code_chunks cc
JOIN file_descriptions fd ON cc.file_id = fd.id
JOIN projects p ON cc.project_id = p.id
WHERE cc.embedding_id IS NOT NULL
    AND fd.to_be_cleaned IS NULL;

-- Commit the migration
COMMIT;

-- Re-enable foreign key constraints.
-- PRAGMA foreign_keys is a no-op while a transaction is open, so this must run
-- after COMMIT; issued before it, enforcement would stay off on this connection.
PRAGMA foreign_keys=ON;
|
|
@@ -63,6 +63,7 @@ class MCPCodeIndexServer:
|
|
|
63
63
|
retry_max_wait: float = 2.0,
|
|
64
64
|
retry_jitter: float = 0.2,
|
|
65
65
|
transport: Optional[Any] = None,
|
|
66
|
+
vector_mode: bool = False,
|
|
66
67
|
):
|
|
67
68
|
"""
|
|
68
69
|
Initialize the MCP Code Index Server.
|
|
@@ -80,10 +81,12 @@ class MCPCodeIndexServer:
|
|
|
80
81
|
retry_max_wait: Maximum wait time between retries in seconds
|
|
81
82
|
retry_jitter: Maximum jitter to add to retry delays in seconds
|
|
82
83
|
transport: Optional transport instance (if None, uses default stdio)
|
|
84
|
+
vector_mode: Enable vector search capabilities and tools
|
|
83
85
|
"""
|
|
84
86
|
self.token_limit = token_limit
|
|
85
87
|
self.db_path = db_path or Path.home() / ".mcp-code-index" / "tracker.db"
|
|
86
88
|
self.cache_dir = cache_dir or Path.home() / ".mcp-code-index" / "cache"
|
|
89
|
+
self.vector_mode = vector_mode
|
|
87
90
|
|
|
88
91
|
# Store database configuration
|
|
89
92
|
self.db_config = {
|
|
@@ -0,0 +1,36 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Vector Mode for MCP Code Indexer.
|
|
3
|
+
|
|
4
|
+
This package provides semantic search capabilities using embeddings and vector databases.
|
|
5
|
+
Includes automated file monitoring, AST-based code chunking, and secure embedding generation.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
from typing import Optional
|
|
9
|
+
from pathlib import Path
|
|
10
|
+
import os
|
|
11
|
+
|
|
12
|
+
__version__ = "1.0.0"
|
|
13
|
+
|
|
14
|
+
def is_vector_mode_available() -> bool:
    """Check if vector mode dependencies are available.

    Returns:
        True when the Voyage AI client, ``turbopuffer``, ``tree_sitter`` and
        ``watchdog`` can all be imported; False as soon as one is missing.
    """
    try:
        # The Voyage AI SDK is imported as ``voyageai``. The ``voyage`` name
        # that was probed previously is kept as a fallback so any environment
        # that satisfied the old check still does.
        try:
            import voyageai  # noqa: F401
        except ImportError:
            import voyage  # noqa: F401
        import turbopuffer  # noqa: F401
        import tree_sitter  # noqa: F401
        import watchdog  # noqa: F401
    except ImportError:
        return False
    return True
|
|
24
|
+
|
|
25
|
+
def get_vector_config_path() -> Path:
    """Return the location of the vector mode configuration file.

    The containing directory, ``~/.mcp-code-index/vector``, is created on
    demand together with any missing parents. The ``config.yaml`` file itself
    is not created here.
    """
    vector_dir = Path.home().joinpath(".mcp-code-index", "vector")
    vector_dir.mkdir(parents=True, exist_ok=True)
    return vector_dir.joinpath("config.yaml")
|
|
30
|
+
|
|
31
|
+
def check_api_keys() -> dict[str, bool]:
    """Check availability of required API keys.

    Returns:
        A mapping from provider name (``"voyage"``, ``"turbopuffer"``) to
        whether its environment variable holds a usable value. A variable
        that is unset, empty, or only whitespace counts as unavailable,
        because an exported-but-blank key cannot authenticate.
    """
    env_vars = {
        "voyage": "VOYAGE_API_KEY",
        "turbopuffer": "TURBOPUFFER_API_KEY",
    }
    return {
        provider: bool(os.getenv(var_name, "").strip())
        for provider, var_name in env_vars.items()
    }
|
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
"""
|
|
2
|
+
AST-based code chunking for vector mode.
|
|
3
|
+
|
|
4
|
+
Provides semantic code chunking using Tree-sitter parsers to extract
|
|
5
|
+
meaningful code units for embedding generation.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
from .ast_chunker import ASTChunker, CodeChunk
|
|
9
|
+
from .language_handlers import LanguageHandler, get_language_handler
|
|
10
|
+
from .chunk_optimizer import ChunkOptimizer, OptimizedChunk
|
|
11
|
+
|
|
12
|
+
__all__ = [
|
|
13
|
+
"ASTChunker",
|
|
14
|
+
"CodeChunk",
|
|
15
|
+
"LanguageHandler",
|
|
16
|
+
"get_language_handler",
|
|
17
|
+
"ChunkOptimizer",
|
|
18
|
+
"OptimizedChunk",
|
|
19
|
+
]
|