mcp-code-indexer 4.2.15-py3-none-any.whl → 4.2.17-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- mcp_code_indexer/database/database.py +334 -115
- mcp_code_indexer/database/database_factory.py +1 -1
- mcp_code_indexer/database/exceptions.py +1 -1
- mcp_code_indexer/database/models.py +66 -24
- mcp_code_indexer/database/retry_executor.py +15 -5
- mcp_code_indexer/file_scanner.py +107 -12
- mcp_code_indexer/main.py +43 -30
- mcp_code_indexer/server/mcp_server.py +201 -7
- mcp_code_indexer/vector_mode/chunking/ast_chunker.py +103 -84
- mcp_code_indexer/vector_mode/chunking/chunk_optimizer.py +1 -0
- mcp_code_indexer/vector_mode/config.py +113 -45
- mcp_code_indexer/vector_mode/const.py +24 -0
- mcp_code_indexer/vector_mode/daemon.py +860 -98
- mcp_code_indexer/vector_mode/monitoring/change_detector.py +113 -97
- mcp_code_indexer/vector_mode/monitoring/file_watcher.py +175 -121
- mcp_code_indexer/vector_mode/providers/turbopuffer_client.py +291 -98
- mcp_code_indexer/vector_mode/providers/voyage_client.py +140 -38
- mcp_code_indexer/vector_mode/services/__init__.py +9 -0
- mcp_code_indexer/vector_mode/services/embedding_service.py +389 -0
- mcp_code_indexer/vector_mode/services/vector_mode_tools_service.py +459 -0
- mcp_code_indexer/vector_mode/services/vector_storage_service.py +580 -0
- mcp_code_indexer/vector_mode/types.py +46 -0
- mcp_code_indexer/vector_mode/utils.py +50 -0
- {mcp_code_indexer-4.2.15.dist-info → mcp_code_indexer-4.2.17.dist-info}/METADATA +13 -10
- {mcp_code_indexer-4.2.15.dist-info → mcp_code_indexer-4.2.17.dist-info}/RECORD +28 -21
- {mcp_code_indexer-4.2.15.dist-info → mcp_code_indexer-4.2.17.dist-info}/WHEEL +1 -1
- {mcp_code_indexer-4.2.15.dist-info → mcp_code_indexer-4.2.17.dist-info}/entry_points.txt +0 -0
- {mcp_code_indexer-4.2.15.dist-info → mcp_code_indexer-4.2.17.dist-info/licenses}/LICENSE +0 -0
mcp_code_indexer/database/models.py
CHANGED

@@ -32,7 +32,9 @@ class Project(BaseModel):
     last_accessed: datetime = Field(
         default_factory=datetime.utcnow, description="Last access timestamp"
     )
-    vector_mode: bool = Field(
+    vector_mode: bool = Field(
+        default=False, description="Enable vector search for this project"
+    )
 
 
 class FileDescription(BaseModel):
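The new `vector_mode` flag follows the same `Field(...)` pattern as the rest of the model. A minimal standalone sketch of just the two fields visible in this hunk (assuming `BaseModel`/`Field` come from pydantic, as the names suggest; the real `Project` model has more fields):

```python
from datetime import datetime
from pydantic import BaseModel, Field

class Project(BaseModel):
    # Only the fields visible in the hunk; the real model has more.
    last_accessed: datetime = Field(
        default_factory=datetime.utcnow, description="Last access timestamp"
    )
    vector_mode: bool = Field(
        default=False, description="Enable vector search for this project"
    )

p = Project()
assert p.vector_mode is False        # opt-in: off unless explicitly enabled
p = Project(vector_mode=True)        # per-project opt-in
```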
@@ -189,10 +191,12 @@ class WordFrequencyResult(BaseModel):
 
 # Vector Mode Models
 
+
 class ChunkType(str, Enum):
     """Types of code chunks for semantic analysis."""
+
     FUNCTION = "function"
-    CLASS = "class"
+    CLASS = "class"
     METHOD = "method"
     IMPORT = "import"
     DOCSTRING = "docstring"
@@ -204,27 +208,32 @@ class ChunkType(str, Enum):
     NAMESPACE = "namespace"
     GENERIC = "generic"
 
+
 class NodeType(str, Enum):
     """Types of nodes in Merkle tree."""
+
     FILE = "file"
     DIRECTORY = "directory"
     PROJECT = "project"
 
+
 class SyncStatus(str, Enum):
     """Vector index synchronization status."""
+
     PENDING = "pending"
     IN_PROGRESS = "in_progress"
     COMPLETED = "completed"
     FAILED = "failed"
     PAUSED = "paused"
 
+
 class CodeChunk(BaseModel):
     """
     Represents a semantic chunk of code extracted from a file.
-
+
     Used for embedding generation and vector search operations.
     """
-
+
     id: Optional[int] = Field(None, description="Database ID")
     file_id: int = Field(..., description="Reference to FileDescription")
     project_id: str = Field(..., description="Reference to project")
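All three enums subclass both `str` and `Enum`, so members compare equal to, and JSON-serialize as, their plain string values. A quick standalone illustration using the `SyncStatus` values from the hunk:

```python
import json
from enum import Enum

class SyncStatus(str, Enum):
    PENDING = "pending"
    IN_PROGRESS = "in_progress"
    COMPLETED = "completed"
    FAILED = "failed"
    PAUSED = "paused"

# str-subclass enums compare equal to raw strings (handy for DB columns)...
assert SyncStatus.PENDING == "pending"
# ...round-trip cleanly from stored values...
assert SyncStatus("failed") is SyncStatus.FAILED
# ...and JSON-encode as their plain string value.
assert json.dumps(SyncStatus.COMPLETED) == '"completed"'
```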
@@ -235,17 +244,24 @@ class CodeChunk(BaseModel):
     content_hash: str = Field(..., description="SHA-256 hash of chunk content")
     embedding_id: Optional[str] = Field(None, description="Vector database ID")
     redacted: bool = Field(default=False, description="Whether content was redacted")
-    metadata: Dict[str, Any] = Field(
-
-
+    metadata: Dict[str, Any] = Field(
+        default_factory=dict, description="Additional metadata"
+    )
+    created: datetime = Field(
+        default_factory=datetime.utcnow, description="Creation timestamp"
+    )
+    last_modified: datetime = Field(
+        default_factory=datetime.utcnow, description="Last update timestamp"
+    )
+
 
 class MerkleNode(BaseModel):
     """
     Represents a node in the Merkle tree for change detection.
-
+
     Used to efficiently detect file system changes without scanning entire directory trees.
     """
-
+
     id: Optional[int] = Field(None, description="Database ID")
     project_id: str = Field(..., description="Reference to project")
     path: str = Field(..., description="File/directory path relative to project root")
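`content_hash` is described as a SHA-256 of the chunk content, which is what makes change detection cheap: re-embedding can be skipped whenever the hash is unchanged. A hedged sketch of that check (helper names are illustrative, not from the package; UTF-8 encoding is an assumption):

```python
import hashlib
from typing import Optional

def chunk_content_hash(content: str) -> str:
    # SHA-256 over the chunk text, hex-encoded (UTF-8 encoding assumed here).
    return hashlib.sha256(content.encode("utf-8")).hexdigest()

def needs_reembedding(stored_hash: Optional[str], content: str) -> bool:
    # Skip the (expensive) embedding call when the chunk is unchanged.
    return stored_hash != chunk_content_hash(content)

h = chunk_content_hash("def add(a, b):\n    return a + b\n")
assert not needs_reembedding(h, "def add(a, b):\n    return a + b\n")
assert needs_reembedding(h, "def add(a, b):\n    return a - b\n")
```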
@@ -253,36 +269,56 @@ class MerkleNode(BaseModel):
     node_type: NodeType = Field(..., description="Type of filesystem node")
     parent_path: Optional[str] = Field(None, description="Path to parent directory")
     children_hash: Optional[str] = Field(None, description="Combined hash of children")
-    last_modified: datetime = Field(
+    last_modified: datetime = Field(
+        default_factory=datetime.utcnow, description="Last update timestamp"
+    )
+
 
 class IndexMeta(BaseModel):
     """
     Metadata about vector indexing progress and status for a project.
-
+
     Tracks indexing state, statistics, and synchronization status.
     """
-
+
     id: Optional[int] = Field(None, description="Database ID")
     project_id: str = Field(..., description="Reference to project", unique=True)
     total_chunks: int = Field(default=0, description="Total number of chunks")
-    indexed_chunks: int = Field(
+    indexed_chunks: int = Field(
+        default=0, description="Number of chunks with embeddings"
+    )
     total_files: int = Field(default=0, description="Total number of files")
     indexed_files: int = Field(default=0, description="Number of files processed")
-    last_sync: Optional[datetime] = Field(
-
+    last_sync: Optional[datetime] = Field(
+        None, description="Last successful sync timestamp"
+    )
+    sync_status: SyncStatus = Field(
+        default=SyncStatus.PENDING, description="Current sync status"
+    )
     error_message: Optional[str] = Field(None, description="Last error message")
     queue_depth: int = Field(default=0, description="Number of pending tasks")
-    processing_rate: float = Field(
-
-
-
-
+    processing_rate: float = Field(
+        default=0.0, description="Files per second processing rate"
+    )
+    estimated_completion: Optional[datetime] = Field(
+        None, description="Estimated completion time"
+    )
+    metadata: Dict[str, Any] = Field(
+        default_factory=dict, description="Additional metadata"
+    )
+    created: datetime = Field(
+        default_factory=datetime.utcnow, description="Creation timestamp"
+    )
+    last_modified: datetime = Field(
+        default_factory=datetime.utcnow, description="Last update timestamp"
+    )
+
 
 class VectorSearchResult(BaseModel):
     """
     Represents a vector search result with similarity scoring.
     """
-
+
     file_path: str = Field(..., description="Path to the matching file")
     chunk_name: Optional[str] = Field(None, description="Name of the code chunk")
     chunk_type: ChunkType = Field(..., description="Type of code chunk")
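`processing_rate` (files per second) together with `total_files`/`indexed_files` is enough to derive `estimated_completion`. The daemon's actual logic is not part of this diff; a plausible sketch of the relationship between the fields:

```python
from datetime import datetime, timedelta
from typing import Optional

def estimate_completion(
    total_files: int, indexed_files: int, processing_rate: float
) -> Optional[datetime]:
    # No ETA until a non-zero rate has been measured.
    if processing_rate <= 0.0:
        return None
    remaining = max(total_files - indexed_files, 0)
    return datetime.utcnow() + timedelta(seconds=remaining / processing_rate)

# 900 files left at 15 files/s -> ETA roughly one minute out.
eta = estimate_completion(total_files=1200, indexed_files=300, processing_rate=15.0)
```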
@@ -291,13 +327,16 @@ class VectorSearchResult(BaseModel):
     end_line: int = Field(..., description="Ending line number")
     similarity_score: float = Field(..., description="Cosine similarity score")
     project_id: str = Field(..., description="Project identifier")
-    metadata: Dict[str, Any] = Field(
+    metadata: Dict[str, Any] = Field(
+        default_factory=dict, description="Additional metadata"
+    )
+
 
 class VectorIndexStatus(BaseModel):
     """
     Current status of vector indexing for a project.
     """
-
+
     is_indexing: bool = Field(..., description="Whether indexing is currently active")
     indexed_files: int = Field(..., description="Number of files indexed")
     total_files: int = Field(..., description="Total number of files")
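`similarity_score` is documented as cosine similarity. For reference, the standard definition in pure Python (the package presumably gets this score from its vector provider rather than computing it locally):

```python
import math
from typing import Sequence

def cosine_similarity(a: Sequence[float], b: Sequence[float]) -> float:
    # cos(theta) = a.b / (|a| |b|); 1.0 means same direction, 0.0 orthogonal.
    dot = sum(x * y for x, y in zip(a, b))
    norm = math.sqrt(sum(x * x for x in a)) * math.sqrt(sum(y * y for y in b))
    return dot / norm if norm else 0.0

assert abs(cosine_similarity([1.0, 0.0], [1.0, 0.0]) - 1.0) < 1e-9
assert abs(cosine_similarity([1.0, 0.0], [0.0, 1.0]) - 0.0) < 1e-9
```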
@@ -307,9 +346,12 @@ class VectorIndexStatus(BaseModel):
     sync_status: SyncStatus = Field(..., description="Current sync status")
     queue_depth: int = Field(..., description="Number of pending tasks")
     processing_rate: float = Field(..., description="Processing rate")
-    estimated_completion: Optional[datetime] = Field(
+    estimated_completion: Optional[datetime] = Field(
+        None, description="Estimated completion time"
+    )
     error_message: Optional[str] = Field(None, description="Last error message")
 
+
 # Enable forward references for recursive models
 FolderNode.model_rebuild()
 CodebaseOverview.model_rebuild()
mcp_code_indexer/database/retry_executor.py
CHANGED

@@ -279,8 +279,13 @@ class RetryExecutor:
         Yields:
             Database connection
         """
+        import sys
+
+        # Store the context manager so we can properly call __aexit__
+        ctx_manager: Optional[AsyncContextManager[aiosqlite.Connection]] = None
 
         async def acquire_connection() -> aiosqlite.Connection:
+            nonlocal ctx_manager
            # This function will be retried by execute_with_retry
            # Get the async context manager and enter it
            ctx_manager = connection_factory()
@@ -288,15 +293,20 @@
             return conn
 
         # Use execute_with_retry to handle the retry logic
-        # We create a connection and store it for the context manager
         connection = await self.execute_with_retry(acquire_connection, operation_name)
 
         try:
             yield connection
-
-            #
-
-
+        except BaseException:
+            # Pass actual exception info to __aexit__ for proper rollback/cleanup
+            exc_type, exc, tb = sys.exc_info()
+            if ctx_manager is not None:
+                await ctx_manager.__aexit__(exc_type, exc, tb)
+            raise
+        else:
+            # No exception - call __aexit__ with None values
+            if ctx_manager is not None:
+                await ctx_manager.__aexit__(None, None, None)
 
     def _should_retry_exception(self, retry_state: RetryCallState) -> bool:
         """
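Taken together, the two retry_executor hunks fix the cleanup path: because the connection is acquired through a retried helper rather than `async with`, the wrapper must drive the inner context manager's `__aexit__` itself, forwarding real exception info so the underlying transaction can roll back. A self-contained sketch of the same pattern, with the retry machinery stripped out and a stand-in connection class:

```python
import asyncio
import sys
from contextlib import asynccontextmanager

class FakeConnection:
    async def rollback(self) -> None:
        print("rolled back")

@asynccontextmanager
async def connection_factory():
    conn = FakeConnection()
    try:
        yield conn
    except BaseException:
        await conn.rollback()          # inner __aexit__ sees the real exception
        raise

@asynccontextmanager
async def get_connection():
    ctx = connection_factory()
    conn = await ctx.__aenter__()      # in the real code this call is retried
    try:
        yield conn
    except BaseException:
        # Forward actual exception info so the inner manager can clean up.
        await ctx.__aexit__(*sys.exc_info())
        raise
    else:
        await ctx.__aexit__(None, None, None)

async def demo() -> None:
    try:
        async with get_connection():
            raise RuntimeError("boom")
    except RuntimeError:
        pass                           # "rolled back" was printed on the way out

asyncio.run(demo())
```

Passing `None, None, None` on the success path (instead of always forwarding `sys.exc_info()`) matters: a generator-based context manager treats a non-None exception as a request to `throw()` into its body.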
mcp_code_indexer/file_scanner.py
CHANGED
@@ -6,10 +6,12 @@ while respecting .gitignore patterns and common ignore patterns. It enables
 efficient discovery of files that need description tracking.
 """
 
+import asyncio
 import fnmatch
 import logging
+import os
 from pathlib import Path
-from typing import Dict,
+from typing import Dict, Iterator, List, Optional, Set, Union, Any, cast
 
 try:
     from gitignore_parser import parse_gitignore
@@ -150,6 +152,13 @@ class FileScanner:
         self.project_root = Path(project_root).resolve()
         self._gitignore_cache: Dict[str, Any] = {}
         self._load_gitignore_patterns()
+        # Build ignore patterns set for directory pruning
+        self.ignore_patterns = set(DEFAULT_IGNORE_PATTERNS)
+
+    @property
+    def root_path(self) -> Path:
+        """Get the root path for the scanner (alias for project_root)."""
+        return self.project_root
 
     def _load_gitignore_patterns(self) -> None:
         """Load and cache gitignore patterns from the project."""
@@ -228,6 +237,53 @@ class FileScanner:
         """Check if a file has an ignored extension."""
         return file_path.suffix.lower() in IGNORED_EXTENSIONS
 
+    def should_ignore_path(self, path: Path) -> bool:
+        """
+        Check if a path (file or directory) should be ignored based on patterns.
+
+        This is used for directory pruning during walks to skip entire subtrees
+        like node_modules, .git, etc.
+
+        Args:
+            path: Path to check (can be file or directory)
+
+        Returns:
+            True if the path should be ignored
+        """
+        try:
+            rel_path = path.relative_to(self.project_root)
+        except ValueError:
+            rel_path = path
+
+        path_str = str(rel_path)
+        path_name = path.name
+
+        # Check against ignore patterns
+        for pattern in self.ignore_patterns:
+            # Handle directory patterns (ending with /)
+            if pattern.endswith("/"):
+                pattern_no_slash = pattern.rstrip("/")
+                if path_name == pattern_no_slash:
+                    return True
+            # Handle wildcard patterns (starting with *)
+            elif pattern.startswith("*"):
+                if path_str.endswith(pattern[1:]) or path_name.endswith(pattern[1:]):
+                    return True
+            # Handle path patterns (containing / or \)
+            elif "/" in pattern or "\\" in pattern:
+                if pattern in path_str:
+                    return True
+            # Handle simple name patterns
+            else:
+                if pattern in path.parts or path_name == pattern:
+                    return True
+
+        # Also check gitignore
+        if self._is_ignored_by_gitignore(path):
+            return True
+
+        return False
+
     def should_ignore_file(self, file_path: Path) -> bool:
         """
         Determine if a file should be ignored.
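The four pattern shapes the new method distinguishes (trailing slash, leading `*`, embedded separator, bare name) can be exercised in isolation. A standalone restatement with an illustrative pattern set (`DEFAULT_IGNORE_PATTERNS`' real contents are not shown in this diff):

```python
from pathlib import Path

# Illustrative pattern set; the package's DEFAULT_IGNORE_PATTERNS will differ.
PATTERNS = {"node_modules", ".git", "*.pyc", "build/"}

def matches(path: Path, pattern: str) -> bool:
    name = path.name
    if pattern.endswith("/"):                           # directory pattern
        return name == pattern.rstrip("/")
    if pattern.startswith("*"):                         # suffix wildcard
        return name.endswith(pattern[1:])
    if "/" in pattern or "\\" in pattern:               # path fragment
        return pattern in str(path)
    return pattern in path.parts or name == pattern     # bare name

assert any(matches(Path("src/node_modules/lib.js"), p) for p in PATTERNS)
assert any(matches(Path("app/cache.pyc"), p) for p in PATTERNS)
assert not any(matches(Path("src/main.py"), p) for p in PATTERNS)
```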
@@ -246,12 +302,8 @@
         if self._is_ignored_by_extension(file_path):
             return True
 
-        # Check
-        if self.
-            return True
-
-        # Check gitignore patterns
-        if self._is_ignored_by_gitignore(file_path):
+        # Check path-based patterns
+        if self.should_ignore_path(file_path):
             return True
 
         return False
@@ -286,12 +338,27 @@
         logger.info(f"Found {len(files)} trackable files in {self.project_root}")
         return files
 
-    def _walk_directory(self) ->
-        """
+    def _walk_directory(self) -> Iterator[Path]:
+        """
+        Walk directory using os.walk with directory pruning.
+
+        This skips ignored directories entirely rather than traversing then filtering.
+        Critical for performance - avoids traversing node_modules, .git, etc.
+        """
         try:
-            for
-
-
+            for dirpath, dirnames, filenames in os.walk(self.project_root):
+                current_dir = Path(dirpath)
+
+                # Prune ignored directories in-place to prevent descending into them
+                # Modifying dirnames in-place is the documented way to prune os.walk
+                dirnames[:] = [
+                    d for d in dirnames
+                    if not self.should_ignore_path(current_dir / d)
+                ]
+
+                for filename in filenames:
+                    yield current_dir / filename
+
         except PermissionError as e:
             logger.warning(f"Permission denied accessing {e.filename}")
         except Exception as e:
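The comment in the hunk is accurate: assigning to `dirnames[:]` is the documented way to prune `os.walk`, since the walker recurses only into whatever remains in that list. A minimal standalone demonstration of the technique:

```python
import os
from pathlib import Path
from typing import Iterator

PRUNE = {"node_modules", ".git", "__pycache__"}   # illustrative ignore set

def walk_pruned(root: Path) -> Iterator[Path]:
    for dirpath, dirnames, filenames in os.walk(root):
        # Slice assignment mutates the list os.walk recurses over;
        # rebinding (dirnames = [...]) would prune nothing.
        dirnames[:] = [d for d in dirnames if d not in PRUNE]
        for filename in filenames:
            yield Path(dirpath) / filename

for f in walk_pruned(Path(".")):
    print(f)
```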
@@ -404,3 +471,31 @@
             logger.error(f"Error getting project stats: {e}")
 
         return stats
+
+    async def scan_directory_async(
+        self, max_files: Optional[int] = None
+    ) -> List[Path]:
+        """
+        Async version of scan_directory running in a thread.
+
+        Args:
+            max_files: Maximum number of files to return (None for no limit)
+
+        Returns:
+            List of file paths that should be tracked
+        """
+        loop = asyncio.get_running_loop()
+        return await loop.run_in_executor(None, self.scan_directory, max_files)
+
+    async def find_missing_files_async(self, existing_paths: Set[str]) -> List[Path]:
+        """
+        Async version of find_missing_files running in a thread.
+
+        Args:
+            existing_paths: Set of relative file paths that already have descriptions
+
+        Returns:
+            List of file paths that are missing descriptions
+        """
+        loop = asyncio.get_running_loop()
+        return await loop.run_in_executor(None, self.find_missing_files, existing_paths)
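Both wrappers use the standard recipe for exposing a blocking scan to the event loop: `run_in_executor(None, fn, *args)` runs the sync method on the default thread pool and awaits the result. A minimal standalone version (positional arguments pass through directly; keyword arguments would need `functools.partial`):

```python
import asyncio
import time
from typing import List

def slow_scan(limit: int) -> List[str]:
    time.sleep(0.2)                    # stands in for blocking filesystem work
    return [f"file_{i}.py" for i in range(limit)]

async def slow_scan_async(limit: int) -> List[str]:
    loop = asyncio.get_running_loop()
    # None selects the loop's default ThreadPoolExecutor.
    return await loop.run_in_executor(None, slow_scan, limit)

print(asyncio.run(slow_scan_async(3)))   # ['file_0.py', 'file_1.py', 'file_2.py']
```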
mcp_code_indexer/main.py
CHANGED
@@ -377,6 +377,8 @@ async def handle_runcommand(args: argparse.Namespace) -> None:
         "get_word_frequency": server._handle_get_word_frequency,
         "search_codebase_overview": server._handle_search_codebase_overview,
         "check_database_health": server._handle_check_database_health,
+        "enabled_vector_mode": server._handle_enabled_vector_mode,
+        "find_similar_code": server._handle_find_similar_code,
     }
 
     if tool_name not in tool_handlers:
@@ -1017,41 +1019,49 @@ async def main() -> None:
             from .vector_mode import is_vector_mode_available, check_api_keys
             from .vector_mode.config import load_vector_config
             from .vector_mode.daemon import start_vector_daemon
-
+
             # Check if vector mode is available
             if not is_vector_mode_available():
-                logger.error(
+                logger.error(
+                    "Vector mode dependencies not found. Try reinstalling: pip install --upgrade mcp-code-indexer"
+                )
                 sys.exit(1)
-
+
             # Check API keys
             api_keys = check_api_keys()
             if not all(api_keys.values()):
                 missing = [k for k, v in api_keys.items() if not v]
-                logger.error(
+                logger.error(
+                    f"Missing API keys for vector mode: {', '.join(missing)}"
+                )
                 sys.exit(1)
-
+
             # Load vector configuration
-            vector_config_path =
+            vector_config_path = (
+                Path(args.vector_config).expanduser()
+                if args.vector_config
+                else None
+            )
             vector_config = load_vector_config(vector_config_path)
-
+
             logger.info(
-                "Vector mode enabled",
+                "Vector mode enabled",
                 extra={
                     "structured_data": {
                         "embedding_model": vector_config.embedding_model,
                         "batch_size": vector_config.batch_size,
                         "daemon_enabled": vector_config.daemon_enabled,
                     }
-                }
+                },
             )
-
+
             # Start vector daemon in background
             if vector_config.daemon_enabled:
                 vector_daemon_task = asyncio.create_task(
                     start_vector_daemon(vector_config_path, db_path, cache_dir)
                )
                 logger.info("Vector daemon started")
-
+
         except Exception as e:
             logger.error(f"Failed to initialize vector mode: {e}")
             sys.exit(1)
@@ -1100,27 +1110,26 @@ async def main() -> None:
         if args.vector and vector_daemon_task:
             # Setup signal handling for graceful shutdown
             shutdown_event = asyncio.Event()
-
+
             def signal_handler():
                 logger.info("Shutdown signal received")
                 shutdown_event.set()
-
+
             # Register signal handlers
             loop = asyncio.get_running_loop()
             for sig in [signal.SIGTERM, signal.SIGINT]:
                 loop.add_signal_handler(sig, signal_handler)
-
+
             # Run server and wait for shutdown signal
             server_task = asyncio.create_task(server.run())
             shutdown_task = asyncio.create_task(shutdown_event.wait())
-
+
             try:
                 # Wait for either server completion or shutdown signal
                 done, pending = await asyncio.wait(
-                    [server_task, shutdown_task],
-                    return_when=asyncio.FIRST_COMPLETED
+                    [server_task, shutdown_task], return_when=asyncio.FIRST_COMPLETED
                 )
-
+
                 # Cancel remaining tasks
                 for task in pending:
                     task.cancel()
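The shutdown wiring races `server.run()` against a signal-triggered event via `asyncio.wait(..., return_when=FIRST_COMPLETED)`, then cancels whichever task lost. A condensed, runnable sketch of the same pattern (Unix only; `add_signal_handler` is not implemented on Windows event loops):

```python
import asyncio
import signal

async def serve() -> None:
    await asyncio.sleep(2)             # stands in for server.run()

async def main() -> None:
    shutdown_event = asyncio.Event()
    loop = asyncio.get_running_loop()
    for sig in (signal.SIGTERM, signal.SIGINT):
        loop.add_signal_handler(sig, shutdown_event.set)

    server_task = asyncio.create_task(serve())
    shutdown_task = asyncio.create_task(shutdown_event.wait())
    done, pending = await asyncio.wait(
        [server_task, shutdown_task], return_when=asyncio.FIRST_COMPLETED
    )
    for task in pending:               # cancel whichever lost the race
        task.cancel()
    await asyncio.gather(*pending, return_exceptions=True)

asyncio.run(main())
```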
@@ -1128,7 +1137,7 @@
                     try:
                         await task
                     except asyncio.CancelledError:
                         pass
-
+
             except Exception as e:
                 logger.error(f"Error during server execution: {e}")
                 raise
@@ -1144,17 +1153,21 @@ async def main() -> None:
         if vector_daemon_task and not vector_daemon_task.done():
             logger.info("Cancelling vector daemon")
             vector_daemon_task.cancel()
-
+
         # Wait for vector daemon to finish
         if vector_daemon_task:
             try:
                 await vector_daemon_task
             except asyncio.CancelledError:
                 logger.info("Vector daemon cancelled successfully")
-
+
         # Clean up any remaining asyncio tasks to prevent hanging
         current_task = asyncio.current_task()
-        tasks = [
+        tasks = [
+            task
+            for task in asyncio.all_tasks()
+            if not task.done() and task is not current_task
+        ]
         if tasks:
             logger.info(f"Cancelling {len(tasks)} remaining tasks")
             for task in tasks:
@@ -1163,22 +1176,21 @@
             # Wait for cancellation but don't wait forever
             try:
                 await asyncio.wait_for(
-                    asyncio.gather(*tasks, return_exceptions=True),
-                    timeout=2.0
+                    asyncio.gather(*tasks, return_exceptions=True), timeout=2.0
                 )
             except asyncio.TimeoutError:
                 logger.warning("Some tasks did not cancel within timeout")
-
+
         # Force close any remaining connections and cleanup resources
         try:
             # Give a moment for final cleanup
             await asyncio.sleep(0.1)
-
+
             # Shutdown the event loop executor to stop any background threads
             loop = asyncio.get_running_loop()
-            if hasattr(loop,
+            if hasattr(loop, "_default_executor") and loop._default_executor:
                 loop._default_executor.shutdown(wait=False)
-
+
         except Exception as e:
             logger.warning(f"Error during final cleanup: {e}")
 
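The cleanup sequence in `main()` — enumerate stragglers, cancel them, then bound the wait with `wait_for(gather(...), timeout)` — is reusable on its own. A compact sketch under the same structure:

```python
import asyncio

async def cancel_stragglers(timeout: float = 2.0) -> None:
    current = asyncio.current_task()
    tasks = [t for t in asyncio.all_tasks() if not t.done() and t is not current]
    for t in tasks:
        t.cancel()
    try:
        # return_exceptions=True collects CancelledError per task instead of
        # aborting the gather; wait_for bounds how long shutdown can hang.
        await asyncio.wait_for(asyncio.gather(*tasks, return_exceptions=True), timeout)
    except asyncio.TimeoutError:
        print("some tasks did not cancel within timeout")

async def main() -> None:
    asyncio.create_task(asyncio.sleep(60))   # a straggler to clean up
    await cancel_stragglers()

asyncio.run(main())
```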
@@ -1202,14 +1214,15 @@ def cli_main() -> None:
|
|
|
1202
1214
|
# Force cleanup of any remaining resources to prevent hanging
|
|
1203
1215
|
import threading
|
|
1204
1216
|
import time
|
|
1205
|
-
|
|
1217
|
+
|
|
1206
1218
|
# Give main threads a moment to finish
|
|
1207
1219
|
time.sleep(0.1)
|
|
1208
|
-
|
|
1220
|
+
|
|
1209
1221
|
# Force exit if daemon threads are preventing shutdown
|
|
1210
1222
|
active_threads = threading.active_count()
|
|
1211
1223
|
if active_threads > 1: # More than just the main thread
|
|
1212
1224
|
import os
|
|
1225
|
+
|
|
1213
1226
|
os._exit(0)
|
|
1214
1227
|
|
|
1215
1228
|
|