mcp-code-indexer 4.2.15__py3-none-any.whl → 4.2.16__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (26)
  1. mcp_code_indexer/database/database.py +251 -85
  2. mcp_code_indexer/database/models.py +66 -24
  3. mcp_code_indexer/database/retry_executor.py +15 -5
  4. mcp_code_indexer/file_scanner.py +107 -12
  5. mcp_code_indexer/main.py +43 -30
  6. mcp_code_indexer/server/mcp_server.py +191 -1
  7. mcp_code_indexer/vector_mode/chunking/ast_chunker.py +103 -84
  8. mcp_code_indexer/vector_mode/chunking/chunk_optimizer.py +1 -0
  9. mcp_code_indexer/vector_mode/config.py +113 -45
  10. mcp_code_indexer/vector_mode/const.py +24 -0
  11. mcp_code_indexer/vector_mode/daemon.py +860 -98
  12. mcp_code_indexer/vector_mode/monitoring/change_detector.py +113 -97
  13. mcp_code_indexer/vector_mode/monitoring/file_watcher.py +175 -121
  14. mcp_code_indexer/vector_mode/providers/turbopuffer_client.py +291 -98
  15. mcp_code_indexer/vector_mode/providers/voyage_client.py +140 -38
  16. mcp_code_indexer/vector_mode/services/__init__.py +9 -0
  17. mcp_code_indexer/vector_mode/services/embedding_service.py +389 -0
  18. mcp_code_indexer/vector_mode/services/vector_mode_tools_service.py +459 -0
  19. mcp_code_indexer/vector_mode/services/vector_storage_service.py +580 -0
  20. mcp_code_indexer/vector_mode/types.py +46 -0
  21. mcp_code_indexer/vector_mode/utils.py +50 -0
  22. {mcp_code_indexer-4.2.15.dist-info → mcp_code_indexer-4.2.16.dist-info}/METADATA +13 -10
  23. {mcp_code_indexer-4.2.15.dist-info → mcp_code_indexer-4.2.16.dist-info}/RECORD +26 -19
  24. {mcp_code_indexer-4.2.15.dist-info → mcp_code_indexer-4.2.16.dist-info}/WHEEL +1 -1
  25. {mcp_code_indexer-4.2.15.dist-info → mcp_code_indexer-4.2.16.dist-info}/entry_points.txt +0 -0
  26. {mcp_code_indexer-4.2.15.dist-info → mcp_code_indexer-4.2.16.dist-info/licenses}/LICENSE +0 -0

mcp_code_indexer/database/database.py

@@ -27,9 +27,11 @@ from mcp_code_indexer.database.exceptions import (
 )
 from mcp_code_indexer.database.models import (
     FileDescription,
+    IndexMeta,
     Project,
     ProjectOverview,
     SearchResult,
+    SyncStatus,
     WordFrequencyResult,
     WordFrequencyTerm,
 )
@@ -315,12 +317,10 @@ class DatabaseManager:
         self, operation_name: str = "write_operation"
     ) -> AsyncIterator[aiosqlite.Connection]:
         """
-        Get a database connection with write serialization and automatic
-        retry logic.
+        Get a database connection with write serialization.
 
-        This uses the new RetryExecutor to properly handle retry logic
-        without the broken yield-in-retry-loop pattern that caused
-        generator errors.
+        Ensures the write lock is held throughout the duration of the context
+        to prevent race conditions and database locking errors.
 
         Args:
             operation_name: Name of the operation for logging and
@@ -331,43 +331,10 @@ class DatabaseManager:
                 "DatabaseManager not initialized - call initialize() first"
             )
 
-        async def get_write_connection() -> aiosqlite.Connection:
-            """Inner function to get connection - retried by executor."""
-            if self._write_lock is None:
-                raise RuntimeError("Write lock not initialized")
-            async with self._write_lock:
-                async with self.get_connection() as conn:
-                    return conn
-
-        try:
-            # Use retry executor to handle connection acquisition with retries
-            connection = await self._retry_executor.execute_with_retry(
-                get_write_connection, operation_name
-            )
-
-            try:
-                yield connection
-
-                # Success - retry executor handles all failure tracking
-
-            except Exception:
-                # Error handling is managed by the retry executor
-                raise
-
-        except DatabaseError:
-            # Re-raise our custom database errors as-is
-            raise
-        except Exception as e:
-            # Classify and wrap other exceptions
-            classified_error = classify_sqlite_error(e, operation_name)
-            logger.error(
-                (
-                    f"Database operation '{operation_name}' failed: "
-                    f"{classified_error.message}"
-                ),
-                extra={"structured_data": classified_error.to_dict()},
-            )
-            raise classified_error
+        # Acquire lock for exclusive write access - hold it for entire context
+        async with self._write_lock:
+            async with self.get_connection() as conn:
+                yield conn
 
     def get_database_stats(self) -> Dict[str, Any]:
         """
@@ -740,6 +707,25 @@ class DatabaseManager:
             await db.commit()
             logger.debug(f"Updated project: {project.id}")
 
+    async def set_project_vector_mode(self, project_id: str, enabled: bool) -> None:
+        """Set the vector_mode for a specific project."""
+        async with self.get_write_connection_with_retry(
+            "set_project_vector_mode"
+        ) as db:
+            await db.execute(
+                "UPDATE projects SET vector_mode = ? WHERE id = ?",
+                (int(enabled), project_id),
+            )
+
+            # Check if the project was actually updated
+            cursor = await db.execute("SELECT changes()")
+            changes = await cursor.fetchone()
+            if changes[0] == 0:
+                raise ValueError(f"Project not found: {project_id}")
+
+            await db.commit()
+            logger.debug(f"Set vector_mode={enabled} for project: {project_id}")
+
     async def get_all_projects(self) -> List[Project]:
         """Get all projects in the database."""
        async with self.get_connection() as db:
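set_project_vector_mode verifies that the UPDATE actually touched a row by reading SQLite's changes() counter and raising when it is zero. A small sketch of the same check against a hypothetical, simplified table:

import sqlite3

conn = sqlite3.connect(":memory:")
conn.execute("CREATE TABLE projects (id TEXT PRIMARY KEY, vector_mode INTEGER)")
conn.execute("INSERT INTO projects VALUES ('p1', 0)")


def set_vector_mode(project_id: str, enabled: bool) -> None:
    conn.execute(
        "UPDATE projects SET vector_mode = ? WHERE id = ?",
        (int(enabled), project_id),
    )
    # changes() reports how many rows the last INSERT/UPDATE/DELETE modified.
    (modified,) = conn.execute("SELECT changes()").fetchone()
    if modified == 0:
        raise ValueError(f"Project not found: {project_id}")
    conn.commit()


set_vector_mode("p1", True)        # succeeds
# set_vector_mode("missing", True)  # would raise ValueError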
@@ -795,12 +781,19 @@ class DatabaseManager:
         ) as db:
             await db.execute(
                 """
-                INSERT OR REPLACE INTO file_descriptions
+                INSERT INTO file_descriptions
                 (
                     project_id, file_path, description, file_hash, last_modified,
                     version, source_project_id, to_be_cleaned
                 )
                 VALUES (?, ?, ?, ?, ?, ?, ?, ?)
+                ON CONFLICT(project_id, file_path) DO UPDATE SET
+                    description=excluded.description,
+                    file_hash=excluded.file_hash,
+                    last_modified=excluded.last_modified,
+                    version=excluded.version,
+                    source_project_id=excluded.source_project_id,
+                    to_be_cleaned=excluded.to_be_cleaned
                 """,
                 (
                     file_desc.project_id,
@@ -898,12 +891,19 @@ class DatabaseManager:
 
             await conn.executemany(
                 """
-                INSERT OR REPLACE INTO file_descriptions
+                INSERT INTO file_descriptions
                 (
                     project_id, file_path, description, file_hash, last_modified,
                     version, source_project_id, to_be_cleaned
                 )
                 VALUES (?, ?, ?, ?, ?, ?, ?, ?)
+                ON CONFLICT(project_id, file_path) DO UPDATE SET
+                    description=excluded.description,
+                    file_hash=excluded.file_hash,
+                    last_modified=excluded.last_modified,
+                    version=excluded.version,
+                    source_project_id=excluded.source_project_id,
+                    to_be_cleaned=excluded.to_be_cleaned
                 """,
                 data,
             )
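Both execute() and executemany() switch from INSERT OR REPLACE to an explicit upsert. The difference matters: OR REPLACE deletes the conflicting row and inserts a new one (new rowid; delete triggers and ON DELETE cascades fire), while ON CONFLICT ... DO UPDATE modifies the existing row in place and only touches the listed columns. A minimal sketch against a simplified, hypothetical version of the table (the upsert syntax needs SQLite 3.24 or newer):

import sqlite3

conn = sqlite3.connect(":memory:")
conn.execute(
    """
    CREATE TABLE file_descriptions (
        id INTEGER PRIMARY KEY,
        project_id TEXT,
        file_path TEXT,
        description TEXT,
        UNIQUE (project_id, file_path)
    )
    """
)

upsert = """
    INSERT INTO file_descriptions (project_id, file_path, description)
    VALUES (?, ?, ?)
    ON CONFLICT(project_id, file_path) DO UPDATE SET
        description = excluded.description
"""
conn.execute(upsert, ("p1", "src/main.py", "first description"))
conn.execute(upsert, ("p1", "src/main.py", "updated description"))

# The original row was updated in place: same id, new description.
print(conn.execute("SELECT id, description FROM file_descriptions").fetchone())
# -> (1, 'updated description')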
@@ -1067,10 +1067,8 @@ class DatabaseManager:
         Returns:
             List of file paths that were marked for cleanup
         """
-        removed_files: List[str] = []
-
-        async def cleanup_operation(conn: aiosqlite.Connection) -> List[str]:
-            # Get all active file descriptions for this project
+        # 1. Get all active file paths (fast DB read)
+        async with self.get_connection() as conn:
             cursor = await conn.execute(
                 (
                     "SELECT file_path FROM file_descriptions WHERE "
@@ -1078,46 +1076,29 @@ class DatabaseManager:
                 ),
                 (project_id,),
             )
-
             rows = await cursor.fetchall()
+            file_paths = [row["file_path"] for row in rows]
 
-            # Check which files no longer exist
-            to_remove = []
-            for row in rows:
-                file_path = row["file_path"]
+        # 2. Check existence on disk (blocking IO - run in executor)
+        def find_removed_files() -> List[str]:
+            missing = []
+            for file_path in file_paths:
                 full_path = project_root / file_path
-
                 if not full_path.exists():
-                    to_remove.append(file_path)
-
-            # Mark descriptions for cleanup instead of deleting
-            if to_remove:
-                import time
-
-                cleanup_timestamp = int(time.time())
-                await conn.executemany(
-                    (
-                        "UPDATE file_descriptions SET to_be_cleaned = ? WHERE "
-                        "project_id = ? AND file_path = ?"
-                    ),
-                    [(cleanup_timestamp, project_id, path) for path in to_remove],
-                )
-                logger.info(
-                    (
-                        f"Marked {len(to_remove)} missing files for cleanup "
-                        f"from {project_id}"
-                    )
-                )
+                    missing.append(file_path)
+            return missing
 
-            return to_remove
+        loop = asyncio.get_running_loop()
+        to_remove = await loop.run_in_executor(None, find_removed_files)
 
-        removed_files = await self.execute_transaction_with_retry(
-            cleanup_operation,
-            f"cleanup_missing_files_{project_id}",
-            timeout_seconds=60.0,  # Longer timeout for file system operations
-        )
+        # 3. Mark for cleanup (fast DB write)
+        if to_remove:
+            await self.cleanup_manager.mark_files_for_cleanup(project_id, to_remove)
+            logger.info(
+                f"Marked {len(to_remove)} missing files for cleanup from {project_id}"
+            )
 
-        return removed_files
+        return to_remove
 
     async def analyze_word_frequency(
         self, project_id: str, limit: int = 200
@@ -1139,7 +1120,7 @@ class DatabaseManager:
         stop_words_path = (
             Path(__file__).parent.parent / "data" / "stop_words_english.txt"
         )
-        stop_words = set()
+        stop_words: set = set()
 
         if stop_words_path.exists():
             with open(stop_words_path, "r", encoding="utf-8") as f:
@@ -1186,8 +1167,8 @@ class DatabaseManager:
             }
         stop_words.update(programming_keywords)
 
+        # Get all descriptions for this project (fast DB read)
         async with self.get_connection() as db:
-            # Get all descriptions for this project
             cursor = await db.execute(
                 (
                     "SELECT description FROM file_descriptions WHERE "
@@ -1195,11 +1176,13 @@ class DatabaseManager:
                 ),
                 (project_id,),
             )
-
             rows = await cursor.fetchall()
+            descriptions = [row["description"] for row in rows]
 
+        # Process word frequency in executor (CPU-bound work)
+        def process_word_frequency() -> WordFrequencyResult:
             # Combine all descriptions
-            all_text = " ".join(row["description"] for row in rows)
+            all_text = " ".join(descriptions)
 
             # Tokenize and filter
             words = re.findall(r"\b[a-zA-Z]{2,}\b", all_text.lower())
@@ -1220,6 +1203,9 @@ class DatabaseManager:
                 total_unique_terms=len(word_counts),
             )
 
+        loop = asyncio.get_running_loop()
+        return await loop.run_in_executor(None, process_word_frequency)
+
     async def cleanup_empty_projects(self) -> int:
         """
         Remove projects that have no file descriptions and no project overview.
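Both cleanup_missing_files and analyze_word_frequency now push their blocking work (path.exists() checks, word counting) off the event loop with loop.run_in_executor. A minimal sketch of that pattern with illustrative names:

import asyncio
from pathlib import Path
from typing import List


async def find_missing(project_root: Path, file_paths: List[str]) -> List[str]:
    def check_on_disk() -> List[str]:
        # Blocking filesystem calls run on a worker thread,
        # leaving the event loop free for other tasks.
        return [p for p in file_paths if not (project_root / p).exists()]

    loop = asyncio.get_running_loop()
    # None selects the loop's default ThreadPoolExecutor.
    return await loop.run_in_executor(None, check_on_disk)


print(asyncio.run(find_missing(Path("."), ["README.md", "no_such_file.py"])))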
@@ -1320,6 +1306,186 @@ class DatabaseManager:
                 "files": file_descriptions,
             }
 
+    # IndexMeta operations
+    async def create_index_meta(self, index_meta: IndexMeta) -> None:
+        """Create or update index metadata for a project."""
+        async with self.get_write_connection_with_retry("create_index_meta") as db:
+            await db.execute(
+                """
+                INSERT OR REPLACE INTO index_meta (
+                    project_id, total_chunks, indexed_chunks, total_files, indexed_files,
+                    last_sync, sync_status, error_message, queue_depth, processing_rate,
+                    estimated_completion, metadata, created, last_modified
+                )
+                VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
+                """,
+                (
+                    index_meta.project_id,
+                    index_meta.total_chunks,
+                    index_meta.indexed_chunks,
+                    index_meta.total_files,
+                    index_meta.indexed_files,
+                    index_meta.last_sync,
+                    index_meta.sync_status.value,
+                    index_meta.error_message,
+                    index_meta.queue_depth,
+                    index_meta.processing_rate,
+                    index_meta.estimated_completion,
+                    json.dumps(index_meta.metadata),
+                    index_meta.created,
+                    index_meta.last_modified,
+                ),
+            )
+            await db.commit()
+            logger.debug(
+                f"Created/updated index metadata for project: {index_meta.project_id}"
+            )
+
+    async def update_index_meta(self, index_meta: IndexMeta) -> None:
+        """Update existing index metadata for a project."""
+        async with self.get_write_connection_with_retry("update_index_meta") as db:
+            await db.execute(
+                """
+                UPDATE index_meta
+                SET total_chunks = ?, indexed_chunks = ?, total_files = ?, indexed_files = ?,
+                    last_sync = ?, sync_status = ?, error_message = ?, queue_depth = ?,
+                    processing_rate = ?, estimated_completion = ?, metadata = ?, last_modified = ?
+                WHERE project_id = ?
+                """,
+                (
+                    index_meta.total_chunks,
+                    index_meta.indexed_chunks,
+                    index_meta.total_files,
+                    index_meta.indexed_files,
+                    index_meta.last_sync,
+                    index_meta.sync_status.value,
+                    index_meta.error_message,
+                    index_meta.queue_depth,
+                    index_meta.processing_rate,
+                    index_meta.estimated_completion,
+                    json.dumps(index_meta.metadata),
+                    index_meta.last_modified,
+                    index_meta.project_id,
+                ),
+            )
+
+            # Check if the project was actually updated
+            cursor = await db.execute("SELECT changes()")
+            changes = await cursor.fetchone()
+            if changes[0] == 0:
+                raise ValueError(
+                    f"Index metadata not found for project: {index_meta.project_id}"
+                )
+
+            await db.commit()
+            logger.debug(f"Updated index metadata for project: {index_meta.project_id}")
+
+    async def get_index_meta(self, project_id: str) -> Optional[IndexMeta]:
+        """Retrieve index metadata for a project."""
+        async with self.get_connection() as db:
+            cursor = await db.execute(
+                "SELECT * FROM index_meta WHERE project_id = ?", (project_id,)
+            )
+            row = await cursor.fetchone()
+
+            if row:
+                # Convert row to dict for easier field access
+                row_dict = dict(row)
+
+                # Parse JSON metadata field
+                metadata = (
+                    json.loads(row_dict["metadata"]) if row_dict["metadata"] else {}
+                )
+
+                # Parse datetime fields
+                created = (
+                    datetime.fromisoformat(row_dict["created"])
+                    if row_dict["created"]
+                    else datetime.utcnow()
+                )
+                last_modified = (
+                    datetime.fromisoformat(row_dict["last_modified"])
+                    if row_dict["last_modified"]
+                    else datetime.utcnow()
+                )
+                last_sync = (
+                    datetime.fromisoformat(row_dict["last_sync"])
+                    if row_dict["last_sync"]
+                    else None
+                )
+                estimated_completion = (
+                    datetime.fromisoformat(row_dict["estimated_completion"])
+                    if row_dict["estimated_completion"]
+                    else None
+                )
+
+                return IndexMeta(
+                    id=row_dict["id"],
+                    project_id=row_dict["project_id"],
+                    total_chunks=row_dict["total_chunks"],
+                    indexed_chunks=row_dict["indexed_chunks"],
+                    total_files=row_dict["total_files"],
+                    indexed_files=row_dict["indexed_files"],
+                    last_sync=last_sync,
+                    sync_status=row_dict["sync_status"],
+                    error_message=row_dict["error_message"],
+                    queue_depth=row_dict["queue_depth"],
+                    processing_rate=row_dict["processing_rate"],
+                    estimated_completion=estimated_completion,
+                    metadata=metadata,
+                    created=created,
+                    last_modified=last_modified,
+                )
+            return None
+
+    async def get_or_create_index_meta(self, project_id: str, **kwargs) -> IndexMeta:
+        """
+        Get existing index metadata or create new one with default values.
+
+        Args:
+            project_id: Project identifier
+            **kwargs: Optional fields to override defaults when creating new metadata
+
+        Returns:
+            IndexMeta object (existing or newly created)
+        """
+        # Try to get existing metadata first
+        existing_meta = await self.get_index_meta(project_id)
+        if existing_meta:
+            return existing_meta
+
+        # Create new metadata with defaults, allowing kwargs to override
+        default_metadata = {
+            "project_id": project_id,
+            "total_chunks": 0,
+            "indexed_chunks": 0,
+            "total_files": 0,
+            "indexed_files": 0,
+            "last_sync": None,
+            "sync_status": SyncStatus.PENDING,
+            "error_message": None,
+            "queue_depth": 0,
+            "processing_rate": 0.0,
+            "estimated_completion": None,
+            "metadata": {},
+        }
+
+        # Override defaults with provided kwargs
+        default_metadata.update(kwargs)
+
+        # Create the IndexMeta object
+        new_meta = IndexMeta(**default_metadata)
+
+        # Store it in the database
+        await self.create_index_meta(new_meta)
+
+        # Return the created metadata (fetch it back to get the assigned ID)
+        result = await self.get_index_meta(project_id)
+        if result is None:
+            raise DatabaseError(f"Failed to create index metadata for project: {project_id}")
+
+        return result
+
     # Cleanup operations
 
     @property
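The new helpers form a small lifecycle for per-project index metadata: get_or_create_index_meta seeds a PENDING record, update_index_meta persists progress, and get_index_meta reads it back. A hypothetical usage sketch, assuming an already-initialized DatabaseManager instance named db:

from mcp_code_indexer.database.models import SyncStatus


async def report_progress(db, project_id: str) -> None:
    # First call creates a PENDING record; later calls return the stored one.
    meta = await db.get_or_create_index_meta(project_id)

    meta.sync_status = SyncStatus.IN_PROGRESS
    meta.indexed_files += 1
    await db.update_index_meta(meta)

    current = await db.get_index_meta(project_id)
    if current is not None:
        print(f"{current.indexed_files}/{current.total_files} files indexed")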

mcp_code_indexer/database/models.py

@@ -32,7 +32,9 @@ class Project(BaseModel):
     last_accessed: datetime = Field(
         default_factory=datetime.utcnow, description="Last access timestamp"
     )
-    vector_mode: bool = Field(default=False, description="Enable vector search for this project")
+    vector_mode: bool = Field(
+        default=False, description="Enable vector search for this project"
+    )
 
 
 class FileDescription(BaseModel):
@@ -189,10 +191,12 @@ class WordFrequencyResult(BaseModel):
 
 # Vector Mode Models
 
+
 class ChunkType(str, Enum):
     """Types of code chunks for semantic analysis."""
+
     FUNCTION = "function"
-    CLASS = "class"
+    CLASS = "class"
     METHOD = "method"
     IMPORT = "import"
     DOCSTRING = "docstring"
@@ -204,27 +208,32 @@ class ChunkType(str, Enum):
     NAMESPACE = "namespace"
     GENERIC = "generic"
 
+
 class NodeType(str, Enum):
     """Types of nodes in Merkle tree."""
+
     FILE = "file"
     DIRECTORY = "directory"
     PROJECT = "project"
 
+
 class SyncStatus(str, Enum):
     """Vector index synchronization status."""
+
     PENDING = "pending"
     IN_PROGRESS = "in_progress"
     COMPLETED = "completed"
     FAILED = "failed"
     PAUSED = "paused"
 
+
 class CodeChunk(BaseModel):
     """
     Represents a semantic chunk of code extracted from a file.
-
+
     Used for embedding generation and vector search operations.
     """
-
+
     id: Optional[int] = Field(None, description="Database ID")
     file_id: int = Field(..., description="Reference to FileDescription")
     project_id: str = Field(..., description="Reference to project")
@@ -253,36 +269,56 @@ class MerkleNode(BaseModel):
     node_type: NodeType = Field(..., description="Type of filesystem node")
     parent_path: Optional[str] = Field(None, description="Path to parent directory")
     children_hash: Optional[str] = Field(None, description="Combined hash of children")
-    last_modified: datetime = Field(default_factory=datetime.utcnow, description="Last update timestamp")
+    last_modified: datetime = Field(
+        default_factory=datetime.utcnow, description="Last update timestamp"
+    )
+
 
 class IndexMeta(BaseModel):
     """
     Metadata about vector indexing progress and status for a project.
-
+
     Tracks indexing state, statistics, and synchronization status.
     """
-
+
     id: Optional[int] = Field(None, description="Database ID")
     project_id: str = Field(..., description="Reference to project", unique=True)
     total_chunks: int = Field(default=0, description="Total number of chunks")
-    indexed_chunks: int = Field(default=0, description="Number of chunks with embeddings")
+    indexed_chunks: int = Field(
+        default=0, description="Number of chunks with embeddings"
+    )
     total_files: int = Field(default=0, description="Total number of files")
     indexed_files: int = Field(default=0, description="Number of files processed")
-    last_sync: Optional[datetime] = Field(None, description="Last successful sync timestamp")
-    sync_status: SyncStatus = Field(default=SyncStatus.PENDING, description="Current sync status")
+    last_sync: Optional[datetime] = Field(
+        None, description="Last successful sync timestamp"
+    )
+    sync_status: SyncStatus = Field(
+        default=SyncStatus.PENDING, description="Current sync status"
+    )
     error_message: Optional[str] = Field(None, description="Last error message")
     queue_depth: int = Field(default=0, description="Number of pending tasks")
-    processing_rate: float = Field(default=0.0, description="Files per second processing rate")
-    estimated_completion: Optional[datetime] = Field(None, description="Estimated completion time")
-    metadata: Dict[str, Any] = Field(default_factory=dict, description="Additional metadata")
-    created: datetime = Field(default_factory=datetime.utcnow, description="Creation timestamp")
-    last_modified: datetime = Field(default_factory=datetime.utcnow, description="Last update timestamp")
+    processing_rate: float = Field(
+        default=0.0, description="Files per second processing rate"
+    )
+    estimated_completion: Optional[datetime] = Field(
+        None, description="Estimated completion time"
+    )
+    metadata: Dict[str, Any] = Field(
+        default_factory=dict, description="Additional metadata"
+    )
+    created: datetime = Field(
+        default_factory=datetime.utcnow, description="Creation timestamp"
+    )
+    last_modified: datetime = Field(
+        default_factory=datetime.utcnow, description="Last update timestamp"
+    )
+
 
 class VectorSearchResult(BaseModel):
     """
     Represents a vector search result with similarity scoring.
     """
-
+
     file_path: str = Field(..., description="Path to the matching file")
     chunk_name: Optional[str] = Field(None, description="Name of the code chunk")
     chunk_type: ChunkType = Field(..., description="Type of code chunk")
@@ -291,13 +327,16 @@ class VectorSearchResult(BaseModel):
     end_line: int = Field(..., description="Ending line number")
     similarity_score: float = Field(..., description="Cosine similarity score")
     project_id: str = Field(..., description="Project identifier")
-    metadata: Dict[str, Any] = Field(default_factory=dict, description="Additional metadata")
+    metadata: Dict[str, Any] = Field(
+        default_factory=dict, description="Additional metadata"
+    )
+
 
 class VectorIndexStatus(BaseModel):
     """
     Current status of vector indexing for a project.
     """
-
+
     is_indexing: bool = Field(..., description="Whether indexing is currently active")
     indexed_files: int = Field(..., description="Number of files indexed")
     total_files: int = Field(..., description="Total number of files")
@@ -307,9 +346,12 @@
     sync_status: SyncStatus = Field(..., description="Current sync status")
     queue_depth: int = Field(..., description="Number of pending tasks")
     processing_rate: float = Field(..., description="Processing rate")
-    estimated_completion: Optional[datetime] = Field(None, description="Estimated completion time")
+    estimated_completion: Optional[datetime] = Field(
+        None, description="Estimated completion time"
+    )
     error_message: Optional[str] = Field(None, description="Last error message")
 
+
 # Enable forward references for recursive models
 FolderNode.model_rebuild()
 CodebaseOverview.model_rebuild()

mcp_code_indexer/database/retry_executor.py

@@ -279,8 +279,13 @@ class RetryExecutor:
         Yields:
             Database connection
         """
+        import sys
+
+        # Store the context manager so we can properly call __aexit__
+        ctx_manager: Optional[AsyncContextManager[aiosqlite.Connection]] = None
 
         async def acquire_connection() -> aiosqlite.Connection:
+            nonlocal ctx_manager
             # This function will be retried by execute_with_retry
             # Get the async context manager and enter it
             ctx_manager = connection_factory()
@@ -288,15 +293,20 @@
             return conn
 
         # Use execute_with_retry to handle the retry logic
-        # We create a connection and store it for the context manager
         connection = await self.execute_with_retry(acquire_connection, operation_name)
 
         try:
             yield connection
-        finally:
-            # Close the connection properly
-            if hasattr(connection, "close"):
-                await connection.close()
+        except BaseException:
+            # Pass actual exception info to __aexit__ for proper rollback/cleanup
+            exc_type, exc, tb = sys.exc_info()
+            if ctx_manager is not None:
+                await ctx_manager.__aexit__(exc_type, exc, tb)
+            raise
+        else:
+            # No exception - call __aexit__ with None values
+            if ctx_manager is not None:
+                await ctx_manager.__aexit__(None, None, None)
 
     def _should_retry_exception(self, retry_state: RetryCallState) -> bool:
         """