mcp-code-indexer 4.2.15__py3-none-any.whl → 4.2.17__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (28)
  1. mcp_code_indexer/database/database.py +334 -115
  2. mcp_code_indexer/database/database_factory.py +1 -1
  3. mcp_code_indexer/database/exceptions.py +1 -1
  4. mcp_code_indexer/database/models.py +66 -24
  5. mcp_code_indexer/database/retry_executor.py +15 -5
  6. mcp_code_indexer/file_scanner.py +107 -12
  7. mcp_code_indexer/main.py +43 -30
  8. mcp_code_indexer/server/mcp_server.py +201 -7
  9. mcp_code_indexer/vector_mode/chunking/ast_chunker.py +103 -84
  10. mcp_code_indexer/vector_mode/chunking/chunk_optimizer.py +1 -0
  11. mcp_code_indexer/vector_mode/config.py +113 -45
  12. mcp_code_indexer/vector_mode/const.py +24 -0
  13. mcp_code_indexer/vector_mode/daemon.py +860 -98
  14. mcp_code_indexer/vector_mode/monitoring/change_detector.py +113 -97
  15. mcp_code_indexer/vector_mode/monitoring/file_watcher.py +175 -121
  16. mcp_code_indexer/vector_mode/providers/turbopuffer_client.py +291 -98
  17. mcp_code_indexer/vector_mode/providers/voyage_client.py +140 -38
  18. mcp_code_indexer/vector_mode/services/__init__.py +9 -0
  19. mcp_code_indexer/vector_mode/services/embedding_service.py +389 -0
  20. mcp_code_indexer/vector_mode/services/vector_mode_tools_service.py +459 -0
  21. mcp_code_indexer/vector_mode/services/vector_storage_service.py +580 -0
  22. mcp_code_indexer/vector_mode/types.py +46 -0
  23. mcp_code_indexer/vector_mode/utils.py +50 -0
  24. {mcp_code_indexer-4.2.15.dist-info → mcp_code_indexer-4.2.17.dist-info}/METADATA +13 -10
  25. {mcp_code_indexer-4.2.15.dist-info → mcp_code_indexer-4.2.17.dist-info}/RECORD +28 -21
  26. {mcp_code_indexer-4.2.15.dist-info → mcp_code_indexer-4.2.17.dist-info}/WHEEL +1 -1
  27. {mcp_code_indexer-4.2.15.dist-info → mcp_code_indexer-4.2.17.dist-info}/entry_points.txt +0 -0
  28. {mcp_code_indexer-4.2.15.dist-info → mcp_code_indexer-4.2.17.dist-info/licenses}/LICENSE +0 -0
mcp_code_indexer/database/database.py
@@ -27,13 +27,18 @@ from mcp_code_indexer.database.exceptions import (
 )
 from mcp_code_indexer.database.models import (
     FileDescription,
+    IndexMeta,
     Project,
     ProjectOverview,
     SearchResult,
+    SyncStatus,
     WordFrequencyResult,
     WordFrequencyTerm,
 )
-from mcp_code_indexer.database.retry_executor import create_retry_executor
+from mcp_code_indexer.database.retry_executor import (
+    create_retry_executor,
+    DatabaseLockError,
+)
 from mcp_code_indexer.query_preprocessor import preprocess_search_query
 
 logger = logging.getLogger(__name__)
@@ -52,7 +57,7 @@ class DatabaseManager:
         db_path: Path,
         pool_size: int = 3,
         retry_count: int = 5,
-        timeout: float = 10.0,
+        timeout: float = 30.0,
         enable_wal_mode: bool = True,
         health_check_interval: float = 30.0,
         retry_min_wait: float = 0.1,
@@ -220,7 +225,7 @@ class DatabaseManager:
                 "PRAGMA cache_size = -64000",  # 64MB cache
                 "PRAGMA temp_store = MEMORY",  # Use memory for temp tables
                 "PRAGMA mmap_size = 268435456",  # 256MB memory mapping
-                "PRAGMA busy_timeout = 10000",  # 10s timeout (reduced from 30s)
+                f"PRAGMA busy_timeout = {int(self.timeout * 1000)}",  # Use configured timeout
                 "PRAGMA optimize",  # Enable query planner optimizations
             ]
         )
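Note: SQLite's busy_timeout is expressed in milliseconds while the manager's timeout setting is in seconds, so the new f-string keeps the PRAGMA in step with the configured value. A minimal sketch of that conversion against a standalone connection (the helper name is illustrative, not part of the package):

    import asyncio

    import aiosqlite


    async def apply_busy_timeout(db_path: str, timeout_seconds: float = 30.0) -> None:
        # busy_timeout takes milliseconds; derive it from the seconds-based
        # setting so the two knobs can never drift apart.
        async with aiosqlite.connect(db_path) as conn:
            await conn.execute(f"PRAGMA busy_timeout = {int(timeout_seconds * 1000)}")
            await conn.execute("PRAGMA journal_mode = WAL")


    asyncio.run(apply_busy_timeout(":memory:"))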
@@ -315,12 +320,10 @@ class DatabaseManager:
         self, operation_name: str = "write_operation"
     ) -> AsyncIterator[aiosqlite.Connection]:
         """
-        Get a database connection with write serialization and automatic
-        retry logic.
+        Get a database connection with write serialization.
 
-        This uses the new RetryExecutor to properly handle retry logic
-        without the broken yield-in-retry-loop pattern that caused
-        generator errors.
+        Ensures the write lock is held throughout the duration of the context
+        to prevent race conditions and database locking errors.
 
         Args:
             operation_name: Name of the operation for logging and
@@ -331,43 +334,10 @@ class DatabaseManager:
                 "DatabaseManager not initialized - call initialize() first"
             )
 
-        async def get_write_connection() -> aiosqlite.Connection:
-            """Inner function to get connection - retried by executor."""
-            if self._write_lock is None:
-                raise RuntimeError("Write lock not initialized")
-            async with self._write_lock:
-                async with self.get_connection() as conn:
-                    return conn
-
-        try:
-            # Use retry executor to handle connection acquisition with retries
-            connection = await self._retry_executor.execute_with_retry(
-                get_write_connection, operation_name
-            )
-
-            try:
-                yield connection
-
-                # Success - retry executor handles all failure tracking
-
-            except Exception:
-                # Error handling is managed by the retry executor
-                raise
-
-        except DatabaseError:
-            # Re-raise our custom database errors as-is
-            raise
-        except Exception as e:
-            # Classify and wrap other exceptions
-            classified_error = classify_sqlite_error(e, operation_name)
-            logger.error(
-                (
-                    f"Database operation '{operation_name}' failed: "
-                    f"{classified_error.message}"
-                ),
-                extra={"structured_data": classified_error.to_dict()},
-            )
-            raise classified_error
+        # Acquire lock for exclusive write access - hold it for entire context
+        async with self._write_lock:
+            async with self.get_connection() as conn:
+                yield conn
 
     def get_database_stats(self) -> Dict[str, Any]:
         """
@@ -523,26 +493,39 @@ class DatabaseManager:
 
                     return result
 
-            except (aiosqlite.OperationalError, asyncio.TimeoutError) as e:
+            except aiosqlite.OperationalError as e:
                 # Record locking event for metrics
-                if self._metrics_collector and "locked" in str(e).lower():
+                error_msg = str(e).lower()
+                if self._metrics_collector and "locked" in error_msg:
                     self._metrics_collector.record_locking_event(operation_name, str(e))
 
-                # Classify the error for better handling
-                classified_error = classify_sqlite_error(e, operation_name)
-
-                # Record failed operation metrics for non-retryable errors
-                if not is_retryable_error(classified_error):
-                    if self._metrics_collector:
-                        self._metrics_collector.record_operation(
-                            operation_name,
-                            timeout_seconds * 1000,
-                            False,
-                            len(self._connection_pool),
-                        )
+                # For retryable errors (locked/busy), re-raise the ORIGINAL error
+                # so tenacity can retry. Only classify non-retryable errors.
+                if "locked" in error_msg or "busy" in error_msg:
+                    raise  # Let tenacity retry this
 
+                # Non-retryable OperationalError - classify and raise
+                classified_error = classify_sqlite_error(e, operation_name)
+                if self._metrics_collector:
+                    self._metrics_collector.record_operation(
+                        operation_name,
+                        timeout_seconds * 1000,
+                        False,
+                        len(self._connection_pool),
+                    )
                 raise classified_error
 
+            except asyncio.TimeoutError as e:
+                # Timeout on BEGIN IMMEDIATE - this is retryable
+                if self._metrics_collector:
+                    self._metrics_collector.record_locking_event(
+                        operation_name, "timeout waiting for lock"
+                    )
+                # Re-raise as OperationalError so tenacity can retry
+                raise aiosqlite.OperationalError(
+                    f"Timeout waiting for database lock: {e}"
+                ) from e
+
         try:
             # Create a temporary retry executor with custom max_retries if different
             # from default
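The comments above lean on tenacity retrying the raw "database is locked" / "database is busy" OperationalError and treating everything else as permanent. A rough sketch of a retry wrapper along those lines; the function names here are illustrative, while the package's real executor comes from create_retry_executor in retry_executor.py:

    import aiosqlite
    from tenacity import retry, retry_if_exception, stop_after_attempt, wait_exponential


    def _is_transient_lock_error(exc: BaseException) -> bool:
        # Retry only the transient lock/busy conditions; anything else should
        # surface immediately so it can be classified as a permanent error.
        return isinstance(exc, aiosqlite.OperationalError) and any(
            token in str(exc).lower() for token in ("locked", "busy")
        )


    @retry(
        retry=retry_if_exception(_is_transient_lock_error),
        stop=stop_after_attempt(5),
        wait=wait_exponential(multiplier=0.1, max=2.0),
        reraise=True,
    )
    async def run_write(conn: aiosqlite.Connection, sql: str, params: tuple) -> None:
        await conn.execute(sql, params)
        await conn.commit()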
@@ -567,8 +550,27 @@ class DatabaseManager:
                 execute_transaction, operation_name
             )
 
+        except DatabaseLockError as e:
+            # Retries exhausted - record metrics and convert to DatabaseError
+            if self._metrics_collector:
+                self._metrics_collector.record_operation(
+                    operation_name,
+                    timeout_seconds * 1000,
+                    False,
+                    len(self._connection_pool),
+                )
+            # Convert to a proper DatabaseError for consistent error handling
+            raise DatabaseError(
+                f"Database operation failed after retries: {e.message}",
+                error_context={
+                    "operation": operation_name,
+                    "retry_count": e.retry_count,
+                    "retryable": False,  # Retries already exhausted
+                },
+            ) from e
+
         except DatabaseError:
-            # Record failed operation metrics for final failure
+            # Non-retryable DatabaseError from classification
             if self._metrics_collector:
                 self._metrics_collector.record_operation(
                     operation_name,
@@ -740,6 +742,25 @@ class DatabaseManager:
             await db.commit()
             logger.debug(f"Updated project: {project.id}")
 
+    async def set_project_vector_mode(self, project_id: str, enabled: bool) -> None:
+        """Set the vector_mode for a specific project."""
+        async with self.get_write_connection_with_retry(
+            "set_project_vector_mode"
+        ) as db:
+            await db.execute(
+                "UPDATE projects SET vector_mode = ? WHERE id = ?",
+                (int(enabled), project_id),
+            )
+
+            # Check if the project was actually updated
+            cursor = await db.execute("SELECT changes()")
+            changes = await cursor.fetchone()
+            if changes[0] == 0:
+                raise DatabaseError(f"Project not found: {project_id}")
+
+            await db.commit()
+            logger.debug(f"Set vector_mode={enabled} for project: {project_id}")
+
     async def get_all_projects(self) -> List[Project]:
         """Get all projects in the database."""
         async with self.get_connection() as db:
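SELECT changes() reports how many rows the immediately preceding statement modified on the same connection, which is what lets set_project_vector_mode distinguish "updated" from "project not found". A self-contained sketch of that guard:

    import asyncio

    import aiosqlite


    async def demo_changes_guard() -> None:
        async with aiosqlite.connect(":memory:") as conn:
            await conn.execute(
                "CREATE TABLE projects (id TEXT PRIMARY KEY, vector_mode INTEGER)"
            )
            await conn.execute("INSERT INTO projects VALUES ('p1', 0)")

            # An UPDATE against a missing id touches zero rows...
            await conn.execute("UPDATE projects SET vector_mode = 1 WHERE id = 'missing'")
            cursor = await conn.execute("SELECT changes()")
            (changed,) = await cursor.fetchone()
            assert changed == 0  # ...so the caller can raise a not-found error


    asyncio.run(demo_changes_guard())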
@@ -751,12 +772,18 @@ class DatabaseManager:
             projects = []
             for row in rows:
                 aliases = json.loads(row[2]) if row[2] else []
+                created = row[3]
+                last_accessed = row[4]
+                if isinstance(created, str):
+                    created = datetime.fromisoformat(created)
+                if isinstance(last_accessed, str):
+                    last_accessed = datetime.fromisoformat(last_accessed)
                 project = Project(
                     id=row[0],
                     name=row[1],
                     aliases=aliases,
-                    created=row[3],
-                    last_accessed=row[4],
+                    created=created,
+                    last_accessed=last_accessed,
                     vector_mode=bool(row[5]),
                 )
                 projects.append(project)
@@ -774,12 +801,18 @@ class DatabaseManager:
             projects = []
             for row in rows:
                 aliases = json.loads(row[2]) if row[2] else []
+                created = row[3]
+                last_accessed = row[4]
+                if isinstance(created, str):
+                    created = datetime.fromisoformat(created)
+                if isinstance(last_accessed, str):
+                    last_accessed = datetime.fromisoformat(last_accessed)
                 project = Project(
                     id=row[0],
                     name=row[1],
                     aliases=aliases,
-                    created=row[3],
-                    last_accessed=row[4],
+                    created=created,
+                    last_accessed=last_accessed,
                     vector_mode=bool(row[5]),
                 )
                 projects.append(project)
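Unless a type converter is registered, SQLite hands TIMESTAMP columns back as ISO-8601 strings, which is why both loops above now coerce created / last_accessed with datetime.fromisoformat before building the Project model. A small, self-contained sketch of that tolerant parsing (parse_timestamp is an illustrative name):

    from datetime import datetime
    from typing import Union


    def parse_timestamp(value: Union[str, datetime]) -> datetime:
        # Accept either a raw ISO-8601 string from SQLite or an already-parsed
        # datetime, and always return a datetime.
        if isinstance(value, str):
            return datetime.fromisoformat(value)
        return value


    print(parse_timestamp("2024-05-01T12:30:00"))  # parsed from the stored string
    print(parse_timestamp(datetime(2024, 5, 1)))   # passed through unchanged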
@@ -790,17 +823,22 @@ class DatabaseManager:
 
     async def create_file_description(self, file_desc: FileDescription) -> None:
         """Create or update a file description."""
-        async with self.get_write_connection_with_retry(
-            "create_file_description"
-        ) as db:
+        async def operation(db: aiosqlite.Connection) -> None:
             await db.execute(
                 """
-                INSERT OR REPLACE INTO file_descriptions
+                INSERT INTO file_descriptions
                 (
                     project_id, file_path, description, file_hash, last_modified,
                     version, source_project_id, to_be_cleaned
                 )
                 VALUES (?, ?, ?, ?, ?, ?, ?, ?)
+                ON CONFLICT(project_id, file_path) DO UPDATE SET
+                    description=excluded.description,
+                    file_hash=excluded.file_hash,
+                    last_modified=excluded.last_modified,
+                    version=excluded.version,
+                    source_project_id=excluded.source_project_id,
+                    to_be_cleaned=excluded.to_be_cleaned
                 """,
                 (
                     file_desc.project_id,
@@ -813,8 +851,12 @@ class DatabaseManager:
                     file_desc.to_be_cleaned,
                 ),
             )
-            await db.commit()
-            logger.debug(f"Saved file description: {file_desc.file_path}")
+
+        await self.execute_transaction_with_retry(
+            operation,
+            "create_file_description"
+        )
+        logger.debug(f"Saved file description: {file_desc.file_path}")
 
     async def get_file_description(
         self, project_id: str, file_path: str
@@ -898,12 +940,19 @@ class DatabaseManager:
 
             await conn.executemany(
                 """
-                INSERT OR REPLACE INTO file_descriptions
+                INSERT INTO file_descriptions
                 (
                     project_id, file_path, description, file_hash, last_modified,
                     version, source_project_id, to_be_cleaned
                 )
                 VALUES (?, ?, ?, ?, ?, ?, ?, ?)
+                ON CONFLICT(project_id, file_path) DO UPDATE SET
+                    description=excluded.description,
+                    file_hash=excluded.file_hash,
+                    last_modified=excluded.last_modified,
+                    version=excluded.version,
+                    source_project_id=excluded.source_project_id,
+                    to_be_cleaned=excluded.to_be_cleaned
                 """,
                 data,
            )
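Switching from INSERT OR REPLACE to INSERT ... ON CONFLICT DO UPDATE is usually done because OR REPLACE resolves a conflict by deleting the old row and inserting a new one (new rowid, delete triggers, cascading deletes), whereas the upsert updates the existing row in place. A self-contained sketch of the upsert against a reduced file_descriptions table:

    import sqlite3

    conn = sqlite3.connect(":memory:")
    conn.execute(
        "CREATE TABLE file_descriptions ("
        " project_id TEXT, file_path TEXT, description TEXT,"
        " PRIMARY KEY (project_id, file_path))"
    )

    upsert = (
        "INSERT INTO file_descriptions (project_id, file_path, description) "
        "VALUES (?, ?, ?) "
        "ON CONFLICT(project_id, file_path) DO UPDATE SET description=excluded.description"
    )
    conn.execute(upsert, ("p1", "src/main.py", "entry point"))
    conn.execute(upsert, ("p1", "src/main.py", "entry point (updated)"))  # updates in place

    print(conn.execute("SELECT description FROM file_descriptions").fetchall())
    # [('entry point (updated)',)]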
@@ -1018,7 +1067,7 @@ class DatabaseManager:
 
     async def create_project_overview(self, overview: ProjectOverview) -> None:
         """Create or update a project overview."""
-        async with self.get_write_connection() as db:
+        async def operation(db: aiosqlite.Connection) -> None:
             await db.execute(
                 """
                 INSERT OR REPLACE INTO project_overviews
@@ -1033,8 +1082,12 @@ class DatabaseManager:
                     overview.total_tokens,
                 ),
             )
-            await db.commit()
-            logger.debug(f"Created/updated overview for project {overview.project_id}")
+
+        await self.execute_transaction_with_retry(
+            operation,
+            "create_project_overview"
+        )
+        logger.debug(f"Created/updated overview for project {overview.project_id}")
 
     async def get_project_overview(self, project_id: str) -> Optional[ProjectOverview]:
         """Get project overview by ID."""
@@ -1067,10 +1120,8 @@ class DatabaseManager:
         Returns:
             List of file paths that were marked for cleanup
         """
-        removed_files: List[str] = []
-
-        async def cleanup_operation(conn: aiosqlite.Connection) -> List[str]:
-            # Get all active file descriptions for this project
+        # 1. Get all active file paths (fast DB read)
+        async with self.get_connection() as conn:
             cursor = await conn.execute(
                 (
                     "SELECT file_path FROM file_descriptions WHERE "
@@ -1078,46 +1129,29 @@ class DatabaseManager:
                 ),
                 (project_id,),
             )
-
             rows = await cursor.fetchall()
+            file_paths = [row["file_path"] for row in rows]
 
-            # Check which files no longer exist
-            to_remove = []
-            for row in rows:
-                file_path = row["file_path"]
+        # 2. Check existence on disk (blocking IO - run in executor)
+        def find_removed_files() -> List[str]:
+            missing = []
+            for file_path in file_paths:
                 full_path = project_root / file_path
-
                 if not full_path.exists():
-                    to_remove.append(file_path)
-
-            # Mark descriptions for cleanup instead of deleting
-            if to_remove:
-                import time
-
-                cleanup_timestamp = int(time.time())
-                await conn.executemany(
-                    (
-                        "UPDATE file_descriptions SET to_be_cleaned = ? WHERE "
-                        "project_id = ? AND file_path = ?"
-                    ),
-                    [(cleanup_timestamp, project_id, path) for path in to_remove],
-                )
-                logger.info(
-                    (
-                        f"Marked {len(to_remove)} missing files for cleanup "
-                        f"from {project_id}"
-                    )
-                )
+                    missing.append(file_path)
+            return missing
 
-            return to_remove
+        loop = asyncio.get_running_loop()
+        to_remove = await loop.run_in_executor(None, find_removed_files)
 
-        removed_files = await self.execute_transaction_with_retry(
-            cleanup_operation,
-            f"cleanup_missing_files_{project_id}",
-            timeout_seconds=60.0,  # Longer timeout for file system operations
-        )
+        # 3. Mark for cleanup (fast DB write)
+        if to_remove:
+            await self.cleanup_manager.mark_files_for_cleanup(project_id, to_remove)
+            logger.info(
+                f"Marked {len(to_remove)} missing files for cleanup from {project_id}"
+            )
 
-        return removed_files
+        return to_remove
 
     async def analyze_word_frequency(
         self, project_id: str, limit: int = 200
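Path.exists() is synchronous, so checking a large project inside the event loop would stall every other coroutine; the rewrite batches the checks into a worker thread via run_in_executor. A minimal, self-contained sketch of that offload (find_missing is an illustrative name):

    import asyncio
    from pathlib import Path
    from typing import List


    async def find_missing(project_root: Path, file_paths: List[str]) -> List[str]:
        # Path.exists() hits the filesystem synchronously; running the whole
        # batch in a worker thread keeps the event loop responsive.
        def check() -> List[str]:
            return [p for p in file_paths if not (project_root / p).exists()]

        loop = asyncio.get_running_loop()
        return await loop.run_in_executor(None, check)


    print(asyncio.run(find_missing(Path("."), ["README.md", "does_not_exist.py"])))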
@@ -1139,7 +1173,7 @@ class DatabaseManager:
         stop_words_path = (
             Path(__file__).parent.parent / "data" / "stop_words_english.txt"
         )
-        stop_words = set()
+        stop_words: set = set()
 
         if stop_words_path.exists():
             with open(stop_words_path, "r", encoding="utf-8") as f:
@@ -1186,8 +1220,8 @@ class DatabaseManager:
         }
         stop_words.update(programming_keywords)
 
+        # Get all descriptions for this project (fast DB read)
         async with self.get_connection() as db:
-            # Get all descriptions for this project
             cursor = await db.execute(
                 (
                     "SELECT description FROM file_descriptions WHERE "
1195
1229
  ),
1196
1230
  (project_id,),
1197
1231
  )
1198
-
1199
1232
  rows = await cursor.fetchall()
1233
+ descriptions = [row["description"] for row in rows]
1200
1234
 
1235
+ # Process word frequency in executor (CPU-bound work)
1236
+ def process_word_frequency() -> WordFrequencyResult:
1201
1237
  # Combine all descriptions
1202
- all_text = " ".join(row["description"] for row in rows)
1238
+ all_text = " ".join(descriptions)
1203
1239
 
1204
1240
  # Tokenize and filter
1205
1241
  words = re.findall(r"\b[a-zA-Z]{2,}\b", all_text.lower())
@@ -1220,6 +1256,9 @@ class DatabaseManager:
                 total_unique_terms=len(word_counts),
             )
 
+        loop = asyncio.get_running_loop()
+        return await loop.run_in_executor(None, process_word_frequency)
+
     async def cleanup_empty_projects(self) -> int:
         """
         Remove projects that have no file descriptions and no project overview.
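The extracted process_word_frequency helper applies the same offload to CPU-bound work: regex tokenization and counting over every description string. A self-contained sketch of that tokenize-and-count step (top_terms is an illustrative name, not the package's API):

    import re
    from collections import Counter
    from typing import Iterable, List, Set, Tuple


    def top_terms(
        descriptions: Iterable[str], stop_words: Set[str], limit: int = 5
    ) -> List[Tuple[str, int]]:
        # Lowercase alphabetic tokens of length >= 2, minus stop words,
        # ranked by frequency.
        text = " ".join(descriptions).lower()
        words = re.findall(r"\b[a-zA-Z]{2,}\b", text)
        counts = Counter(w for w in words if w not in stop_words)
        return counts.most_common(limit)


    print(top_terms(["Parses config files", "Loads config defaults"], {"files"}))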
@@ -1320,6 +1359,186 @@ class DatabaseManager:
             "files": file_descriptions,
         }
 
+    # IndexMeta operations
+    async def create_index_meta(self, index_meta: IndexMeta) -> None:
+        """Create or update index metadata for a project."""
+        async with self.get_write_connection_with_retry("create_index_meta") as db:
+            await db.execute(
+                """
+                INSERT OR REPLACE INTO index_meta (
+                    project_id, total_chunks, indexed_chunks, total_files, indexed_files,
+                    last_sync, sync_status, error_message, queue_depth, processing_rate,
+                    estimated_completion, metadata, created, last_modified
+                )
+                VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
+                """,
+                (
+                    index_meta.project_id,
+                    index_meta.total_chunks,
+                    index_meta.indexed_chunks,
+                    index_meta.total_files,
+                    index_meta.indexed_files,
+                    index_meta.last_sync,
+                    index_meta.sync_status.value,
+                    index_meta.error_message,
+                    index_meta.queue_depth,
+                    index_meta.processing_rate,
+                    index_meta.estimated_completion,
+                    json.dumps(index_meta.metadata),
+                    index_meta.created,
+                    index_meta.last_modified,
+                ),
+            )
+            await db.commit()
+            logger.debug(
+                f"Created/updated index metadata for project: {index_meta.project_id}"
+            )
+
+    async def update_index_meta(self, index_meta: IndexMeta) -> None:
+        """Update existing index metadata for a project."""
+        async with self.get_write_connection_with_retry("update_index_meta") as db:
+            await db.execute(
+                """
+                UPDATE index_meta
+                SET total_chunks = ?, indexed_chunks = ?, total_files = ?, indexed_files = ?,
+                    last_sync = ?, sync_status = ?, error_message = ?, queue_depth = ?,
+                    processing_rate = ?, estimated_completion = ?, metadata = ?, last_modified = ?
+                WHERE project_id = ?
+                """,
+                (
+                    index_meta.total_chunks,
+                    index_meta.indexed_chunks,
+                    index_meta.total_files,
+                    index_meta.indexed_files,
+                    index_meta.last_sync,
+                    index_meta.sync_status.value,
+                    index_meta.error_message,
+                    index_meta.queue_depth,
+                    index_meta.processing_rate,
+                    index_meta.estimated_completion,
+                    json.dumps(index_meta.metadata),
+                    index_meta.last_modified,
+                    index_meta.project_id,
+                ),
+            )
+
+            # Check if the project was actually updated
+            cursor = await db.execute("SELECT changes()")
+            changes = await cursor.fetchone()
+            if changes[0] == 0:
+                raise DatabaseError(
+                    f"Index metadata not found for project: {index_meta.project_id}"
+                )
+
+            await db.commit()
+            logger.debug(f"Updated index metadata for project: {index_meta.project_id}")
+
+    async def get_index_meta(self, project_id: str) -> Optional[IndexMeta]:
+        """Retrieve index metadata for a project."""
+        async with self.get_connection() as db:
+            cursor = await db.execute(
+                "SELECT * FROM index_meta WHERE project_id = ?", (project_id,)
+            )
+            row = await cursor.fetchone()
+
+            if row:
+                # Convert row to dict for easier field access
+                row_dict = dict(row)
+
+                # Parse JSON metadata field
+                metadata = (
+                    json.loads(row_dict["metadata"]) if row_dict["metadata"] else {}
+                )
+
+                # Parse datetime fields
+                created = (
+                    datetime.fromisoformat(row_dict["created"])
+                    if row_dict["created"]
+                    else datetime.utcnow()
+                )
+                last_modified = (
+                    datetime.fromisoformat(row_dict["last_modified"])
+                    if row_dict["last_modified"]
+                    else datetime.utcnow()
+                )
+                last_sync = (
+                    datetime.fromisoformat(row_dict["last_sync"])
+                    if row_dict["last_sync"]
+                    else None
+                )
+                estimated_completion = (
+                    datetime.fromisoformat(row_dict["estimated_completion"])
+                    if row_dict["estimated_completion"]
+                    else None
+                )
+
+                return IndexMeta(
+                    id=row_dict["id"],
+                    project_id=row_dict["project_id"],
+                    total_chunks=row_dict["total_chunks"],
+                    indexed_chunks=row_dict["indexed_chunks"],
+                    total_files=row_dict["total_files"],
+                    indexed_files=row_dict["indexed_files"],
+                    last_sync=last_sync,
+                    sync_status=row_dict["sync_status"],
+                    error_message=row_dict["error_message"],
+                    queue_depth=row_dict["queue_depth"],
+                    processing_rate=row_dict["processing_rate"],
+                    estimated_completion=estimated_completion,
+                    metadata=metadata,
+                    created=created,
+                    last_modified=last_modified,
+                )
+            return None
+
+    async def get_or_create_index_meta(self, project_id: str, **kwargs) -> IndexMeta:
+        """
+        Get existing index metadata or create new one with default values.
+
+        Args:
+            project_id: Project identifier
+            **kwargs: Optional fields to override defaults when creating new metadata
+
+        Returns:
+            IndexMeta object (existing or newly created)
+        """
+        # Try to get existing metadata first
+        existing_meta = await self.get_index_meta(project_id)
+        if existing_meta:
+            return existing_meta
+
+        # Create new metadata with defaults, allowing kwargs to override
+        default_metadata = {
+            "project_id": project_id,
+            "total_chunks": 0,
+            "indexed_chunks": 0,
+            "total_files": 0,
+            "indexed_files": 0,
+            "last_sync": None,
+            "sync_status": SyncStatus.PENDING,
+            "error_message": None,
+            "queue_depth": 0,
+            "processing_rate": 0.0,
+            "estimated_completion": None,
+            "metadata": {},
+        }
+
+        # Override defaults with provided kwargs
+        default_metadata.update(kwargs)
+
+        # Create the IndexMeta object
+        new_meta = IndexMeta(**default_metadata)
+
+        # Store it in the database
+        await self.create_index_meta(new_meta)
+
+        # Return the created metadata (fetch it back to get the assigned ID)
+        result = await self.get_index_meta(project_id)
+        if result is None:
+            raise DatabaseError(f"Failed to create index metadata for project: {project_id}")
+
+        return result
+
     # Cleanup operations
 
     @property
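get_or_create_index_meta builds a defaults dict, lets keyword arguments override individual fields, then persists the record and reads it back to pick up the assigned id. A minimal sketch of that defaults-plus-overrides pattern, with a dataclass standing in for the real IndexMeta model and an assumed value for SyncStatus.PENDING:

    from dataclasses import dataclass, field
    from enum import Enum
    from typing import Optional


    class SyncStatus(Enum):
        PENDING = "pending"  # assumed value; the real enum lives in database.models


    @dataclass
    class IndexMetaSketch:
        project_id: str
        total_chunks: int = 0
        indexed_chunks: int = 0
        sync_status: SyncStatus = SyncStatus.PENDING
        error_message: Optional[str] = None
        metadata: dict = field(default_factory=dict)


    def build_index_meta(project_id: str, **overrides) -> IndexMetaSketch:
        defaults = {"project_id": project_id, "total_chunks": 0, "indexed_chunks": 0}
        defaults.update(overrides)  # caller-supplied kwargs win, as in the diff
        return IndexMetaSketch(**defaults)


    print(build_index_meta("p1", total_chunks=42))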
mcp_code_indexer/database/database_factory.py
@@ -28,7 +28,7 @@ class DatabaseFactory:
         global_db_path: Path,
         pool_size: int = 3,
         retry_count: int = 5,
-        timeout: float = 10.0,
+        timeout: float = 30.0,
         enable_wal_mode: bool = True,
         health_check_interval: float = 30.0,
         retry_min_wait: float = 0.1,
mcp_code_indexer/database/exceptions.py
@@ -236,7 +236,7 @@ def classify_sqlite_error(error: Exception, operation_name: str = "") -> DatabaseError:
        for msg in [
            "no such table",
            "no such column",
-            "table already exists",
+            "already exists",
            "syntax error",
        ]
    ):