mcp-code-indexer 4.2.15__py3-none-any.whl → 4.2.17__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- mcp_code_indexer/database/database.py +334 -115
- mcp_code_indexer/database/database_factory.py +1 -1
- mcp_code_indexer/database/exceptions.py +1 -1
- mcp_code_indexer/database/models.py +66 -24
- mcp_code_indexer/database/retry_executor.py +15 -5
- mcp_code_indexer/file_scanner.py +107 -12
- mcp_code_indexer/main.py +43 -30
- mcp_code_indexer/server/mcp_server.py +201 -7
- mcp_code_indexer/vector_mode/chunking/ast_chunker.py +103 -84
- mcp_code_indexer/vector_mode/chunking/chunk_optimizer.py +1 -0
- mcp_code_indexer/vector_mode/config.py +113 -45
- mcp_code_indexer/vector_mode/const.py +24 -0
- mcp_code_indexer/vector_mode/daemon.py +860 -98
- mcp_code_indexer/vector_mode/monitoring/change_detector.py +113 -97
- mcp_code_indexer/vector_mode/monitoring/file_watcher.py +175 -121
- mcp_code_indexer/vector_mode/providers/turbopuffer_client.py +291 -98
- mcp_code_indexer/vector_mode/providers/voyage_client.py +140 -38
- mcp_code_indexer/vector_mode/services/__init__.py +9 -0
- mcp_code_indexer/vector_mode/services/embedding_service.py +389 -0
- mcp_code_indexer/vector_mode/services/vector_mode_tools_service.py +459 -0
- mcp_code_indexer/vector_mode/services/vector_storage_service.py +580 -0
- mcp_code_indexer/vector_mode/types.py +46 -0
- mcp_code_indexer/vector_mode/utils.py +50 -0
- {mcp_code_indexer-4.2.15.dist-info → mcp_code_indexer-4.2.17.dist-info}/METADATA +13 -10
- {mcp_code_indexer-4.2.15.dist-info → mcp_code_indexer-4.2.17.dist-info}/RECORD +28 -21
- {mcp_code_indexer-4.2.15.dist-info → mcp_code_indexer-4.2.17.dist-info}/WHEEL +1 -1
- {mcp_code_indexer-4.2.15.dist-info → mcp_code_indexer-4.2.17.dist-info}/entry_points.txt +0 -0
- {mcp_code_indexer-4.2.15.dist-info → mcp_code_indexer-4.2.17.dist-info/licenses}/LICENSE +0 -0
@@ -27,13 +27,18 @@ from mcp_code_indexer.database.exceptions import (
 )
 from mcp_code_indexer.database.models import (
     FileDescription,
+    IndexMeta,
     Project,
     ProjectOverview,
     SearchResult,
+    SyncStatus,
     WordFrequencyResult,
     WordFrequencyTerm,
 )
-from mcp_code_indexer.database.retry_executor import
+from mcp_code_indexer.database.retry_executor import (
+    create_retry_executor,
+    DatabaseLockError,
+)
 from mcp_code_indexer.query_preprocessor import preprocess_search_query
 
 logger = logging.getLogger(__name__)
@@ -52,7 +57,7 @@ class DatabaseManager:
         db_path: Path,
         pool_size: int = 3,
         retry_count: int = 5,
-        timeout: float =
+        timeout: float = 30.0,
         enable_wal_mode: bool = True,
         health_check_interval: float = 30.0,
         retry_min_wait: float = 0.1,
@@ -220,7 +225,7 @@ class DatabaseManager:
                 "PRAGMA cache_size = -64000",  # 64MB cache
                 "PRAGMA temp_store = MEMORY",  # Use memory for temp tables
                 "PRAGMA mmap_size = 268435456",  # 256MB memory mapping
-                "PRAGMA busy_timeout =
+                f"PRAGMA busy_timeout = {int(self.timeout * 1000)}",  # Use configured timeout
                 "PRAGMA optimize",  # Enable query planner optimizations
             ]
         )
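
SQLite's busy_timeout pragma is specified in milliseconds, while the manager's timeout parameter is in seconds, hence the int(self.timeout * 1000) conversion in the new line. A minimal standalone sketch of the same idea using only the sqlite3 standard library (the function and file names here are illustrative, not from this package):

    import sqlite3

    def open_with_busy_timeout(path: str, timeout_seconds: float) -> sqlite3.Connection:
        """Open a connection whose busy handler waits instead of failing fast."""
        conn = sqlite3.connect(path)
        # busy_timeout expects milliseconds; too small a value surfaces
        # "database is locked" errors under concurrent writers.
        conn.execute(f"PRAGMA busy_timeout = {int(timeout_seconds * 1000)}")
        return conn

    conn = open_with_busy_timeout("example.db", 30.0)
    print(conn.execute("PRAGMA busy_timeout").fetchone())  # (30000,)
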
@@ -315,12 +320,10 @@ class DatabaseManager:
         self, operation_name: str = "write_operation"
     ) -> AsyncIterator[aiosqlite.Connection]:
         """
-        Get a database connection with write serialization
-        retry logic.
+        Get a database connection with write serialization.
 
-
-
-        generator errors.
+        Ensures the write lock is held throughout the duration of the context
+        to prevent race conditions and database locking errors.
 
         Args:
             operation_name: Name of the operation for logging and
@@ -331,43 +334,10 @@ class DatabaseManager:
                 "DatabaseManager not initialized - call initialize() first"
             )
 
-
-
-
-
-            async with self._write_lock:
-                async with self.get_connection() as conn:
-                    return conn
-
-        try:
-            # Use retry executor to handle connection acquisition with retries
-            connection = await self._retry_executor.execute_with_retry(
-                get_write_connection, operation_name
-            )
-
-            try:
-                yield connection
-
-                # Success - retry executor handles all failure tracking
-
-            except Exception:
-                # Error handling is managed by the retry executor
-                raise
-
-        except DatabaseError:
-            # Re-raise our custom database errors as-is
-            raise
-        except Exception as e:
-            # Classify and wrap other exceptions
-            classified_error = classify_sqlite_error(e, operation_name)
-            logger.error(
-                (
-                    f"Database operation '{operation_name}' failed: "
-                    f"{classified_error.message}"
-                ),
-                extra={"structured_data": classified_error.to_dict()},
-            )
-            raise classified_error
+        # Acquire lock for exclusive write access - hold it for entire context
+        async with self._write_lock:
+            async with self.get_connection() as conn:
+                yield conn
 
     def get_database_stats(self) -> Dict[str, Any]:
         """
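
The old implementation returned the connection from inside `async with self._write_lock`, which released the lock as soon as the connection was handed back and left the caller writing without serialization. The rewrite yields inside the lock, so it stays held for the caller's entire block. A minimal sketch of the pattern with hypothetical names (a string stands in for a real aiosqlite connection):

    import asyncio
    from contextlib import asynccontextmanager

    class WriteSerializer:
        def __init__(self) -> None:
            self._write_lock = asyncio.Lock()

        @asynccontextmanager
        async def write_access(self):
            # Held until the caller's block exits, so concurrent writers
            # are fully serialized rather than racing after acquisition.
            async with self._write_lock:
                yield "connection"

    async def main() -> None:
        serializer = WriteSerializer()
        async with serializer.write_access() as conn:
            print("writing with", conn)

    asyncio.run(main())
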
@@ -523,26 +493,39 @@ class DatabaseManager:
 
                     return result
 
-                except
+                except aiosqlite.OperationalError as e:
                     # Record locking event for metrics
-
+                    error_msg = str(e).lower()
+                    if self._metrics_collector and "locked" in error_msg:
                         self._metrics_collector.record_locking_event(operation_name, str(e))
 
-                    #
-
-
-
-                    if not is_retryable_error(classified_error):
-                        if self._metrics_collector:
-                            self._metrics_collector.record_operation(
-                                operation_name,
-                                timeout_seconds * 1000,
-                                False,
-                                len(self._connection_pool),
-                            )
+                    # For retryable errors (locked/busy), re-raise the ORIGINAL error
+                    # so tenacity can retry. Only classify non-retryable errors.
+                    if "locked" in error_msg or "busy" in error_msg:
+                        raise  # Let tenacity retry this
 
+                    # Non-retryable OperationalError - classify and raise
+                    classified_error = classify_sqlite_error(e, operation_name)
+                    if self._metrics_collector:
+                        self._metrics_collector.record_operation(
+                            operation_name,
+                            timeout_seconds * 1000,
+                            False,
+                            len(self._connection_pool),
+                        )
                     raise classified_error
 
+                except asyncio.TimeoutError as e:
+                    # Timeout on BEGIN IMMEDIATE - this is retryable
+                    if self._metrics_collector:
+                        self._metrics_collector.record_locking_event(
+                            operation_name, "timeout waiting for lock"
+                        )
+                    # Re-raise as OperationalError so tenacity can retry
+                    raise aiosqlite.OperationalError(
+                        f"Timeout waiting for database lock: {e}"
+                    ) from e
+
             try:
                 # Create a temporary retry executor with custom max_retries if different
                 # from default
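
The key change here is that a classified wrapper exception no longer replaces the original one before the retry layer sees it: the retry predicate matches on the original aiosqlite.OperationalError. The diff's comments reference tenacity; a sketch of how such a predicate-based retry could look, using synchronous sqlite3 for brevity (the table and function names are hypothetical, not the package's):

    import sqlite3
    from tenacity import retry, retry_if_exception, stop_after_attempt, wait_exponential

    def _is_lock_error(exc: BaseException) -> bool:
        # Mirrors the "locked"/"busy" check above: only transient lock
        # contention is worth retrying.
        return isinstance(exc, sqlite3.OperationalError) and any(
            word in str(exc).lower() for word in ("locked", "busy")
        )

    @retry(
        retry=retry_if_exception(_is_lock_error),
        stop=stop_after_attempt(5),
        wait=wait_exponential(multiplier=0.1, max=2.0),
    )
    def write_row(conn: sqlite3.Connection) -> None:
        conn.execute("INSERT INTO events (payload) VALUES ('x')")
        conn.commit()
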
@@ -567,8 +550,27 @@ class DatabaseManager:
                 execute_transaction, operation_name
             )
 
+        except DatabaseLockError as e:
+            # Retries exhausted - record metrics and convert to DatabaseError
+            if self._metrics_collector:
+                self._metrics_collector.record_operation(
+                    operation_name,
+                    timeout_seconds * 1000,
+                    False,
+                    len(self._connection_pool),
+                )
+            # Convert to a proper DatabaseError for consistent error handling
+            raise DatabaseError(
+                f"Database operation failed after retries: {e.message}",
+                error_context={
+                    "operation": operation_name,
+                    "retry_count": e.retry_count,
+                    "retryable": False,  # Retries already exhausted
+                },
+            ) from e
+
         except DatabaseError:
-            #
+            # Non-retryable DatabaseError from classification
             if self._metrics_collector:
                 self._metrics_collector.record_operation(
                     operation_name,
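
Once DatabaseLockError signals that the retry budget is spent, it is converted into a DatabaseError whose context marks it non-retryable, so outer layers cannot retry it again. A simplified sketch of that conversion with stand-in exception classes (the package's real classes live in database/exceptions.py and retry_executor.py and may differ; the .message and .retry_count attributes are taken from the diff):

    from typing import Optional

    class DatabaseError(Exception):
        def __init__(self, message: str, error_context: Optional[dict] = None) -> None:
            super().__init__(message)
            self.message = message
            self.error_context = error_context or {}

    class DatabaseLockError(Exception):
        def __init__(self, message: str, retry_count: int) -> None:
            super().__init__(message)
            self.message = message
            self.retry_count = retry_count

    def convert_exhausted_lock_error(e: DatabaseLockError, operation: str) -> DatabaseError:
        # retryable=False tells callers the retry budget is already spent.
        return DatabaseError(
            f"Database operation failed after retries: {e.message}",
            error_context={
                "operation": operation,
                "retry_count": e.retry_count,
                "retryable": False,
            },
        )
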
@@ -740,6 +742,25 @@ class DatabaseManager:
             await db.commit()
             logger.debug(f"Updated project: {project.id}")
 
+    async def set_project_vector_mode(self, project_id: str, enabled: bool) -> None:
+        """Set the vector_mode for a specific project."""
+        async with self.get_write_connection_with_retry(
+            "set_project_vector_mode"
+        ) as db:
+            await db.execute(
+                "UPDATE projects SET vector_mode = ? WHERE id = ?",
+                (int(enabled), project_id),
+            )
+
+            # Check if the project was actually updated
+            cursor = await db.execute("SELECT changes()")
+            changes = await cursor.fetchone()
+            if changes[0] == 0:
+                raise DatabaseError(f"Project not found: {project_id}")
+
+            await db.commit()
+            logger.debug(f"Set vector_mode={enabled} for project: {project_id}")
+
     async def get_all_projects(self) -> List[Project]:
         """Get all projects in the database."""
         async with self.get_connection() as db:
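
An UPDATE in SQLite succeeds even when no row matches, so the new method follows it with SELECT changes(), the count of rows touched by the last statement on this connection, to distinguish "updated" from "no such project". The same check in plain sqlite3, where the cursor's rowcount carries that count directly (schema and names are illustrative):

    import sqlite3

    def set_vector_mode(conn: sqlite3.Connection, project_id: str, enabled: bool) -> None:
        cur = conn.execute(
            "UPDATE projects SET vector_mode = ? WHERE id = ?",
            (int(enabled), project_id),
        )
        # rowcount (equivalently SELECT changes()) is 0 when nothing matched.
        if cur.rowcount == 0:
            raise LookupError(f"Project not found: {project_id}")
        conn.commit()
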
@@ -751,12 +772,18 @@ class DatabaseManager:
             projects = []
             for row in rows:
                 aliases = json.loads(row[2]) if row[2] else []
+                created = row[3]
+                last_accessed = row[4]
+                if isinstance(created, str):
+                    created = datetime.fromisoformat(created)
+                if isinstance(last_accessed, str):
+                    last_accessed = datetime.fromisoformat(last_accessed)
                 project = Project(
                     id=row[0],
                     name=row[1],
                     aliases=aliases,
-                    created=
-                    last_accessed=
+                    created=created,
+                    last_accessed=last_accessed,
                     vector_mode=bool(row[5]),
                 )
                 projects.append(project)
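
Depending on how a connection is configured, SQLite hands timestamp columns back either as datetime objects or as ISO-8601 strings; the added isinstance checks normalize both before constructing the model. The same guard as a small standalone helper:

    from datetime import datetime
    from typing import Optional, Union

    def as_datetime(value: Union[str, datetime, None]) -> Optional[datetime]:
        """Normalize a column that may arrive as ISO-8601 text or a datetime."""
        if isinstance(value, str):
            return datetime.fromisoformat(value)
        return value

    print(as_datetime("2024-05-01T12:30:00"))  # 2024-05-01 12:30:00
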
@@ -774,12 +801,18 @@ class DatabaseManager:
             projects = []
             for row in rows:
                 aliases = json.loads(row[2]) if row[2] else []
+                created = row[3]
+                last_accessed = row[4]
+                if isinstance(created, str):
+                    created = datetime.fromisoformat(created)
+                if isinstance(last_accessed, str):
+                    last_accessed = datetime.fromisoformat(last_accessed)
                 project = Project(
                     id=row[0],
                     name=row[1],
                     aliases=aliases,
-                    created=
-                    last_accessed=
+                    created=created,
+                    last_accessed=last_accessed,
                     vector_mode=bool(row[5]),
                 )
                 projects.append(project)
@@ -790,17 +823,22 @@ class DatabaseManager:
 
     async def create_file_description(self, file_desc: FileDescription) -> None:
         """Create or update a file description."""
-        async
-            "create_file_description"
-        ) as db:
+        async def operation(db: aiosqlite.Connection) -> None:
             await db.execute(
                 """
-                INSERT
+                INSERT INTO file_descriptions
                 (
                     project_id, file_path, description, file_hash, last_modified,
                     version, source_project_id, to_be_cleaned
                 )
                 VALUES (?, ?, ?, ?, ?, ?, ?, ?)
+                ON CONFLICT(project_id, file_path) DO UPDATE SET
+                    description=excluded.description,
+                    file_hash=excluded.file_hash,
+                    last_modified=excluded.last_modified,
+                    version=excluded.version,
+                    source_project_id=excluded.source_project_id,
+                    to_be_cleaned=excluded.to_be_cleaned
                 """,
                 (
                     file_desc.project_id,
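
Switching the statement to INSERT ... ON CONFLICT(project_id, file_path) DO UPDATE gives true upsert semantics (SQLite 3.24+): on conflict the existing row is updated in place, whereas INSERT OR REPLACE deletes and re-inserts it, losing the rowid and any columns not re-supplied. A runnable demonstration against a throwaway schema:

    import sqlite3

    conn = sqlite3.connect(":memory:")
    conn.execute(
        """CREATE TABLE file_descriptions (
            project_id TEXT, file_path TEXT, description TEXT,
            PRIMARY KEY (project_id, file_path)
        )"""
    )

    def upsert(project_id: str, file_path: str, description: str) -> None:
        # The ON CONFLICT target must name a unique constraint - here the
        # composite primary key on (project_id, file_path).
        conn.execute(
            """INSERT INTO file_descriptions (project_id, file_path, description)
               VALUES (?, ?, ?)
               ON CONFLICT(project_id, file_path) DO UPDATE SET
                   description = excluded.description""",
            (project_id, file_path, description),
        )

    upsert("p1", "src/main.py", "entry point")
    upsert("p1", "src/main.py", "entry point (updated)")
    print(conn.execute("SELECT description FROM file_descriptions").fetchall())
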
@@ -813,8 +851,12 @@ class DatabaseManager:
                     file_desc.to_be_cleaned,
                 ),
             )
-
-
+
+        await self.execute_transaction_with_retry(
+            operation,
+            "create_file_description"
+        )
+        logger.debug(f"Saved file description: {file_desc.file_path}")
 
     async def get_file_description(
         self, project_id: str, file_path: str
@@ -898,12 +940,19 @@ class DatabaseManager:
 
             await conn.executemany(
                 """
-                INSERT
+                INSERT INTO file_descriptions
                 (
                     project_id, file_path, description, file_hash, last_modified,
                     version, source_project_id, to_be_cleaned
                 )
                 VALUES (?, ?, ?, ?, ?, ?, ?, ?)
+                ON CONFLICT(project_id, file_path) DO UPDATE SET
+                    description=excluded.description,
+                    file_hash=excluded.file_hash,
+                    last_modified=excluded.last_modified,
+                    version=excluded.version,
+                    source_project_id=excluded.source_project_id,
+                    to_be_cleaned=excluded.to_be_cleaned
                 """,
                 data,
             )
@@ -1018,7 +1067,7 @@ class DatabaseManager:
 
     async def create_project_overview(self, overview: ProjectOverview) -> None:
         """Create or update a project overview."""
-        async
+        async def operation(db: aiosqlite.Connection) -> None:
             await db.execute(
                 """
                 INSERT OR REPLACE INTO project_overviews
@@ -1033,8 +1082,12 @@ class DatabaseManager:
                     overview.total_tokens,
                 ),
             )
-
-
+
+        await self.execute_transaction_with_retry(
+            operation,
+            "create_project_overview"
+        )
+        logger.debug(f"Created/updated overview for project {overview.project_id}")
 
     async def get_project_overview(self, project_id: str) -> Optional[ProjectOverview]:
         """Get project overview by ID."""
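
Both create_file_description and create_project_overview now wrap their SQL in a local async operation(db) and hand it to execute_transaction_with_retry, so commit, rollback, and retry policy live in one helper instead of being repeated per method. A minimal sketch of what such a helper could look like, assuming aiosqlite (an illustration of the pattern, not the package's actual implementation):

    import asyncio
    from typing import Awaitable, Callable

    import aiosqlite

    async def execute_transaction_with_retry(
        db_path: str,
        operation: Callable[[aiosqlite.Connection], Awaitable[None]],
        attempts: int = 3,
    ) -> None:
        """Run operation inside one transaction, retrying transient lock errors."""
        for attempt in range(1, attempts + 1):
            try:
                async with aiosqlite.connect(db_path) as db:
                    await operation(db)
                    await db.commit()  # closing without commit discards the changes
                return
            except aiosqlite.OperationalError as e:
                if "locked" not in str(e).lower() or attempt == attempts:
                    raise
                await asyncio.sleep(0.1 * attempt)
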
@@ -1067,10 +1120,8 @@ class DatabaseManager:
         Returns:
             List of file paths that were marked for cleanup
         """
-
-
-        async def cleanup_operation(conn: aiosqlite.Connection) -> List[str]:
-            # Get all active file descriptions for this project
+        # 1. Get all active file paths (fast DB read)
+        async with self.get_connection() as conn:
             cursor = await conn.execute(
                 (
                     "SELECT file_path FROM file_descriptions WHERE "
@@ -1078,46 +1129,29 @@ class DatabaseManager:
                 ),
                 (project_id,),
             )
-
             rows = await cursor.fetchall()
+            file_paths = [row["file_path"] for row in rows]
 
-
-
-
-
+        # 2. Check existence on disk (blocking IO - run in executor)
+        def find_removed_files() -> List[str]:
+            missing = []
+            for file_path in file_paths:
                 full_path = project_root / file_path
-
                 if not full_path.exists():
-
-
-            # Mark descriptions for cleanup instead of deleting
-            if to_remove:
-                import time
-
-                cleanup_timestamp = int(time.time())
-                await conn.executemany(
-                    (
-                        "UPDATE file_descriptions SET to_be_cleaned = ? WHERE "
-                        "project_id = ? AND file_path = ?"
-                    ),
-                    [(cleanup_timestamp, project_id, path) for path in to_remove],
-                )
-                logger.info(
-                    (
-                        f"Marked {len(to_remove)} missing files for cleanup "
-                        f"from {project_id}"
-                    )
-                )
+                    missing.append(file_path)
+            return missing
 
-
+        loop = asyncio.get_running_loop()
+        to_remove = await loop.run_in_executor(None, find_removed_files)
 
-
-
-
-
-
+        # 3. Mark for cleanup (fast DB write)
+        if to_remove:
+            await self.cleanup_manager.mark_files_for_cleanup(project_id, to_remove)
+            logger.info(
+                f"Marked {len(to_remove)} missing files for cleanup from {project_id}"
+            )
 
-        return
+        return to_remove
 
     async def analyze_word_frequency(
         self, project_id: str, limit: int = 200
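
Path.exists() is a blocking stat() call; looping over every tracked file on the event loop would stall all other coroutines, so the rewrite moves the scan into the default thread-pool executor between two short database touches. The core of that pattern in isolation:

    import asyncio
    from pathlib import Path
    from typing import List

    async def find_missing(project_root: Path, file_paths: List[str]) -> List[str]:
        def scan() -> List[str]:
            # Runs on a worker thread; the event loop stays responsive
            # while potentially thousands of stat() calls complete.
            return [p for p in file_paths if not (project_root / p).exists()]

        loop = asyncio.get_running_loop()
        return await loop.run_in_executor(None, scan)

    missing = asyncio.run(find_missing(Path("."), ["README.md", "no_such_file.py"]))
    print(missing)
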
@@ -1139,7 +1173,7 @@ class DatabaseManager:
         stop_words_path = (
             Path(__file__).parent.parent / "data" / "stop_words_english.txt"
         )
-        stop_words = set()
+        stop_words: set = set()
 
         if stop_words_path.exists():
             with open(stop_words_path, "r", encoding="utf-8") as f:
@@ -1186,8 +1220,8 @@ class DatabaseManager:
         }
         stop_words.update(programming_keywords)
 
+        # Get all descriptions for this project (fast DB read)
         async with self.get_connection() as db:
-            # Get all descriptions for this project
             cursor = await db.execute(
                 (
                     "SELECT description FROM file_descriptions WHERE "
@@ -1195,11 +1229,13 @@ class DatabaseManager:
                 ),
                 (project_id,),
             )
-
             rows = await cursor.fetchall()
+            descriptions = [row["description"] for row in rows]
 
+        # Process word frequency in executor (CPU-bound work)
+        def process_word_frequency() -> WordFrequencyResult:
             # Combine all descriptions
-            all_text = " ".join(
+            all_text = " ".join(descriptions)
 
             # Tokenize and filter
             words = re.findall(r"\b[a-zA-Z]{2,}\b", all_text.lower())
@@ -1220,6 +1256,9 @@ class DatabaseManager:
                 total_unique_terms=len(word_counts),
             )
 
+        loop = asyncio.get_running_loop()
+        return await loop.run_in_executor(None, process_word_frequency)
+
     async def cleanup_empty_projects(self) -> int:
         """
         Remove projects that have no file descriptions and no project overview.
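
The word-frequency analysis gets the same treatment: the database read stays async, while the regex tokenizing and counting (pure CPU work) runs in the executor. A condensed, runnable version of that split, counting inline since there is no real database here:

    import asyncio
    import re
    from collections import Counter
    from typing import List, Tuple

    async def top_terms(descriptions: List[str], limit: int = 5) -> List[Tuple[str, int]]:
        def count() -> List[Tuple[str, int]]:
            # Same tokenizer as the diff: 2+ letter alphabetic words, lowercased.
            words = re.findall(r"\b[a-zA-Z]{2,}\b", " ".join(descriptions).lower())
            return Counter(words).most_common(limit)

        loop = asyncio.get_running_loop()
        return await loop.run_in_executor(None, count)

    print(asyncio.run(top_terms(["parses config files", "loads config values"])))
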
@@ -1320,6 +1359,186 @@ class DatabaseManager:
             "files": file_descriptions,
         }
 
+    # IndexMeta operations
+    async def create_index_meta(self, index_meta: IndexMeta) -> None:
+        """Create or update index metadata for a project."""
+        async with self.get_write_connection_with_retry("create_index_meta") as db:
+            await db.execute(
+                """
+                INSERT OR REPLACE INTO index_meta (
+                    project_id, total_chunks, indexed_chunks, total_files, indexed_files,
+                    last_sync, sync_status, error_message, queue_depth, processing_rate,
+                    estimated_completion, metadata, created, last_modified
+                )
+                VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
+                """,
+                (
+                    index_meta.project_id,
+                    index_meta.total_chunks,
+                    index_meta.indexed_chunks,
+                    index_meta.total_files,
+                    index_meta.indexed_files,
+                    index_meta.last_sync,
+                    index_meta.sync_status.value,
+                    index_meta.error_message,
+                    index_meta.queue_depth,
+                    index_meta.processing_rate,
+                    index_meta.estimated_completion,
+                    json.dumps(index_meta.metadata),
+                    index_meta.created,
+                    index_meta.last_modified,
+                ),
+            )
+            await db.commit()
+            logger.debug(
+                f"Created/updated index metadata for project: {index_meta.project_id}"
+            )
+
+    async def update_index_meta(self, index_meta: IndexMeta) -> None:
+        """Update existing index metadata for a project."""
+        async with self.get_write_connection_with_retry("update_index_meta") as db:
+            await db.execute(
+                """
+                UPDATE index_meta
+                SET total_chunks = ?, indexed_chunks = ?, total_files = ?, indexed_files = ?,
+                    last_sync = ?, sync_status = ?, error_message = ?, queue_depth = ?,
+                    processing_rate = ?, estimated_completion = ?, metadata = ?, last_modified = ?
+                WHERE project_id = ?
+                """,
+                (
+                    index_meta.total_chunks,
+                    index_meta.indexed_chunks,
+                    index_meta.total_files,
+                    index_meta.indexed_files,
+                    index_meta.last_sync,
+                    index_meta.sync_status.value,
+                    index_meta.error_message,
+                    index_meta.queue_depth,
+                    index_meta.processing_rate,
+                    index_meta.estimated_completion,
+                    json.dumps(index_meta.metadata),
+                    index_meta.last_modified,
+                    index_meta.project_id,
+                ),
+            )
+
+            # Check if the project was actually updated
+            cursor = await db.execute("SELECT changes()")
+            changes = await cursor.fetchone()
+            if changes[0] == 0:
+                raise DatabaseError(
+                    f"Index metadata not found for project: {index_meta.project_id}"
+                )
+
+            await db.commit()
+            logger.debug(f"Updated index metadata for project: {index_meta.project_id}")
+
+    async def get_index_meta(self, project_id: str) -> Optional[IndexMeta]:
+        """Retrieve index metadata for a project."""
+        async with self.get_connection() as db:
+            cursor = await db.execute(
+                "SELECT * FROM index_meta WHERE project_id = ?", (project_id,)
+            )
+            row = await cursor.fetchone()
+
+            if row:
+                # Convert row to dict for easier field access
+                row_dict = dict(row)
+
+                # Parse JSON metadata field
+                metadata = (
+                    json.loads(row_dict["metadata"]) if row_dict["metadata"] else {}
+                )
+
+                # Parse datetime fields
+                created = (
+                    datetime.fromisoformat(row_dict["created"])
+                    if row_dict["created"]
+                    else datetime.utcnow()
+                )
+                last_modified = (
+                    datetime.fromisoformat(row_dict["last_modified"])
+                    if row_dict["last_modified"]
+                    else datetime.utcnow()
+                )
+                last_sync = (
+                    datetime.fromisoformat(row_dict["last_sync"])
+                    if row_dict["last_sync"]
+                    else None
+                )
+                estimated_completion = (
+                    datetime.fromisoformat(row_dict["estimated_completion"])
+                    if row_dict["estimated_completion"]
+                    else None
+                )
+
+                return IndexMeta(
+                    id=row_dict["id"],
+                    project_id=row_dict["project_id"],
+                    total_chunks=row_dict["total_chunks"],
+                    indexed_chunks=row_dict["indexed_chunks"],
+                    total_files=row_dict["total_files"],
+                    indexed_files=row_dict["indexed_files"],
+                    last_sync=last_sync,
+                    sync_status=row_dict["sync_status"],
+                    error_message=row_dict["error_message"],
+                    queue_depth=row_dict["queue_depth"],
+                    processing_rate=row_dict["processing_rate"],
+                    estimated_completion=estimated_completion,
+                    metadata=metadata,
+                    created=created,
+                    last_modified=last_modified,
+                )
+            return None
+
+    async def get_or_create_index_meta(self, project_id: str, **kwargs) -> IndexMeta:
+        """
+        Get existing index metadata or create new one with default values.
+
+        Args:
+            project_id: Project identifier
+            **kwargs: Optional fields to override defaults when creating new metadata
+
+        Returns:
+            IndexMeta object (existing or newly created)
+        """
+        # Try to get existing metadata first
+        existing_meta = await self.get_index_meta(project_id)
+        if existing_meta:
+            return existing_meta
+
+        # Create new metadata with defaults, allowing kwargs to override
+        default_metadata = {
+            "project_id": project_id,
+            "total_chunks": 0,
+            "indexed_chunks": 0,
+            "total_files": 0,
+            "indexed_files": 0,
+            "last_sync": None,
+            "sync_status": SyncStatus.PENDING,
+            "error_message": None,
+            "queue_depth": 0,
+            "processing_rate": 0.0,
+            "estimated_completion": None,
+            "metadata": {},
+        }
+
+        # Override defaults with provided kwargs
+        default_metadata.update(kwargs)
+
+        # Create the IndexMeta object
+        new_meta = IndexMeta(**default_metadata)
+
+        # Store it in the database
+        await self.create_index_meta(new_meta)
+
+        # Return the created metadata (fetch it back to get the assigned ID)
+        result = await self.get_index_meta(project_id)
+        if result is None:
+            raise DatabaseError(f"Failed to create index metadata for project: {project_id}")
+
+        return result
+
     # Cleanup operations
 
     @property