mcp-code-indexer 2.4.0__py3-none-any.whl → 3.0.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- mcp_code_indexer/ask_handler.py +5 -7
- mcp_code_indexer/claude_api_handler.py +2 -2
- mcp_code_indexer/cleanup_manager.py +255 -0
- mcp_code_indexer/database/database.py +125 -98
- mcp_code_indexer/database/models.py +3 -5
- mcp_code_indexer/deepask_handler.py +5 -9
- mcp_code_indexer/error_handler.py +3 -1
- mcp_code_indexer/git_hook_handler.py +2 -9
- mcp_code_indexer/migrations/001_initial.sql +100 -0
- mcp_code_indexer/migrations/002_performance_indexes.sql +61 -0
- mcp_code_indexer/migrations/003_project_overviews.sql +20 -0
- mcp_code_indexer/migrations/004_remove_branch_dependency.sql +166 -0
- mcp_code_indexer/server/mcp_server.py +33 -211
- {mcp_code_indexer-2.4.0.dist-info → mcp_code_indexer-3.0.2.dist-info}/METADATA +3 -3
- {mcp_code_indexer-2.4.0.dist-info → mcp_code_indexer-3.0.2.dist-info}/RECORD +19 -14
- {mcp_code_indexer-2.4.0.dist-info → mcp_code_indexer-3.0.2.dist-info}/WHEEL +0 -0
- {mcp_code_indexer-2.4.0.dist-info → mcp_code_indexer-3.0.2.dist-info}/entry_points.txt +0 -0
- {mcp_code_indexer-2.4.0.dist-info → mcp_code_indexer-3.0.2.dist-info}/licenses/LICENSE +0 -0
- {mcp_code_indexer-2.4.0.dist-info → mcp_code_indexer-3.0.2.dist-info}/top_level.txt +0 -0
@@ -31,6 +31,7 @@ from mcp_code_indexer.database.connection_health import (
|
|
31
31
|
ConnectionHealthMonitor, DatabaseMetricsCollector
|
32
32
|
)
|
33
33
|
from mcp_code_indexer.query_preprocessor import preprocess_search_query
|
34
|
+
from mcp_code_indexer.cleanup_manager import CleanupManager
|
34
35
|
|
35
36
|
logger = logging.getLogger(__name__)
|
36
37
|
|
@@ -79,6 +80,9 @@ class DatabaseManager:
|
|
79
80
|
self._health_monitor = None # Initialized in async context
|
80
81
|
self._metrics_collector = DatabaseMetricsCollector()
|
81
82
|
|
83
|
+
# Cleanup manager for retention policies
|
84
|
+
self._cleanup_manager = None # Initialized in async context
|
85
|
+
|
82
86
|
async def initialize(self) -> None:
|
83
87
|
"""Initialize database schema and configuration."""
|
84
88
|
import asyncio
|
@@ -97,13 +101,19 @@ class DatabaseManager:
|
|
97
101
|
)
|
98
102
|
await self._health_monitor.start_monitoring()
|
99
103
|
|
104
|
+
# Initialize cleanup manager
|
105
|
+
self._cleanup_manager = CleanupManager(self, retention_months=6)
|
106
|
+
|
100
107
|
# Ensure database directory exists
|
101
108
|
self.db_path.parent.mkdir(parents=True, exist_ok=True)
|
102
109
|
|
103
110
|
# Database initialization now uses the modern retry executor directly
|
104
111
|
|
105
112
|
# Apply migrations in order
|
106
|
-
|
113
|
+
# Migrations are now bundled with the package
|
114
|
+
migrations_dir = Path(__file__).parent.parent / "migrations"
|
115
|
+
if not migrations_dir.exists():
|
116
|
+
raise RuntimeError(f"Could not find migrations directory at {migrations_dir}")
|
107
117
|
migration_files = sorted(migrations_dir.glob("*.sql"))
|
108
118
|
|
109
119
|
async with aiosqlite.connect(self.db_path) as db:
|
@@ -113,16 +123,48 @@ class DatabaseManager:
|
|
113
123
|
# Configure WAL mode and optimizations for concurrent access
|
114
124
|
await self._configure_database_optimizations(db, include_wal_mode=self.enable_wal_mode)
|
115
125
|
|
116
|
-
#
|
126
|
+
# Create migrations tracking table
|
127
|
+
await db.execute('''
|
128
|
+
CREATE TABLE IF NOT EXISTS migrations (
|
129
|
+
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
130
|
+
filename TEXT UNIQUE NOT NULL,
|
131
|
+
applied_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
|
132
|
+
)
|
133
|
+
''')
|
134
|
+
await db.commit()
|
135
|
+
|
136
|
+
# Get list of already applied migrations
|
137
|
+
cursor = await db.execute('SELECT filename FROM migrations')
|
138
|
+
applied_migrations = {row[0] for row in await cursor.fetchall()}
|
139
|
+
|
140
|
+
# Apply each migration that hasn't been applied yet
|
117
141
|
for migration_file in migration_files:
|
118
|
-
|
119
|
-
|
120
|
-
|
142
|
+
migration_name = migration_file.name
|
143
|
+
if migration_name in applied_migrations:
|
144
|
+
logger.info(f"Skipping already applied migration: {migration_name}")
|
145
|
+
continue
|
146
|
+
|
147
|
+
logger.info(f"Applying migration: {migration_name}")
|
148
|
+
try:
|
149
|
+
migration_sql = migration_file.read_text(encoding='utf-8')
|
150
|
+
except AttributeError:
|
151
|
+
# Fallback for regular file objects
|
152
|
+
with open(migration_file, 'r', encoding='utf-8') as f:
|
153
|
+
migration_sql = f.read()
|
121
154
|
|
122
|
-
|
123
|
-
|
155
|
+
try:
|
156
|
+
await db.executescript(migration_sql)
|
157
|
+
|
158
|
+
# Record that migration was applied
|
159
|
+
await db.execute('INSERT INTO migrations (filename) VALUES (?)', (migration_name,))
|
160
|
+
await db.commit()
|
161
|
+
logger.info(f"Successfully applied migration: {migration_name}")
|
162
|
+
except Exception as e:
|
163
|
+
logger.error(f"Failed to apply migration {migration_name}: {e}")
|
164
|
+
await db.rollback()
|
165
|
+
raise
|
124
166
|
|
125
|
-
logger.info(f"Database initialized at {self.db_path} with {len(migration_files)} migrations")
|
167
|
+
logger.info(f"Database initialized at {self.db_path} with {len(migration_files)} total migrations")
|
126
168
|
|
127
169
|
async def _configure_database_optimizations(self, db: aiosqlite.Connection, include_wal_mode: bool = True) -> None:
|
128
170
|
"""
|
@@ -703,20 +745,7 @@ class DatabaseManager:
|
|
703
745
|
|
704
746
|
return projects
|
705
747
|
|
706
|
-
|
707
|
-
"""Get file counts per branch for a project."""
|
708
|
-
async with self.get_connection() as db:
|
709
|
-
cursor = await db.execute(
|
710
|
-
"""
|
711
|
-
SELECT branch, COUNT(*) as file_count
|
712
|
-
FROM file_descriptions
|
713
|
-
WHERE project_id = ?
|
714
|
-
GROUP BY branch
|
715
|
-
""",
|
716
|
-
(project_id,)
|
717
|
-
)
|
718
|
-
rows = await cursor.fetchall()
|
719
|
-
return {row[0]: row[1] for row in rows}
|
748
|
+
|
720
749
|
|
721
750
|
# File description operations
|
722
751
|
|
@@ -726,18 +755,18 @@ class DatabaseManager:
|
|
726
755
|
await db.execute(
|
727
756
|
"""
|
728
757
|
INSERT OR REPLACE INTO file_descriptions
|
729
|
-
(project_id,
|
758
|
+
(project_id, file_path, description, file_hash, last_modified, version, source_project_id, to_be_cleaned)
|
730
759
|
VALUES (?, ?, ?, ?, ?, ?, ?, ?)
|
731
760
|
""",
|
732
761
|
(
|
733
762
|
file_desc.project_id,
|
734
|
-
file_desc.branch,
|
735
763
|
file_desc.file_path,
|
736
764
|
file_desc.description,
|
737
765
|
file_desc.file_hash,
|
738
766
|
file_desc.last_modified,
|
739
767
|
file_desc.version,
|
740
|
-
file_desc.source_project_id
|
768
|
+
file_desc.source_project_id,
|
769
|
+
file_desc.to_be_cleaned
|
741
770
|
)
|
742
771
|
)
|
743
772
|
await db.commit()
|
@@ -746,60 +775,60 @@ class DatabaseManager:
|
|
746
775
|
async def get_file_description(
|
747
776
|
self,
|
748
777
|
project_id: str,
|
749
|
-
branch: str,
|
750
778
|
file_path: str
|
751
779
|
) -> Optional[FileDescription]:
|
752
|
-
"""Get file description by project
|
780
|
+
"""Get file description by project and path."""
|
753
781
|
async with self.get_connection() as db:
|
754
782
|
cursor = await db.execute(
|
755
783
|
"""
|
756
784
|
SELECT * FROM file_descriptions
|
757
|
-
WHERE project_id = ? AND
|
785
|
+
WHERE project_id = ? AND file_path = ? AND to_be_cleaned IS NULL
|
758
786
|
""",
|
759
|
-
(project_id,
|
787
|
+
(project_id, file_path)
|
760
788
|
)
|
761
789
|
row = await cursor.fetchone()
|
762
790
|
|
763
791
|
if row:
|
764
792
|
return FileDescription(
|
793
|
+
id=row['id'],
|
765
794
|
project_id=row['project_id'],
|
766
|
-
branch=row['branch'],
|
767
795
|
file_path=row['file_path'],
|
768
796
|
description=row['description'],
|
769
797
|
file_hash=row['file_hash'],
|
770
798
|
last_modified=datetime.fromisoformat(row['last_modified']),
|
771
799
|
version=row['version'],
|
772
|
-
source_project_id=row['source_project_id']
|
800
|
+
source_project_id=row['source_project_id'],
|
801
|
+
to_be_cleaned=row['to_be_cleaned']
|
773
802
|
)
|
774
803
|
return None
|
775
804
|
|
776
805
|
async def get_all_file_descriptions(
|
777
806
|
self,
|
778
|
-
project_id: str
|
779
|
-
branch: str
|
807
|
+
project_id: str
|
780
808
|
) -> List[FileDescription]:
|
781
|
-
"""Get all file descriptions for a project
|
809
|
+
"""Get all file descriptions for a project."""
|
782
810
|
async with self.get_connection() as db:
|
783
811
|
cursor = await db.execute(
|
784
812
|
"""
|
785
813
|
SELECT * FROM file_descriptions
|
786
|
-
WHERE project_id = ? AND
|
814
|
+
WHERE project_id = ? AND to_be_cleaned IS NULL
|
787
815
|
ORDER BY file_path
|
788
816
|
""",
|
789
|
-
(project_id,
|
817
|
+
(project_id,)
|
790
818
|
)
|
791
819
|
rows = await cursor.fetchall()
|
792
820
|
|
793
821
|
return [
|
794
822
|
FileDescription(
|
823
|
+
id=row['id'],
|
795
824
|
project_id=row['project_id'],
|
796
|
-
branch=row['branch'],
|
797
825
|
file_path=row['file_path'],
|
798
826
|
description=row['description'],
|
799
827
|
file_hash=row['file_hash'],
|
800
828
|
last_modified=datetime.fromisoformat(row['last_modified']),
|
801
829
|
version=row['version'],
|
802
|
-
source_project_id=row['source_project_id']
|
830
|
+
source_project_id=row['source_project_id'],
|
831
|
+
to_be_cleaned=row['to_be_cleaned']
|
803
832
|
)
|
804
833
|
for row in rows
|
805
834
|
]
|
@@ -813,13 +842,13 @@ class DatabaseManager:
|
|
813
842
|
data = [
|
814
843
|
(
|
815
844
|
fd.project_id,
|
816
|
-
fd.branch,
|
817
845
|
fd.file_path,
|
818
846
|
fd.description,
|
819
847
|
fd.file_hash,
|
820
848
|
fd.last_modified,
|
821
849
|
fd.version,
|
822
|
-
fd.source_project_id
|
850
|
+
fd.source_project_id,
|
851
|
+
fd.to_be_cleaned
|
823
852
|
)
|
824
853
|
for fd in file_descriptions
|
825
854
|
]
|
@@ -827,7 +856,7 @@ class DatabaseManager:
|
|
827
856
|
await conn.executemany(
|
828
857
|
"""
|
829
858
|
INSERT OR REPLACE INTO file_descriptions
|
830
|
-
(project_id,
|
859
|
+
(project_id, file_path, description, file_hash, last_modified, version, source_project_id, to_be_cleaned)
|
831
860
|
VALUES (?, ?, ?, ?, ?, ?, ?, ?)
|
832
861
|
""",
|
833
862
|
data
|
@@ -845,7 +874,6 @@ class DatabaseManager:
|
|
845
874
|
async def search_file_descriptions(
|
846
875
|
self,
|
847
876
|
project_id: str,
|
848
|
-
branch: str,
|
849
877
|
query: str,
|
850
878
|
max_results: int = 20
|
851
879
|
) -> List[SearchResult]:
|
@@ -864,26 +892,24 @@ class DatabaseManager:
|
|
864
892
|
"""
|
865
893
|
SELECT
|
866
894
|
fd.project_id,
|
867
|
-
fd.branch,
|
868
895
|
fd.file_path,
|
869
896
|
fd.description,
|
870
897
|
bm25(file_descriptions_fts) as rank
|
871
898
|
FROM file_descriptions_fts
|
872
|
-
JOIN file_descriptions fd ON fd.
|
899
|
+
JOIN file_descriptions fd ON fd.id = file_descriptions_fts.rowid
|
873
900
|
WHERE file_descriptions_fts MATCH ?
|
874
901
|
AND fd.project_id = ?
|
875
|
-
AND fd.
|
902
|
+
AND fd.to_be_cleaned IS NULL
|
876
903
|
ORDER BY bm25(file_descriptions_fts)
|
877
904
|
LIMIT ?
|
878
905
|
""",
|
879
|
-
(preprocessed_query, project_id,
|
906
|
+
(preprocessed_query, project_id, max_results)
|
880
907
|
)
|
881
908
|
rows = await cursor.fetchall()
|
882
909
|
|
883
910
|
return [
|
884
911
|
SearchResult(
|
885
912
|
project_id=row['project_id'],
|
886
|
-
branch=row['branch'],
|
887
913
|
file_path=row['file_path'],
|
888
914
|
description=row['description'],
|
889
915
|
relevance_score=row['rank']
|
@@ -936,12 +962,12 @@ class DatabaseManager:
|
|
936
962
|
|
937
963
|
# Utility operations
|
938
964
|
|
939
|
-
async def get_file_count(self, project_id: str
|
940
|
-
"""Get count of files in a project
|
965
|
+
async def get_file_count(self, project_id: str) -> int:
|
966
|
+
"""Get count of files in a project."""
|
941
967
|
async with self.get_connection() as db:
|
942
968
|
cursor = await db.execute(
|
943
|
-
"SELECT COUNT(*) as count FROM file_descriptions WHERE project_id = ? AND
|
944
|
-
(project_id,
|
969
|
+
"SELECT COUNT(*) as count FROM file_descriptions WHERE project_id = ? AND to_be_cleaned IS NULL",
|
970
|
+
(project_id,)
|
945
971
|
)
|
946
972
|
row = await cursor.fetchone()
|
947
973
|
return row['count'] if row else 0
|
@@ -1030,12 +1056,11 @@ class DatabaseManager:
|
|
1030
1056
|
await db.execute(
|
1031
1057
|
"""
|
1032
1058
|
INSERT OR REPLACE INTO project_overviews
|
1033
|
-
(project_id,
|
1034
|
-
VALUES (?, ?, ?, ?,
|
1059
|
+
(project_id, overview, last_modified, total_files, total_tokens)
|
1060
|
+
VALUES (?, ?, ?, ?, ?)
|
1035
1061
|
""",
|
1036
1062
|
(
|
1037
1063
|
overview.project_id,
|
1038
|
-
overview.branch,
|
1039
1064
|
overview.overview,
|
1040
1065
|
overview.last_modified,
|
1041
1066
|
overview.total_files,
|
@@ -1043,21 +1068,20 @@ class DatabaseManager:
|
|
1043
1068
|
)
|
1044
1069
|
)
|
1045
1070
|
await db.commit()
|
1046
|
-
logger.debug(f"Created/updated overview for project {overview.project_id}
|
1071
|
+
logger.debug(f"Created/updated overview for project {overview.project_id}")
|
1047
1072
|
|
1048
|
-
async def get_project_overview(self, project_id: str
|
1049
|
-
"""Get project overview by ID
|
1073
|
+
async def get_project_overview(self, project_id: str) -> Optional[ProjectOverview]:
|
1074
|
+
"""Get project overview by ID."""
|
1050
1075
|
async with self.get_connection() as db:
|
1051
1076
|
cursor = await db.execute(
|
1052
|
-
"SELECT * FROM project_overviews WHERE project_id = ?
|
1053
|
-
(project_id,
|
1077
|
+
"SELECT * FROM project_overviews WHERE project_id = ?",
|
1078
|
+
(project_id,)
|
1054
1079
|
)
|
1055
1080
|
row = await cursor.fetchone()
|
1056
1081
|
|
1057
1082
|
if row:
|
1058
1083
|
return ProjectOverview(
|
1059
1084
|
project_id=row['project_id'],
|
1060
|
-
branch=row['branch'],
|
1061
1085
|
overview=row['overview'],
|
1062
1086
|
last_modified=datetime.fromisoformat(row['last_modified']),
|
1063
1087
|
total_files=row['total_files'],
|
@@ -1065,25 +1089,24 @@ class DatabaseManager:
|
|
1065
1089
|
)
|
1066
1090
|
return None
|
1067
1091
|
|
1068
|
-
async def cleanup_missing_files(self, project_id: str,
|
1092
|
+
async def cleanup_missing_files(self, project_id: str, project_root: Path) -> List[str]:
|
1069
1093
|
"""
|
1070
|
-
|
1094
|
+
Mark descriptions for cleanup for files that no longer exist on disk.
|
1071
1095
|
|
1072
1096
|
Args:
|
1073
1097
|
project_id: Project identifier
|
1074
|
-
branch: Branch name
|
1075
1098
|
project_root: Path to project root directory
|
1076
1099
|
|
1077
1100
|
Returns:
|
1078
|
-
List of file paths that were
|
1101
|
+
List of file paths that were marked for cleanup
|
1079
1102
|
"""
|
1080
1103
|
removed_files = []
|
1081
1104
|
|
1082
1105
|
async def cleanup_operation(conn: aiosqlite.Connection) -> List[str]:
|
1083
|
-
# Get all file descriptions for this project
|
1106
|
+
# Get all active file descriptions for this project
|
1084
1107
|
cursor = await conn.execute(
|
1085
|
-
"SELECT file_path FROM file_descriptions WHERE project_id = ? AND
|
1086
|
-
(project_id,
|
1108
|
+
"SELECT file_path FROM file_descriptions WHERE project_id = ? AND to_be_cleaned IS NULL",
|
1109
|
+
(project_id,)
|
1087
1110
|
)
|
1088
1111
|
|
1089
1112
|
rows = await cursor.fetchall()
|
@@ -1097,31 +1120,32 @@ class DatabaseManager:
|
|
1097
1120
|
if not full_path.exists():
|
1098
1121
|
to_remove.append(file_path)
|
1099
1122
|
|
1100
|
-
#
|
1123
|
+
# Mark descriptions for cleanup instead of deleting
|
1101
1124
|
if to_remove:
|
1125
|
+
import time
|
1126
|
+
cleanup_timestamp = int(time.time())
|
1102
1127
|
await conn.executemany(
|
1103
|
-
"
|
1104
|
-
[(
|
1128
|
+
"UPDATE file_descriptions SET to_be_cleaned = ? WHERE project_id = ? AND file_path = ?",
|
1129
|
+
[(cleanup_timestamp, project_id, path) for path in to_remove]
|
1105
1130
|
)
|
1106
|
-
logger.info(f"
|
1131
|
+
logger.info(f"Marked {len(to_remove)} missing files for cleanup from {project_id}")
|
1107
1132
|
|
1108
1133
|
return to_remove
|
1109
1134
|
|
1110
1135
|
removed_files = await self.execute_transaction_with_retry(
|
1111
1136
|
cleanup_operation,
|
1112
|
-
f"cleanup_missing_files_{project_id}
|
1137
|
+
f"cleanup_missing_files_{project_id}",
|
1113
1138
|
timeout_seconds=60.0 # Longer timeout for file system operations
|
1114
1139
|
)
|
1115
1140
|
|
1116
1141
|
return removed_files
|
1117
1142
|
|
1118
|
-
async def analyze_word_frequency(self, project_id: str,
|
1143
|
+
async def analyze_word_frequency(self, project_id: str, limit: int = 200) -> WordFrequencyResult:
|
1119
1144
|
"""
|
1120
|
-
Analyze word frequency across all file descriptions for a project
|
1145
|
+
Analyze word frequency across all file descriptions for a project.
|
1121
1146
|
|
1122
1147
|
Args:
|
1123
1148
|
project_id: Project identifier
|
1124
|
-
branch: Branch name
|
1125
1149
|
limit: Maximum number of top terms to return
|
1126
1150
|
|
1127
1151
|
Returns:
|
@@ -1152,10 +1176,10 @@ class DatabaseManager:
|
|
1152
1176
|
stop_words.update(programming_keywords)
|
1153
1177
|
|
1154
1178
|
async with self.get_connection() as db:
|
1155
|
-
# Get all descriptions for this project
|
1179
|
+
# Get all descriptions for this project
|
1156
1180
|
cursor = await db.execute(
|
1157
|
-
"SELECT description FROM file_descriptions WHERE project_id = ? AND
|
1158
|
-
(project_id,
|
1181
|
+
"SELECT description FROM file_descriptions WHERE project_id = ? AND to_be_cleaned IS NULL",
|
1182
|
+
(project_id,)
|
1159
1183
|
)
|
1160
1184
|
|
1161
1185
|
rows = await cursor.fetchall()
|
@@ -1218,13 +1242,12 @@ class DatabaseManager:
|
|
1218
1242
|
await db.commit()
|
1219
1243
|
return removed_count
|
1220
1244
|
|
1221
|
-
async def get_project_map_data(self, project_identifier: str
|
1245
|
+
async def get_project_map_data(self, project_identifier: str) -> dict:
|
1222
1246
|
"""
|
1223
1247
|
Get all data needed to generate a project map.
|
1224
1248
|
|
1225
1249
|
Args:
|
1226
1250
|
project_identifier: Project name or ID
|
1227
|
-
branch: Branch name (optional, will use first available if not specified)
|
1228
1251
|
|
1229
1252
|
Returns:
|
1230
1253
|
Dictionary containing project info, overview, and file descriptions
|
@@ -1256,39 +1279,43 @@ class DatabaseManager:
|
|
1256
1279
|
|
1257
1280
|
project = Project(**project_dict)
|
1258
1281
|
|
1259
|
-
# If no branch specified, find the first available branch
|
1260
|
-
if not branch:
|
1261
|
-
cursor = await db.execute(
|
1262
|
-
"SELECT DISTINCT branch FROM file_descriptions WHERE project_id = ? LIMIT 1",
|
1263
|
-
(project.id,)
|
1264
|
-
)
|
1265
|
-
branch_row = await cursor.fetchone()
|
1266
|
-
if branch_row:
|
1267
|
-
branch = branch_row['branch']
|
1268
|
-
else:
|
1269
|
-
branch = 'main' # Default fallback
|
1270
|
-
|
1271
1282
|
# Get project overview
|
1272
1283
|
cursor = await db.execute(
|
1273
|
-
"SELECT * FROM project_overviews WHERE project_id = ?
|
1274
|
-
(project.id,
|
1284
|
+
"SELECT * FROM project_overviews WHERE project_id = ?",
|
1285
|
+
(project.id,)
|
1275
1286
|
)
|
1276
1287
|
overview_row = await cursor.fetchone()
|
1277
1288
|
project_overview = ProjectOverview(**overview_row) if overview_row else None
|
1278
1289
|
|
1279
|
-
# Get all file descriptions for this project
|
1290
|
+
# Get all file descriptions for this project
|
1280
1291
|
cursor = await db.execute(
|
1281
1292
|
"""SELECT * FROM file_descriptions
|
1282
|
-
WHERE project_id = ? AND
|
1293
|
+
WHERE project_id = ? AND to_be_cleaned IS NULL
|
1283
1294
|
ORDER BY file_path""",
|
1284
|
-
(project.id,
|
1295
|
+
(project.id,)
|
1285
1296
|
)
|
1286
1297
|
file_rows = await cursor.fetchall()
|
1287
1298
|
file_descriptions = [FileDescription(**row) for row in file_rows]
|
1288
1299
|
|
1289
1300
|
return {
|
1290
1301
|
'project': project,
|
1291
|
-
'branch': branch,
|
1292
1302
|
'overview': project_overview,
|
1293
1303
|
'files': file_descriptions
|
1294
1304
|
}
|
1305
|
+
|
1306
|
+
# Cleanup operations
|
1307
|
+
|
1308
|
+
@property
|
1309
|
+
def cleanup_manager(self) -> CleanupManager:
|
1310
|
+
"""Get the cleanup manager instance."""
|
1311
|
+
if self._cleanup_manager is None:
|
1312
|
+
self._cleanup_manager = CleanupManager(self, retention_months=6)
|
1313
|
+
return self._cleanup_manager
|
1314
|
+
|
1315
|
+
async def mark_file_for_cleanup(self, project_id: str, file_path: str) -> bool:
|
1316
|
+
"""Mark a file for cleanup. Convenience method."""
|
1317
|
+
return await self.cleanup_manager.mark_file_for_cleanup(project_id, file_path)
|
1318
|
+
|
1319
|
+
async def perform_cleanup(self, project_id: Optional[str] = None) -> int:
|
1320
|
+
"""Perform cleanup of old records. Convenience method."""
|
1321
|
+
return await self.cleanup_manager.perform_cleanup(project_id)
|
@@ -29,19 +29,20 @@ class Project(BaseModel):
|
|
29
29
|
|
30
30
|
class FileDescription(BaseModel):
|
31
31
|
"""
|
32
|
-
Represents a file description within a project
|
32
|
+
Represents a file description within a project.
|
33
33
|
|
34
34
|
Stores detailed summaries of file contents including purpose, components,
|
35
35
|
and relationships to enable efficient codebase navigation.
|
36
36
|
"""
|
37
|
+
id: Optional[int] = Field(None, description="Database ID")
|
37
38
|
project_id: str = Field(..., description="Reference to project")
|
38
|
-
branch: str = Field(..., description="Git branch name")
|
39
39
|
file_path: str = Field(..., description="Relative path from project root")
|
40
40
|
description: str = Field(..., description="Detailed content description")
|
41
41
|
file_hash: Optional[str] = Field(None, description="SHA-256 of file contents")
|
42
42
|
last_modified: datetime = Field(default_factory=datetime.utcnow, description="Last update timestamp")
|
43
43
|
version: int = Field(default=1, description="For optimistic concurrency control")
|
44
44
|
source_project_id: Optional[str] = Field(None, description="Source project if copied from upstream")
|
45
|
+
to_be_cleaned: Optional[int] = Field(None, description="UNIX timestamp for cleanup, NULL = active")
|
45
46
|
|
46
47
|
|
47
48
|
class MergeConflict(BaseModel):
|
@@ -71,7 +72,6 @@ class ProjectOverview(BaseModel):
|
|
71
72
|
individual file descriptions.
|
72
73
|
"""
|
73
74
|
project_id: str = Field(..., description="Reference to project")
|
74
|
-
branch: str = Field(..., description="Git branch name")
|
75
75
|
overview: str = Field(..., description="Comprehensive codebase narrative")
|
76
76
|
last_modified: datetime = Field(default_factory=datetime.utcnow, description="Last update timestamp")
|
77
77
|
total_files: int = Field(..., description="Number of files in codebase")
|
@@ -86,7 +86,6 @@ class CodebaseOverview(BaseModel):
|
|
86
86
|
to help determine whether to use full overview or search-based approach.
|
87
87
|
"""
|
88
88
|
project_name: str = Field(..., description="Project name")
|
89
|
-
branch: str = Field(..., description="Git branch")
|
90
89
|
total_files: int = Field(..., description="Total number of tracked files")
|
91
90
|
total_tokens: int = Field(..., description="Total token count for all descriptions")
|
92
91
|
is_large: bool = Field(..., description="True if exceeds configured token limit")
|
@@ -121,7 +120,6 @@ class SearchResult(BaseModel):
|
|
121
120
|
description: str = Field(..., description="File description")
|
122
121
|
relevance_score: float = Field(..., description="Search relevance score")
|
123
122
|
project_id: str = Field(..., description="Project identifier")
|
124
|
-
branch: str = Field(..., description="Git branch")
|
125
123
|
|
126
124
|
|
127
125
|
class CodebaseSizeInfo(BaseModel):
|
@@ -75,7 +75,7 @@ class DeepAskHandler(ClaudeAPIHandler):
|
|
75
75
|
Ask an enhanced question about the project using two-stage Claude API processing.
|
76
76
|
|
77
77
|
Args:
|
78
|
-
project_info: Project information dict with projectName, folderPath,
|
78
|
+
project_info: Project information dict with projectName, folderPath, etc.
|
79
79
|
question: User's question about the project
|
80
80
|
max_file_results: Maximum number of file descriptions to include
|
81
81
|
|
@@ -118,8 +118,7 @@ class DeepAskHandler(ClaudeAPIHandler):
|
|
118
118
|
"stage1_tokens": stage1_result["token_usage"],
|
119
119
|
"stage2_tokens": stage2_result["token_usage"],
|
120
120
|
"total_files_found": stage2_result["total_files_found"],
|
121
|
-
"files_included": len(stage2_result["relevant_files"])
|
122
|
-
"branch": project_info.get("branch", "unknown")
|
121
|
+
"files_included": len(stage2_result["relevant_files"])
|
123
122
|
}
|
124
123
|
}
|
125
124
|
|
@@ -237,7 +236,6 @@ class DeepAskHandler(ClaudeAPIHandler):
|
|
237
236
|
try:
|
238
237
|
search_results = await self.db_manager.search_file_descriptions(
|
239
238
|
project_id=project.id,
|
240
|
-
branch=project_info["branch"],
|
241
239
|
query=search_term,
|
242
240
|
max_results=max_file_results
|
243
241
|
)
|
@@ -322,9 +320,8 @@ class DeepAskHandler(ClaudeAPIHandler):
|
|
322
320
|
) -> str:
|
323
321
|
"""Build stage 1 prompt for extracting search terms."""
|
324
322
|
project_name = project_info["projectName"]
|
325
|
-
branch = project_info.get("branch", "unknown")
|
326
323
|
|
327
|
-
return f"""I need to answer a question about the codebase "{project_name}"
|
324
|
+
return f"""I need to answer a question about the codebase "{project_name}". To provide the best answer, I need to search for relevant files and then answer the question.
|
328
325
|
|
329
326
|
PROJECT OVERVIEW:
|
330
327
|
{overview}
|
@@ -352,7 +349,6 @@ Respond with valid JSON in this format:
|
|
352
349
|
) -> str:
|
353
350
|
"""Build stage 2 prompt for enhanced answer."""
|
354
351
|
project_name = project_info["projectName"]
|
355
|
-
branch = project_info.get("branch", "unknown")
|
356
352
|
|
357
353
|
# Format file descriptions
|
358
354
|
file_context = ""
|
@@ -365,7 +361,7 @@ Respond with valid JSON in this format:
|
|
365
361
|
else:
|
366
362
|
file_context = "\n\nNo relevant files found in the search."
|
367
363
|
|
368
|
-
return f"""Please answer the following question about the codebase "{project_name}"
|
364
|
+
return f"""Please answer the following question about the codebase "{project_name}".
|
369
365
|
|
370
366
|
PROJECT OVERVIEW (COMPRESSED):
|
371
367
|
{compressed_overview}
|
@@ -432,7 +428,7 @@ Your answer should be comprehensive but focused on the specific question asked."
|
|
432
428
|
|
433
429
|
output = []
|
434
430
|
output.append(f"Question: {result['question']}")
|
435
|
-
output.append(f"Project: {result['project_name']}
|
431
|
+
output.append(f"Project: {result['project_name']}")
|
436
432
|
output.append("")
|
437
433
|
output.append("Answer:")
|
438
434
|
output.append(answer)
|
@@ -254,6 +254,7 @@ class StructuredFormatter(logging.Formatter):
|
|
254
254
|
def format(self, record: logging.LogRecord) -> str:
|
255
255
|
"""Format log record as structured JSON."""
|
256
256
|
import json
|
257
|
+
from . import __version__
|
257
258
|
|
258
259
|
log_data = {
|
259
260
|
"timestamp": datetime.utcnow().isoformat(),
|
@@ -262,7 +263,8 @@ class StructuredFormatter(logging.Formatter):
|
|
262
263
|
"message": record.getMessage(),
|
263
264
|
"module": record.module,
|
264
265
|
"function": record.funcName,
|
265
|
-
"line": record.lineno
|
266
|
+
"line": record.lineno,
|
267
|
+
"version": __version__
|
266
268
|
}
|
267
269
|
|
268
270
|
# Add structured data if present
|