mcp-code-indexer 3.1.3__py3-none-any.whl → 3.1.5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- mcp_code_indexer/__init__.py +8 -6
- mcp_code_indexer/ask_handler.py +105 -75
- mcp_code_indexer/claude_api_handler.py +125 -82
- mcp_code_indexer/cleanup_manager.py +107 -81
- mcp_code_indexer/database/connection_health.py +212 -161
- mcp_code_indexer/database/database.py +529 -415
- mcp_code_indexer/database/exceptions.py +167 -118
- mcp_code_indexer/database/models.py +54 -19
- mcp_code_indexer/database/retry_executor.py +139 -103
- mcp_code_indexer/deepask_handler.py +178 -140
- mcp_code_indexer/error_handler.py +88 -76
- mcp_code_indexer/file_scanner.py +163 -141
- mcp_code_indexer/git_hook_handler.py +352 -261
- mcp_code_indexer/logging_config.py +76 -94
- mcp_code_indexer/main.py +406 -320
- mcp_code_indexer/middleware/error_middleware.py +106 -71
- mcp_code_indexer/query_preprocessor.py +40 -40
- mcp_code_indexer/server/mcp_server.py +785 -469
- mcp_code_indexer/token_counter.py +54 -47
- {mcp_code_indexer-3.1.3.dist-info → mcp_code_indexer-3.1.5.dist-info}/METADATA +3 -3
- mcp_code_indexer-3.1.5.dist-info/RECORD +37 -0
- mcp_code_indexer-3.1.3.dist-info/RECORD +0 -37
- {mcp_code_indexer-3.1.3.dist-info → mcp_code_indexer-3.1.5.dist-info}/WHEEL +0 -0
- {mcp_code_indexer-3.1.3.dist-info → mcp_code_indexer-3.1.5.dist-info}/entry_points.txt +0 -0
- {mcp_code_indexer-3.1.3.dist-info → mcp_code_indexer-3.1.5.dist-info}/licenses/LICENSE +0 -0
- {mcp_code_indexer-3.1.3.dist-info → mcp_code_indexer-3.1.5.dist-info}/top_level.txt +0 -0
@@ -7,28 +7,31 @@ connection management, transaction handling, and performance optimizations.
 
 import json
 import logging
-import sqlite3
 from contextlib import asynccontextmanager
 from datetime import datetime, timedelta
 from pathlib import Path
-from typing import List, Optional, Dict, Any,
+from typing import List, Optional, Dict, Any, AsyncIterator
 
 import asyncio
-import random
 import aiosqlite
 
 from mcp_code_indexer.database.models import (
-    Project,
-    ...
+    Project,
+    FileDescription,
+    SearchResult,
+    ProjectOverview,
+    WordFrequencyResult,
+    WordFrequencyTerm,
 )
+from mcp_code_indexer.database.retry_executor import create_retry_executor
 from mcp_code_indexer.database.exceptions import (
-    DatabaseError,
+    DatabaseError,
+    classify_sqlite_error,
+    is_retryable_error,
 )
 from mcp_code_indexer.database.connection_health import (
-    ConnectionHealthMonitor,
+    ConnectionHealthMonitor,
+    DatabaseMetricsCollector,
 )
 from mcp_code_indexer.query_preprocessor import preprocess_search_query
 from mcp_code_indexer.cleanup_manager import CleanupManager
@@ -39,21 +42,23 @@ logger = logging.getLogger(__name__)
 class DatabaseManager:
     """
     Manages SQLite database operations with async support.
 
     Provides high-level operations for projects, file descriptions, search,
     and caching with proper transaction management and error handling.
     """
 
-    def __init__(
-        ...
+    def __init__(
+        self,
+        db_path: Path,
+        pool_size: int = 3,
+        retry_count: int = 5,
+        timeout: float = 10.0,
+        enable_wal_mode: bool = True,
+        health_check_interval: float = 30.0,
+        retry_min_wait: float = 0.1,
+        retry_max_wait: float = 2.0,
+        retry_jitter: float = 0.2,
+    ):
         """Initialize database manager with path to SQLite database."""
         self.db_path = db_path
         self.pool_size = pool_size
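For orientation, a minimal usage sketch (not part of the packaged diff) showing how the constructor parameters above would be wired up, assuming `DatabaseManager` is importable from this module as in 3.1.5; the database path and values are placeholders, not recommendations:

# Illustrative sketch only; path and values below are placeholders.
import asyncio
from pathlib import Path

from mcp_code_indexer.database.database import DatabaseManager


async def main() -> None:
    db = DatabaseManager(
        db_path=Path("/tmp/example-tracker.db"),  # placeholder path
        pool_size=3,                  # pooled read connections
        retry_count=5,                # attempts used by the retry executor
        timeout=10.0,                 # per-operation timeout (seconds)
        enable_wal_mode=True,         # WAL journal mode, applied once at init
        health_check_interval=30.0,   # seconds between health checks
        retry_min_wait=0.1,
        retry_max_wait=2.0,
        retry_jitter=0.2,
    )
    await db.initialize()  # applies bundled migrations, starts health monitoring
    try:
        print(db.get_database_stats())
    finally:
        await db.close_pool()


asyncio.run(main())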
@@ -66,167 +71,193 @@ class DatabaseManager:
         self.retry_jitter = retry_jitter
         self._connection_pool: List[aiosqlite.Connection] = []
         self._pool_lock = None  # Will be initialized in async context
-        self._write_lock = None  # Write serialization lock,
+        self._write_lock = None  # Write serialization lock, async context
 
         # Retry and recovery components - configure with provided settings
         self._retry_executor = create_retry_executor(
             max_attempts=retry_count,
             min_wait_seconds=retry_min_wait,
             max_wait_seconds=retry_max_wait,
-            jitter_max_seconds=retry_jitter
+            jitter_max_seconds=retry_jitter,
         )
 
         # Health monitoring and metrics
         self._health_monitor = None  # Initialized in async context
         self._metrics_collector = DatabaseMetricsCollector()
 
         # Cleanup manager for retention policies
         self._cleanup_manager = None  # Initialized in async context
 
     async def initialize(self) -> None:
         """Initialize database schema and configuration."""
         import asyncio
 
         # Initialize locks
         self._pool_lock = asyncio.Lock()
         self._write_lock = asyncio.Lock()
 
         # Connection recovery is now handled by the retry executor
 
         # Initialize health monitoring with configured interval
         self._health_monitor = ConnectionHealthMonitor(
             self,
             check_interval=self.health_check_interval,
-            timeout_seconds=self.timeout
+            timeout_seconds=self.timeout,
         )
         await self._health_monitor.start_monitoring()
 
         # Initialize cleanup manager
         self._cleanup_manager = CleanupManager(self, retention_months=6)
 
         # Ensure database directory exists
         self.db_path.parent.mkdir(parents=True, exist_ok=True)
 
         # Database initialization now uses the modern retry executor directly
 
         # Apply migrations in order
         # Migrations are now bundled with the package
         migrations_dir = Path(__file__).parent.parent / "migrations"
         if not migrations_dir.exists():
-            raise RuntimeError(
+            raise RuntimeError(
+                f"Could not find migrations directory at {migrations_dir}"
+            )
         migration_files = sorted(migrations_dir.glob("*.sql"))
 
         async with aiosqlite.connect(self.db_path) as db:
             # Enable row factory for easier data access
             db.row_factory = aiosqlite.Row
 
             # Configure WAL mode and optimizations for concurrent access
-            await self._configure_database_optimizations(
+            await self._configure_database_optimizations(
+                db, include_wal_mode=self.enable_wal_mode
+            )
 
             # Create migrations tracking table
-            await db.execute(
+            await db.execute(
+                """
                 CREATE TABLE IF NOT EXISTS migrations (
                     id INTEGER PRIMARY KEY AUTOINCREMENT,
                     filename TEXT UNIQUE NOT NULL,
                     applied_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
                 )
+                """
+            )
             await db.commit()
 
             # Get list of already applied migrations
-            cursor = await db.execute(
+            cursor = await db.execute("SELECT filename FROM migrations")
             applied_migrations = {row[0] for row in await cursor.fetchall()}
 
             # Apply each migration that hasn't been applied yet
             for migration_file in migration_files:
                 migration_name = migration_file.name
                 if migration_name in applied_migrations:
                     logger.info(f"Skipping already applied migration: {migration_name}")
                     continue
 
                 logger.info(f"Applying migration: {migration_name}")
                 try:
-                    migration_sql = migration_file.read_text(encoding=
+                    migration_sql = migration_file.read_text(encoding="utf-8")
                 except AttributeError:
                     # Fallback for regular file objects
-                    with open(migration_file,
+                    with open(migration_file, "r", encoding="utf-8") as f:
                         migration_sql = f.read()
 
                 try:
                     await db.executescript(migration_sql)
 
                     # Record that migration was applied
-                    await db.execute(
+                    await db.execute(
+                        "INSERT INTO migrations (filename) VALUES (?)",
+                        (migration_name,),
+                    )
                     await db.commit()
                     logger.info(f"Successfully applied migration: {migration_name}")
                 except Exception as e:
                     logger.error(f"Failed to apply migration {migration_name}: {e}")
                     await db.rollback()
                     raise
 
-        logger.info(
-            ...
+        logger.info(
+            (
+                f"Database initialized at {self.db_path} with "
+                f"{len(migration_files)} total migrations"
+            )
+        )
 
+    async def _configure_database_optimizations(
+        self,
+        db: aiosqlite.Connection,
+        include_wal_mode: bool = True,
+    ) -> None:
         """
         Configure SQLite optimizations for concurrent access and performance.
 
         Args:
             db: Database connection to configure
-            include_wal_mode: Whether to set WAL mode (only needed once per
+            include_wal_mode: Whether to set WAL mode (only needed once per
+                database)
         """
         optimizations = []
 
         # WAL mode is database-level, only set during initialization
         if include_wal_mode:
             optimizations.append("PRAGMA journal_mode = WAL")
             logger.info("Enabling WAL mode for database concurrency")
 
         # Connection-level optimizations that can be set per connection
-        optimizations.extend(
-            ...
+        optimizations.extend(
+            [
+                "PRAGMA synchronous = NORMAL",  # Balance durability/performance
+                "PRAGMA cache_size = -64000",  # 64MB cache
+                "PRAGMA temp_store = MEMORY",  # Use memory for temp tables
+                "PRAGMA mmap_size = 268435456",  # 256MB memory mapping
+                "PRAGMA busy_timeout = 10000",  # 10s timeout (reduced from 30s)
+                "PRAGMA optimize",  # Enable query planner optimizations
+            ]
+        )
 
         # WAL-specific settings (only if WAL mode is being set)
         if include_wal_mode:
-            optimizations.append(
+            optimizations.append(
+                "PRAGMA wal_autocheckpoint = 1000"
+            )  # Checkpoint after 1000 pages
 
         for pragma in optimizations:
             try:
                 await db.execute(pragma)
                 logger.debug(f"Applied optimization: {pragma}")
             except Exception as e:
                 logger.warning(f"Failed to apply optimization '{pragma}': {e}")
 
         await db.commit()
         if include_wal_mode:
-            logger.info(
+            logger.info(
+                "Database optimizations configured for concurrent access "
+                "with WAL mode"
+            )
         else:
             logger.debug("Connection optimizations applied")
 
     @asynccontextmanager
     async def get_connection(self) -> AsyncIterator[aiosqlite.Connection]:
         """Get a database connection from pool or create new one."""
         conn = None
 
         # Try to get from pool
         if self._pool_lock:
             async with self._pool_lock:
                 if self._connection_pool:
                     conn = self._connection_pool.pop()
 
         # Create new connection if none available
         if conn is None:
             conn = await aiosqlite.connect(self.db_path)
             conn.row_factory = aiosqlite.Row
 
-            # Apply connection-level optimizations (WAL mode
+            # Apply connection-level optimizations (WAL mode set during init)
             await self._configure_database_optimizations(conn, include_wal_mode=False)
 
         try:
             yield conn
         finally:
@@ -237,74 +268,83 @@ class DatabaseManager:
                 if len(self._connection_pool) < self.pool_size:
                     self._connection_pool.append(conn)
                     returned_to_pool = True
 
             if not returned_to_pool:
                 await conn.close()
 
     async def close_pool(self) -> None:
         """Close all connections in the pool and stop monitoring."""
         # Stop health monitoring
         if self._health_monitor:
             await self._health_monitor.stop_monitoring()
 
         # Close connections
         if self._pool_lock:
             async with self._pool_lock:
                 for conn in self._connection_pool:
                     await conn.close()
                 self._connection_pool.clear()
 
     @asynccontextmanager
     async def get_write_connection(self) -> AsyncIterator[aiosqlite.Connection]:
         """
         Get a database connection with write serialization.
 
-        This ensures only one write operation occurs at a time across the
-        application, preventing database locking issues in
+        This ensures only one write operation occurs at a time across the
+        entire application, preventing database locking issues in
+        multi-client scenarios.
         """
         if self._write_lock is None:
-            raise RuntimeError(
+            raise RuntimeError(
+                "DatabaseManager not initialized - call initialize() first"
+            )
 
         async with self._write_lock:
             async with self.get_connection() as conn:
                 yield conn
 
     @asynccontextmanager
-    async def get_write_connection_with_retry(
+    async def get_write_connection_with_retry(
+        self, operation_name: str = "write_operation"
+    ) -> AsyncIterator[aiosqlite.Connection]:
         """
-        Get a database connection with write serialization and automatic
+        Get a database connection with write serialization and automatic
+        retry logic.
+
+        This uses the new RetryExecutor to properly handle retry logic
+        without the broken yield-in-retry-loop pattern that caused
+        generator errors.
 
         Args:
-            operation_name: Name of the operation for logging and
+            operation_name: Name of the operation for logging and
+                monitoring
         """
         if self._write_lock is None:
-            raise RuntimeError(
+            raise RuntimeError(
+                "DatabaseManager not initialized - call initialize() first"
+            )
 
         async def get_write_connection():
-            """Inner function to get connection -
+            """Inner function to get connection - retried by executor."""
             async with self._write_lock:
                 async with self.get_connection() as conn:
                     return conn
 
         try:
             # Use retry executor to handle connection acquisition with retries
             connection = await self._retry_executor.execute_with_retry(
-                get_write_connection,
-                operation_name
+                get_write_connection, operation_name
            )
 
             try:
                 yield connection
 
                 # Success - retry executor handles all failure tracking
 
-            except Exception
+            except Exception:
                 # Error handling is managed by the retry executor
                 raise
 
         except DatabaseError:
             # Re-raise our custom database errors as-is
             raise
@@ -312,73 +352,80 @@ class DatabaseManager:
             # Classify and wrap other exceptions
             classified_error = classify_sqlite_error(e, operation_name)
             logger.error(
-                ...
+                (
+                    f"Database operation '{operation_name}' failed: "
+                    f"{classified_error.message}"
+                ),
+                extra={"structured_data": classified_error.to_dict()},
             )
             raise classified_error
 
     def get_database_stats(self) -> Dict[str, Any]:
         """
         Get database performance and reliability statistics.
 
         Returns:
-            Dictionary with retry stats, recovery stats, health status,
+            Dictionary with retry stats, recovery stats, health status,
+            and metrics
         """
         stats = {
             "connection_pool": {
                 "configured_size": self.pool_size,
-                "current_size": len(self._connection_pool)
+                "current_size": len(self._connection_pool),
             },
-            "retry_executor":
+            "retry_executor": (
+                self._retry_executor.get_retry_stats() if self._retry_executor else {}
+            ),
         }
 
         # Legacy retry handler removed - retry executor stats are included above
 
         if self._health_monitor:
             stats["health_status"] = self._health_monitor.get_health_status()
 
         if self._metrics_collector:
             stats["operation_metrics"] = self._metrics_collector.get_operation_metrics()
             stats["locking_frequency"] = self._metrics_collector.get_locking_frequency()
 
         return stats
 
     async def check_health(self) -> Dict[str, Any]:
         """
         Perform an immediate health check and return detailed status.
 
         Returns:
             Dictionary with health check result and current metrics
         """
         if not self._health_monitor:
             return {"error": "Health monitoring not initialized"}
 
         # Perform immediate health check
         health_result = await self._health_monitor.check_health()
 
         return {
             "health_check": {
                 "is_healthy": health_result.is_healthy,
                 "response_time_ms": health_result.response_time_ms,
                 "error_message": health_result.error_message,
-                "timestamp": health_result.timestamp.isoformat()
+                "timestamp": health_result.timestamp.isoformat(),
             },
             "overall_status": self._health_monitor.get_health_status(),
-            "recent_history": self._health_monitor.get_recent_history()
+            "recent_history": self._health_monitor.get_recent_history(),
         }
 
     @asynccontextmanager
     async def get_immediate_transaction(
         self,
         operation_name: str = "immediate_transaction",
-        timeout_seconds: float = 10.0
+        timeout_seconds: float = 10.0,
     ) -> AsyncIterator[aiosqlite.Connection]:
         """
-        Get a database connection with BEGIN IMMEDIATE transaction and
+        Get a database connection with BEGIN IMMEDIATE transaction and
+        timeout.
+
+        This ensures write locks are acquired immediately, preventing lock
+        escalation failures that can occur with DEFERRED transactions.
 
         Args:
             operation_name: Name of the operation for monitoring
             timeout_seconds: Transaction timeout in seconds
@@ -392,15 +439,18 @@ class DatabaseManager:
             await conn.commit()
         except asyncio.TimeoutError:
             logger.warning(
-                ...
+                (
+                    f"Transaction timeout after {timeout_seconds}s for "
+                    f"{operation_name}"
+                ),
                 extra={
                     "structured_data": {
                         "transaction_timeout": {
                             "operation": operation_name,
-                            "timeout_seconds": timeout_seconds
+                            "timeout_seconds": timeout_seconds,
                         }
                     }
-                }
+                },
             )
             await conn.rollback()
             raise
@@ -408,62 +458,69 @@ class DatabaseManager:
             logger.error(f"Transaction failed for {operation_name}: {e}")
             await conn.rollback()
             raise
 
     async def execute_transaction_with_retry(
         self,
         operation_func,
         operation_name: str = "transaction_operation",
         max_retries: int = 3,
-        timeout_seconds: float = 10.0
+        timeout_seconds: float = 10.0,
     ) -> Any:
         """
-        Execute a database operation within a transaction with automatic
+        Execute a database operation within a transaction with automatic
+        retry.
+
+        Uses the new RetryExecutor for robust retry handling with proper
+        error classification and exponential backoff.
 
         Args:
-            operation_func: Async function that takes a connection and
+            operation_func: Async function that takes a connection and
+                performs the operation
             operation_name: Name of the operation for logging
-            max_retries: Maximum retry attempts (overrides default retry
+            max_retries: Maximum retry attempts (overrides default retry
+                executor config)
             timeout_seconds: Transaction timeout in seconds
 
         Returns:
             Result from operation_func
 
         Example:
             async def my_operation(conn):
                 await conn.execute("INSERT INTO ...", (...))
                 return "success"
 
-            result = await db.execute_transaction_with_retry(
+            result = await db.execute_transaction_with_retry(
+                my_operation, "insert_data"
+            )
         """
 
         async def execute_transaction():
-            """Inner function to execute transaction -
+            """Inner function to execute transaction - retried by executor."""
             try:
-                async with self.get_immediate_transaction(
+                async with self.get_immediate_transaction(
+                    operation_name, timeout_seconds
+                ) as conn:
                     result = await operation_func(conn)
 
                     # Record successful operation metrics
                     if self._metrics_collector:
                         self._metrics_collector.record_operation(
                             operation_name,
                             timeout_seconds * 1000,  # Convert to ms
                             True,
-                            len(self._connection_pool)
+                            len(self._connection_pool),
                         )
 
                     return result
 
             except (aiosqlite.OperationalError, asyncio.TimeoutError) as e:
                 # Record locking event for metrics
                 if self._metrics_collector and "locked" in str(e).lower():
                     self._metrics_collector.record_locking_event(operation_name, str(e))
 
                 # Classify the error for better handling
                 classified_error = classify_sqlite_error(e, operation_name)
 
                 # Record failed operation metrics for non-retryable errors
                 if not is_retryable_error(classified_error):
                     if self._metrics_collector:
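The docstring above already sketches the call pattern; here is a slightly fuller illustrative example (not from the package), assuming an initialized DatabaseManager `db` and the `projects` table created by the bundled migrations:

# Illustrative sketch; assumes `db` is an initialized DatabaseManager.
import aiosqlite


async def count_projects(conn: aiosqlite.Connection) -> int:
    # Runs inside the BEGIN IMMEDIATE transaction managed by the retry executor.
    cursor = await conn.execute("SELECT COUNT(*) FROM projects")
    row = await cursor.fetchone()
    return row[0]


async def run(db) -> None:
    total = await db.execute_transaction_with_retry(
        count_projects,
        "count_projects",
        max_retries=3,         # overrides the executor default for this call
        timeout_seconds=10.0,
    )
    print(f"{total} projects tracked")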
@@ -471,39 +528,48 @@ class DatabaseManager:
                         operation_name,
                         timeout_seconds * 1000,
                         False,
-                        len(self._connection_pool)
+                        len(self._connection_pool),
                     )
 
                 raise classified_error
 
         try:
-            # Create a temporary retry executor with custom max_retries if different
+            # Create a temporary retry executor with custom max_retries if different
+            # from default
             if max_retries != self._retry_executor.config.max_attempts:
-                from mcp_code_indexer.database.retry_executor import
+                from mcp_code_indexer.database.retry_executor import (
+                    RetryConfig,
+                    RetryExecutor,
+                )
 
                 temp_config = RetryConfig(
                     max_attempts=max_retries,
                     min_wait_seconds=self._retry_executor.config.min_wait_seconds,
                     max_wait_seconds=self._retry_executor.config.max_wait_seconds,
-                    jitter_max_seconds=self._retry_executor.config.jitter_max_seconds
+                    jitter_max_seconds=self._retry_executor.config.jitter_max_seconds,
                 )
                 temp_executor = RetryExecutor(temp_config)
-                return await temp_executor.execute_with_retry(
+                return await temp_executor.execute_with_retry(
+                    execute_transaction, operation_name
+                )
             else:
-                return await self._retry_executor.execute_with_retry(
-                    ...
+                return await self._retry_executor.execute_with_retry(
+                    execute_transaction, operation_name
+                )
 
+        except DatabaseError:
             # Record failed operation metrics for final failure
             if self._metrics_collector:
                 self._metrics_collector.record_operation(
                     operation_name,
                     timeout_seconds * 1000,
                     False,
-                    len(self._connection_pool)
+                    len(self._connection_pool),
                 )
             raise
 
     # Project operations
 
     async def create_project(self, project: Project) -> None:
         """Create a new project record."""
         async with self.get_write_connection_with_retry("create_project") as db:
@@ -517,137 +583,139 @@ class DatabaseManager:
                     project.name,
                     json.dumps(project.aliases),
                     project.created,
-                    project.last_accessed
-                )
+                    project.last_accessed,
+                ),
             )
             await db.commit()
             logger.debug(f"Created project: {project.id}")
 
     async def get_project(self, project_id: str) -> Optional[Project]:
         """Get project by ID."""
         async with self.get_connection() as db:
             cursor = await db.execute(
-                "SELECT * FROM projects WHERE id = ?",
-                (project_id,)
+                "SELECT * FROM projects WHERE id = ?", (project_id,)
             )
             row = await cursor.fetchone()
 
             if row:
                 return Project(
-                    ...
+                    id=row["id"],
+                    name=row["name"],
+                    aliases=json.loads(row["aliases"]),
+                    created=datetime.fromisoformat(row["created"]),
+                    last_accessed=datetime.fromisoformat(row["last_accessed"]),
                 )
             return None
 
     async def find_matching_project(
-        self,
-        project_name: str,
-        folder_path: Optional[str] = None
+        self, project_name: str, folder_path: Optional[str] = None
     ) -> Optional[Project]:
         """
         Find project by matching criteria.
 
         Args:
             project_name: Name of the project
             folder_path: Project folder path
 
         Returns:
             Matching project or None
         """
         projects = await self.get_all_projects()
         normalized_name = project_name.lower()
 
         best_match = None
         best_score = 0
 
         for project in projects:
             score = 0
             match_factors = []
 
             # Check name match (case-insensitive)
             if project.name.lower() == normalized_name:
                 score += 2  # Name match is primary identifier
                 match_factors.append("name")
 
             # Check folder path in aliases
             if folder_path and folder_path in project.aliases:
                 score += 1
                 match_factors.append("folder_path")
 
             # If we have a name match, it's a strong candidate
             if score >= 2:
                 if score > best_score:
                     best_score = score
                     best_match = project
-                    logger.info(
+                    logger.info(
+                        (
+                            f"Match for project {project.name} "
+                            f"(score: {score}, factors: {match_factors})"
+                        )
+                    )
 
         return best_match
 
     async def get_or_create_project(
-        self,
-        project_name: str,
-        folder_path: str
+        self, project_name: str, folder_path: str
     ) -> Project:
         """
         Get or create a project using intelligent matching.
 
         Args:
             project_name: Name of the project
             folder_path: Project folder path
 
         Returns:
             Existing or newly created project
         """
         # Try to find existing project
-        project = await self.find_matching_project(
-            ...
-        )
+        project = await self.find_matching_project(project_name, folder_path)
 
         if project:
             # Update aliases if folder path not already included
             if folder_path not in project.aliases:
                 project.aliases.append(folder_path)
                 await self.update_project(project)
-                logger.info(
+                logger.info(
+                    f"Added folder path {folder_path} to project {project.name} aliases"
+                )
 
             # Update access time
             await self.update_project_access_time(project.id)
             return project
 
         # Create new project
         from ..database.models import Project
         import uuid
 
         new_project = Project(
             id=str(uuid.uuid4()),
             name=project_name,
             aliases=[folder_path],
             created=datetime.utcnow(),
-            last_accessed=datetime.utcnow()
+            last_accessed=datetime.utcnow(),
         )
 
         await self.create_project(new_project)
         logger.info(f"Created new project: {new_project.name} ({new_project.id})")
         return new_project
 
     async def update_project_access_time(self, project_id: str) -> None:
         """Update the last accessed time for a project."""
-        async with self.get_write_connection_with_retry(
+        async with self.get_write_connection_with_retry(
+            "update_project_access_time"
+        ) as db:
             await db.execute(
                 "UPDATE projects SET last_accessed = ? WHERE id = ?",
-                (datetime.utcnow(), project_id)
+                (datetime.utcnow(), project_id),
             )
             await db.commit()
 
     async def update_project(self, project: Project) -> None:
         """Update an existing project record."""
         async with self.get_write_connection_with_retry("update_project") as db:
             await db.execute(
                 """
                 UPDATE projects
                 SET name = ?, aliases = ?, last_accessed = ?
                 WHERE id = ?
                 """,
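For context, a small illustrative sketch (not from the package) of the project-matching helpers above, assuming an initialized DatabaseManager `db`; the project name and folder path are placeholders:

# Illustrative sketch; assumes `db` is an initialized DatabaseManager.
async def ensure_project(db) -> str:
    # Reuses an existing project when the name matches (case-insensitive) or the
    # folder path is already a known alias; otherwise a new project is created.
    project = await db.get_or_create_project(
        "example-project",          # placeholder project name
        "/home/user/src/example",   # placeholder folder path, stored as an alias
    )
    print(project.name, project.aliases)
    return project.id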
@@ -655,12 +723,12 @@ class DatabaseManager:
                     project.name,
                     json.dumps(project.aliases),
                     project.last_accessed,
-                    project.id
-                )
+                    project.id,
+                ),
             )
             await db.commit()
             logger.debug(f"Updated project: {project.id}")
 
     async def get_all_projects(self) -> List[Project]:
         """Get all projects in the database."""
         async with self.get_connection() as db:
@@ -668,7 +736,7 @@ class DatabaseManager:
                 "SELECT id, name, aliases, created, last_accessed FROM projects"
             )
             rows = await cursor.fetchall()
 
             projects = []
             for row in rows:
                 aliases = json.loads(row[2]) if row[2] else []
@@ -677,23 +745,26 @@ class DatabaseManager:
                     name=row[1],
                     aliases=aliases,
                     created=row[3],
-                    last_accessed=row[4]
+                    last_accessed=row[4],
                 )
                 projects.append(project)
 
             return projects
 
     # File description operations
 
     async def create_file_description(self, file_desc: FileDescription) -> None:
         """Create or update a file description."""
-        async with self.get_write_connection_with_retry(
+        async with self.get_write_connection_with_retry(
+            "create_file_description"
+        ) as db:
             await db.execute(
                 """
                 INSERT OR REPLACE INTO file_descriptions
-                (
+                (
+                    project_id, file_path, description, file_hash, last_modified,
+                    version, source_project_id, to_be_cleaned
+                )
                 VALUES (?, ?, ?, ?, ?, ?, ?, ?)
                 """,
                 (
@@ -704,78 +775,77 @@ class DatabaseManager:
                     file_desc.last_modified,
                     file_desc.version,
                     file_desc.source_project_id,
-                    file_desc.to_be_cleaned
-                )
+                    file_desc.to_be_cleaned,
+                ),
             )
             await db.commit()
             logger.debug(f"Saved file description: {file_desc.file_path}")
 
     async def get_file_description(
-        self,
-        project_id: str,
-        file_path: str
+        self, project_id: str, file_path: str
     ) -> Optional[FileDescription]:
         """Get file description by project and path."""
         async with self.get_connection() as db:
             cursor = await db.execute(
                 """
                 SELECT * FROM file_descriptions
                 WHERE project_id = ? AND file_path = ? AND to_be_cleaned IS NULL
                 """,
-                (project_id, file_path)
+                (project_id, file_path),
             )
             row = await cursor.fetchone()
 
             if row:
                 return FileDescription(
-                    ...
+                    id=row["id"],
+                    project_id=row["project_id"],
+                    file_path=row["file_path"],
+                    description=row["description"],
+                    file_hash=row["file_hash"],
+                    last_modified=datetime.fromisoformat(row["last_modified"]),
+                    version=row["version"],
+                    source_project_id=row["source_project_id"],
+                    to_be_cleaned=row["to_be_cleaned"],
                 )
             return None
 
-    async def get_all_file_descriptions(
-        self,
-        project_id: str
-    ) -> List[FileDescription]:
+    async def get_all_file_descriptions(self, project_id: str) -> List[FileDescription]:
         """Get all file descriptions for a project."""
         async with self.get_connection() as db:
             cursor = await db.execute(
                 """
                 SELECT * FROM file_descriptions
                 WHERE project_id = ? AND to_be_cleaned IS NULL
                 ORDER BY file_path
                 """,
-                (project_id,)
+                (project_id,),
             )
             rows = await cursor.fetchall()
 
             return [
                 FileDescription(
-                    ...
+                    id=row["id"],
+                    project_id=row["project_id"],
+                    file_path=row["file_path"],
+                    description=row["description"],
+                    file_hash=row["file_hash"],
+                    last_modified=datetime.fromisoformat(row["last_modified"]),
+                    version=row["version"],
+                    source_project_id=row["source_project_id"],
+                    to_be_cleaned=row["to_be_cleaned"],
                 )
                 for row in rows
             ]
 
-    async def batch_create_file_descriptions(
-        ...
+    async def batch_create_file_descriptions(
+        self, file_descriptions: List[FileDescription]
+    ) -> None:
+        """
+        Batch create multiple file descriptions efficiently with optimized transactions.
+        """
         if not file_descriptions:
             return
 
         async def batch_operation(conn: aiosqlite.Connection) -> None:
             data = [
                 (
@@ -786,142 +856,137 @@ class DatabaseManager:
                     fd.last_modified,
                     fd.version,
                     fd.source_project_id,
-                    fd.to_be_cleaned
+                    fd.to_be_cleaned,
                 )
                 for fd in file_descriptions
             ]
 
             await conn.executemany(
                 """
                 INSERT OR REPLACE INTO file_descriptions
-                (
+                (
+                    project_id, file_path, description, file_hash, last_modified,
+                    version, source_project_id, to_be_cleaned
+                )
                 VALUES (?, ?, ?, ?, ?, ?, ?, ?)
                 """,
-                data
+                data,
             )
             logger.debug(f"Batch created {len(file_descriptions)} file descriptions")
 
         await self.execute_transaction_with_retry(
             batch_operation,
             f"batch_create_file_descriptions_{len(file_descriptions)}_files",
-            timeout_seconds=30.0  # Longer timeout for batch operations
+            timeout_seconds=30.0,  # Longer timeout for batch operations
         )
 
     # Search operations
 
     async def search_file_descriptions(
-        self,
-        project_id: str,
-        query: str,
-        max_results: int = 20
+        self, project_id: str, query: str, max_results: int = 20
     ) -> List[SearchResult]:
         """Search file descriptions using FTS5 with intelligent query preprocessing."""
         # Preprocess query for optimal FTS5 search
         preprocessed_query = preprocess_search_query(query)
 
         if not preprocessed_query:
             logger.debug(f"Empty query after preprocessing: '{query}'")
             return []
 
         logger.debug(f"Search query preprocessing: '{query}' -> '{preprocessed_query}'")
 
         async with self.get_connection() as db:
             cursor = await db.execute(
                 """
                 SELECT
                     fd.project_id,
                     fd.file_path,
                     fd.description,
                     bm25(file_descriptions_fts) as rank
                 FROM file_descriptions_fts
                 JOIN file_descriptions fd ON fd.id = file_descriptions_fts.rowid
                 WHERE file_descriptions_fts MATCH ?
                 AND fd.project_id = ?
                 AND fd.to_be_cleaned IS NULL
                 ORDER BY bm25(file_descriptions_fts)
                 LIMIT ?
                 """,
-                (preprocessed_query, project_id, max_results)
+                (preprocessed_query, project_id, max_results),
             )
             rows = await cursor.fetchall()
 
             return [
                 SearchResult(
-                    ...
+                    project_id=row["project_id"],
+                    file_path=row["file_path"],
+                    description=row["description"],
+                    relevance_score=row["rank"],
                 )
                 for row in rows
             ]
 
     # Token cache operations
 
     async def get_cached_token_count(self, cache_key: str) -> Optional[int]:
         """Get cached token count if not expired."""
         async with self.get_connection() as db:
             cursor = await db.execute(
                 """
                 SELECT token_count FROM token_cache
                 WHERE cache_key = ? AND (expires IS NULL OR expires > ?)
                 """,
-                (cache_key, datetime.utcnow())
+                (cache_key, datetime.utcnow()),
             )
             row = await cursor.fetchone()
-            return row[
+            return row["token_count"] if row else None
 
     async def cache_token_count(
-        self,
-        cache_key: str,
-        token_count: int,
-        ttl_hours: int = 24
+        self, cache_key: str, token_count: int, ttl_hours: int = 24
     ) -> None:
         """Cache token count with TTL."""
         expires = datetime.utcnow() + timedelta(hours=ttl_hours)
 
         async with self.get_write_connection() as db:
             await db.execute(
                 """
                 INSERT OR REPLACE INTO token_cache (cache_key, token_count, expires)
                 VALUES (?, ?, ?)
                 """,
-                (cache_key, token_count, expires)
+                (cache_key, token_count, expires),
            )
             await db.commit()
 
     async def cleanup_expired_cache(self) -> None:
         """Remove expired cache entries."""
         async with self.get_write_connection() as db:
             await db.execute(
-                "DELETE FROM token_cache WHERE expires < ?",
-                (datetime.utcnow(),)
+                "DELETE FROM token_cache WHERE expires < ?", (datetime.utcnow(),)
             )
             await db.commit()
 
     # Utility operations
 
     async def get_file_count(self, project_id: str) -> int:
         """Get count of files in a project."""
         async with self.get_connection() as db:
             cursor = await db.execute(
-                ...
+                (
+                    "SELECT COUNT(*) as count FROM file_descriptions WHERE "
+                    "project_id = ? AND to_be_cleaned IS NULL"
+                ),
+                (project_id,),
             )
             row = await cursor.fetchone()
-            return row[
+            return row["count"] if row else 0
 
     # Project Overview operations
 
     async def create_project_overview(self, overview: ProjectOverview) -> None:
         """Create or update a project overview."""
         async with self.get_write_connection() as db:
             await db.execute(
                 """
                 INSERT OR REPLACE INTO project_overviews
                 (project_id, overview, last_modified, total_files, total_tokens)
                 VALUES (?, ?, ?, ?, ?)
                 """,
@@ -930,258 +995,307 @@ class DatabaseManager:
                 overview.overview,
                 overview.last_modified,
                 overview.total_files,
-                overview.total_tokens
-            )
+                overview.total_tokens,
+            ),
             )
             await db.commit()
             logger.debug(f"Created/updated overview for project {overview.project_id}")
 
     async def get_project_overview(self, project_id: str) -> Optional[ProjectOverview]:
         """Get project overview by ID."""
         async with self.get_connection() as db:
             cursor = await db.execute(
-                "SELECT * FROM project_overviews WHERE project_id = ?",
-                (project_id,)
+                "SELECT * FROM project_overviews WHERE project_id = ?", (project_id,)
             )
             row = await cursor.fetchone()
 
             if row:
                 return ProjectOverview(
-                    ...
+                    project_id=row["project_id"],
+                    overview=row["overview"],
+                    last_modified=datetime.fromisoformat(row["last_modified"]),
+                    total_files=row["total_files"],
+                    total_tokens=row["total_tokens"],
                 )
             return None
 
-    async def cleanup_missing_files(
+    async def cleanup_missing_files(
+        self, project_id: str, project_root: Path
+    ) -> List[str]:
         """
         Mark descriptions for cleanup for files that no longer exist on disk.
 
         Args:
             project_id: Project identifier
             project_root: Path to project root directory
 
         Returns:
             List of file paths that were marked for cleanup
         """
         removed_files = []
 
         async def cleanup_operation(conn: aiosqlite.Connection) -> List[str]:
             # Get all active file descriptions for this project
             cursor = await conn.execute(
-                ...
+                (
+                    "SELECT file_path FROM file_descriptions WHERE "
+                    "project_id = ? AND to_be_cleaned IS NULL"
+                ),
+                (project_id,),
             )
 
             rows = await cursor.fetchall()
 
             # Check which files no longer exist
             to_remove = []
             for row in rows:
-                file_path = row[
+                file_path = row["file_path"]
                 full_path = project_root / file_path
 
                 if not full_path.exists():
                     to_remove.append(file_path)
 
             # Mark descriptions for cleanup instead of deleting
             if to_remove:
                 import time
+
                 cleanup_timestamp = int(time.time())
                 await conn.executemany(
-                    ...
+                    (
+                        "UPDATE file_descriptions SET to_be_cleaned = ? WHERE "
+                        "project_id = ? AND file_path = ?"
+                    ),
+                    [(cleanup_timestamp, project_id, path) for path in to_remove],
                 )
-                logger.info(
+                logger.info(
+                    (
+                        f"Marked {len(to_remove)} missing files for cleanup "
+                        f"from {project_id}"
+                    )
+                )
 
             return to_remove
 
         removed_files = await self.execute_transaction_with_retry(
             cleanup_operation,
             f"cleanup_missing_files_{project_id}",
-            timeout_seconds=60.0  # Longer timeout for file system operations
+            timeout_seconds=60.0,  # Longer timeout for file system operations
         )
 
         return removed_files
 
-    async def analyze_word_frequency(
+    async def analyze_word_frequency(
+        self, project_id: str, limit: int = 200
+    ) -> WordFrequencyResult:
         """
         Analyze word frequency across all file descriptions for a project.
 
         Args:
             project_id: Project identifier
             limit: Maximum number of top terms to return
 
         Returns:
             WordFrequencyResult with top terms and statistics
         """
         from collections import Counter
         import re
 
         # Load stop words from bundled file
-        stop_words_path =
+        stop_words_path = (
+            Path(__file__).parent.parent / "data" / "stop_words_english.txt"
+        )
         stop_words = set()
 
         if stop_words_path.exists():
-            with open(stop_words_path,
+            with open(stop_words_path, "r", encoding="utf-8") as f:
                 for line in f:
                     # Each line contains just the stop word
                     word = line.strip().lower()
                     if word:  # Skip empty lines
                         stop_words.add(word)
 
         # Add common programming keywords to stop words
         programming_keywords = {
-            ...
+            "if", "else", "for", "while", "do", "break", "continue", "return",
+            "function", "class", "def", "var", "let", "const", "public",
+            "private", "static", "async", "await", "import", "export", "from",
+            "true", "false", "null", "undefined", "this", "that", "self",
+            "super", "new", "delete",
         }
         stop_words.update(programming_keywords)
 
         async with self.get_connection() as db:
             # Get all descriptions for this project
             cursor = await db.execute(
-                ...
+                (
+                    "SELECT description FROM file_descriptions WHERE "
+                    "project_id = ? AND to_be_cleaned IS NULL"
+                ),
+                (project_id,),
             )
 
             rows = await cursor.fetchall()
 
             # Combine all descriptions
-            all_text = " ".join(row[
+            all_text = " ".join(row["description"] for row in rows)
 
             # Tokenize and filter
-            words = re.findall(r
+            words = re.findall(r"\b[a-zA-Z]{2,}\b", all_text.lower())
             filtered_words = [word for word in words if word not in stop_words]
 
             # Count frequencies
             word_counts = Counter(filtered_words)
 
             # Create result
             top_terms = [
                 WordFrequencyTerm(term=term, frequency=count)
                 for term, count in word_counts.most_common(limit)
             ]
 
             return WordFrequencyResult(
-                ...
+                top_terms=top_terms,
+                total_terms_analyzed=len(filtered_words),
+                total_unique_terms=len(word_counts),
             )
 
     async def cleanup_empty_projects(self) -> int:
         """
         Remove projects that have no file descriptions and no project overview.
 
         Returns:
             Number of projects removed
         """
         async with self.get_write_connection() as db:
             # Find projects with no descriptions and no overview
-            cursor = await db.execute(
+            cursor = await db.execute(
+                """
+                SELECT p.id, p.name
                 FROM projects p
                 LEFT JOIN file_descriptions fd ON p.id = fd.project_id
                 LEFT JOIN project_overviews po ON p.id = po.project_id
                 WHERE fd.project_id IS NULL AND po.project_id IS NULL
-                """
+                """
+            )
 
             empty_projects = await cursor.fetchall()
 
             if not empty_projects:
                 return 0
 
             removed_count = 0
             for project in empty_projects:
-                project_id = project[
-                project_name = project[
+                project_id = project["id"]
+                project_name = project["name"]
 
                 # Remove from projects table (cascading will handle related data)
                 await db.execute("DELETE FROM projects WHERE id = ?", (project_id,))
                 removed_count += 1
 
                 logger.info(f"Removed empty project: {project_name} (ID: {project_id})")
 
             await db.commit()
             return removed_count
 
     async def get_project_map_data(self, project_identifier: str) -> dict:
         """
         Get all data needed to generate a project map.
 
         Args:
             project_identifier: Project name or ID
 
         Returns:
             Dictionary containing project info, overview, and file descriptions
         """
         async with self.get_connection() as db:
             # Try to find project by ID first, then by name
-            if len(project_identifier) == 36 and
+            if len(project_identifier) == 36 and "-" in project_identifier:
                 # Looks like a UUID
                 cursor = await db.execute(
-                    "SELECT * FROM projects WHERE id = ?",
-                    (project_identifier,)
+                    "SELECT * FROM projects WHERE id = ?", (project_identifier,)
                 )
             else:
                 # Search by name
                 cursor = await db.execute(
                     "SELECT * FROM projects WHERE LOWER(name) = LOWER(?)",
-                    (project_identifier,)
+                    (project_identifier,),
                 )
 
             project_row = await cursor.fetchone()
             if not project_row:
                 return None
 
             # Handle aliases JSON parsing
             project_dict = dict(project_row)
-            if isinstance(project_dict[
+            if isinstance(project_dict["aliases"], str):
                 import json
+
+                project_dict["aliases"] = json.loads(project_dict["aliases"])
+
             project = Project(**project_dict)
 
             # Get project overview
             cursor = await db.execute(
-                "SELECT * FROM project_overviews WHERE project_id = ?",
-                (project.id,)
+                "SELECT * FROM project_overviews WHERE project_id = ?", (project.id,)
             )
             overview_row = await cursor.fetchone()
             project_overview = ProjectOverview(**overview_row) if overview_row else None
 
             # Get all file descriptions for this project
             cursor = await db.execute(
                 """SELECT * FROM file_descriptions
                 WHERE project_id = ? AND to_be_cleaned IS NULL
                 ORDER BY file_path""",
-                (project.id,)
+                (project.id,),
             )
             file_rows = await cursor.fetchall()
             file_descriptions = [FileDescription(**row) for row in file_rows]
 
             return {
-                ...
+                "project": project,
+                "overview": project_overview,
+                "files": file_descriptions,
             }
 
     # Cleanup operations
 
     @property
     def cleanup_manager(self) -> CleanupManager:
         """Get the cleanup manager instance."""
         if self._cleanup_manager is None:
             self._cleanup_manager = CleanupManager(self, retention_months=6)
         return self._cleanup_manager
 
     async def mark_file_for_cleanup(self, project_id: str, file_path: str) -> bool:
         """Mark a file for cleanup. Convenience method."""
         return await self.cleanup_manager.mark_file_for_cleanup(project_id, file_path)
 
     async def perform_cleanup(self, project_id: Optional[str] = None) -> int:
         """Perform cleanup of old records. Convenience method."""
         return await self.cleanup_manager.perform_cleanup(project_id)