mcp-code-indexer 3.1.3__py3-none-any.whl → 3.1.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -7,28 +7,31 @@ connection management, transaction handling, and performance optimizations.
7
7
 
8
8
  import json
9
9
  import logging
10
- import sqlite3
11
10
  from contextlib import asynccontextmanager
12
11
  from datetime import datetime, timedelta
13
12
  from pathlib import Path
14
- from typing import List, Optional, Dict, Any, Tuple, AsyncIterator
13
+ from typing import List, Optional, Dict, Any, AsyncIterator
15
14
 
16
15
  import asyncio
17
- import random
18
16
  import aiosqlite
19
17
 
20
18
  from mcp_code_indexer.database.models import (
21
- Project, FileDescription, MergeConflict, SearchResult,
22
- CodebaseSizeInfo, ProjectOverview, WordFrequencyResult, WordFrequencyTerm
23
- )
24
- from mcp_code_indexer.database.retry_executor import (
25
- RetryExecutor, create_retry_executor
19
+ Project,
20
+ FileDescription,
21
+ SearchResult,
22
+ ProjectOverview,
23
+ WordFrequencyResult,
24
+ WordFrequencyTerm,
26
25
  )
26
+ from mcp_code_indexer.database.retry_executor import create_retry_executor
27
27
  from mcp_code_indexer.database.exceptions import (
28
- DatabaseError, DatabaseLockError, classify_sqlite_error, is_retryable_error
28
+ DatabaseError,
29
+ classify_sqlite_error,
30
+ is_retryable_error,
29
31
  )
30
32
  from mcp_code_indexer.database.connection_health import (
31
- ConnectionHealthMonitor, DatabaseMetricsCollector
33
+ ConnectionHealthMonitor,
34
+ DatabaseMetricsCollector,
32
35
  )
33
36
  from mcp_code_indexer.query_preprocessor import preprocess_search_query
34
37
  from mcp_code_indexer.cleanup_manager import CleanupManager
@@ -39,21 +42,23 @@ logger = logging.getLogger(__name__)
39
42
  class DatabaseManager:
40
43
  """
41
44
  Manages SQLite database operations with async support.
42
-
45
+
43
46
  Provides high-level operations for projects, file descriptions, search,
44
47
  and caching with proper transaction management and error handling.
45
48
  """
46
-
47
- def __init__(self,
48
- db_path: Path,
49
- pool_size: int = 3,
50
- retry_count: int = 5,
51
- timeout: float = 10.0,
52
- enable_wal_mode: bool = True,
53
- health_check_interval: float = 30.0,
54
- retry_min_wait: float = 0.1,
55
- retry_max_wait: float = 2.0,
56
- retry_jitter: float = 0.2):
49
+
50
+ def __init__(
51
+ self,
52
+ db_path: Path,
53
+ pool_size: int = 3,
54
+ retry_count: int = 5,
55
+ timeout: float = 10.0,
56
+ enable_wal_mode: bool = True,
57
+ health_check_interval: float = 30.0,
58
+ retry_min_wait: float = 0.1,
59
+ retry_max_wait: float = 2.0,
60
+ retry_jitter: float = 0.2,
61
+ ):
57
62
  """Initialize database manager with path to SQLite database."""
58
63
  self.db_path = db_path
59
64
  self.pool_size = pool_size
@@ -66,167 +71,193 @@ class DatabaseManager:
66
71
  self.retry_jitter = retry_jitter
67
72
  self._connection_pool: List[aiosqlite.Connection] = []
68
73
  self._pool_lock = None # Will be initialized in async context
69
- self._write_lock = None # Write serialization lock, initialized in async context
70
-
74
+ self._write_lock = None # Write serialization lock, async context
75
+
71
76
  # Retry and recovery components - configure with provided settings
72
77
  self._retry_executor = create_retry_executor(
73
78
  max_attempts=retry_count,
74
79
  min_wait_seconds=retry_min_wait,
75
80
  max_wait_seconds=retry_max_wait,
76
- jitter_max_seconds=retry_jitter
81
+ jitter_max_seconds=retry_jitter,
77
82
  )
78
-
83
+
79
84
  # Health monitoring and metrics
80
85
  self._health_monitor = None # Initialized in async context
81
86
  self._metrics_collector = DatabaseMetricsCollector()
82
-
87
+
83
88
  # Cleanup manager for retention policies
84
89
  self._cleanup_manager = None # Initialized in async context
85
-
90
+
86
91
  async def initialize(self) -> None:
87
92
  """Initialize database schema and configuration."""
88
93
  import asyncio
89
-
94
+
90
95
  # Initialize locks
91
96
  self._pool_lock = asyncio.Lock()
92
97
  self._write_lock = asyncio.Lock()
93
-
98
+
94
99
  # Connection recovery is now handled by the retry executor
95
-
100
+
96
101
  # Initialize health monitoring with configured interval
97
102
  self._health_monitor = ConnectionHealthMonitor(
98
- self,
103
+ self,
99
104
  check_interval=self.health_check_interval,
100
- timeout_seconds=self.timeout
105
+ timeout_seconds=self.timeout,
101
106
  )
102
107
  await self._health_monitor.start_monitoring()
103
-
108
+
104
109
  # Initialize cleanup manager
105
110
  self._cleanup_manager = CleanupManager(self, retention_months=6)
106
-
111
+
107
112
  # Ensure database directory exists
108
113
  self.db_path.parent.mkdir(parents=True, exist_ok=True)
109
-
114
+
110
115
  # Database initialization now uses the modern retry executor directly
111
-
116
+
112
117
  # Apply migrations in order
113
118
  # Migrations are now bundled with the package
114
119
  migrations_dir = Path(__file__).parent.parent / "migrations"
115
120
  if not migrations_dir.exists():
116
- raise RuntimeError(f"Could not find migrations directory at {migrations_dir}")
121
+ raise RuntimeError(
122
+ f"Could not find migrations directory at {migrations_dir}"
123
+ )
117
124
  migration_files = sorted(migrations_dir.glob("*.sql"))
118
-
125
+
119
126
  async with aiosqlite.connect(self.db_path) as db:
120
127
  # Enable row factory for easier data access
121
128
  db.row_factory = aiosqlite.Row
122
-
129
+
123
130
  # Configure WAL mode and optimizations for concurrent access
124
- await self._configure_database_optimizations(db, include_wal_mode=self.enable_wal_mode)
125
-
131
+ await self._configure_database_optimizations(
132
+ db, include_wal_mode=self.enable_wal_mode
133
+ )
134
+
126
135
  # Create migrations tracking table
127
- await db.execute('''
136
+ await db.execute(
137
+ """
128
138
  CREATE TABLE IF NOT EXISTS migrations (
129
139
  id INTEGER PRIMARY KEY AUTOINCREMENT,
130
140
  filename TEXT UNIQUE NOT NULL,
131
141
  applied_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
132
142
  )
133
- ''')
143
+ """
144
+ )
134
145
  await db.commit()
135
-
146
+
136
147
  # Get list of already applied migrations
137
- cursor = await db.execute('SELECT filename FROM migrations')
148
+ cursor = await db.execute("SELECT filename FROM migrations")
138
149
  applied_migrations = {row[0] for row in await cursor.fetchall()}
139
-
150
+
140
151
  # Apply each migration that hasn't been applied yet
141
152
  for migration_file in migration_files:
142
153
  migration_name = migration_file.name
143
154
  if migration_name in applied_migrations:
144
155
  logger.info(f"Skipping already applied migration: {migration_name}")
145
156
  continue
146
-
157
+
147
158
  logger.info(f"Applying migration: {migration_name}")
148
159
  try:
149
- migration_sql = migration_file.read_text(encoding='utf-8')
160
+ migration_sql = migration_file.read_text(encoding="utf-8")
150
161
  except AttributeError:
151
162
  # Fallback for regular file objects
152
- with open(migration_file, 'r', encoding='utf-8') as f:
163
+ with open(migration_file, "r", encoding="utf-8") as f:
153
164
  migration_sql = f.read()
154
-
165
+
155
166
  try:
156
167
  await db.executescript(migration_sql)
157
-
168
+
158
169
  # Record that migration was applied
159
- await db.execute('INSERT INTO migrations (filename) VALUES (?)', (migration_name,))
170
+ await db.execute(
171
+ "INSERT INTO migrations (filename) VALUES (?)",
172
+ (migration_name,),
173
+ )
160
174
  await db.commit()
161
175
  logger.info(f"Successfully applied migration: {migration_name}")
162
176
  except Exception as e:
163
177
  logger.error(f"Failed to apply migration {migration_name}: {e}")
164
178
  await db.rollback()
165
179
  raise
166
-
167
- logger.info(f"Database initialized at {self.db_path} with {len(migration_files)} total migrations")
168
-
169
- async def _configure_database_optimizations(self, db: aiosqlite.Connection, include_wal_mode: bool = True) -> None:
180
+
181
+ logger.info(
182
+ (
183
+ f"Database initialized at {self.db_path} with "
184
+ f"{len(migration_files)} total migrations"
185
+ )
186
+ )
187
+
188
+ async def _configure_database_optimizations(
189
+ self,
190
+ db: aiosqlite.Connection,
191
+ include_wal_mode: bool = True,
192
+ ) -> None:
170
193
  """
171
194
  Configure SQLite optimizations for concurrent access and performance.
172
-
195
+
173
196
  Args:
174
197
  db: Database connection to configure
175
- include_wal_mode: Whether to set WAL mode (only needed once per database)
198
+ include_wal_mode: Whether to set WAL mode (only needed once per
199
+ database)
176
200
  """
177
201
  optimizations = []
178
-
202
+
179
203
  # WAL mode is database-level, only set during initialization
180
204
  if include_wal_mode:
181
205
  optimizations.append("PRAGMA journal_mode = WAL")
182
206
  logger.info("Enabling WAL mode for database concurrency")
183
-
207
+
184
208
  # Connection-level optimizations that can be set per connection
185
- optimizations.extend([
186
- "PRAGMA synchronous = NORMAL", # Balance durability/performance
187
- "PRAGMA cache_size = -64000", # 64MB cache
188
- "PRAGMA temp_store = MEMORY", # Use memory for temp tables
189
- "PRAGMA mmap_size = 268435456", # 256MB memory mapping
190
- "PRAGMA busy_timeout = 10000", # 10 second timeout (reduced from 30s)
191
- "PRAGMA optimize" # Enable query planner optimizations
192
- ])
193
-
209
+ optimizations.extend(
210
+ [
211
+ "PRAGMA synchronous = NORMAL", # Balance durability/performance
212
+ "PRAGMA cache_size = -64000", # 64MB cache
213
+ "PRAGMA temp_store = MEMORY", # Use memory for temp tables
214
+ "PRAGMA mmap_size = 268435456", # 256MB memory mapping
215
+ "PRAGMA busy_timeout = 10000", # 10s timeout (reduced from 30s)
216
+ "PRAGMA optimize", # Enable query planner optimizations
217
+ ]
218
+ )
219
+
194
220
  # WAL-specific settings (only if WAL mode is being set)
195
221
  if include_wal_mode:
196
- optimizations.append("PRAGMA wal_autocheckpoint = 1000") # Checkpoint after 1000 pages
197
-
222
+ optimizations.append(
223
+ "PRAGMA wal_autocheckpoint = 1000"
224
+ ) # Checkpoint after 1000 pages
225
+
198
226
  for pragma in optimizations:
199
227
  try:
200
228
  await db.execute(pragma)
201
229
  logger.debug(f"Applied optimization: {pragma}")
202
230
  except Exception as e:
203
231
  logger.warning(f"Failed to apply optimization '{pragma}': {e}")
204
-
232
+
205
233
  await db.commit()
206
234
  if include_wal_mode:
207
- logger.info("Database optimizations configured for concurrent access with WAL mode")
235
+ logger.info(
236
+ "Database optimizations configured for concurrent access "
237
+ "with WAL mode"
238
+ )
208
239
  else:
209
240
  logger.debug("Connection optimizations applied")
210
-
241
+
211
242
  @asynccontextmanager
212
243
  async def get_connection(self) -> AsyncIterator[aiosqlite.Connection]:
213
244
  """Get a database connection from pool or create new one."""
214
245
  conn = None
215
-
246
+
216
247
  # Try to get from pool
217
248
  if self._pool_lock:
218
249
  async with self._pool_lock:
219
250
  if self._connection_pool:
220
251
  conn = self._connection_pool.pop()
221
-
252
+
222
253
  # Create new connection if none available
223
254
  if conn is None:
224
255
  conn = await aiosqlite.connect(self.db_path)
225
256
  conn.row_factory = aiosqlite.Row
226
-
227
- # Apply connection-level optimizations (WAL mode already set during initialization)
257
+
258
+ # Apply connection-level optimizations (WAL mode set during init)
228
259
  await self._configure_database_optimizations(conn, include_wal_mode=False)
229
-
260
+
230
261
  try:
231
262
  yield conn
232
263
  finally:
@@ -237,74 +268,83 @@ class DatabaseManager:
237
268
  if len(self._connection_pool) < self.pool_size:
238
269
  self._connection_pool.append(conn)
239
270
  returned_to_pool = True
240
-
271
+
241
272
  if not returned_to_pool:
242
273
  await conn.close()
243
-
274
+
244
275
  async def close_pool(self) -> None:
245
276
  """Close all connections in the pool and stop monitoring."""
246
277
  # Stop health monitoring
247
278
  if self._health_monitor:
248
279
  await self._health_monitor.stop_monitoring()
249
-
280
+
250
281
  # Close connections
251
282
  if self._pool_lock:
252
283
  async with self._pool_lock:
253
284
  for conn in self._connection_pool:
254
285
  await conn.close()
255
286
  self._connection_pool.clear()
256
-
287
+
257
288
  @asynccontextmanager
258
289
  async def get_write_connection(self) -> AsyncIterator[aiosqlite.Connection]:
259
290
  """
260
291
  Get a database connection with write serialization.
261
-
262
- This ensures only one write operation occurs at a time across the entire
263
- application, preventing database locking issues in multi-client scenarios.
292
+
293
+ This ensures only one write operation occurs at a time across the
294
+ entire application, preventing database locking issues in
295
+ multi-client scenarios.
264
296
  """
265
297
  if self._write_lock is None:
266
- raise RuntimeError("DatabaseManager not initialized - call initialize() first")
267
-
298
+ raise RuntimeError(
299
+ "DatabaseManager not initialized - call initialize() first"
300
+ )
301
+
268
302
  async with self._write_lock:
269
303
  async with self.get_connection() as conn:
270
304
  yield conn
271
-
305
+
272
306
  @asynccontextmanager
273
- async def get_write_connection_with_retry(self, operation_name: str = "write_operation") -> AsyncIterator[aiosqlite.Connection]:
307
+ async def get_write_connection_with_retry(
308
+ self, operation_name: str = "write_operation"
309
+ ) -> AsyncIterator[aiosqlite.Connection]:
274
310
  """
275
- Get a database connection with write serialization and automatic retry logic.
276
-
277
- This uses the new RetryExecutor to properly handle retry logic without
278
- the broken yield-in-retry-loop pattern that caused generator errors.
279
-
311
+ Get a database connection with write serialization and automatic
312
+ retry logic.
313
+
314
+ This uses the new RetryExecutor to properly handle retry logic
315
+ without the broken yield-in-retry-loop pattern that caused
316
+ generator errors.
317
+
280
318
  Args:
281
- operation_name: Name of the operation for logging and monitoring
319
+ operation_name: Name of the operation for logging and
320
+ monitoring
282
321
  """
283
322
  if self._write_lock is None:
284
- raise RuntimeError("DatabaseManager not initialized - call initialize() first")
285
-
323
+ raise RuntimeError(
324
+ "DatabaseManager not initialized - call initialize() first"
325
+ )
326
+
286
327
  async def get_write_connection():
287
- """Inner function to get connection - will be retried by executor."""
328
+ """Inner function to get connection - retried by executor."""
288
329
  async with self._write_lock:
289
330
  async with self.get_connection() as conn:
290
331
  return conn
291
-
332
+
292
333
  try:
293
334
  # Use retry executor to handle connection acquisition with retries
294
335
  connection = await self._retry_executor.execute_with_retry(
295
- get_write_connection,
296
- operation_name
336
+ get_write_connection, operation_name
297
337
  )
298
-
338
+
299
339
  try:
300
340
  yield connection
301
-
341
+
302
342
  # Success - retry executor handles all failure tracking
303
-
304
- except Exception as e:
343
+
344
+ except Exception:
305
345
  # Error handling is managed by the retry executor
306
346
  raise
307
-
347
+
308
348
  except DatabaseError:
309
349
  # Re-raise our custom database errors as-is
310
350
  raise
@@ -312,73 +352,80 @@ class DatabaseManager:
312
352
  # Classify and wrap other exceptions
313
353
  classified_error = classify_sqlite_error(e, operation_name)
314
354
  logger.error(
315
- f"Database operation '{operation_name}' failed: {classified_error.message}",
316
- extra={"structured_data": classified_error.to_dict()}
355
+ (
356
+ f"Database operation '{operation_name}' failed: "
357
+ f"{classified_error.message}"
358
+ ),
359
+ extra={"structured_data": classified_error.to_dict()},
317
360
  )
318
361
  raise classified_error
319
-
362
+
320
363
  def get_database_stats(self) -> Dict[str, Any]:
321
364
  """
322
365
  Get database performance and reliability statistics.
323
-
366
+
324
367
  Returns:
325
- Dictionary with retry stats, recovery stats, health status, and metrics
368
+ Dictionary with retry stats, recovery stats, health status,
369
+ and metrics
326
370
  """
327
371
  stats = {
328
372
  "connection_pool": {
329
373
  "configured_size": self.pool_size,
330
- "current_size": len(self._connection_pool)
374
+ "current_size": len(self._connection_pool),
331
375
  },
332
- "retry_executor": self._retry_executor.get_retry_stats() if self._retry_executor else {},
376
+ "retry_executor": (
377
+ self._retry_executor.get_retry_stats() if self._retry_executor else {}
378
+ ),
333
379
  }
334
-
380
+
335
381
  # Legacy retry handler removed - retry executor stats are included above
336
-
382
+
337
383
  if self._health_monitor:
338
384
  stats["health_status"] = self._health_monitor.get_health_status()
339
-
385
+
340
386
  if self._metrics_collector:
341
387
  stats["operation_metrics"] = self._metrics_collector.get_operation_metrics()
342
388
  stats["locking_frequency"] = self._metrics_collector.get_locking_frequency()
343
-
389
+
344
390
  return stats
345
-
391
+
346
392
  async def check_health(self) -> Dict[str, Any]:
347
393
  """
348
394
  Perform an immediate health check and return detailed status.
349
-
395
+
350
396
  Returns:
351
397
  Dictionary with health check result and current metrics
352
398
  """
353
399
  if not self._health_monitor:
354
400
  return {"error": "Health monitoring not initialized"}
355
-
401
+
356
402
  # Perform immediate health check
357
403
  health_result = await self._health_monitor.check_health()
358
-
404
+
359
405
  return {
360
406
  "health_check": {
361
407
  "is_healthy": health_result.is_healthy,
362
408
  "response_time_ms": health_result.response_time_ms,
363
409
  "error_message": health_result.error_message,
364
- "timestamp": health_result.timestamp.isoformat()
410
+ "timestamp": health_result.timestamp.isoformat(),
365
411
  },
366
412
  "overall_status": self._health_monitor.get_health_status(),
367
- "recent_history": self._health_monitor.get_recent_history()
413
+ "recent_history": self._health_monitor.get_recent_history(),
368
414
  }
369
-
415
+
370
416
  @asynccontextmanager
371
417
  async def get_immediate_transaction(
372
- self,
418
+ self,
373
419
  operation_name: str = "immediate_transaction",
374
- timeout_seconds: float = 10.0
420
+ timeout_seconds: float = 10.0,
375
421
  ) -> AsyncIterator[aiosqlite.Connection]:
376
422
  """
377
- Get a database connection with BEGIN IMMEDIATE transaction and timeout.
378
-
379
- This ensures write locks are acquired immediately, preventing lock escalation
380
- failures that can occur with DEFERRED transactions.
381
-
423
+ Get a database connection with BEGIN IMMEDIATE transaction and
424
+ timeout.
425
+
426
+ This ensures write locks are acquired immediately, preventing lock
427
+ escalation failures that can occur with DEFERRED transactions.
428
+
382
429
  Args:
383
430
  operation_name: Name of the operation for monitoring
384
431
  timeout_seconds: Transaction timeout in seconds
@@ -392,15 +439,18 @@ class DatabaseManager:
392
439
  await conn.commit()
393
440
  except asyncio.TimeoutError:
394
441
  logger.warning(
395
- f"Transaction timeout after {timeout_seconds}s for {operation_name}",
442
+ (
443
+ f"Transaction timeout after {timeout_seconds}s for "
444
+ f"{operation_name}"
445
+ ),
396
446
  extra={
397
447
  "structured_data": {
398
448
  "transaction_timeout": {
399
449
  "operation": operation_name,
400
- "timeout_seconds": timeout_seconds
450
+ "timeout_seconds": timeout_seconds,
401
451
  }
402
452
  }
403
- }
453
+ },
404
454
  )
405
455
  await conn.rollback()
406
456
  raise
@@ -408,62 +458,69 @@ class DatabaseManager:
408
458
  logger.error(f"Transaction failed for {operation_name}: {e}")
409
459
  await conn.rollback()
410
460
  raise
411
-
461
+
412
462
  async def execute_transaction_with_retry(
413
463
  self,
414
464
  operation_func,
415
465
  operation_name: str = "transaction_operation",
416
466
  max_retries: int = 3,
417
- timeout_seconds: float = 10.0
467
+ timeout_seconds: float = 10.0,
418
468
  ) -> Any:
419
469
  """
420
- Execute a database operation within a transaction with automatic retry.
421
-
422
- Uses the new RetryExecutor for robust retry handling with proper error
423
- classification and exponential backoff.
424
-
470
+ Execute a database operation within a transaction with automatic
471
+ retry.
472
+
473
+ Uses the new RetryExecutor for robust retry handling with proper
474
+ error classification and exponential backoff.
475
+
425
476
  Args:
426
- operation_func: Async function that takes a connection and performs the operation
477
+ operation_func: Async function that takes a connection and
478
+ performs the operation
427
479
  operation_name: Name of the operation for logging
428
- max_retries: Maximum retry attempts (overrides default retry executor config)
480
+ max_retries: Maximum retry attempts (overrides default retry
481
+ executor config)
429
482
  timeout_seconds: Transaction timeout in seconds
430
-
483
+
431
484
  Returns:
432
485
  Result from operation_func
433
-
486
+
434
487
  Example:
435
488
  async def my_operation(conn):
436
489
  await conn.execute("INSERT INTO ...", (...))
437
490
  return "success"
438
-
439
- result = await db.execute_transaction_with_retry(my_operation, "insert_data")
491
+
492
+ result = await db.execute_transaction_with_retry(
493
+ my_operation, "insert_data"
494
+ )
440
495
  """
441
-
496
+
442
497
  async def execute_transaction():
443
- """Inner function to execute transaction - will be retried by executor."""
498
+ """Inner function to execute transaction - retried by executor."""
444
499
  try:
445
- async with self.get_immediate_transaction(operation_name, timeout_seconds) as conn:
500
+ async with self.get_immediate_transaction(
501
+ operation_name, timeout_seconds
502
+ ) as conn:
446
503
  result = await operation_func(conn)
447
-
504
+
448
505
  # Record successful operation metrics
449
506
  if self._metrics_collector:
450
507
  self._metrics_collector.record_operation(
451
- operation_name,
508
+ operation_name,
452
509
  timeout_seconds * 1000, # Convert to ms
453
510
  True,
454
- len(self._connection_pool)
511
+ len(self._connection_pool),
455
512
  )
456
-
513
+
457
514
  return result
458
-
515
+
459
516
  except (aiosqlite.OperationalError, asyncio.TimeoutError) as e:
460
517
  # Record locking event for metrics
461
518
  if self._metrics_collector and "locked" in str(e).lower():
462
519
  self._metrics_collector.record_locking_event(operation_name, str(e))
463
-
520
+
464
521
  # Classify the error for better handling
465
522
  classified_error = classify_sqlite_error(e, operation_name)
466
-
523
+
467
524
  # Record failed operation metrics for non-retryable errors
468
525
  if not is_retryable_error(classified_error):
469
526
  if self._metrics_collector:
@@ -471,39 +528,48 @@ class DatabaseManager:
471
528
  operation_name,
472
529
  timeout_seconds * 1000,
473
530
  False,
474
- len(self._connection_pool)
531
+ len(self._connection_pool),
475
532
  )
476
-
533
+
477
534
  raise classified_error
478
-
535
+
479
536
  try:
480
- # Create a temporary retry executor with custom max_retries if different from default
537
+ # Create a temporary retry executor with custom max_retries if different
538
+ # from default
481
539
  if max_retries != self._retry_executor.config.max_attempts:
482
- from mcp_code_indexer.database.retry_executor import RetryConfig, RetryExecutor
540
+ from mcp_code_indexer.database.retry_executor import (
541
+ RetryConfig,
542
+ RetryExecutor,
543
+ )
544
+
483
545
  temp_config = RetryConfig(
484
546
  max_attempts=max_retries,
485
547
  min_wait_seconds=self._retry_executor.config.min_wait_seconds,
486
548
  max_wait_seconds=self._retry_executor.config.max_wait_seconds,
487
- jitter_max_seconds=self._retry_executor.config.jitter_max_seconds
549
+ jitter_max_seconds=self._retry_executor.config.jitter_max_seconds,
488
550
  )
489
551
  temp_executor = RetryExecutor(temp_config)
490
- return await temp_executor.execute_with_retry(execute_transaction, operation_name)
552
+ return await temp_executor.execute_with_retry(
553
+ execute_transaction, operation_name
554
+ )
491
555
  else:
492
- return await self._retry_executor.execute_with_retry(execute_transaction, operation_name)
493
-
494
- except DatabaseError as e:
556
+ return await self._retry_executor.execute_with_retry(
557
+ execute_transaction, operation_name
558
+ )
559
+
560
+ except DatabaseError:
495
561
  # Record failed operation metrics for final failure
496
562
  if self._metrics_collector:
497
563
  self._metrics_collector.record_operation(
498
564
  operation_name,
499
565
  timeout_seconds * 1000,
500
566
  False,
501
- len(self._connection_pool)
567
+ len(self._connection_pool),
502
568
  )
503
569
  raise
504
-
570
+
505
571
  # Project operations
506
-
572
+
507
573
  async def create_project(self, project: Project) -> None:
508
574
  """Create a new project record."""
509
575
  async with self.get_write_connection_with_retry("create_project") as db:
@@ -517,137 +583,139 @@ class DatabaseManager:
517
583
  project.name,
518
584
  json.dumps(project.aliases),
519
585
  project.created,
520
- project.last_accessed
521
- )
586
+ project.last_accessed,
587
+ ),
522
588
  )
523
589
  await db.commit()
524
590
  logger.debug(f"Created project: {project.id}")
525
-
591
+
526
592
  async def get_project(self, project_id: str) -> Optional[Project]:
527
593
  """Get project by ID."""
528
594
  async with self.get_connection() as db:
529
595
  cursor = await db.execute(
530
- "SELECT * FROM projects WHERE id = ?",
531
- (project_id,)
596
+ "SELECT * FROM projects WHERE id = ?", (project_id,)
532
597
  )
533
598
  row = await cursor.fetchone()
534
-
599
+
535
600
  if row:
536
601
  return Project(
537
- id=row['id'],
538
- name=row['name'],
539
- aliases=json.loads(row['aliases']),
540
- created=datetime.fromisoformat(row['created']),
541
- last_accessed=datetime.fromisoformat(row['last_accessed'])
602
+ id=row["id"],
603
+ name=row["name"],
604
+ aliases=json.loads(row["aliases"]),
605
+ created=datetime.fromisoformat(row["created"]),
606
+ last_accessed=datetime.fromisoformat(row["last_accessed"]),
542
607
  )
543
608
  return None
544
-
609
+
545
610
  async def find_matching_project(
546
- self,
547
- project_name: str,
548
- folder_path: Optional[str] = None
611
+ self, project_name: str, folder_path: Optional[str] = None
549
612
  ) -> Optional[Project]:
550
613
  """
551
614
  Find project by matching criteria.
552
-
615
+
553
616
  Args:
554
617
  project_name: Name of the project
555
618
  folder_path: Project folder path
556
-
619
+
557
620
  Returns:
558
621
  Matching project or None
559
622
  """
560
623
  projects = await self.get_all_projects()
561
624
  normalized_name = project_name.lower()
562
-
625
+
563
626
  best_match = None
564
627
  best_score = 0
565
-
628
+
566
629
  for project in projects:
567
630
  score = 0
568
631
  match_factors = []
569
-
632
+
570
633
  # Check name match (case-insensitive)
571
634
  if project.name.lower() == normalized_name:
572
635
  score += 2 # Name match is primary identifier
573
636
  match_factors.append("name")
574
-
637
+
575
638
  # Check folder path in aliases
576
639
  if folder_path and folder_path in project.aliases:
577
640
  score += 1
578
641
  match_factors.append("folder_path")
579
-
642
+
580
643
  # If we have a name match, it's a strong candidate
581
644
  if score >= 2:
582
645
  if score > best_score:
583
646
  best_score = score
584
647
  best_match = project
585
- logger.info(f"Match for project {project.name} (score: {score}, factors: {match_factors})")
586
-
648
+ logger.info(
649
+ (
650
+ f"Match for project {project.name} "
651
+ f"(score: {score}, factors: {match_factors})"
652
+ )
653
+ )
654
+
587
655
  return best_match
588
656
 
589
657
  async def get_or_create_project(
590
- self,
591
- project_name: str,
592
- folder_path: str
658
+ self, project_name: str, folder_path: str
593
659
  ) -> Project:
594
660
  """
595
661
  Get or create a project using intelligent matching.
596
-
662
+
597
663
  Args:
598
664
  project_name: Name of the project
599
665
  folder_path: Project folder path
600
-
666
+
601
667
  Returns:
602
668
  Existing or newly created project
603
669
  """
604
670
  # Try to find existing project
605
- project = await self.find_matching_project(
606
- project_name, folder_path
607
- )
608
-
671
+ project = await self.find_matching_project(project_name, folder_path)
672
+
609
673
  if project:
610
674
  # Update aliases if folder path not already included
611
675
  if folder_path not in project.aliases:
612
676
  project.aliases.append(folder_path)
613
677
  await self.update_project(project)
614
- logger.info(f"Added folder path {folder_path} to project {project.name} aliases")
615
-
678
+ logger.info(
679
+ f"Added folder path {folder_path} to project {project.name} aliases"
680
+ )
681
+
616
682
  # Update access time
617
683
  await self.update_project_access_time(project.id)
618
684
  return project
619
-
685
+
620
686
  # Create new project
621
687
  from ..database.models import Project
622
688
  import uuid
623
-
689
+
624
690
  new_project = Project(
625
691
  id=str(uuid.uuid4()),
626
692
  name=project_name,
627
693
  aliases=[folder_path],
628
694
  created=datetime.utcnow(),
629
- last_accessed=datetime.utcnow()
695
+ last_accessed=datetime.utcnow(),
630
696
  )
631
-
697
+
632
698
  await self.create_project(new_project)
633
699
  logger.info(f"Created new project: {new_project.name} ({new_project.id})")
634
700
  return new_project
635
-
701
+
636
702
  async def update_project_access_time(self, project_id: str) -> None:
637
703
  """Update the last accessed time for a project."""
638
- async with self.get_write_connection_with_retry("update_project_access_time") as db:
704
+ async with self.get_write_connection_with_retry(
705
+ "update_project_access_time"
706
+ ) as db:
639
707
  await db.execute(
640
708
  "UPDATE projects SET last_accessed = ? WHERE id = ?",
641
- (datetime.utcnow(), project_id)
709
+ (datetime.utcnow(), project_id),
642
710
  )
643
711
  await db.commit()
644
-
712
+
645
713
  async def update_project(self, project: Project) -> None:
646
714
  """Update an existing project record."""
647
715
  async with self.get_write_connection_with_retry("update_project") as db:
648
716
  await db.execute(
649
717
  """
650
- UPDATE projects
718
+ UPDATE projects
651
719
  SET name = ?, aliases = ?, last_accessed = ?
652
720
  WHERE id = ?
653
721
  """,
@@ -655,12 +723,12 @@ class DatabaseManager:
655
723
  project.name,
656
724
  json.dumps(project.aliases),
657
725
  project.last_accessed,
658
- project.id
659
- )
726
+ project.id,
727
+ ),
660
728
  )
661
729
  await db.commit()
662
730
  logger.debug(f"Updated project: {project.id}")
663
-
731
+
664
732
  async def get_all_projects(self) -> List[Project]:
665
733
  """Get all projects in the database."""
666
734
  async with self.get_connection() as db:
@@ -668,7 +736,7 @@ class DatabaseManager:
668
736
  "SELECT id, name, aliases, created, last_accessed FROM projects"
669
737
  )
670
738
  rows = await cursor.fetchall()
671
-
739
+
672
740
  projects = []
673
741
  for row in rows:
674
742
  aliases = json.loads(row[2]) if row[2] else []
@@ -677,23 +745,26 @@ class DatabaseManager:
677
745
  name=row[1],
678
746
  aliases=aliases,
679
747
  created=row[3],
680
- last_accessed=row[4]
748
+ last_accessed=row[4],
681
749
  )
682
750
  projects.append(project)
683
-
751
+
684
752
  return projects
685
-
686
753
 
687
-
688
754
  # File description operations
689
-
755
+
690
756
  async def create_file_description(self, file_desc: FileDescription) -> None:
691
757
  """Create or update a file description."""
692
- async with self.get_write_connection_with_retry("create_file_description") as db:
758
+ async with self.get_write_connection_with_retry(
759
+ "create_file_description"
760
+ ) as db:
693
761
  await db.execute(
694
762
  """
695
- INSERT OR REPLACE INTO file_descriptions
696
- (project_id, file_path, description, file_hash, last_modified, version, source_project_id, to_be_cleaned)
763
+ INSERT OR REPLACE INTO file_descriptions
764
+ (
765
+ project_id, file_path, description, file_hash, last_modified,
766
+ version, source_project_id, to_be_cleaned
767
+ )
697
768
  VALUES (?, ?, ?, ?, ?, ?, ?, ?)
698
769
  """,
699
770
  (
@@ -704,78 +775,77 @@ class DatabaseManager:
704
775
  file_desc.last_modified,
705
776
  file_desc.version,
706
777
  file_desc.source_project_id,
707
- file_desc.to_be_cleaned
708
- )
778
+ file_desc.to_be_cleaned,
779
+ ),
709
780
  )
710
781
  await db.commit()
711
782
  logger.debug(f"Saved file description: {file_desc.file_path}")
712
-
783
+
713
784
  async def get_file_description(
714
- self,
715
- project_id: str,
716
- file_path: str
785
+ self, project_id: str, file_path: str
717
786
  ) -> Optional[FileDescription]:
718
787
  """Get file description by project and path."""
719
788
  async with self.get_connection() as db:
720
789
  cursor = await db.execute(
721
790
  """
722
- SELECT * FROM file_descriptions
791
+ SELECT * FROM file_descriptions
723
792
  WHERE project_id = ? AND file_path = ? AND to_be_cleaned IS NULL
724
793
  """,
725
- (project_id, file_path)
794
+ (project_id, file_path),
726
795
  )
727
796
  row = await cursor.fetchone()
728
-
797
+
729
798
  if row:
730
799
  return FileDescription(
731
- id=row['id'],
732
- project_id=row['project_id'],
733
- file_path=row['file_path'],
734
- description=row['description'],
735
- file_hash=row['file_hash'],
736
- last_modified=datetime.fromisoformat(row['last_modified']),
737
- version=row['version'],
738
- source_project_id=row['source_project_id'],
739
- to_be_cleaned=row['to_be_cleaned']
800
+ id=row["id"],
801
+ project_id=row["project_id"],
802
+ file_path=row["file_path"],
803
+ description=row["description"],
804
+ file_hash=row["file_hash"],
805
+ last_modified=datetime.fromisoformat(row["last_modified"]),
806
+ version=row["version"],
807
+ source_project_id=row["source_project_id"],
808
+ to_be_cleaned=row["to_be_cleaned"],
740
809
  )
741
810
  return None
742
-
743
- async def get_all_file_descriptions(
744
- self,
745
- project_id: str
746
- ) -> List[FileDescription]:
811
+
812
+ async def get_all_file_descriptions(self, project_id: str) -> List[FileDescription]:
747
813
  """Get all file descriptions for a project."""
748
814
  async with self.get_connection() as db:
749
815
  cursor = await db.execute(
750
816
  """
751
- SELECT * FROM file_descriptions
817
+ SELECT * FROM file_descriptions
752
818
  WHERE project_id = ? AND to_be_cleaned IS NULL
753
819
  ORDER BY file_path
754
820
  """,
755
- (project_id,)
821
+ (project_id,),
756
822
  )
757
823
  rows = await cursor.fetchall()
758
-
824
+
759
825
  return [
760
826
  FileDescription(
761
- id=row['id'],
762
- project_id=row['project_id'],
763
- file_path=row['file_path'],
764
- description=row['description'],
765
- file_hash=row['file_hash'],
766
- last_modified=datetime.fromisoformat(row['last_modified']),
767
- version=row['version'],
768
- source_project_id=row['source_project_id'],
769
- to_be_cleaned=row['to_be_cleaned']
827
+ id=row["id"],
828
+ project_id=row["project_id"],
829
+ file_path=row["file_path"],
830
+ description=row["description"],
831
+ file_hash=row["file_hash"],
832
+ last_modified=datetime.fromisoformat(row["last_modified"]),
833
+ version=row["version"],
834
+ source_project_id=row["source_project_id"],
835
+ to_be_cleaned=row["to_be_cleaned"],
770
836
  )
771
837
  for row in rows
772
838
  ]
773
-
774
- async def batch_create_file_descriptions(self, file_descriptions: List[FileDescription]) -> None:
775
- """Batch create multiple file descriptions efficiently with optimized transactions."""
839
+
840
+ async def batch_create_file_descriptions(
841
+ self, file_descriptions: List[FileDescription]
842
+ ) -> None:
843
+ """
844
+ Batch create multiple file descriptions efficiently with optimized transactions.
845
+ """
776
846
  if not file_descriptions:
777
847
  return
778
-
848
+
779
849
  async def batch_operation(conn: aiosqlite.Connection) -> None:
780
850
  data = [
781
851
  (
@@ -786,142 +856,137 @@ class DatabaseManager:
786
856
  fd.last_modified,
787
857
  fd.version,
788
858
  fd.source_project_id,
789
- fd.to_be_cleaned
859
+ fd.to_be_cleaned,
790
860
  )
791
861
  for fd in file_descriptions
792
862
  ]
793
-
863
+
794
864
  await conn.executemany(
795
865
  """
796
- INSERT OR REPLACE INTO file_descriptions
797
- (project_id, file_path, description, file_hash, last_modified, version, source_project_id, to_be_cleaned)
866
+ INSERT OR REPLACE INTO file_descriptions
867
+ (
868
+ project_id, file_path, description, file_hash, last_modified,
869
+ version, source_project_id, to_be_cleaned
870
+ )
798
871
  VALUES (?, ?, ?, ?, ?, ?, ?, ?)
799
872
  """,
800
- data
873
+ data,
801
874
  )
802
875
  logger.debug(f"Batch created {len(file_descriptions)} file descriptions")
803
-
876
+
804
877
  await self.execute_transaction_with_retry(
805
878
  batch_operation,
806
879
  f"batch_create_file_descriptions_{len(file_descriptions)}_files",
807
- timeout_seconds=30.0 # Longer timeout for batch operations
880
+ timeout_seconds=30.0, # Longer timeout for batch operations
808
881
  )
809
-
882
+
810
883
  # Search operations
811
-
884
+
812
885
  async def search_file_descriptions(
813
- self,
814
- project_id: str,
815
- query: str,
816
- max_results: int = 20
886
+ self, project_id: str, query: str, max_results: int = 20
817
887
  ) -> List[SearchResult]:
818
888
  """Search file descriptions using FTS5 with intelligent query preprocessing."""
819
889
  # Preprocess query for optimal FTS5 search
820
890
  preprocessed_query = preprocess_search_query(query)
821
-
891
+
822
892
  if not preprocessed_query:
823
893
  logger.debug(f"Empty query after preprocessing: '{query}'")
824
894
  return []
825
-
895
+
826
896
  logger.debug(f"Search query preprocessing: '{query}' -> '{preprocessed_query}'")
827
-
897
+
828
898
  async with self.get_connection() as db:
829
899
  cursor = await db.execute(
830
900
  """
831
- SELECT
901
+ SELECT
832
902
  fd.project_id,
833
903
  fd.file_path,
834
904
  fd.description,
835
905
  bm25(file_descriptions_fts) as rank
836
906
  FROM file_descriptions_fts
837
907
  JOIN file_descriptions fd ON fd.id = file_descriptions_fts.rowid
838
- WHERE file_descriptions_fts MATCH ?
839
- AND fd.project_id = ?
908
+ WHERE file_descriptions_fts MATCH ?
909
+ AND fd.project_id = ?
840
910
  AND fd.to_be_cleaned IS NULL
841
911
  ORDER BY bm25(file_descriptions_fts)
842
912
  LIMIT ?
843
913
  """,
844
- (preprocessed_query, project_id, max_results)
914
+ (preprocessed_query, project_id, max_results),
845
915
  )
846
916
  rows = await cursor.fetchall()
847
-
917
+
848
918
  return [
849
919
  SearchResult(
850
- project_id=row['project_id'],
851
- file_path=row['file_path'],
852
- description=row['description'],
853
- relevance_score=row['rank']
920
+ project_id=row["project_id"],
921
+ file_path=row["file_path"],
922
+ description=row["description"],
923
+ relevance_score=row["rank"],
854
924
  )
855
925
  for row in rows
856
926
  ]
857
-
927
+
858
928
  # Token cache operations
859
-
929
+
860
930
  async def get_cached_token_count(self, cache_key: str) -> Optional[int]:
861
931
  """Get cached token count if not expired."""
862
932
  async with self.get_connection() as db:
863
933
  cursor = await db.execute(
864
934
  """
865
- SELECT token_count FROM token_cache
935
+ SELECT token_count FROM token_cache
866
936
  WHERE cache_key = ? AND (expires IS NULL OR expires > ?)
867
937
  """,
868
- (cache_key, datetime.utcnow())
938
+ (cache_key, datetime.utcnow()),
869
939
  )
870
940
  row = await cursor.fetchone()
871
- return row['token_count'] if row else None
872
-
941
+ return row["token_count"] if row else None
942
+
873
943
  async def cache_token_count(
874
- self,
875
- cache_key: str,
876
- token_count: int,
877
- ttl_hours: int = 24
944
+ self, cache_key: str, token_count: int, ttl_hours: int = 24
878
945
  ) -> None:
879
946
  """Cache token count with TTL."""
880
947
  expires = datetime.utcnow() + timedelta(hours=ttl_hours)
881
-
948
+
882
949
  async with self.get_write_connection() as db:
883
950
  await db.execute(
884
951
  """
885
952
  INSERT OR REPLACE INTO token_cache (cache_key, token_count, expires)
886
953
  VALUES (?, ?, ?)
887
954
  """,
888
- (cache_key, token_count, expires)
955
+ (cache_key, token_count, expires),
889
956
  )
890
957
  await db.commit()
891
-
958
+
892
959
  async def cleanup_expired_cache(self) -> None:
893
960
  """Remove expired cache entries."""
894
961
  async with self.get_write_connection() as db:
895
962
  await db.execute(
896
- "DELETE FROM token_cache WHERE expires < ?",
897
- (datetime.utcnow(),)
963
+ "DELETE FROM token_cache WHERE expires < ?", (datetime.utcnow(),)
898
964
  )
899
965
  await db.commit()
900
-
966
+
901
967
  # Utility operations
902
-
968
+
903
969
  async def get_file_count(self, project_id: str) -> int:
904
970
  """Get count of files in a project."""
905
971
  async with self.get_connection() as db:
906
972
  cursor = await db.execute(
907
- "SELECT COUNT(*) as count FROM file_descriptions WHERE project_id = ? AND to_be_cleaned IS NULL",
908
- (project_id,)
973
+ (
974
+ "SELECT COUNT(*) as count FROM file_descriptions WHERE "
975
+ "project_id = ? AND to_be_cleaned IS NULL"
976
+ ),
977
+ (project_id,),
909
978
  )
910
979
  row = await cursor.fetchone()
911
- return row['count'] if row else 0
912
-
980
+ return row["count"] if row else 0
913
981
 
914
-
915
-
916
-
917
982
  # Project Overview operations
918
-
983
+
919
984
  async def create_project_overview(self, overview: ProjectOverview) -> None:
920
985
  """Create or update a project overview."""
921
986
  async with self.get_write_connection() as db:
922
987
  await db.execute(
923
988
  """
924
- INSERT OR REPLACE INTO project_overviews
989
+ INSERT OR REPLACE INTO project_overviews
925
990
  (project_id, overview, last_modified, total_files, total_tokens)
926
991
  VALUES (?, ?, ?, ?, ?)
927
992
  """,
@@ -930,258 +995,307 @@ class DatabaseManager:
930
995
  overview.overview,
931
996
  overview.last_modified,
932
997
  overview.total_files,
933
- overview.total_tokens
934
- )
998
+ overview.total_tokens,
999
+ ),
935
1000
  )
936
1001
  await db.commit()
937
1002
  logger.debug(f"Created/updated overview for project {overview.project_id}")
938
-
1003
+
939
1004
  async def get_project_overview(self, project_id: str) -> Optional[ProjectOverview]:
940
1005
  """Get project overview by ID."""
941
1006
  async with self.get_connection() as db:
942
1007
  cursor = await db.execute(
943
- "SELECT * FROM project_overviews WHERE project_id = ?",
944
- (project_id,)
1008
+ "SELECT * FROM project_overviews WHERE project_id = ?", (project_id,)
945
1009
  )
946
1010
  row = await cursor.fetchone()
947
-
1011
+
948
1012
  if row:
949
1013
  return ProjectOverview(
950
- project_id=row['project_id'],
951
- overview=row['overview'],
952
- last_modified=datetime.fromisoformat(row['last_modified']),
953
- total_files=row['total_files'],
954
- total_tokens=row['total_tokens']
1014
+ project_id=row["project_id"],
1015
+ overview=row["overview"],
1016
+ last_modified=datetime.fromisoformat(row["last_modified"]),
1017
+ total_files=row["total_files"],
1018
+ total_tokens=row["total_tokens"],
955
1019
  )
956
1020
  return None
957
-
958
- async def cleanup_missing_files(self, project_id: str, project_root: Path) -> List[str]:
1021
+
1022
+ async def cleanup_missing_files(
1023
+ self, project_id: str, project_root: Path
1024
+ ) -> List[str]:
959
1025
  """
960
1026
  Mark descriptions for cleanup for files that no longer exist on disk.
961
-
1027
+
962
1028
  Args:
963
1029
  project_id: Project identifier
964
1030
  project_root: Path to project root directory
965
-
1031
+
966
1032
  Returns:
967
1033
  List of file paths that were marked for cleanup
968
1034
  """
969
1035
  removed_files = []
970
-
1036
+
971
1037
  async def cleanup_operation(conn: aiosqlite.Connection) -> List[str]:
972
1038
  # Get all active file descriptions for this project
973
1039
  cursor = await conn.execute(
974
- "SELECT file_path FROM file_descriptions WHERE project_id = ? AND to_be_cleaned IS NULL",
975
- (project_id,)
1040
+ (
1041
+ "SELECT file_path FROM file_descriptions WHERE "
1042
+ "project_id = ? AND to_be_cleaned IS NULL"
1043
+ ),
1044
+ (project_id,),
976
1045
  )
977
-
1046
+
978
1047
  rows = await cursor.fetchall()
979
-
1048
+
980
1049
  # Check which files no longer exist
981
1050
  to_remove = []
982
1051
  for row in rows:
983
- file_path = row['file_path']
1052
+ file_path = row["file_path"]
984
1053
  full_path = project_root / file_path
985
-
1054
+
986
1055
  if not full_path.exists():
987
1056
  to_remove.append(file_path)
988
-
1057
+
989
1058
  # Mark descriptions for cleanup instead of deleting
990
1059
  if to_remove:
991
1060
  import time
1061
+
992
1062
  cleanup_timestamp = int(time.time())
993
1063
  await conn.executemany(
994
- "UPDATE file_descriptions SET to_be_cleaned = ? WHERE project_id = ? AND file_path = ?",
995
- [(cleanup_timestamp, project_id, path) for path in to_remove]
1064
+ (
1065
+ "UPDATE file_descriptions SET to_be_cleaned = ? WHERE "
1066
+ "project_id = ? AND file_path = ?"
1067
+ ),
1068
+ [(cleanup_timestamp, project_id, path) for path in to_remove],
996
1069
  )
997
- logger.info(f"Marked {len(to_remove)} missing files for cleanup from {project_id}")
998
-
1070
+ logger.info(
1071
+ (
1072
+ f"Marked {len(to_remove)} missing files for cleanup "
1073
+ f"from {project_id}"
1074
+ )
1075
+ )
1076
+
999
1077
  return to_remove
1000
-
1078
+
1001
1079
  removed_files = await self.execute_transaction_with_retry(
1002
1080
  cleanup_operation,
1003
1081
  f"cleanup_missing_files_{project_id}",
1004
- timeout_seconds=60.0 # Longer timeout for file system operations
1082
+ timeout_seconds=60.0, # Longer timeout for file system operations
1005
1083
  )
1006
-
1084
+
1007
1085
  return removed_files
1008
-
1009
- async def analyze_word_frequency(self, project_id: str, limit: int = 200) -> WordFrequencyResult:
1086
+
1087
+ async def analyze_word_frequency(
1088
+ self, project_id: str, limit: int = 200
1089
+ ) -> WordFrequencyResult:
1010
1090
  """
1011
1091
  Analyze word frequency across all file descriptions for a project.
1012
-
1092
+
1013
1093
  Args:
1014
1094
  project_id: Project identifier
1015
1095
  limit: Maximum number of top terms to return
1016
-
1096
+
1017
1097
  Returns:
1018
1098
  WordFrequencyResult with top terms and statistics
1019
1099
  """
1020
1100
  from collections import Counter
1021
1101
  import re
1022
-
1102
+
1023
1103
  # Load stop words from bundled file
1024
- stop_words_path = Path(__file__).parent.parent / "data" / "stop_words_english.txt"
1104
+ stop_words_path = (
1105
+ Path(__file__).parent.parent / "data" / "stop_words_english.txt"
1106
+ )
1025
1107
  stop_words = set()
1026
-
1108
+
1027
1109
  if stop_words_path.exists():
1028
- with open(stop_words_path, 'r', encoding='utf-8') as f:
1110
+ with open(stop_words_path, "r", encoding="utf-8") as f:
1029
1111
  for line in f:
1030
1112
  # Each line contains just the stop word
1031
1113
  word = line.strip().lower()
1032
1114
  if word: # Skip empty lines
1033
1115
  stop_words.add(word)
1034
-
1116
+
1035
1117
  # Add common programming keywords to stop words
1036
1118
  programming_keywords = {
1037
- 'if', 'else', 'for', 'while', 'do', 'break', 'continue', 'return',
1038
- 'function', 'class', 'def', 'var', 'let', 'const', 'public', 'private',
1039
- 'static', 'async', 'await', 'import', 'export', 'from', 'true', 'false',
1040
- 'null', 'undefined', 'this', 'that', 'self', 'super', 'new', 'delete'
1119
+ "if",
1120
+ "else",
1121
+ "for",
1122
+ "while",
1123
+ "do",
1124
+ "break",
1125
+ "continue",
1126
+ "return",
1127
+ "function",
1128
+ "class",
1129
+ "def",
1130
+ "var",
1131
+ "let",
1132
+ "const",
1133
+ "public",
1134
+ "private",
1135
+ "static",
1136
+ "async",
1137
+ "await",
1138
+ "import",
1139
+ "export",
1140
+ "from",
1141
+ "true",
1142
+ "false",
1143
+ "null",
1144
+ "undefined",
1145
+ "this",
1146
+ "that",
1147
+ "self",
1148
+ "super",
1149
+ "new",
1150
+ "delete",
1041
1151
  }
1042
1152
  stop_words.update(programming_keywords)
1043
-
1153
+
1044
1154
  async with self.get_connection() as db:
1045
1155
  # Get all descriptions for this project
1046
1156
  cursor = await db.execute(
1047
- "SELECT description FROM file_descriptions WHERE project_id = ? AND to_be_cleaned IS NULL",
1048
- (project_id,)
1157
+ (
1158
+ "SELECT description FROM file_descriptions WHERE "
1159
+ "project_id = ? AND to_be_cleaned IS NULL"
1160
+ ),
1161
+ (project_id,),
1049
1162
  )
1050
-
1163
+
1051
1164
  rows = await cursor.fetchall()
1052
-
1165
+
1053
1166
  # Combine all descriptions
1054
- all_text = " ".join(row['description'] for row in rows)
1055
-
1167
+ all_text = " ".join(row["description"] for row in rows)
1168
+
1056
1169
  # Tokenize and filter
1057
- words = re.findall(r'\b[a-zA-Z]{2,}\b', all_text.lower())
1170
+ words = re.findall(r"\b[a-zA-Z]{2,}\b", all_text.lower())
1058
1171
  filtered_words = [word for word in words if word not in stop_words]
1059
-
1172
+
1060
1173
  # Count frequencies
1061
1174
  word_counts = Counter(filtered_words)
1062
-
1175
+
1063
1176
  # Create result
1064
1177
  top_terms = [
1065
1178
  WordFrequencyTerm(term=term, frequency=count)
1066
1179
  for term, count in word_counts.most_common(limit)
1067
1180
  ]
1068
-
1181
+
1069
1182
  return WordFrequencyResult(
1070
- top_terms=top_terms,
1071
- total_terms_analyzed=len(filtered_words),
1072
- total_unique_terms=len(word_counts)
1183
+ top_terms=top_terms,
1184
+ total_terms_analyzed=len(filtered_words),
1185
+ total_unique_terms=len(word_counts),
1073
1186
  )
1074
-
1187
+
1075
1188
  async def cleanup_empty_projects(self) -> int:
1076
1189
  """
1077
1190
  Remove projects that have no file descriptions and no project overview.
1078
-
1191
+
1079
1192
  Returns:
1080
1193
  Number of projects removed
1081
1194
  """
1082
1195
  async with self.get_write_connection() as db:
1083
1196
  # Find projects with no descriptions and no overview
1084
- cursor = await db.execute("""
1085
- SELECT p.id, p.name
1197
+ cursor = await db.execute(
1198
+ """
1199
+ SELECT p.id, p.name
1086
1200
  FROM projects p
1087
1201
  LEFT JOIN file_descriptions fd ON p.id = fd.project_id
1088
1202
  LEFT JOIN project_overviews po ON p.id = po.project_id
1089
1203
  WHERE fd.project_id IS NULL AND po.project_id IS NULL
1090
- """)
1091
-
1204
+ """
1205
+ )
1206
+
1092
1207
  empty_projects = await cursor.fetchall()
1093
-
1208
+
1094
1209
  if not empty_projects:
1095
1210
  return 0
1096
-
1211
+
1097
1212
  removed_count = 0
1098
1213
  for project in empty_projects:
1099
- project_id = project['id']
1100
- project_name = project['name']
1101
-
1214
+ project_id = project["id"]
1215
+ project_name = project["name"]
1216
+
1102
1217
  # Remove from projects table (cascading will handle related data)
1103
1218
  await db.execute("DELETE FROM projects WHERE id = ?", (project_id,))
1104
1219
  removed_count += 1
1105
-
1220
+
1106
1221
  logger.info(f"Removed empty project: {project_name} (ID: {project_id})")
1107
-
1222
+
1108
1223
  await db.commit()
1109
1224
  return removed_count
1110
-
1225
+
1111
1226
  async def get_project_map_data(self, project_identifier: str) -> dict:
1112
1227
  """
1113
1228
  Get all data needed to generate a project map.
1114
-
1229
+
1115
1230
  Args:
1116
1231
  project_identifier: Project name or ID
1117
-
1232
+
1118
1233
  Returns:
1119
1234
  Dictionary containing project info, overview, and file descriptions
1120
1235
  """
1121
1236
  async with self.get_connection() as db:
1122
1237
  # Try to find project by ID first, then by name
1123
- if len(project_identifier) == 36 and '-' in project_identifier:
1238
+ if len(project_identifier) == 36 and "-" in project_identifier:
1124
1239
  # Looks like a UUID
1125
1240
  cursor = await db.execute(
1126
- "SELECT * FROM projects WHERE id = ?",
1127
- (project_identifier,)
1241
+ "SELECT * FROM projects WHERE id = ?", (project_identifier,)
1128
1242
  )
1129
1243
  else:
1130
1244
  # Search by name
1131
1245
  cursor = await db.execute(
1132
- "SELECT * FROM projects WHERE LOWER(name) = LOWER(?)",
1133
- (project_identifier,)
1246
+ "SELECT * FROM projects WHERE LOWER(name) = LOWER(?)",
1247
+ (project_identifier,),
1134
1248
  )
1135
-
1249
+
1136
1250
  project_row = await cursor.fetchone()
1137
1251
  if not project_row:
1138
1252
  return None
1139
-
1253
+
1140
1254
  # Handle aliases JSON parsing
1141
1255
  project_dict = dict(project_row)
1142
- if isinstance(project_dict['aliases'], str):
1256
+ if isinstance(project_dict["aliases"], str):
1143
1257
  import json
1144
- project_dict['aliases'] = json.loads(project_dict['aliases'])
1145
-
1258
+
1259
+ project_dict["aliases"] = json.loads(project_dict["aliases"])
1260
+
1146
1261
  project = Project(**project_dict)
1147
-
1262
+
1148
1263
  # Get project overview
1149
1264
  cursor = await db.execute(
1150
- "SELECT * FROM project_overviews WHERE project_id = ?",
1151
- (project.id,)
1265
+ "SELECT * FROM project_overviews WHERE project_id = ?", (project.id,)
1152
1266
  )
1153
1267
  overview_row = await cursor.fetchone()
1154
1268
  project_overview = ProjectOverview(**overview_row) if overview_row else None
1155
-
1269
+
1156
1270
  # Get all file descriptions for this project
1157
1271
  cursor = await db.execute(
1158
- """SELECT * FROM file_descriptions
1272
+ """SELECT * FROM file_descriptions
1159
1273
  WHERE project_id = ? AND to_be_cleaned IS NULL
1160
1274
  ORDER BY file_path""",
1161
- (project.id,)
1275
+ (project.id,),
1162
1276
  )
1163
1277
  file_rows = await cursor.fetchall()
1164
1278
  file_descriptions = [FileDescription(**row) for row in file_rows]
1165
-
1279
+
1166
1280
  return {
1167
- 'project': project,
1168
- 'overview': project_overview,
1169
- 'files': file_descriptions
1281
+ "project": project,
1282
+ "overview": project_overview,
1283
+ "files": file_descriptions,
1170
1284
  }
1171
-
1285
+
1172
1286
  # Cleanup operations
1173
-
1287
+
1174
1288
  @property
1175
1289
  def cleanup_manager(self) -> CleanupManager:
1176
1290
  """Get the cleanup manager instance."""
1177
1291
  if self._cleanup_manager is None:
1178
1292
  self._cleanup_manager = CleanupManager(self, retention_months=6)
1179
1293
  return self._cleanup_manager
1180
-
1294
+
1181
1295
  async def mark_file_for_cleanup(self, project_id: str, file_path: str) -> bool:
1182
1296
  """Mark a file for cleanup. Convenience method."""
1183
1297
  return await self.cleanup_manager.mark_file_for_cleanup(project_id, file_path)
1184
-
1298
+
1185
1299
  async def perform_cleanup(self, project_id: Optional[str] = None) -> int:
1186
1300
  """Perform cleanup of old records. Convenience method."""
1187
1301
  return await self.cleanup_manager.perform_cleanup(project_id)