mcp-code-indexer 4.2.14__tar.gz → 4.2.16__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (74)
  1. {mcp_code_indexer-4.2.14 → mcp_code_indexer-4.2.16}/PKG-INFO +13 -10
  2. {mcp_code_indexer-4.2.14 → mcp_code_indexer-4.2.16}/README.md +8 -6
  3. {mcp_code_indexer-4.2.14 → mcp_code_indexer-4.2.16}/pyproject.toml +3 -3
  4. {mcp_code_indexer-4.2.14 → mcp_code_indexer-4.2.16}/src/mcp_code_indexer/database/database.py +251 -85
  5. {mcp_code_indexer-4.2.14 → mcp_code_indexer-4.2.16}/src/mcp_code_indexer/database/models.py +66 -24
  6. {mcp_code_indexer-4.2.14 → mcp_code_indexer-4.2.16}/src/mcp_code_indexer/database/retry_executor.py +15 -5
  7. {mcp_code_indexer-4.2.14 → mcp_code_indexer-4.2.16}/src/mcp_code_indexer/file_scanner.py +107 -12
  8. {mcp_code_indexer-4.2.14 → mcp_code_indexer-4.2.16}/src/mcp_code_indexer/main.py +75 -23
  9. {mcp_code_indexer-4.2.14 → mcp_code_indexer-4.2.16}/src/mcp_code_indexer/server/mcp_server.py +191 -1
  10. {mcp_code_indexer-4.2.14 → mcp_code_indexer-4.2.16}/src/mcp_code_indexer/vector_mode/chunking/ast_chunker.py +103 -84
  11. {mcp_code_indexer-4.2.14 → mcp_code_indexer-4.2.16}/src/mcp_code_indexer/vector_mode/chunking/chunk_optimizer.py +1 -0
  12. {mcp_code_indexer-4.2.14 → mcp_code_indexer-4.2.16}/src/mcp_code_indexer/vector_mode/config.py +113 -45
  13. mcp_code_indexer-4.2.16/src/mcp_code_indexer/vector_mode/const.py +24 -0
  14. mcp_code_indexer-4.2.16/src/mcp_code_indexer/vector_mode/daemon.py +1105 -0
  15. {mcp_code_indexer-4.2.14 → mcp_code_indexer-4.2.16}/src/mcp_code_indexer/vector_mode/monitoring/change_detector.py +113 -97
  16. {mcp_code_indexer-4.2.14 → mcp_code_indexer-4.2.16}/src/mcp_code_indexer/vector_mode/monitoring/file_watcher.py +175 -121
  17. mcp_code_indexer-4.2.16/src/mcp_code_indexer/vector_mode/providers/turbopuffer_client.py +410 -0
  18. mcp_code_indexer-4.2.16/src/mcp_code_indexer/vector_mode/providers/voyage_client.py +221 -0
  19. mcp_code_indexer-4.2.16/src/mcp_code_indexer/vector_mode/services/__init__.py +9 -0
  20. mcp_code_indexer-4.2.16/src/mcp_code_indexer/vector_mode/services/embedding_service.py +389 -0
  21. mcp_code_indexer-4.2.16/src/mcp_code_indexer/vector_mode/services/vector_mode_tools_service.py +459 -0
  22. mcp_code_indexer-4.2.16/src/mcp_code_indexer/vector_mode/services/vector_storage_service.py +580 -0
  23. mcp_code_indexer-4.2.16/src/mcp_code_indexer/vector_mode/types.py +46 -0
  24. mcp_code_indexer-4.2.16/src/mcp_code_indexer/vector_mode/utils.py +50 -0
  25. mcp_code_indexer-4.2.14/src/mcp_code_indexer/vector_mode/daemon.py +0 -343
  26. mcp_code_indexer-4.2.14/src/mcp_code_indexer/vector_mode/providers/turbopuffer_client.py +0 -217
  27. mcp_code_indexer-4.2.14/src/mcp_code_indexer/vector_mode/providers/voyage_client.py +0 -119
  28. {mcp_code_indexer-4.2.14 → mcp_code_indexer-4.2.16}/LICENSE +0 -0
  29. {mcp_code_indexer-4.2.14 → mcp_code_indexer-4.2.16}/src/mcp_code_indexer/__init__.py +0 -0
  30. {mcp_code_indexer-4.2.14 → mcp_code_indexer-4.2.16}/src/mcp_code_indexer/__main__.py +0 -0
  31. {mcp_code_indexer-4.2.14 → mcp_code_indexer-4.2.16}/src/mcp_code_indexer/ask_handler.py +0 -0
  32. {mcp_code_indexer-4.2.14 → mcp_code_indexer-4.2.16}/src/mcp_code_indexer/claude_api_handler.py +0 -0
  33. {mcp_code_indexer-4.2.14 → mcp_code_indexer-4.2.16}/src/mcp_code_indexer/cleanup_manager.py +0 -0
  34. {mcp_code_indexer-4.2.14 → mcp_code_indexer-4.2.16}/src/mcp_code_indexer/commands/__init__.py +0 -0
  35. {mcp_code_indexer-4.2.14 → mcp_code_indexer-4.2.16}/src/mcp_code_indexer/commands/makelocal.py +0 -0
  36. {mcp_code_indexer-4.2.14 → mcp_code_indexer-4.2.16}/src/mcp_code_indexer/data/stop_words_english.txt +0 -0
  37. {mcp_code_indexer-4.2.14 → mcp_code_indexer-4.2.16}/src/mcp_code_indexer/database/__init__.py +0 -0
  38. {mcp_code_indexer-4.2.14 → mcp_code_indexer-4.2.16}/src/mcp_code_indexer/database/connection_health.py +0 -0
  39. {mcp_code_indexer-4.2.14 → mcp_code_indexer-4.2.16}/src/mcp_code_indexer/database/database_factory.py +0 -0
  40. {mcp_code_indexer-4.2.14 → mcp_code_indexer-4.2.16}/src/mcp_code_indexer/database/exceptions.py +0 -0
  41. {mcp_code_indexer-4.2.14 → mcp_code_indexer-4.2.16}/src/mcp_code_indexer/database/path_resolver.py +0 -0
  42. {mcp_code_indexer-4.2.14 → mcp_code_indexer-4.2.16}/src/mcp_code_indexer/deepask_handler.py +0 -0
  43. {mcp_code_indexer-4.2.14 → mcp_code_indexer-4.2.16}/src/mcp_code_indexer/error_handler.py +0 -0
  44. {mcp_code_indexer-4.2.14 → mcp_code_indexer-4.2.16}/src/mcp_code_indexer/git_hook_handler.py +0 -0
  45. {mcp_code_indexer-4.2.14 → mcp_code_indexer-4.2.16}/src/mcp_code_indexer/logging_config.py +0 -0
  46. {mcp_code_indexer-4.2.14 → mcp_code_indexer-4.2.16}/src/mcp_code_indexer/middleware/__init__.py +0 -0
  47. {mcp_code_indexer-4.2.14 → mcp_code_indexer-4.2.16}/src/mcp_code_indexer/middleware/auth.py +0 -0
  48. {mcp_code_indexer-4.2.14 → mcp_code_indexer-4.2.16}/src/mcp_code_indexer/middleware/error_middleware.py +0 -0
  49. {mcp_code_indexer-4.2.14 → mcp_code_indexer-4.2.16}/src/mcp_code_indexer/middleware/logging.py +0 -0
  50. {mcp_code_indexer-4.2.14 → mcp_code_indexer-4.2.16}/src/mcp_code_indexer/middleware/security.py +0 -0
  51. {mcp_code_indexer-4.2.14 → mcp_code_indexer-4.2.16}/src/mcp_code_indexer/migrations/001_initial.sql +0 -0
  52. {mcp_code_indexer-4.2.14 → mcp_code_indexer-4.2.16}/src/mcp_code_indexer/migrations/002_performance_indexes.sql +0 -0
  53. {mcp_code_indexer-4.2.14 → mcp_code_indexer-4.2.16}/src/mcp_code_indexer/migrations/003_project_overviews.sql +0 -0
  54. {mcp_code_indexer-4.2.14 → mcp_code_indexer-4.2.16}/src/mcp_code_indexer/migrations/004_remove_branch_dependency.sql +0 -0
  55. {mcp_code_indexer-4.2.14 → mcp_code_indexer-4.2.16}/src/mcp_code_indexer/migrations/005_remove_git_remotes.sql +0 -0
  56. {mcp_code_indexer-4.2.14 → mcp_code_indexer-4.2.16}/src/mcp_code_indexer/migrations/006_vector_mode.sql +0 -0
  57. {mcp_code_indexer-4.2.14 → mcp_code_indexer-4.2.16}/src/mcp_code_indexer/query_preprocessor.py +0 -0
  58. {mcp_code_indexer-4.2.14 → mcp_code_indexer-4.2.16}/src/mcp_code_indexer/server/__init__.py +0 -0
  59. {mcp_code_indexer-4.2.14 → mcp_code_indexer-4.2.16}/src/mcp_code_indexer/tiktoken_cache/9b5ad71b2ce5302211f9c61530b329a4922fc6a4 +0 -0
  60. {mcp_code_indexer-4.2.14 → mcp_code_indexer-4.2.16}/src/mcp_code_indexer/token_counter.py +0 -0
  61. {mcp_code_indexer-4.2.14 → mcp_code_indexer-4.2.16}/src/mcp_code_indexer/tools/__init__.py +0 -0
  62. {mcp_code_indexer-4.2.14 → mcp_code_indexer-4.2.16}/src/mcp_code_indexer/transport/__init__.py +0 -0
  63. {mcp_code_indexer-4.2.14 → mcp_code_indexer-4.2.16}/src/mcp_code_indexer/transport/base.py +0 -0
  64. {mcp_code_indexer-4.2.14 → mcp_code_indexer-4.2.16}/src/mcp_code_indexer/transport/http_transport.py +0 -0
  65. {mcp_code_indexer-4.2.14 → mcp_code_indexer-4.2.16}/src/mcp_code_indexer/transport/stdio_transport.py +0 -0
  66. {mcp_code_indexer-4.2.14 → mcp_code_indexer-4.2.16}/src/mcp_code_indexer/vector_mode/__init__.py +0 -0
  67. {mcp_code_indexer-4.2.14 → mcp_code_indexer-4.2.16}/src/mcp_code_indexer/vector_mode/chunking/__init__.py +0 -0
  68. {mcp_code_indexer-4.2.14 → mcp_code_indexer-4.2.16}/src/mcp_code_indexer/vector_mode/chunking/language_handlers.py +0 -0
  69. {mcp_code_indexer-4.2.14 → mcp_code_indexer-4.2.16}/src/mcp_code_indexer/vector_mode/monitoring/__init__.py +0 -0
  70. {mcp_code_indexer-4.2.14 → mcp_code_indexer-4.2.16}/src/mcp_code_indexer/vector_mode/monitoring/merkle_tree.py +0 -0
  71. {mcp_code_indexer-4.2.14 → mcp_code_indexer-4.2.16}/src/mcp_code_indexer/vector_mode/providers/__init__.py +0 -0
  72. {mcp_code_indexer-4.2.14 → mcp_code_indexer-4.2.16}/src/mcp_code_indexer/vector_mode/security/__init__.py +0 -0
  73. {mcp_code_indexer-4.2.14 → mcp_code_indexer-4.2.16}/src/mcp_code_indexer/vector_mode/security/patterns.py +0 -0
  74. {mcp_code_indexer-4.2.14 → mcp_code_indexer-4.2.16}/src/mcp_code_indexer/vector_mode/security/redactor.py +0 -0
@@ -1,8 +1,9 @@
1
- Metadata-Version: 2.3
1
+ Metadata-Version: 2.4
2
2
  Name: mcp-code-indexer
3
- Version: 4.2.14
3
+ Version: 4.2.16
4
4
  Summary: MCP server that tracks file descriptions across codebases, enabling AI agents to efficiently navigate and understand code through searchable summaries and token-aware overviews.
5
5
  License: MIT
6
+ License-File: LICENSE
6
7
  Keywords: mcp,model-context-protocol,code-indexer,ai-tools,codebase-navigation,file-descriptions,llm-tools
7
8
  Author: MCP Code Indexer Contributors
8
9
  Maintainer: MCP Code Indexer Contributors
@@ -36,11 +37,11 @@ Requires-Dist: pyyaml (>=6.0)
36
37
  Requires-Dist: tenacity (>=8.0.0)
37
38
  Requires-Dist: tiktoken (>=0.9.0)
38
39
  Requires-Dist: tomli (>=1.2.0) ; python_version < "3.11"
39
- Requires-Dist: tree-sitter (>=0.25.0)
40
+ Requires-Dist: tree-sitter (>=0.21.0)
40
41
  Requires-Dist: turbopuffer (>=0.6.0)
41
42
  Requires-Dist: uvicorn (>=0.24.0)
42
43
  Requires-Dist: voyageai (>=0.3.0)
43
- Requires-Dist: watchdog (>=6.0.0)
44
+ Requires-Dist: watchdog (>=4.0.0)
44
45
  Project-URL: Documentation, https://github.com/fluffypony/mcp-code-indexer/blob/main/README.md
45
46
  Project-URL: Homepage, https://github.com/fluffypony/mcp-code-indexer
46
47
  Project-URL: Repository, https://github.com/fluffypony/mcp-code-indexer
@@ -48,8 +49,8 @@ Description-Content-Type: text/markdown
48
49
 
49
50
  # MCP Code Indexer 🚀
50
51
 
51
- [![PyPI version](https://badge.fury.io/py/mcp-code-indexer.svg?61)](https://badge.fury.io/py/mcp-code-indexer)
52
- [![Python](https://img.shields.io/pypi/pyversions/mcp-code-indexer.svg?61)](https://pypi.org/project/mcp-code-indexer/)
52
+ [![PyPI version](https://badge.fury.io/py/mcp-code-indexer.svg?63)](https://badge.fury.io/py/mcp-code-indexer)
53
+ [![Python](https://img.shields.io/pypi/pyversions/mcp-code-indexer.svg?63)](https://pypi.org/project/mcp-code-indexer/)
53
54
  [![License](https://img.shields.io/badge/License-MIT-blue.svg)](https://opensource.org/licenses/MIT)
54
55
 
55
56
  A production-ready **Model Context Protocol (MCP) server** that revolutionizes how AI agents navigate and understand codebases. Built for high-concurrency environments with advanced database resilience, the server provides instant access to intelligent descriptions, semantic search, and context-aware recommendations while maintaining 800+ writes/sec throughput.
@@ -256,6 +257,7 @@ mcp-code-indexer --vector --http --port 8080
256
257
 
257
258
  Vector Mode adds powerful new MCP tools:
258
259
  - `vector_search` - Semantic code search across projects
260
+ - `find_similar_code` - Find code similar to a given snippet or file section
259
261
  - `similarity_search` - Find similar code patterns
260
262
  - `dependency_search` - Discover code relationships
261
263
  - `vector_status` - Monitor indexing progress
@@ -316,7 +318,7 @@ mypy src/
316
318
 
317
319
  ## 🛠️ MCP Tools Available
318
320
 
319
- The server provides **11 powerful MCP tools** for intelligent codebase management. Whether you're an AI agent or human developer, these tools make navigating code effortless.
321
+ The server provides **13 powerful MCP tools** for intelligent codebase management. Whether you're an AI agent or human developer, these tools make navigating code effortless.
320
322
 
321
323
  ### 🎯 Essential Tools (Start Here)
322
324
  | Tool | Purpose | When to Use |
@@ -339,6 +341,7 @@ The server provides **11 powerful MCP tools** for intelligent codebase managemen
339
341
  | **`get_word_frequency`** | Technical vocabulary analysis | Domain understanding |
340
342
  | **`update_codebase_overview`** | Create project documentation | Architecture documentation |
341
343
  | **`search_codebase_overview`** | Search in project overviews | Finding specific topics |
344
+ | **`find_similar_code`** | Find code similar to snippet/section | Code pattern discovery (Vector Mode) |
342
345
 
343
346
  ### 🏥 System Health
344
347
  | Tool | Purpose | For |
@@ -347,7 +350,7 @@ The server provides **11 powerful MCP tools** for intelligent codebase managemen
347
350
 
348
351
  💡 **Pro Tip**: Always start with `check_codebase_size` to get personalized recommendations for navigating your specific codebase.
349
352
 
350
- **📖 Complete API Documentation**: [View all 11 tools with examples →](docs/api-reference.md)
353
+ **📖 Complete API Documentation**: [View all 13 tools with examples →](docs/api-reference.md)
351
354
 
352
355
  ## 🔗 Git Hook Integration
353
356
 
@@ -411,7 +414,7 @@ Comprehensive documentation organized by user journey and expertise level.
411
414
  | Guide | Purpose | Time Investment |
412
415
  |-------|---------|-----------------|
413
416
  | **[Quick Start](#-quick-start)** | Install and run your first server | 2 minutes |
414
- | **[API Reference](docs/api-reference.md)** | Master all 11 MCP tools | 15 minutes |
417
+ | **[API Reference](docs/api-reference.md)** | Master all 13 MCP tools | 15 minutes |
415
418
  | **[HTTP API Reference](docs/http-api.md)** | REST API for web applications | 10 minutes |
416
419
  | **[Q&A Interface](docs/qa-interface.md)** | AI-powered codebase analysis | 8 minutes |
417
420
  | **[Git Hook Setup](docs/git-hook-setup.md)** | Automate your workflow | 5 minutes |
@@ -435,7 +438,7 @@ Comprehensive documentation organized by user journey and expertise level.
435
438
  ### 📋 Quick References
436
439
  - **[Examples & Integrations](examples/)** - Ready-to-use configurations
437
440
  - **[Troubleshooting](#🚨-troubleshooting)** - Common issues & solutions
438
- - **[API Tools Summary](#🛠️-mcp-tools-available)** - All 11 tools at a glance
441
+ - **[API Tools Summary](#🛠️-mcp-tools-available)** - All 13 tools at a glance
439
442
 
440
443
  **📚 Reading Paths:**
441
444
  - **New to MCP Code Indexer?** Quick Start → API Reference → HTTP API → Q&A Interface
@@ -1,7 +1,7 @@
1
1
  # MCP Code Indexer 🚀
2
2
 
3
- [![PyPI version](https://badge.fury.io/py/mcp-code-indexer.svg?61)](https://badge.fury.io/py/mcp-code-indexer)
4
- [![Python](https://img.shields.io/pypi/pyversions/mcp-code-indexer.svg?61)](https://pypi.org/project/mcp-code-indexer/)
3
+ [![PyPI version](https://badge.fury.io/py/mcp-code-indexer.svg?63)](https://badge.fury.io/py/mcp-code-indexer)
4
+ [![Python](https://img.shields.io/pypi/pyversions/mcp-code-indexer.svg?63)](https://pypi.org/project/mcp-code-indexer/)
5
5
  [![License](https://img.shields.io/badge/License-MIT-blue.svg)](https://opensource.org/licenses/MIT)
6
6
 
7
7
  A production-ready **Model Context Protocol (MCP) server** that revolutionizes how AI agents navigate and understand codebases. Built for high-concurrency environments with advanced database resilience, the server provides instant access to intelligent descriptions, semantic search, and context-aware recommendations while maintaining 800+ writes/sec throughput.
@@ -208,6 +208,7 @@ mcp-code-indexer --vector --http --port 8080
208
208
 
209
209
  Vector Mode adds powerful new MCP tools:
210
210
  - `vector_search` - Semantic code search across projects
211
+ - `find_similar_code` - Find code similar to a given snippet or file section
211
212
  - `similarity_search` - Find similar code patterns
212
213
  - `dependency_search` - Discover code relationships
213
214
  - `vector_status` - Monitor indexing progress
@@ -268,7 +269,7 @@ mypy src/
268
269
 
269
270
  ## 🛠️ MCP Tools Available
270
271
 
271
- The server provides **11 powerful MCP tools** for intelligent codebase management. Whether you're an AI agent or human developer, these tools make navigating code effortless.
272
+ The server provides **13 powerful MCP tools** for intelligent codebase management. Whether you're an AI agent or human developer, these tools make navigating code effortless.
272
273
 
273
274
  ### 🎯 Essential Tools (Start Here)
274
275
  | Tool | Purpose | When to Use |
@@ -291,6 +292,7 @@ The server provides **11 powerful MCP tools** for intelligent codebase managemen
291
292
  | **`get_word_frequency`** | Technical vocabulary analysis | Domain understanding |
292
293
  | **`update_codebase_overview`** | Create project documentation | Architecture documentation |
293
294
  | **`search_codebase_overview`** | Search in project overviews | Finding specific topics |
295
+ | **`find_similar_code`** | Find code similar to snippet/section | Code pattern discovery (Vector Mode) |
294
296
 
295
297
  ### 🏥 System Health
296
298
  | Tool | Purpose | For |
@@ -299,7 +301,7 @@ The server provides **11 powerful MCP tools** for intelligent codebase managemen
299
301
 
300
302
  💡 **Pro Tip**: Always start with `check_codebase_size` to get personalized recommendations for navigating your specific codebase.
301
303
 
302
- **📖 Complete API Documentation**: [View all 11 tools with examples →](docs/api-reference.md)
304
+ **📖 Complete API Documentation**: [View all 13 tools with examples →](docs/api-reference.md)
303
305
 
304
306
  ## 🔗 Git Hook Integration
305
307
 
@@ -363,7 +365,7 @@ Comprehensive documentation organized by user journey and expertise level.
363
365
  | Guide | Purpose | Time Investment |
364
366
  |-------|---------|-----------------|
365
367
  | **[Quick Start](#-quick-start)** | Install and run your first server | 2 minutes |
366
- | **[API Reference](docs/api-reference.md)** | Master all 11 MCP tools | 15 minutes |
368
+ | **[API Reference](docs/api-reference.md)** | Master all 13 MCP tools | 15 minutes |
367
369
  | **[HTTP API Reference](docs/http-api.md)** | REST API for web applications | 10 minutes |
368
370
  | **[Q&A Interface](docs/qa-interface.md)** | AI-powered codebase analysis | 8 minutes |
369
371
  | **[Git Hook Setup](docs/git-hook-setup.md)** | Automate your workflow | 5 minutes |
@@ -387,7 +389,7 @@ Comprehensive documentation organized by user journey and expertise level.
387
389
  ### 📋 Quick References
388
390
  - **[Examples & Integrations](examples/)** - Ready-to-use configurations
389
391
  - **[Troubleshooting](#🚨-troubleshooting)** - Common issues & solutions
390
- - **[API Tools Summary](#🛠️-mcp-tools-available)** - All 11 tools at a glance
392
+ - **[API Tools Summary](#🛠️-mcp-tools-available)** - All 13 tools at a glance
391
393
 
392
394
  **📚 Reading Paths:**
393
395
  - **New to MCP Code Indexer?** Quick Start → API Reference → HTTP API → Q&A Interface
@@ -4,7 +4,7 @@ build-backend = "poetry.core.masonry.api"
4
4
 
5
5
  [tool.poetry]
6
6
  name = "mcp-code-indexer"
7
- version = "4.2.14"
7
+ version = "4.2.16"
8
8
  description = "MCP server that tracks file descriptions across codebases, enabling AI agents to efficiently navigate and understand code through searchable summaries and token-aware overviews."
9
9
  authors = ["MCP Code Indexer Contributors"]
10
10
  maintainers = ["MCP Code Indexer Contributors"]
@@ -61,8 +61,8 @@ python-multipart = ">=0.0.6"
61
61
  # Vector mode dependencies (always included)
62
62
  voyageai = ">=0.3.0"
63
63
  turbopuffer = ">=0.6.0"
64
- tree-sitter = ">=0.25.0"
65
- watchdog = ">=6.0.0"
64
+ tree-sitter = ">=0.21.0"
65
+ watchdog = ">=4.0.0"
66
66
  pyyaml = ">=6.0"
67
67
 
68
68
  [tool.poetry.group.dev.dependencies]
@@ -27,9 +27,11 @@ from mcp_code_indexer.database.exceptions import (
27
27
  )
28
28
  from mcp_code_indexer.database.models import (
29
29
  FileDescription,
30
+ IndexMeta,
30
31
  Project,
31
32
  ProjectOverview,
32
33
  SearchResult,
34
+ SyncStatus,
33
35
  WordFrequencyResult,
34
36
  WordFrequencyTerm,
35
37
  )
@@ -315,12 +317,10 @@ class DatabaseManager:
315
317
  self, operation_name: str = "write_operation"
316
318
  ) -> AsyncIterator[aiosqlite.Connection]:
317
319
  """
318
- Get a database connection with write serialization and automatic
319
- retry logic.
320
+ Get a database connection with write serialization.
320
321
 
321
- This uses the new RetryExecutor to properly handle retry logic
322
- without the broken yield-in-retry-loop pattern that caused
323
- generator errors.
322
+ Ensures the write lock is held throughout the duration of the context
323
+ to prevent race conditions and database locking errors.
324
324
 
325
325
  Args:
326
326
  operation_name: Name of the operation for logging and
@@ -331,43 +331,10 @@ class DatabaseManager:
331
331
  "DatabaseManager not initialized - call initialize() first"
332
332
  )
333
333
 
334
- async def get_write_connection() -> aiosqlite.Connection:
335
- """Inner function to get connection - retried by executor."""
336
- if self._write_lock is None:
337
- raise RuntimeError("Write lock not initialized")
338
- async with self._write_lock:
339
- async with self.get_connection() as conn:
340
- return conn
341
-
342
- try:
343
- # Use retry executor to handle connection acquisition with retries
344
- connection = await self._retry_executor.execute_with_retry(
345
- get_write_connection, operation_name
346
- )
347
-
348
- try:
349
- yield connection
350
-
351
- # Success - retry executor handles all failure tracking
352
-
353
- except Exception:
354
- # Error handling is managed by the retry executor
355
- raise
356
-
357
- except DatabaseError:
358
- # Re-raise our custom database errors as-is
359
- raise
360
- except Exception as e:
361
- # Classify and wrap other exceptions
362
- classified_error = classify_sqlite_error(e, operation_name)
363
- logger.error(
364
- (
365
- f"Database operation '{operation_name}' failed: "
366
- f"{classified_error.message}"
367
- ),
368
- extra={"structured_data": classified_error.to_dict()},
369
- )
370
- raise classified_error
334
+ # Acquire lock for exclusive write access - hold it for entire context
335
+ async with self._write_lock:
336
+ async with self.get_connection() as conn:
337
+ yield conn
371
338
 
372
339
  def get_database_stats(self) -> Dict[str, Any]:
373
340
  """
@@ -740,6 +707,25 @@ class DatabaseManager:
740
707
  await db.commit()
741
708
  logger.debug(f"Updated project: {project.id}")
742
709
 
710
+ async def set_project_vector_mode(self, project_id: str, enabled: bool) -> None:
711
+ """Set the vector_mode for a specific project."""
712
+ async with self.get_write_connection_with_retry(
713
+ "set_project_vector_mode"
714
+ ) as db:
715
+ await db.execute(
716
+ "UPDATE projects SET vector_mode = ? WHERE id = ?",
717
+ (int(enabled), project_id),
718
+ )
719
+
720
+ # Check if the project was actually updated
721
+ cursor = await db.execute("SELECT changes()")
722
+ changes = await cursor.fetchone()
723
+ if changes[0] == 0:
724
+ raise ValueError(f"Project not found: {project_id}")
725
+
726
+ await db.commit()
727
+ logger.debug(f"Set vector_mode={enabled} for project: {project_id}")
728
+
743
729
  async def get_all_projects(self) -> List[Project]:
744
730
  """Get all projects in the database."""
745
731
  async with self.get_connection() as db:
@@ -795,12 +781,19 @@ class DatabaseManager:
795
781
  ) as db:
796
782
  await db.execute(
797
783
  """
798
- INSERT OR REPLACE INTO file_descriptions
784
+ INSERT INTO file_descriptions
799
785
  (
800
786
  project_id, file_path, description, file_hash, last_modified,
801
787
  version, source_project_id, to_be_cleaned
802
788
  )
803
789
  VALUES (?, ?, ?, ?, ?, ?, ?, ?)
790
+ ON CONFLICT(project_id, file_path) DO UPDATE SET
791
+ description=excluded.description,
792
+ file_hash=excluded.file_hash,
793
+ last_modified=excluded.last_modified,
794
+ version=excluded.version,
795
+ source_project_id=excluded.source_project_id,
796
+ to_be_cleaned=excluded.to_be_cleaned
804
797
  """,
805
798
  (
806
799
  file_desc.project_id,
@@ -898,12 +891,19 @@ class DatabaseManager:
898
891
 
899
892
  await conn.executemany(
900
893
  """
901
- INSERT OR REPLACE INTO file_descriptions
894
+ INSERT INTO file_descriptions
902
895
  (
903
896
  project_id, file_path, description, file_hash, last_modified,
904
897
  version, source_project_id, to_be_cleaned
905
898
  )
906
899
  VALUES (?, ?, ?, ?, ?, ?, ?, ?)
900
+ ON CONFLICT(project_id, file_path) DO UPDATE SET
901
+ description=excluded.description,
902
+ file_hash=excluded.file_hash,
903
+ last_modified=excluded.last_modified,
904
+ version=excluded.version,
905
+ source_project_id=excluded.source_project_id,
906
+ to_be_cleaned=excluded.to_be_cleaned
907
907
  """,
908
908
  data,
909
909
  )
@@ -1067,10 +1067,8 @@ class DatabaseManager:
1067
1067
  Returns:
1068
1068
  List of file paths that were marked for cleanup
1069
1069
  """
1070
- removed_files: List[str] = []
1071
-
1072
- async def cleanup_operation(conn: aiosqlite.Connection) -> List[str]:
1073
- # Get all active file descriptions for this project
1070
+ # 1. Get all active file paths (fast DB read)
1071
+ async with self.get_connection() as conn:
1074
1072
  cursor = await conn.execute(
1075
1073
  (
1076
1074
  "SELECT file_path FROM file_descriptions WHERE "
@@ -1078,46 +1076,29 @@ class DatabaseManager:
1078
1076
  ),
1079
1077
  (project_id,),
1080
1078
  )
1081
-
1082
1079
  rows = await cursor.fetchall()
1080
+ file_paths = [row["file_path"] for row in rows]
1083
1081
 
1084
- # Check which files no longer exist
1085
- to_remove = []
1086
- for row in rows:
1087
- file_path = row["file_path"]
1082
+ # 2. Check existence on disk (blocking IO - run in executor)
1083
+ def find_removed_files() -> List[str]:
1084
+ missing = []
1085
+ for file_path in file_paths:
1088
1086
  full_path = project_root / file_path
1089
-
1090
1087
  if not full_path.exists():
1091
- to_remove.append(file_path)
1092
-
1093
- # Mark descriptions for cleanup instead of deleting
1094
- if to_remove:
1095
- import time
1096
-
1097
- cleanup_timestamp = int(time.time())
1098
- await conn.executemany(
1099
- (
1100
- "UPDATE file_descriptions SET to_be_cleaned = ? WHERE "
1101
- "project_id = ? AND file_path = ?"
1102
- ),
1103
- [(cleanup_timestamp, project_id, path) for path in to_remove],
1104
- )
1105
- logger.info(
1106
- (
1107
- f"Marked {len(to_remove)} missing files for cleanup "
1108
- f"from {project_id}"
1109
- )
1110
- )
1088
+ missing.append(file_path)
1089
+ return missing
1111
1090
 
1112
- return to_remove
1091
+ loop = asyncio.get_running_loop()
1092
+ to_remove = await loop.run_in_executor(None, find_removed_files)
1113
1093
 
1114
- removed_files = await self.execute_transaction_with_retry(
1115
- cleanup_operation,
1116
- f"cleanup_missing_files_{project_id}",
1117
- timeout_seconds=60.0, # Longer timeout for file system operations
1118
- )
1094
+ # 3. Mark for cleanup (fast DB write)
1095
+ if to_remove:
1096
+ await self.cleanup_manager.mark_files_for_cleanup(project_id, to_remove)
1097
+ logger.info(
1098
+ f"Marked {len(to_remove)} missing files for cleanup from {project_id}"
1099
+ )
1119
1100
 
1120
- return removed_files
1101
+ return to_remove
1121
1102
 
1122
1103
  async def analyze_word_frequency(
1123
1104
  self, project_id: str, limit: int = 200
@@ -1139,7 +1120,7 @@ class DatabaseManager:
1139
1120
  stop_words_path = (
1140
1121
  Path(__file__).parent.parent / "data" / "stop_words_english.txt"
1141
1122
  )
1142
- stop_words = set()
1123
+ stop_words: set = set()
1143
1124
 
1144
1125
  if stop_words_path.exists():
1145
1126
  with open(stop_words_path, "r", encoding="utf-8") as f:
@@ -1186,8 +1167,8 @@ class DatabaseManager:
1186
1167
  }
1187
1168
  stop_words.update(programming_keywords)
1188
1169
 
1170
+ # Get all descriptions for this project (fast DB read)
1189
1171
  async with self.get_connection() as db:
1190
- # Get all descriptions for this project
1191
1172
  cursor = await db.execute(
1192
1173
  (
1193
1174
  "SELECT description FROM file_descriptions WHERE "
@@ -1195,11 +1176,13 @@ class DatabaseManager:
1195
1176
  ),
1196
1177
  (project_id,),
1197
1178
  )
1198
-
1199
1179
  rows = await cursor.fetchall()
1180
+ descriptions = [row["description"] for row in rows]
1200
1181
 
1182
+ # Process word frequency in executor (CPU-bound work)
1183
+ def process_word_frequency() -> WordFrequencyResult:
1201
1184
  # Combine all descriptions
1202
- all_text = " ".join(row["description"] for row in rows)
1185
+ all_text = " ".join(descriptions)
1203
1186
 
1204
1187
  # Tokenize and filter
1205
1188
  words = re.findall(r"\b[a-zA-Z]{2,}\b", all_text.lower())
@@ -1220,6 +1203,9 @@ class DatabaseManager:
1220
1203
  total_unique_terms=len(word_counts),
1221
1204
  )
1222
1205
 
1206
+ loop = asyncio.get_running_loop()
1207
+ return await loop.run_in_executor(None, process_word_frequency)
1208
+
1223
1209
  async def cleanup_empty_projects(self) -> int:
1224
1210
  """
1225
1211
  Remove projects that have no file descriptions and no project overview.
@@ -1320,6 +1306,186 @@ class DatabaseManager:
1320
1306
  "files": file_descriptions,
1321
1307
  }
1322
1308
 
1309
+ # IndexMeta operations
1310
+ async def create_index_meta(self, index_meta: IndexMeta) -> None:
1311
+ """Create or update index metadata for a project."""
1312
+ async with self.get_write_connection_with_retry("create_index_meta") as db:
1313
+ await db.execute(
1314
+ """
1315
+ INSERT OR REPLACE INTO index_meta (
1316
+ project_id, total_chunks, indexed_chunks, total_files, indexed_files,
1317
+ last_sync, sync_status, error_message, queue_depth, processing_rate,
1318
+ estimated_completion, metadata, created, last_modified
1319
+ )
1320
+ VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
1321
+ """,
1322
+ (
1323
+ index_meta.project_id,
1324
+ index_meta.total_chunks,
1325
+ index_meta.indexed_chunks,
1326
+ index_meta.total_files,
1327
+ index_meta.indexed_files,
1328
+ index_meta.last_sync,
1329
+ index_meta.sync_status.value,
1330
+ index_meta.error_message,
1331
+ index_meta.queue_depth,
1332
+ index_meta.processing_rate,
1333
+ index_meta.estimated_completion,
1334
+ json.dumps(index_meta.metadata),
1335
+ index_meta.created,
1336
+ index_meta.last_modified,
1337
+ ),
1338
+ )
1339
+ await db.commit()
1340
+ logger.debug(
1341
+ f"Created/updated index metadata for project: {index_meta.project_id}"
1342
+ )
1343
+
1344
+ async def update_index_meta(self, index_meta: IndexMeta) -> None:
1345
+ """Update existing index metadata for a project."""
1346
+ async with self.get_write_connection_with_retry("update_index_meta") as db:
1347
+ await db.execute(
1348
+ """
1349
+ UPDATE index_meta
1350
+ SET total_chunks = ?, indexed_chunks = ?, total_files = ?, indexed_files = ?,
1351
+ last_sync = ?, sync_status = ?, error_message = ?, queue_depth = ?,
1352
+ processing_rate = ?, estimated_completion = ?, metadata = ?, last_modified = ?
1353
+ WHERE project_id = ?
1354
+ """,
1355
+ (
1356
+ index_meta.total_chunks,
1357
+ index_meta.indexed_chunks,
1358
+ index_meta.total_files,
1359
+ index_meta.indexed_files,
1360
+ index_meta.last_sync,
1361
+ index_meta.sync_status.value,
1362
+ index_meta.error_message,
1363
+ index_meta.queue_depth,
1364
+ index_meta.processing_rate,
1365
+ index_meta.estimated_completion,
1366
+ json.dumps(index_meta.metadata),
1367
+ index_meta.last_modified,
1368
+ index_meta.project_id,
1369
+ ),
1370
+ )
1371
+
1372
+ # Check if the project was actually updated
1373
+ cursor = await db.execute("SELECT changes()")
1374
+ changes = await cursor.fetchone()
1375
+ if changes[0] == 0:
1376
+ raise ValueError(
1377
+ f"Index metadata not found for project: {index_meta.project_id}"
1378
+ )
1379
+
1380
+ await db.commit()
1381
+ logger.debug(f"Updated index metadata for project: {index_meta.project_id}")
1382
+
1383
+ async def get_index_meta(self, project_id: str) -> Optional[IndexMeta]:
1384
+ """Retrieve index metadata for a project."""
1385
+ async with self.get_connection() as db:
1386
+ cursor = await db.execute(
1387
+ "SELECT * FROM index_meta WHERE project_id = ?", (project_id,)
1388
+ )
1389
+ row = await cursor.fetchone()
1390
+
1391
+ if row:
1392
+ # Convert row to dict for easier field access
1393
+ row_dict = dict(row)
1394
+
1395
+ # Parse JSON metadata field
1396
+ metadata = (
1397
+ json.loads(row_dict["metadata"]) if row_dict["metadata"] else {}
1398
+ )
1399
+
1400
+ # Parse datetime fields
1401
+ created = (
1402
+ datetime.fromisoformat(row_dict["created"])
1403
+ if row_dict["created"]
1404
+ else datetime.utcnow()
1405
+ )
1406
+ last_modified = (
1407
+ datetime.fromisoformat(row_dict["last_modified"])
1408
+ if row_dict["last_modified"]
1409
+ else datetime.utcnow()
1410
+ )
1411
+ last_sync = (
1412
+ datetime.fromisoformat(row_dict["last_sync"])
1413
+ if row_dict["last_sync"]
1414
+ else None
1415
+ )
1416
+ estimated_completion = (
1417
+ datetime.fromisoformat(row_dict["estimated_completion"])
1418
+ if row_dict["estimated_completion"]
1419
+ else None
1420
+ )
1421
+
1422
+ return IndexMeta(
1423
+ id=row_dict["id"],
1424
+ project_id=row_dict["project_id"],
1425
+ total_chunks=row_dict["total_chunks"],
1426
+ indexed_chunks=row_dict["indexed_chunks"],
1427
+ total_files=row_dict["total_files"],
1428
+ indexed_files=row_dict["indexed_files"],
1429
+ last_sync=last_sync,
1430
+ sync_status=row_dict["sync_status"],
1431
+ error_message=row_dict["error_message"],
1432
+ queue_depth=row_dict["queue_depth"],
1433
+ processing_rate=row_dict["processing_rate"],
1434
+ estimated_completion=estimated_completion,
1435
+ metadata=metadata,
1436
+ created=created,
1437
+ last_modified=last_modified,
1438
+ )
1439
+ return None
1440
+
1441
+ async def get_or_create_index_meta(self, project_id: str, **kwargs) -> IndexMeta:
1442
+ """
1443
+ Get existing index metadata or create new one with default values.
1444
+
1445
+ Args:
1446
+ project_id: Project identifier
1447
+ **kwargs: Optional fields to override defaults when creating new metadata
1448
+
1449
+ Returns:
1450
+ IndexMeta object (existing or newly created)
1451
+ """
1452
+ # Try to get existing metadata first
1453
+ existing_meta = await self.get_index_meta(project_id)
1454
+ if existing_meta:
1455
+ return existing_meta
1456
+
1457
+ # Create new metadata with defaults, allowing kwargs to override
1458
+ default_metadata = {
1459
+ "project_id": project_id,
1460
+ "total_chunks": 0,
1461
+ "indexed_chunks": 0,
1462
+ "total_files": 0,
1463
+ "indexed_files": 0,
1464
+ "last_sync": None,
1465
+ "sync_status": SyncStatus.PENDING,
1466
+ "error_message": None,
1467
+ "queue_depth": 0,
1468
+ "processing_rate": 0.0,
1469
+ "estimated_completion": None,
1470
+ "metadata": {},
1471
+ }
1472
+
1473
+ # Override defaults with provided kwargs
1474
+ default_metadata.update(kwargs)
1475
+
1476
+ # Create the IndexMeta object
1477
+ new_meta = IndexMeta(**default_metadata)
1478
+
1479
+ # Store it in the database
1480
+ await self.create_index_meta(new_meta)
1481
+
1482
+ # Return the created metadata (fetch it back to get the assigned ID)
1483
+ result = await self.get_index_meta(project_id)
1484
+ if result is None:
1485
+ raise DatabaseError(f"Failed to create index metadata for project: {project_id}")
1486
+
1487
+ return result
1488
+
1323
1489
  # Cleanup operations
1324
1490
 
1325
1491
  @property