mcp-code-indexer 4.2.15__tar.gz → 4.2.16__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {mcp_code_indexer-4.2.15 → mcp_code_indexer-4.2.16}/PKG-INFO +13 -10
- {mcp_code_indexer-4.2.15 → mcp_code_indexer-4.2.16}/README.md +8 -6
- {mcp_code_indexer-4.2.15 → mcp_code_indexer-4.2.16}/pyproject.toml +3 -3
- {mcp_code_indexer-4.2.15 → mcp_code_indexer-4.2.16}/src/mcp_code_indexer/database/database.py +251 -85
- {mcp_code_indexer-4.2.15 → mcp_code_indexer-4.2.16}/src/mcp_code_indexer/database/models.py +66 -24
- {mcp_code_indexer-4.2.15 → mcp_code_indexer-4.2.16}/src/mcp_code_indexer/database/retry_executor.py +15 -5
- {mcp_code_indexer-4.2.15 → mcp_code_indexer-4.2.16}/src/mcp_code_indexer/file_scanner.py +107 -12
- {mcp_code_indexer-4.2.15 → mcp_code_indexer-4.2.16}/src/mcp_code_indexer/main.py +43 -30
- {mcp_code_indexer-4.2.15 → mcp_code_indexer-4.2.16}/src/mcp_code_indexer/server/mcp_server.py +191 -1
- {mcp_code_indexer-4.2.15 → mcp_code_indexer-4.2.16}/src/mcp_code_indexer/vector_mode/chunking/ast_chunker.py +103 -84
- {mcp_code_indexer-4.2.15 → mcp_code_indexer-4.2.16}/src/mcp_code_indexer/vector_mode/chunking/chunk_optimizer.py +1 -0
- {mcp_code_indexer-4.2.15 → mcp_code_indexer-4.2.16}/src/mcp_code_indexer/vector_mode/config.py +113 -45
- mcp_code_indexer-4.2.16/src/mcp_code_indexer/vector_mode/const.py +24 -0
- mcp_code_indexer-4.2.16/src/mcp_code_indexer/vector_mode/daemon.py +1105 -0
- {mcp_code_indexer-4.2.15 → mcp_code_indexer-4.2.16}/src/mcp_code_indexer/vector_mode/monitoring/change_detector.py +113 -97
- {mcp_code_indexer-4.2.15 → mcp_code_indexer-4.2.16}/src/mcp_code_indexer/vector_mode/monitoring/file_watcher.py +175 -121
- mcp_code_indexer-4.2.16/src/mcp_code_indexer/vector_mode/providers/turbopuffer_client.py +410 -0
- mcp_code_indexer-4.2.16/src/mcp_code_indexer/vector_mode/providers/voyage_client.py +221 -0
- mcp_code_indexer-4.2.16/src/mcp_code_indexer/vector_mode/services/__init__.py +9 -0
- mcp_code_indexer-4.2.16/src/mcp_code_indexer/vector_mode/services/embedding_service.py +389 -0
- mcp_code_indexer-4.2.16/src/mcp_code_indexer/vector_mode/services/vector_mode_tools_service.py +459 -0
- mcp_code_indexer-4.2.16/src/mcp_code_indexer/vector_mode/services/vector_storage_service.py +580 -0
- mcp_code_indexer-4.2.16/src/mcp_code_indexer/vector_mode/types.py +46 -0
- mcp_code_indexer-4.2.16/src/mcp_code_indexer/vector_mode/utils.py +50 -0
- mcp_code_indexer-4.2.15/src/mcp_code_indexer/vector_mode/daemon.py +0 -343
- mcp_code_indexer-4.2.15/src/mcp_code_indexer/vector_mode/providers/turbopuffer_client.py +0 -217
- mcp_code_indexer-4.2.15/src/mcp_code_indexer/vector_mode/providers/voyage_client.py +0 -119
- {mcp_code_indexer-4.2.15 → mcp_code_indexer-4.2.16}/LICENSE +0 -0
- {mcp_code_indexer-4.2.15 → mcp_code_indexer-4.2.16}/src/mcp_code_indexer/__init__.py +0 -0
- {mcp_code_indexer-4.2.15 → mcp_code_indexer-4.2.16}/src/mcp_code_indexer/__main__.py +0 -0
- {mcp_code_indexer-4.2.15 → mcp_code_indexer-4.2.16}/src/mcp_code_indexer/ask_handler.py +0 -0
- {mcp_code_indexer-4.2.15 → mcp_code_indexer-4.2.16}/src/mcp_code_indexer/claude_api_handler.py +0 -0
- {mcp_code_indexer-4.2.15 → mcp_code_indexer-4.2.16}/src/mcp_code_indexer/cleanup_manager.py +0 -0
- {mcp_code_indexer-4.2.15 → mcp_code_indexer-4.2.16}/src/mcp_code_indexer/commands/__init__.py +0 -0
- {mcp_code_indexer-4.2.15 → mcp_code_indexer-4.2.16}/src/mcp_code_indexer/commands/makelocal.py +0 -0
- {mcp_code_indexer-4.2.15 → mcp_code_indexer-4.2.16}/src/mcp_code_indexer/data/stop_words_english.txt +0 -0
- {mcp_code_indexer-4.2.15 → mcp_code_indexer-4.2.16}/src/mcp_code_indexer/database/__init__.py +0 -0
- {mcp_code_indexer-4.2.15 → mcp_code_indexer-4.2.16}/src/mcp_code_indexer/database/connection_health.py +0 -0
- {mcp_code_indexer-4.2.15 → mcp_code_indexer-4.2.16}/src/mcp_code_indexer/database/database_factory.py +0 -0
- {mcp_code_indexer-4.2.15 → mcp_code_indexer-4.2.16}/src/mcp_code_indexer/database/exceptions.py +0 -0
- {mcp_code_indexer-4.2.15 → mcp_code_indexer-4.2.16}/src/mcp_code_indexer/database/path_resolver.py +0 -0
- {mcp_code_indexer-4.2.15 → mcp_code_indexer-4.2.16}/src/mcp_code_indexer/deepask_handler.py +0 -0
- {mcp_code_indexer-4.2.15 → mcp_code_indexer-4.2.16}/src/mcp_code_indexer/error_handler.py +0 -0
- {mcp_code_indexer-4.2.15 → mcp_code_indexer-4.2.16}/src/mcp_code_indexer/git_hook_handler.py +0 -0
- {mcp_code_indexer-4.2.15 → mcp_code_indexer-4.2.16}/src/mcp_code_indexer/logging_config.py +0 -0
- {mcp_code_indexer-4.2.15 → mcp_code_indexer-4.2.16}/src/mcp_code_indexer/middleware/__init__.py +0 -0
- {mcp_code_indexer-4.2.15 → mcp_code_indexer-4.2.16}/src/mcp_code_indexer/middleware/auth.py +0 -0
- {mcp_code_indexer-4.2.15 → mcp_code_indexer-4.2.16}/src/mcp_code_indexer/middleware/error_middleware.py +0 -0
- {mcp_code_indexer-4.2.15 → mcp_code_indexer-4.2.16}/src/mcp_code_indexer/middleware/logging.py +0 -0
- {mcp_code_indexer-4.2.15 → mcp_code_indexer-4.2.16}/src/mcp_code_indexer/middleware/security.py +0 -0
- {mcp_code_indexer-4.2.15 → mcp_code_indexer-4.2.16}/src/mcp_code_indexer/migrations/001_initial.sql +0 -0
- {mcp_code_indexer-4.2.15 → mcp_code_indexer-4.2.16}/src/mcp_code_indexer/migrations/002_performance_indexes.sql +0 -0
- {mcp_code_indexer-4.2.15 → mcp_code_indexer-4.2.16}/src/mcp_code_indexer/migrations/003_project_overviews.sql +0 -0
- {mcp_code_indexer-4.2.15 → mcp_code_indexer-4.2.16}/src/mcp_code_indexer/migrations/004_remove_branch_dependency.sql +0 -0
- {mcp_code_indexer-4.2.15 → mcp_code_indexer-4.2.16}/src/mcp_code_indexer/migrations/005_remove_git_remotes.sql +0 -0
- {mcp_code_indexer-4.2.15 → mcp_code_indexer-4.2.16}/src/mcp_code_indexer/migrations/006_vector_mode.sql +0 -0
- {mcp_code_indexer-4.2.15 → mcp_code_indexer-4.2.16}/src/mcp_code_indexer/query_preprocessor.py +0 -0
- {mcp_code_indexer-4.2.15 → mcp_code_indexer-4.2.16}/src/mcp_code_indexer/server/__init__.py +0 -0
- {mcp_code_indexer-4.2.15 → mcp_code_indexer-4.2.16}/src/mcp_code_indexer/tiktoken_cache/9b5ad71b2ce5302211f9c61530b329a4922fc6a4 +0 -0
- {mcp_code_indexer-4.2.15 → mcp_code_indexer-4.2.16}/src/mcp_code_indexer/token_counter.py +0 -0
- {mcp_code_indexer-4.2.15 → mcp_code_indexer-4.2.16}/src/mcp_code_indexer/tools/__init__.py +0 -0
- {mcp_code_indexer-4.2.15 → mcp_code_indexer-4.2.16}/src/mcp_code_indexer/transport/__init__.py +0 -0
- {mcp_code_indexer-4.2.15 → mcp_code_indexer-4.2.16}/src/mcp_code_indexer/transport/base.py +0 -0
- {mcp_code_indexer-4.2.15 → mcp_code_indexer-4.2.16}/src/mcp_code_indexer/transport/http_transport.py +0 -0
- {mcp_code_indexer-4.2.15 → mcp_code_indexer-4.2.16}/src/mcp_code_indexer/transport/stdio_transport.py +0 -0
- {mcp_code_indexer-4.2.15 → mcp_code_indexer-4.2.16}/src/mcp_code_indexer/vector_mode/__init__.py +0 -0
- {mcp_code_indexer-4.2.15 → mcp_code_indexer-4.2.16}/src/mcp_code_indexer/vector_mode/chunking/__init__.py +0 -0
- {mcp_code_indexer-4.2.15 → mcp_code_indexer-4.2.16}/src/mcp_code_indexer/vector_mode/chunking/language_handlers.py +0 -0
- {mcp_code_indexer-4.2.15 → mcp_code_indexer-4.2.16}/src/mcp_code_indexer/vector_mode/monitoring/__init__.py +0 -0
- {mcp_code_indexer-4.2.15 → mcp_code_indexer-4.2.16}/src/mcp_code_indexer/vector_mode/monitoring/merkle_tree.py +0 -0
- {mcp_code_indexer-4.2.15 → mcp_code_indexer-4.2.16}/src/mcp_code_indexer/vector_mode/providers/__init__.py +0 -0
- {mcp_code_indexer-4.2.15 → mcp_code_indexer-4.2.16}/src/mcp_code_indexer/vector_mode/security/__init__.py +0 -0
- {mcp_code_indexer-4.2.15 → mcp_code_indexer-4.2.16}/src/mcp_code_indexer/vector_mode/security/patterns.py +0 -0
- {mcp_code_indexer-4.2.15 → mcp_code_indexer-4.2.16}/src/mcp_code_indexer/vector_mode/security/redactor.py +0 -0
|
@@ -1,8 +1,9 @@
|
|
|
1
|
-
Metadata-Version: 2.
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
2
|
Name: mcp-code-indexer
|
|
3
|
-
Version: 4.2.
|
|
3
|
+
Version: 4.2.16
|
|
4
4
|
Summary: MCP server that tracks file descriptions across codebases, enabling AI agents to efficiently navigate and understand code through searchable summaries and token-aware overviews.
|
|
5
5
|
License: MIT
|
|
6
|
+
License-File: LICENSE
|
|
6
7
|
Keywords: mcp,model-context-protocol,code-indexer,ai-tools,codebase-navigation,file-descriptions,llm-tools
|
|
7
8
|
Author: MCP Code Indexer Contributors
|
|
8
9
|
Maintainer: MCP Code Indexer Contributors
|
|
@@ -36,11 +37,11 @@ Requires-Dist: pyyaml (>=6.0)
|
|
|
36
37
|
Requires-Dist: tenacity (>=8.0.0)
|
|
37
38
|
Requires-Dist: tiktoken (>=0.9.0)
|
|
38
39
|
Requires-Dist: tomli (>=1.2.0) ; python_version < "3.11"
|
|
39
|
-
Requires-Dist: tree-sitter (>=0.
|
|
40
|
+
Requires-Dist: tree-sitter (>=0.21.0)
|
|
40
41
|
Requires-Dist: turbopuffer (>=0.6.0)
|
|
41
42
|
Requires-Dist: uvicorn (>=0.24.0)
|
|
42
43
|
Requires-Dist: voyageai (>=0.3.0)
|
|
43
|
-
Requires-Dist: watchdog (>=
|
|
44
|
+
Requires-Dist: watchdog (>=4.0.0)
|
|
44
45
|
Project-URL: Documentation, https://github.com/fluffypony/mcp-code-indexer/blob/main/README.md
|
|
45
46
|
Project-URL: Homepage, https://github.com/fluffypony/mcp-code-indexer
|
|
46
47
|
Project-URL: Repository, https://github.com/fluffypony/mcp-code-indexer
|
|
@@ -48,8 +49,8 @@ Description-Content-Type: text/markdown
|
|
|
48
49
|
|
|
49
50
|
# MCP Code Indexer 🚀
|
|
50
51
|
|
|
51
|
-
[](https://badge.fury.io/py/mcp-code-indexer)
|
|
53
|
+
[](https://pypi.org/project/mcp-code-indexer/)
|
|
53
54
|
[](https://opensource.org/licenses/MIT)
|
|
54
55
|
|
|
55
56
|
A production-ready **Model Context Protocol (MCP) server** that revolutionizes how AI agents navigate and understand codebases. Built for high-concurrency environments with advanced database resilience, the server provides instant access to intelligent descriptions, semantic search, and context-aware recommendations while maintaining 800+ writes/sec throughput.
|
|
@@ -256,6 +257,7 @@ mcp-code-indexer --vector --http --port 8080
|
|
|
256
257
|
|
|
257
258
|
Vector Mode adds powerful new MCP tools:
|
|
258
259
|
- `vector_search` - Semantic code search across projects
|
|
260
|
+
- `find_similar_code` - Find code similar to a given snippet or file section
|
|
259
261
|
- `similarity_search` - Find similar code patterns
|
|
260
262
|
- `dependency_search` - Discover code relationships
|
|
261
263
|
- `vector_status` - Monitor indexing progress
|
|
@@ -316,7 +318,7 @@ mypy src/
|
|
|
316
318
|
|
|
317
319
|
## 🛠️ MCP Tools Available
|
|
318
320
|
|
|
319
|
-
The server provides **
|
|
321
|
+
The server provides **13 powerful MCP tools** for intelligent codebase management. Whether you're an AI agent or human developer, these tools make navigating code effortless.
|
|
320
322
|
|
|
321
323
|
### 🎯 Essential Tools (Start Here)
|
|
322
324
|
| Tool | Purpose | When to Use |
|
|
@@ -339,6 +341,7 @@ The server provides **11 powerful MCP tools** for intelligent codebase managemen
|
|
|
339
341
|
| **`get_word_frequency`** | Technical vocabulary analysis | Domain understanding |
|
|
340
342
|
| **`update_codebase_overview`** | Create project documentation | Architecture documentation |
|
|
341
343
|
| **`search_codebase_overview`** | Search in project overviews | Finding specific topics |
|
|
344
|
+
| **`find_similar_code`** | Find code similar to snippet/section | Code pattern discovery (Vector Mode) |
|
|
342
345
|
|
|
343
346
|
### 🏥 System Health
|
|
344
347
|
| Tool | Purpose | For |
|
|
@@ -347,7 +350,7 @@ The server provides **11 powerful MCP tools** for intelligent codebase managemen
|
|
|
347
350
|
|
|
348
351
|
💡 **Pro Tip**: Always start with `check_codebase_size` to get personalized recommendations for navigating your specific codebase.
|
|
349
352
|
|
|
350
|
-
**📖 Complete API Documentation**: [View all
|
|
353
|
+
**📖 Complete API Documentation**: [View all 13 tools with examples →](docs/api-reference.md)
|
|
351
354
|
|
|
352
355
|
## 🔗 Git Hook Integration
|
|
353
356
|
|
|
@@ -411,7 +414,7 @@ Comprehensive documentation organized by user journey and expertise level.
|
|
|
411
414
|
| Guide | Purpose | Time Investment |
|
|
412
415
|
|-------|---------|-----------------|
|
|
413
416
|
| **[Quick Start](#-quick-start)** | Install and run your first server | 2 minutes |
|
|
414
|
-
| **[API Reference](docs/api-reference.md)** | Master all
|
|
417
|
+
| **[API Reference](docs/api-reference.md)** | Master all 13 MCP tools | 15 minutes |
|
|
415
418
|
| **[HTTP API Reference](docs/http-api.md)** | REST API for web applications | 10 minutes |
|
|
416
419
|
| **[Q&A Interface](docs/qa-interface.md)** | AI-powered codebase analysis | 8 minutes |
|
|
417
420
|
| **[Git Hook Setup](docs/git-hook-setup.md)** | Automate your workflow | 5 minutes |
|
|
@@ -435,7 +438,7 @@ Comprehensive documentation organized by user journey and expertise level.
|
|
|
435
438
|
### 📋 Quick References
|
|
436
439
|
- **[Examples & Integrations](examples/)** - Ready-to-use configurations
|
|
437
440
|
- **[Troubleshooting](#🚨-troubleshooting)** - Common issues & solutions
|
|
438
|
-
- **[API Tools Summary](#🛠️-mcp-tools-available)** - All
|
|
441
|
+
- **[API Tools Summary](#🛠️-mcp-tools-available)** - All 13 tools at a glance
|
|
439
442
|
|
|
440
443
|
**📚 Reading Paths:**
|
|
441
444
|
- **New to MCP Code Indexer?** Quick Start → API Reference → HTTP API → Q&A Interface
|
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
# MCP Code Indexer 🚀
|
|
2
2
|
|
|
3
|
-
[](https://badge.fury.io/py/mcp-code-indexer)
|
|
4
|
+
[](https://pypi.org/project/mcp-code-indexer/)
|
|
5
5
|
[](https://opensource.org/licenses/MIT)
|
|
6
6
|
|
|
7
7
|
A production-ready **Model Context Protocol (MCP) server** that revolutionizes how AI agents navigate and understand codebases. Built for high-concurrency environments with advanced database resilience, the server provides instant access to intelligent descriptions, semantic search, and context-aware recommendations while maintaining 800+ writes/sec throughput.
|
|
@@ -208,6 +208,7 @@ mcp-code-indexer --vector --http --port 8080
|
|
|
208
208
|
|
|
209
209
|
Vector Mode adds powerful new MCP tools:
|
|
210
210
|
- `vector_search` - Semantic code search across projects
|
|
211
|
+
- `find_similar_code` - Find code similar to a given snippet or file section
|
|
211
212
|
- `similarity_search` - Find similar code patterns
|
|
212
213
|
- `dependency_search` - Discover code relationships
|
|
213
214
|
- `vector_status` - Monitor indexing progress
|
|
@@ -268,7 +269,7 @@ mypy src/
|
|
|
268
269
|
|
|
269
270
|
## 🛠️ MCP Tools Available
|
|
270
271
|
|
|
271
|
-
The server provides **
|
|
272
|
+
The server provides **13 powerful MCP tools** for intelligent codebase management. Whether you're an AI agent or human developer, these tools make navigating code effortless.
|
|
272
273
|
|
|
273
274
|
### 🎯 Essential Tools (Start Here)
|
|
274
275
|
| Tool | Purpose | When to Use |
|
|
@@ -291,6 +292,7 @@ The server provides **11 powerful MCP tools** for intelligent codebase managemen
|
|
|
291
292
|
| **`get_word_frequency`** | Technical vocabulary analysis | Domain understanding |
|
|
292
293
|
| **`update_codebase_overview`** | Create project documentation | Architecture documentation |
|
|
293
294
|
| **`search_codebase_overview`** | Search in project overviews | Finding specific topics |
|
|
295
|
+
| **`find_similar_code`** | Find code similar to snippet/section | Code pattern discovery (Vector Mode) |
|
|
294
296
|
|
|
295
297
|
### 🏥 System Health
|
|
296
298
|
| Tool | Purpose | For |
|
|
@@ -299,7 +301,7 @@ The server provides **11 powerful MCP tools** for intelligent codebase managemen
|
|
|
299
301
|
|
|
300
302
|
💡 **Pro Tip**: Always start with `check_codebase_size` to get personalized recommendations for navigating your specific codebase.
|
|
301
303
|
|
|
302
|
-
**📖 Complete API Documentation**: [View all
|
|
304
|
+
**📖 Complete API Documentation**: [View all 13 tools with examples →](docs/api-reference.md)
|
|
303
305
|
|
|
304
306
|
## 🔗 Git Hook Integration
|
|
305
307
|
|
|
@@ -363,7 +365,7 @@ Comprehensive documentation organized by user journey and expertise level.
|
|
|
363
365
|
| Guide | Purpose | Time Investment |
|
|
364
366
|
|-------|---------|-----------------|
|
|
365
367
|
| **[Quick Start](#-quick-start)** | Install and run your first server | 2 minutes |
|
|
366
|
-
| **[API Reference](docs/api-reference.md)** | Master all
|
|
368
|
+
| **[API Reference](docs/api-reference.md)** | Master all 13 MCP tools | 15 minutes |
|
|
367
369
|
| **[HTTP API Reference](docs/http-api.md)** | REST API for web applications | 10 minutes |
|
|
368
370
|
| **[Q&A Interface](docs/qa-interface.md)** | AI-powered codebase analysis | 8 minutes |
|
|
369
371
|
| **[Git Hook Setup](docs/git-hook-setup.md)** | Automate your workflow | 5 minutes |
|
|
@@ -387,7 +389,7 @@ Comprehensive documentation organized by user journey and expertise level.
|
|
|
387
389
|
### 📋 Quick References
|
|
388
390
|
- **[Examples & Integrations](examples/)** - Ready-to-use configurations
|
|
389
391
|
- **[Troubleshooting](#🚨-troubleshooting)** - Common issues & solutions
|
|
390
|
-
- **[API Tools Summary](#🛠️-mcp-tools-available)** - All
|
|
392
|
+
- **[API Tools Summary](#🛠️-mcp-tools-available)** - All 13 tools at a glance
|
|
391
393
|
|
|
392
394
|
**📚 Reading Paths:**
|
|
393
395
|
- **New to MCP Code Indexer?** Quick Start → API Reference → HTTP API → Q&A Interface
|
|
@@ -4,7 +4,7 @@ build-backend = "poetry.core.masonry.api"
|
|
|
4
4
|
|
|
5
5
|
[tool.poetry]
|
|
6
6
|
name = "mcp-code-indexer"
|
|
7
|
-
version = "4.2.
|
|
7
|
+
version = "4.2.16"
|
|
8
8
|
description = "MCP server that tracks file descriptions across codebases, enabling AI agents to efficiently navigate and understand code through searchable summaries and token-aware overviews."
|
|
9
9
|
authors = ["MCP Code Indexer Contributors"]
|
|
10
10
|
maintainers = ["MCP Code Indexer Contributors"]
|
|
@@ -61,8 +61,8 @@ python-multipart = ">=0.0.6"
|
|
|
61
61
|
# Vector mode dependencies (always included)
|
|
62
62
|
voyageai = ">=0.3.0"
|
|
63
63
|
turbopuffer = ">=0.6.0"
|
|
64
|
-
tree-sitter = ">=0.
|
|
65
|
-
watchdog = ">=
|
|
64
|
+
tree-sitter = ">=0.21.0"
|
|
65
|
+
watchdog = ">=4.0.0"
|
|
66
66
|
pyyaml = ">=6.0"
|
|
67
67
|
|
|
68
68
|
[tool.poetry.group.dev.dependencies]
|
{mcp_code_indexer-4.2.15 → mcp_code_indexer-4.2.16}/src/mcp_code_indexer/database/database.py
RENAMED
|
@@ -27,9 +27,11 @@ from mcp_code_indexer.database.exceptions import (
|
|
|
27
27
|
)
|
|
28
28
|
from mcp_code_indexer.database.models import (
|
|
29
29
|
FileDescription,
|
|
30
|
+
IndexMeta,
|
|
30
31
|
Project,
|
|
31
32
|
ProjectOverview,
|
|
32
33
|
SearchResult,
|
|
34
|
+
SyncStatus,
|
|
33
35
|
WordFrequencyResult,
|
|
34
36
|
WordFrequencyTerm,
|
|
35
37
|
)
|
|
@@ -315,12 +317,10 @@ class DatabaseManager:
|
|
|
315
317
|
self, operation_name: str = "write_operation"
|
|
316
318
|
) -> AsyncIterator[aiosqlite.Connection]:
|
|
317
319
|
"""
|
|
318
|
-
Get a database connection with write serialization
|
|
319
|
-
retry logic.
|
|
320
|
+
Get a database connection with write serialization.
|
|
320
321
|
|
|
321
|
-
|
|
322
|
-
|
|
323
|
-
generator errors.
|
|
322
|
+
Ensures the write lock is held throughout the duration of the context
|
|
323
|
+
to prevent race conditions and database locking errors.
|
|
324
324
|
|
|
325
325
|
Args:
|
|
326
326
|
operation_name: Name of the operation for logging and
|
|
@@ -331,43 +331,10 @@ class DatabaseManager:
|
|
|
331
331
|
"DatabaseManager not initialized - call initialize() first"
|
|
332
332
|
)
|
|
333
333
|
|
|
334
|
-
|
|
335
|
-
|
|
336
|
-
|
|
337
|
-
|
|
338
|
-
async with self._write_lock:
|
|
339
|
-
async with self.get_connection() as conn:
|
|
340
|
-
return conn
|
|
341
|
-
|
|
342
|
-
try:
|
|
343
|
-
# Use retry executor to handle connection acquisition with retries
|
|
344
|
-
connection = await self._retry_executor.execute_with_retry(
|
|
345
|
-
get_write_connection, operation_name
|
|
346
|
-
)
|
|
347
|
-
|
|
348
|
-
try:
|
|
349
|
-
yield connection
|
|
350
|
-
|
|
351
|
-
# Success - retry executor handles all failure tracking
|
|
352
|
-
|
|
353
|
-
except Exception:
|
|
354
|
-
# Error handling is managed by the retry executor
|
|
355
|
-
raise
|
|
356
|
-
|
|
357
|
-
except DatabaseError:
|
|
358
|
-
# Re-raise our custom database errors as-is
|
|
359
|
-
raise
|
|
360
|
-
except Exception as e:
|
|
361
|
-
# Classify and wrap other exceptions
|
|
362
|
-
classified_error = classify_sqlite_error(e, operation_name)
|
|
363
|
-
logger.error(
|
|
364
|
-
(
|
|
365
|
-
f"Database operation '{operation_name}' failed: "
|
|
366
|
-
f"{classified_error.message}"
|
|
367
|
-
),
|
|
368
|
-
extra={"structured_data": classified_error.to_dict()},
|
|
369
|
-
)
|
|
370
|
-
raise classified_error
|
|
334
|
+
# Acquire lock for exclusive write access - hold it for entire context
|
|
335
|
+
async with self._write_lock:
|
|
336
|
+
async with self.get_connection() as conn:
|
|
337
|
+
yield conn
|
|
371
338
|
|
|
372
339
|
def get_database_stats(self) -> Dict[str, Any]:
|
|
373
340
|
"""
|
|
@@ -740,6 +707,25 @@ class DatabaseManager:
|
|
|
740
707
|
await db.commit()
|
|
741
708
|
logger.debug(f"Updated project: {project.id}")
|
|
742
709
|
|
|
710
|
+
async def set_project_vector_mode(self, project_id: str, enabled: bool) -> None:
|
|
711
|
+
"""Set the vector_mode for a specific project."""
|
|
712
|
+
async with self.get_write_connection_with_retry(
|
|
713
|
+
"set_project_vector_mode"
|
|
714
|
+
) as db:
|
|
715
|
+
await db.execute(
|
|
716
|
+
"UPDATE projects SET vector_mode = ? WHERE id = ?",
|
|
717
|
+
(int(enabled), project_id),
|
|
718
|
+
)
|
|
719
|
+
|
|
720
|
+
# Check if the project was actually updated
|
|
721
|
+
cursor = await db.execute("SELECT changes()")
|
|
722
|
+
changes = await cursor.fetchone()
|
|
723
|
+
if changes[0] == 0:
|
|
724
|
+
raise ValueError(f"Project not found: {project_id}")
|
|
725
|
+
|
|
726
|
+
await db.commit()
|
|
727
|
+
logger.debug(f"Set vector_mode={enabled} for project: {project_id}")
|
|
728
|
+
|
|
743
729
|
async def get_all_projects(self) -> List[Project]:
|
|
744
730
|
"""Get all projects in the database."""
|
|
745
731
|
async with self.get_connection() as db:
|
|
@@ -795,12 +781,19 @@ class DatabaseManager:
|
|
|
795
781
|
) as db:
|
|
796
782
|
await db.execute(
|
|
797
783
|
"""
|
|
798
|
-
INSERT
|
|
784
|
+
INSERT INTO file_descriptions
|
|
799
785
|
(
|
|
800
786
|
project_id, file_path, description, file_hash, last_modified,
|
|
801
787
|
version, source_project_id, to_be_cleaned
|
|
802
788
|
)
|
|
803
789
|
VALUES (?, ?, ?, ?, ?, ?, ?, ?)
|
|
790
|
+
ON CONFLICT(project_id, file_path) DO UPDATE SET
|
|
791
|
+
description=excluded.description,
|
|
792
|
+
file_hash=excluded.file_hash,
|
|
793
|
+
last_modified=excluded.last_modified,
|
|
794
|
+
version=excluded.version,
|
|
795
|
+
source_project_id=excluded.source_project_id,
|
|
796
|
+
to_be_cleaned=excluded.to_be_cleaned
|
|
804
797
|
""",
|
|
805
798
|
(
|
|
806
799
|
file_desc.project_id,
|
|
@@ -898,12 +891,19 @@ class DatabaseManager:
|
|
|
898
891
|
|
|
899
892
|
await conn.executemany(
|
|
900
893
|
"""
|
|
901
|
-
INSERT
|
|
894
|
+
INSERT INTO file_descriptions
|
|
902
895
|
(
|
|
903
896
|
project_id, file_path, description, file_hash, last_modified,
|
|
904
897
|
version, source_project_id, to_be_cleaned
|
|
905
898
|
)
|
|
906
899
|
VALUES (?, ?, ?, ?, ?, ?, ?, ?)
|
|
900
|
+
ON CONFLICT(project_id, file_path) DO UPDATE SET
|
|
901
|
+
description=excluded.description,
|
|
902
|
+
file_hash=excluded.file_hash,
|
|
903
|
+
last_modified=excluded.last_modified,
|
|
904
|
+
version=excluded.version,
|
|
905
|
+
source_project_id=excluded.source_project_id,
|
|
906
|
+
to_be_cleaned=excluded.to_be_cleaned
|
|
907
907
|
""",
|
|
908
908
|
data,
|
|
909
909
|
)
|
|
@@ -1067,10 +1067,8 @@ class DatabaseManager:
|
|
|
1067
1067
|
Returns:
|
|
1068
1068
|
List of file paths that were marked for cleanup
|
|
1069
1069
|
"""
|
|
1070
|
-
|
|
1071
|
-
|
|
1072
|
-
async def cleanup_operation(conn: aiosqlite.Connection) -> List[str]:
|
|
1073
|
-
# Get all active file descriptions for this project
|
|
1070
|
+
# 1. Get all active file paths (fast DB read)
|
|
1071
|
+
async with self.get_connection() as conn:
|
|
1074
1072
|
cursor = await conn.execute(
|
|
1075
1073
|
(
|
|
1076
1074
|
"SELECT file_path FROM file_descriptions WHERE "
|
|
@@ -1078,46 +1076,29 @@ class DatabaseManager:
|
|
|
1078
1076
|
),
|
|
1079
1077
|
(project_id,),
|
|
1080
1078
|
)
|
|
1081
|
-
|
|
1082
1079
|
rows = await cursor.fetchall()
|
|
1080
|
+
file_paths = [row["file_path"] for row in rows]
|
|
1083
1081
|
|
|
1084
|
-
|
|
1085
|
-
|
|
1086
|
-
|
|
1087
|
-
|
|
1082
|
+
# 2. Check existence on disk (blocking IO - run in executor)
|
|
1083
|
+
def find_removed_files() -> List[str]:
|
|
1084
|
+
missing = []
|
|
1085
|
+
for file_path in file_paths:
|
|
1088
1086
|
full_path = project_root / file_path
|
|
1089
|
-
|
|
1090
1087
|
if not full_path.exists():
|
|
1091
|
-
|
|
1092
|
-
|
|
1093
|
-
# Mark descriptions for cleanup instead of deleting
|
|
1094
|
-
if to_remove:
|
|
1095
|
-
import time
|
|
1096
|
-
|
|
1097
|
-
cleanup_timestamp = int(time.time())
|
|
1098
|
-
await conn.executemany(
|
|
1099
|
-
(
|
|
1100
|
-
"UPDATE file_descriptions SET to_be_cleaned = ? WHERE "
|
|
1101
|
-
"project_id = ? AND file_path = ?"
|
|
1102
|
-
),
|
|
1103
|
-
[(cleanup_timestamp, project_id, path) for path in to_remove],
|
|
1104
|
-
)
|
|
1105
|
-
logger.info(
|
|
1106
|
-
(
|
|
1107
|
-
f"Marked {len(to_remove)} missing files for cleanup "
|
|
1108
|
-
f"from {project_id}"
|
|
1109
|
-
)
|
|
1110
|
-
)
|
|
1088
|
+
missing.append(file_path)
|
|
1089
|
+
return missing
|
|
1111
1090
|
|
|
1112
|
-
|
|
1091
|
+
loop = asyncio.get_running_loop()
|
|
1092
|
+
to_remove = await loop.run_in_executor(None, find_removed_files)
|
|
1113
1093
|
|
|
1114
|
-
|
|
1115
|
-
|
|
1116
|
-
|
|
1117
|
-
|
|
1118
|
-
|
|
1094
|
+
# 3. Mark for cleanup (fast DB write)
|
|
1095
|
+
if to_remove:
|
|
1096
|
+
await self.cleanup_manager.mark_files_for_cleanup(project_id, to_remove)
|
|
1097
|
+
logger.info(
|
|
1098
|
+
f"Marked {len(to_remove)} missing files for cleanup from {project_id}"
|
|
1099
|
+
)
|
|
1119
1100
|
|
|
1120
|
-
return
|
|
1101
|
+
return to_remove
|
|
1121
1102
|
|
|
1122
1103
|
async def analyze_word_frequency(
|
|
1123
1104
|
self, project_id: str, limit: int = 200
|
|
@@ -1139,7 +1120,7 @@ class DatabaseManager:
|
|
|
1139
1120
|
stop_words_path = (
|
|
1140
1121
|
Path(__file__).parent.parent / "data" / "stop_words_english.txt"
|
|
1141
1122
|
)
|
|
1142
|
-
stop_words = set()
|
|
1123
|
+
stop_words: set = set()
|
|
1143
1124
|
|
|
1144
1125
|
if stop_words_path.exists():
|
|
1145
1126
|
with open(stop_words_path, "r", encoding="utf-8") as f:
|
|
@@ -1186,8 +1167,8 @@ class DatabaseManager:
|
|
|
1186
1167
|
}
|
|
1187
1168
|
stop_words.update(programming_keywords)
|
|
1188
1169
|
|
|
1170
|
+
# Get all descriptions for this project (fast DB read)
|
|
1189
1171
|
async with self.get_connection() as db:
|
|
1190
|
-
# Get all descriptions for this project
|
|
1191
1172
|
cursor = await db.execute(
|
|
1192
1173
|
(
|
|
1193
1174
|
"SELECT description FROM file_descriptions WHERE "
|
|
@@ -1195,11 +1176,13 @@ class DatabaseManager:
|
|
|
1195
1176
|
),
|
|
1196
1177
|
(project_id,),
|
|
1197
1178
|
)
|
|
1198
|
-
|
|
1199
1179
|
rows = await cursor.fetchall()
|
|
1180
|
+
descriptions = [row["description"] for row in rows]
|
|
1200
1181
|
|
|
1182
|
+
# Process word frequency in executor (CPU-bound work)
|
|
1183
|
+
def process_word_frequency() -> WordFrequencyResult:
|
|
1201
1184
|
# Combine all descriptions
|
|
1202
|
-
all_text = " ".join(
|
|
1185
|
+
all_text = " ".join(descriptions)
|
|
1203
1186
|
|
|
1204
1187
|
# Tokenize and filter
|
|
1205
1188
|
words = re.findall(r"\b[a-zA-Z]{2,}\b", all_text.lower())
|
|
@@ -1220,6 +1203,9 @@ class DatabaseManager:
|
|
|
1220
1203
|
total_unique_terms=len(word_counts),
|
|
1221
1204
|
)
|
|
1222
1205
|
|
|
1206
|
+
loop = asyncio.get_running_loop()
|
|
1207
|
+
return await loop.run_in_executor(None, process_word_frequency)
|
|
1208
|
+
|
|
1223
1209
|
async def cleanup_empty_projects(self) -> int:
|
|
1224
1210
|
"""
|
|
1225
1211
|
Remove projects that have no file descriptions and no project overview.
|
|
@@ -1320,6 +1306,186 @@ class DatabaseManager:
|
|
|
1320
1306
|
"files": file_descriptions,
|
|
1321
1307
|
}
|
|
1322
1308
|
|
|
1309
|
+
# IndexMeta operations
|
|
1310
|
+
async def create_index_meta(self, index_meta: IndexMeta) -> None:
|
|
1311
|
+
"""Create or update index metadata for a project."""
|
|
1312
|
+
async with self.get_write_connection_with_retry("create_index_meta") as db:
|
|
1313
|
+
await db.execute(
|
|
1314
|
+
"""
|
|
1315
|
+
INSERT OR REPLACE INTO index_meta (
|
|
1316
|
+
project_id, total_chunks, indexed_chunks, total_files, indexed_files,
|
|
1317
|
+
last_sync, sync_status, error_message, queue_depth, processing_rate,
|
|
1318
|
+
estimated_completion, metadata, created, last_modified
|
|
1319
|
+
)
|
|
1320
|
+
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
|
|
1321
|
+
""",
|
|
1322
|
+
(
|
|
1323
|
+
index_meta.project_id,
|
|
1324
|
+
index_meta.total_chunks,
|
|
1325
|
+
index_meta.indexed_chunks,
|
|
1326
|
+
index_meta.total_files,
|
|
1327
|
+
index_meta.indexed_files,
|
|
1328
|
+
index_meta.last_sync,
|
|
1329
|
+
index_meta.sync_status.value,
|
|
1330
|
+
index_meta.error_message,
|
|
1331
|
+
index_meta.queue_depth,
|
|
1332
|
+
index_meta.processing_rate,
|
|
1333
|
+
index_meta.estimated_completion,
|
|
1334
|
+
json.dumps(index_meta.metadata),
|
|
1335
|
+
index_meta.created,
|
|
1336
|
+
index_meta.last_modified,
|
|
1337
|
+
),
|
|
1338
|
+
)
|
|
1339
|
+
await db.commit()
|
|
1340
|
+
logger.debug(
|
|
1341
|
+
f"Created/updated index metadata for project: {index_meta.project_id}"
|
|
1342
|
+
)
|
|
1343
|
+
|
|
1344
|
+
async def update_index_meta(self, index_meta: IndexMeta) -> None:
|
|
1345
|
+
"""Update existing index metadata for a project."""
|
|
1346
|
+
async with self.get_write_connection_with_retry("update_index_meta") as db:
|
|
1347
|
+
await db.execute(
|
|
1348
|
+
"""
|
|
1349
|
+
UPDATE index_meta
|
|
1350
|
+
SET total_chunks = ?, indexed_chunks = ?, total_files = ?, indexed_files = ?,
|
|
1351
|
+
last_sync = ?, sync_status = ?, error_message = ?, queue_depth = ?,
|
|
1352
|
+
processing_rate = ?, estimated_completion = ?, metadata = ?, last_modified = ?
|
|
1353
|
+
WHERE project_id = ?
|
|
1354
|
+
""",
|
|
1355
|
+
(
|
|
1356
|
+
index_meta.total_chunks,
|
|
1357
|
+
index_meta.indexed_chunks,
|
|
1358
|
+
index_meta.total_files,
|
|
1359
|
+
index_meta.indexed_files,
|
|
1360
|
+
index_meta.last_sync,
|
|
1361
|
+
index_meta.sync_status.value,
|
|
1362
|
+
index_meta.error_message,
|
|
1363
|
+
index_meta.queue_depth,
|
|
1364
|
+
index_meta.processing_rate,
|
|
1365
|
+
index_meta.estimated_completion,
|
|
1366
|
+
json.dumps(index_meta.metadata),
|
|
1367
|
+
index_meta.last_modified,
|
|
1368
|
+
index_meta.project_id,
|
|
1369
|
+
),
|
|
1370
|
+
)
|
|
1371
|
+
|
|
1372
|
+
# Check if the project was actually updated
|
|
1373
|
+
cursor = await db.execute("SELECT changes()")
|
|
1374
|
+
changes = await cursor.fetchone()
|
|
1375
|
+
if changes[0] == 0:
|
|
1376
|
+
raise ValueError(
|
|
1377
|
+
f"Index metadata not found for project: {index_meta.project_id}"
|
|
1378
|
+
)
|
|
1379
|
+
|
|
1380
|
+
await db.commit()
|
|
1381
|
+
logger.debug(f"Updated index metadata for project: {index_meta.project_id}")
|
|
1382
|
+
|
|
1383
|
+
async def get_index_meta(self, project_id: str) -> Optional[IndexMeta]:
    """Fetch the stored index metadata for a project, or None if absent."""

    def _parse_dt(value, fallback=None):
        # SQLite stores datetimes as ISO-8601 text; fall back when NULL/empty.
        return datetime.fromisoformat(value) if value else fallback

    async with self.get_connection() as db:
        cursor = await db.execute(
            "SELECT * FROM index_meta WHERE project_id = ?", (project_id,)
        )
        row = await cursor.fetchone()
        if not row:
            return None

        # Dict access is clearer than positional tuple indexing.
        record = dict(row)
        raw_metadata = record["metadata"]

        return IndexMeta(
            id=record["id"],
            project_id=record["project_id"],
            total_chunks=record["total_chunks"],
            indexed_chunks=record["indexed_chunks"],
            total_files=record["total_files"],
            indexed_files=record["indexed_files"],
            last_sync=_parse_dt(record["last_sync"]),
            sync_status=record["sync_status"],
            error_message=record["error_message"],
            queue_depth=record["queue_depth"],
            processing_rate=record["processing_rate"],
            estimated_completion=_parse_dt(record["estimated_completion"]),
            metadata=json.loads(raw_metadata) if raw_metadata else {},
            # Missing audit timestamps default to "now" rather than None.
            created=_parse_dt(record["created"], datetime.utcnow()),
            last_modified=_parse_dt(record["last_modified"], datetime.utcnow()),
        )
|
|
1440
|
+
|
|
1441
|
+
async def get_or_create_index_meta(self, project_id: str, **kwargs) -> IndexMeta:
    """Return the project's index metadata, creating a default record if absent.

    Args:
        project_id: Project identifier.
        **kwargs: Field overrides applied on top of the defaults when a new
            record is created (ignored when a record already exists).

    Returns:
        IndexMeta object (existing, or freshly created and re-read so it
        carries the database-assigned id).

    Raises:
        DatabaseError: If the metadata cannot be read back after creation.
    """
    current = await self.get_index_meta(project_id)
    if current:
        return current

    # Defaults for a brand-new, not-yet-synced project; caller kwargs win.
    fields = {
        "project_id": project_id,
        "total_chunks": 0,
        "indexed_chunks": 0,
        "total_files": 0,
        "indexed_files": 0,
        "last_sync": None,
        "sync_status": SyncStatus.PENDING,
        "error_message": None,
        "queue_depth": 0,
        "processing_rate": 0.0,
        "estimated_completion": None,
        "metadata": {},
        **kwargs,
    }

    await self.create_index_meta(IndexMeta(**fields))

    stored = await self.get_index_meta(project_id)
    if stored is None:
        raise DatabaseError(f"Failed to create index metadata for project: {project_id}")

    return stored
|
|
1488
|
+
|
|
1323
1489
|
# Cleanup operations
|
|
1324
1490
|
|
|
1325
1491
|
@property
|