mcp-code-indexer 4.0.1__tar.gz → 4.1.0__tar.gz

This diff shows the changes between two publicly released versions of this package, as published to one of the supported registries. It is provided for informational purposes only.
Files changed (65)
  1. {mcp_code_indexer-4.0.1 → mcp_code_indexer-4.1.0}/PKG-INFO +82 -24
  2. {mcp_code_indexer-4.0.1 → mcp_code_indexer-4.1.0}/README.md +80 -23
  3. {mcp_code_indexer-4.0.1 → mcp_code_indexer-4.1.0}/pyproject.toml +27 -6
  4. {mcp_code_indexer-4.0.1 → mcp_code_indexer-4.1.0}/src/mcp_code_indexer/__init__.py +7 -5
  5. {mcp_code_indexer-4.0.1 → mcp_code_indexer-4.1.0}/src/mcp_code_indexer/ask_handler.py +2 -2
  6. {mcp_code_indexer-4.0.1 → mcp_code_indexer-4.1.0}/src/mcp_code_indexer/claude_api_handler.py +10 -5
  7. {mcp_code_indexer-4.0.1 → mcp_code_indexer-4.1.0}/src/mcp_code_indexer/cleanup_manager.py +20 -12
  8. {mcp_code_indexer-4.0.1 → mcp_code_indexer-4.1.0}/src/mcp_code_indexer/commands/makelocal.py +85 -63
  9. {mcp_code_indexer-4.0.1 → mcp_code_indexer-4.1.0}/src/mcp_code_indexer/data/stop_words_english.txt +1 -1
  10. {mcp_code_indexer-4.0.1 → mcp_code_indexer-4.1.0}/src/mcp_code_indexer/database/connection_health.py +29 -20
  11. {mcp_code_indexer-4.0.1 → mcp_code_indexer-4.1.0}/src/mcp_code_indexer/database/database.py +44 -31
  12. {mcp_code_indexer-4.0.1 → mcp_code_indexer-4.1.0}/src/mcp_code_indexer/database/database_factory.py +19 -20
  13. {mcp_code_indexer-4.0.1 → mcp_code_indexer-4.1.0}/src/mcp_code_indexer/database/exceptions.py +10 -10
  14. {mcp_code_indexer-4.0.1 → mcp_code_indexer-4.1.0}/src/mcp_code_indexer/database/models.py +126 -1
  15. {mcp_code_indexer-4.0.1 → mcp_code_indexer-4.1.0}/src/mcp_code_indexer/database/path_resolver.py +22 -21
  16. {mcp_code_indexer-4.0.1 → mcp_code_indexer-4.1.0}/src/mcp_code_indexer/database/retry_executor.py +37 -19
  17. {mcp_code_indexer-4.0.1 → mcp_code_indexer-4.1.0}/src/mcp_code_indexer/deepask_handler.py +3 -3
  18. {mcp_code_indexer-4.0.1 → mcp_code_indexer-4.1.0}/src/mcp_code_indexer/error_handler.py +46 -20
  19. {mcp_code_indexer-4.0.1 → mcp_code_indexer-4.1.0}/src/mcp_code_indexer/file_scanner.py +15 -12
  20. {mcp_code_indexer-4.0.1 → mcp_code_indexer-4.1.0}/src/mcp_code_indexer/git_hook_handler.py +71 -76
  21. {mcp_code_indexer-4.0.1 → mcp_code_indexer-4.1.0}/src/mcp_code_indexer/logging_config.py +13 -5
  22. {mcp_code_indexer-4.0.1 → mcp_code_indexer-4.1.0}/src/mcp_code_indexer/main.py +85 -22
  23. {mcp_code_indexer-4.0.1 → mcp_code_indexer-4.1.0}/src/mcp_code_indexer/middleware/auth.py +47 -43
  24. {mcp_code_indexer-4.0.1 → mcp_code_indexer-4.1.0}/src/mcp_code_indexer/middleware/error_middleware.py +15 -15
  25. {mcp_code_indexer-4.0.1 → mcp_code_indexer-4.1.0}/src/mcp_code_indexer/middleware/logging.py +44 -42
  26. {mcp_code_indexer-4.0.1 → mcp_code_indexer-4.1.0}/src/mcp_code_indexer/middleware/security.py +84 -76
  27. {mcp_code_indexer-4.0.1 → mcp_code_indexer-4.1.0}/src/mcp_code_indexer/migrations/002_performance_indexes.sql +1 -1
  28. {mcp_code_indexer-4.0.1 → mcp_code_indexer-4.1.0}/src/mcp_code_indexer/migrations/004_remove_branch_dependency.sql +14 -14
  29. mcp_code_indexer-4.1.0/src/mcp_code_indexer/migrations/006_vector_mode.sql +189 -0
  30. {mcp_code_indexer-4.0.1 → mcp_code_indexer-4.1.0}/src/mcp_code_indexer/query_preprocessor.py +2 -2
  31. {mcp_code_indexer-4.0.1 → mcp_code_indexer-4.1.0}/src/mcp_code_indexer/server/mcp_server.py +158 -94
  32. {mcp_code_indexer-4.0.1 → mcp_code_indexer-4.1.0}/src/mcp_code_indexer/transport/base.py +19 -17
  33. {mcp_code_indexer-4.0.1 → mcp_code_indexer-4.1.0}/src/mcp_code_indexer/transport/http_transport.py +89 -76
  34. {mcp_code_indexer-4.0.1 → mcp_code_indexer-4.1.0}/src/mcp_code_indexer/transport/stdio_transport.py +12 -8
  35. mcp_code_indexer-4.1.0/src/mcp_code_indexer/vector_mode/__init__.py +36 -0
  36. mcp_code_indexer-4.1.0/src/mcp_code_indexer/vector_mode/chunking/__init__.py +19 -0
  37. mcp_code_indexer-4.1.0/src/mcp_code_indexer/vector_mode/chunking/ast_chunker.py +403 -0
  38. mcp_code_indexer-4.1.0/src/mcp_code_indexer/vector_mode/chunking/chunk_optimizer.py +500 -0
  39. mcp_code_indexer-4.1.0/src/mcp_code_indexer/vector_mode/chunking/language_handlers.py +478 -0
  40. mcp_code_indexer-4.1.0/src/mcp_code_indexer/vector_mode/config.py +155 -0
  41. mcp_code_indexer-4.1.0/src/mcp_code_indexer/vector_mode/daemon.py +335 -0
  42. mcp_code_indexer-4.1.0/src/mcp_code_indexer/vector_mode/monitoring/__init__.py +19 -0
  43. mcp_code_indexer-4.1.0/src/mcp_code_indexer/vector_mode/monitoring/change_detector.py +312 -0
  44. mcp_code_indexer-4.1.0/src/mcp_code_indexer/vector_mode/monitoring/file_watcher.py +445 -0
  45. mcp_code_indexer-4.1.0/src/mcp_code_indexer/vector_mode/monitoring/merkle_tree.py +418 -0
  46. mcp_code_indexer-4.1.0/src/mcp_code_indexer/vector_mode/providers/__init__.py +72 -0
  47. mcp_code_indexer-4.1.0/src/mcp_code_indexer/vector_mode/providers/base_provider.py +230 -0
  48. mcp_code_indexer-4.1.0/src/mcp_code_indexer/vector_mode/providers/turbopuffer_client.py +338 -0
  49. mcp_code_indexer-4.1.0/src/mcp_code_indexer/vector_mode/providers/voyage_client.py +212 -0
  50. mcp_code_indexer-4.1.0/src/mcp_code_indexer/vector_mode/security/__init__.py +11 -0
  51. mcp_code_indexer-4.1.0/src/mcp_code_indexer/vector_mode/security/patterns.py +297 -0
  52. mcp_code_indexer-4.1.0/src/mcp_code_indexer/vector_mode/security/redactor.py +368 -0
  53. {mcp_code_indexer-4.0.1 → mcp_code_indexer-4.1.0}/LICENSE +0 -0
  54. {mcp_code_indexer-4.0.1 → mcp_code_indexer-4.1.0}/src/mcp_code_indexer/__main__.py +0 -0
  55. {mcp_code_indexer-4.0.1 → mcp_code_indexer-4.1.0}/src/mcp_code_indexer/commands/__init__.py +0 -0
  56. {mcp_code_indexer-4.0.1 → mcp_code_indexer-4.1.0}/src/mcp_code_indexer/database/__init__.py +0 -0
  57. {mcp_code_indexer-4.0.1 → mcp_code_indexer-4.1.0}/src/mcp_code_indexer/middleware/__init__.py +1 -1
  58. {mcp_code_indexer-4.0.1 → mcp_code_indexer-4.1.0}/src/mcp_code_indexer/migrations/001_initial.sql +0 -0
  59. {mcp_code_indexer-4.0.1 → mcp_code_indexer-4.1.0}/src/mcp_code_indexer/migrations/003_project_overviews.sql +0 -0
  60. {mcp_code_indexer-4.0.1 → mcp_code_indexer-4.1.0}/src/mcp_code_indexer/migrations/005_remove_git_remotes.sql +0 -0
  61. {mcp_code_indexer-4.0.1 → mcp_code_indexer-4.1.0}/src/mcp_code_indexer/server/__init__.py +0 -0
  62. {mcp_code_indexer-4.0.1 → mcp_code_indexer-4.1.0}/src/mcp_code_indexer/tiktoken_cache/9b5ad71b2ce5302211f9c61530b329a4922fc6a4 +0 -0
  63. {mcp_code_indexer-4.0.1 → mcp_code_indexer-4.1.0}/src/mcp_code_indexer/token_counter.py +0 -0
  64. {mcp_code_indexer-4.0.1 → mcp_code_indexer-4.1.0}/src/mcp_code_indexer/tools/__init__.py +0 -0
  65. {mcp_code_indexer-4.0.1 → mcp_code_indexer-4.1.0}/src/mcp_code_indexer/transport/__init__.py +1 -1
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: mcp-code-indexer
3
- Version: 4.0.1
3
+ Version: 4.1.0
4
4
  Summary: MCP server that tracks file descriptions across codebases, enabling AI agents to efficiently navigate and understand code through searchable summaries and token-aware overviews.
5
5
  License: MIT
6
6
  Keywords: mcp,model-context-protocol,code-indexer,ai-tools,codebase-navigation,file-descriptions,llm-tools
@@ -23,6 +23,7 @@ Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
23
23
  Classifier: Topic :: Software Development
24
24
  Classifier: Topic :: Software Development :: Libraries :: Python Modules
25
25
  Classifier: Typing :: Typed
26
+ Provides-Extra: vector
26
27
  Requires-Dist: aiofiles (==23.2.0)
27
28
  Requires-Dist: aiohttp (>=3.8.0)
28
29
  Requires-Dist: aiosqlite (==0.19.0)
@@ -43,8 +44,8 @@ Description-Content-Type: text/markdown
43
44
 
44
45
  # MCP Code Indexer 🚀
45
46
 
46
- [![PyPI version](https://badge.fury.io/py/mcp-code-indexer.svg?44)](https://badge.fury.io/py/mcp-code-indexer)
47
- [![Python](https://img.shields.io/pypi/pyversions/mcp-code-indexer.svg?44)](https://pypi.org/project/mcp-code-indexer/)
47
+ [![PyPI version](https://badge.fury.io/py/mcp-code-indexer.svg?46)](https://badge.fury.io/py/mcp-code-indexer)
48
+ [![Python](https://img.shields.io/pypi/pyversions/mcp-code-indexer.svg?46)](https://pypi.org/project/mcp-code-indexer/)
48
49
  [![License](https://img.shields.io/badge/License-MIT-blue.svg)](https://opensource.org/licenses/MIT)
49
50
 
50
51
  A production-ready **Model Context Protocol (MCP) server** that revolutionizes how AI agents navigate and understand codebases. Built for high-concurrency environments with advanced database resilience, the server provides instant access to intelligent descriptions, semantic search, and context-aware recommendations while maintaining 800+ writes/sec throughput.
@@ -183,20 +184,77 @@ The git hook integration provides intelligent automation:
183
184
 
184
185
  - **📊 Git Analysis**: Automatically analyzes git diffs after commits/merges
185
186
  - **🤖 AI Processing**: Uses OpenRouter API with Anthropic's Claude Sonnet 4
186
- - **⚡ Smart Updates**: Only processes files that actually changed
187
+ - **⚡ Smart Updates**: Only processes files that actually changed
187
188
  - **🔄 Overview Maintenance**: Updates project overview when structure changes
188
189
  - **🛡️ Error Isolation**: Git operations continue even if indexing fails
189
190
  - **⏱️ Rate Limiting**: Built-in retry logic with exponential backoff
190
191
 
191
192
  ### 🎯 Key Benefits
192
193
 
193
- 💡 **Zero Manual Work**: Descriptions stay current without any effort
194
- ⚡ **Performance**: Only analyzes changed files, not entire codebase
195
- 🔒 **Reliability**: Robust error handling ensures git operations never fail
196
- 🎛️ **Configurable**: Support for custom models and timeout settings
194
+ 💡 **Zero Manual Work**: Descriptions stay current without any effort
195
+ ⚡ **Performance**: Only analyzes changed files, not entire codebase
196
+ 🔒 **Reliability**: Robust error handling ensures git operations never fail
197
+ 🎛️ **Configurable**: Support for custom models and timeout settings
197
198
 
198
199
  **Learn More**: See [Git Hook Setup Guide](docs/git-hook-setup.md) for complete configuration options and troubleshooting.
199
200
 
201
+ ## 🧠 Vector Mode (BETA)
202
+
203
+ 🚀 **NEW Feature**: Semantic code search with vector embeddings! Experience AI-powered code discovery that understands context and meaning, not just keywords.
204
+
205
+ ### 🎯 What is Vector Mode?
206
+
207
+ Vector Mode transforms how you search and understand codebases by using AI embeddings:
208
+
209
+ - **🔍 Semantic Search**: Find code by meaning, not just text matching
210
+ - **⚡ Real-time Indexing**: Automatic embedding generation as code changes
211
+ - **🛡️ Secure by Default**: Comprehensive secret redaction before API calls
212
+ - **🌐 Multi-language**: Python, JavaScript, TypeScript with AST-based chunking
213
+ - **📊 Smart Chunking**: Context-aware code segmentation for optimal embeddings
214
+
215
+ ### 🚀 Quick Start
216
+
217
+ ```bash
218
+ # Install vector mode dependencies
219
+ pip install mcp-code-indexer[vector]
220
+
221
+ # Set required API keys
222
+ export VOYAGE_API_KEY="pa-your-voyage-api-key"
223
+ export TURBOPUFFER_API_KEY="your-turbopuffer-api-key"
224
+
225
+ # Start with vector mode enabled
226
+ mcp-code-indexer --vector
227
+
228
+ # The daemon automatically starts and begins indexing your projects
229
+ ```
230
+
231
+ ### 💡 Key Features
232
+
233
+ - **🔐 Secret Redaction**: 20+ pattern types automatically detected and redacted
234
+ - **🌳 Merkle Trees**: Efficient change detection without full directory scans
235
+ - **🎛️ Circuit Breakers**: Resilient API integration with automatic retry logic
236
+ - **📈 Production Ready**: Built for high-concurrency with comprehensive monitoring
237
+
238
+ ### 🔧 Advanced Configuration
239
+
240
+ ```bash
241
+ # Custom configuration
242
+ mcp-code-indexer --vector --vector-config /path/to/config.yaml
243
+
244
+ # HTTP mode with vector search
245
+ mcp-code-indexer --vector --http --port 8080
246
+ ```
247
+
248
+ ### 🛠️ Architecture
249
+
250
+ Vector Mode adds powerful new MCP tools:
251
+ - `vector_search` - Semantic code search across projects
252
+ - `similarity_search` - Find similar code patterns
253
+ - `dependency_search` - Discover code relationships
254
+ - `vector_status` - Monitor indexing progress
255
+
256
+ **Status**: Currently in BETA - foundations implemented, full pipeline in development.
257
+
200
258
  ## 🔧 Development Setup
201
259
 
202
260
  ### 👨‍💻 For Contributors
@@ -369,14 +427,14 @@ Comprehensive documentation organized by user journey and expertise level.
369
427
 
370
428
  ### 📋 Quick References
371
429
  - **[Examples & Integrations](examples/)** - Ready-to-use configurations
372
- - **[Troubleshooting](#🚨-troubleshooting)** - Common issues & solutions
430
+ - **[Troubleshooting](#🚨-troubleshooting)** - Common issues & solutions
373
431
  - **[API Tools Summary](#🛠️-mcp-tools-available)** - All 11 tools at a glance
374
432
 
375
433
  **📚 Reading Paths:**
376
434
  - **New to MCP Code Indexer?** Quick Start → API Reference → HTTP API → Q&A Interface
377
435
  - **Web developers?** Quick Start → HTTP API Reference → Q&A Interface → Git Hooks
378
436
  - **AI/ML engineers?** Quick Start → Q&A Interface → API Reference → Git Hooks
379
- - **Setting up for a team?** CLI Reference → Configuration → Administrative Commands → Monitoring
437
+ - **Setting up for a team?** CLI Reference → Configuration → Administrative Commands → Monitoring
380
438
  - **Contributing to the project?** Architecture → Contributing → API Reference
381
439
 
382
440
  ## 🚦 System Requirements
@@ -390,7 +448,7 @@ Comprehensive documentation organized by user journey and expertise level.
390
448
 
391
449
  Tested with codebases up to **10,000 files**:
392
450
  - File description retrieval: **< 10ms**
393
- - Full-text search: **< 100ms**
451
+ - Full-text search: **< 100ms**
394
452
  - Codebase overview generation: **< 2s**
395
453
  - Merge conflict detection: **< 5s**
396
454
 
@@ -460,11 +518,11 @@ async def analyze_codebase(project_path):
460
518
  "projectName": "my-project",
461
519
  "folderPath": project_path
462
520
  })
463
-
521
+
464
522
  if size_info["isLarge"]:
465
523
  # Use search for large codebases
466
524
  results = await mcp_client.call_tool("search_descriptions", {
467
- "projectName": "my-project",
525
+ "projectName": "my-project",
468
526
  "folderPath": project_path,
469
527
  "query": "authentication logic"
470
528
  })
@@ -484,18 +542,18 @@ async def analyze_codebase(project_path):
484
542
  python -c "
485
543
  import asyncio
486
544
  from mcp_client import MCPClient
487
-
545
+
488
546
  async def update_descriptions():
489
547
  client = MCPClient('mcp-code-indexer')
490
-
548
+
491
549
  # Find files without descriptions
492
550
  missing = await client.call_tool('find_missing_descriptions', {
493
551
  'projectName': '${{ github.repository }}',
494
552
  'folderPath': '.'
495
553
  })
496
-
554
+
497
555
  # Process with AI and update...
498
-
556
+
499
557
  asyncio.run(update_descriptions())
500
558
  "
501
559
  ```
@@ -606,7 +664,7 @@ mcp-code-indexer --map PROJECT_NAME
606
664
  ## 🛡️ Security Features
607
665
 
608
666
  - **Input validation** on all MCP tool parameters
609
- - **SQL injection protection** via parameterized queries
667
+ - **SQL injection protection** via parameterized queries
610
668
  - **File system sandboxing** with .gitignore respect
611
669
  - **Error sanitization** to prevent information leakage
612
670
  - **Async resource cleanup** to prevent memory leaks
@@ -638,7 +696,7 @@ Ready to supercharge your AI agents with intelligent codebase navigation?
638
696
  2. **[Master the API tools](docs/api-reference.md)** - Learn all 11 tools with examples
639
697
  3. **[Try HTTP API access](docs/http-api.md)** - REST API for web applications
640
698
  4. **[Explore AI-powered Q&A](docs/qa-interface.md)** - Ask questions about your code
641
- 5. **[Set up git hooks](docs/git-hook-setup.md)** - Automate your workflow
699
+ 5. **[Set up git hooks](docs/git-hook-setup.md)** - Automate your workflow
642
700
 
643
701
  **👥 Setting up for a team?**
644
702
  1. **[Learn all CLI commands](docs/cli-reference.md)** - Complete command reference
@@ -661,7 +719,7 @@ Ready to supercharge your AI agents with intelligent codebase navigation?
661
719
 
662
720
  We welcome contributions! See our **[Contributing Guide](docs/contributing.md)** for:
663
721
  - Development setup
664
- - Code style guidelines
722
+ - Code style guidelines
665
723
  - Testing requirements
666
724
  - Pull request process
667
725
 
@@ -672,7 +730,7 @@ MIT License - see **[LICENSE](LICENSE)** for details.
672
730
  ## 🙏 Built With
673
731
 
674
732
  - **[Model Context Protocol](https://github.com/modelcontextprotocol/python-sdk)** - The foundation for tool integration
675
- - **[tiktoken](https://pypi.org/project/tiktoken/)** - Fast BPE tokenization
733
+ - **[tiktoken](https://pypi.org/project/tiktoken/)** - Fast BPE tokenization
676
734
  - **[aiosqlite](https://pypi.org/project/aiosqlite/)** - Async SQLite operations
677
735
  - **[aiohttp](https://pypi.org/project/aiohttp/)** - Async HTTP client for OpenRouter API
678
736
  - **[tenacity](https://pypi.org/project/tenacity/)** - Robust retry logic and rate limiting
@@ -680,9 +738,9 @@ MIT License - see **[LICENSE](LICENSE)** for details.
680
738
 
681
739
  ---
682
740
 
683
- **Transform how your AI agents understand code!** 🚀
741
+ **Transform how your AI agents understand code!** 🚀
684
742
 
685
- 🎯 **New User?** [Get started in 2 minutes](#-quick-start)
686
- 👨‍💻 **Developer?** [Explore the complete API](docs/api-reference.md)
743
+ 🎯 **New User?** [Get started in 2 minutes](#-quick-start)
744
+ 👨‍💻 **Developer?** [Explore the complete API](docs/api-reference.md)
687
745
  🔧 **Production?** [Deploy with confidence](docs/configuration.md)
688
746
 
@@ -1,7 +1,7 @@
1
1
  # MCP Code Indexer 🚀
2
2
 
3
- [![PyPI version](https://badge.fury.io/py/mcp-code-indexer.svg?44)](https://badge.fury.io/py/mcp-code-indexer)
4
- [![Python](https://img.shields.io/pypi/pyversions/mcp-code-indexer.svg?44)](https://pypi.org/project/mcp-code-indexer/)
3
+ [![PyPI version](https://badge.fury.io/py/mcp-code-indexer.svg?46)](https://badge.fury.io/py/mcp-code-indexer)
4
+ [![Python](https://img.shields.io/pypi/pyversions/mcp-code-indexer.svg?46)](https://pypi.org/project/mcp-code-indexer/)
5
5
  [![License](https://img.shields.io/badge/License-MIT-blue.svg)](https://opensource.org/licenses/MIT)
6
6
 
7
7
  A production-ready **Model Context Protocol (MCP) server** that revolutionizes how AI agents navigate and understand codebases. Built for high-concurrency environments with advanced database resilience, the server provides instant access to intelligent descriptions, semantic search, and context-aware recommendations while maintaining 800+ writes/sec throughput.
@@ -140,20 +140,77 @@ The git hook integration provides intelligent automation:
140
140
 
141
141
  - **📊 Git Analysis**: Automatically analyzes git diffs after commits/merges
142
142
  - **🤖 AI Processing**: Uses OpenRouter API with Anthropic's Claude Sonnet 4
143
- - **⚡ Smart Updates**: Only processes files that actually changed
143
+ - **⚡ Smart Updates**: Only processes files that actually changed
144
144
  - **🔄 Overview Maintenance**: Updates project overview when structure changes
145
145
  - **🛡️ Error Isolation**: Git operations continue even if indexing fails
146
146
  - **⏱️ Rate Limiting**: Built-in retry logic with exponential backoff
147
147
 
148
148
  ### 🎯 Key Benefits
149
149
 
150
- 💡 **Zero Manual Work**: Descriptions stay current without any effort
151
- ⚡ **Performance**: Only analyzes changed files, not entire codebase
152
- 🔒 **Reliability**: Robust error handling ensures git operations never fail
153
- 🎛️ **Configurable**: Support for custom models and timeout settings
150
+ 💡 **Zero Manual Work**: Descriptions stay current without any effort
151
+ ⚡ **Performance**: Only analyzes changed files, not entire codebase
152
+ 🔒 **Reliability**: Robust error handling ensures git operations never fail
153
+ 🎛️ **Configurable**: Support for custom models and timeout settings
154
154
 
155
155
  **Learn More**: See [Git Hook Setup Guide](docs/git-hook-setup.md) for complete configuration options and troubleshooting.
156
156
 
157
+ ## 🧠 Vector Mode (BETA)
158
+
159
+ 🚀 **NEW Feature**: Semantic code search with vector embeddings! Experience AI-powered code discovery that understands context and meaning, not just keywords.
160
+
161
+ ### 🎯 What is Vector Mode?
162
+
163
+ Vector Mode transforms how you search and understand codebases by using AI embeddings:
164
+
165
+ - **🔍 Semantic Search**: Find code by meaning, not just text matching
166
+ - **⚡ Real-time Indexing**: Automatic embedding generation as code changes
167
+ - **🛡️ Secure by Default**: Comprehensive secret redaction before API calls
168
+ - **🌐 Multi-language**: Python, JavaScript, TypeScript with AST-based chunking
169
+ - **📊 Smart Chunking**: Context-aware code segmentation for optimal embeddings
170
+
171
+ ### 🚀 Quick Start
172
+
173
+ ```bash
174
+ # Install vector mode dependencies
175
+ pip install mcp-code-indexer[vector]
176
+
177
+ # Set required API keys
178
+ export VOYAGE_API_KEY="pa-your-voyage-api-key"
179
+ export TURBOPUFFER_API_KEY="your-turbopuffer-api-key"
180
+
181
+ # Start with vector mode enabled
182
+ mcp-code-indexer --vector
183
+
184
+ # The daemon automatically starts and begins indexing your projects
185
+ ```
186
+
187
+ ### 💡 Key Features
188
+
189
+ - **🔐 Secret Redaction**: 20+ pattern types automatically detected and redacted
190
+ - **🌳 Merkle Trees**: Efficient change detection without full directory scans
191
+ - **🎛️ Circuit Breakers**: Resilient API integration with automatic retry logic
192
+ - **📈 Production Ready**: Built for high-concurrency with comprehensive monitoring
193
+
194
+ ### 🔧 Advanced Configuration
195
+
196
+ ```bash
197
+ # Custom configuration
198
+ mcp-code-indexer --vector --vector-config /path/to/config.yaml
199
+
200
+ # HTTP mode with vector search
201
+ mcp-code-indexer --vector --http --port 8080
202
+ ```
203
+
204
+ ### 🛠️ Architecture
205
+
206
+ Vector Mode adds powerful new MCP tools:
207
+ - `vector_search` - Semantic code search across projects
208
+ - `similarity_search` - Find similar code patterns
209
+ - `dependency_search` - Discover code relationships
210
+ - `vector_status` - Monitor indexing progress
211
+
212
+ **Status**: Currently in BETA - foundations implemented, full pipeline in development.
213
+
157
214
  ## 🔧 Development Setup
158
215
 
159
216
  ### 👨‍💻 For Contributors
@@ -326,14 +383,14 @@ Comprehensive documentation organized by user journey and expertise level.
326
383
 
327
384
  ### 📋 Quick References
328
385
  - **[Examples & Integrations](examples/)** - Ready-to-use configurations
329
- - **[Troubleshooting](#🚨-troubleshooting)** - Common issues & solutions
386
+ - **[Troubleshooting](#🚨-troubleshooting)** - Common issues & solutions
330
387
  - **[API Tools Summary](#🛠️-mcp-tools-available)** - All 11 tools at a glance
331
388
 
332
389
  **📚 Reading Paths:**
333
390
  - **New to MCP Code Indexer?** Quick Start → API Reference → HTTP API → Q&A Interface
334
391
  - **Web developers?** Quick Start → HTTP API Reference → Q&A Interface → Git Hooks
335
392
  - **AI/ML engineers?** Quick Start → Q&A Interface → API Reference → Git Hooks
336
- - **Setting up for a team?** CLI Reference → Configuration → Administrative Commands → Monitoring
393
+ - **Setting up for a team?** CLI Reference → Configuration → Administrative Commands → Monitoring
337
394
  - **Contributing to the project?** Architecture → Contributing → API Reference
338
395
 
339
396
  ## 🚦 System Requirements
@@ -347,7 +404,7 @@ Comprehensive documentation organized by user journey and expertise level.
347
404
 
348
405
  Tested with codebases up to **10,000 files**:
349
406
  - File description retrieval: **< 10ms**
350
- - Full-text search: **< 100ms**
407
+ - Full-text search: **< 100ms**
351
408
  - Codebase overview generation: **< 2s**
352
409
  - Merge conflict detection: **< 5s**
353
410
 
@@ -417,11 +474,11 @@ async def analyze_codebase(project_path):
417
474
  "projectName": "my-project",
418
475
  "folderPath": project_path
419
476
  })
420
-
477
+
421
478
  if size_info["isLarge"]:
422
479
  # Use search for large codebases
423
480
  results = await mcp_client.call_tool("search_descriptions", {
424
- "projectName": "my-project",
481
+ "projectName": "my-project",
425
482
  "folderPath": project_path,
426
483
  "query": "authentication logic"
427
484
  })
@@ -441,18 +498,18 @@ async def analyze_codebase(project_path):
441
498
  python -c "
442
499
  import asyncio
443
500
  from mcp_client import MCPClient
444
-
501
+
445
502
  async def update_descriptions():
446
503
  client = MCPClient('mcp-code-indexer')
447
-
504
+
448
505
  # Find files without descriptions
449
506
  missing = await client.call_tool('find_missing_descriptions', {
450
507
  'projectName': '${{ github.repository }}',
451
508
  'folderPath': '.'
452
509
  })
453
-
510
+
454
511
  # Process with AI and update...
455
-
512
+
456
513
  asyncio.run(update_descriptions())
457
514
  "
458
515
  ```
@@ -563,7 +620,7 @@ mcp-code-indexer --map PROJECT_NAME
563
620
  ## 🛡️ Security Features
564
621
 
565
622
  - **Input validation** on all MCP tool parameters
566
- - **SQL injection protection** via parameterized queries
623
+ - **SQL injection protection** via parameterized queries
567
624
  - **File system sandboxing** with .gitignore respect
568
625
  - **Error sanitization** to prevent information leakage
569
626
  - **Async resource cleanup** to prevent memory leaks
@@ -595,7 +652,7 @@ Ready to supercharge your AI agents with intelligent codebase navigation?
595
652
  2. **[Master the API tools](docs/api-reference.md)** - Learn all 11 tools with examples
596
653
  3. **[Try HTTP API access](docs/http-api.md)** - REST API for web applications
597
654
  4. **[Explore AI-powered Q&A](docs/qa-interface.md)** - Ask questions about your code
598
- 5. **[Set up git hooks](docs/git-hook-setup.md)** - Automate your workflow
655
+ 5. **[Set up git hooks](docs/git-hook-setup.md)** - Automate your workflow
599
656
 
600
657
  **👥 Setting up for a team?**
601
658
  1. **[Learn all CLI commands](docs/cli-reference.md)** - Complete command reference
@@ -618,7 +675,7 @@ Ready to supercharge your AI agents with intelligent codebase navigation?
618
675
 
619
676
  We welcome contributions! See our **[Contributing Guide](docs/contributing.md)** for:
620
677
  - Development setup
621
- - Code style guidelines
678
+ - Code style guidelines
622
679
  - Testing requirements
623
680
  - Pull request process
624
681
 
@@ -629,7 +686,7 @@ MIT License - see **[LICENSE](LICENSE)** for details.
629
686
  ## 🙏 Built With
630
687
 
631
688
  - **[Model Context Protocol](https://github.com/modelcontextprotocol/python-sdk)** - The foundation for tool integration
632
- - **[tiktoken](https://pypi.org/project/tiktoken/)** - Fast BPE tokenization
689
+ - **[tiktoken](https://pypi.org/project/tiktoken/)** - Fast BPE tokenization
633
690
  - **[aiosqlite](https://pypi.org/project/aiosqlite/)** - Async SQLite operations
634
691
  - **[aiohttp](https://pypi.org/project/aiohttp/)** - Async HTTP client for OpenRouter API
635
692
  - **[tenacity](https://pypi.org/project/tenacity/)** - Robust retry logic and rate limiting
@@ -637,8 +694,8 @@ MIT License - see **[LICENSE](LICENSE)** for details.
637
694
 
638
695
  ---
639
696
 
640
- **Transform how your AI agents understand code!** 🚀
697
+ **Transform how your AI agents understand code!** 🚀
641
698
 
642
- 🎯 **New User?** [Get started in 2 minutes](#-quick-start)
643
- 👨‍💻 **Developer?** [Explore the complete API](docs/api-reference.md)
699
+ 🎯 **New User?** [Get started in 2 minutes](#-quick-start)
700
+ 👨‍💻 **Developer?** [Explore the complete API](docs/api-reference.md)
644
701
  🔧 **Production?** [Deploy with confidence](docs/configuration.md)
@@ -4,7 +4,7 @@ build-backend = "poetry.core.masonry.api"
4
4
 
5
5
  [tool.poetry]
6
6
  name = "mcp-code-indexer"
7
- version = "4.0.1"
7
+ version = "4.1.0"
8
8
  description = "MCP server that tracks file descriptions across codebases, enabling AI agents to efficiently navigate and understand code through searchable summaries and token-aware overviews."
9
9
  authors = ["MCP Code Indexer Contributors"]
10
10
  maintainers = ["MCP Code Indexer Contributors"]
@@ -14,10 +14,10 @@ homepage = "https://github.com/fluffypony/mcp-code-indexer"
14
14
  repository = "https://github.com/fluffypony/mcp-code-indexer"
15
15
  documentation = "https://github.com/fluffypony/mcp-code-indexer/blob/main/README.md"
16
16
  keywords = [
17
- "mcp",
18
- "model-context-protocol",
19
- "code-indexer",
20
- "ai-tools",
17
+ "mcp",
18
+ "model-context-protocol",
19
+ "code-indexer",
20
+ "ai-tools",
21
21
  "codebase-navigation",
22
22
  "file-descriptions",
23
23
  "llm-tools"
@@ -59,6 +59,15 @@ fastapi = ">=0.104.0"
59
59
  uvicorn = ">=0.24.0"
60
60
  python-multipart = ">=0.0.6"
61
61
 
62
+ [tool.poetry.extras]
63
+ vector = [
64
+ "voyageai",
65
+ "turbopuffer",
66
+ "tree-sitter",
67
+ "watchdog",
68
+ "pyyaml"
69
+ ]
70
+
62
71
  [tool.poetry.group.dev.dependencies]
63
72
  pytest = ">=8.0.0"
64
73
  pytest-asyncio = ">=0.21.0"
@@ -111,7 +120,7 @@ known_first_party = ["src", "mcp_code_indexer"]
111
120
 
112
121
  # MyPy configuration
113
122
  [tool.mypy]
114
- python_version = "3.9"
123
+ python_version = "3.10"
115
124
  warn_return_any = true
116
125
  warn_unused_configs = true
117
126
  disallow_untyped_defs = true
@@ -171,3 +180,15 @@ exclude_lines = [
171
180
  "class .*\\bProtocol\\):",
172
181
  "@(abc\\.)?abstractmethod"
173
182
  ]
183
+
184
+ # Ruff configuration
185
+ [tool.ruff]
186
+ exclude = [
187
+ ".vulture_whitelist.py",
188
+ "venv",
189
+ ".git",
190
+ "__pycache__",
191
+ ".mypy_cache",
192
+ ".pytest_cache",
193
+ ".ruff_cache"
194
+ ]
@@ -6,10 +6,12 @@ intelligent codebase navigation through searchable file descriptions,
6
6
  token-aware overviews, and advanced merge capabilities.
7
7
  """
8
8
 
9
+
9
10
  # Delay import to avoid dependency issues during testing
10
- def get_server():
11
+ def get_server() -> type:
11
12
  """Get MCPCodeIndexServer (lazy import)."""
12
13
  from .server.mcp_server import MCPCodeIndexServer
14
+
13
15
  return MCPCodeIndexServer
14
16
 
15
17
 
@@ -27,15 +29,15 @@ def _get_version() -> str:
27
29
  for pkg_name in ["mcp-code-indexer", "mcp_code_indexer"]:
28
30
  try:
29
31
  return version(pkg_name)
30
- except Exception:
32
+ except Exception: # nosec B112
31
33
  continue
32
- except Exception:
34
+ except Exception: # nosec B110
33
35
  pass
34
36
 
35
37
  # Fallback to reading from pyproject.toml (for development)
36
38
  try:
37
- from pathlib import Path
38
39
  import sys
40
+ from pathlib import Path
39
41
 
40
42
  if sys.version_info >= (3, 11):
41
43
  import tomllib
@@ -48,7 +50,7 @@ def _get_version() -> str:
48
50
  pyproject_path = Path(__file__).parent.parent.parent / "pyproject.toml"
49
51
  with open(pyproject_path, "rb") as f:
50
52
  data = tomllib.load(f)
51
- return data["project"]["version"]
53
+ return str(data["project"]["version"])
52
54
  except Exception:
53
55
  return "dev"
54
56
 
@@ -8,9 +8,9 @@ and sending them to Claude via OpenRouter API for direct responses.
8
8
 
9
9
  import logging
10
10
  from pathlib import Path
11
- from typing import Dict, Optional, Any
11
+ from typing import Any, Dict, Optional
12
12
 
13
- from .claude_api_handler import ClaudeAPIHandler, ClaudeAPIError
13
+ from .claude_api_handler import ClaudeAPIError, ClaudeAPIHandler
14
14
  from .database.database import DatabaseManager
15
15
 
16
16
 
@@ -11,15 +11,15 @@ import json
11
11
  import logging
12
12
  import os
13
13
  from dataclasses import dataclass
14
- from typing import Dict, List, Optional, Any
15
14
  from pathlib import Path
15
+ from typing import Any, Dict, List, Optional
16
16
 
17
17
  import aiohttp
18
18
  from tenacity import (
19
19
  retry,
20
- wait_exponential,
21
- stop_after_attempt,
22
20
  retry_if_exception_type,
21
+ stop_after_attempt,
22
+ wait_exponential,
23
23
  )
24
24
 
25
25
  from .database.database import DatabaseManager
@@ -194,7 +194,6 @@ class ClaudeAPIHandler:
194
194
  async with session.post(
195
195
  self.OPENROUTER_API_URL, headers=headers, json=payload
196
196
  ) as response:
197
-
198
197
  self.logger.info(f"Claude API response status: {response.status}")
199
198
 
200
199
  if response.status == 429:
@@ -240,7 +239,7 @@ class ClaudeAPIHandler:
240
239
  raise ClaudeAPIError("Claude API request timed out")
241
240
 
242
241
  def validate_json_response(
243
- self, response_text: str, required_keys: List[str] = None
242
+ self, response_text: str, required_keys: Optional[List[str]] = None
244
243
  ) -> Dict[str, Any]:
245
244
  """
246
245
  Validate and parse JSON response from Claude.
@@ -297,6 +296,12 @@ class ClaudeAPIHandler:
297
296
  self.logger.debug(f"Extracted JSON from response: {extracted_json}")
298
297
  data = json.loads(extracted_json)
299
298
 
299
+ # Ensure data is a dictionary
300
+ if not isinstance(data, dict):
301
+ raise ClaudeValidationError(
302
+ f"Expected JSON object, got {type(data).__name__}"
303
+ )
304
+
300
305
  # Validate required keys if specified
301
306
  if required_keys:
302
307
  missing_keys = [key for key in required_keys if key not in data]