mcp-code-indexer 4.0.1__tar.gz → 4.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {mcp_code_indexer-4.0.1 → mcp_code_indexer-4.1.0}/PKG-INFO +82 -24
- {mcp_code_indexer-4.0.1 → mcp_code_indexer-4.1.0}/README.md +80 -23
- {mcp_code_indexer-4.0.1 → mcp_code_indexer-4.1.0}/pyproject.toml +27 -6
- {mcp_code_indexer-4.0.1 → mcp_code_indexer-4.1.0}/src/mcp_code_indexer/__init__.py +7 -5
- {mcp_code_indexer-4.0.1 → mcp_code_indexer-4.1.0}/src/mcp_code_indexer/ask_handler.py +2 -2
- {mcp_code_indexer-4.0.1 → mcp_code_indexer-4.1.0}/src/mcp_code_indexer/claude_api_handler.py +10 -5
- {mcp_code_indexer-4.0.1 → mcp_code_indexer-4.1.0}/src/mcp_code_indexer/cleanup_manager.py +20 -12
- {mcp_code_indexer-4.0.1 → mcp_code_indexer-4.1.0}/src/mcp_code_indexer/commands/makelocal.py +85 -63
- {mcp_code_indexer-4.0.1 → mcp_code_indexer-4.1.0}/src/mcp_code_indexer/data/stop_words_english.txt +1 -1
- {mcp_code_indexer-4.0.1 → mcp_code_indexer-4.1.0}/src/mcp_code_indexer/database/connection_health.py +29 -20
- {mcp_code_indexer-4.0.1 → mcp_code_indexer-4.1.0}/src/mcp_code_indexer/database/database.py +44 -31
- {mcp_code_indexer-4.0.1 → mcp_code_indexer-4.1.0}/src/mcp_code_indexer/database/database_factory.py +19 -20
- {mcp_code_indexer-4.0.1 → mcp_code_indexer-4.1.0}/src/mcp_code_indexer/database/exceptions.py +10 -10
- {mcp_code_indexer-4.0.1 → mcp_code_indexer-4.1.0}/src/mcp_code_indexer/database/models.py +126 -1
- {mcp_code_indexer-4.0.1 → mcp_code_indexer-4.1.0}/src/mcp_code_indexer/database/path_resolver.py +22 -21
- {mcp_code_indexer-4.0.1 → mcp_code_indexer-4.1.0}/src/mcp_code_indexer/database/retry_executor.py +37 -19
- {mcp_code_indexer-4.0.1 → mcp_code_indexer-4.1.0}/src/mcp_code_indexer/deepask_handler.py +3 -3
- {mcp_code_indexer-4.0.1 → mcp_code_indexer-4.1.0}/src/mcp_code_indexer/error_handler.py +46 -20
- {mcp_code_indexer-4.0.1 → mcp_code_indexer-4.1.0}/src/mcp_code_indexer/file_scanner.py +15 -12
- {mcp_code_indexer-4.0.1 → mcp_code_indexer-4.1.0}/src/mcp_code_indexer/git_hook_handler.py +71 -76
- {mcp_code_indexer-4.0.1 → mcp_code_indexer-4.1.0}/src/mcp_code_indexer/logging_config.py +13 -5
- {mcp_code_indexer-4.0.1 → mcp_code_indexer-4.1.0}/src/mcp_code_indexer/main.py +85 -22
- {mcp_code_indexer-4.0.1 → mcp_code_indexer-4.1.0}/src/mcp_code_indexer/middleware/auth.py +47 -43
- {mcp_code_indexer-4.0.1 → mcp_code_indexer-4.1.0}/src/mcp_code_indexer/middleware/error_middleware.py +15 -15
- {mcp_code_indexer-4.0.1 → mcp_code_indexer-4.1.0}/src/mcp_code_indexer/middleware/logging.py +44 -42
- {mcp_code_indexer-4.0.1 → mcp_code_indexer-4.1.0}/src/mcp_code_indexer/middleware/security.py +84 -76
- {mcp_code_indexer-4.0.1 → mcp_code_indexer-4.1.0}/src/mcp_code_indexer/migrations/002_performance_indexes.sql +1 -1
- {mcp_code_indexer-4.0.1 → mcp_code_indexer-4.1.0}/src/mcp_code_indexer/migrations/004_remove_branch_dependency.sql +14 -14
- mcp_code_indexer-4.1.0/src/mcp_code_indexer/migrations/006_vector_mode.sql +189 -0
- {mcp_code_indexer-4.0.1 → mcp_code_indexer-4.1.0}/src/mcp_code_indexer/query_preprocessor.py +2 -2
- {mcp_code_indexer-4.0.1 → mcp_code_indexer-4.1.0}/src/mcp_code_indexer/server/mcp_server.py +158 -94
- {mcp_code_indexer-4.0.1 → mcp_code_indexer-4.1.0}/src/mcp_code_indexer/transport/base.py +19 -17
- {mcp_code_indexer-4.0.1 → mcp_code_indexer-4.1.0}/src/mcp_code_indexer/transport/http_transport.py +89 -76
- {mcp_code_indexer-4.0.1 → mcp_code_indexer-4.1.0}/src/mcp_code_indexer/transport/stdio_transport.py +12 -8
- mcp_code_indexer-4.1.0/src/mcp_code_indexer/vector_mode/__init__.py +36 -0
- mcp_code_indexer-4.1.0/src/mcp_code_indexer/vector_mode/chunking/__init__.py +19 -0
- mcp_code_indexer-4.1.0/src/mcp_code_indexer/vector_mode/chunking/ast_chunker.py +403 -0
- mcp_code_indexer-4.1.0/src/mcp_code_indexer/vector_mode/chunking/chunk_optimizer.py +500 -0
- mcp_code_indexer-4.1.0/src/mcp_code_indexer/vector_mode/chunking/language_handlers.py +478 -0
- mcp_code_indexer-4.1.0/src/mcp_code_indexer/vector_mode/config.py +155 -0
- mcp_code_indexer-4.1.0/src/mcp_code_indexer/vector_mode/daemon.py +335 -0
- mcp_code_indexer-4.1.0/src/mcp_code_indexer/vector_mode/monitoring/__init__.py +19 -0
- mcp_code_indexer-4.1.0/src/mcp_code_indexer/vector_mode/monitoring/change_detector.py +312 -0
- mcp_code_indexer-4.1.0/src/mcp_code_indexer/vector_mode/monitoring/file_watcher.py +445 -0
- mcp_code_indexer-4.1.0/src/mcp_code_indexer/vector_mode/monitoring/merkle_tree.py +418 -0
- mcp_code_indexer-4.1.0/src/mcp_code_indexer/vector_mode/providers/__init__.py +72 -0
- mcp_code_indexer-4.1.0/src/mcp_code_indexer/vector_mode/providers/base_provider.py +230 -0
- mcp_code_indexer-4.1.0/src/mcp_code_indexer/vector_mode/providers/turbopuffer_client.py +338 -0
- mcp_code_indexer-4.1.0/src/mcp_code_indexer/vector_mode/providers/voyage_client.py +212 -0
- mcp_code_indexer-4.1.0/src/mcp_code_indexer/vector_mode/security/__init__.py +11 -0
- mcp_code_indexer-4.1.0/src/mcp_code_indexer/vector_mode/security/patterns.py +297 -0
- mcp_code_indexer-4.1.0/src/mcp_code_indexer/vector_mode/security/redactor.py +368 -0
- {mcp_code_indexer-4.0.1 → mcp_code_indexer-4.1.0}/LICENSE +0 -0
- {mcp_code_indexer-4.0.1 → mcp_code_indexer-4.1.0}/src/mcp_code_indexer/__main__.py +0 -0
- {mcp_code_indexer-4.0.1 → mcp_code_indexer-4.1.0}/src/mcp_code_indexer/commands/__init__.py +0 -0
- {mcp_code_indexer-4.0.1 → mcp_code_indexer-4.1.0}/src/mcp_code_indexer/database/__init__.py +0 -0
- {mcp_code_indexer-4.0.1 → mcp_code_indexer-4.1.0}/src/mcp_code_indexer/middleware/__init__.py +1 -1
- {mcp_code_indexer-4.0.1 → mcp_code_indexer-4.1.0}/src/mcp_code_indexer/migrations/001_initial.sql +0 -0
- {mcp_code_indexer-4.0.1 → mcp_code_indexer-4.1.0}/src/mcp_code_indexer/migrations/003_project_overviews.sql +0 -0
- {mcp_code_indexer-4.0.1 → mcp_code_indexer-4.1.0}/src/mcp_code_indexer/migrations/005_remove_git_remotes.sql +0 -0
- {mcp_code_indexer-4.0.1 → mcp_code_indexer-4.1.0}/src/mcp_code_indexer/server/__init__.py +0 -0
- {mcp_code_indexer-4.0.1 → mcp_code_indexer-4.1.0}/src/mcp_code_indexer/tiktoken_cache/9b5ad71b2ce5302211f9c61530b329a4922fc6a4 +0 -0
- {mcp_code_indexer-4.0.1 → mcp_code_indexer-4.1.0}/src/mcp_code_indexer/token_counter.py +0 -0
- {mcp_code_indexer-4.0.1 → mcp_code_indexer-4.1.0}/src/mcp_code_indexer/tools/__init__.py +0 -0
- {mcp_code_indexer-4.0.1 → mcp_code_indexer-4.1.0}/src/mcp_code_indexer/transport/__init__.py +1 -1
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.3
|
|
2
2
|
Name: mcp-code-indexer
|
|
3
|
-
Version: 4.0.1
|
|
3
|
+
Version: 4.1.0
|
|
4
4
|
Summary: MCP server that tracks file descriptions across codebases, enabling AI agents to efficiently navigate and understand code through searchable summaries and token-aware overviews.
|
|
5
5
|
License: MIT
|
|
6
6
|
Keywords: mcp,model-context-protocol,code-indexer,ai-tools,codebase-navigation,file-descriptions,llm-tools
|
|
@@ -23,6 +23,7 @@ Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
|
|
|
23
23
|
Classifier: Topic :: Software Development
|
|
24
24
|
Classifier: Topic :: Software Development :: Libraries :: Python Modules
|
|
25
25
|
Classifier: Typing :: Typed
|
|
26
|
+
Provides-Extra: vector
|
|
26
27
|
Requires-Dist: aiofiles (==23.2.0)
|
|
27
28
|
Requires-Dist: aiohttp (>=3.8.0)
|
|
28
29
|
Requires-Dist: aiosqlite (==0.19.0)
|
|
@@ -43,8 +44,8 @@ Description-Content-Type: text/markdown
|
|
|
43
44
|
|
|
44
45
|
# MCP Code Indexer 🚀
|
|
45
46
|
|
|
46
|
-
[](https://badge.fury.io/py/mcp-code-indexer)
|
|
48
|
+
[](https://pypi.org/project/mcp-code-indexer/)
|
|
48
49
|
[](https://opensource.org/licenses/MIT)
|
|
49
50
|
|
|
50
51
|
A production-ready **Model Context Protocol (MCP) server** that revolutionizes how AI agents navigate and understand codebases. Built for high-concurrency environments with advanced database resilience, the server provides instant access to intelligent descriptions, semantic search, and context-aware recommendations while maintaining 800+ writes/sec throughput.
|
|
@@ -183,20 +184,77 @@ The git hook integration provides intelligent automation:
|
|
|
183
184
|
|
|
184
185
|
- **📊 Git Analysis**: Automatically analyzes git diffs after commits/merges
|
|
185
186
|
- **🤖 AI Processing**: Uses OpenRouter API with Anthropic's Claude Sonnet 4
|
|
186
|
-
- **⚡ Smart Updates**: Only processes files that actually changed
|
|
187
|
+
- **⚡ Smart Updates**: Only processes files that actually changed
|
|
187
188
|
- **🔄 Overview Maintenance**: Updates project overview when structure changes
|
|
188
189
|
- **🛡️ Error Isolation**: Git operations continue even if indexing fails
|
|
189
190
|
- **⏱️ Rate Limiting**: Built-in retry logic with exponential backoff
|
|
190
191
|
|
|
191
192
|
### 🎯 Key Benefits
|
|
192
193
|
|
|
193
|
-
💡 **Zero Manual Work**: Descriptions stay current without any effort
|
|
194
|
-
⚡ **Performance**: Only analyzes changed files, not entire codebase
|
|
195
|
-
🔒 **Reliability**: Robust error handling ensures git operations never fail
|
|
196
|
-
🎛️ **Configurable**: Support for custom models and timeout settings
|
|
194
|
+
💡 **Zero Manual Work**: Descriptions stay current without any effort
|
|
195
|
+
⚡ **Performance**: Only analyzes changed files, not entire codebase
|
|
196
|
+
🔒 **Reliability**: Robust error handling ensures git operations never fail
|
|
197
|
+
🎛️ **Configurable**: Support for custom models and timeout settings
|
|
197
198
|
|
|
198
199
|
**Learn More**: See [Git Hook Setup Guide](docs/git-hook-setup.md) for complete configuration options and troubleshooting.
|
|
199
200
|
|
|
201
|
+
## 🧠 Vector Mode (BETA)
|
|
202
|
+
|
|
203
|
+
🚀 **NEW Feature**: Semantic code search with vector embeddings! Experience AI-powered code discovery that understands context and meaning, not just keywords.
|
|
204
|
+
|
|
205
|
+
### 🎯 What is Vector Mode?
|
|
206
|
+
|
|
207
|
+
Vector Mode transforms how you search and understand codebases by using AI embeddings:
|
|
208
|
+
|
|
209
|
+
- **🔍 Semantic Search**: Find code by meaning, not just text matching
|
|
210
|
+
- **⚡ Real-time Indexing**: Automatic embedding generation as code changes
|
|
211
|
+
- **🛡️ Secure by Default**: Comprehensive secret redaction before API calls
|
|
212
|
+
- **🌐 Multi-language**: Python, JavaScript, TypeScript with AST-based chunking
|
|
213
|
+
- **📊 Smart Chunking**: Context-aware code segmentation for optimal embeddings
|
|
214
|
+
|
|
215
|
+
### 🚀 Quick Start
|
|
216
|
+
|
|
217
|
+
```bash
|
|
218
|
+
# Install vector mode dependencies
|
|
219
|
+
pip install mcp-code-indexer[vector]
|
|
220
|
+
|
|
221
|
+
# Set required API keys
|
|
222
|
+
export VOYAGE_API_KEY="pa-your-voyage-api-key"
|
|
223
|
+
export TURBOPUFFER_API_KEY="your-turbopuffer-api-key"
|
|
224
|
+
|
|
225
|
+
# Start with vector mode enabled
|
|
226
|
+
mcp-code-indexer --vector
|
|
227
|
+
|
|
228
|
+
# The daemon automatically starts and begins indexing your projects
|
|
229
|
+
```
|
|
230
|
+
|
|
231
|
+
### 💡 Key Features
|
|
232
|
+
|
|
233
|
+
- **🔐 Secret Redaction**: 20+ pattern types automatically detected and redacted
|
|
234
|
+
- **🌳 Merkle Trees**: Efficient change detection without full directory scans
|
|
235
|
+
- **🎛️ Circuit Breakers**: Resilient API integration with automatic retry logic
|
|
236
|
+
- **📈 Production Ready**: Built for high-concurrency with comprehensive monitoring
|
|
237
|
+
|
|
238
|
+
### 🔧 Advanced Configuration
|
|
239
|
+
|
|
240
|
+
```bash
|
|
241
|
+
# Custom configuration
|
|
242
|
+
mcp-code-indexer --vector --vector-config /path/to/config.yaml
|
|
243
|
+
|
|
244
|
+
# HTTP mode with vector search
|
|
245
|
+
mcp-code-indexer --vector --http --port 8080
|
|
246
|
+
```
|
|
247
|
+
|
|
248
|
+
### 🛠️ Architecture
|
|
249
|
+
|
|
250
|
+
Vector Mode adds powerful new MCP tools:
|
|
251
|
+
- `vector_search` - Semantic code search across projects
|
|
252
|
+
- `similarity_search` - Find similar code patterns
|
|
253
|
+
- `dependency_search` - Discover code relationships
|
|
254
|
+
- `vector_status` - Monitor indexing progress
|
|
255
|
+
|
|
256
|
+
**Status**: Currently in BETA - foundations implemented, full pipeline in development.
|
|
257
|
+
|
|
200
258
|
## 🔧 Development Setup
|
|
201
259
|
|
|
202
260
|
### 👨💻 For Contributors
|
|
@@ -369,14 +427,14 @@ Comprehensive documentation organized by user journey and expertise level.
|
|
|
369
427
|
|
|
370
428
|
### 📋 Quick References
|
|
371
429
|
- **[Examples & Integrations](examples/)** - Ready-to-use configurations
|
|
372
|
-
- **[Troubleshooting](#🚨-troubleshooting)** - Common issues & solutions
|
|
430
|
+
- **[Troubleshooting](#🚨-troubleshooting)** - Common issues & solutions
|
|
373
431
|
- **[API Tools Summary](#🛠️-mcp-tools-available)** - All 11 tools at a glance
|
|
374
432
|
|
|
375
433
|
**📚 Reading Paths:**
|
|
376
434
|
- **New to MCP Code Indexer?** Quick Start → API Reference → HTTP API → Q&A Interface
|
|
377
435
|
- **Web developers?** Quick Start → HTTP API Reference → Q&A Interface → Git Hooks
|
|
378
436
|
- **AI/ML engineers?** Quick Start → Q&A Interface → API Reference → Git Hooks
|
|
379
|
-
- **Setting up for a team?** CLI Reference → Configuration → Administrative Commands → Monitoring
|
|
437
|
+
- **Setting up for a team?** CLI Reference → Configuration → Administrative Commands → Monitoring
|
|
380
438
|
- **Contributing to the project?** Architecture → Contributing → API Reference
|
|
381
439
|
|
|
382
440
|
## 🚦 System Requirements
|
|
@@ -390,7 +448,7 @@ Comprehensive documentation organized by user journey and expertise level.
|
|
|
390
448
|
|
|
391
449
|
Tested with codebases up to **10,000 files**:
|
|
392
450
|
- File description retrieval: **< 10ms**
|
|
393
|
-
- Full-text search: **< 100ms**
|
|
451
|
+
- Full-text search: **< 100ms**
|
|
394
452
|
- Codebase overview generation: **< 2s**
|
|
395
453
|
- Merge conflict detection: **< 5s**
|
|
396
454
|
|
|
@@ -460,11 +518,11 @@ async def analyze_codebase(project_path):
|
|
|
460
518
|
"projectName": "my-project",
|
|
461
519
|
"folderPath": project_path
|
|
462
520
|
})
|
|
463
|
-
|
|
521
|
+
|
|
464
522
|
if size_info["isLarge"]:
|
|
465
523
|
# Use search for large codebases
|
|
466
524
|
results = await mcp_client.call_tool("search_descriptions", {
|
|
467
|
-
"projectName": "my-project",
|
|
525
|
+
"projectName": "my-project",
|
|
468
526
|
"folderPath": project_path,
|
|
469
527
|
"query": "authentication logic"
|
|
470
528
|
})
|
|
@@ -484,18 +542,18 @@ async def analyze_codebase(project_path):
|
|
|
484
542
|
python -c "
|
|
485
543
|
import asyncio
|
|
486
544
|
from mcp_client import MCPClient
|
|
487
|
-
|
|
545
|
+
|
|
488
546
|
async def update_descriptions():
|
|
489
547
|
client = MCPClient('mcp-code-indexer')
|
|
490
|
-
|
|
548
|
+
|
|
491
549
|
# Find files without descriptions
|
|
492
550
|
missing = await client.call_tool('find_missing_descriptions', {
|
|
493
551
|
'projectName': '${{ github.repository }}',
|
|
494
552
|
'folderPath': '.'
|
|
495
553
|
})
|
|
496
|
-
|
|
554
|
+
|
|
497
555
|
# Process with AI and update...
|
|
498
|
-
|
|
556
|
+
|
|
499
557
|
asyncio.run(update_descriptions())
|
|
500
558
|
"
|
|
501
559
|
```
|
|
@@ -606,7 +664,7 @@ mcp-code-indexer --map PROJECT_NAME
|
|
|
606
664
|
## 🛡️ Security Features
|
|
607
665
|
|
|
608
666
|
- **Input validation** on all MCP tool parameters
|
|
609
|
-
- **SQL injection protection** via parameterized queries
|
|
667
|
+
- **SQL injection protection** via parameterized queries
|
|
610
668
|
- **File system sandboxing** with .gitignore respect
|
|
611
669
|
- **Error sanitization** to prevent information leakage
|
|
612
670
|
- **Async resource cleanup** to prevent memory leaks
|
|
@@ -638,7 +696,7 @@ Ready to supercharge your AI agents with intelligent codebase navigation?
|
|
|
638
696
|
2. **[Master the API tools](docs/api-reference.md)** - Learn all 11 tools with examples
|
|
639
697
|
3. **[Try HTTP API access](docs/http-api.md)** - REST API for web applications
|
|
640
698
|
4. **[Explore AI-powered Q&A](docs/qa-interface.md)** - Ask questions about your code
|
|
641
|
-
5. **[Set up git hooks](docs/git-hook-setup.md)** - Automate your workflow
|
|
699
|
+
5. **[Set up git hooks](docs/git-hook-setup.md)** - Automate your workflow
|
|
642
700
|
|
|
643
701
|
**👥 Setting up for a team?**
|
|
644
702
|
1. **[Learn all CLI commands](docs/cli-reference.md)** - Complete command reference
|
|
@@ -661,7 +719,7 @@ Ready to supercharge your AI agents with intelligent codebase navigation?
|
|
|
661
719
|
|
|
662
720
|
We welcome contributions! See our **[Contributing Guide](docs/contributing.md)** for:
|
|
663
721
|
- Development setup
|
|
664
|
-
- Code style guidelines
|
|
722
|
+
- Code style guidelines
|
|
665
723
|
- Testing requirements
|
|
666
724
|
- Pull request process
|
|
667
725
|
|
|
@@ -672,7 +730,7 @@ MIT License - see **[LICENSE](LICENSE)** for details.
|
|
|
672
730
|
## 🙏 Built With
|
|
673
731
|
|
|
674
732
|
- **[Model Context Protocol](https://github.com/modelcontextprotocol/python-sdk)** - The foundation for tool integration
|
|
675
|
-
- **[tiktoken](https://pypi.org/project/tiktoken/)** - Fast BPE tokenization
|
|
733
|
+
- **[tiktoken](https://pypi.org/project/tiktoken/)** - Fast BPE tokenization
|
|
676
734
|
- **[aiosqlite](https://pypi.org/project/aiosqlite/)** - Async SQLite operations
|
|
677
735
|
- **[aiohttp](https://pypi.org/project/aiohttp/)** - Async HTTP client for OpenRouter API
|
|
678
736
|
- **[tenacity](https://pypi.org/project/tenacity/)** - Robust retry logic and rate limiting
|
|
@@ -680,9 +738,9 @@ MIT License - see **[LICENSE](LICENSE)** for details.
|
|
|
680
738
|
|
|
681
739
|
---
|
|
682
740
|
|
|
683
|
-
**Transform how your AI agents understand code!** 🚀
|
|
741
|
+
**Transform how your AI agents understand code!** 🚀
|
|
684
742
|
|
|
685
|
-
🎯 **New User?** [Get started in 2 minutes](#-quick-start)
|
|
686
|
-
👨💻 **Developer?** [Explore the complete API](docs/api-reference.md)
|
|
743
|
+
🎯 **New User?** [Get started in 2 minutes](#-quick-start)
|
|
744
|
+
👨💻 **Developer?** [Explore the complete API](docs/api-reference.md)
|
|
687
745
|
🔧 **Production?** [Deploy with confidence](docs/configuration.md)
|
|
688
746
|
|
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
# MCP Code Indexer 🚀
|
|
2
2
|
|
|
3
|
-
[](https://badge.fury.io/py/mcp-code-indexer)
|
|
4
|
+
[](https://pypi.org/project/mcp-code-indexer/)
|
|
5
5
|
[](https://opensource.org/licenses/MIT)
|
|
6
6
|
|
|
7
7
|
A production-ready **Model Context Protocol (MCP) server** that revolutionizes how AI agents navigate and understand codebases. Built for high-concurrency environments with advanced database resilience, the server provides instant access to intelligent descriptions, semantic search, and context-aware recommendations while maintaining 800+ writes/sec throughput.
|
|
@@ -140,20 +140,77 @@ The git hook integration provides intelligent automation:
|
|
|
140
140
|
|
|
141
141
|
- **📊 Git Analysis**: Automatically analyzes git diffs after commits/merges
|
|
142
142
|
- **🤖 AI Processing**: Uses OpenRouter API with Anthropic's Claude Sonnet 4
|
|
143
|
-
- **⚡ Smart Updates**: Only processes files that actually changed
|
|
143
|
+
- **⚡ Smart Updates**: Only processes files that actually changed
|
|
144
144
|
- **🔄 Overview Maintenance**: Updates project overview when structure changes
|
|
145
145
|
- **🛡️ Error Isolation**: Git operations continue even if indexing fails
|
|
146
146
|
- **⏱️ Rate Limiting**: Built-in retry logic with exponential backoff
|
|
147
147
|
|
|
148
148
|
### 🎯 Key Benefits
|
|
149
149
|
|
|
150
|
-
💡 **Zero Manual Work**: Descriptions stay current without any effort
|
|
151
|
-
⚡ **Performance**: Only analyzes changed files, not entire codebase
|
|
152
|
-
🔒 **Reliability**: Robust error handling ensures git operations never fail
|
|
153
|
-
🎛️ **Configurable**: Support for custom models and timeout settings
|
|
150
|
+
💡 **Zero Manual Work**: Descriptions stay current without any effort
|
|
151
|
+
⚡ **Performance**: Only analyzes changed files, not entire codebase
|
|
152
|
+
🔒 **Reliability**: Robust error handling ensures git operations never fail
|
|
153
|
+
🎛️ **Configurable**: Support for custom models and timeout settings
|
|
154
154
|
|
|
155
155
|
**Learn More**: See [Git Hook Setup Guide](docs/git-hook-setup.md) for complete configuration options and troubleshooting.
|
|
156
156
|
|
|
157
|
+
## 🧠 Vector Mode (BETA)
|
|
158
|
+
|
|
159
|
+
🚀 **NEW Feature**: Semantic code search with vector embeddings! Experience AI-powered code discovery that understands context and meaning, not just keywords.
|
|
160
|
+
|
|
161
|
+
### 🎯 What is Vector Mode?
|
|
162
|
+
|
|
163
|
+
Vector Mode transforms how you search and understand codebases by using AI embeddings:
|
|
164
|
+
|
|
165
|
+
- **🔍 Semantic Search**: Find code by meaning, not just text matching
|
|
166
|
+
- **⚡ Real-time Indexing**: Automatic embedding generation as code changes
|
|
167
|
+
- **🛡️ Secure by Default**: Comprehensive secret redaction before API calls
|
|
168
|
+
- **🌐 Multi-language**: Python, JavaScript, TypeScript with AST-based chunking
|
|
169
|
+
- **📊 Smart Chunking**: Context-aware code segmentation for optimal embeddings
|
|
170
|
+
|
|
171
|
+
### 🚀 Quick Start
|
|
172
|
+
|
|
173
|
+
```bash
|
|
174
|
+
# Install vector mode dependencies
|
|
175
|
+
pip install mcp-code-indexer[vector]
|
|
176
|
+
|
|
177
|
+
# Set required API keys
|
|
178
|
+
export VOYAGE_API_KEY="pa-your-voyage-api-key"
|
|
179
|
+
export TURBOPUFFER_API_KEY="your-turbopuffer-api-key"
|
|
180
|
+
|
|
181
|
+
# Start with vector mode enabled
|
|
182
|
+
mcp-code-indexer --vector
|
|
183
|
+
|
|
184
|
+
# The daemon automatically starts and begins indexing your projects
|
|
185
|
+
```
|
|
186
|
+
|
|
187
|
+
### 💡 Key Features
|
|
188
|
+
|
|
189
|
+
- **🔐 Secret Redaction**: 20+ pattern types automatically detected and redacted
|
|
190
|
+
- **🌳 Merkle Trees**: Efficient change detection without full directory scans
|
|
191
|
+
- **🎛️ Circuit Breakers**: Resilient API integration with automatic retry logic
|
|
192
|
+
- **📈 Production Ready**: Built for high-concurrency with comprehensive monitoring
|
|
193
|
+
|
|
194
|
+
### 🔧 Advanced Configuration
|
|
195
|
+
|
|
196
|
+
```bash
|
|
197
|
+
# Custom configuration
|
|
198
|
+
mcp-code-indexer --vector --vector-config /path/to/config.yaml
|
|
199
|
+
|
|
200
|
+
# HTTP mode with vector search
|
|
201
|
+
mcp-code-indexer --vector --http --port 8080
|
|
202
|
+
```
|
|
203
|
+
|
|
204
|
+
### 🛠️ Architecture
|
|
205
|
+
|
|
206
|
+
Vector Mode adds powerful new MCP tools:
|
|
207
|
+
- `vector_search` - Semantic code search across projects
|
|
208
|
+
- `similarity_search` - Find similar code patterns
|
|
209
|
+
- `dependency_search` - Discover code relationships
|
|
210
|
+
- `vector_status` - Monitor indexing progress
|
|
211
|
+
|
|
212
|
+
**Status**: Currently in BETA - foundations implemented, full pipeline in development.
|
|
213
|
+
|
|
157
214
|
## 🔧 Development Setup
|
|
158
215
|
|
|
159
216
|
### 👨💻 For Contributors
|
|
@@ -326,14 +383,14 @@ Comprehensive documentation organized by user journey and expertise level.
|
|
|
326
383
|
|
|
327
384
|
### 📋 Quick References
|
|
328
385
|
- **[Examples & Integrations](examples/)** - Ready-to-use configurations
|
|
329
|
-
- **[Troubleshooting](#🚨-troubleshooting)** - Common issues & solutions
|
|
386
|
+
- **[Troubleshooting](#🚨-troubleshooting)** - Common issues & solutions
|
|
330
387
|
- **[API Tools Summary](#🛠️-mcp-tools-available)** - All 11 tools at a glance
|
|
331
388
|
|
|
332
389
|
**📚 Reading Paths:**
|
|
333
390
|
- **New to MCP Code Indexer?** Quick Start → API Reference → HTTP API → Q&A Interface
|
|
334
391
|
- **Web developers?** Quick Start → HTTP API Reference → Q&A Interface → Git Hooks
|
|
335
392
|
- **AI/ML engineers?** Quick Start → Q&A Interface → API Reference → Git Hooks
|
|
336
|
-
- **Setting up for a team?** CLI Reference → Configuration → Administrative Commands → Monitoring
|
|
393
|
+
- **Setting up for a team?** CLI Reference → Configuration → Administrative Commands → Monitoring
|
|
337
394
|
- **Contributing to the project?** Architecture → Contributing → API Reference
|
|
338
395
|
|
|
339
396
|
## 🚦 System Requirements
|
|
@@ -347,7 +404,7 @@ Comprehensive documentation organized by user journey and expertise level.
|
|
|
347
404
|
|
|
348
405
|
Tested with codebases up to **10,000 files**:
|
|
349
406
|
- File description retrieval: **< 10ms**
|
|
350
|
-
- Full-text search: **< 100ms**
|
|
407
|
+
- Full-text search: **< 100ms**
|
|
351
408
|
- Codebase overview generation: **< 2s**
|
|
352
409
|
- Merge conflict detection: **< 5s**
|
|
353
410
|
|
|
@@ -417,11 +474,11 @@ async def analyze_codebase(project_path):
|
|
|
417
474
|
"projectName": "my-project",
|
|
418
475
|
"folderPath": project_path
|
|
419
476
|
})
|
|
420
|
-
|
|
477
|
+
|
|
421
478
|
if size_info["isLarge"]:
|
|
422
479
|
# Use search for large codebases
|
|
423
480
|
results = await mcp_client.call_tool("search_descriptions", {
|
|
424
|
-
"projectName": "my-project",
|
|
481
|
+
"projectName": "my-project",
|
|
425
482
|
"folderPath": project_path,
|
|
426
483
|
"query": "authentication logic"
|
|
427
484
|
})
|
|
@@ -441,18 +498,18 @@ async def analyze_codebase(project_path):
|
|
|
441
498
|
python -c "
|
|
442
499
|
import asyncio
|
|
443
500
|
from mcp_client import MCPClient
|
|
444
|
-
|
|
501
|
+
|
|
445
502
|
async def update_descriptions():
|
|
446
503
|
client = MCPClient('mcp-code-indexer')
|
|
447
|
-
|
|
504
|
+
|
|
448
505
|
# Find files without descriptions
|
|
449
506
|
missing = await client.call_tool('find_missing_descriptions', {
|
|
450
507
|
'projectName': '${{ github.repository }}',
|
|
451
508
|
'folderPath': '.'
|
|
452
509
|
})
|
|
453
|
-
|
|
510
|
+
|
|
454
511
|
# Process with AI and update...
|
|
455
|
-
|
|
512
|
+
|
|
456
513
|
asyncio.run(update_descriptions())
|
|
457
514
|
"
|
|
458
515
|
```
|
|
@@ -563,7 +620,7 @@ mcp-code-indexer --map PROJECT_NAME
|
|
|
563
620
|
## 🛡️ Security Features
|
|
564
621
|
|
|
565
622
|
- **Input validation** on all MCP tool parameters
|
|
566
|
-
- **SQL injection protection** via parameterized queries
|
|
623
|
+
- **SQL injection protection** via parameterized queries
|
|
567
624
|
- **File system sandboxing** with .gitignore respect
|
|
568
625
|
- **Error sanitization** to prevent information leakage
|
|
569
626
|
- **Async resource cleanup** to prevent memory leaks
|
|
@@ -595,7 +652,7 @@ Ready to supercharge your AI agents with intelligent codebase navigation?
|
|
|
595
652
|
2. **[Master the API tools](docs/api-reference.md)** - Learn all 11 tools with examples
|
|
596
653
|
3. **[Try HTTP API access](docs/http-api.md)** - REST API for web applications
|
|
597
654
|
4. **[Explore AI-powered Q&A](docs/qa-interface.md)** - Ask questions about your code
|
|
598
|
-
5. **[Set up git hooks](docs/git-hook-setup.md)** - Automate your workflow
|
|
655
|
+
5. **[Set up git hooks](docs/git-hook-setup.md)** - Automate your workflow
|
|
599
656
|
|
|
600
657
|
**👥 Setting up for a team?**
|
|
601
658
|
1. **[Learn all CLI commands](docs/cli-reference.md)** - Complete command reference
|
|
@@ -618,7 +675,7 @@ Ready to supercharge your AI agents with intelligent codebase navigation?
|
|
|
618
675
|
|
|
619
676
|
We welcome contributions! See our **[Contributing Guide](docs/contributing.md)** for:
|
|
620
677
|
- Development setup
|
|
621
|
-
- Code style guidelines
|
|
678
|
+
- Code style guidelines
|
|
622
679
|
- Testing requirements
|
|
623
680
|
- Pull request process
|
|
624
681
|
|
|
@@ -629,7 +686,7 @@ MIT License - see **[LICENSE](LICENSE)** for details.
|
|
|
629
686
|
## 🙏 Built With
|
|
630
687
|
|
|
631
688
|
- **[Model Context Protocol](https://github.com/modelcontextprotocol/python-sdk)** - The foundation for tool integration
|
|
632
|
-
- **[tiktoken](https://pypi.org/project/tiktoken/)** - Fast BPE tokenization
|
|
689
|
+
- **[tiktoken](https://pypi.org/project/tiktoken/)** - Fast BPE tokenization
|
|
633
690
|
- **[aiosqlite](https://pypi.org/project/aiosqlite/)** - Async SQLite operations
|
|
634
691
|
- **[aiohttp](https://pypi.org/project/aiohttp/)** - Async HTTP client for OpenRouter API
|
|
635
692
|
- **[tenacity](https://pypi.org/project/tenacity/)** - Robust retry logic and rate limiting
|
|
@@ -637,8 +694,8 @@ MIT License - see **[LICENSE](LICENSE)** for details.
|
|
|
637
694
|
|
|
638
695
|
---
|
|
639
696
|
|
|
640
|
-
**Transform how your AI agents understand code!** 🚀
|
|
697
|
+
**Transform how your AI agents understand code!** 🚀
|
|
641
698
|
|
|
642
|
-
🎯 **New User?** [Get started in 2 minutes](#-quick-start)
|
|
643
|
-
👨💻 **Developer?** [Explore the complete API](docs/api-reference.md)
|
|
699
|
+
🎯 **New User?** [Get started in 2 minutes](#-quick-start)
|
|
700
|
+
👨💻 **Developer?** [Explore the complete API](docs/api-reference.md)
|
|
644
701
|
🔧 **Production?** [Deploy with confidence](docs/configuration.md)
|
|
@@ -4,7 +4,7 @@ build-backend = "poetry.core.masonry.api"
|
|
|
4
4
|
|
|
5
5
|
[tool.poetry]
|
|
6
6
|
name = "mcp-code-indexer"
|
|
7
|
-
version = "4.0.1"
|
|
7
|
+
version = "4.1.0"
|
|
8
8
|
description = "MCP server that tracks file descriptions across codebases, enabling AI agents to efficiently navigate and understand code through searchable summaries and token-aware overviews."
|
|
9
9
|
authors = ["MCP Code Indexer Contributors"]
|
|
10
10
|
maintainers = ["MCP Code Indexer Contributors"]
|
|
@@ -14,10 +14,10 @@ homepage = "https://github.com/fluffypony/mcp-code-indexer"
|
|
|
14
14
|
repository = "https://github.com/fluffypony/mcp-code-indexer"
|
|
15
15
|
documentation = "https://github.com/fluffypony/mcp-code-indexer/blob/main/README.md"
|
|
16
16
|
keywords = [
|
|
17
|
-
"mcp",
|
|
18
|
-
"model-context-protocol",
|
|
19
|
-
"code-indexer",
|
|
20
|
-
"ai-tools",
|
|
17
|
+
"mcp",
|
|
18
|
+
"model-context-protocol",
|
|
19
|
+
"code-indexer",
|
|
20
|
+
"ai-tools",
|
|
21
21
|
"codebase-navigation",
|
|
22
22
|
"file-descriptions",
|
|
23
23
|
"llm-tools"
|
|
@@ -59,6 +59,15 @@ fastapi = ">=0.104.0"
|
|
|
59
59
|
uvicorn = ">=0.24.0"
|
|
60
60
|
python-multipart = ">=0.0.6"
|
|
61
61
|
|
|
62
|
+
[tool.poetry.extras]
|
|
63
|
+
vector = [
|
|
64
|
+
"voyageai",
|
|
65
|
+
"turbopuffer",
|
|
66
|
+
"tree-sitter",
|
|
67
|
+
"watchdog",
|
|
68
|
+
"pyyaml"
|
|
69
|
+
]
|
|
70
|
+
|
|
62
71
|
[tool.poetry.group.dev.dependencies]
|
|
63
72
|
pytest = ">=8.0.0"
|
|
64
73
|
pytest-asyncio = ">=0.21.0"
|
|
@@ -111,7 +120,7 @@ known_first_party = ["src", "mcp_code_indexer"]
|
|
|
111
120
|
|
|
112
121
|
# MyPy configuration
|
|
113
122
|
[tool.mypy]
|
|
114
|
-
python_version = "3.
|
|
123
|
+
python_version = "3.10"
|
|
115
124
|
warn_return_any = true
|
|
116
125
|
warn_unused_configs = true
|
|
117
126
|
disallow_untyped_defs = true
|
|
@@ -171,3 +180,15 @@ exclude_lines = [
|
|
|
171
180
|
"class .*\\bProtocol\\):",
|
|
172
181
|
"@(abc\\.)?abstractmethod"
|
|
173
182
|
]
|
|
183
|
+
|
|
184
|
+
# Ruff configuration
|
|
185
|
+
[tool.ruff]
|
|
186
|
+
exclude = [
|
|
187
|
+
".vulture_whitelist.py",
|
|
188
|
+
"venv",
|
|
189
|
+
".git",
|
|
190
|
+
"__pycache__",
|
|
191
|
+
".mypy_cache",
|
|
192
|
+
".pytest_cache",
|
|
193
|
+
".ruff_cache"
|
|
194
|
+
]
|
|
@@ -6,10 +6,12 @@ intelligent codebase navigation through searchable file descriptions,
|
|
|
6
6
|
token-aware overviews, and advanced merge capabilities.
|
|
7
7
|
"""
|
|
8
8
|
|
|
9
|
+
|
|
9
10
|
# Delay import to avoid dependency issues during testing
|
|
10
|
-
def get_server():
|
|
11
|
+
def get_server() -> type:
|
|
11
12
|
"""Get MCPCodeIndexServer (lazy import)."""
|
|
12
13
|
from .server.mcp_server import MCPCodeIndexServer
|
|
14
|
+
|
|
13
15
|
return MCPCodeIndexServer
|
|
14
16
|
|
|
15
17
|
|
|
@@ -27,15 +29,15 @@ def _get_version() -> str:
|
|
|
27
29
|
for pkg_name in ["mcp-code-indexer", "mcp_code_indexer"]:
|
|
28
30
|
try:
|
|
29
31
|
return version(pkg_name)
|
|
30
|
-
except Exception:
|
|
32
|
+
except Exception: # nosec B112
|
|
31
33
|
continue
|
|
32
|
-
except Exception:
|
|
34
|
+
except Exception: # nosec B110
|
|
33
35
|
pass
|
|
34
36
|
|
|
35
37
|
# Fallback to reading from pyproject.toml (for development)
|
|
36
38
|
try:
|
|
37
|
-
from pathlib import Path
|
|
38
39
|
import sys
|
|
40
|
+
from pathlib import Path
|
|
39
41
|
|
|
40
42
|
if sys.version_info >= (3, 11):
|
|
41
43
|
import tomllib
|
|
@@ -48,7 +50,7 @@ def _get_version() -> str:
|
|
|
48
50
|
pyproject_path = Path(__file__).parent.parent.parent / "pyproject.toml"
|
|
49
51
|
with open(pyproject_path, "rb") as f:
|
|
50
52
|
data = tomllib.load(f)
|
|
51
|
-
return data["project"]["version"]
|
|
53
|
+
return str(data["project"]["version"])
|
|
52
54
|
except Exception:
|
|
53
55
|
return "dev"
|
|
54
56
|
|
|
@@ -8,9 +8,9 @@ and sending them to Claude via OpenRouter API for direct responses.
|
|
|
8
8
|
|
|
9
9
|
import logging
|
|
10
10
|
from pathlib import Path
|
|
11
|
-
from typing import Dict, Optional
|
|
11
|
+
from typing import Any, Dict, Optional
|
|
12
12
|
|
|
13
|
-
from .claude_api_handler import
|
|
13
|
+
from .claude_api_handler import ClaudeAPIError, ClaudeAPIHandler
|
|
14
14
|
from .database.database import DatabaseManager
|
|
15
15
|
|
|
16
16
|
|
{mcp_code_indexer-4.0.1 → mcp_code_indexer-4.1.0}/src/mcp_code_indexer/claude_api_handler.py
RENAMED
|
@@ -11,15 +11,15 @@ import json
|
|
|
11
11
|
import logging
|
|
12
12
|
import os
|
|
13
13
|
from dataclasses import dataclass
|
|
14
|
-
from typing import Dict, List, Optional, Any
|
|
15
14
|
from pathlib import Path
|
|
15
|
+
from typing import Any, Dict, List, Optional
|
|
16
16
|
|
|
17
17
|
import aiohttp
|
|
18
18
|
from tenacity import (
|
|
19
19
|
retry,
|
|
20
|
-
wait_exponential,
|
|
21
|
-
stop_after_attempt,
|
|
22
20
|
retry_if_exception_type,
|
|
21
|
+
stop_after_attempt,
|
|
22
|
+
wait_exponential,
|
|
23
23
|
)
|
|
24
24
|
|
|
25
25
|
from .database.database import DatabaseManager
|
|
@@ -194,7 +194,6 @@ class ClaudeAPIHandler:
|
|
|
194
194
|
async with session.post(
|
|
195
195
|
self.OPENROUTER_API_URL, headers=headers, json=payload
|
|
196
196
|
) as response:
|
|
197
|
-
|
|
198
197
|
self.logger.info(f"Claude API response status: {response.status}")
|
|
199
198
|
|
|
200
199
|
if response.status == 429:
|
|
@@ -240,7 +239,7 @@ class ClaudeAPIHandler:
|
|
|
240
239
|
raise ClaudeAPIError("Claude API request timed out")
|
|
241
240
|
|
|
242
241
|
def validate_json_response(
|
|
243
|
-
self, response_text: str, required_keys: List[str] = None
|
|
242
|
+
self, response_text: str, required_keys: Optional[List[str]] = None
|
|
244
243
|
) -> Dict[str, Any]:
|
|
245
244
|
"""
|
|
246
245
|
Validate and parse JSON response from Claude.
|
|
@@ -297,6 +296,12 @@ class ClaudeAPIHandler:
|
|
|
297
296
|
self.logger.debug(f"Extracted JSON from response: {extracted_json}")
|
|
298
297
|
data = json.loads(extracted_json)
|
|
299
298
|
|
|
299
|
+
# Ensure data is a dictionary
|
|
300
|
+
if not isinstance(data, dict):
|
|
301
|
+
raise ClaudeValidationError(
|
|
302
|
+
f"Expected JSON object, got {type(data).__name__}"
|
|
303
|
+
)
|
|
304
|
+
|
|
300
305
|
# Validate required keys if specified
|
|
301
306
|
if required_keys:
|
|
302
307
|
missing_keys = [key for key in required_keys if key not in data]
|