mcp-code-indexer 1.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- mcp_code_indexer/__init__.py +16 -0
- mcp_code_indexer/database/__init__.py +1 -0
- mcp_code_indexer/database/database.py +480 -0
- mcp_code_indexer/database/models.py +123 -0
- mcp_code_indexer/error_handler.py +365 -0
- mcp_code_indexer/file_scanner.py +375 -0
- mcp_code_indexer/logging_config.py +183 -0
- mcp_code_indexer/main.py +129 -0
- mcp_code_indexer/merge_handler.py +386 -0
- mcp_code_indexer/middleware/__init__.py +7 -0
- mcp_code_indexer/middleware/error_middleware.py +286 -0
- mcp_code_indexer/server/__init__.py +1 -0
- mcp_code_indexer/server/mcp_server.py +699 -0
- mcp_code_indexer/tiktoken_cache/9b5ad71b2ce5302211f9c61530b329a4922fc6a4 +100256 -0
- mcp_code_indexer/token_counter.py +243 -0
- mcp_code_indexer/tools/__init__.py +1 -0
- mcp_code_indexer-1.0.0.dist-info/METADATA +364 -0
- mcp_code_indexer-1.0.0.dist-info/RECORD +22 -0
- mcp_code_indexer-1.0.0.dist-info/WHEEL +5 -0
- mcp_code_indexer-1.0.0.dist-info/entry_points.txt +2 -0
- mcp_code_indexer-1.0.0.dist-info/licenses/LICENSE +21 -0
- mcp_code_indexer-1.0.0.dist-info/top_level.txt +1 -0
@@ -0,0 +1,243 @@
|
|
1
|
+
"""
|
2
|
+
Token counting functionality using tiktoken with offline cache.
|
3
|
+
|
4
|
+
This module provides token counting capabilities using the tiktoken library
|
5
|
+
with a bundled cache file for offline operation. It enables accurate token
|
6
|
+
estimation for determining whether to use full overview or search approaches.
|
7
|
+
"""
|
8
|
+
|
9
|
+
import hashlib
|
10
|
+
import logging
|
11
|
+
import os
|
12
|
+
from pathlib import Path
|
13
|
+
from typing import List, Optional
|
14
|
+
|
15
|
+
import tiktoken
|
16
|
+
|
17
|
+
from mcp_code_indexer.database.models import FileDescription, FolderNode
|
18
|
+
|
19
|
+
logger = logging.getLogger(__name__)
|
20
|
+
|
21
|
+
|
22
|
+
class TokenCounter:
    """
    Handles token counting using tiktoken with offline cache support.

    On construction the counter points tiktoken at the cache directory that
    ships next to this module (via the ``TIKTOKEN_CACHE_DIR`` environment
    variable) and loads an encoder. The remaining methods count tokens for
    plain text, file descriptions and folder trees, and compare totals
    against ``token_limit`` to recommend an overview or a search.
    """

    def __init__(self, token_limit: int = 32000):
        """
        Initialize token counter with specified limit.

        Args:
            token_limit: Maximum tokens before recommending search over overview

        Raises:
            FileNotFoundError: If the bundled cache directory or file is missing.
            RuntimeError: If no tiktoken encoder could be initialized.
        """
        self.token_limit = token_limit
        self._encoder: Optional[tiktoken.Encoding] = None
        self._setup_offline_tiktoken()
        self._init_encoder()

    def _setup_offline_tiktoken(self) -> None:
        """
        Configure tiktoken to use bundled encoding file for offline operation.

        Raises:
            FileNotFoundError: If the ``tiktoken_cache`` directory or the
                expected cache file inside it does not exist.
        """
        # The cache directory lives next to this module.
        base_dir = Path(__file__).parent.absolute()
        cache_dir = base_dir / "tiktoken_cache"

        if not cache_dir.exists():
            raise FileNotFoundError(
                f"Tiktoken cache directory not found at {cache_dir}. "
                "Please ensure the tiktoken_cache directory exists in the src folder."
            )

        # Name under which the cl100k_base vocabulary file is stored in the cache.
        cache_file = "9b5ad71b2ce5302211f9c61530b329a4922fc6a4"
        cache_path = cache_dir / cache_file

        if not cache_path.exists():
            raise FileNotFoundError(
                f"Tiktoken cache file not found at {cache_path}. "
                "Please ensure the cl100k_base.tiktoken file is properly "
                f"renamed to {cache_file} and placed in the tiktoken_cache directory."
            )

        # Touch the process environment only after both checks passed, so a
        # failed setup does not leave TIKTOKEN_CACHE_DIR pointing at a
        # directory that cannot serve the encoding.
        os.environ["TIKTOKEN_CACHE_DIR"] = str(cache_dir)

        logger.debug(f"Configured tiktoken to use cache at {cache_dir}")

    def _init_encoder(self) -> None:
        """
        Initialize tiktoken encoder with fallback options.

        Tries the ``cl100k_base`` encoding first and falls back to the
        encoding tiktoken associates with the ``gpt-4o`` model.

        Raises:
            RuntimeError: If both attempts fail.
        """
        try:
            self._encoder = tiktoken.get_encoding("cl100k_base")
            logger.debug("Initialized tiktoken with cl100k_base encoding")
        except Exception as e:
            logger.warning(f"Failed to load cl100k_base encoding: {e}")
            try:
                # NOTE(review): the model-based fallback presumably resolves to
                # a different encoding than the bundled cl100k_base file, so it
                # may need network access and yield different counts - confirm.
                self._encoder = tiktoken.encoding_for_model("gpt-4o")
                logger.debug("Initialized tiktoken with gpt-4o model encoding")
            except Exception as fallback_error:
                raise RuntimeError(
                    "Failed to initialize tiktoken encoder. "
                    "Check that the cache file is properly configured and accessible."
                ) from fallback_error

    @property
    def encoder(self) -> tiktoken.Encoding:
        """
        Get the tiktoken encoder instance.

        Raises:
            RuntimeError: If the encoder has not been initialized.
        """
        if self._encoder is None:
            raise RuntimeError("Token encoder not properly initialized")
        return self._encoder

    def count_tokens(self, text: str) -> int:
        """
        Count tokens in a text string.

        Args:
            text: Input text to count tokens for

        Returns:
            Number of tokens in the text; 0 for empty input. If encoding
            fails, a rough estimate of one token per four characters
            (at least 1) is returned instead.
        """
        if not text:
            return 0

        try:
            # Descriptions are arbitrary text and may legitimately contain
            # special-token markers such as "<|endoftext|>". With the default
            # disallowed_special="all", encode() raises ValueError on those;
            # an empty tuple makes them encode as ordinary text.
            return len(self.encoder.encode(text, disallowed_special=()))
        except Exception as e:
            logger.error(f"Failed to count tokens for text: {e}")
            # Fallback to rough approximation (4 chars per token). Non-empty
            # text is never reported as zero tokens.
            return max(1, len(text) // 4)

    def count_file_description_tokens(self, file_desc: FileDescription) -> int:
        """
        Count tokens for a file description in overview format.

        Args:
            file_desc: File description to count tokens for

        Returns:
            Number of tokens for formatted file description
        """
        # Format matches what would be shown in codebase overview
        formatted_content = f"{file_desc.file_path}\n{file_desc.description}\n"
        return self.count_tokens(formatted_content)

    def count_folder_structure_tokens(self, folder: FolderNode) -> int:
        """
        Count tokens for a complete folder structure.

        The total covers the folder header line, every file entry directly in
        the folder, and all subfolders, which are counted recursively.

        Args:
            folder: Root folder node to count tokens for

        Returns:
            Total number of tokens for the folder structure
        """
        total_tokens = self.count_tokens(f"{folder.name}/\n")

        total_tokens += sum(
            self.count_tokens(f"{file_node.path}\n{file_node.description}\n")
            for file_node in folder.files
        )

        total_tokens += sum(
            self.count_folder_structure_tokens(subfolder)
            for subfolder in folder.folders
        )

        return total_tokens

    def calculate_codebase_tokens(self, file_descriptions: List[FileDescription]) -> int:
        """
        Calculate total tokens for a list of file descriptions.

        Args:
            file_descriptions: List of file descriptions to count

        Returns:
            Total token count for all file descriptions (0 for an empty list)
        """
        return sum(
            self.count_file_description_tokens(file_desc)
            for file_desc in file_descriptions
        )

    def is_large_codebase(self, total_tokens: int) -> bool:
        """
        Check if codebase exceeds configured token limit.

        Args:
            total_tokens: Total token count to check

        Returns:
            True if total_tokens is strictly greater than the token limit
        """
        return total_tokens > self.token_limit

    def get_recommendation(self, total_tokens: int) -> str:
        """
        Get recommendation for codebase navigation approach.

        Args:
            total_tokens: Total token count

        Returns:
            "use_search" or "use_overview" based on token count
        """
        return "use_search" if self.is_large_codebase(total_tokens) else "use_overview"

    def generate_cache_key(self, project_id: str, branch: str, content_hash: str) -> str:
        """
        Generate a cache key for token count caching.

        Args:
            project_id: Project identifier
            branch: Git branch name
            content_hash: Hash of file contents or descriptions

        Returns:
            First 16 hex characters of the SHA-256 digest of
            "project_id:branch:content_hash"
        """
        key_content = f"{project_id}:{branch}:{content_hash}"
        return hashlib.sha256(key_content.encode()).hexdigest()[:16]
|
213
|
+
|
214
|
+
|
215
|
+
def verify_tiktoken_setup() -> bool:
    """
    Check that a TokenCounter can be built and counts a known string correctly.

    Returns:
        True when the probe string yields the expected token count; False on
        a mismatch or when any exception is raised along the way.
    """
    # cl100k_base should encode "Hello, world!" to 4 tokens
    expected_count = 4

    try:
        token_count = TokenCounter().count_tokens("Hello, world!")
        setup_ok = token_count == expected_count

        if not setup_ok:
            logger.warning(
                f"Tiktoken token count mismatch: expected {expected_count}, got {token_count}"
            )
        else:
            logger.info("Tiktoken offline setup verified successfully")

        return setup_ok

    except Exception as e:
        # Any failure while building the counter or counting is reported as
        # a failed verification rather than propagated.
        logger.error(f"Tiktoken setup verification failed: {e}")
        return False
|
@@ -0,0 +1 @@
|
|
1
|
+
"""MCP tools for file description management."""
|
@@ -0,0 +1,364 @@
|
|
1
|
+
Metadata-Version: 2.4
|
2
|
+
Name: mcp-code-indexer
|
3
|
+
Version: 1.0.0
|
4
|
+
Summary: MCP server that tracks file descriptions across codebases, enabling AI agents to efficiently navigate and understand code through searchable summaries and token-aware overviews.
|
5
|
+
Author: MCP Code Indexer Contributors
|
6
|
+
Maintainer: MCP Code Indexer Contributors
|
7
|
+
License: MIT
|
8
|
+
Project-URL: Homepage, https://github.com/fluffypony/mcp-code-indexer
|
9
|
+
Project-URL: Repository, https://github.com/fluffypony/mcp-code-indexer
|
10
|
+
Project-URL: Issues, https://github.com/fluffypony/mcp-code-indexer/issues
|
11
|
+
Project-URL: Documentation, https://github.com/fluffypony/mcp-code-indexer/blob/main/README.md
|
12
|
+
Keywords: mcp,model-context-protocol,code-indexer,ai-tools,codebase-navigation,file-descriptions,llm-tools
|
13
|
+
Classifier: Development Status :: 5 - Production/Stable
|
14
|
+
Classifier: Intended Audience :: Developers
|
15
|
+
Classifier: License :: OSI Approved :: MIT License
|
16
|
+
Classifier: Operating System :: OS Independent
|
17
|
+
Classifier: Programming Language :: Python :: 3
|
18
|
+
Classifier: Programming Language :: Python :: 3.8
|
19
|
+
Classifier: Programming Language :: Python :: 3.9
|
20
|
+
Classifier: Programming Language :: Python :: 3.10
|
21
|
+
Classifier: Programming Language :: Python :: 3.11
|
22
|
+
Classifier: Programming Language :: Python :: 3.12
|
23
|
+
Classifier: Topic :: Software Development
|
24
|
+
Classifier: Topic :: Software Development :: Libraries :: Python Modules
|
25
|
+
Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
|
26
|
+
Classifier: Framework :: AsyncIO
|
27
|
+
Classifier: Environment :: Console
|
28
|
+
Classifier: Typing :: Typed
|
29
|
+
Requires-Python: >=3.9
|
30
|
+
Description-Content-Type: text/markdown
|
31
|
+
License-File: LICENSE
|
32
|
+
Requires-Dist: tiktoken==0.7.0
|
33
|
+
Requires-Dist: mcp==1.0.0
|
34
|
+
Requires-Dist: gitignore_parser==0.1.11
|
35
|
+
Requires-Dist: pydantic>=2.8.0
|
36
|
+
Requires-Dist: aiofiles==23.2.0
|
37
|
+
Requires-Dist: aiosqlite==0.19.0
|
38
|
+
Provides-Extra: dev
|
39
|
+
Requires-Dist: pytest>=8.0.0; extra == "dev"
|
40
|
+
Requires-Dist: pytest-asyncio>=0.21.0; extra == "dev"
|
41
|
+
Requires-Dist: pytest-mock>=3.11.0; extra == "dev"
|
42
|
+
Requires-Dist: pytest-cov>=4.0.0; extra == "dev"
|
43
|
+
Requires-Dist: black>=23.0.0; extra == "dev"
|
44
|
+
Requires-Dist: isort>=5.12.0; extra == "dev"
|
45
|
+
Requires-Dist: flake8>=6.0.0; extra == "dev"
|
46
|
+
Requires-Dist: mypy>=1.0.0; extra == "dev"
|
47
|
+
Requires-Dist: pre-commit>=3.0.0; extra == "dev"
|
48
|
+
Provides-Extra: test
|
49
|
+
Requires-Dist: pytest>=8.0.0; extra == "test"
|
50
|
+
Requires-Dist: pytest-asyncio>=0.21.0; extra == "test"
|
51
|
+
Requires-Dist: pytest-mock>=3.11.0; extra == "test"
|
52
|
+
Requires-Dist: pytest-cov>=4.0.0; extra == "test"
|
53
|
+
Dynamic: license-file
|
54
|
+
Dynamic: requires-python
|
55
|
+
|
56
|
+
# MCP Code Indexer
|
57
|
+
|
58
|
+
[PyPI version](https://badge.fury.io/py/mcp-code-indexer)
|
59
|
+
[Supported Python versions](https://pypi.org/project/mcp-code-indexer/)
|
60
|
+
[License: MIT](https://opensource.org/licenses/MIT)
|
61
|
+
|
62
|
+
A production-ready **Model Context Protocol (MCP) server** that provides intelligent codebase navigation for AI agents through searchable file descriptions, token-aware overviews, and advanced merge capabilities.
|
63
|
+
|
64
|
+
## 🎯 What It Does
|
65
|
+
|
66
|
+
The MCP Code Indexer solves a critical problem for AI agents working with large codebases: **understanding code structure without repeatedly scanning files**. Instead of reading every file, agents can:
|
67
|
+
|
68
|
+
- **Query file purposes** instantly with natural language descriptions
|
69
|
+
- **Search across codebases** using full-text search
|
70
|
+
- **Get intelligent recommendations** based on codebase size (overview vs search)
|
71
|
+
- **Merge branch descriptions** with conflict resolution
|
72
|
+
- **Inherit descriptions** from upstream repositories automatically
|
73
|
+
|
74
|
+
Perfect for AI-powered code review, refactoring tools, documentation generation, and codebase analysis workflows.
|
75
|
+
|
76
|
+
## ⚡ Quick Start
|
77
|
+
|
78
|
+
### Install from PyPI
|
79
|
+
|
80
|
+
```bash
|
81
|
+
# Install the package
|
82
|
+
pip install mcp-code-indexer
|
83
|
+
|
84
|
+
# Run the server
|
85
|
+
mcp-code-indexer --token-limit 32000
|
86
|
+
|
87
|
+
# Check version
|
88
|
+
mcp-code-indexer --version
|
89
|
+
```
|
90
|
+
|
91
|
+
### Install from Source
|
92
|
+
|
93
|
+
```bash
|
94
|
+
# Clone and setup
|
95
|
+
git clone https://github.com/fluffypony/mcp-code-indexer.git
|
96
|
+
cd mcp-code-indexer
|
97
|
+
|
98
|
+
# Install in development mode
|
99
|
+
pip install -e .
|
100
|
+
|
101
|
+
# Run the server
|
102
|
+
mcp-code-indexer --token-limit 32000
|
103
|
+
```
|
104
|
+
|
105
|
+
## 🔧 Development Setup
|
106
|
+
|
107
|
+
For development work, you **must** install the package in editable mode to ensure proper import resolution:
|
108
|
+
|
109
|
+
```bash
|
110
|
+
# Setup development environment
|
111
|
+
git clone https://github.com/fluffypony/mcp-code-indexer.git
|
112
|
+
cd mcp-code-indexer
|
113
|
+
|
114
|
+
# Create and activate virtual environment
|
115
|
+
python -m venv venv
|
116
|
+
source venv/bin/activate # On Windows: venv\Scripts\activate
|
117
|
+
|
118
|
+
# Install package in editable mode (REQUIRED for development)
|
119
|
+
pip install -e .
|
120
|
+
|
121
|
+
# Install development dependencies
|
122
|
+
pip install -e .[dev]
|
123
|
+
|
124
|
+
# Verify installation
|
125
|
+
python main.py --help
|
126
|
+
mcp-code-indexer --version
|
127
|
+
```
|
128
|
+
|
129
|
+
### Why Editable Install is Required
|
130
|
+
|
131
|
+
The project uses a proper PyPI package structure with absolute imports like `from mcp_code_indexer.database.database import DatabaseManager`. Without the editable installation (`pip install -e .`), Python cannot resolve these imports and you'll get `ModuleNotFoundError` exceptions.
|
132
|
+
|
133
|
+
### Development Workflow
|
134
|
+
|
135
|
+
```bash
|
136
|
+
# Activate virtual environment
|
137
|
+
source venv/bin/activate
|
138
|
+
|
139
|
+
# Run the server directly
|
140
|
+
python main.py --token-limit 32000
|
141
|
+
|
142
|
+
# Or use the installed CLI command
|
143
|
+
mcp-code-indexer --token-limit 32000
|
144
|
+
|
145
|
+
# Run tests
|
146
|
+
python -m pytest tests/ -v
|
147
|
+
|
148
|
+
# Run with coverage
|
149
|
+
python -m pytest tests/ --cov=src --cov-report=html
|
150
|
+
|
151
|
+
# Format code
|
152
|
+
black src/ tests/
|
153
|
+
isort src/ tests/
|
154
|
+
|
155
|
+
# Type checking
|
156
|
+
mypy src/
|
157
|
+
```
|
158
|
+
|
159
|
+
## 🛠️ MCP Tools Available
|
160
|
+
|
161
|
+
The server provides **8 powerful MCP tools** for intelligent codebase management:
|
162
|
+
|
163
|
+
### Core Operations
|
164
|
+
- **`get_file_description`** - Retrieve stored file descriptions instantly
|
165
|
+
- **`update_file_description`** - Store detailed file summaries and metadata
|
166
|
+
- **`check_codebase_size`** - Get token count and size-based recommendations
|
167
|
+
|
168
|
+
### Batch Operations
|
169
|
+
- **`find_missing_descriptions`** - Scan projects for files without descriptions
|
170
|
+
- **`update_missing_descriptions`** - Bulk update multiple file descriptions
|
171
|
+
|
172
|
+
### Search & Discovery
|
173
|
+
- **`search_descriptions`** - Fast full-text search across all descriptions
|
174
|
+
- **`get_codebase_overview`** - Complete hierarchical project structure
|
175
|
+
|
176
|
+
### Advanced Features
|
177
|
+
- **`merge_branch_descriptions`** - Two-phase merge with conflict resolution
|
178
|
+
|
179
|
+
## 🏗️ Architecture Highlights
|
180
|
+
|
181
|
+
### Performance Optimized
|
182
|
+
- **SQLite with WAL mode** for high-concurrency access
|
183
|
+
- **Connection pooling** for efficient database operations
|
184
|
+
- **FTS5 full-text search** with prefix indexing
|
185
|
+
- **Token-aware caching** to minimize expensive operations
|
186
|
+
|
187
|
+
### Production Ready
|
188
|
+
- **Comprehensive error handling** with structured JSON logging
|
189
|
+
- **Async-first design** with proper resource cleanup
|
190
|
+
- **Upstream inheritance** for fork workflows
|
191
|
+
- **Git integration** with .gitignore support
|
192
|
+
|
193
|
+
### Developer Friendly
|
194
|
+
- **95%+ test coverage** with async support
|
195
|
+
- **Integration tests** for complete workflows
|
196
|
+
- **Performance benchmarks** for large codebases
|
197
|
+
- **Clear error messages** with MCP protocol compliance
|
198
|
+
|
199
|
+
## Documentation
|
200
|
+
|
201
|
+
- **[API Reference](docs/api-reference.md)** - Complete MCP tool documentation
|
202
|
+
- **[Configuration Guide](docs/configuration.md)** - Setup and tuning options
|
203
|
+
- **[Architecture Overview](docs/architecture.md)** - Technical deep dive
|
204
|
+
- **[Contributing Guide](docs/contributing.md)** - Development workflow
|
205
|
+
|
206
|
+
## 🚦 System Requirements
|
207
|
+
|
208
|
+
- **Python 3.9+** with asyncio support
|
209
|
+
- **SQLite 3.35+** (included with Python)
|
210
|
+
- **4GB+ RAM** for large codebases (1000+ files)
|
211
|
+
- **SSD storage** recommended for optimal performance
|
212
|
+
|
213
|
+
## Performance
|
214
|
+
|
215
|
+
Tested with codebases up to **10,000 files**:
|
216
|
+
- File description retrieval: **< 10ms**
|
217
|
+
- Full-text search: **< 100ms**
|
218
|
+
- Codebase overview generation: **< 2s**
|
219
|
+
- Merge conflict detection: **< 5s**
|
220
|
+
|
221
|
+
## 🔧 Advanced Configuration
|
222
|
+
|
223
|
+
```bash
|
224
|
+
# Production setup with custom limits
|
225
|
+
mcp-code-indexer \
|
226
|
+
--token-limit 50000 \
|
227
|
+
--db-path /data/mcp-index.db \
|
228
|
+
--cache-dir /tmp/mcp-cache \
|
229
|
+
--log-level INFO
|
230
|
+
|
231
|
+
# Enable structured logging
|
232
|
+
export MCP_LOG_FORMAT=json
|
233
|
+
mcp-code-indexer
|
234
|
+
```
|
235
|
+
|
236
|
+
## 🤝 Integration Examples
|
237
|
+
|
238
|
+
### With AI Agents
|
239
|
+
```python
|
240
|
+
# Example: AI agent using MCP tools
|
241
|
+
async def analyze_codebase(project_path):
|
242
|
+
# Check if codebase is large
|
243
|
+
size_info = await mcp_client.call_tool("check_codebase_size", {
|
244
|
+
"projectName": "my-project",
|
245
|
+
"folderPath": project_path,
|
246
|
+
"branch": "main"
|
247
|
+
})
|
248
|
+
|
249
|
+
if size_info["isLarge"]:
|
250
|
+
# Use search for large codebases
|
251
|
+
results = await mcp_client.call_tool("search_descriptions", {
|
252
|
+
"projectName": "my-project",
|
253
|
+
"folderPath": project_path,
|
254
|
+
"branch": "main",
|
255
|
+
"query": "authentication logic"
|
256
|
+
})
|
257
|
+
else:
|
258
|
+
# Get full overview for smaller projects
|
259
|
+
overview = await mcp_client.call_tool("get_codebase_overview", {
|
260
|
+
"projectName": "my-project",
|
261
|
+
"folderPath": project_path,
|
262
|
+
"branch": "main"
|
263
|
+
})
|
264
|
+
```
|
265
|
+
|
266
|
+
### With CI/CD Pipelines
|
267
|
+
```yaml
|
268
|
+
# Example: GitHub Actions integration
|
269
|
+
- name: Update Code Descriptions
|
270
|
+
run: |
|
271
|
+
python -c "
|
272
|
+
import asyncio
|
273
|
+
from mcp_client import MCPClient
|
274
|
+
|
275
|
+
async def update_descriptions():
|
276
|
+
client = MCPClient('mcp-code-indexer')
|
277
|
+
|
278
|
+
# Find files without descriptions
|
279
|
+
missing = await client.call_tool('find_missing_descriptions', {
|
280
|
+
'projectName': '${{ github.repository }}',
|
281
|
+
'folderPath': '.',
|
282
|
+
'branch': '${{ github.ref_name }}'
|
283
|
+
})
|
284
|
+
|
285
|
+
# Process with AI and update...
|
286
|
+
|
287
|
+
asyncio.run(update_descriptions())
|
288
|
+
"
|
289
|
+
```
|
290
|
+
|
291
|
+
## 🧪 Testing
|
292
|
+
|
293
|
+
```bash
|
294
|
+
# Install with test dependencies
|
295
|
+
pip install mcp-code-indexer[test]
|
296
|
+
|
297
|
+
# Run full test suite
|
298
|
+
python -m pytest tests/ -v
|
299
|
+
|
300
|
+
# Run with coverage
|
301
|
+
python -m pytest tests/ --cov=src --cov-report=html
|
302
|
+
|
303
|
+
# Run performance tests
|
304
|
+
python -m pytest tests/ -m performance
|
305
|
+
|
306
|
+
# Run integration tests only
|
307
|
+
python -m pytest tests/integration/ -v
|
308
|
+
```
|
309
|
+
|
310
|
+
## Monitoring
|
311
|
+
|
312
|
+
The server provides structured JSON logs for monitoring:
|
313
|
+
|
314
|
+
```json
|
315
|
+
{
|
316
|
+
"timestamp": "2024-01-15T10:30:00Z",
|
317
|
+
"level": "INFO",
|
318
|
+
"message": "Tool search_descriptions completed",
|
319
|
+
"tool_usage": {
|
320
|
+
"tool_name": "search_descriptions",
|
321
|
+
"success": true,
|
322
|
+
"duration_seconds": 0.045,
|
323
|
+
"result_size": 1247
|
324
|
+
}
|
325
|
+
}
|
326
|
+
```
|
327
|
+
|
328
|
+
## 🛡️ Security Features
|
329
|
+
|
330
|
+
- **Input validation** on all MCP tool parameters
|
331
|
+
- **SQL injection protection** via parameterized queries
|
332
|
+
- **File system sandboxing** with .gitignore respect
|
333
|
+
- **Error sanitization** to prevent information leakage
|
334
|
+
- **Async resource cleanup** to prevent memory leaks
|
335
|
+
|
336
|
+
## Next Steps
|
337
|
+
|
338
|
+
1. **[Read the API docs](docs/api-reference.md)** to understand available tools
|
339
|
+
2. **[Check the configuration guide](docs/configuration.md)** for advanced setup
|
340
|
+
3. **[Review the architecture](docs/architecture.md)** for technical details
|
341
|
+
4. **[Contribute](docs/contributing.md)** to help improve the project
|
342
|
+
|
343
|
+
## 🤝 Contributing
|
344
|
+
|
345
|
+
We welcome contributions! See our **[Contributing Guide](docs/contributing.md)** for:
|
346
|
+
- Development setup
|
347
|
+
- Code style guidelines
|
348
|
+
- Testing requirements
|
349
|
+
- Pull request process
|
350
|
+
|
351
|
+
## License
|
352
|
+
|
353
|
+
MIT License - see **[LICENSE](LICENSE)** for details.
|
354
|
+
|
355
|
+
## Built With
|
356
|
+
|
357
|
+
- **[Model Context Protocol](https://github.com/modelcontextprotocol/python-sdk)** - The foundation for tool integration
|
358
|
+
- **[tiktoken](https://pypi.org/project/tiktoken/)** - Fast BPE tokenization
|
359
|
+
- **[aiosqlite](https://pypi.org/project/aiosqlite/)** - Async SQLite operations
|
360
|
+
- **[Pydantic](https://pydantic.dev/)** - Data validation and settings
|
361
|
+
|
362
|
+
---
|
363
|
+
|
364
|
+
**Ready to supercharge your AI agents with intelligent codebase navigation?** [Install from PyPI](#install-from-pypi) or [explore the API docs](docs/api-reference.md)!
|
@@ -0,0 +1,22 @@
|
|
1
|
+
mcp_code_indexer/__init__.py,sha256=bKiDtK7m0i3FVZXbtCRYVbEQzDlsrBM93nLMvF7Lypo,473
|
2
|
+
mcp_code_indexer/error_handler.py,sha256=cNSUFFrGBMLDv4qa78c7495L1wSl_dXCRbzCJOidx-Q,11590
|
3
|
+
mcp_code_indexer/file_scanner.py,sha256=1Z6wq7H14V1OMAHIF4v9G7SY8hC1puDmU5IXsCKH4kU,11442
|
4
|
+
mcp_code_indexer/logging_config.py,sha256=5L1cYIG8IAX91yCjc5pzkbO_KPt0bvm_ABHB53LBZjI,5184
|
5
|
+
mcp_code_indexer/main.py,sha256=zM08T96zqq8HT02rslQhHxkOgvx8J77RmFUJWdqKCqY,3419
|
6
|
+
mcp_code_indexer/merge_handler.py,sha256=lJR8eVq2qSrF6MW9mR3Fy8UzrNAaQ7RsI2FMNXne3vQ,14692
|
7
|
+
mcp_code_indexer/token_counter.py,sha256=WrifOkbF99nWWHlRlhCHAB2KN7qr83GOHl7apE-hJcE,8460
|
8
|
+
mcp_code_indexer/database/__init__.py,sha256=aPq_aaRp0aSwOBIq9GkuMNjmLxA411zg2vhdrAuHm-w,38
|
9
|
+
mcp_code_indexer/database/database.py,sha256=rmQ30CguudV70sC5ledRwjc7dKvxJ4TjwcDZpReFDVE,17973
|
10
|
+
mcp_code_indexer/database/models.py,sha256=3wOxHKb6j3zKPWFSwB5g1TLpI507vLNZcqsxZR4VuRs,5528
|
11
|
+
mcp_code_indexer/middleware/__init__.py,sha256=p-mP0pMsfiU2yajCPvokCUxUEkh_lu4XJP1LyyMW2ug,220
|
12
|
+
mcp_code_indexer/middleware/error_middleware.py,sha256=v6jaHmPxf3qerYdb85X1tHIXLxgcbybpitKVakFLQTA,10109
|
13
|
+
mcp_code_indexer/server/__init__.py,sha256=16xMcuriUOBlawRqWNBk6niwrvtv_JD5xvI36X1Vsmk,41
|
14
|
+
mcp_code_indexer/server/mcp_server.py,sha256=6wm4XKfTLSpfHP9GpCqwWsekQFwwo9ngqM-ZXiJidx4,33424
|
15
|
+
mcp_code_indexer/tiktoken_cache/9b5ad71b2ce5302211f9c61530b329a4922fc6a4,sha256=Ijkht27pm96ZW3_3OFE-7xAPtR0YyTWXoRO8_-hlsqc,1681126
|
16
|
+
mcp_code_indexer/tools/__init__.py,sha256=m01mxML2UdD7y5rih_XNhNSCMzQTz7WQ_T1TeOcYlnE,49
|
17
|
+
mcp_code_indexer-1.0.0.dist-info/licenses/LICENSE,sha256=JN9dyPPgYwH9C-UjYM7FLNZjQ6BF7kAzpF3_4PwY4rY,1086
|
18
|
+
mcp_code_indexer-1.0.0.dist-info/METADATA,sha256=GllXN1R8ya1no8exdC3pOm7TKNJ5QfdD7KaWEbY2Q68,11875
|
19
|
+
mcp_code_indexer-1.0.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
20
|
+
mcp_code_indexer-1.0.0.dist-info/entry_points.txt,sha256=8HqWOw1Is7jOP1bvIgaSwouvT9z_Boe-9hd4NzyJOhY,68
|
21
|
+
mcp_code_indexer-1.0.0.dist-info/top_level.txt,sha256=yKYCM-gMGt-cnupGfAhnZaoEsROLB6DQ1KFUuyKx4rw,17
|
22
|
+
mcp_code_indexer-1.0.0.dist-info/RECORD,,
|
@@ -0,0 +1,21 @@
|
|
1
|
+
MIT License
|
2
|
+
|
3
|
+
Copyright (c) 2024 MCP Code Indexer Contributors
|
4
|
+
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
7
|
+
in the Software without restriction, including without limitation the rights
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
10
|
+
furnished to do so, subject to the following conditions:
|
11
|
+
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
13
|
+
copies or substantial portions of the Software.
|
14
|
+
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
21
|
+
SOFTWARE.
|
@@ -0,0 +1 @@
|
|
1
|
+
mcp_code_indexer
|