mcp-code-indexer 4.0.1__py3-none-any.whl → 4.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (57) hide show
  1. mcp_code_indexer/__init__.py +7 -5
  2. mcp_code_indexer/ask_handler.py +2 -2
  3. mcp_code_indexer/claude_api_handler.py +10 -5
  4. mcp_code_indexer/cleanup_manager.py +20 -12
  5. mcp_code_indexer/commands/makelocal.py +85 -63
  6. mcp_code_indexer/data/stop_words_english.txt +1 -1
  7. mcp_code_indexer/database/connection_health.py +29 -20
  8. mcp_code_indexer/database/database.py +44 -31
  9. mcp_code_indexer/database/database_factory.py +19 -20
  10. mcp_code_indexer/database/exceptions.py +10 -10
  11. mcp_code_indexer/database/models.py +126 -1
  12. mcp_code_indexer/database/path_resolver.py +22 -21
  13. mcp_code_indexer/database/retry_executor.py +37 -19
  14. mcp_code_indexer/deepask_handler.py +3 -3
  15. mcp_code_indexer/error_handler.py +46 -20
  16. mcp_code_indexer/file_scanner.py +15 -12
  17. mcp_code_indexer/git_hook_handler.py +71 -76
  18. mcp_code_indexer/logging_config.py +13 -5
  19. mcp_code_indexer/main.py +85 -22
  20. mcp_code_indexer/middleware/__init__.py +1 -1
  21. mcp_code_indexer/middleware/auth.py +47 -43
  22. mcp_code_indexer/middleware/error_middleware.py +15 -15
  23. mcp_code_indexer/middleware/logging.py +44 -42
  24. mcp_code_indexer/middleware/security.py +84 -76
  25. mcp_code_indexer/migrations/002_performance_indexes.sql +1 -1
  26. mcp_code_indexer/migrations/004_remove_branch_dependency.sql +14 -14
  27. mcp_code_indexer/migrations/006_vector_mode.sql +189 -0
  28. mcp_code_indexer/query_preprocessor.py +2 -2
  29. mcp_code_indexer/server/mcp_server.py +158 -94
  30. mcp_code_indexer/transport/__init__.py +1 -1
  31. mcp_code_indexer/transport/base.py +19 -17
  32. mcp_code_indexer/transport/http_transport.py +89 -76
  33. mcp_code_indexer/transport/stdio_transport.py +12 -8
  34. mcp_code_indexer/vector_mode/__init__.py +36 -0
  35. mcp_code_indexer/vector_mode/chunking/__init__.py +19 -0
  36. mcp_code_indexer/vector_mode/chunking/ast_chunker.py +403 -0
  37. mcp_code_indexer/vector_mode/chunking/chunk_optimizer.py +500 -0
  38. mcp_code_indexer/vector_mode/chunking/language_handlers.py +478 -0
  39. mcp_code_indexer/vector_mode/config.py +155 -0
  40. mcp_code_indexer/vector_mode/daemon.py +335 -0
  41. mcp_code_indexer/vector_mode/monitoring/__init__.py +19 -0
  42. mcp_code_indexer/vector_mode/monitoring/change_detector.py +312 -0
  43. mcp_code_indexer/vector_mode/monitoring/file_watcher.py +445 -0
  44. mcp_code_indexer/vector_mode/monitoring/merkle_tree.py +418 -0
  45. mcp_code_indexer/vector_mode/providers/__init__.py +72 -0
  46. mcp_code_indexer/vector_mode/providers/base_provider.py +230 -0
  47. mcp_code_indexer/vector_mode/providers/turbopuffer_client.py +338 -0
  48. mcp_code_indexer/vector_mode/providers/voyage_client.py +212 -0
  49. mcp_code_indexer/vector_mode/security/__init__.py +11 -0
  50. mcp_code_indexer/vector_mode/security/patterns.py +297 -0
  51. mcp_code_indexer/vector_mode/security/redactor.py +368 -0
  52. {mcp_code_indexer-4.0.1.dist-info → mcp_code_indexer-4.1.0.dist-info}/METADATA +82 -24
  53. mcp_code_indexer-4.1.0.dist-info/RECORD +66 -0
  54. mcp_code_indexer-4.0.1.dist-info/RECORD +0 -47
  55. {mcp_code_indexer-4.0.1.dist-info → mcp_code_indexer-4.1.0.dist-info}/LICENSE +0 -0
  56. {mcp_code_indexer-4.0.1.dist-info → mcp_code_indexer-4.1.0.dist-info}/WHEEL +0 -0
  57. {mcp_code_indexer-4.0.1.dist-info → mcp_code_indexer-4.1.0.dist-info}/entry_points.txt +0 -0
@@ -0,0 +1,368 @@
1
+ """
2
+ Secret redaction engine for vector mode.
3
+
4
+ Detects and redacts sensitive information from code before sending
5
+ to external APIs for embedding generation.
6
+ """
7
+
8
import hashlib
import logging
import re
from dataclasses import dataclass, field
from pathlib import Path
from typing import Any, Dict, List, NamedTuple, Optional, Set

from .patterns import PatternMatch, SecurityPatterns
16
+
17
+ logger = logging.getLogger(__name__)
18
+
19
class RedactionResult(NamedTuple):
    """Immutable record describing the outcome of one redaction pass."""

    # SHA-256 hex digest of the content as it was before redaction
    # (empty string when the source file could not be read).
    original_hash: str
    # Content with every accepted match replaced by a marker.
    redacted_content: str
    # Number of matches that were redacted.
    redaction_count: int
    # Pattern name of each redacted match, in match order.
    patterns_matched: List[str]
    # Confidence of each redacted match, parallel to ``patterns_matched``.
    confidence_scores: List[float]
    # True when at least one match was redacted.
    was_redacted: bool
27
+
28
@dataclass
class RedactionStats:
    """Running counters describing redaction activity.

    The two dictionary fields get a fresh, independent ``dict`` per instance
    via ``default_factory`` so the ``Dict[str, int]`` annotation is always
    true and no state is shared between instances.
    """

    # Number of contents that went through the full redaction path.
    total_files_processed: int = 0
    # Total number of individual matches redacted.
    total_redactions: int = 0
    # Redaction count keyed by ``PatternMatch.pattern_type``.
    redactions_by_type: Dict[str, int] = field(default_factory=dict)
    # Redaction count keyed by ``PatternMatch.pattern_name``.
    redactions_by_pattern: Dict[str, int] = field(default_factory=dict)
    # Redactions whose confidence was at least 0.8.
    high_confidence_redactions: int = 0

    def __post_init__(self) -> None:
        """Normalise an explicitly passed ``None`` to an empty dict.

        Kept for backward compatibility: earlier versions used ``None`` as
        the default, so callers may still pass it explicitly.
        """
        if self.redactions_by_type is None:
            self.redactions_by_type = {}
        if self.redactions_by_pattern is None:
            self.redactions_by_pattern = {}
42
+
43
class SecretRedactor:
    """
    Main secret redaction engine.

    Scans code content for secrets and replaces them with safe placeholders
    while preserving code structure and semantics.
    """

    # Single-line comment regexes keyed by lower-cased file extension.
    # They are applied to one line at a time, so a block comment that spans
    # several lines is only recognised when it opens and closes on the same
    # line.
    _COMMENT_PATTERNS = {
        '.py': (r'#.*', r'""".*?"""', r"'''.*?'''"),
        '.js': (r'//.*', r'/\*.*?\*/'),
        '.java': (r'//.*', r'/\*.*?\*/'),
        '.cpp': (r'//.*', r'/\*.*?\*/'),
        '.c': (r'//.*', r'/\*.*?\*/'),
        '.go': (r'//.*', r'/\*.*?\*/'),
        '.rs': (r'//.*', r'/\*.*?\*/'),
        '.sh': (r'#.*',),
        '.sql': (r'--.*', r'/\*.*?\*/'),
    }

    def __init__(
        self,
        min_confidence: float = 0.5,
        preserve_structure: bool = True,
        redaction_marker: str = "[REDACTED]",
        custom_patterns_file: Optional[Path] = None,
    ):
        """
        Initialize the secret redactor.

        Args:
            min_confidence: Minimum confidence threshold for redaction
            preserve_structure: Whether markers keep the length of the text
                they replace
            redaction_marker: Marker used when ``preserve_structure`` is off
            custom_patterns_file: Path to custom patterns file
        """
        self.min_confidence = min_confidence
        self.preserve_structure = preserve_structure
        self.redaction_marker = redaction_marker

        # Load security patterns
        self.patterns = SecurityPatterns()

        # Load custom patterns if provided
        if custom_patterns_file and custom_patterns_file.exists():
            self._load_custom_patterns(custom_patterns_file)

        # Statistics tracking
        self.stats = RedactionStats()

        # File extensions that are returned unredacted.
        # NOTE(review): .json/.yaml/.yml/.txt frequently hold credentials;
        # confirm that skipping them is intended before relying on this list.
        self.safe_extensions = {
            '.md', '.txt', '.rst', '.json', '.yaml', '.yml',
            '.xml', '.html', '.css', '.svg', '.license'
        }

        # Cache of filtered matches keyed by the caller-supplied cache key
        self._pattern_cache: Dict[str, List[PatternMatch]] = {}

    def _load_custom_patterns(self, patterns_file: Path) -> None:
        """Load custom security patterns from file (not implemented yet)."""
        try:
            # TODO: Implement custom pattern loading
            logger.info(f"Custom patterns file specified but not yet implemented: {patterns_file}")
        except Exception as e:
            logger.warning(f"Failed to load custom patterns from {patterns_file}: {e}")

    def _should_process_file(self, file_path: str) -> bool:
        """Determine if a file should be processed for redaction.

        Returns False for files with a "safe" extension, files whose name
        contains a test marker, and files under a documentation directory.
        """
        path = Path(file_path)

        # Skip safe file types
        if path.suffix.lower() in self.safe_extensions:
            return False

        # Skip test files (often contain mock data).
        # NOTE(review): this is a substring test, so names such as
        # "latest.py" or "contest.py" are skipped as well - confirm intent.
        if any(test_marker in path.name.lower() for test_marker in ['test', 'spec', 'mock']):
            return False

        # Skip documentation directories
        if any(doc_dir in path.parts for doc_dir in ['docs', 'documentation', 'examples']):
            return False

        return True

    def _generate_content_hash(self, content: str) -> str:
        """Generate SHA-256 hex digest of the UTF-8 encoded content."""
        return hashlib.sha256(content.encode('utf-8')).hexdigest()

    def _create_redaction_marker(
        self,
        pattern_type: str,
        pattern_name: str,
        original_length: int
    ) -> str:
        """Create a redaction marker, optionally preserving text length.

        Args:
            pattern_type: Category of the secret; upper-cased into the marker
            pattern_name: Name of the matching pattern (currently unused)
            original_length: Length of the text being replaced

        Returns:
            ``self.redaction_marker`` when structure preservation is off;
            otherwise ``[REDACTED:<TYPE>]`` truncated or padded with ``X`` to
            exactly ``original_length`` characters.
        """
        if not self.preserve_structure:
            return self.redaction_marker

        # Create a marker that maintains similar length
        base_marker = f"[REDACTED:{pattern_type.upper()}]"

        if original_length <= len(base_marker):
            return base_marker[:original_length]

        # Pad with safe characters to maintain length
        padding = 'X' * (original_length - len(base_marker))
        return base_marker + padding

    def _redact_matches(self, content: str, matches: "List[PatternMatch]") -> str:
        """Apply redactions to content based on matches.

        Overlapping or nested matches are merged into a single span before
        replacement. Replacing them one by one with offsets taken from the
        original text can leave part of a secret behind whenever the marker
        length differs from the matched text.

        Args:
            content: Text the match offsets refer to
            matches: Matches to redact (``start_pos``/``end_pos`` offsets
                into ``content``)

        Returns:
            Content with every matched span replaced by a marker. Statistics
            in ``self.stats`` are updated once per match.
        """
        if not matches:
            return content

        ordered = sorted(matches, key=lambda m: (m.start_pos, m.end_pos))

        # Merge overlapping matches into [start, end, members] spans.
        spans: List[list] = []
        for match in ordered:
            if spans and match.start_pos < spans[-1][1]:
                current = spans[-1]
                current[1] = max(current[1], match.end_pos)
                current[2].append(match)
            else:
                spans.append([match.start_pos, match.end_pos, [match]])

        # Rebuild the text left to right so offsets never go stale.
        pieces: List[str] = []
        cursor = 0
        for start, end, members in spans:
            # The most confident member decides how the span is labelled.
            lead = max(members, key=lambda m: m.confidence)
            if len(members) == 1:
                length = len(lead.matched_text)
            else:
                length = end - start
            pieces.append(content[cursor:start])
            pieces.append(
                self._create_redaction_marker(
                    lead.pattern_type,
                    lead.pattern_name,
                    length
                )
            )
            cursor = end
        pieces.append(content[cursor:])

        # Update statistics for every individual match
        for match in ordered:
            self.stats.total_redactions += 1
            self.stats.redactions_by_type[match.pattern_type] = (
                self.stats.redactions_by_type.get(match.pattern_type, 0) + 1
            )
            self.stats.redactions_by_pattern[match.pattern_name] = (
                self.stats.redactions_by_pattern.get(match.pattern_name, 0) + 1
            )

            if match.confidence >= 0.8:
                self.stats.high_confidence_redactions += 1

            # Never log any part of the matched text: log files must not
            # become a second place where the secret is stored.
            logger.debug(
                "Redacted %s (confidence: %.2f, length: %d)",
                match.pattern_name,
                match.confidence,
                len(match.matched_text),
            )

        return "".join(pieces)

    def _filter_false_positives(
        self,
        matches: "List[PatternMatch]",
        content: str,
        file_path: Optional[str] = None
    ) -> "List[PatternMatch]":
        """Filter out likely false positives based on context.

        Args:
            matches: Candidate matches
            content: Text the matches were found in
            file_path: Optional path, used to pick the comment syntax

        Returns:
            The matches that survived every heuristic, in input order.
        """
        filtered_matches = []

        for match in matches:
            # Skip very short matches for low-confidence patterns
            if match.confidence < 0.7 and len(match.matched_text) < 16:
                continue

            # Skip matches that look like placeholders
            if self._looks_like_placeholder(match.matched_text):
                continue

            # Skip matches in comments (for code files)
            if file_path and self._is_in_comment(content, match.start_pos, file_path):
                continue

            # Skip matches that are likely examples or documentation
            if self._is_example_content(content, match.start_pos):
                continue

            filtered_matches.append(match)

        return filtered_matches

    def _looks_like_placeholder(self, text: str) -> bool:
        """Check if text looks like a placeholder rather than real secret.

        Case-insensitive substring test against a fixed indicator list.
        """
        placeholder_indicators = [
            'example', 'sample', 'test', 'demo', 'placeholder', 'your',
            'xxxxx', '11111', '00000', 'aaaaa', 'abcdef',
            'replace', 'insert', 'enter', 'put'
        ]

        text_lower = text.lower()
        return any(indicator in text_lower for indicator in placeholder_indicators)

    def _is_in_comment(self, content: str, position: int, file_path: str) -> bool:
        """Check if position is within a comment on its own line.

        Only the line containing ``position`` is inspected, and the position
        must fall inside the comment span itself. Code that merely precedes a
        trailing comment on the same line is not treated as commented out.

        Args:
            content: Full text
            position: Character offset into ``content``
            file_path: Path whose extension selects the comment syntax

        Returns:
            True when a known comment pattern on that line covers the
            position; False otherwise, including for unknown extensions.
        """
        # Get file extension for comment style detection
        ext = Path(file_path).suffix.lower()
        patterns = self._COMMENT_PATTERNS.get(ext, ())
        if not patterns:
            return False

        # Isolate the line containing the position
        line_start = content.rfind('\n', 0, position) + 1
        line_end = content.find('\n', position)
        if line_end == -1:
            line_end = len(content)
        current_line = content[line_start:line_end]
        column = position - line_start

        for pattern in patterns:
            for found in re.finditer(pattern, current_line, re.DOTALL):
                if found.start() <= column < found.end():
                    return True

        return False

    def _is_example_content(self, content: str, position: int, context_size: int = 100) -> bool:
        """Check if content around position suggests it's example/documentation.

        Looks for indicator words, case-insensitively, in a window of
        ``context_size`` characters on each side of ``position``.
        """
        start = max(0, position - context_size)
        end = min(len(content), position + context_size)
        context = content[start:end].lower()

        example_indicators = [
            'example', 'sample', 'demo', 'tutorial', 'documentation',
            'readme', 'how to', 'getting started', 'quickstart'
        ]

        return any(indicator in context for indicator in example_indicators)

    def redact_content(
        self,
        content: str,
        file_path: Optional[str] = None,
        cache_key: Optional[str] = None
    ) -> "RedactionResult":
        """
        Redact secrets from content.

        Args:
            content: Content to redact
            file_path: Path to file (for context)
            cache_key: Optional cache key for performance

        Returns:
            RedactionResult with original hash and redacted content
        """
        # Generate hash of original content
        original_hash = self._generate_content_hash(content)

        # Check if file should be processed
        if file_path and not self._should_process_file(file_path):
            logger.debug(f"Skipping redaction for safe file: {file_path}")
            return RedactionResult(
                original_hash=original_hash,
                redacted_content=content,
                redaction_count=0,
                patterns_matched=[],
                confidence_scores=[],
                was_redacted=False
            )

        # Check cache if available.
        # NOTE(review): cached matches hold offsets into the content they
        # were computed from; reusing a cache_key for different content would
        # apply stale offsets - callers presumably key on a content hash,
        # confirm.
        if cache_key and cache_key in self._pattern_cache:
            matches = self._pattern_cache[cache_key]
        else:
            # Find all pattern matches
            matches = self.patterns.find_matches(content, self.min_confidence)

            # Filter false positives
            matches = self._filter_false_positives(matches, content, file_path)

            # Cache results
            if cache_key:
                self._pattern_cache[cache_key] = matches

        # Apply redactions
        redacted_content = self._redact_matches(content, matches)

        # Update statistics
        self.stats.total_files_processed += 1

        # Log redaction summary
        if matches:
            logger.info(
                f"Redacted {len(matches)} secrets from {file_path or 'content'} "
                f"(confidence range: {min(m.confidence for m in matches):.2f}-"
                f"{max(m.confidence for m in matches):.2f})"
            )

        return RedactionResult(
            original_hash=original_hash,
            redacted_content=redacted_content,
            redaction_count=len(matches),
            patterns_matched=[m.pattern_name for m in matches],
            confidence_scores=[m.confidence for m in matches],
            was_redacted=len(matches) > 0
        )

    def redact_file(self, file_path: Path) -> "RedactionResult":
        """Redact secrets from a file.

        The file is read as UTF-8 with undecodable bytes dropped. On any
        failure the error is logged and an empty result (empty hash, empty
        content, ``was_redacted=False``) is returned instead of raising.
        """
        try:
            with open(file_path, 'r', encoding='utf-8', errors='ignore') as f:
                content = f.read()

            return self.redact_content(content, str(file_path))

        except Exception as e:
            logger.error(f"Failed to redact file {file_path}: {e}")
            return RedactionResult(
                original_hash="",
                redacted_content="",
                redaction_count=0,
                patterns_matched=[],
                confidence_scores=[],
                was_redacted=False
            )

    def get_redaction_stats(self) -> "RedactionStats":
        """Get redaction statistics (the live object, not a copy)."""
        return self.stats

    def clear_cache(self) -> None:
        """Clear the pattern match cache."""
        self._pattern_cache.clear()

    def set_confidence_threshold(self, threshold: float) -> None:
        """Update the confidence threshold for redaction.

        Raises:
            ValueError: If ``threshold`` is outside the range 0.0 to 1.0.
        """
        if 0.0 <= threshold <= 1.0:
            self.min_confidence = threshold
        else:
            raise ValueError("Confidence threshold must be between 0.0 and 1.0")
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: mcp-code-indexer
3
- Version: 4.0.1
3
+ Version: 4.1.0
4
4
  Summary: MCP server that tracks file descriptions across codebases, enabling AI agents to efficiently navigate and understand code through searchable summaries and token-aware overviews.
5
5
  License: MIT
6
6
  Keywords: mcp,model-context-protocol,code-indexer,ai-tools,codebase-navigation,file-descriptions,llm-tools
@@ -23,6 +23,7 @@ Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
23
23
  Classifier: Topic :: Software Development
24
24
  Classifier: Topic :: Software Development :: Libraries :: Python Modules
25
25
  Classifier: Typing :: Typed
26
+ Provides-Extra: vector
26
27
  Requires-Dist: aiofiles (==23.2.0)
27
28
  Requires-Dist: aiohttp (>=3.8.0)
28
29
  Requires-Dist: aiosqlite (==0.19.0)
@@ -43,8 +44,8 @@ Description-Content-Type: text/markdown
43
44
 
44
45
  # MCP Code Indexer 🚀
45
46
 
46
- [![PyPI version](https://badge.fury.io/py/mcp-code-indexer.svg?44)](https://badge.fury.io/py/mcp-code-indexer)
47
- [![Python](https://img.shields.io/pypi/pyversions/mcp-code-indexer.svg?44)](https://pypi.org/project/mcp-code-indexer/)
47
+ [![PyPI version](https://badge.fury.io/py/mcp-code-indexer.svg?46)](https://badge.fury.io/py/mcp-code-indexer)
48
+ [![Python](https://img.shields.io/pypi/pyversions/mcp-code-indexer.svg?46)](https://pypi.org/project/mcp-code-indexer/)
48
49
  [![License](https://img.shields.io/badge/License-MIT-blue.svg)](https://opensource.org/licenses/MIT)
49
50
 
50
51
  A production-ready **Model Context Protocol (MCP) server** that revolutionizes how AI agents navigate and understand codebases. Built for high-concurrency environments with advanced database resilience, the server provides instant access to intelligent descriptions, semantic search, and context-aware recommendations while maintaining 800+ writes/sec throughput.
@@ -183,20 +184,77 @@ The git hook integration provides intelligent automation:
183
184
 
184
185
  - **📊 Git Analysis**: Automatically analyzes git diffs after commits/merges
185
186
  - **🤖 AI Processing**: Uses OpenRouter API with Anthropic's Claude Sonnet 4
186
- - **⚡ Smart Updates**: Only processes files that actually changed
187
+ - **⚡ Smart Updates**: Only processes files that actually changed
187
188
  - **🔄 Overview Maintenance**: Updates project overview when structure changes
188
189
  - **🛡️ Error Isolation**: Git operations continue even if indexing fails
189
190
  - **⏱️ Rate Limiting**: Built-in retry logic with exponential backoff
190
191
 
191
192
  ### 🎯 Key Benefits
192
193
 
193
- 💡 **Zero Manual Work**: Descriptions stay current without any effort
194
- ⚡ **Performance**: Only analyzes changed files, not entire codebase
195
- 🔒 **Reliability**: Robust error handling ensures git operations never fail
196
- 🎛️ **Configurable**: Support for custom models and timeout settings
194
+ 💡 **Zero Manual Work**: Descriptions stay current without any effort
195
+ ⚡ **Performance**: Only analyzes changed files, not entire codebase
196
+ 🔒 **Reliability**: Robust error handling ensures git operations never fail
197
+ 🎛️ **Configurable**: Support for custom models and timeout settings
197
198
 
198
199
  **Learn More**: See [Git Hook Setup Guide](docs/git-hook-setup.md) for complete configuration options and troubleshooting.
199
200
 
201
+ ## 🧠 Vector Mode (BETA)
202
+
203
+ 🚀 **NEW Feature**: Semantic code search with vector embeddings! Experience AI-powered code discovery that understands context and meaning, not just keywords.
204
+
205
+ ### 🎯 What is Vector Mode?
206
+
207
+ Vector Mode transforms how you search and understand codebases by using AI embeddings:
208
+
209
+ - **🔍 Semantic Search**: Find code by meaning, not just text matching
210
+ - **⚡ Real-time Indexing**: Automatic embedding generation as code changes
211
+ - **🛡️ Secure by Default**: Comprehensive secret redaction before API calls
212
+ - **🌐 Multi-language**: Python, JavaScript, TypeScript with AST-based chunking
213
+ - **📊 Smart Chunking**: Context-aware code segmentation for optimal embeddings
214
+
215
+ ### 🚀 Quick Start
216
+
217
+ ```bash
218
+ # Install vector mode dependencies
219
+ pip install mcp-code-indexer[vector]
220
+
221
+ # Set required API keys
222
+ export VOYAGE_API_KEY="pa-your-voyage-api-key"
223
+ export TURBOPUFFER_API_KEY="your-turbopuffer-api-key"
224
+
225
+ # Start with vector mode enabled
226
+ mcp-code-indexer --vector
227
+
228
+ # The daemon automatically starts and begins indexing your projects
229
+ ```
230
+
231
+ ### 💡 Key Features
232
+
233
+ - **🔐 Secret Redaction**: 20+ pattern types automatically detected and redacted
234
+ - **🌳 Merkle Trees**: Efficient change detection without full directory scans
235
+ - **🎛️ Circuit Breakers**: Resilient API integration with automatic retry logic
236
+ - **📈 Production Ready**: Built for high-concurrency with comprehensive monitoring
237
+
238
+ ### 🔧 Advanced Configuration
239
+
240
+ ```bash
241
+ # Custom configuration
242
+ mcp-code-indexer --vector --vector-config /path/to/config.yaml
243
+
244
+ # HTTP mode with vector search
245
+ mcp-code-indexer --vector --http --port 8080
246
+ ```
247
+
248
+ ### 🛠️ Architecture
249
+
250
+ Vector Mode adds powerful new MCP tools:
251
+ - `vector_search` - Semantic code search across projects
252
+ - `similarity_search` - Find similar code patterns
253
+ - `dependency_search` - Discover code relationships
254
+ - `vector_status` - Monitor indexing progress
255
+
256
+ **Status**: Currently in BETA - foundations implemented, full pipeline in development.
257
+
200
258
  ## 🔧 Development Setup
201
259
 
202
260
  ### 👨‍💻 For Contributors
@@ -369,14 +427,14 @@ Comprehensive documentation organized by user journey and expertise level.
369
427
 
370
428
  ### 📋 Quick References
371
429
  - **[Examples & Integrations](examples/)** - Ready-to-use configurations
372
- - **[Troubleshooting](#🚨-troubleshooting)** - Common issues & solutions
430
+ - **[Troubleshooting](#🚨-troubleshooting)** - Common issues & solutions
373
431
  - **[API Tools Summary](#🛠️-mcp-tools-available)** - All 11 tools at a glance
374
432
 
375
433
  **📚 Reading Paths:**
376
434
  - **New to MCP Code Indexer?** Quick Start → API Reference → HTTP API → Q&A Interface
377
435
  - **Web developers?** Quick Start → HTTP API Reference → Q&A Interface → Git Hooks
378
436
  - **AI/ML engineers?** Quick Start → Q&A Interface → API Reference → Git Hooks
379
- - **Setting up for a team?** CLI Reference → Configuration → Administrative Commands → Monitoring
437
+ - **Setting up for a team?** CLI Reference → Configuration → Administrative Commands → Monitoring
380
438
  - **Contributing to the project?** Architecture → Contributing → API Reference
381
439
 
382
440
  ## 🚦 System Requirements
@@ -390,7 +448,7 @@ Comprehensive documentation organized by user journey and expertise level.
390
448
 
391
449
  Tested with codebases up to **10,000 files**:
392
450
  - File description retrieval: **< 10ms**
393
- - Full-text search: **< 100ms**
451
+ - Full-text search: **< 100ms**
394
452
  - Codebase overview generation: **< 2s**
395
453
  - Merge conflict detection: **< 5s**
396
454
 
@@ -460,11 +518,11 @@ async def analyze_codebase(project_path):
460
518
  "projectName": "my-project",
461
519
  "folderPath": project_path
462
520
  })
463
-
521
+
464
522
  if size_info["isLarge"]:
465
523
  # Use search for large codebases
466
524
  results = await mcp_client.call_tool("search_descriptions", {
467
- "projectName": "my-project",
525
+ "projectName": "my-project",
468
526
  "folderPath": project_path,
469
527
  "query": "authentication logic"
470
528
  })
@@ -484,18 +542,18 @@ async def analyze_codebase(project_path):
484
542
  python -c "
485
543
  import asyncio
486
544
  from mcp_client import MCPClient
487
-
545
+
488
546
  async def update_descriptions():
489
547
  client = MCPClient('mcp-code-indexer')
490
-
548
+
491
549
  # Find files without descriptions
492
550
  missing = await client.call_tool('find_missing_descriptions', {
493
551
  'projectName': '${{ github.repository }}',
494
552
  'folderPath': '.'
495
553
  })
496
-
554
+
497
555
  # Process with AI and update...
498
-
556
+
499
557
  asyncio.run(update_descriptions())
500
558
  "
501
559
  ```
@@ -606,7 +664,7 @@ mcp-code-indexer --map PROJECT_NAME
606
664
  ## 🛡️ Security Features
607
665
 
608
666
  - **Input validation** on all MCP tool parameters
609
- - **SQL injection protection** via parameterized queries
667
+ - **SQL injection protection** via parameterized queries
610
668
  - **File system sandboxing** with .gitignore respect
611
669
  - **Error sanitization** to prevent information leakage
612
670
  - **Async resource cleanup** to prevent memory leaks
@@ -638,7 +696,7 @@ Ready to supercharge your AI agents with intelligent codebase navigation?
638
696
  2. **[Master the API tools](docs/api-reference.md)** - Learn all 11 tools with examples
639
697
  3. **[Try HTTP API access](docs/http-api.md)** - REST API for web applications
640
698
  4. **[Explore AI-powered Q&A](docs/qa-interface.md)** - Ask questions about your code
641
- 5. **[Set up git hooks](docs/git-hook-setup.md)** - Automate your workflow
699
+ 5. **[Set up git hooks](docs/git-hook-setup.md)** - Automate your workflow
642
700
 
643
701
  **👥 Setting up for a team?**
644
702
  1. **[Learn all CLI commands](docs/cli-reference.md)** - Complete command reference
@@ -661,7 +719,7 @@ Ready to supercharge your AI agents with intelligent codebase navigation?
661
719
 
662
720
  We welcome contributions! See our **[Contributing Guide](docs/contributing.md)** for:
663
721
  - Development setup
664
- - Code style guidelines
722
+ - Code style guidelines
665
723
  - Testing requirements
666
724
  - Pull request process
667
725
 
@@ -672,7 +730,7 @@ MIT License - see **[LICENSE](LICENSE)** for details.
672
730
  ## 🙏 Built With
673
731
 
674
732
  - **[Model Context Protocol](https://github.com/modelcontextprotocol/python-sdk)** - The foundation for tool integration
675
- - **[tiktoken](https://pypi.org/project/tiktoken/)** - Fast BPE tokenization
733
+ - **[tiktoken](https://pypi.org/project/tiktoken/)** - Fast BPE tokenization
676
734
  - **[aiosqlite](https://pypi.org/project/aiosqlite/)** - Async SQLite operations
677
735
  - **[aiohttp](https://pypi.org/project/aiohttp/)** - Async HTTP client for OpenRouter API
678
736
  - **[tenacity](https://pypi.org/project/tenacity/)** - Robust retry logic and rate limiting
@@ -680,9 +738,9 @@ MIT License - see **[LICENSE](LICENSE)** for details.
680
738
 
681
739
  ---
682
740
 
683
- **Transform how your AI agents understand code!** 🚀
741
+ **Transform how your AI agents understand code!** 🚀
684
742
 
685
- 🎯 **New User?** [Get started in 2 minutes](#-quick-start)
686
- 👨‍💻 **Developer?** [Explore the complete API](docs/api-reference.md)
743
+ 🎯 **New User?** [Get started in 2 minutes](#-quick-start)
744
+ 👨‍💻 **Developer?** [Explore the complete API](docs/api-reference.md)
687
745
  🔧 **Production?** [Deploy with confidence](docs/configuration.md)
688
746
 
@@ -0,0 +1,66 @@
1
+ mcp_code_indexer/__init__.py,sha256=IG3xW6SGlqnOCnGOCio_05IxTXWRWqaJF4y25ChbYMA,1852
2
+ mcp_code_indexer/__main__.py,sha256=4Edinoe0ug43hobuLYcjTmGp2YJnlFYN4_8iKvUBJ0Q,213
3
+ mcp_code_indexer/ask_handler.py,sha256=EiobL_Daii7wwcHvwDDtxLvqjZvhCJWvz_PIiRm14V4,9106
4
+ mcp_code_indexer/claude_api_handler.py,sha256=_PVhHxwVyY3ojNp-tIGp73nWD8MYMmIlCMIwKNMJXi8,13845
5
+ mcp_code_indexer/cleanup_manager.py,sha256=-Nqzy49fbjl7q_nIpVs__HV1IAI8q3AY8IH_fIIg5gs,9785
6
+ mcp_code_indexer/commands/__init__.py,sha256=141U722dS_NnFTZyxTPipzhXKdU21kCv-mcrN4djyHo,45
7
+ mcp_code_indexer/commands/makelocal.py,sha256=T_44so96jcs1FNlft9E3nAq0LlOzQLhjLd8P31Myfr4,9140
8
+ mcp_code_indexer/data/stop_words_english.txt,sha256=feRGP8WG5hQPo-wZN5ralJiSv1CGw4h3010NBJnJ0Z8,6344
9
+ mcp_code_indexer/database/__init__.py,sha256=aPq_aaRp0aSwOBIq9GkuMNjmLxA411zg2vhdrAuHm-w,38
10
+ mcp_code_indexer/database/connection_health.py,sha256=jZr3tCbfjUJujdXe_uxtm1N4c31dMV4euiSY4ulamOE,25497
11
+ mcp_code_indexer/database/database.py,sha256=bBjQoa1cH8iAHZxnv157229yBCZhcZre4g6QtBvl6Dk,47321
12
+ mcp_code_indexer/database/database_factory.py,sha256=zm942m72mqCYTGh1GFyVw-hBsbZcZnx3znJ2ZQPwISM,4316
13
+ mcp_code_indexer/database/exceptions.py,sha256=bamoC-ssw_TMRA5-6lzX6d_1DlcXXrcmiCMBdUEQ9dI,10479
14
+ mcp_code_indexer/database/models.py,sha256=OBIHmggY7BK-b9xak616YwttGS8pr8fY2DnAdP5JqYY,13181
15
+ mcp_code_indexer/database/path_resolver.py,sha256=1Ubx6Ly5F2dnvhbdN3tqyowBHslABXpoA6wgL4BQYGo,3461
16
+ mcp_code_indexer/database/retry_executor.py,sha256=6Hb0BM2BO6fl7sTIHtHFcwgV93W22eOrFvexYtFpa0k,13966
17
+ mcp_code_indexer/deepask_handler.py,sha256=qI9h_Me5WQAbt3hzzDG8XDBMZlnvx-I9R7OsmO_o8aA,18497
18
+ mcp_code_indexer/error_handler.py,sha256=ylciEM-cR7E8Gmd8cfh5olcllJm0FnaYBGH86yayFic,12530
19
+ mcp_code_indexer/file_scanner.py,sha256=7Ab34lRQGeh5GBCzcSP96p4YK6LDWFGUHLXqi499UZ4,11838
20
+ mcp_code_indexer/git_hook_handler.py,sha256=sTtZV3-Yy1Evt06R5NZclELeepM4Ia9OQoR2O6BK3Hk,45517
21
+ mcp_code_indexer/logging_config.py,sha256=M5eVZ5PwfTROib7ISTQ522n2hUSc4hJ_wUgsrJKsTTg,10030
22
+ mcp_code_indexer/main.py,sha256=tdUEcTVLweLmrG49TReGAl1nBf0vnzCIa7NSg6IPPec,37137
23
+ mcp_code_indexer/middleware/__init__.py,sha256=UCEPzOlZldlqFzYEfrXw1HvCDvY1jpLvyaDGUzVr2aw,368
24
+ mcp_code_indexer/middleware/auth.py,sha256=4HkHMDZBNsyPA1VE8qF7pRNKbqG4xIDZjllENbgynxI,7258
25
+ mcp_code_indexer/middleware/error_middleware.py,sha256=0RnKM5fK_n_7AITK2ueAqv30kLBdjU3vaWOTwWd2Xs0,11965
26
+ mcp_code_indexer/middleware/logging.py,sha256=C8tO0ckzT-FDSGehkRARpNC4TEtQMazQ8tjHU4RbFlU,8852
27
+ mcp_code_indexer/middleware/security.py,sha256=BEfEZbiXk3omWbe8PA7xm_hDe5onoTkUuMUerpy0mao,12009
28
+ mcp_code_indexer/migrations/001_initial.sql,sha256=hIXkCP4LA_4A9HJ1CHU0a1DD-a6EN6u-uJPMqW0c2Yo,4120
29
+ mcp_code_indexer/migrations/002_performance_indexes.sql,sha256=-J6Ce3nyF8pJ2hN5ZbkIGeqgfVPn-RQ_6bcOLne6-s8,2845
30
+ mcp_code_indexer/migrations/003_project_overviews.sql,sha256=pPzn7UmJ_Bda9mJ1nYTN1GeuYwdQHC7Fva6PvWaucUw,891
31
+ mcp_code_indexer/migrations/004_remove_branch_dependency.sql,sha256=WrWl3_17_1s9zVEN4dESLhDr6ezVF18dpGaeab4eJGs,6451
32
+ mcp_code_indexer/migrations/005_remove_git_remotes.sql,sha256=vT84AaV1hyN4zq5W67hR14TgAwhW7_RNtBHrCoksxA4,1299
33
+ mcp_code_indexer/migrations/006_vector_mode.sql,sha256=kN-UBPGoagqtpxpGEjdz-V3hevPAXxAdNmxF4iIPsY8,7448
34
+ mcp_code_indexer/query_preprocessor.py,sha256=vi23sK2ffs4T5PGY7lHrbCBDL421AlPz2dldqX_3JKA,5491
35
+ mcp_code_indexer/server/__init__.py,sha256=16xMcuriUOBlawRqWNBk6niwrvtv_JD5xvI36X1Vsmk,41
36
+ mcp_code_indexer/server/mcp_server.py,sha256=wFfsnrd5OOdzi4gmkVjggKBMIT6CYGcjWFClUguR9zo,75935
37
+ mcp_code_indexer/tiktoken_cache/9b5ad71b2ce5302211f9c61530b329a4922fc6a4,sha256=Ijkht27pm96ZW3_3OFE-7xAPtR0YyTWXoRO8_-hlsqc,1681126
38
+ mcp_code_indexer/token_counter.py,sha256=e6WsyCEWMMSkMwLbcVtr5e8vEqh-kFqNmiJErCNdqHE,8220
39
+ mcp_code_indexer/tools/__init__.py,sha256=m01mxML2UdD7y5rih_XNhNSCMzQTz7WQ_T1TeOcYlnE,49
40
+ mcp_code_indexer/transport/__init__.py,sha256=OrdabRjO3EChnTZ06IIFxO6imRQ3PwtRtJ9NQmDKwxw,370
41
+ mcp_code_indexer/transport/base.py,sha256=Lb3IrL8wr1QvUQpx1GyBQW5bvDpJoUFyXWNAw0dbOK4,3258
42
+ mcp_code_indexer/transport/http_transport.py,sha256=iDxW8CXEIPlpyOtSFkU1qw1FtbKbXgCvJXfVtlXbzIo,13291
43
+ mcp_code_indexer/transport/stdio_transport.py,sha256=a-Pu3usx_NwkHsN2VU8Qe0EwcA7PGL54Gbu2Ee8e0lU,4792
44
+ mcp_code_indexer/vector_mode/__init__.py,sha256=78RDdsmZGMcCBqaXxRslgzSrY-A9pLbmlb0bnnMXuVo,1064
45
+ mcp_code_indexer/vector_mode/chunking/__init__.py,sha256=rjjFMbHsqWIBzL4IajYxXXJud_RvBrpFNjVcxnRIWCE,490
46
+ mcp_code_indexer/vector_mode/chunking/ast_chunker.py,sha256=GTl_6U0nSgDRRzKS07tJ7RMX8AmJvvY_IsRn95hvVfA,14623
47
+ mcp_code_indexer/vector_mode/chunking/chunk_optimizer.py,sha256=xD0zEibjt6FLBFaKHNc63-iKTtCgnOlLL_9Hc8mCrzE,19752
48
+ mcp_code_indexer/vector_mode/chunking/language_handlers.py,sha256=YEpTVjzyJH445OjniGV05apexsfG5KVR4lwBEl4mGJc,18189
49
+ mcp_code_indexer/vector_mode/config.py,sha256=OgjkY-chGIWJCusNA327gm0Jzy_j6U-k4Qdiq70MRBM,6023
50
+ mcp_code_indexer/vector_mode/daemon.py,sha256=le3NkxFD73bKeutruzLY-Bauc-nXzlhlIlDJv4jlxhU,12096
51
+ mcp_code_indexer/vector_mode/monitoring/__init__.py,sha256=9rNWCvHxRMvYumdIrPjb5K9fpOwe1Aem24hdh8gXoDM,439
52
+ mcp_code_indexer/vector_mode/monitoring/change_detector.py,sha256=X82e_sKbJJFPhqZFJubLQb8Rs-srRtS7sh0nUOsPCPw,10338
53
+ mcp_code_indexer/vector_mode/monitoring/file_watcher.py,sha256=AQ6YHSKXPubtprLZngeLb0othJOCNQZ7wwXUvqwphT4,15299
54
+ mcp_code_indexer/vector_mode/monitoring/merkle_tree.py,sha256=83RLdUj_cgcAlrT9Wev9IBavVEyc8Jo8w--IOJisLOk,14645
55
+ mcp_code_indexer/vector_mode/providers/__init__.py,sha256=xZLGtAuaQpEWm5KW5Bdf8fMO92wb7OwOedSKhacjmwY,1908
56
+ mcp_code_indexer/vector_mode/providers/base_provider.py,sha256=4lmWUTDwB5CmFhEc004DkniiCuiRfFFTBBB0BOHlsUE,7513
57
+ mcp_code_indexer/vector_mode/providers/turbopuffer_client.py,sha256=97em_sHGvzEy6h1BI4Ux7IPj8U4d5ayYJyLwzmFRMyM,10758
58
+ mcp_code_indexer/vector_mode/providers/voyage_client.py,sha256=12uVi6Hqo2dfoUnbxaXohlsDmfBkeRKEotbvEPzT3n4,8315
59
+ mcp_code_indexer/vector_mode/security/__init__.py,sha256=itfeuysSqV-m9xuo-CMkAoucxexVfPgeOU-ieTLvdls,336
60
+ mcp_code_indexer/vector_mode/security/patterns.py,sha256=0xaiMnZm7YXswq3hVe_DJYePE9MhWuvizApLnmXus9M,11572
61
+ mcp_code_indexer/vector_mode/security/redactor.py,sha256=tsFzhCJ99bp4EFqQVjZ-4f8Uf3ux9X4ODVR09oJG01U,13380
62
+ mcp_code_indexer-4.1.0.dist-info/LICENSE,sha256=JN9dyPPgYwH9C-UjYM7FLNZjQ6BF7kAzpF3_4PwY4rY,1086
63
+ mcp_code_indexer-4.1.0.dist-info/METADATA,sha256=_oF0bxlQWX1SczGQb-nUVkNPWHs4Pt0DlqczLacfSPw,27221
64
+ mcp_code_indexer-4.1.0.dist-info/WHEEL,sha256=b4K_helf-jlQoXBBETfwnf4B04YC67LOev0jo4fX5m8,88
65
+ mcp_code_indexer-4.1.0.dist-info/entry_points.txt,sha256=UABj7HZ0mC6rvF22gxaz2LLNLGQShTrFmp5u00iUtvo,67
66
+ mcp_code_indexer-4.1.0.dist-info/RECORD,,