mcp-code-indexer 2.2.1__tar.gz → 2.4.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (51):
  1. {mcp_code_indexer-2.2.1/src/mcp_code_indexer.egg-info → mcp_code_indexer-2.4.0}/PKG-INFO +3 -3
  2. {mcp_code_indexer-2.2.1 → mcp_code_indexer-2.4.0}/README.md +2 -2
  3. {mcp_code_indexer-2.2.1 → mcp_code_indexer-2.4.0}/docs/api-reference.md +50 -3
  4. {mcp_code_indexer-2.2.1 → mcp_code_indexer-2.4.0}/pyproject.toml +1 -1
  5. {mcp_code_indexer-2.2.1 → mcp_code_indexer-2.4.0}/src/mcp_code_indexer/database/database.py +12 -2
  6. {mcp_code_indexer-2.2.1 → mcp_code_indexer-2.4.0}/src/mcp_code_indexer/main.py +1 -0
  7. mcp_code_indexer-2.4.0/src/mcp_code_indexer/query_preprocessor.py +181 -0
  8. {mcp_code_indexer-2.2.1 → mcp_code_indexer-2.4.0}/src/mcp_code_indexer/server/mcp_server.py +79 -3
  9. {mcp_code_indexer-2.2.1 → mcp_code_indexer-2.4.0/src/mcp_code_indexer.egg-info}/PKG-INFO +3 -3
  10. {mcp_code_indexer-2.2.1 → mcp_code_indexer-2.4.0}/src/mcp_code_indexer.egg-info/SOURCES.txt +1 -0
  11. {mcp_code_indexer-2.2.1 → mcp_code_indexer-2.4.0}/LICENSE +0 -0
  12. {mcp_code_indexer-2.2.1 → mcp_code_indexer-2.4.0}/MANIFEST.in +0 -0
  13. {mcp_code_indexer-2.2.1 → mcp_code_indexer-2.4.0}/docs/architecture.md +0 -0
  14. {mcp_code_indexer-2.2.1 → mcp_code_indexer-2.4.0}/docs/configuration.md +0 -0
  15. {mcp_code_indexer-2.2.1 → mcp_code_indexer-2.4.0}/docs/contributing.md +0 -0
  16. {mcp_code_indexer-2.2.1 → mcp_code_indexer-2.4.0}/docs/database-resilience.md +0 -0
  17. {mcp_code_indexer-2.2.1 → mcp_code_indexer-2.4.0}/docs/git-hook-setup.md +0 -0
  18. {mcp_code_indexer-2.2.1 → mcp_code_indexer-2.4.0}/docs/monitoring.md +0 -0
  19. {mcp_code_indexer-2.2.1 → mcp_code_indexer-2.4.0}/docs/performance-tuning.md +0 -0
  20. {mcp_code_indexer-2.2.1 → mcp_code_indexer-2.4.0}/migrations/001_initial.sql +0 -0
  21. {mcp_code_indexer-2.2.1 → mcp_code_indexer-2.4.0}/migrations/002_performance_indexes.sql +0 -0
  22. {mcp_code_indexer-2.2.1 → mcp_code_indexer-2.4.0}/migrations/003_project_overviews.sql +0 -0
  23. {mcp_code_indexer-2.2.1 → mcp_code_indexer-2.4.0}/requirements.txt +0 -0
  24. {mcp_code_indexer-2.2.1 → mcp_code_indexer-2.4.0}/setup.cfg +0 -0
  25. {mcp_code_indexer-2.2.1 → mcp_code_indexer-2.4.0}/setup.py +0 -0
  26. {mcp_code_indexer-2.2.1 → mcp_code_indexer-2.4.0}/src/mcp_code_indexer/__init__.py +0 -0
  27. {mcp_code_indexer-2.2.1 → mcp_code_indexer-2.4.0}/src/mcp_code_indexer/__main__.py +0 -0
  28. {mcp_code_indexer-2.2.1 → mcp_code_indexer-2.4.0}/src/mcp_code_indexer/ask_handler.py +0 -0
  29. {mcp_code_indexer-2.2.1 → mcp_code_indexer-2.4.0}/src/mcp_code_indexer/claude_api_handler.py +0 -0
  30. {mcp_code_indexer-2.2.1 → mcp_code_indexer-2.4.0}/src/mcp_code_indexer/data/stop_words_english.txt +0 -0
  31. {mcp_code_indexer-2.2.1 → mcp_code_indexer-2.4.0}/src/mcp_code_indexer/database/__init__.py +0 -0
  32. {mcp_code_indexer-2.2.1 → mcp_code_indexer-2.4.0}/src/mcp_code_indexer/database/connection_health.py +0 -0
  33. {mcp_code_indexer-2.2.1 → mcp_code_indexer-2.4.0}/src/mcp_code_indexer/database/exceptions.py +0 -0
  34. {mcp_code_indexer-2.2.1 → mcp_code_indexer-2.4.0}/src/mcp_code_indexer/database/models.py +0 -0
  35. {mcp_code_indexer-2.2.1 → mcp_code_indexer-2.4.0}/src/mcp_code_indexer/database/retry_executor.py +0 -0
  36. {mcp_code_indexer-2.2.1 → mcp_code_indexer-2.4.0}/src/mcp_code_indexer/deepask_handler.py +0 -0
  37. {mcp_code_indexer-2.2.1 → mcp_code_indexer-2.4.0}/src/mcp_code_indexer/error_handler.py +0 -0
  38. {mcp_code_indexer-2.2.1 → mcp_code_indexer-2.4.0}/src/mcp_code_indexer/file_scanner.py +0 -0
  39. {mcp_code_indexer-2.2.1 → mcp_code_indexer-2.4.0}/src/mcp_code_indexer/git_hook_handler.py +0 -0
  40. {mcp_code_indexer-2.2.1 → mcp_code_indexer-2.4.0}/src/mcp_code_indexer/logging_config.py +0 -0
  41. {mcp_code_indexer-2.2.1 → mcp_code_indexer-2.4.0}/src/mcp_code_indexer/merge_handler.py +0 -0
  42. {mcp_code_indexer-2.2.1 → mcp_code_indexer-2.4.0}/src/mcp_code_indexer/middleware/__init__.py +0 -0
  43. {mcp_code_indexer-2.2.1 → mcp_code_indexer-2.4.0}/src/mcp_code_indexer/middleware/error_middleware.py +0 -0
  44. {mcp_code_indexer-2.2.1 → mcp_code_indexer-2.4.0}/src/mcp_code_indexer/server/__init__.py +0 -0
  45. {mcp_code_indexer-2.2.1 → mcp_code_indexer-2.4.0}/src/mcp_code_indexer/tiktoken_cache/9b5ad71b2ce5302211f9c61530b329a4922fc6a4 +0 -0
  46. {mcp_code_indexer-2.2.1 → mcp_code_indexer-2.4.0}/src/mcp_code_indexer/token_counter.py +0 -0
  47. {mcp_code_indexer-2.2.1 → mcp_code_indexer-2.4.0}/src/mcp_code_indexer/tools/__init__.py +0 -0
  48. {mcp_code_indexer-2.2.1 → mcp_code_indexer-2.4.0}/src/mcp_code_indexer.egg-info/dependency_links.txt +0 -0
  49. {mcp_code_indexer-2.2.1 → mcp_code_indexer-2.4.0}/src/mcp_code_indexer.egg-info/entry_points.txt +0 -0
  50. {mcp_code_indexer-2.2.1 → mcp_code_indexer-2.4.0}/src/mcp_code_indexer.egg-info/requires.txt +0 -0
  51. {mcp_code_indexer-2.2.1 → mcp_code_indexer-2.4.0}/src/mcp_code_indexer.egg-info/top_level.txt +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: mcp-code-indexer
3
- Version: 2.2.1
3
+ Version: 2.4.0
4
4
  Summary: MCP server that tracks file descriptions across codebases, enabling AI agents to efficiently navigate and understand code through searchable summaries and token-aware overviews.
5
5
  Author: MCP Code Indexer Contributors
6
6
  Maintainer: MCP Code Indexer Contributors
@@ -59,8 +59,8 @@ Dynamic: requires-python
59
59
 
60
60
  # MCP Code Indexer 🚀
61
61
 
62
- [![PyPI version](https://badge.fury.io/py/mcp-code-indexer.svg?16)](https://badge.fury.io/py/mcp-code-indexer)
63
- [![Python](https://img.shields.io/pypi/pyversions/mcp-code-indexer.svg?16)](https://pypi.org/project/mcp-code-indexer/)
62
+ [![PyPI version](https://badge.fury.io/py/mcp-code-indexer.svg?18)](https://badge.fury.io/py/mcp-code-indexer)
63
+ [![Python](https://img.shields.io/pypi/pyversions/mcp-code-indexer.svg?18)](https://pypi.org/project/mcp-code-indexer/)
64
64
  [![License](https://img.shields.io/badge/License-MIT-blue.svg)](https://opensource.org/licenses/MIT)
65
65
 
66
66
  A production-ready **Model Context Protocol (MCP) server** that revolutionizes how AI agents navigate and understand codebases. Built for high-concurrency environments with advanced database resilience, the server provides instant access to intelligent descriptions, semantic search, and context-aware recommendations while maintaining 800+ writes/sec throughput.
@@ -1,7 +1,7 @@
1
1
  # MCP Code Indexer 🚀
2
2
 
3
- [![PyPI version](https://badge.fury.io/py/mcp-code-indexer.svg?16)](https://badge.fury.io/py/mcp-code-indexer)
4
- [![Python](https://img.shields.io/pypi/pyversions/mcp-code-indexer.svg?16)](https://pypi.org/project/mcp-code-indexer/)
3
+ [![PyPI version](https://badge.fury.io/py/mcp-code-indexer.svg?18)](https://badge.fury.io/py/mcp-code-indexer)
4
+ [![Python](https://img.shields.io/pypi/pyversions/mcp-code-indexer.svg?18)](https://pypi.org/project/mcp-code-indexer/)
5
5
  [![License](https://img.shields.io/badge/License-MIT-blue.svg)](https://opensource.org/licenses/MIT)
6
6
 
7
7
  A production-ready **Model Context Protocol (MCP) server** that revolutionizes how AI agents navigate and understand codebases. Built for high-concurrency environments with advanced database resilience, the server provides instant access to intelligent descriptions, semantic search, and context-aware recommendations while maintaining 800+ writes/sec throughput.
@@ -327,7 +327,14 @@ const result = await mcp.callTool("update_missing_descriptions", {
327
327
 
328
328
  ### search_descriptions
329
329
 
330
- Searches through all file descriptions in a project to find files related to specific functionality. Use this for large codebases instead of loading the entire structure. Returns files ranked by relevance.
330
+ Searches through all file descriptions in a project to find files related to specific functionality using intelligent query preprocessing. Features include:
331
+
332
+ - **Multi-word search**: `"grpc proto"` finds files containing both terms regardless of order
333
+ - **Operator escaping**: FTS5 operators (`AND`, `OR`, `NOT`, `NEAR`) are treated as literal search terms
334
+ - **Whole word matching**: Prevents partial matches for more precise results
335
+ - **Case insensitive**: Works regardless of case in query or descriptions
336
+
337
+ Use this for large codebases instead of loading the entire structure. Returns files ranked by relevance using BM25 scoring.
331
338
 
332
339
  #### Parameters
333
340
 
@@ -394,10 +401,50 @@ const result = await mcp.callTool("search_descriptions", {
394
401
  }
395
402
  ```
396
403
 
404
+ #### Enhanced Search Examples
405
+
406
+ **Multi-word search (order-agnostic):**
407
+ ```javascript
408
+ // Both queries find the same results
409
+ await mcp.callTool("search_descriptions", {
410
+ projectName: "api-service",
411
+ folderPath: "/projects/api-service",
412
+ branch: "main",
413
+ query: "grpc proto" // Finds files with both "grpc" AND "proto"
414
+ });
415
+
416
+ await mcp.callTool("search_descriptions", {
417
+ projectName: "api-service",
418
+ folderPath: "/projects/api-service",
419
+ branch: "main",
420
+ query: "proto grpc" // Same results as above
421
+ });
422
+ ```
423
+
424
+ **FTS5 operator escaping:**
425
+ ```javascript
426
+ // Search for files containing literal "AND" as a term
427
+ await mcp.callTool("search_descriptions", {
428
+ projectName: "error-handling",
429
+ folderPath: "/projects/error-handling",
430
+ branch: "main",
431
+ query: "logging AND error" // Finds files with all three: "logging", "AND", "error"
432
+ });
433
+ ```
434
+
435
+ **Case insensitive matching:**
436
+ ```javascript
437
+ // All variations return same results
438
+ const queries = ["HTTP client", "http CLIENT", "Http Client"];
439
+ // Each finds files containing both "http" and "client" regardless of case
440
+ ```
441
+
397
442
  🔍 **Search Tips**:
443
+ - **Use multiple words**: "grpc proto" finds files with both terms
444
+ - **Try different orders**: "api client" vs "client api" yield same results
398
445
  - **Be descriptive**: "authentication logic" vs "auth"
399
- - **Combine concepts**: "database connection pooling"
400
- - **Try variations**: If no results, try different terms
446
+ - **Don't worry about operators**: "AND", "OR" are treated as literal search terms
447
+ - **Case doesn't matter**: "HTTP", "http", "Http" all work the same
401
448
  - **Use technical terms**: "middleware", "controller", "utils"
402
449
  - **Search by purpose**: "error handling", "data validation"
403
450
 
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "mcp-code-indexer"
7
- version = "2.2.1"
7
+ version = "2.4.0"
8
8
  description = "MCP server that tracks file descriptions across codebases, enabling AI agents to efficiently navigate and understand code through searchable summaries and token-aware overviews."
9
9
  readme = "README.md"
10
10
  license = {text = "MIT"}
@@ -30,6 +30,7 @@ from mcp_code_indexer.database.exceptions import (
30
30
  from mcp_code_indexer.database.connection_health import (
31
31
  ConnectionHealthMonitor, DatabaseMetricsCollector
32
32
  )
33
+ from mcp_code_indexer.query_preprocessor import preprocess_search_query
33
34
 
34
35
  logger = logging.getLogger(__name__)
35
36
 
@@ -848,7 +849,16 @@ class DatabaseManager:
848
849
  query: str,
849
850
  max_results: int = 20
850
851
  ) -> List[SearchResult]:
851
- """Search file descriptions using FTS5."""
852
+ """Search file descriptions using FTS5 with intelligent query preprocessing."""
853
+ # Preprocess query for optimal FTS5 search
854
+ preprocessed_query = preprocess_search_query(query)
855
+
856
+ if not preprocessed_query:
857
+ logger.debug(f"Empty query after preprocessing: '{query}'")
858
+ return []
859
+
860
+ logger.debug(f"Search query preprocessing: '{query}' -> '{preprocessed_query}'")
861
+
852
862
  async with self.get_connection() as db:
853
863
  cursor = await db.execute(
854
864
  """
@@ -866,7 +876,7 @@ class DatabaseManager:
866
876
  ORDER BY bm25(file_descriptions_fts)
867
877
  LIMIT ?
868
878
  """,
869
- (query, project_id, branch, max_results)
879
+ (preprocessed_query, project_id, branch, max_results)
870
880
  )
871
881
  rows = await cursor.fetchall()
872
882
 
@@ -294,6 +294,7 @@ async def handle_runcommand(args: argparse.Namespace) -> None:
294
294
  "update_codebase_overview": server._handle_update_codebase_overview,
295
295
  "get_word_frequency": server._handle_get_word_frequency,
296
296
  "merge_branch_descriptions": server._handle_merge_branch_descriptions,
297
+ "search_codebase_overview": server._handle_search_codebase_overview,
297
298
  }
298
299
 
299
300
  if tool_name not in tool_handlers:
@@ -0,0 +1,181 @@
1
+ """
2
+ Query preprocessing module for intelligent FTS5 search.
3
+
4
+ This module provides intelligent query preprocessing for SQLite FTS5 full-text search
5
+ to enable multi-word search with case insensitive matching, whole word enforcement,
6
+ and proper handling of FTS5 operators as literal search terms.
7
+
8
+ Key features:
9
+ - Multi-word queries: "grpc proto" becomes "grpc" AND "proto" for order-agnostic matching
10
+ - FTS5 operator escaping: "AND OR" becomes '"AND" AND "OR"' to treat operators as literals
11
+ - Whole word matching: prevents partial matches by relying on proper tokenization
12
+ - Case insensitive: leverages FTS5 default behavior
13
+ - Special character handling: preserves special characters in quoted terms
14
+ """
15
+
16
+ import re
17
+ import logging
18
+ from typing import List, Set
19
+
20
logger = logging.getLogger(__name__)


class QueryPreprocessor:
    """
    Preprocesses user queries for optimal FTS5 search performance.

    A raw query is split into whitespace-separated terms and double-quoted
    phrases. Every term is emitted as an FTS5 string (wrapped in double quotes,
    with embedded double quotes doubled) and the strings are joined with
    ``AND`` so that all of them must match, in any order. Because every term
    is quoted, the FTS5 keywords ``AND``/``OR``/``NOT``/``NEAR`` typed by the
    user are searched for as ordinary words.
    """

    # FTS5 operators that need to be escaped when used as literal search terms
    FTS5_OPERATORS: Set[str] = {
        'AND', 'OR', 'NOT', 'NEAR'
    }

    # Matches either a double-quoted phrase (quotes included) or a run of
    # non-whitespace characters. Compiled once instead of on every call.
    _TERM_PATTERN = re.compile(r'"[^"]*"|\S+')

    def preprocess_query(self, query: str) -> str:
        """
        Preprocess a user query for FTS5 search.

        Args:
            query: Raw user query string

        Returns:
            Preprocessed query string for FTS5, or "" when the query is
            empty, blank, or contains no searchable term.

        Examples:
            >>> preprocessor = QueryPreprocessor()
            >>> preprocessor.preprocess_query("grpc proto")
            '"grpc" AND "proto"'
            >>> preprocessor.preprocess_query("error AND handling")
            '"error" AND "AND" AND "handling"'
            >>> preprocessor.preprocess_query('config "file system"')
            '"config" AND "file system"'
            >>> preprocessor.preprocess_query('say"hi')
            '"say""hi"'
        """
        if not query or not query.strip():
            return ""

        # Drop leading/trailing whitespace; inner runs are handled by the split.
        query = query.strip()

        # Quote every term and drop those with nothing searchable in them.
        processed_terms = [
            processed
            for processed in map(self._process_term, self._split_terms(query))
            if processed
        ]

        if not processed_terms:
            return ""

        # Join with AND for multi-word, order-agnostic matching
        result = " AND ".join(processed_terms)

        logger.debug("Preprocessed query: '%s' -> '%s'", query, result)
        return result

    def _split_terms(self, query: str) -> List[str]:
        """
        Split query into terms while preserving quoted phrases.

        Args:
            query: Input query string

        Returns:
            List of terms and quoted phrases (phrases keep their quotes)

        Examples:
            'grpc proto' -> ['grpc', 'proto']
            'config "file system"' -> ['config', '"file system"']
            'error AND handling' -> ['error', 'AND', 'handling']
        """
        # Every match is non-empty by construction, so no filtering is needed.
        return self._TERM_PATTERN.findall(query)

    def _process_term(self, term: str) -> str:
        """
        Process a single term: escape quotes and ensure proper quoting.

        Args:
            term: Single term or quoted phrase

        Returns:
            The term as a double-quoted FTS5 string, or "" when the term is
            empty or consists only of double quotes and whitespace.

        Examples:
            'grpc' -> '"grpc"'
            'AND' -> '"AND"'
            '"file system"' -> '"file system"'
            'c++' -> '"c++"'
            'say"hi' -> '"say""hi"'
            '"' -> ''
        """
        if not term:
            return ""

        # A term made only of quote characters/whitespace has nothing to
        # search for; skip it rather than emit an empty phrase.
        if not term.replace('"', '').strip():
            return ""

        # A balanced, user-supplied phrase keeps its outer quotes; only its
        # content is escaped. Anything else is escaped and quoted as a whole.
        is_phrase = len(term) >= 2 and term.startswith('"') and term.endswith('"')
        content = term[1:-1] if is_phrase else term

        # Without escaping, a stray quote would close the string early and
        # leave the rest of the term outside it.
        quoted_term = f'"{self._escape_quotes_in_term(content)}"'

        if not is_phrase and term.upper() in self.FTS5_OPERATORS:
            logger.debug("Escaped FTS5 operator: '%s' -> '%s'", term, quoted_term)

        return quoted_term

    def _escape_quotes_in_term(self, term: str) -> str:
        """
        Escape internal quotes in a term for FTS5 compatibility.

        Args:
            term: Term that may contain quotes

        Returns:
            Term with escaped quotes

        Examples:
            'say "hello"' -> 'say ""hello""'
            "test's file" -> "test's file"
        """
        # In FTS5, quotes inside quoted strings are escaped by doubling them
        return term.replace('"', '""')
168
+
169
+
170
def preprocess_search_query(query: str) -> str:
    """
    Preprocess a search query with a freshly created QueryPreprocessor.

    Module-level shortcut for callers that do not want to manage a
    preprocessor instance themselves.

    Args:
        query: Raw user query

    Returns:
        Whatever ``QueryPreprocessor.preprocess_query`` returns for ``query``
    """
    return QueryPreprocessor().preprocess_query(query)
@@ -478,6 +478,23 @@ src/
478
478
  "properties": {},
479
479
  "additionalProperties": False
480
480
  }
481
+ ),
482
+ types.Tool(
483
+ name="search_codebase_overview",
484
+ description="Search for a single word in the codebase overview and return 2 sentences before and after where the word is found. Useful for quickly finding specific information in large overviews.",
485
+ inputSchema={
486
+ "type": "object",
487
+ "properties": {
488
+ "projectName": {"type": "string", "description": "The name of the project"},
489
+ "folderPath": {"type": "string", "description": "Absolute path to the project folder on disk"},
490
+ "branch": {"type": "string", "description": "Git branch name"},
491
+ "remoteOrigin": {"type": "string", "description": "Git remote origin URL if available"},
492
+ "upstreamOrigin": {"type": "string", "description": "Upstream repository URL if this is a fork"},
493
+ "searchWord": {"type": "string", "description": "Single word to search for in the overview"}
494
+ },
495
+ "required": ["projectName", "folderPath", "branch", "searchWord"],
496
+ "additionalProperties": False
497
+ }
481
498
  )
482
499
  ]
483
500
 
@@ -503,6 +520,7 @@ src/
503
520
  "get_word_frequency": self._handle_get_word_frequency,
504
521
  "merge_branch_descriptions": self._handle_merge_branch_descriptions,
505
522
  "check_database_health": self._handle_check_database_health,
523
+ "search_codebase_overview": self._handle_search_codebase_overview,
506
524
  }
507
525
 
508
526
  if name not in tool_handlers:
@@ -889,18 +907,28 @@ src/
889
907
  # Use provided token limit or fall back to server default
890
908
  token_limit = arguments.get("tokenLimit", self.token_limit)
891
909
 
892
- # Calculate total tokens
910
+ # Calculate total tokens for descriptions
893
911
  logger.info("Calculating total token count...")
894
- total_tokens = self.token_counter.calculate_codebase_tokens(file_descriptions)
912
+ descriptions_tokens = self.token_counter.calculate_codebase_tokens(file_descriptions)
913
+
914
+ # Get overview tokens if available
915
+ overview = await self.db_manager.get_project_overview(project_id, resolved_branch)
916
+ overview_tokens = 0
917
+ if overview and overview.overview:
918
+ overview_tokens = self.token_counter.count_tokens(overview.overview)
919
+
920
+ total_tokens = descriptions_tokens + overview_tokens
895
921
  is_large = total_tokens > token_limit
896
922
  recommendation = "use_search" if is_large else "use_overview"
897
923
 
898
- logger.info(f"Codebase analysis complete: {total_tokens} tokens, {len(file_descriptions)} files")
924
+ logger.info(f"Codebase analysis complete: {total_tokens} tokens total ({descriptions_tokens} descriptions + {overview_tokens} overview), {len(file_descriptions)} files")
899
925
  logger.info(f"Size assessment: {'LARGE' if is_large else 'SMALL'} (limit: {token_limit})")
900
926
  logger.info(f"Recommendation: {recommendation}")
901
927
 
902
928
  return {
903
929
  "totalTokens": total_tokens,
930
+ "descriptionsTokens": descriptions_tokens,
931
+ "overviewTokens": overview_tokens,
904
932
  "isLarge": is_large,
905
933
  "recommendation": recommendation,
906
934
  "tokenLimit": token_limit,
@@ -1205,6 +1233,54 @@ src/
1205
1233
  "totalUniqueTerms": result.total_unique_terms
1206
1234
  }
1207
1235
 
1236
async def _handle_search_codebase_overview(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
    """
    Handle search_codebase_overview tool calls.

    Looks up the stored overview for the project/branch, splits it into
    sentences on runs of '.', '!' or '?', and reports every sentence that
    contains the search word (case-insensitive substring test) together
    with up to two sentences of context on either side.

    Args:
        arguments: Tool arguments. "branch" and "searchWord" are read
            here; the whole dict is also passed to the project lookup.

    Returns:
        When no overview is stored: "found" (False), "message" and
        "searchWord". Otherwise: "found", "searchWord", "matches",
        "totalMatches" and "totalSentences". Each match carries
        "matchIndex", "matchSentence", "context", and the inclusive
        "contextStartIndex"/"contextEndIndex" sentence indices.
    """
    # Function-scope import: this method does not rely on a module-level one.
    import re

    project_id = await self._get_or_create_project_id(arguments)
    resolved_branch = await self._resolve_branch(project_id, arguments["branch"])
    # Surrounding whitespace is not part of the word being searched for.
    search_word = arguments["searchWord"].strip().lower()

    # Get the overview
    overview = await self.db_manager.get_project_overview(project_id, resolved_branch)

    if not overview or not overview.overview:
        return {
            "found": False,
            "message": "No overview found for this project",
            "searchWord": arguments["searchWord"]
        }

    # Split overview into sentences, discarding empty fragments
    sentences = [
        fragment.strip()
        for fragment in re.split(r'[.!?]+', overview.overview)
        if fragment.strip()
    ]

    matches = []
    # An empty string is a substring of every sentence, so a blank search
    # word is treated as matching nothing instead of matching everything.
    if search_word:
        for i, sentence in enumerate(sentences):
            if search_word not in sentence.lower():
                continue

            # Get context: 2 sentences before and after
            start_idx = max(0, i - 2)
            end_idx = min(len(sentences), i + 3)

            # The split dropped the original terminators, so the context is
            # re-joined with plain periods.
            context = '. '.join(sentences[start_idx:end_idx]) + '.'

            matches.append({
                "matchIndex": i,
                "matchSentence": sentence,
                "context": context,
                "contextStartIndex": start_idx,
                "contextEndIndex": end_idx - 1
            })

    return {
        "found": len(matches) > 0,
        "searchWord": arguments["searchWord"],
        "matches": matches,
        "totalMatches": len(matches),
        "totalSentences": len(sentences)
    }
1283
+
1208
1284
  async def _handle_check_database_health(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
1209
1285
  """
1210
1286
  Handle check_database_health tool calls with comprehensive diagnostics.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: mcp-code-indexer
3
- Version: 2.2.1
3
+ Version: 2.4.0
4
4
  Summary: MCP server that tracks file descriptions across codebases, enabling AI agents to efficiently navigate and understand code through searchable summaries and token-aware overviews.
5
5
  Author: MCP Code Indexer Contributors
6
6
  Maintainer: MCP Code Indexer Contributors
@@ -59,8 +59,8 @@ Dynamic: requires-python
59
59
 
60
60
  # MCP Code Indexer 🚀
61
61
 
62
- [![PyPI version](https://badge.fury.io/py/mcp-code-indexer.svg?16)](https://badge.fury.io/py/mcp-code-indexer)
63
- [![Python](https://img.shields.io/pypi/pyversions/mcp-code-indexer.svg?16)](https://pypi.org/project/mcp-code-indexer/)
62
+ [![PyPI version](https://badge.fury.io/py/mcp-code-indexer.svg?18)](https://badge.fury.io/py/mcp-code-indexer)
63
+ [![Python](https://img.shields.io/pypi/pyversions/mcp-code-indexer.svg?18)](https://pypi.org/project/mcp-code-indexer/)
64
64
  [![License](https://img.shields.io/badge/License-MIT-blue.svg)](https://opensource.org/licenses/MIT)
65
65
 
66
66
  A production-ready **Model Context Protocol (MCP) server** that revolutionizes how AI agents navigate and understand codebases. Built for high-concurrency environments with advanced database resilience, the server provides instant access to intelligent descriptions, semantic search, and context-aware recommendations while maintaining 800+ writes/sec throughput.
@@ -26,6 +26,7 @@ src/mcp_code_indexer/git_hook_handler.py
26
26
  src/mcp_code_indexer/logging_config.py
27
27
  src/mcp_code_indexer/main.py
28
28
  src/mcp_code_indexer/merge_handler.py
29
+ src/mcp_code_indexer/query_preprocessor.py
29
30
  src/mcp_code_indexer/token_counter.py
30
31
  src/mcp_code_indexer.egg-info/PKG-INFO
31
32
  src/mcp_code_indexer.egg-info/SOURCES.txt