mcp-code-indexer 2.4.0__tar.gz → 3.0.2__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (53) hide show
  1. {mcp_code_indexer-2.4.0 → mcp_code_indexer-3.0.2}/MANIFEST.in +1 -1
  2. {mcp_code_indexer-2.4.0/src/mcp_code_indexer.egg-info → mcp_code_indexer-3.0.2}/PKG-INFO +3 -3
  3. {mcp_code_indexer-2.4.0 → mcp_code_indexer-3.0.2}/README.md +2 -2
  4. {mcp_code_indexer-2.4.0 → mcp_code_indexer-3.0.2}/pyproject.toml +1 -1
  5. {mcp_code_indexer-2.4.0 → mcp_code_indexer-3.0.2}/src/mcp_code_indexer/ask_handler.py +5 -7
  6. {mcp_code_indexer-2.4.0 → mcp_code_indexer-3.0.2}/src/mcp_code_indexer/claude_api_handler.py +2 -2
  7. mcp_code_indexer-3.0.2/src/mcp_code_indexer/cleanup_manager.py +255 -0
  8. {mcp_code_indexer-2.4.0 → mcp_code_indexer-3.0.2}/src/mcp_code_indexer/database/database.py +125 -98
  9. {mcp_code_indexer-2.4.0 → mcp_code_indexer-3.0.2}/src/mcp_code_indexer/database/models.py +3 -5
  10. {mcp_code_indexer-2.4.0 → mcp_code_indexer-3.0.2}/src/mcp_code_indexer/deepask_handler.py +5 -9
  11. {mcp_code_indexer-2.4.0 → mcp_code_indexer-3.0.2}/src/mcp_code_indexer/error_handler.py +3 -1
  12. {mcp_code_indexer-2.4.0 → mcp_code_indexer-3.0.2}/src/mcp_code_indexer/git_hook_handler.py +2 -9
  13. mcp_code_indexer-3.0.2/src/mcp_code_indexer/migrations/004_remove_branch_dependency.sql +166 -0
  14. {mcp_code_indexer-2.4.0 → mcp_code_indexer-3.0.2}/src/mcp_code_indexer/server/mcp_server.py +33 -211
  15. {mcp_code_indexer-2.4.0 → mcp_code_indexer-3.0.2/src/mcp_code_indexer.egg-info}/PKG-INFO +3 -3
  16. {mcp_code_indexer-2.4.0 → mcp_code_indexer-3.0.2}/src/mcp_code_indexer.egg-info/SOURCES.txt +5 -3
  17. {mcp_code_indexer-2.4.0 → mcp_code_indexer-3.0.2}/LICENSE +0 -0
  18. {mcp_code_indexer-2.4.0 → mcp_code_indexer-3.0.2}/docs/api-reference.md +0 -0
  19. {mcp_code_indexer-2.4.0 → mcp_code_indexer-3.0.2}/docs/architecture.md +0 -0
  20. {mcp_code_indexer-2.4.0 → mcp_code_indexer-3.0.2}/docs/configuration.md +0 -0
  21. {mcp_code_indexer-2.4.0 → mcp_code_indexer-3.0.2}/docs/contributing.md +0 -0
  22. {mcp_code_indexer-2.4.0 → mcp_code_indexer-3.0.2}/docs/database-resilience.md +0 -0
  23. {mcp_code_indexer-2.4.0 → mcp_code_indexer-3.0.2}/docs/git-hook-setup.md +0 -0
  24. {mcp_code_indexer-2.4.0 → mcp_code_indexer-3.0.2}/docs/monitoring.md +0 -0
  25. {mcp_code_indexer-2.4.0 → mcp_code_indexer-3.0.2}/docs/performance-tuning.md +0 -0
  26. {mcp_code_indexer-2.4.0 → mcp_code_indexer-3.0.2}/requirements.txt +0 -0
  27. {mcp_code_indexer-2.4.0 → mcp_code_indexer-3.0.2}/setup.cfg +0 -0
  28. {mcp_code_indexer-2.4.0 → mcp_code_indexer-3.0.2}/setup.py +0 -0
  29. {mcp_code_indexer-2.4.0 → mcp_code_indexer-3.0.2}/src/mcp_code_indexer/__init__.py +0 -0
  30. {mcp_code_indexer-2.4.0 → mcp_code_indexer-3.0.2}/src/mcp_code_indexer/__main__.py +0 -0
  31. {mcp_code_indexer-2.4.0 → mcp_code_indexer-3.0.2}/src/mcp_code_indexer/data/stop_words_english.txt +0 -0
  32. {mcp_code_indexer-2.4.0 → mcp_code_indexer-3.0.2}/src/mcp_code_indexer/database/__init__.py +0 -0
  33. {mcp_code_indexer-2.4.0 → mcp_code_indexer-3.0.2}/src/mcp_code_indexer/database/connection_health.py +0 -0
  34. {mcp_code_indexer-2.4.0 → mcp_code_indexer-3.0.2}/src/mcp_code_indexer/database/exceptions.py +0 -0
  35. {mcp_code_indexer-2.4.0 → mcp_code_indexer-3.0.2}/src/mcp_code_indexer/database/retry_executor.py +0 -0
  36. {mcp_code_indexer-2.4.0 → mcp_code_indexer-3.0.2}/src/mcp_code_indexer/file_scanner.py +0 -0
  37. {mcp_code_indexer-2.4.0 → mcp_code_indexer-3.0.2}/src/mcp_code_indexer/logging_config.py +0 -0
  38. {mcp_code_indexer-2.4.0 → mcp_code_indexer-3.0.2}/src/mcp_code_indexer/main.py +0 -0
  39. {mcp_code_indexer-2.4.0 → mcp_code_indexer-3.0.2}/src/mcp_code_indexer/merge_handler.py +0 -0
  40. {mcp_code_indexer-2.4.0 → mcp_code_indexer-3.0.2}/src/mcp_code_indexer/middleware/__init__.py +0 -0
  41. {mcp_code_indexer-2.4.0 → mcp_code_indexer-3.0.2}/src/mcp_code_indexer/middleware/error_middleware.py +0 -0
  42. {mcp_code_indexer-2.4.0 → mcp_code_indexer-3.0.2/src/mcp_code_indexer}/migrations/001_initial.sql +0 -0
  43. {mcp_code_indexer-2.4.0 → mcp_code_indexer-3.0.2/src/mcp_code_indexer}/migrations/002_performance_indexes.sql +0 -0
  44. {mcp_code_indexer-2.4.0 → mcp_code_indexer-3.0.2/src/mcp_code_indexer}/migrations/003_project_overviews.sql +0 -0
  45. {mcp_code_indexer-2.4.0 → mcp_code_indexer-3.0.2}/src/mcp_code_indexer/query_preprocessor.py +0 -0
  46. {mcp_code_indexer-2.4.0 → mcp_code_indexer-3.0.2}/src/mcp_code_indexer/server/__init__.py +0 -0
  47. {mcp_code_indexer-2.4.0 → mcp_code_indexer-3.0.2}/src/mcp_code_indexer/tiktoken_cache/9b5ad71b2ce5302211f9c61530b329a4922fc6a4 +0 -0
  48. {mcp_code_indexer-2.4.0 → mcp_code_indexer-3.0.2}/src/mcp_code_indexer/token_counter.py +0 -0
  49. {mcp_code_indexer-2.4.0 → mcp_code_indexer-3.0.2}/src/mcp_code_indexer/tools/__init__.py +0 -0
  50. {mcp_code_indexer-2.4.0 → mcp_code_indexer-3.0.2}/src/mcp_code_indexer.egg-info/dependency_links.txt +0 -0
  51. {mcp_code_indexer-2.4.0 → mcp_code_indexer-3.0.2}/src/mcp_code_indexer.egg-info/entry_points.txt +0 -0
  52. {mcp_code_indexer-2.4.0 → mcp_code_indexer-3.0.2}/src/mcp_code_indexer.egg-info/requires.txt +0 -0
  53. {mcp_code_indexer-2.4.0 → mcp_code_indexer-3.0.2}/src/mcp_code_indexer.egg-info/top_level.txt +0 -0
@@ -3,7 +3,7 @@ include LICENSE
3
3
  include pyproject.toml
4
4
  include requirements.txt
5
5
  recursive-include src/mcp_code_indexer/tiktoken_cache *
6
- recursive-include migrations *.sql
6
+ recursive-include src/mcp_code_indexer/migrations *.sql
7
7
  recursive-include docs *.md
8
8
  recursive-exclude tests *
9
9
  recursive-exclude venv *
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: mcp-code-indexer
3
- Version: 2.4.0
3
+ Version: 3.0.2
4
4
  Summary: MCP server that tracks file descriptions across codebases, enabling AI agents to efficiently navigate and understand code through searchable summaries and token-aware overviews.
5
5
  Author: MCP Code Indexer Contributors
6
6
  Maintainer: MCP Code Indexer Contributors
@@ -59,8 +59,8 @@ Dynamic: requires-python
59
59
 
60
60
  # MCP Code Indexer 🚀
61
61
 
62
- [![PyPI version](https://badge.fury.io/py/mcp-code-indexer.svg?18)](https://badge.fury.io/py/mcp-code-indexer)
63
- [![Python](https://img.shields.io/pypi/pyversions/mcp-code-indexer.svg?18)](https://pypi.org/project/mcp-code-indexer/)
62
+ [![PyPI version](https://badge.fury.io/py/mcp-code-indexer.svg?20)](https://badge.fury.io/py/mcp-code-indexer)
63
+ [![Python](https://img.shields.io/pypi/pyversions/mcp-code-indexer.svg?20)](https://pypi.org/project/mcp-code-indexer/)
64
64
  [![License](https://img.shields.io/badge/License-MIT-blue.svg)](https://opensource.org/licenses/MIT)
65
65
 
66
66
  A production-ready **Model Context Protocol (MCP) server** that revolutionizes how AI agents navigate and understand codebases. Built for high-concurrency environments with advanced database resilience, the server provides instant access to intelligent descriptions, semantic search, and context-aware recommendations while maintaining 800+ writes/sec throughput.
@@ -1,7 +1,7 @@
1
1
  # MCP Code Indexer 🚀
2
2
 
3
- [![PyPI version](https://badge.fury.io/py/mcp-code-indexer.svg?18)](https://badge.fury.io/py/mcp-code-indexer)
4
- [![Python](https://img.shields.io/pypi/pyversions/mcp-code-indexer.svg?18)](https://pypi.org/project/mcp-code-indexer/)
3
+ [![PyPI version](https://badge.fury.io/py/mcp-code-indexer.svg?20)](https://badge.fury.io/py/mcp-code-indexer)
4
+ [![Python](https://img.shields.io/pypi/pyversions/mcp-code-indexer.svg?20)](https://pypi.org/project/mcp-code-indexer/)
5
5
  [![License](https://img.shields.io/badge/License-MIT-blue.svg)](https://opensource.org/licenses/MIT)
6
6
 
7
7
  A production-ready **Model Context Protocol (MCP) server** that revolutionizes how AI agents navigate and understand codebases. Built for high-concurrency environments with advanced database resilience, the server provides instant access to intelligent descriptions, semantic search, and context-aware recommendations while maintaining 800+ writes/sec throughput.
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "mcp-code-indexer"
7
- version = "2.4.0"
7
+ version = "3.0.2"
8
8
  description = "MCP server that tracks file descriptions across codebases, enabling AI agents to efficiently navigate and understand code through searchable summaries and token-aware overviews."
9
9
  readme = "README.md"
10
10
  license = {text = "MIT"}
@@ -51,7 +51,7 @@ class AskHandler(ClaudeAPIHandler):
51
51
  Ask a question about the project using Claude API.
52
52
 
53
53
  Args:
54
- project_info: Project information dict with projectName, folderPath, branch, etc.
54
+ project_info: Project information dict with projectName, folderPath, etc.
55
55
  question: User's question about the project
56
56
  include_overview: Whether to include project overview in context
57
57
 
@@ -112,8 +112,7 @@ class AskHandler(ClaudeAPIHandler):
112
112
  "response_tokens": response.usage.get("completion_tokens") if response.usage else None,
113
113
  "total_tokens": response.usage.get("total_tokens") if response.usage else None
114
114
  },
115
- "include_overview": include_overview,
116
- "branch": project_info.get("branch", "unknown")
115
+ "include_overview": include_overview
117
116
  }
118
117
  }
119
118
 
@@ -141,10 +140,9 @@ class AskHandler(ClaudeAPIHandler):
141
140
  Formatted prompt string
142
141
  """
143
142
  project_name = project_info["projectName"]
144
- branch = project_info.get("branch", "unknown")
145
143
 
146
144
  if overview.strip():
147
- prompt = f"""Please answer the following question about the codebase "{project_name}" (branch: {branch}).
145
+ prompt = f"""Please answer the following question about the codebase "{project_name}".
148
146
 
149
147
  PROJECT OVERVIEW:
150
148
  {overview}
@@ -154,7 +152,7 @@ QUESTION:
154
152
 
155
153
  Please provide a clear, detailed answer based on the project overview above. If the overview doesn't contain enough information to fully answer the question, please say so and suggest what additional information might be needed."""
156
154
  else:
157
- prompt = f"""Please answer the following question about the codebase "{project_name}" (branch: {branch}).
155
+ prompt = f"""Please answer the following question about the codebase "{project_name}".
158
156
 
159
157
  Note: No project overview is available for this codebase.
160
158
 
@@ -200,7 +198,7 @@ If the project overview is insufficient to answer the question completely, expla
200
198
 
201
199
  output = []
202
200
  output.append(f"Question: {result['question']}")
203
- output.append(f"Project: {result['project_name']} (branch: {metadata['branch']})")
201
+ output.append(f"Project: {result['project_name']}")
204
202
  output.append("")
205
203
  output.append("Answer:")
206
204
  output.append(answer)
@@ -331,7 +331,7 @@ class ClaudeAPIHandler:
331
331
  Get project overview from database.
332
332
 
333
333
  Args:
334
- project_info: Project information dict with projectName, folderPath, branch, etc.
334
+ project_info: Project information dict with projectName, folderPath, etc.
335
335
 
336
336
  Returns:
337
337
  Project overview text or empty string if not found
@@ -345,7 +345,7 @@ class ClaudeAPIHandler:
345
345
  return ""
346
346
 
347
347
  # Get overview for the project using project.id
348
- overview_result = await self.db_manager.get_project_overview(project.id, project_info["branch"])
348
+ overview_result = await self.db_manager.get_project_overview(project.id)
349
349
  if overview_result:
350
350
  return overview_result.overview
351
351
  else:
@@ -0,0 +1,255 @@
1
+ """
2
+ Cleanup Manager for MCP Code Indexer.
3
+
4
+ Handles soft deletion and retention policies for file descriptions
5
+ that are marked for cleanup. Provides periodic cleanup operations
6
+ and manual cleanup methods.
7
+ """
8
+
9
+ import logging
10
+ import time
11
+ from typing import List, Optional
12
+ from pathlib import Path
13
+
14
+ logger = logging.getLogger(__name__)
15
+
16
+
17
class CleanupManager:
    """
    Manages cleanup operations for file descriptions with retention policies.

    Soft deletion is recorded by writing a Unix timestamp into the
    ``to_be_cleaned`` column of ``file_descriptions``; a NULL value means the
    row is active. ``perform_cleanup`` permanently deletes rows whose
    timestamp is older than the retention period.
    """

    # Retention is expressed in months; a month is approximated as 30 days.
    _SECONDS_PER_MONTH = 30 * 24 * 60 * 60

    def __init__(self, db_manager, retention_months: int = 6):
        """
        Initialize cleanup manager.

        Args:
            db_manager: DatabaseManager instance
            retention_months: Number of months to retain records before
                permanent deletion
        """
        self.db_manager = db_manager
        self.retention_months = retention_months

    def _cutoff_timestamp(self) -> int:
        """Return the Unix timestamp before which marked rows are expired."""
        return int(time.time()) - self.retention_months * self._SECONDS_PER_MONTH

    async def mark_file_for_cleanup(self, project_id: str, file_path: str) -> bool:
        """
        Mark a specific file for cleanup by setting to_be_cleaned timestamp.

        Rows that are already marked are left untouched, so the original
        marking time is preserved.

        Args:
            project_id: Project identifier
            file_path: Path to file to mark for cleanup

        Returns:
            True if a row was marked, False if no active row matched
        """
        cleanup_timestamp = int(time.time())

        async with self.db_manager.get_write_connection_with_retry(
            "mark_file_for_cleanup"
        ) as db:
            cursor = await db.execute(
                """
                UPDATE file_descriptions
                SET to_be_cleaned = ?
                WHERE project_id = ? AND file_path = ? AND to_be_cleaned IS NULL
                """,
                (cleanup_timestamp, project_id, file_path),
            )
            await db.commit()

        # rowcount reports how many rows the UPDATE touched.
        return cursor.rowcount > 0

    async def mark_files_for_cleanup(self, project_id: str, file_paths: List[str]) -> int:
        """
        Mark multiple files for cleanup in a batch operation.

        Args:
            project_id: Project identifier
            file_paths: List of file paths to mark for cleanup

        Returns:
            Number of files marked for cleanup (0 for an empty list)
        """
        if not file_paths:
            return 0

        cleanup_timestamp = int(time.time())

        async def batch_operation(conn):
            data = [(cleanup_timestamp, project_id, path) for path in file_paths]
            cursor = await conn.executemany(
                """
                UPDATE file_descriptions
                SET to_be_cleaned = ?
                WHERE project_id = ? AND file_path = ? AND to_be_cleaned IS NULL
                """,
                data,
            )
            return cursor.rowcount

        marked_count = await self.db_manager.execute_transaction_with_retry(
            batch_operation,
            f"mark_files_for_cleanup_{len(file_paths)}_files",
            timeout_seconds=30.0,
        )

        logger.info(
            "Marked %s files for cleanup in project %s", marked_count, project_id
        )
        return marked_count

    async def restore_file_from_cleanup(self, project_id: str, file_path: str) -> bool:
        """
        Restore a file from cleanup by clearing its to_be_cleaned timestamp.

        Args:
            project_id: Project identifier
            file_path: Path to file to restore

        Returns:
            True if a marked row was restored, False if none matched
        """
        async with self.db_manager.get_write_connection_with_retry(
            "restore_file_from_cleanup"
        ) as db:
            cursor = await db.execute(
                """
                UPDATE file_descriptions
                SET to_be_cleaned = NULL
                WHERE project_id = ? AND file_path = ? AND to_be_cleaned IS NOT NULL
                """,
                (project_id, file_path),
            )
            await db.commit()

        return cursor.rowcount > 0

    async def get_files_to_be_cleaned(self, project_id: str) -> List[dict]:
        """
        Get list of files marked for cleanup in a project.

        Args:
            project_id: Project identifier

        Returns:
            List of dictionaries, most recently marked first, each with
            ``file_path``, ``marked_for_cleanup`` (the stored timestamp) and
            ``marked_date`` (that timestamp formatted in local time)
        """
        async with self.db_manager.get_connection() as db:
            cursor = await db.execute(
                """
                SELECT file_path, to_be_cleaned
                FROM file_descriptions
                WHERE project_id = ? AND to_be_cleaned IS NOT NULL
                ORDER BY to_be_cleaned DESC, file_path
                """,
                (project_id,),
            )
            rows = await cursor.fetchall()

        return [
            {
                'file_path': row['file_path'],
                'marked_for_cleanup': row['to_be_cleaned'],
                'marked_date': time.strftime(
                    '%Y-%m-%d %H:%M:%S', time.localtime(row['to_be_cleaned'])
                ),
            }
            for row in rows
        ]

    async def perform_cleanup(self, project_id: Optional[str] = None) -> int:
        """
        Permanently delete records that exceed the retention period.

        Args:
            project_id: If specified, only clean up this project. Otherwise
                clean all projects.

        Returns:
            Number of records permanently deleted
        """
        cutoff_timestamp = self._cutoff_timestamp()

        async def cleanup_operation(conn):
            if project_id:
                cursor = await conn.execute(
                    """
                    DELETE FROM file_descriptions
                    WHERE project_id = ? AND to_be_cleaned IS NOT NULL AND to_be_cleaned < ?
                    """,
                    (project_id, cutoff_timestamp),
                )
            else:
                cursor = await conn.execute(
                    """
                    DELETE FROM file_descriptions
                    WHERE to_be_cleaned IS NOT NULL AND to_be_cleaned < ?
                    """,
                    (cutoff_timestamp,),
                )

            return cursor.rowcount

        deleted_count = await self.db_manager.execute_transaction_with_retry(
            cleanup_operation,
            f"perform_cleanup_{project_id or 'all_projects'}",
            timeout_seconds=60.0,
        )

        if deleted_count > 0:
            scope = f"project {project_id}" if project_id else "all projects"
            logger.info(
                "Permanently deleted %s old records from %s", deleted_count, scope
            )

        return deleted_count

    async def get_cleanup_stats(self, project_id: Optional[str] = None) -> dict:
        """
        Get statistics about cleanup state.

        Args:
            project_id: If specified, get stats for this project only.
                Otherwise the counts cover all projects.

        Returns:
            Dictionary with ``active_files``, ``marked_for_cleanup``,
            ``eligible_for_deletion``, ``retention_months`` and
            ``cutoff_date`` (the retention cutoff formatted in local time)
        """
        cutoff_timestamp = self._cutoff_timestamp()

        # The project filter is optional. Conditions are collected in a list
        # and joined with AND so the statement is valid SQL with or without
        # it; previously the unscoped form produced "... file_descriptions
        # AND ..." with no WHERE keyword.
        scope_conditions = ["project_id = ?"] if project_id else []
        scope_params = (project_id,) if project_id else ()

        async def count_rows(db, condition: str, extra_params: tuple = ()) -> int:
            # Only fixed SQL fragments are interpolated; values stay bound
            # through placeholders.
            where_clause = " AND ".join([*scope_conditions, condition])
            cursor = await db.execute(
                f"SELECT COUNT(*) FROM file_descriptions WHERE {where_clause}",
                (*scope_params, *extra_params),
            )
            return (await cursor.fetchone())[0]

        async with self.db_manager.get_connection() as db:
            # Active files
            active_count = await count_rows(db, "to_be_cleaned IS NULL")

            # Files marked for cleanup
            marked_count = await count_rows(db, "to_be_cleaned IS NOT NULL")

            # Files eligible for permanent deletion
            eligible_for_deletion = await count_rows(
                db,
                "to_be_cleaned IS NOT NULL AND to_be_cleaned < ?",
                (cutoff_timestamp,),
            )

        return {
            'active_files': active_count,
            'marked_for_cleanup': marked_count,
            'eligible_for_deletion': eligible_for_deletion,
            'retention_months': self.retention_months,
            'cutoff_date': time.strftime(
                '%Y-%m-%d %H:%M:%S', time.localtime(cutoff_timestamp)
            ),
        }