mcp-code-indexer 2.1.0__tar.gz → 2.2.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {mcp_code_indexer-2.1.0/src/mcp_code_indexer.egg-info → mcp_code_indexer-2.2.1}/PKG-INFO +3 -3
- {mcp_code_indexer-2.1.0 → mcp_code_indexer-2.2.1}/README.md +2 -2
- {mcp_code_indexer-2.1.0 → mcp_code_indexer-2.2.1}/pyproject.toml +1 -1
- mcp_code_indexer-2.2.1/src/mcp_code_indexer/ask_handler.py +217 -0
- mcp_code_indexer-2.2.1/src/mcp_code_indexer/claude_api_handler.py +355 -0
- mcp_code_indexer-2.2.1/src/mcp_code_indexer/deepask_handler.py +465 -0
- {mcp_code_indexer-2.1.0 → mcp_code_indexer-2.2.1}/src/mcp_code_indexer/server/mcp_server.py +1 -1
- {mcp_code_indexer-2.1.0 → mcp_code_indexer-2.2.1/src/mcp_code_indexer.egg-info}/PKG-INFO +3 -3
- {mcp_code_indexer-2.1.0 → mcp_code_indexer-2.2.1}/src/mcp_code_indexer.egg-info/SOURCES.txt +3 -0
- {mcp_code_indexer-2.1.0 → mcp_code_indexer-2.2.1}/LICENSE +0 -0
- {mcp_code_indexer-2.1.0 → mcp_code_indexer-2.2.1}/MANIFEST.in +0 -0
- {mcp_code_indexer-2.1.0 → mcp_code_indexer-2.2.1}/docs/api-reference.md +0 -0
- {mcp_code_indexer-2.1.0 → mcp_code_indexer-2.2.1}/docs/architecture.md +0 -0
- {mcp_code_indexer-2.1.0 → mcp_code_indexer-2.2.1}/docs/configuration.md +0 -0
- {mcp_code_indexer-2.1.0 → mcp_code_indexer-2.2.1}/docs/contributing.md +0 -0
- {mcp_code_indexer-2.1.0 → mcp_code_indexer-2.2.1}/docs/database-resilience.md +0 -0
- {mcp_code_indexer-2.1.0 → mcp_code_indexer-2.2.1}/docs/git-hook-setup.md +0 -0
- {mcp_code_indexer-2.1.0 → mcp_code_indexer-2.2.1}/docs/monitoring.md +0 -0
- {mcp_code_indexer-2.1.0 → mcp_code_indexer-2.2.1}/docs/performance-tuning.md +0 -0
- {mcp_code_indexer-2.1.0 → mcp_code_indexer-2.2.1}/migrations/001_initial.sql +0 -0
- {mcp_code_indexer-2.1.0 → mcp_code_indexer-2.2.1}/migrations/002_performance_indexes.sql +0 -0
- {mcp_code_indexer-2.1.0 → mcp_code_indexer-2.2.1}/migrations/003_project_overviews.sql +0 -0
- {mcp_code_indexer-2.1.0 → mcp_code_indexer-2.2.1}/requirements.txt +0 -0
- {mcp_code_indexer-2.1.0 → mcp_code_indexer-2.2.1}/setup.cfg +0 -0
- {mcp_code_indexer-2.1.0 → mcp_code_indexer-2.2.1}/setup.py +0 -0
- {mcp_code_indexer-2.1.0 → mcp_code_indexer-2.2.1}/src/mcp_code_indexer/__init__.py +0 -0
- {mcp_code_indexer-2.1.0 → mcp_code_indexer-2.2.1}/src/mcp_code_indexer/__main__.py +0 -0
- {mcp_code_indexer-2.1.0 → mcp_code_indexer-2.2.1}/src/mcp_code_indexer/data/stop_words_english.txt +0 -0
- {mcp_code_indexer-2.1.0 → mcp_code_indexer-2.2.1}/src/mcp_code_indexer/database/__init__.py +0 -0
- {mcp_code_indexer-2.1.0 → mcp_code_indexer-2.2.1}/src/mcp_code_indexer/database/connection_health.py +0 -0
- {mcp_code_indexer-2.1.0 → mcp_code_indexer-2.2.1}/src/mcp_code_indexer/database/database.py +0 -0
- {mcp_code_indexer-2.1.0 → mcp_code_indexer-2.2.1}/src/mcp_code_indexer/database/exceptions.py +0 -0
- {mcp_code_indexer-2.1.0 → mcp_code_indexer-2.2.1}/src/mcp_code_indexer/database/models.py +0 -0
- {mcp_code_indexer-2.1.0 → mcp_code_indexer-2.2.1}/src/mcp_code_indexer/database/retry_executor.py +0 -0
- {mcp_code_indexer-2.1.0 → mcp_code_indexer-2.2.1}/src/mcp_code_indexer/error_handler.py +0 -0
- {mcp_code_indexer-2.1.0 → mcp_code_indexer-2.2.1}/src/mcp_code_indexer/file_scanner.py +0 -0
- {mcp_code_indexer-2.1.0 → mcp_code_indexer-2.2.1}/src/mcp_code_indexer/git_hook_handler.py +0 -0
- {mcp_code_indexer-2.1.0 → mcp_code_indexer-2.2.1}/src/mcp_code_indexer/logging_config.py +0 -0
- {mcp_code_indexer-2.1.0 → mcp_code_indexer-2.2.1}/src/mcp_code_indexer/main.py +0 -0
- {mcp_code_indexer-2.1.0 → mcp_code_indexer-2.2.1}/src/mcp_code_indexer/merge_handler.py +0 -0
- {mcp_code_indexer-2.1.0 → mcp_code_indexer-2.2.1}/src/mcp_code_indexer/middleware/__init__.py +0 -0
- {mcp_code_indexer-2.1.0 → mcp_code_indexer-2.2.1}/src/mcp_code_indexer/middleware/error_middleware.py +0 -0
- {mcp_code_indexer-2.1.0 → mcp_code_indexer-2.2.1}/src/mcp_code_indexer/server/__init__.py +0 -0
- {mcp_code_indexer-2.1.0 → mcp_code_indexer-2.2.1}/src/mcp_code_indexer/tiktoken_cache/9b5ad71b2ce5302211f9c61530b329a4922fc6a4 +0 -0
- {mcp_code_indexer-2.1.0 → mcp_code_indexer-2.2.1}/src/mcp_code_indexer/token_counter.py +0 -0
- {mcp_code_indexer-2.1.0 → mcp_code_indexer-2.2.1}/src/mcp_code_indexer/tools/__init__.py +0 -0
- {mcp_code_indexer-2.1.0 → mcp_code_indexer-2.2.1}/src/mcp_code_indexer.egg-info/dependency_links.txt +0 -0
- {mcp_code_indexer-2.1.0 → mcp_code_indexer-2.2.1}/src/mcp_code_indexer.egg-info/entry_points.txt +0 -0
- {mcp_code_indexer-2.1.0 → mcp_code_indexer-2.2.1}/src/mcp_code_indexer.egg-info/requires.txt +0 -0
- {mcp_code_indexer-2.1.0 → mcp_code_indexer-2.2.1}/src/mcp_code_indexer.egg-info/top_level.txt +0 -0
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.4
|
2
2
|
Name: mcp-code-indexer
|
3
|
-
Version: 2.1.0
|
3
|
+
Version: 2.2.1
|
4
4
|
Summary: MCP server that tracks file descriptions across codebases, enabling AI agents to efficiently navigate and understand code through searchable summaries and token-aware overviews.
|
5
5
|
Author: MCP Code Indexer Contributors
|
6
6
|
Maintainer: MCP Code Indexer Contributors
|
@@ -59,8 +59,8 @@ Dynamic: requires-python
|
|
59
59
|
|
60
60
|
# MCP Code Indexer 🚀
|
61
61
|
|
62
|
-
[](https://badge.fury.io/py/mcp-code-indexer)
|
63
|
+
[](https://pypi.org/project/mcp-code-indexer/)
|
64
64
|
[](https://opensource.org/licenses/MIT)
|
65
65
|
|
66
66
|
A production-ready **Model Context Protocol (MCP) server** that revolutionizes how AI agents navigate and understand codebases. Built for high-concurrency environments with advanced database resilience, the server provides instant access to intelligent descriptions, semantic search, and context-aware recommendations while maintaining 800+ writes/sec throughput.
|
@@ -1,7 +1,7 @@
|
|
1
1
|
# MCP Code Indexer 🚀
|
2
2
|
|
3
|
-
[](https://badge.fury.io/py/mcp-code-indexer)
|
4
|
+
[](https://pypi.org/project/mcp-code-indexer/)
|
5
5
|
[](https://opensource.org/licenses/MIT)
|
6
6
|
|
7
7
|
A production-ready **Model Context Protocol (MCP) server** that revolutionizes how AI agents navigate and understand codebases. Built for high-concurrency environments with advanced database resilience, the server provides instant access to intelligent descriptions, semantic search, and context-aware recommendations while maintaining 800+ writes/sec throughput.
|
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
|
|
4
4
|
|
5
5
|
[project]
|
6
6
|
name = "mcp-code-indexer"
|
7
|
-
version = "2.1.0"
|
7
|
+
version = "2.2.1"
|
8
8
|
description = "MCP server that tracks file descriptions across codebases, enabling AI agents to efficiently navigate and understand code through searchable summaries and token-aware overviews."
|
9
9
|
readme = "README.md"
|
10
10
|
license = {text = "MIT"}
|
@@ -0,0 +1,217 @@
|
|
1
|
+
#!/usr/bin/env python3
|
2
|
+
"""
|
3
|
+
Ask Handler for MCP Code Indexer
|
4
|
+
|
5
|
+
Handles simple question-answering by combining project overview with user questions
|
6
|
+
and sending them to Claude via OpenRouter API for direct responses.
|
7
|
+
"""
|
8
|
+
|
9
|
+
import logging
|
10
|
+
from pathlib import Path
|
11
|
+
from typing import Dict, Optional, Any
|
12
|
+
|
13
|
+
from .claude_api_handler import ClaudeAPIHandler, ClaudeAPIError
|
14
|
+
from .database.database import DatabaseManager
|
15
|
+
|
16
|
+
|
17
|
+
class AskError(ClaudeAPIError):
    """Raised when an Ask (simple Q&A) operation fails."""

    pass
|
20
|
+
|
21
|
+
|
22
|
+
class AskHandler(ClaudeAPIHandler):
    """
    Handler for simple Q&A operations using Claude API.

    Provides functionality to:
    - Combine project overview with user questions
    - Send combined prompt to Claude for analysis
    - Return formatted responses for CLI consumption
    """

    def __init__(self, db_manager: DatabaseManager, cache_dir: Path, logger: Optional[logging.Logger] = None):
        """
        Initialize AskHandler.

        Args:
            db_manager: Database manager instance
            cache_dir: Cache directory for temporary files
            logger: Logger instance to use (optional, creates default if not provided)
        """
        super().__init__(db_manager, cache_dir, logger)
        self.logger = logger if logger is not None else logging.getLogger(__name__)

    async def ask_question(
        self,
        project_info: Dict[str, str],
        question: str,
        include_overview: bool = True
    ) -> Dict[str, Any]:
        """
        Ask a question about the project using Claude API.

        Args:
            project_info: Project information dict with projectName, folderPath, branch, etc.
            question: User's question about the project
            include_overview: Whether to include project overview in context

        Returns:
            Dict containing response and metadata

        Raises:
            AskError: On empty input, token-limit overflow, or any unexpected failure.
            ClaudeAPIError: Propagated unchanged from the underlying API call.
        """
        try:
            self.logger.info(f"Processing ask question for project: {project_info['projectName']}")
            self.logger.info(f"Question: {question}")

            # Validate inputs
            if not question or not question.strip():
                raise AskError("Question cannot be empty")

            if not project_info.get("projectName"):
                raise AskError("Project name is required")

            # Get project overview if requested; fall back to a placeholder so the
            # prompt template always has something to show.
            overview = ""
            if include_overview:
                overview = await self.get_project_overview(project_info)
                if not overview:
                    self.logger.warning(f"No project overview found for {project_info['projectName']}")
                    overview = "No project overview available."

            # Build the prompt
            prompt = self._build_ask_prompt(project_info, question, overview)

            # Validate token limits before spending an API call
            if not self.validate_token_limit(prompt):
                raise AskError(
                    f"Question and project context exceed token limit of {self.config.token_limit}. "
                    "Please ask a more specific question or use --deepask for enhanced search."
                )

            # Get token counts for reporting
            overview_tokens = self.get_token_count(overview) if overview else 0
            question_tokens = self.get_token_count(question)
            total_prompt_tokens = self.get_token_count(prompt)

            self.logger.info(f"Token usage: overview={overview_tokens}, question={question_tokens}, total={total_prompt_tokens}")

            # Call Claude API
            system_prompt = self._get_system_prompt()
            response = await self._call_claude_api(prompt, system_prompt)

            # Format response for CLI consumption
            result = {
                "answer": response.content,
                "project_name": project_info["projectName"],
                "question": question,
                "metadata": {
                    "model": response.model or self.config.model,
                    "token_usage": {
                        "overview_tokens": overview_tokens,
                        "question_tokens": question_tokens,
                        "total_prompt_tokens": total_prompt_tokens,
                        # usage may be absent from the API response; report None then
                        "response_tokens": response.usage.get("completion_tokens") if response.usage else None,
                        "total_tokens": response.usage.get("total_tokens") if response.usage else None
                    },
                    "include_overview": include_overview,
                    "branch": project_info.get("branch", "unknown")
                }
            }

            # FIX: was an f-string with no placeholders
            self.logger.info("Ask question completed successfully")
            return result

        except Exception as e:
            error_msg = f"Failed to process ask question: {str(e)}"
            self.logger.error(error_msg)
            if isinstance(e, (ClaudeAPIError, AskError)):
                raise
            # FIX: preserve the original traceback via explicit chaining
            raise AskError(error_msg) from e

    def _build_ask_prompt(self, project_info: Dict[str, str], question: str, overview: str) -> str:
        """
        Build the prompt for Claude API.

        Args:
            project_info: Project information
            question: User's question
            overview: Project overview (may be empty)

        Returns:
            Formatted prompt string
        """
        project_name = project_info["projectName"]
        branch = project_info.get("branch", "unknown")

        if overview.strip():
            prompt = f"""Please answer the following question about the codebase "{project_name}" (branch: {branch}).

PROJECT OVERVIEW:
{overview}

QUESTION:
{question}

Please provide a clear, detailed answer based on the project overview above. If the overview doesn't contain enough information to fully answer the question, please say so and suggest what additional information might be needed."""
        else:
            prompt = f"""Please answer the following question about the codebase "{project_name}" (branch: {branch}).

Note: No project overview is available for this codebase.

QUESTION:
{question}

Please provide the best answer you can based on the project name and general software development knowledge. If you need more specific information about this codebase to provide a complete answer, please mention what would be helpful."""

        return prompt

    def _get_system_prompt(self) -> str:
        """Get system prompt for Claude API."""
        return """You are a helpful software engineering assistant that analyzes codebases and answers questions about them.

When answering questions:
1. Be specific and technical when appropriate
2. Reference the project overview when available
3. If information is missing, clearly state what you don't know
4. Provide actionable suggestions when possible
5. Use clear, professional language
6. Focus on the specific question asked

If the project overview is insufficient to answer the question completely, explain what additional information would be needed and suggest using --deepask for more detailed analysis."""

    def format_response(self, result: Dict[str, Any], format_type: str = "text") -> str:
        """
        Format response for CLI output.

        Args:
            result: Result from ask_question
            format_type: Output format ("text" or "json")

        Returns:
            Formatted response string
        """
        if format_type == "json":
            import json
            return json.dumps(result, indent=2)

        # Text format
        answer = result["answer"]
        metadata = result["metadata"]

        output = []
        output.append(f"Question: {result['question']}")
        output.append(f"Project: {result['project_name']} (branch: {metadata['branch']})")
        output.append("")
        output.append("Answer:")
        output.append(answer)
        output.append("")
        output.append("Metadata:")
        output.append(f"  Model: {metadata['model']}")
        output.append(f"  Overview included: {metadata['include_overview']}")

        if metadata['token_usage']['total_tokens']:
            output.append(f"  Total tokens: {metadata['token_usage']['total_tokens']}")
        else:
            output.append(f"  Prompt tokens: {metadata['token_usage']['total_prompt_tokens']}")

        return "\n".join(output)
|
@@ -0,0 +1,355 @@
|
|
1
|
+
#!/usr/bin/env python3
|
2
|
+
"""
|
3
|
+
Base Claude API Handler for MCP Code Indexer
|
4
|
+
|
5
|
+
Provides shared functionality for interacting with Claude via OpenRouter API,
|
6
|
+
including token management, retry logic, and response validation.
|
7
|
+
"""
|
8
|
+
|
9
|
+
import asyncio
|
10
|
+
import json
|
11
|
+
import logging
|
12
|
+
import os
|
13
|
+
from dataclasses import dataclass
|
14
|
+
from typing import Dict, List, Optional, Any
|
15
|
+
from pathlib import Path
|
16
|
+
|
17
|
+
import aiohttp
|
18
|
+
from tenacity import retry, wait_exponential, stop_after_attempt, retry_if_exception_type
|
19
|
+
|
20
|
+
from .database.database import DatabaseManager
|
21
|
+
from .token_counter import TokenCounter
|
22
|
+
|
23
|
+
|
24
|
+
class ClaudeAPIError(Exception):
    """Base exception for Claude API operations."""

    pass


class ClaudeRateLimitError(ClaudeAPIError):
    """Exception for rate limiting scenarios.

    Carries the server-suggested back-off interval so callers (and the
    retry decorator) can wait before the next attempt.
    """

    def __init__(self, message: str, retry_after: int = 60):
        super().__init__(message)
        # Seconds to wait before retrying; defaults to 60 when unspecified.
        self.retry_after = retry_after


class ClaudeValidationError(ClaudeAPIError):
    """Exception for response validation failures."""

    pass
|
39
|
+
|
40
|
+
|
41
|
+
@dataclass
class ClaudeConfig:
    """Configuration for Claude API calls.

    NOTE(review): the dataclass default for ``timeout`` is 300s, while the
    environment fallback used in ClaudeAPIHandler.__init__ is 600s — confirm
    which value is intended to be canonical.
    """

    model: str = "anthropic/claude-sonnet-4"
    max_tokens: int = 24000
    temperature: float = 0.3
    timeout: int = 300
    token_limit: int = 180000
|
49
|
+
|
50
|
+
|
51
|
+
@dataclass
class ClaudeResponse:
    """Structured response from Claude API."""

    content: str  # assistant message text
    usage: Optional[Dict[str, Any]] = None  # raw token-usage dict from the API, if present
    model: Optional[str] = None  # model name reported by the API, if present
|
57
|
+
|
58
|
+
|
59
|
+
class ClaudeAPIHandler:
    """
    Base handler for Claude API interactions via OpenRouter.

    Provides shared functionality for:
    - Token counting and limit validation
    - API request/response handling with retry logic
    - Response validation and parsing
    - Error handling and logging
    """

    OPENROUTER_API_URL = "https://openrouter.ai/api/v1/chat/completions"

    def __init__(self, db_manager: DatabaseManager, cache_dir: Path, logger: Optional[logging.Logger] = None):
        """
        Initialize Claude API Handler.

        Args:
            db_manager: Database manager instance
            cache_dir: Cache directory for temporary files
            logger: Logger instance to use (optional, creates default if not provided)

        Raises:
            ClaudeAPIError: If OPENROUTER_API_KEY is not set in the environment.
        """
        self.db_manager = db_manager
        self.cache_dir = cache_dir
        self.logger = logger if logger is not None else logging.getLogger(__name__)
        self.token_counter = TokenCounter()

        # Initialize configuration from environment, with defaults.
        # NOTE(review): the env fallback timeout (600s) differs from the
        # ClaudeConfig dataclass default (300s) — confirm intended value.
        self.config = ClaudeConfig(
            model=os.getenv("MCP_CLAUDE_MODEL", "anthropic/claude-sonnet-4"),
            max_tokens=int(os.getenv("MCP_CLAUDE_MAX_TOKENS", "24000")),
            temperature=float(os.getenv("MCP_CLAUDE_TEMPERATURE", "0.3")),
            timeout=int(os.getenv("MCP_CLAUDE_TIMEOUT", "600")),  # 10 minutes
            token_limit=int(os.getenv("MCP_CLAUDE_TOKEN_LIMIT", "180000"))
        )

        # Validate API key up front so misconfiguration fails fast
        self.api_key = os.getenv("OPENROUTER_API_KEY")
        if not self.api_key:
            raise ClaudeAPIError("OPENROUTER_API_KEY environment variable is required")

    def validate_token_limit(self, prompt: str, context: str = "") -> bool:
        """
        Validate that prompt + context fits within token limit.

        Args:
            prompt: Main prompt text
            context: Additional context (project overview, file descriptions, etc.)

        Returns:
            True if within limits, False otherwise
        """
        combined_text = f"{prompt}\n\n{context}"
        token_count = self.token_counter.count_tokens(combined_text)

        self.logger.debug(f"Token count validation: {token_count}/{self.config.token_limit}")

        if token_count > self.config.token_limit:
            self.logger.warning(
                f"Token limit exceeded: {token_count} > {self.config.token_limit}. "
                f"Consider using shorter context or ask for a more specific question."
            )
            return False

        return True

    def get_token_count(self, text: str) -> int:
        """Get token count for given text."""
        return self.token_counter.count_tokens(text)

    @retry(
        wait=wait_exponential(multiplier=1, min=1, max=60),
        stop=stop_after_attempt(5),
        retry=retry_if_exception_type(ClaudeRateLimitError),
        reraise=True
    )
    async def _call_claude_api(self, prompt: str, system_prompt: Optional[str] = None) -> ClaudeResponse:
        """
        Make API call to Claude via OpenRouter with retry logic.

        Retries only on rate limiting (HTTP 429), with exponential backoff.

        Args:
            prompt: User prompt
            system_prompt: Optional system prompt

        Returns:
            ClaudeResponse with parsed response data

        Raises:
            ClaudeRateLimitError: On HTTP 429 (after retries are exhausted).
            ClaudeAPIError: On transport failure, timeout, or malformed response.
        """
        headers = {
            "Authorization": f"Bearer {self.api_key}",
            "HTTP-Referer": "https://github.com/fluffypony/mcp-code-indexer",
            "X-Title": "MCP Code Indexer",
            "Content-Type": "application/json"
        }

        messages = []
        if system_prompt:
            messages.append({"role": "system", "content": system_prompt})
        messages.append({"role": "user", "content": prompt})

        payload = {
            "model": self.config.model,
            "messages": messages,
            "temperature": self.config.temperature,
            "max_tokens": self.config.max_tokens,
        }

        timeout = aiohttp.ClientTimeout(total=self.config.timeout)

        self.logger.info("Sending request to Claude API via OpenRouter...")
        self.logger.info(f"  Model: {self.config.model}")
        self.logger.info(f"  Temperature: {self.config.temperature}")
        self.logger.info(f"  Max tokens: {self.config.max_tokens}")
        self.logger.info(f"  Timeout: {self.config.timeout}s")

        try:
            async with aiohttp.ClientSession(timeout=timeout) as session:
                async with session.post(
                    self.OPENROUTER_API_URL,
                    headers=headers,
                    json=payload
                ) as response:

                    self.logger.info(f"Claude API response status: {response.status}")

                    if response.status == 429:
                        retry_after = int(response.headers.get("Retry-After", 60))
                        self.logger.warning(f"Rate limited by OpenRouter, retry after {retry_after}s")
                        raise ClaudeRateLimitError(f"Rate limited. Retry after {retry_after}s", retry_after)

                    response.raise_for_status()

                    response_data = await response.json()

                    if "choices" not in response_data:
                        self.logger.error(f"Invalid API response format: {response_data}")
                        raise ClaudeAPIError(f"Invalid API response format: {response_data}")

                    content = response_data["choices"][0]["message"]["content"]
                    usage = response_data.get("usage")
                    model = response_data.get("model")

                    self.logger.info(f"Claude response content length: {len(content)} characters")
                    if usage:
                        self.logger.info(f"Token usage: {usage}")

                    return ClaudeResponse(content=content, usage=usage, model=model)

        except aiohttp.ClientError as e:
            self.logger.error(f"Claude API request failed: {e}")
            # FIX: preserve the original traceback via explicit chaining
            raise ClaudeAPIError(f"Claude API request failed: {e}") from e
        except asyncio.TimeoutError as e:
            self.logger.error(f"Claude API request timed out after {self.config.timeout}s")
            raise ClaudeAPIError("Claude API request timed out") from e

    def validate_json_response(self, response_text: str, required_keys: Optional[List[str]] = None) -> Dict[str, Any]:
        """
        Validate and parse JSON response from Claude.

        Tolerates extra text before/after the JSON object by extracting the
        first balanced ``{...}`` span when direct parsing fails.

        Args:
            response_text: Raw response content
            required_keys: List of required keys in the JSON response
                (FIX: annotation was ``List[str] = None``; it is optional)

        Returns:
            Validated JSON data

        Raises:
            ClaudeValidationError: If parsing fails or required keys are missing.
        """
        def extract_json_from_response(text: str) -> str:
            """Extract JSON from response that might have extra text before/after."""
            text = text.strip()

            json_start = -1
            json_end = -1

            # Look for opening brace
            for i, char in enumerate(text):
                if char == '{':
                    json_start = i
                    break

            if json_start == -1:
                return text  # No JSON found, return original

            # Find matching closing brace by depth counting
            brace_count = 0
            for i in range(json_start, len(text)):
                if text[i] == '{':
                    brace_count += 1
                elif text[i] == '}':
                    brace_count -= 1
                    if brace_count == 0:
                        json_end = i + 1
                        break

            if json_end == -1:
                return text  # No matching brace found, return original

            return text[json_start:json_end]

        try:
            # First try parsing as-is
            try:
                data = json.loads(response_text.strip())
            except json.JSONDecodeError:
                # Try extracting JSON from response.
                # FIX: parse the extracted text unconditionally so ``data`` is
                # always bound (or a JSONDecodeError is raised) on this path.
                extracted_json = extract_json_from_response(response_text)
                if extracted_json != response_text.strip():
                    self.logger.debug(f"Extracted JSON from response: {extracted_json}")
                data = json.loads(extracted_json)

            # Validate required keys if specified
            if required_keys:
                missing_keys = [key for key in required_keys if key not in data]
                if missing_keys:
                    raise ClaudeValidationError(f"Missing required keys in response: {missing_keys}")

            return data

        except json.JSONDecodeError as e:
            self.logger.error(f"Failed to parse JSON response: {e}")
            self.logger.error(f"Response text: {response_text}")
            raise ClaudeValidationError(f"Invalid JSON response: {e}") from e
        except ClaudeValidationError:
            # FIX: don't double-wrap our own validation error in the generic handler
            raise
        except Exception as e:
            self.logger.error(f"Response validation failed: {e}")
            raise ClaudeValidationError(f"Response validation failed: {e}") from e

    def format_error_response(self, error: Exception, context: str = "") -> str:
        """
        Format error for user-friendly display.

        Args:
            error: The exception that occurred
            context: Additional context about the operation

        Returns:
            Formatted error message
        """
        if isinstance(error, ClaudeRateLimitError):
            return f"Rate limited by Claude API. Please wait {error.retry_after} seconds and try again."
        elif isinstance(error, ClaudeValidationError):
            return f"Invalid response from Claude API: {str(error)}"
        elif isinstance(error, ClaudeAPIError):
            return f"Claude API error: {str(error)}"
        else:
            return f"Unexpected error during {context}: {str(error)}"

    async def find_existing_project_by_name(self, project_name: str) -> Optional[Any]:
        """
        Find existing project by name for CLI usage.

        Performs a case-insensitive match against all projects in the database.

        Args:
            project_name: Name of the project to find

        Returns:
            Project object if found, None otherwise (including on lookup errors)
        """
        try:
            all_projects = await self.db_manager.get_all_projects()
            normalized_name = project_name.lower()

            for project in all_projects:
                if project.name.lower() == normalized_name:
                    self.logger.info(f"Found existing project: {project.name} (ID: {project.id})")
                    return project

            self.logger.warning(f"No existing project found with name: {project_name}")
            return None
        except Exception as e:
            self.logger.error(f"Error finding project by name: {e}")
            return None

    async def get_project_overview(self, project_info: Dict[str, str]) -> str:
        """
        Get project overview from database.

        Args:
            project_info: Project information dict with projectName, folderPath, branch, etc.

        Returns:
            Project overview text or empty string if not found
        """
        try:
            # Try to find existing project by name first
            project = await self.find_existing_project_by_name(project_info["projectName"])

            if not project:
                self.logger.warning(f"Project '{project_info['projectName']}' not found in database")
                return ""

            # Get overview for the project using project.id
            overview_result = await self.db_manager.get_project_overview(project.id, project_info["branch"])
            if overview_result:
                return overview_result.overview
            else:
                return ""
        except Exception as e:
            # Best-effort: callers treat a missing overview as empty context
            self.logger.warning(f"Failed to get project overview: {e}")
            return ""
|
@@ -0,0 +1,465 @@
|
|
1
|
+
#!/usr/bin/env python3
|
2
|
+
"""
|
3
|
+
DeepAsk Handler for MCP Code Indexer
|
4
|
+
|
5
|
+
Handles enhanced question-answering with two-stage processing:
|
6
|
+
1. Extract search terms and compress overview
|
7
|
+
2. Search file descriptions and provide enhanced answer
|
8
|
+
"""
|
9
|
+
|
10
|
+
import logging
|
11
|
+
from pathlib import Path
|
12
|
+
from typing import Dict, List, Optional, Any, Tuple
|
13
|
+
|
14
|
+
from .claude_api_handler import ClaudeAPIHandler, ClaudeAPIError
|
15
|
+
from .database.database import DatabaseManager
|
16
|
+
|
17
|
+
|
18
|
+
class DeepAskError(ClaudeAPIError):
    """Raised when a DeepAsk (two-stage Q&A) operation fails."""

    pass
|
21
|
+
|
22
|
+
|
23
|
+
class DeepAskHandler(ClaudeAPIHandler):
|
24
|
+
"""
|
25
|
+
Handler for enhanced Q&A operations using two-stage Claude API processing.
|
26
|
+
|
27
|
+
Stage 1: Extract search terms and compress project overview
|
28
|
+
Stage 2: Search file descriptions and provide enhanced answer with context
|
29
|
+
"""
|
30
|
+
|
31
|
+
def __init__(self, db_manager: DatabaseManager, cache_dir: Path, logger: Optional[logging.Logger] = None):
|
32
|
+
"""
|
33
|
+
Initialize DeepAskHandler.
|
34
|
+
|
35
|
+
Args:
|
36
|
+
db_manager: Database manager instance
|
37
|
+
cache_dir: Cache directory for temporary files
|
38
|
+
logger: Logger instance to use (optional, creates default if not provided)
|
39
|
+
"""
|
40
|
+
super().__init__(db_manager, cache_dir, logger)
|
41
|
+
self.logger = logger if logger is not None else logging.getLogger(__name__)
|
42
|
+
|
43
|
+
async def find_existing_project_by_name(self, project_name: str) -> Optional[Any]:
|
44
|
+
"""
|
45
|
+
Find existing project by name for CLI usage.
|
46
|
+
|
47
|
+
Args:
|
48
|
+
project_name: Name of the project to find
|
49
|
+
|
50
|
+
Returns:
|
51
|
+
Project object if found, None otherwise
|
52
|
+
"""
|
53
|
+
try:
|
54
|
+
all_projects = await self.db_manager.get_all_projects()
|
55
|
+
normalized_name = project_name.lower()
|
56
|
+
|
57
|
+
for project in all_projects:
|
58
|
+
if project.name.lower() == normalized_name:
|
59
|
+
self.logger.info(f"Found existing project: {project.name} (ID: {project.id})")
|
60
|
+
return project
|
61
|
+
|
62
|
+
self.logger.warning(f"No existing project found with name: {project_name}")
|
63
|
+
return None
|
64
|
+
except Exception as e:
|
65
|
+
self.logger.error(f"Error finding project by name: {e}")
|
66
|
+
return None
|
67
|
+
|
68
|
+
async def deepask_question(
|
69
|
+
self,
|
70
|
+
project_info: Dict[str, str],
|
71
|
+
question: str,
|
72
|
+
max_file_results: int = 10
|
73
|
+
) -> Dict[str, Any]:
|
74
|
+
"""
|
75
|
+
Ask an enhanced question about the project using two-stage Claude API processing.
|
76
|
+
|
77
|
+
Args:
|
78
|
+
project_info: Project information dict with projectName, folderPath, branch, etc.
|
79
|
+
question: User's question about the project
|
80
|
+
max_file_results: Maximum number of file descriptions to include
|
81
|
+
|
82
|
+
Returns:
|
83
|
+
Dict containing enhanced response and metadata
|
84
|
+
"""
|
85
|
+
try:
|
86
|
+
self.logger.info(f"Processing deepask question for project: {project_info['projectName']}")
|
87
|
+
self.logger.info(f"Question: {question}")
|
88
|
+
|
89
|
+
# Validate inputs
|
90
|
+
if not question or not question.strip():
|
91
|
+
raise DeepAskError("Question cannot be empty")
|
92
|
+
|
93
|
+
if not project_info.get("projectName"):
|
94
|
+
raise DeepAskError("Project name is required")
|
95
|
+
|
96
|
+
# Stage 1: Extract search terms and compress overview
|
97
|
+
stage1_result = await self._stage1_extract_search_terms(project_info, question)
|
98
|
+
|
99
|
+
# Stage 2: Search files and provide enhanced answer
|
100
|
+
stage2_result = await self._stage2_enhanced_answer(
|
101
|
+
project_info,
|
102
|
+
question,
|
103
|
+
stage1_result["search_terms"],
|
104
|
+
stage1_result["compressed_overview"],
|
105
|
+
max_file_results
|
106
|
+
)
|
107
|
+
|
108
|
+
# Combine results
|
109
|
+
result = {
|
110
|
+
"answer": stage2_result["answer"],
|
111
|
+
"project_name": project_info["projectName"],
|
112
|
+
"question": question,
|
113
|
+
"search_terms": stage1_result["search_terms"],
|
114
|
+
"compressed_overview": stage1_result["compressed_overview"],
|
115
|
+
"relevant_files": stage2_result["relevant_files"],
|
116
|
+
"metadata": {
|
117
|
+
"model": self.config.model,
|
118
|
+
"stage1_tokens": stage1_result["token_usage"],
|
119
|
+
"stage2_tokens": stage2_result["token_usage"],
|
120
|
+
"total_files_found": stage2_result["total_files_found"],
|
121
|
+
"files_included": len(stage2_result["relevant_files"]),
|
122
|
+
"branch": project_info.get("branch", "unknown")
|
123
|
+
}
|
124
|
+
}
|
125
|
+
|
126
|
+
self.logger.info(f"DeepAsk question completed successfully")
|
127
|
+
self.logger.info(f"Search terms: {stage1_result['search_terms']}")
|
128
|
+
self.logger.info(f"Files found: {stage2_result['total_files_found']}")
|
129
|
+
self.logger.info(f"Files included: {len(stage2_result['relevant_files'])}")
|
130
|
+
|
131
|
+
return result
|
132
|
+
|
133
|
+
except Exception as e:
|
134
|
+
error_msg = f"Failed to process deepask question: {str(e)}"
|
135
|
+
self.logger.error(error_msg)
|
136
|
+
if isinstance(e, (ClaudeAPIError, DeepAskError)):
|
137
|
+
raise
|
138
|
+
else:
|
139
|
+
raise DeepAskError(error_msg)
|
140
|
+
|
141
|
+
async def _stage1_extract_search_terms(
|
142
|
+
self,
|
143
|
+
project_info: Dict[str, str],
|
144
|
+
question: str
|
145
|
+
) -> Dict[str, Any]:
|
146
|
+
"""
|
147
|
+
Stage 1: Extract search terms and compress project overview.
|
148
|
+
|
149
|
+
Args:
|
150
|
+
project_info: Project information
|
151
|
+
question: User's question
|
152
|
+
|
153
|
+
Returns:
|
154
|
+
Dict with search_terms, compressed_overview, and token_usage
|
155
|
+
"""
|
156
|
+
self.logger.info("Stage 1: Extracting search terms and compressing overview")
|
157
|
+
|
158
|
+
# Get project overview
|
159
|
+
overview = await self.get_project_overview(project_info)
|
160
|
+
if not overview:
|
161
|
+
overview = "No project overview available."
|
162
|
+
|
163
|
+
# Build stage 1 prompt
|
164
|
+
prompt = self._build_stage1_prompt(project_info, question, overview)
|
165
|
+
|
166
|
+
# Validate token limits for stage 1
|
167
|
+
if not self.validate_token_limit(prompt):
|
168
|
+
raise DeepAskError(
|
169
|
+
f"Stage 1 prompt exceeds token limit of {self.config.token_limit}. "
|
170
|
+
"Project overview may be too large."
|
171
|
+
)
|
172
|
+
|
173
|
+
# Call Claude API for stage 1
|
174
|
+
system_prompt = self._get_stage1_system_prompt()
|
175
|
+
response = await self._call_claude_api(prompt, system_prompt)
|
176
|
+
|
177
|
+
# Parse and validate response
|
178
|
+
response_data = self.validate_json_response(
|
179
|
+
response.content,
|
180
|
+
required_keys=["search_terms", "compressed_overview"]
|
181
|
+
)
|
182
|
+
|
183
|
+
token_usage = {
|
184
|
+
"prompt_tokens": self.get_token_count(prompt),
|
185
|
+
"response_tokens": response.usage.get("completion_tokens") if response.usage else None,
|
186
|
+
"total_tokens": response.usage.get("total_tokens") if response.usage else None
|
187
|
+
}
|
188
|
+
|
189
|
+
return {
|
190
|
+
"search_terms": response_data["search_terms"],
|
191
|
+
"compressed_overview": response_data["compressed_overview"],
|
192
|
+
"token_usage": token_usage
|
193
|
+
}
|
194
|
+
|
195
|
+
    async def _stage2_enhanced_answer(
        self,
        project_info: Dict[str, str],
        question: str,
        search_terms: List[str],
        compressed_overview: str,
        max_file_results: int
    ) -> Dict[str, Any]:
        """
        Stage 2: Search file descriptions and provide enhanced answer.

        Args:
            project_info: Project information
            question: User's question
            search_terms: Search terms from stage 1
            compressed_overview: Compressed overview from stage 1
            max_file_results: Maximum number of files to include

        Returns:
            Dict with answer, relevant_files, total_files_found, and token_usage
        """
        self.logger.info(f"Stage 2: Searching files and generating enhanced answer")
        self.logger.info(f"Search terms: {search_terms}")

        # Search for relevant files
        relevant_files = []
        total_files_found = 0

        try:
            # Find existing project by name only (don't create new ones for Q&A)
            project = await self.find_existing_project_by_name(project_info["projectName"])

            if not project:
                # Short-circuit: answer directly without calling the API at all.
                self.logger.warning(f"Project '{project_info['projectName']}' not found in database")
                return {
                    "answer": f"Project '{project_info['projectName']}' not found in database. Please check the project name.",
                    "relevant_files": [],
                    "total_files_found": 0,
                    "token_usage": {"prompt_tokens": 0, "response_tokens": 0, "total_tokens": 0}
                }

            # NOTE(review): project_info["branch"] is accessed without a default
            # here, unlike the .get("branch", "unknown") fallbacks elsewhere —
            # a missing key raises KeyError caught by the outer except below.
            for search_term in search_terms:
                try:
                    search_results = await self.db_manager.search_file_descriptions(
                        project_id=project.id,
                        branch=project_info["branch"],
                        query=search_term,
                        max_results=max_file_results
                    )

                    # total_files_found counts hits across all terms, including
                    # duplicates; relevant_files below is deduplicated by path.
                    total_files_found += len(search_results)

                    # Add unique files to relevant_files
                    for result in search_results:
                        if not any(f["filePath"] == result.file_path for f in relevant_files):
                            relevant_files.append({
                                "filePath": result.file_path,
                                "description": result.description,
                                "search_term": search_term,
                                "relevance_score": result.relevance_score
                            })

                            # Stop if we have enough files
                            if len(relevant_files) >= max_file_results:
                                break

                    # Mirror the inner break at the term level so remaining
                    # search terms are skipped once the cap is reached.
                    if len(relevant_files) >= max_file_results:
                        break

                except Exception as e:
                    # One failing term does not abort the search; try the next.
                    self.logger.warning(f"Search failed for term '{search_term}': {e}")
                    continue

        except Exception as e:
            self.logger.warning(f"Failed to search files: {e}")
            # Continue with empty relevant_files list

        # Build stage 2 prompt with file context
        prompt = self._build_stage2_prompt(
            project_info,
            question,
            compressed_overview,
            relevant_files
        )

        # Validate token limits for stage 2
        if not self.validate_token_limit(prompt):
            # Try reducing file context: halve the file list and rebuild once.
            self.logger.warning("Stage 2 prompt exceeds token limit, reducing file context")
            reduced_files = relevant_files[:max_file_results//2]
            prompt = self._build_stage2_prompt(
                project_info,
                question,
                compressed_overview,
                reduced_files
            )

            if not self.validate_token_limit(prompt):
                raise DeepAskError(
                    f"Stage 2 prompt still exceeds token limit even with reduced context. "
                    "Try a more specific question."
                )

            # Keep the reduced list so the returned metadata matches the prompt
            # that was actually sent.
            relevant_files = reduced_files

        # Call Claude API for stage 2
        system_prompt = self._get_stage2_system_prompt()
        response = await self._call_claude_api(prompt, system_prompt)

        token_usage = {
            "prompt_tokens": self.get_token_count(prompt),
            "response_tokens": response.usage.get("completion_tokens") if response.usage else None,
            "total_tokens": response.usage.get("total_tokens") if response.usage else None
        }

        return {
            "answer": response.content,
            "relevant_files": relevant_files,
            "total_files_found": total_files_found,
            "token_usage": token_usage
        }
|
316
|
+
|
317
|
+
    def _build_stage1_prompt(
        self,
        project_info: Dict[str, str],
        question: str,
        overview: str
    ) -> str:
        """Build stage 1 prompt for extracting search terms.

        Embeds the full project overview and the user's question, and instructs
        the model to reply with JSON containing ``search_terms`` and
        ``compressed_overview`` (validated later by validate_json_response).
        """
        project_name = project_info["projectName"]
        # "branch" may be absent in CLI-supplied project_info; label as unknown.
        branch = project_info.get("branch", "unknown")

        # Doubled braces ({{ }}) produce literal braces in the f-string so the
        # JSON example survives formatting.
        return f"""I need to answer a question about the codebase "{project_name}" (branch: {branch}). To provide the best answer, I need to search for relevant files and then answer the question.

PROJECT OVERVIEW:
{overview}

QUESTION:
{question}

Please analyze the question and project overview, then provide:

1. A list of 3-5 search terms that would help find relevant files to answer this question
2. A compressed version of the project overview (2-3 sentences max) that captures the most relevant information for this question

Respond with valid JSON in this format:
{{
    "search_terms": ["term1", "term2", "term3"],
    "compressed_overview": "Brief summary focusing on aspects relevant to the question..."
}}"""
|
345
|
+
|
346
|
+
    def _build_stage2_prompt(
        self,
        project_info: Dict[str, str],
        question: str,
        compressed_overview: str,
        relevant_files: List[Dict[str, Any]]
    ) -> str:
        """Build stage 2 prompt for enhanced answer.

        Combines the compressed overview from stage 1 with the searched file
        descriptions so the model can ground its answer in specific files.
        """
        project_name = project_info["projectName"]
        branch = project_info.get("branch", "unknown")

        # Format file descriptions
        file_context = ""
        if relevant_files:
            file_context = "\n\nRELEVANT FILES:\n"
            for i, file_info in enumerate(relevant_files, 1):
                file_context += f"\n{i}. {file_info['filePath']}\n"
                file_context += f"   Description: {file_info['description']}\n"
                file_context += f"   Found via search: {file_info['search_term']}\n"
        else:
            # Explicitly tell the model the search was empty rather than
            # omitting the section entirely.
            file_context = "\n\nNo relevant files found in the search."

        return f"""Please answer the following question about the codebase "{project_name}" (branch: {branch}).

PROJECT OVERVIEW (COMPRESSED):
{compressed_overview}
{file_context}

QUESTION:
{question}

Please provide a comprehensive answer based on the project overview and relevant file descriptions above. Reference specific files when appropriate and explain how they relate to the question. If the available information is insufficient, clearly state what additional details would be needed."""
|
378
|
+
|
379
|
+
    def _get_stage1_system_prompt(self) -> str:
        """Get system prompt for stage 1 (term extraction and overview compression)."""
        # Constant instruction text; the "valid JSON" requirement pairs with the
        # validate_json_response check performed on the stage 1 reply.
        return """You are a technical assistant that analyzes software projects to extract relevant search terms and compress information.

Your task:
1. Analyze the user's question about a codebase
2. Extract 3-5 search terms that would help find relevant files to answer the question
3. Compress the project overview to focus on information relevant to the question

Search terms should be:
- Technical keywords (function names, class names, concepts)
- File types or directory names if relevant
- Domain-specific terminology from the question

The compressed overview should:
- Be 2-3 sentences maximum
- Focus only on aspects relevant to answering the question
- Preserve the most important architectural or functional details

Always respond with valid JSON matching the requested format."""
|
399
|
+
|
400
|
+
    def _get_stage2_system_prompt(self) -> str:
        """Get system prompt for stage 2 (final grounded answer generation)."""
        # Constant instruction text; stage 2 replies are used verbatim as the
        # answer, so no JSON format is demanded here.
        return """You are a software engineering expert that provides detailed answers about codebases using available context.

When answering:
1. Use the compressed project overview for high-level context
2. Reference specific files from the relevant files list when they relate to the question
3. Explain how different files work together if relevant
4. Be specific and technical when appropriate
5. If information is incomplete, clearly state what's missing and suggest next steps
6. Provide actionable insights when possible

Your answer should be comprehensive but focused on the specific question asked."""
|
413
|
+
|
414
|
+
def format_response(self, result: Dict[str, Any], format_type: str = "text") -> str:
|
415
|
+
"""
|
416
|
+
Format response for CLI output.
|
417
|
+
|
418
|
+
Args:
|
419
|
+
result: Result from deepask_question
|
420
|
+
format_type: Output format ("text" or "json")
|
421
|
+
|
422
|
+
Returns:
|
423
|
+
Formatted response string
|
424
|
+
"""
|
425
|
+
if format_type == "json":
|
426
|
+
import json
|
427
|
+
return json.dumps(result, indent=2)
|
428
|
+
|
429
|
+
# Text format
|
430
|
+
answer = result["answer"]
|
431
|
+
metadata = result["metadata"]
|
432
|
+
|
433
|
+
output = []
|
434
|
+
output.append(f"Question: {result['question']}")
|
435
|
+
output.append(f"Project: {result['project_name']} (branch: {metadata['branch']})")
|
436
|
+
output.append("")
|
437
|
+
output.append("Answer:")
|
438
|
+
output.append(answer)
|
439
|
+
output.append("")
|
440
|
+
|
441
|
+
# Show search terms used
|
442
|
+
output.append(f"Search terms: {', '.join(result['search_terms'])}")
|
443
|
+
output.append("")
|
444
|
+
|
445
|
+
# Show relevant files
|
446
|
+
if result["relevant_files"]:
|
447
|
+
output.append("Relevant files analyzed:")
|
448
|
+
for i, file_info in enumerate(result["relevant_files"], 1):
|
449
|
+
output.append(f" {i}. {file_info['filePath']}")
|
450
|
+
else:
|
451
|
+
output.append("No relevant files found.")
|
452
|
+
output.append("")
|
453
|
+
|
454
|
+
# Show metadata
|
455
|
+
output.append("Metadata:")
|
456
|
+
output.append(f" Model: {metadata['model']}")
|
457
|
+
output.append(f" Total files found: {metadata['total_files_found']}")
|
458
|
+
output.append(f" Files included: {metadata['files_included']}")
|
459
|
+
|
460
|
+
stage1_tokens = metadata['stage1_tokens']['total_tokens']
|
461
|
+
stage2_tokens = metadata['stage2_tokens']['total_tokens']
|
462
|
+
if stage1_tokens and stage2_tokens:
|
463
|
+
output.append(f" Total tokens: {stage1_tokens + stage2_tokens} (Stage 1: {stage1_tokens}, Stage 2: {stage2_tokens})")
|
464
|
+
|
465
|
+
return "\n".join(output)
|
@@ -324,7 +324,7 @@ class MCPCodeIndexServer:
|
|
324
324
|
),
|
325
325
|
types.Tool(
|
326
326
|
name="search_descriptions",
|
327
|
-
description="Searches through all file descriptions in a project to find files related to specific functionality. Use this for large codebases instead of loading the entire structure. Always start with the fewest terms possible; if the tool returns a lot of results (more than 20) or the results are not relevant, then narrow it down by increasing the number of search
|
327
|
+
description="Searches through all file descriptions in a project to find files related to specific functionality. Use this for large codebases instead of loading the entire structure. Always start with the fewest terms possible (1 to 3 words AT MOST); if the tool returns a lot of results (more than 20) or the results are not relevant, then narrow it down by increasing the number of search words one at a time and calling the tool again. Start VERY broad, then narrow the focus only if needed!",
|
328
328
|
inputSchema={
|
329
329
|
"type": "object",
|
330
330
|
"properties": {
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.4
|
2
2
|
Name: mcp-code-indexer
|
3
|
-
Version: 2.1
|
3
|
+
Version: 2.2.1
|
4
4
|
Summary: MCP server that tracks file descriptions across codebases, enabling AI agents to efficiently navigate and understand code through searchable summaries and token-aware overviews.
|
5
5
|
Author: MCP Code Indexer Contributors
|
6
6
|
Maintainer: MCP Code Indexer Contributors
|
@@ -59,8 +59,8 @@ Dynamic: requires-python
|
|
59
59
|
|
60
60
|
# MCP Code Indexer 🚀
|
61
61
|
|
62
|
-
[](https://badge.fury.io/py/mcp-code-indexer)
|
63
|
+
[](https://pypi.org/project/mcp-code-indexer/)
|
64
64
|
[](https://opensource.org/licenses/MIT)
|
65
65
|
|
66
66
|
A production-ready **Model Context Protocol (MCP) server** that revolutionizes how AI agents navigate and understand codebases. Built for high-concurrency environments with advanced database resilience, the server provides instant access to intelligent descriptions, semantic search, and context-aware recommendations while maintaining 800+ writes/sec throughput.
|
@@ -17,6 +17,9 @@ migrations/002_performance_indexes.sql
|
|
17
17
|
migrations/003_project_overviews.sql
|
18
18
|
src/mcp_code_indexer/__init__.py
|
19
19
|
src/mcp_code_indexer/__main__.py
|
20
|
+
src/mcp_code_indexer/ask_handler.py
|
21
|
+
src/mcp_code_indexer/claude_api_handler.py
|
22
|
+
src/mcp_code_indexer/deepask_handler.py
|
20
23
|
src/mcp_code_indexer/error_handler.py
|
21
24
|
src/mcp_code_indexer/file_scanner.py
|
22
25
|
src/mcp_code_indexer/git_hook_handler.py
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
{mcp_code_indexer-2.1.0 → mcp_code_indexer-2.2.1}/src/mcp_code_indexer/data/stop_words_english.txt
RENAMED
File without changes
|
File without changes
|
{mcp_code_indexer-2.1.0 → mcp_code_indexer-2.2.1}/src/mcp_code_indexer/database/connection_health.py
RENAMED
File without changes
|
File without changes
|
{mcp_code_indexer-2.1.0 → mcp_code_indexer-2.2.1}/src/mcp_code_indexer/database/exceptions.py
RENAMED
File without changes
|
File without changes
|
{mcp_code_indexer-2.1.0 → mcp_code_indexer-2.2.1}/src/mcp_code_indexer/database/retry_executor.py
RENAMED
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
{mcp_code_indexer-2.1.0 → mcp_code_indexer-2.2.1}/src/mcp_code_indexer/middleware/__init__.py
RENAMED
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
{mcp_code_indexer-2.1.0 → mcp_code_indexer-2.2.1}/src/mcp_code_indexer.egg-info/dependency_links.txt
RENAMED
File without changes
|
{mcp_code_indexer-2.1.0 → mcp_code_indexer-2.2.1}/src/mcp_code_indexer.egg-info/entry_points.txt
RENAMED
File without changes
|
{mcp_code_indexer-2.1.0 → mcp_code_indexer-2.2.1}/src/mcp_code_indexer.egg-info/requires.txt
RENAMED
File without changes
|
{mcp_code_indexer-2.1.0 → mcp_code_indexer-2.2.1}/src/mcp_code_indexer.egg-info/top_level.txt
RENAMED
File without changes
|