mcp-code-indexer 4.0.1__py3-none-any.whl → 4.1.0__py3-none-any.whl
This diff compares the contents of two publicly released versions of the package, as published to a supported public registry. It is provided for informational purposes only.
- mcp_code_indexer/__init__.py +7 -5
- mcp_code_indexer/ask_handler.py +2 -2
- mcp_code_indexer/claude_api_handler.py +10 -5
- mcp_code_indexer/cleanup_manager.py +20 -12
- mcp_code_indexer/commands/makelocal.py +85 -63
- mcp_code_indexer/data/stop_words_english.txt +1 -1
- mcp_code_indexer/database/connection_health.py +29 -20
- mcp_code_indexer/database/database.py +44 -31
- mcp_code_indexer/database/database_factory.py +19 -20
- mcp_code_indexer/database/exceptions.py +10 -10
- mcp_code_indexer/database/models.py +126 -1
- mcp_code_indexer/database/path_resolver.py +22 -21
- mcp_code_indexer/database/retry_executor.py +37 -19
- mcp_code_indexer/deepask_handler.py +3 -3
- mcp_code_indexer/error_handler.py +46 -20
- mcp_code_indexer/file_scanner.py +15 -12
- mcp_code_indexer/git_hook_handler.py +71 -76
- mcp_code_indexer/logging_config.py +13 -5
- mcp_code_indexer/main.py +85 -22
- mcp_code_indexer/middleware/__init__.py +1 -1
- mcp_code_indexer/middleware/auth.py +47 -43
- mcp_code_indexer/middleware/error_middleware.py +15 -15
- mcp_code_indexer/middleware/logging.py +44 -42
- mcp_code_indexer/middleware/security.py +84 -76
- mcp_code_indexer/migrations/002_performance_indexes.sql +1 -1
- mcp_code_indexer/migrations/004_remove_branch_dependency.sql +14 -14
- mcp_code_indexer/migrations/006_vector_mode.sql +189 -0
- mcp_code_indexer/query_preprocessor.py +2 -2
- mcp_code_indexer/server/mcp_server.py +158 -94
- mcp_code_indexer/transport/__init__.py +1 -1
- mcp_code_indexer/transport/base.py +19 -17
- mcp_code_indexer/transport/http_transport.py +89 -76
- mcp_code_indexer/transport/stdio_transport.py +12 -8
- mcp_code_indexer/vector_mode/__init__.py +36 -0
- mcp_code_indexer/vector_mode/chunking/__init__.py +19 -0
- mcp_code_indexer/vector_mode/chunking/ast_chunker.py +403 -0
- mcp_code_indexer/vector_mode/chunking/chunk_optimizer.py +500 -0
- mcp_code_indexer/vector_mode/chunking/language_handlers.py +478 -0
- mcp_code_indexer/vector_mode/config.py +155 -0
- mcp_code_indexer/vector_mode/daemon.py +335 -0
- mcp_code_indexer/vector_mode/monitoring/__init__.py +19 -0
- mcp_code_indexer/vector_mode/monitoring/change_detector.py +312 -0
- mcp_code_indexer/vector_mode/monitoring/file_watcher.py +445 -0
- mcp_code_indexer/vector_mode/monitoring/merkle_tree.py +418 -0
- mcp_code_indexer/vector_mode/providers/__init__.py +72 -0
- mcp_code_indexer/vector_mode/providers/base_provider.py +230 -0
- mcp_code_indexer/vector_mode/providers/turbopuffer_client.py +338 -0
- mcp_code_indexer/vector_mode/providers/voyage_client.py +212 -0
- mcp_code_indexer/vector_mode/security/__init__.py +11 -0
- mcp_code_indexer/vector_mode/security/patterns.py +297 -0
- mcp_code_indexer/vector_mode/security/redactor.py +368 -0
- {mcp_code_indexer-4.0.1.dist-info → mcp_code_indexer-4.1.0.dist-info}/METADATA +82 -24
- mcp_code_indexer-4.1.0.dist-info/RECORD +66 -0
- mcp_code_indexer-4.0.1.dist-info/RECORD +0 -47
- {mcp_code_indexer-4.0.1.dist-info → mcp_code_indexer-4.1.0.dist-info}/LICENSE +0 -0
- {mcp_code_indexer-4.0.1.dist-info → mcp_code_indexer-4.1.0.dist-info}/WHEEL +0 -0
- {mcp_code_indexer-4.0.1.dist-info → mcp_code_indexer-4.1.0.dist-info}/entry_points.txt +0 -0
mcp_code_indexer/git_hook_handler.py
CHANGED
@@ -10,17 +10,16 @@ import asyncio
 import json
 import logging
 import os
-import subprocess
+import subprocess  # nosec B404
 from pathlib import Path
-from typing import Dict, List, Optional, Tuple,
-
+from typing import Any, Dict, List, Optional, Tuple, Union, cast
 
 import aiohttp
 from tenacity import (
     retry,
-    wait_exponential,
-    stop_after_attempt,
     retry_if_exception_type,
+    stop_after_attempt,
+    wait_exponential,
 )
 
 from .database.database import DatabaseManager
@@ -73,7 +72,7 @@ class GitHookHandler:
         self.token_counter = TokenCounter()
 
         # Git hook specific settings
-        self.config = {
+        self.config: Dict[str, Union[str, int, float]] = {
             "model": os.getenv("MCP_GITHOOK_MODEL", self.OPENROUTER_MODEL),
             "max_diff_tokens": 136000,  # Skip if diff larger than this (in tokens)
             "chunk_token_limit": 100000,  # Target token limit per chunk
@@ -91,14 +90,14 @@ class GitHookHandler:
     def _log_and_print(self, message: str, level: str = "info") -> None:
         """
         Log message and also print to stdout for user visibility.
-
+
         Args:
             message: Message to log and print
             level: Log level (info, warning, error)
         """
         # Log to logger
         getattr(self.logger, level)(message)
-
+
         # Also print to stdout with prefix for visibility
         prefix = "🔍" if level == "info" else "⚠️" if level == "warning" else "❌"
         print(f"{prefix} {message}")
@@ -130,9 +129,7 @@ class GitHookHandler:
 
         # Get git info from current directory
         project_info = await self._identify_project_from_git()
-        self._log_and_print(
-            f"Project: {project_info.get('name', 'Unknown')}"
-        )
+        self._log_and_print(f"Project: {project_info.get('name', 'Unknown')}")
 
         # Get git diff and commit message based on mode
         if commit_hash:
@@ -187,11 +184,11 @@
 
         # Apply updates to database
         await self._apply_updates(project_info, updates)
-
+
         # Count actual updates
         file_update_count = len(updates.get("file_updates", {}))
         overview_updated = bool(updates.get("overview_update"))
-
+
         if file_update_count > 0 or overview_updated:
             update_parts = []
             if file_update_count > 0:
@@ -250,7 +247,7 @@
         self.logger.info(f"Single-stage prompt: {prompt_tokens} tokens")
         self.logger.info(f"Token limit: {token_limit}")
 
-        if prompt_tokens <= token_limit:
+        if prompt_tokens <= int(token_limit):
             # Use single-stage approach
             self._log_and_print("Using single-stage analysis")
             result = await self._call_openrouter(single_stage_prompt)
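Reviewer note: the `int(...)` and `float(...)` wrappers in this and later hunks follow from the new `Dict[str, Union[str, int, float]]` annotation on `self.config`: mypy sees every lookup as the full union, so numeric uses need an explicit narrowing. A minimal sketch of the pattern (the `config` dict below is illustrative, not the handler's real one):

    from typing import Dict, Union

    config: Dict[str, Union[str, int, float]] = {"max_diff_tokens": 136000}

    prompt_tokens = 150_000
    # config["max_diff_tokens"] has static type Union[str, int, float];
    # int() narrows it so the comparison type-checks under mypy.
    if prompt_tokens > int(config["max_diff_tokens"]):
        print("prompt too large")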
@@ -262,18 +259,22 @@
         # Try two-stage analysis first
         try:
             return await self._analyze_with_two_stage(
-                git_diff,
-
+                git_diff,
+                commit_message,
+                current_overview,
+                current_descriptions,
+                changed_files,
             )
         except GitHookError as e:
             if "too large" in str(e).lower():
                 # Fall back to chunked processing
-                self._log_and_print(
-                    "Using chunked processing (very large diff)"
-                )
+                self._log_and_print("Using chunked processing (very large diff)")
                 return await self._analyze_with_chunking(
-                    git_diff,
-
+                    git_diff,
+                    commit_message,
+                    current_overview,
+                    current_descriptions,
+                    changed_files,
                 )
             else:
                 raise
@@ -319,7 +320,7 @@ CURRENT FILE DESCRIPTIONS (for changed files only):
 {json.dumps(relevant_descriptions, indent=2)}
 
 CHANGED FILES:
-{
+{", ".join(changed_files)}
 
 GIT DIFF:
 {git_diff}
@@ -663,44 +664,39 @@ Return ONLY a JSON object:
         )
 
         # Break changed files into chunks and process file descriptions
-        chunk_size = await self._calculate_optimal_chunk_size(
-
-        )
-
+        chunk_size = await self._calculate_optimal_chunk_size(git_diff, changed_files)
+
         self._log_and_print(f"Processing in {chunk_size}-file chunks")
-
+
         all_file_updates = {}
-
+
         for i in range(0, len(changed_files), chunk_size):
-            chunk_files = changed_files[i:i + chunk_size]
+            chunk_files = changed_files[i : i + chunk_size]
             chunk_number = (i // chunk_size) + 1
             total_chunks = (len(changed_files) + chunk_size - 1) // chunk_size
-
+
             self._log_and_print(
                 f"Processing chunk {chunk_number}/{total_chunks} "
                 f"({len(chunk_files)} files)"
             )
-
+
             # Extract diff content for this chunk
             chunk_diff = self._extract_chunk_diff(git_diff, chunk_files)
-
+
             # Process this chunk
             chunk_updates = await self._analyze_file_chunk(
                 chunk_diff, commit_message, current_descriptions, chunk_files
             )
-
+
             # Merge results
             if chunk_updates and "file_updates" in chunk_updates:
                 all_file_updates.update(chunk_updates["file_updates"])
-
+
         self.logger.info(
             f"Chunked processing completed: updated {len(all_file_updates)} files"
         )
-
-        return {
-            "file_updates": all_file_updates,
-            "overview_update": overview_update
-        }
+
+        return {"file_updates": all_file_updates, "overview_update": overview_update}
 
     async def _analyze_overview_updates(
         self,
@@ -733,7 +729,7 @@ CURRENT PROJECT OVERVIEW:
 {current_overview or "No overview available"}
 
 CHANGED FILES:
-{
+{", ".join(changed_files)}
 
 GIT DIFF:
 {git_diff}
@@ -762,10 +758,8 @@ Return ONLY a JSON object:
         prompt_tokens = self.token_counter.count_tokens(prompt)
         self.logger.info(f"Stage 1 prompt: {prompt_tokens} tokens")
 
-        if prompt_tokens > self.config["max_diff_tokens"]:
-            raise GitHookError(
-                f"Stage 1 prompt too large ({prompt_tokens} tokens)"
-            )
+        if prompt_tokens > int(self.config["max_diff_tokens"]):
+            raise GitHookError(f"Stage 1 prompt too large ({prompt_tokens} tokens)")
 
         # Call OpenRouter API
         result = await self._call_openrouter(prompt)
@@ -811,7 +805,7 @@ CURRENT FILE DESCRIPTIONS (for changed files only):
 {json.dumps(relevant_descriptions, indent=2)}
 
 CHANGED FILES:
-{
+{", ".join(changed_files)}
 
 GIT DIFF:
 {git_diff}
@@ -836,10 +830,8 @@ Return ONLY a JSON object:
         prompt_tokens = self.token_counter.count_tokens(prompt)
         self.logger.info(f"Stage 2 prompt: {prompt_tokens} tokens")
 
-        if prompt_tokens > self.config["max_diff_tokens"]:
-            raise GitHookError(
-                f"Stage 2 prompt too large ({prompt_tokens} tokens)"
-            )
+        if prompt_tokens > int(self.config["max_diff_tokens"]):
+            raise GitHookError(f"Stage 2 prompt too large ({prompt_tokens} tokens)")
 
         # Call OpenRouter API
         result = await self._call_openrouter(prompt)
@@ -875,7 +867,7 @@ CURRENT PROJECT OVERVIEW:
 {current_overview or "No overview available"}
 
 CHANGED FILES:
-{
+{", ".join(changed_files)}
 
 INSTRUCTIONS:
 Update project overview ONLY if there are major structural changes like:
@@ -918,26 +910,26 @@ Return ONLY a JSON object:
         # Estimate average diff size per file
         total_diff_tokens = self.token_counter.count_tokens(git_diff)
        avg_tokens_per_file = total_diff_tokens / len(changed_files)
-
+
         # Target chunk token limit
         chunk_limit = self.config.get("chunk_token_limit", 100000)
-
+
         # Calculate chunk size with buffer for overhead
         overhead_factor = 0.7  # Reserve 30% for prompt overhead
-        effective_limit = chunk_limit * overhead_factor
-
+        effective_limit = int(chunk_limit) * overhead_factor
+
         chunk_size = max(1, int(effective_limit / avg_tokens_per_file))
-
+
         # Cap at reasonable limits
         chunk_size = min(chunk_size, 50)  # Max 50 files per chunk
-        chunk_size = max(chunk_size, 5)
-
+        chunk_size = max(chunk_size, 5)  # Min 5 files per chunk
+
         self.logger.info(
             f"Calculated chunk size: {chunk_size} files "
             f"(avg {avg_tokens_per_file:.0f} tokens/file, "
             f"target {chunk_limit} tokens/chunk)"
         )
-
+
         return chunk_size
 
     def _extract_chunk_diff(self, git_diff: str, chunk_files: List[str]) -> str:
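Reviewer note: the clamped chunk-size formula above is easy to sanity-check by hand. A worked example with invented numbers: a 300,000-token diff across 120 files gives an average of 2,500 tokens per file; a 100,000-token chunk limit with the 0.7 overhead factor leaves an effective budget of 70,000 tokens, so `chunk_size = int(70000 / 2500) = 28`, already inside the [5, 50] clamp:

    # Worked example of the chunk-size calculation (illustrative numbers).
    total_diff_tokens = 300_000
    changed_file_count = 120
    avg_tokens_per_file = total_diff_tokens / changed_file_count  # 2500.0

    chunk_limit = 100_000
    effective_limit = int(chunk_limit) * 0.7  # 70000.0; 30% reserved for prompt overhead

    chunk_size = max(1, int(effective_limit / avg_tokens_per_file))  # 28
    chunk_size = min(chunk_size, 50)  # max 50 files per chunk
    chunk_size = max(chunk_size, 5)   # min 5 files per chunk
    assert chunk_size == 28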
@@ -951,24 +943,22 @@ Return ONLY a JSON object:
         Returns:
             Filtered diff content for chunk files only
         """
-        lines = git_diff.split(
+        lines = git_diff.split("\n")
         chunk_lines = []
-        current_file = None
         include_section = False
-
+
         for line in lines:
-            if line.startswith(
+            if line.startswith("diff --git"):
                 # Parse file path from diff header
-                parts = line.split(
+                parts = line.split(" ")
                 if len(parts) >= 4:
                     file_path = parts[2][2:]  # Remove 'a/' prefix
-                    current_file = file_path
                     include_section = file_path in chunk_files
-
+
             if include_section:
                 chunk_lines.append(line)
-
-        return
+
+        return "\n".join(chunk_lines)
 
     async def _analyze_file_chunk(
         self,
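Reviewer note: the repaired `_extract_chunk_diff` body walks the unified diff line by line, flipping `include_section` at each `diff --git a/<path> b/<path>` header; `parts[2][2:]` strips the `a/` prefix. The same logic restated as a standalone, runnable sketch:

    from typing import List

    def extract_chunk_diff(git_diff: str, chunk_files: List[str]) -> str:
        """Keep only the sections of a unified diff that belong to chunk_files."""
        chunk_lines = []
        include_section = False
        for line in git_diff.split("\n"):
            if line.startswith("diff --git"):
                parts = line.split(" ")
                if len(parts) >= 4:
                    file_path = parts[2][2:]  # drop the 'a/' prefix
                    include_section = file_path in chunk_files
            if include_section:
                chunk_lines.append(line)
        return "\n".join(chunk_lines)

    toy_diff = (
        "diff --git a/kept.py b/kept.py\n+print('hi')\n"
        "diff --git a/dropped.py b/dropped.py\n+print('bye')"
    )
    assert "dropped.py" not in extract_chunk_diff(toy_diff, ["kept.py"])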
@@ -1005,14 +995,14 @@ CURRENT FILE DESCRIPTIONS (for chunk files only):
 {json.dumps(relevant_descriptions, indent=2)}
 
 CHUNK FILES:
-{
+{", ".join(chunk_files)}
 
 GIT DIFF (chunk only):
 {chunk_diff}
 
 INSTRUCTIONS:
 Use the COMMIT MESSAGE to understand the intent and context of the changes.
-Update descriptions for files that have changed significantly.
+Update descriptions for files that have changed significantly.
 Only include files that need actual description updates.
 
 Return ONLY a JSON object:
@@ -1027,7 +1017,7 @@ Return ONLY a JSON object:
         prompt_tokens = self.token_counter.count_tokens(prompt)
         self.logger.info(f"Chunk prompt: {prompt_tokens} tokens")
 
-        if prompt_tokens > self.config.get("chunk_token_limit", 100000):
+        if prompt_tokens > int(self.config.get("chunk_token_limit", 100000)):
             self.logger.warning(
                 f"Chunk still too large ({prompt_tokens} tokens), "
                 f"skipping {len(chunk_files)} files"
@@ -1081,7 +1071,7 @@ Return ONLY a JSON object:
             "max_tokens": 24000,
         }
 
-        timeout = aiohttp.ClientTimeout(total=self.config["timeout"])
+        timeout = aiohttp.ClientTimeout(total=float(self.config["timeout"]))
 
         self.logger.info("Sending request to OpenRouter API...")
         self.logger.info(f"  Model: {self.config['model']}")
@@ -1094,7 +1084,6 @@ Return ONLY a JSON object:
             async with session.post(
                 self.OPENROUTER_API_URL, headers=headers, json=payload
             ) as response:
-
                 self.logger.info(
                     f"OpenRouter API response status: {response.status}"
                 )
@@ -1221,7 +1210,7 @@ Return ONLY a JSON object:
                     "Response must contain 'file_updates' and/or 'overview_update'"
                 )
 
-            return data
+            return cast(Dict[str, Any], data)
 
         except json.JSONDecodeError as e:
             self.logger.error(f"Raw response content: {repr(response_text)}")
@@ -1249,11 +1238,15 @@ Return ONLY a JSON object:
         # Update file descriptions
         file_updates = updates.get("file_updates", {})
         for file_path, description in file_updates.items():
-            from mcp_code_indexer.database.models import FileDescription
             from datetime import datetime
 
+            from mcp_code_indexer.database.models import FileDescription
+
             file_desc = FileDescription(
+                id=None,
                 project_id=project.id,
+                source_project_id=None,
+                to_be_cleaned=None,
                 file_path=file_path,
                 description=description,
                 file_hash=None,
@@ -1266,9 +1259,10 @@ Return ONLY a JSON object:
         # Update project overview if provided
         overview_update = updates.get("overview_update")
         if overview_update and overview_update.strip():
-            from mcp_code_indexer.database.models import ProjectOverview
             from datetime import datetime
 
+            from mcp_code_indexer.database.models import ProjectOverview
+
             overview = ProjectOverview(
                 project_id=project.id,
                 overview=overview_update,
@@ -1305,8 +1299,9 @@ Return ONLY a JSON object:
         stdout, stderr = await process.communicate()
 
         if process.returncode != 0:
+            returncode = process.returncode if process.returncode is not None else 1
             raise subprocess.CalledProcessError(
-
+                returncode, full_cmd, stdout, stderr
             )
 
         return stdout.decode("utf-8")
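Reviewer note: the `returncode` guard exists because `asyncio`'s `Process.returncode` is typed `Optional[int]` (it is `None` until the process exits), while `subprocess.CalledProcessError` expects an `int`; after `communicate()` returns the value is set in practice, so the `else 1` branch mostly satisfies the type checker. The pattern in isolation (a sketch assuming a POSIX shell is available):

    import asyncio
    import subprocess

    async def run_shell(cmd: str) -> str:
        process = await asyncio.create_subprocess_shell(
            cmd, stdout=asyncio.subprocess.PIPE, stderr=asyncio.subprocess.PIPE
        )
        stdout, stderr = await process.communicate()
        if process.returncode != 0:
            # returncode is Optional[int]; fall back to 1 so a plain int
            # reaches CalledProcessError.
            returncode = process.returncode if process.returncode is not None else 1
            raise subprocess.CalledProcessError(returncode, cmd, stdout, stderr)
        return stdout.decode("utf-8")

    print(asyncio.run(run_shell("echo ok")))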
mcp_code_indexer/logging_config.py
CHANGED
@@ -9,7 +9,7 @@ import logging
 import logging.handlers
 import sys
 from pathlib import Path
-from typing import Optional
+from typing import Optional, Union
 
 from .error_handler import StructuredFormatter
 
@@ -58,6 +58,9 @@ def setup_logging(
         log_file.parent.mkdir(parents=True, exist_ok=True)
 
         # Rotating file handler
+        file_handler: Union[
+            logging.handlers.RotatingFileHandler, logging.FileHandler
+        ]
         if max_bytes > 0:
             file_handler = logging.handlers.RotatingFileHandler(
                 log_file,
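Reviewer note: the bare `file_handler: Union[...]` line is a declaration without assignment, so both branches of the following `if` can bind different handler classes and mypy accepts either. (Since `RotatingFileHandler` subclasses `logging.FileHandler`, annotating plain `logging.FileHandler` would also work; the union just makes the intent explicit.) The trick in isolation, with an illustrative `max_bytes`:

    import logging
    import logging.handlers
    from typing import Union

    max_bytes = 0  # illustrative: 0 means "no rotation" in this sketch

    # Declare the variable's type up front so mypy accepts either branch.
    file_handler: Union[logging.handlers.RotatingFileHandler, logging.FileHandler]
    if max_bytes > 0:
        file_handler = logging.handlers.RotatingFileHandler(
            "app.log", maxBytes=max_bytes, backupCount=2
        )
    else:
        file_handler = logging.FileHandler("app.log")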
@@ -144,7 +147,10 @@ def setup_command_logger(
 
     # File handler with 50MB limit
     file_handler = logging.handlers.RotatingFileHandler(
-        log_file,
+        log_file,
+        maxBytes=50 * 1024 * 1024,
+        backupCount=2,
+        encoding="utf-8",  # 50MB
     )
     file_handler.setLevel(logging.DEBUG)
 
@@ -172,7 +178,9 @@ def setup_command_logger(
 
 
 def _setup_component_loggers_for_command(
-    command_name: str,
+    command_name: str,
+    file_handler: logging.handlers.RotatingFileHandler,
+    formatter: logging.Formatter,
 ) -> None:
     """
     Set up component loggers to also send logs to the command's log file.
@@ -205,7 +213,7 @@ def _setup_component_loggers_for_command(
     command_handler.setFormatter(formatter)
 
     # Add a marker to identify which command this handler belongs to
-    command_handler
+    setattr(command_handler, "_command_name", command_name)
 
     # Remove any existing handlers for this command (in case of multiple calls)
     existing_handlers = [
@@ -225,7 +233,7 @@ def _setup_component_loggers_for_command(
 
 
 def log_performance_metrics(
-    logger: logging.Logger, operation: str, duration: float, **metrics
+    logger: logging.Logger, operation: str, duration: float, **metrics: object
 ) -> None:
     """
     Log performance metrics in structured format.
mcp_code_indexer/main.py
CHANGED
@@ -10,10 +10,12 @@ import asyncio
 import json
 import sys
 from pathlib import Path
+from typing import Any, Dict, List, Optional
+import logging
 
 from . import __version__
-from .logging_config import setup_logging
 from .error_handler import setup_error_handling
+from .logging_config import setup_logging
 
 
 def parse_arguments() -> argparse.Namespace:
@@ -77,8 +79,7 @@ def parse_arguments() -> argparse.Namespace:
         nargs="+",
         metavar="PROJECT_ID",
         help=(
-            "Export descriptions for a project. Usage: "
-            "--dumpdescriptions PROJECT_ID"
+            "Export descriptions for a project. Usage: --dumpdescriptions PROJECT_ID"
         ),
     )
 
@@ -105,8 +106,7 @@ def parse_arguments() -> argparse.Namespace:
         type=str,
         metavar="PROJECT_NAME_OR_ID",
         help=(
-            "Generate a markdown project map for the specified project "
-            "(by name or ID)"
+            "Generate a markdown project map for the specified project (by name or ID)"
         ),
     )
 
@@ -122,27 +122,27 @@ def parse_arguments() -> argparse.Namespace:
         action="store_true",
         help="Enable HTTP transport instead of stdio (requires 'http' extras)",
     )
-
+
     parser.add_argument(
         "--host",
         type=str,
         default="127.0.0.1",
         help="Host to bind HTTP server to (default: 127.0.0.1)",
     )
-
+
     parser.add_argument(
         "--port",
         type=int,
         default=7557,
         help="Port to bind HTTP server to (default: 7557)",
     )
-
+
     parser.add_argument(
         "--auth-token",
         type=str,
         help="Bearer token for HTTP authentication (optional)",
     )
-
+
     parser.add_argument(
         "--cors-origins",
         type=str,
@@ -151,6 +151,19 @@ def parse_arguments() -> argparse.Namespace:
         help="Allowed CORS origins for HTTP transport (default: allow all)",
     )
 
+    # Vector mode options
+    parser.add_argument(
+        "--vector",
+        action="store_true",
+        help="Enable vector mode with semantic search capabilities (requires vector extras)",
+    )
+
+    parser.add_argument(
+        "--vector-config",
+        type=str,
+        help="Path to vector mode configuration file",
+    )
+
     return parser.parse_args()
 
 
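Reviewer note: with these two flags, vector mode is opt-in from the command line; an invocation like `mcp-code-indexer --vector --vector-config ~/.mcp/vector-config.json` would enable it (the config path is a made-up example; only the flag names come from this diff). The actual gating — dependency check, API-key check, daemon startup — happens in `main()` further down.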
@@ -199,8 +212,8 @@ async def handle_getprojects(args: argparse.Namespace) -> None:
 
 async def handle_runcommand(args: argparse.Namespace) -> None:
     """Handle --runcommand command."""
-    from .server.mcp_server import MCPCodeIndexServer
     from .logging_config import setup_command_logger
+    from .server.mcp_server import MCPCodeIndexServer
 
     # Set up dedicated logging for runcommand
     cache_dir = Path(args.cache_dir).expanduser()
@@ -390,8 +403,8 @@ async def handle_runcommand(args: argparse.Namespace) -> None:
 
         return html.unescape(text)
 
-    def clean_arguments(arguments:
-        cleaned = {}
+    def clean_arguments(arguments: Dict[str, Any]) -> Dict[str, Any]:
+        cleaned: Dict[str, Any] = {}
         for key, value in arguments.items():
             if isinstance(value, str):
                 cleaned[key] = clean_html_entities(value)
@@ -753,9 +766,7 @@ async def handle_map(args: argparse.Namespace) -> None:
         )
 
         # Generate markdown
-        markdown_content = generate_project_markdown(
-            project, overview, files, logger
-        )
+        markdown_content = generate_project_markdown(project, overview, files, logger)
 
         # Output the markdown
         print(markdown_content)
@@ -788,7 +799,9 @@ async def handle_map(args: argparse.Namespace) -> None:
             logger.removeHandler(handler)
 
 
-def generate_project_markdown(project, overview, files, logger):
+def generate_project_markdown(
+    project: Any, overview: Optional[Any], files: List[Any], logger: logging.Logger
+) -> str:
     """Generate the markdown content for the project map."""
     import re
     from collections import defaultdict
@@ -881,8 +894,8 @@ def generate_project_markdown(project, overview, files, logger):
 async def handle_makelocal(args: argparse.Namespace) -> None:
     """Handle --makelocal command."""
     try:
-        from .database.database_factory import DatabaseFactory
         from .commands.makelocal import MakeLocalCommand
+        from .database.database_factory import DatabaseFactory
 
         # Initialize database factory
         db_path = Path(args.db_path).expanduser()
@@ -910,10 +923,12 @@ async def handle_makelocal(args: argparse.Namespace) -> None:
         # Execute the command
         result = await makelocal_cmd.execute(args.makelocal)
 
-        print(
+        print(
+            f"Successfully migrated project '{result['project_name']}' to local database"
+        )
         print(f"Local database created at: {result['local_database_path']}")
         print(f"Migrated {result['migrated_files']} file descriptions")
-        if result[
+        if result["migrated_overview"]:
             print("Migrated project overview")
 
         # Close all database connections
@@ -994,13 +1009,60 @@ async def main() -> None:
     )
 
     try:
+        # Handle vector mode initialization
+        vector_daemon_task = None
+        if args.vector:
+            try:
+                from .vector_mode import is_vector_mode_available, check_api_keys
+                from .vector_mode.config import load_vector_config
+                from .vector_mode.daemon import start_vector_daemon
+
+                # Check if vector mode is available
+                if not is_vector_mode_available():
+                    logger.error("Vector mode requires additional dependencies. Install with: pip install mcp-code-indexer[vector]")
+                    sys.exit(1)
+
+                # Check API keys
+                api_keys = check_api_keys()
+                if not all(api_keys.values()):
+                    missing = [k for k, v in api_keys.items() if not v]
+                    logger.error(f"Missing API keys for vector mode: {', '.join(missing)}")
+                    sys.exit(1)
+
+                # Load vector configuration
+                vector_config_path = Path(args.vector_config).expanduser() if args.vector_config else None
+                vector_config = load_vector_config(vector_config_path)
+
+                logger.info(
+                    "Vector mode enabled",
+                    extra={
+                        "structured_data": {
+                            "embedding_model": vector_config.embedding_model,
+                            "batch_size": vector_config.batch_size,
+                            "daemon_enabled": vector_config.daemon_enabled,
+                        }
+                    }
+                )
+
+                # Start vector daemon in background
+                if vector_config.daemon_enabled:
+                    vector_daemon_task = asyncio.create_task(
+                        start_vector_daemon(vector_config_path, db_path, cache_dir)
+                    )
+                    logger.info("Vector daemon started")
+
+            except Exception as e:
+                logger.error(f"Failed to initialize vector mode: {e}")
+                sys.exit(1)
+
         # Import and run the MCP server
         from .server.mcp_server import MCPCodeIndexServer
-
+
         # Create transport based on arguments
         transport = None
         if args.http:
             from .transport.http_transport import HTTPTransport
+
             transport = HTTPTransport(
                 server_instance=None,  # Will be set after server creation
                 host=args.host,
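Reviewer note: the daemon is launched with `asyncio.create_task(...)` and the handle kept in `vector_daemon_task`, but nothing in this hunk awaits or cancels it, so shutdown handling presumably lives elsewhere. A generic sketch of that keep-a-handle background-task pattern (names invented, not this project's API):

    import asyncio

    async def daemon_loop() -> None:
        # Stand-in for the real vector daemon: poll until cancelled.
        while True:
            await asyncio.sleep(1)

    async def main() -> None:
        daemon_task = asyncio.create_task(daemon_loop())  # runs in the background
        try:
            await asyncio.sleep(0.1)  # the server would run here
        finally:
            daemon_task.cancel()  # tear the background task down on exit
            try:
                await daemon_task
            except asyncio.CancelledError:
                pass

    asyncio.run(main())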
@@ -1025,8 +1087,9 @@ async def main() -> None:
             db_path=db_path,
             cache_dir=cache_dir,
             transport=transport,
+            vector_mode=args.vector,
         )
-
+
         # Set server instance in transport after server creation
         if transport:
             transport.server = server
@@ -1038,7 +1101,7 @@ async def main() -> None:
         raise
 
 
-def cli_main():
+def cli_main() -> None:
     """Console script entry point."""
     try:
         asyncio.run(main())
mcp_code_indexer/middleware/__init__.py
CHANGED
@@ -5,8 +5,8 @@ This module provides middleware for HTTP transport features like
 logging, authentication, and security.
 """
 
-from .logging import HTTPLoggingMiddleware
 from .auth import HTTPAuthMiddleware
+from .logging import HTTPLoggingMiddleware
 from .security import HTTPSecurityMiddleware
 
 __all__ = ["HTTPLoggingMiddleware", "HTTPAuthMiddleware", "HTTPSecurityMiddleware"]