mcp-code-indexer 4.0.1-py3-none-any.whl → 4.1.0-py3-none-any.whl

This diff compares the contents of two publicly released versions of the package, as published to a supported public registry. It is provided for informational purposes only.
Files changed (57)
  1. mcp_code_indexer/__init__.py +7 -5
  2. mcp_code_indexer/ask_handler.py +2 -2
  3. mcp_code_indexer/claude_api_handler.py +10 -5
  4. mcp_code_indexer/cleanup_manager.py +20 -12
  5. mcp_code_indexer/commands/makelocal.py +85 -63
  6. mcp_code_indexer/data/stop_words_english.txt +1 -1
  7. mcp_code_indexer/database/connection_health.py +29 -20
  8. mcp_code_indexer/database/database.py +44 -31
  9. mcp_code_indexer/database/database_factory.py +19 -20
  10. mcp_code_indexer/database/exceptions.py +10 -10
  11. mcp_code_indexer/database/models.py +126 -1
  12. mcp_code_indexer/database/path_resolver.py +22 -21
  13. mcp_code_indexer/database/retry_executor.py +37 -19
  14. mcp_code_indexer/deepask_handler.py +3 -3
  15. mcp_code_indexer/error_handler.py +46 -20
  16. mcp_code_indexer/file_scanner.py +15 -12
  17. mcp_code_indexer/git_hook_handler.py +71 -76
  18. mcp_code_indexer/logging_config.py +13 -5
  19. mcp_code_indexer/main.py +85 -22
  20. mcp_code_indexer/middleware/__init__.py +1 -1
  21. mcp_code_indexer/middleware/auth.py +47 -43
  22. mcp_code_indexer/middleware/error_middleware.py +15 -15
  23. mcp_code_indexer/middleware/logging.py +44 -42
  24. mcp_code_indexer/middleware/security.py +84 -76
  25. mcp_code_indexer/migrations/002_performance_indexes.sql +1 -1
  26. mcp_code_indexer/migrations/004_remove_branch_dependency.sql +14 -14
  27. mcp_code_indexer/migrations/006_vector_mode.sql +189 -0
  28. mcp_code_indexer/query_preprocessor.py +2 -2
  29. mcp_code_indexer/server/mcp_server.py +158 -94
  30. mcp_code_indexer/transport/__init__.py +1 -1
  31. mcp_code_indexer/transport/base.py +19 -17
  32. mcp_code_indexer/transport/http_transport.py +89 -76
  33. mcp_code_indexer/transport/stdio_transport.py +12 -8
  34. mcp_code_indexer/vector_mode/__init__.py +36 -0
  35. mcp_code_indexer/vector_mode/chunking/__init__.py +19 -0
  36. mcp_code_indexer/vector_mode/chunking/ast_chunker.py +403 -0
  37. mcp_code_indexer/vector_mode/chunking/chunk_optimizer.py +500 -0
  38. mcp_code_indexer/vector_mode/chunking/language_handlers.py +478 -0
  39. mcp_code_indexer/vector_mode/config.py +155 -0
  40. mcp_code_indexer/vector_mode/daemon.py +335 -0
  41. mcp_code_indexer/vector_mode/monitoring/__init__.py +19 -0
  42. mcp_code_indexer/vector_mode/monitoring/change_detector.py +312 -0
  43. mcp_code_indexer/vector_mode/monitoring/file_watcher.py +445 -0
  44. mcp_code_indexer/vector_mode/monitoring/merkle_tree.py +418 -0
  45. mcp_code_indexer/vector_mode/providers/__init__.py +72 -0
  46. mcp_code_indexer/vector_mode/providers/base_provider.py +230 -0
  47. mcp_code_indexer/vector_mode/providers/turbopuffer_client.py +338 -0
  48. mcp_code_indexer/vector_mode/providers/voyage_client.py +212 -0
  49. mcp_code_indexer/vector_mode/security/__init__.py +11 -0
  50. mcp_code_indexer/vector_mode/security/patterns.py +297 -0
  51. mcp_code_indexer/vector_mode/security/redactor.py +368 -0
  52. {mcp_code_indexer-4.0.1.dist-info → mcp_code_indexer-4.1.0.dist-info}/METADATA +82 -24
  53. mcp_code_indexer-4.1.0.dist-info/RECORD +66 -0
  54. mcp_code_indexer-4.0.1.dist-info/RECORD +0 -47
  55. {mcp_code_indexer-4.0.1.dist-info → mcp_code_indexer-4.1.0.dist-info}/LICENSE +0 -0
  56. {mcp_code_indexer-4.0.1.dist-info → mcp_code_indexer-4.1.0.dist-info}/WHEEL +0 -0
  57. {mcp_code_indexer-4.0.1.dist-info → mcp_code_indexer-4.1.0.dist-info}/entry_points.txt +0 -0
mcp_code_indexer/git_hook_handler.py CHANGED
@@ -10,17 +10,16 @@ import asyncio
 import json
 import logging
 import os
-import subprocess
+import subprocess  # nosec B404
 from pathlib import Path
-from typing import Dict, List, Optional, Tuple, Any
-
+from typing import Any, Dict, List, Optional, Tuple, Union, cast
 
 import aiohttp
 from tenacity import (
     retry,
-    wait_exponential,
-    stop_after_attempt,
     retry_if_exception_type,
+    stop_after_attempt,
+    wait_exponential,
 )
 
 from .database.database import DatabaseManager
@@ -73,7 +72,7 @@ class GitHookHandler:
         self.token_counter = TokenCounter()
 
         # Git hook specific settings
-        self.config = {
+        self.config: Dict[str, Union[str, int, float]] = {
            "model": os.getenv("MCP_GITHOOK_MODEL", self.OPENROUTER_MODEL),
            "max_diff_tokens": 136000,  # Skip if diff larger than this (in tokens)
            "chunk_token_limit": 100000,  # Target token limit per chunk
@@ -91,14 +90,14 @@ class GitHookHandler:
     def _log_and_print(self, message: str, level: str = "info") -> None:
         """
         Log message and also print to stdout for user visibility.
-
+
         Args:
             message: Message to log and print
             level: Log level (info, warning, error)
         """
         # Log to logger
         getattr(self.logger, level)(message)
-
+
         # Also print to stdout with prefix for visibility
         prefix = "🔍" if level == "info" else "⚠️" if level == "warning" else "❌"
         print(f"{prefix} {message}")
@@ -130,9 +129,7 @@ class GitHookHandler:
 
         # Get git info from current directory
         project_info = await self._identify_project_from_git()
-        self._log_and_print(
-            f"Project: {project_info.get('name', 'Unknown')}"
-        )
+        self._log_and_print(f"Project: {project_info.get('name', 'Unknown')}")
 
         # Get git diff and commit message based on mode
         if commit_hash:
@@ -187,11 +184,11 @@ class GitHookHandler:
 
         # Apply updates to database
         await self._apply_updates(project_info, updates)
-
+
         # Count actual updates
         file_update_count = len(updates.get("file_updates", {}))
         overview_updated = bool(updates.get("overview_update"))
-
+
         if file_update_count > 0 or overview_updated:
             update_parts = []
             if file_update_count > 0:
@@ -250,7 +247,7 @@ class GitHookHandler:
         self.logger.info(f"Single-stage prompt: {prompt_tokens} tokens")
         self.logger.info(f"Token limit: {token_limit}")
 
-        if prompt_tokens <= token_limit:
+        if prompt_tokens <= int(token_limit):
             # Use single-stage approach
             self._log_and_print("Using single-stage analysis")
             result = await self._call_openrouter(single_stage_prompt)
@@ -262,18 +259,22 @@ class GitHookHandler:
         # Try two-stage analysis first
         try:
             return await self._analyze_with_two_stage(
-                git_diff, commit_message, current_overview,
-                current_descriptions, changed_files
+                git_diff,
+                commit_message,
+                current_overview,
+                current_descriptions,
+                changed_files,
             )
         except GitHookError as e:
             if "too large" in str(e).lower():
                 # Fall back to chunked processing
-                self._log_and_print(
-                    "Using chunked processing (very large diff)"
-                )
+                self._log_and_print("Using chunked processing (very large diff)")
                 return await self._analyze_with_chunking(
-                    git_diff, commit_message, current_overview,
-                    current_descriptions, changed_files
+                    git_diff,
+                    commit_message,
+                    current_overview,
+                    current_descriptions,
+                    changed_files,
                 )
             else:
                 raise
@@ -319,7 +320,7 @@ CURRENT FILE DESCRIPTIONS (for changed files only):
 {json.dumps(relevant_descriptions, indent=2)}
 
 CHANGED FILES:
-{', '.join(changed_files)}
+{", ".join(changed_files)}
 
 GIT DIFF:
 {git_diff}
@@ -663,44 +664,39 @@ Return ONLY a JSON object:
         )
 
         # Break changed files into chunks and process file descriptions
-        chunk_size = await self._calculate_optimal_chunk_size(
-            git_diff, changed_files
-        )
-
+        chunk_size = await self._calculate_optimal_chunk_size(git_diff, changed_files)
+
         self._log_and_print(f"Processing in {chunk_size}-file chunks")
-
+
         all_file_updates = {}
-
+
         for i in range(0, len(changed_files), chunk_size):
-            chunk_files = changed_files[i:i + chunk_size]
+            chunk_files = changed_files[i : i + chunk_size]
             chunk_number = (i // chunk_size) + 1
             total_chunks = (len(changed_files) + chunk_size - 1) // chunk_size
-
+
             self._log_and_print(
                 f"Processing chunk {chunk_number}/{total_chunks} "
                 f"({len(chunk_files)} files)"
             )
-
+
             # Extract diff content for this chunk
             chunk_diff = self._extract_chunk_diff(git_diff, chunk_files)
-
+
             # Process this chunk
             chunk_updates = await self._analyze_file_chunk(
                 chunk_diff, commit_message, current_descriptions, chunk_files
             )
-
+
             # Merge results
             if chunk_updates and "file_updates" in chunk_updates:
                 all_file_updates.update(chunk_updates["file_updates"])
-
+
         self.logger.info(
             f"Chunked processing completed: updated {len(all_file_updates)} files"
         )
-
-        return {
-            "file_updates": all_file_updates,
-            "overview_update": overview_update
-        }
+
+        return {"file_updates": all_file_updates, "overview_update": overview_update}
 
     async def _analyze_overview_updates(
         self,
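
The loop above uses the ceiling-division idiom `(n + size - 1) // size` for the chunk count and merges per-chunk result dicts with last-write-wins semantics. A standalone sketch with dummy file names:

```python
changed_files = [f"src/file_{i}.py" for i in range(12)]  # dummy file list
chunk_size = 5

# Ceiling division: 12 files in chunks of 5 -> 3 chunks.
total_chunks = (len(changed_files) + chunk_size - 1) // chunk_size

all_file_updates: dict = {}
for i in range(0, len(changed_files), chunk_size):
    chunk = changed_files[i : i + chunk_size]
    chunk_number = (i // chunk_size) + 1
    print(f"chunk {chunk_number}/{total_chunks}: {len(chunk)} files")
    # Each chunk is analyzed independently; merging is last-write-wins.
    all_file_updates.update({path: f"description for {path}" for path in chunk})

assert len(all_file_updates) == 12
```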
@@ -733,7 +729,7 @@ CURRENT PROJECT OVERVIEW:
 {current_overview or "No overview available"}
 
 CHANGED FILES:
-{', '.join(changed_files)}
+{", ".join(changed_files)}
 
 GIT DIFF:
 {git_diff}
@@ -762,10 +758,8 @@ Return ONLY a JSON object:
         prompt_tokens = self.token_counter.count_tokens(prompt)
         self.logger.info(f"Stage 1 prompt: {prompt_tokens} tokens")
 
-        if prompt_tokens > self.config["max_diff_tokens"]:
-            raise GitHookError(
-                f"Stage 1 prompt too large ({prompt_tokens} tokens)"
-            )
+        if prompt_tokens > int(self.config["max_diff_tokens"]):
+            raise GitHookError(f"Stage 1 prompt too large ({prompt_tokens} tokens)")
 
         # Call OpenRouter API
         result = await self._call_openrouter(prompt)
@@ -811,7 +805,7 @@ CURRENT FILE DESCRIPTIONS (for changed files only):
 {json.dumps(relevant_descriptions, indent=2)}
 
 CHANGED FILES:
-{', '.join(changed_files)}
+{", ".join(changed_files)}
 
 GIT DIFF:
 {git_diff}
@@ -836,10 +830,8 @@ Return ONLY a JSON object:
         prompt_tokens = self.token_counter.count_tokens(prompt)
         self.logger.info(f"Stage 2 prompt: {prompt_tokens} tokens")
 
-        if prompt_tokens > self.config["max_diff_tokens"]:
-            raise GitHookError(
-                f"Stage 2 prompt too large ({prompt_tokens} tokens)"
-            )
+        if prompt_tokens > int(self.config["max_diff_tokens"]):
+            raise GitHookError(f"Stage 2 prompt too large ({prompt_tokens} tokens)")
 
         # Call OpenRouter API
         result = await self._call_openrouter(prompt)
@@ -875,7 +867,7 @@ CURRENT PROJECT OVERVIEW:
 {current_overview or "No overview available"}
 
 CHANGED FILES:
-{', '.join(changed_files)}
+{", ".join(changed_files)}
 
 INSTRUCTIONS:
 Update project overview ONLY if there are major structural changes like:
@@ -918,26 +910,26 @@ Return ONLY a JSON object:
         # Estimate average diff size per file
         total_diff_tokens = self.token_counter.count_tokens(git_diff)
         avg_tokens_per_file = total_diff_tokens / len(changed_files)
-
+
         # Target chunk token limit
         chunk_limit = self.config.get("chunk_token_limit", 100000)
-
+
         # Calculate chunk size with buffer for overhead
         overhead_factor = 0.7  # Reserve 30% for prompt overhead
-        effective_limit = chunk_limit * overhead_factor
-
+        effective_limit = int(chunk_limit) * overhead_factor
+
         chunk_size = max(1, int(effective_limit / avg_tokens_per_file))
-
+
         # Cap at reasonable limits
         chunk_size = min(chunk_size, 50)  # Max 50 files per chunk
-        chunk_size = max(chunk_size, 5)   # Min 5 files per chunk
-
+        chunk_size = max(chunk_size, 5)  # Min 5 files per chunk
+
         self.logger.info(
             f"Calculated chunk size: {chunk_size} files "
             f"(avg {avg_tokens_per_file:.0f} tokens/file, "
             f"target {chunk_limit} tokens/chunk)"
         )
-
+
         return chunk_size
 
     def _extract_chunk_diff(self, git_diff: str, chunk_files: List[str]) -> str:
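
Worked example of the sizing logic above, with illustrative figures: a 300,000-token diff across 60 files averages 5,000 tokens per file; 70% of the 100,000-token chunk budget leaves 70,000 effective tokens, giving 14 files per chunk, inside the 5–50 clamp:

```python
# Illustrative figures only.
total_diff_tokens = 300_000
changed_file_count = 60
avg_tokens_per_file = total_diff_tokens / changed_file_count  # 5000.0

chunk_limit = 100_000
overhead_factor = 0.7  # reserve 30% of the budget for prompt overhead
effective_limit = int(chunk_limit) * overhead_factor  # 70000.0

chunk_size = max(1, int(effective_limit / avg_tokens_per_file))  # 14
chunk_size = min(chunk_size, 50)  # cap: at most 50 files per chunk
chunk_size = max(chunk_size, 5)   # floor: at least 5 files per chunk
assert chunk_size == 14
```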
@@ -951,24 +943,22 @@ Return ONLY a JSON object:
         Returns:
             Filtered diff content for chunk files only
         """
-        lines = git_diff.split('\n')
+        lines = git_diff.split("\n")
         chunk_lines = []
-        current_file = None
         include_section = False
-
+
         for line in lines:
-            if line.startswith('diff --git'):
+            if line.startswith("diff --git"):
                 # Parse file path from diff header
-                parts = line.split(' ')
+                parts = line.split(" ")
                 if len(parts) >= 4:
                     file_path = parts[2][2:]  # Remove 'a/' prefix
-                    current_file = file_path
                     include_section = file_path in chunk_files
-
+
             if include_section:
                 chunk_lines.append(line)
-
-        return '\n'.join(chunk_lines)
+
+        return "\n".join(chunk_lines)
 
     async def _analyze_file_chunk(
         self,
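
The header parsing above assumes `diff --git a/<path> b/<path>` lines with no spaces in paths; `parts[2][2:]` drops the `a/` prefix. (The deleted `current_file` variable was simply unused.) A quick check of that parse:

```python
header = "diff --git a/src/app.py b/src/app.py"
parts = header.split(" ")   # ['diff', '--git', 'a/src/app.py', 'b/src/app.py']
file_path = parts[2][2:]    # strip the 'a/' prefix
assert file_path == "src/app.py"
```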
@@ -1005,14 +995,14 @@ CURRENT FILE DESCRIPTIONS (for chunk files only):
 {json.dumps(relevant_descriptions, indent=2)}
 
 CHUNK FILES:
-{', '.join(chunk_files)}
+{", ".join(chunk_files)}
 
 GIT DIFF (chunk only):
 {chunk_diff}
 
 INSTRUCTIONS:
 Use the COMMIT MESSAGE to understand the intent and context of the changes.
-Update descriptions for files that have changed significantly. 
+Update descriptions for files that have changed significantly.
 Only include files that need actual description updates.
 
 Return ONLY a JSON object:
@@ -1027,7 +1017,7 @@ Return ONLY a JSON object:
         prompt_tokens = self.token_counter.count_tokens(prompt)
         self.logger.info(f"Chunk prompt: {prompt_tokens} tokens")
 
-        if prompt_tokens > self.config.get("chunk_token_limit", 100000):
+        if prompt_tokens > int(self.config.get("chunk_token_limit", 100000)):
             self.logger.warning(
                 f"Chunk still too large ({prompt_tokens} tokens), "
                 f"skipping {len(chunk_files)} files"
1081
1071
  "max_tokens": 24000,
1082
1072
  }
1083
1073
 
1084
- timeout = aiohttp.ClientTimeout(total=self.config["timeout"])
1074
+ timeout = aiohttp.ClientTimeout(total=float(self.config["timeout"]))
1085
1075
 
1086
1076
  self.logger.info("Sending request to OpenRouter API...")
1087
1077
  self.logger.info(f" Model: {self.config['model']}")
@@ -1094,7 +1084,6 @@ Return ONLY a JSON object:
1094
1084
  async with session.post(
1095
1085
  self.OPENROUTER_API_URL, headers=headers, json=payload
1096
1086
  ) as response:
1097
-
1098
1087
  self.logger.info(
1099
1088
  f"OpenRouter API response status: {response.status}"
1100
1089
  )
@@ -1221,7 +1210,7 @@ Return ONLY a JSON object:
                     "Response must contain 'file_updates' and/or 'overview_update'"
                 )
 
-            return data
+            return cast(Dict[str, Any], data)
 
         except json.JSONDecodeError as e:
             self.logger.error(f"Raw response content: {repr(response_text)}")
@@ -1249,11 +1238,15 @@ Return ONLY a JSON object:
         # Update file descriptions
         file_updates = updates.get("file_updates", {})
         for file_path, description in file_updates.items():
-            from mcp_code_indexer.database.models import FileDescription
             from datetime import datetime
 
+            from mcp_code_indexer.database.models import FileDescription
+
             file_desc = FileDescription(
+                id=None,
                 project_id=project.id,
+                source_project_id=None,
+                to_be_cleaned=None,
                 file_path=file_path,
                 description=description,
                 file_hash=None,
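
The new explicit `id=None`, `source_project_id=None`, and `to_be_cleaned=None` arguments suggest `FileDescription` declares these as `Optional` fields without defaults. A hypothetical stand-in showing why such fields must then be passed explicitly (the real model lives in `mcp_code_indexer.database.models` and may differ):

```python
from dataclasses import dataclass
from typing import Optional

@dataclass
class FileDescriptionSketch:  # hypothetical stand-in for the real model
    id: Optional[int]
    project_id: str
    source_project_id: Optional[str]
    to_be_cleaned: Optional[bool]
    file_path: str
    description: str
    file_hash: Optional[str]

# Without defaults, every Optional field still needs an explicit argument:
desc = FileDescriptionSketch(
    id=None,
    project_id="proj-123",
    source_project_id=None,
    to_be_cleaned=None,
    file_path="src/app.py",
    description="Application entry point",
    file_hash=None,
)
```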
@@ -1266,9 +1259,10 @@ Return ONLY a JSON object:
         # Update project overview if provided
         overview_update = updates.get("overview_update")
         if overview_update and overview_update.strip():
-            from mcp_code_indexer.database.models import ProjectOverview
             from datetime import datetime
 
+            from mcp_code_indexer.database.models import ProjectOverview
+
             overview = ProjectOverview(
                 project_id=project.id,
                 overview=overview_update,
@@ -1305,8 +1299,9 @@ Return ONLY a JSON object:
         stdout, stderr = await process.communicate()
 
         if process.returncode != 0:
+            returncode = process.returncode if process.returncode is not None else 1
             raise subprocess.CalledProcessError(
-                process.returncode, full_cmd, stdout, stderr
+                returncode, full_cmd, stdout, stderr
             )
 
         return stdout.decode("utf-8")
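
The new `returncode` fallback exists because typeshed types `Process.returncode` as `Optional[int]`, even though it is always set once `communicate()` returns. The full pattern as a self-contained sketch:

```python
import asyncio
import subprocess  # nosec B404 - only used to raise CalledProcessError

async def run_git(*args: str) -> str:
    """Run a git command and return its stdout, mirroring the pattern above."""
    process = await asyncio.create_subprocess_exec(
        "git", *args,
        stdout=asyncio.subprocess.PIPE,
        stderr=asyncio.subprocess.PIPE,
    )
    stdout, stderr = await process.communicate()
    if process.returncode != 0:
        # returncode is Optional[int] in typeshed; fall back to 1 for mypy's sake.
        returncode = process.returncode if process.returncode is not None else 1
        raise subprocess.CalledProcessError(returncode, ["git", *args], stdout, stderr)
    return stdout.decode("utf-8")

# e.g. print(asyncio.run(run_git("status", "--porcelain")))
```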
mcp_code_indexer/logging_config.py CHANGED
@@ -9,7 +9,7 @@ import logging
 import logging.handlers
 import sys
 from pathlib import Path
-from typing import Optional
+from typing import Optional, Union
 
 from .error_handler import StructuredFormatter
 
@@ -58,6 +58,9 @@ def setup_logging(
         log_file.parent.mkdir(parents=True, exist_ok=True)
 
         # Rotating file handler
+        file_handler: Union[
+            logging.handlers.RotatingFileHandler, logging.FileHandler
+        ]
         if max_bytes > 0:
             file_handler = logging.handlers.RotatingFileHandler(
                 log_file,
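
Pre-declaring the variable's union type before the branch is the idiomatic way to let mypy accept two different assignments; without it, the type is pinned by whichever branch assigns first. In miniature:

```python
import logging
import logging.handlers
from typing import Union

max_bytes = 0  # 0 disables rotation in this sketch

# Bare annotation, no assignment: both branch assignments now type-check.
file_handler: Union[logging.handlers.RotatingFileHandler, logging.FileHandler]
if max_bytes > 0:
    file_handler = logging.handlers.RotatingFileHandler(
        "app.log", maxBytes=max_bytes, backupCount=2, encoding="utf-8"
    )
else:
    file_handler = logging.FileHandler("app.log")
```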
@@ -144,7 +147,10 @@ def setup_command_logger(
 
     # File handler with 50MB limit
     file_handler = logging.handlers.RotatingFileHandler(
-        log_file, maxBytes=50 * 1024 * 1024, backupCount=2, encoding="utf-8"  # 50MB
+        log_file,
+        maxBytes=50 * 1024 * 1024,
+        backupCount=2,
+        encoding="utf-8",  # 50MB
     )
     file_handler.setLevel(logging.DEBUG)
@@ -172,7 +178,9 @@ def setup_command_logger(
 
 
 def _setup_component_loggers_for_command(
-    command_name: str, file_handler: logging.Handler, formatter: logging.Formatter
+    command_name: str,
+    file_handler: logging.handlers.RotatingFileHandler,
+    formatter: logging.Formatter,
 ) -> None:
     """
     Set up component loggers to also send logs to the command's log file.
@@ -205,7 +213,7 @@ def _setup_component_loggers_for_command(
     command_handler.setFormatter(formatter)
 
     # Add a marker to identify which command this handler belongs to
-    command_handler._command_name = command_name
+    setattr(command_handler, "_command_name", command_name)
 
     # Remove any existing handlers for this command (in case of multiple calls)
     existing_handlers = [
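
`setattr` with a literal name sidesteps mypy's attr-defined error for ad-hoc attributes on `logging.Handler`; the marker can later be read back with `getattr` when pruning duplicate handlers. Sketch:

```python
import logging

logger = logging.getLogger("example")
handler = logging.StreamHandler()

# Dynamic attribute: logging.Handler does not declare _command_name,
# so plain attribute assignment would be an attr-defined error under mypy.
setattr(handler, "_command_name", "runcommand")

# Later, prune handlers previously registered for the same command.
stale = [
    h for h in logger.handlers
    if getattr(h, "_command_name", None) == "runcommand"
]
for h in stale:
    logger.removeHandler(h)
logger.addHandler(handler)
```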
@@ -225,7 +233,7 @@ def _setup_component_loggers_for_command(
 
 
 def log_performance_metrics(
-    logger: logging.Logger, operation: str, duration: float, **metrics
+    logger: logging.Logger, operation: str, duration: float, **metrics: object
 ) -> None:
     """
     Log performance metrics in structured format.
mcp_code_indexer/main.py CHANGED
@@ -10,10 +10,12 @@ import asyncio
 import json
 import sys
 from pathlib import Path
+from typing import Any, Dict, List, Optional
+import logging
 
 from . import __version__
-from .logging_config import setup_logging
 from .error_handler import setup_error_handling
+from .logging_config import setup_logging
 
 
 def parse_arguments() -> argparse.Namespace:
@@ -77,8 +79,7 @@ def parse_arguments() -> argparse.Namespace:
         nargs="+",
         metavar="PROJECT_ID",
         help=(
-            "Export descriptions for a project. Usage: "
-            "--dumpdescriptions PROJECT_ID"
+            "Export descriptions for a project. Usage: --dumpdescriptions PROJECT_ID"
         ),
     )
 
@@ -105,8 +106,7 @@ def parse_arguments() -> argparse.Namespace:
         type=str,
         metavar="PROJECT_NAME_OR_ID",
         help=(
-            "Generate a markdown project map for the specified project "
-            "(by name or ID)"
+            "Generate a markdown project map for the specified project (by name or ID)"
         ),
     )
 
@@ -122,27 +122,27 @@ def parse_arguments() -> argparse.Namespace:
         action="store_true",
         help="Enable HTTP transport instead of stdio (requires 'http' extras)",
     )
-
+
     parser.add_argument(
         "--host",
         type=str,
         default="127.0.0.1",
         help="Host to bind HTTP server to (default: 127.0.0.1)",
     )
-
+
     parser.add_argument(
         "--port",
         type=int,
         default=7557,
         help="Port to bind HTTP server to (default: 7557)",
     )
-
+
     parser.add_argument(
         "--auth-token",
         type=str,
         help="Bearer token for HTTP authentication (optional)",
     )
-
+
     parser.add_argument(
         "--cors-origins",
         type=str,
@@ -151,6 +151,19 @@ def parse_arguments() -> argparse.Namespace:
         help="Allowed CORS origins for HTTP transport (default: allow all)",
     )
 
+    # Vector mode options
+    parser.add_argument(
+        "--vector",
+        action="store_true",
+        help="Enable vector mode with semantic search capabilities (requires vector extras)",
+    )
+
+    parser.add_argument(
+        "--vector-config",
+        type=str,
+        help="Path to vector mode configuration file",
+    )
+
     return parser.parse_args()
 
 
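A quick smoke test of the two new flags, mirroring the argparse definitions above (hypothetical, not part of the package):

```python
import argparse

parser = argparse.ArgumentParser()
parser.add_argument("--vector", action="store_true")
parser.add_argument("--vector-config", type=str)

args = parser.parse_args(["--vector", "--vector-config", "vector.yaml"])
assert args.vector is True
assert args.vector_config == "vector.yaml"  # dest: dashes become underscores
```
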
@@ -199,8 +212,8 @@ async def handle_getprojects(args: argparse.Namespace) -> None:
 
 async def handle_runcommand(args: argparse.Namespace) -> None:
     """Handle --runcommand command."""
-    from .server.mcp_server import MCPCodeIndexServer
     from .logging_config import setup_command_logger
+    from .server.mcp_server import MCPCodeIndexServer
 
     # Set up dedicated logging for runcommand
     cache_dir = Path(args.cache_dir).expanduser()
@@ -390,8 +403,8 @@ async def handle_runcommand(args: argparse.Namespace) -> None:
 
         return html.unescape(text)
 
-    def clean_arguments(arguments: dict) -> dict:
-        cleaned = {}
+    def clean_arguments(arguments: Dict[str, Any]) -> Dict[str, Any]:
+        cleaned: Dict[str, Any] = {}
         for key, value in arguments.items():
             if isinstance(value, str):
                 cleaned[key] = clean_html_entities(value)
@@ -753,9 +766,7 @@ async def handle_map(args: argparse.Namespace) -> None:
     )
 
     # Generate markdown
-    markdown_content = generate_project_markdown(
-        project, overview, files, logger
-    )
+    markdown_content = generate_project_markdown(project, overview, files, logger)
 
     # Output the markdown
     print(markdown_content)
@@ -788,7 +799,9 @@ async def handle_map(args: argparse.Namespace) -> None:
         logger.removeHandler(handler)
 
 
-def generate_project_markdown(project, overview, files, logger):
+def generate_project_markdown(
+    project: Any, overview: Optional[Any], files: List[Any], logger: logging.Logger
+) -> str:
     """Generate the markdown content for the project map."""
     import re
     from collections import defaultdict
@@ -881,8 +894,8 @@ def generate_project_markdown(project, overview, files, logger):
 async def handle_makelocal(args: argparse.Namespace) -> None:
     """Handle --makelocal command."""
     try:
-        from .database.database_factory import DatabaseFactory
         from .commands.makelocal import MakeLocalCommand
+        from .database.database_factory import DatabaseFactory
 
         # Initialize database factory
         db_path = Path(args.db_path).expanduser()
@@ -910,10 +923,12 @@ async def handle_makelocal(args: argparse.Namespace) -> None:
         # Execute the command
         result = await makelocal_cmd.execute(args.makelocal)
 
-        print(f"Successfully migrated project '{result['project_name']}' to local database")
+        print(
+            f"Successfully migrated project '{result['project_name']}' to local database"
+        )
         print(f"Local database created at: {result['local_database_path']}")
         print(f"Migrated {result['migrated_files']} file descriptions")
-        if result['migrated_overview']:
+        if result["migrated_overview"]:
             print("Migrated project overview")
 
         # Close all database connections
@@ -994,13 +1009,60 @@ async def main() -> None:
     )
 
     try:
+        # Handle vector mode initialization
+        vector_daemon_task = None
+        if args.vector:
+            try:
+                from .vector_mode import is_vector_mode_available, check_api_keys
+                from .vector_mode.config import load_vector_config
+                from .vector_mode.daemon import start_vector_daemon
+
+                # Check if vector mode is available
+                if not is_vector_mode_available():
+                    logger.error("Vector mode requires additional dependencies. Install with: pip install mcp-code-indexer[vector]")
+                    sys.exit(1)
+
+                # Check API keys
+                api_keys = check_api_keys()
+                if not all(api_keys.values()):
+                    missing = [k for k, v in api_keys.items() if not v]
+                    logger.error(f"Missing API keys for vector mode: {', '.join(missing)}")
+                    sys.exit(1)
+
+                # Load vector configuration
+                vector_config_path = Path(args.vector_config).expanduser() if args.vector_config else None
+                vector_config = load_vector_config(vector_config_path)
+
+                logger.info(
+                    "Vector mode enabled",
+                    extra={
+                        "structured_data": {
+                            "embedding_model": vector_config.embedding_model,
+                            "batch_size": vector_config.batch_size,
+                            "daemon_enabled": vector_config.daemon_enabled,
+                        }
+                    }
+                )
+
+                # Start vector daemon in background
+                if vector_config.daemon_enabled:
+                    vector_daemon_task = asyncio.create_task(
+                        start_vector_daemon(vector_config_path, db_path, cache_dir)
+                    )
+                    logger.info("Vector daemon started")
+
+            except Exception as e:
+                logger.error(f"Failed to initialize vector mode: {e}")
+                sys.exit(1)
+
         # Import and run the MCP server
         from .server.mcp_server import MCPCodeIndexServer
-
+
         # Create transport based on arguments
         transport = None
         if args.http:
             from .transport.http_transport import HTTPTransport
+
             transport = HTTPTransport(
                 server_instance=None,  # Will be set after server creation
                 host=args.host,
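
The `is_vector_mode_available()` and `check_api_keys()` guards are a standard optional-extras gate. One plausible shape for those helpers, assuming availability is an import probe and keys come from the environment; the module and variable names below are inferred from the provider files in this release (`voyage_client.py`, `turbopuffer_client.py`) and may not match the real implementation:

```python
import importlib.util
import os
from typing import Dict

def is_vector_mode_available_sketch() -> bool:
    # Probe for the optional dependencies without importing them.
    return all(
        importlib.util.find_spec(mod) is not None
        for mod in ("voyageai", "turbopuffer")  # assumed package names
    )

def check_api_keys_sketch() -> Dict[str, bool]:
    # Map each assumed key name to whether it is set in the environment.
    return {
        "VOYAGE_API_KEY": bool(os.getenv("VOYAGE_API_KEY")),
        "TURBOPUFFER_API_KEY": bool(os.getenv("TURBOPUFFER_API_KEY")),
    }
```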
@@ -1025,8 +1087,9 @@ async def main() -> None:
         db_path=db_path,
         cache_dir=cache_dir,
         transport=transport,
+        vector_mode=args.vector,
     )
-
+
     # Set server instance in transport after server creation
     if transport:
         transport.server = server
@@ -1038,7 +1101,7 @@ async def main() -> None:
         raise
 
 
-def cli_main():
+def cli_main() -> None:
     """Console script entry point."""
     try:
         asyncio.run(main())
mcp_code_indexer/middleware/__init__.py CHANGED
@@ -5,8 +5,8 @@ This module provides middleware for HTTP transport features like
 logging, authentication, and security.
 """
 
-from .logging import HTTPLoggingMiddleware
 from .auth import HTTPAuthMiddleware
+from .logging import HTTPLoggingMiddleware
 from .security import HTTPSecurityMiddleware
 
 __all__ = ["HTTPLoggingMiddleware", "HTTPAuthMiddleware", "HTTPSecurityMiddleware"]