mcp-code-indexer 3.3.0-py3-none-any.whl → 3.4.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -76,6 +76,7 @@ class GitHookHandler:
         self.config = {
             "model": os.getenv("MCP_GITHOOK_MODEL", self.OPENROUTER_MODEL),
             "max_diff_tokens": 136000,  # Skip if diff larger than this (in tokens)
+            "chunk_token_limit": 100000,  # Target token limit per chunk
             "timeout": 300,  # 5 minutes
             "temperature": 0.3,  # Lower temperature for consistent updates
         }
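
Note the new `chunk_token_limit` key: later hunks read it defensively via `config.get("chunk_token_limit", 100000)`. A minimal sketch of that pattern, assuming a config shaped like the block above (the model-name default here is a placeholder, not the package's actual `OPENROUTER_MODEL`):

```python
import os

# Mirrors the config block above; numeric values are copied from the diff.
config = {
    "model": os.getenv("MCP_GITHOOK_MODEL", "openrouter/some-model"),  # placeholder default
    "max_diff_tokens": 136000,   # skip analysis if the diff exceeds this
    "chunk_token_limit": 100000, # target token budget per chunk
    "timeout": 300,              # seconds
    "temperature": 0.3,
}

# Call sites fall back to 100000 even if the key is absent:
limit = config.get("chunk_token_limit", 100000)
print(limit)  # 100000
```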
@@ -197,8 +198,8 @@ class GitHookHandler:
         changed_files: List[str],
     ) -> Dict[str, Any]:
         """
-        Smart staging: Try single-stage first, fall back to two-stage if
-        token limit exceeded.
+        Smart staging: Try single-stage first, fall back to two-stage,
+        then chunked processing if needed.

         Args:
             git_diff: Git diff content
@@ -240,24 +241,25 @@ class GitHookHandler:
             f"falling back to two-stage analysis"
         )

-        # Stage 1: Check if overview needs updating
-        overview_updates = await self._analyze_overview_updates(
-            git_diff, commit_message, current_overview, changed_files
-        )
-
-        # Stage 2: Update file descriptions
-        file_updates = await self._analyze_file_updates(
-            git_diff, commit_message, current_descriptions, changed_files
-        )
-
-        # Combine updates
-        updates = {
-            "file_updates": file_updates.get("file_updates", {}),
-            "overview_update": overview_updates.get("overview_update"),
-        }
-
-        self.logger.info("Two-stage analysis completed")
-        return updates
+        # Try two-stage analysis first
+        try:
+            return await self._analyze_with_two_stage(
+                git_diff, commit_message, current_overview,
+                current_descriptions, changed_files
+            )
+        except GitHookError as e:
+            if "too large" in str(e).lower():
+                # Fall back to chunked processing
+                self.logger.info(
+                    "Two-stage analysis failed due to size, "
+                    "falling back to chunked processing"
+                )
+                return await self._analyze_with_chunking(
+                    git_diff, commit_message, current_overview,
+                    current_descriptions, changed_files
+                )
+            else:
+                raise

     def _build_single_stage_prompt(
         self,
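
The new control flow escalates by prompt size: single-stage, then two-stage, then chunking. A self-contained sketch of the same escalation pattern (`GitHookError` is the module's own exception type; the analyzer stubs here are hypothetical):

```python
import asyncio

class GitHookError(Exception):
    """Stand-in for the module's exception type."""

async def two_stage(diff: str) -> dict:
    # Hypothetical stub: fail the same way the real stages now do.
    if len(diff) > 50:
        raise GitHookError(f"Stage 1 prompt too large ({len(diff)} tokens)")
    return {"file_updates": {}, "overview_update": None}

async def chunked(diff: str) -> dict:
    # Hypothetical stub for _analyze_with_chunking.
    return {"file_updates": {"a.py": "updated"}, "overview_update": None}

async def analyze(diff: str) -> dict:
    try:
        return await two_stage(diff)
    except GitHookError as e:
        if "too large" in str(e).lower():  # size failures downgrade to chunking
            return await chunked(diff)
        raise  # anything else still propagates

print(asyncio.run(analyze("x" * 200)))  # -> chunked result
```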
@@ -571,6 +573,118 @@ Return ONLY a JSON object:
             self.logger.warning(f"Failed to get file descriptions: {e}")
             return {}

+    async def _analyze_with_two_stage(
+        self,
+        git_diff: str,
+        commit_message: str,
+        current_overview: str,
+        current_descriptions: Dict[str, str],
+        changed_files: List[str],
+    ) -> Dict[str, Any]:
+        """
+        Two-stage analysis: overview updates first, then file updates.
+
+        Args:
+            git_diff: Git diff content
+            commit_message: Commit message explaining the changes
+            current_overview: Current project overview
+            current_descriptions: Current file descriptions
+            changed_files: List of changed file paths
+
+        Returns:
+            Dict containing file_updates and overview_update
+        """
+        # Stage 1: Check if overview needs updating
+        overview_updates = await self._analyze_overview_updates(
+            git_diff, commit_message, current_overview, changed_files
+        )
+
+        # Stage 2: Update file descriptions
+        file_updates = await self._analyze_file_updates(
+            git_diff, commit_message, current_descriptions, changed_files
+        )
+
+        # Combine updates
+        updates = {
+            "file_updates": file_updates.get("file_updates", {}),
+            "overview_update": overview_updates.get("overview_update"),
+        }
+
+        self.logger.info("Two-stage analysis completed")
+        return updates
+
+    async def _analyze_with_chunking(
+        self,
+        git_diff: str,
+        commit_message: str,
+        current_overview: str,
+        current_descriptions: Dict[str, str],
+        changed_files: List[str],
+    ) -> Dict[str, Any]:
+        """
+        Chunked processing: Break large diffs into manageable chunks.
+
+        Args:
+            git_diff: Git diff content
+            commit_message: Commit message explaining the changes
+            current_overview: Current project overview
+            current_descriptions: Current file descriptions
+            changed_files: List of changed file paths
+
+        Returns:
+            Dict containing file_updates and overview_update
+        """
+        self.logger.info(
+            f"Starting chunked processing for {len(changed_files)} files"
+        )
+
+        # First, handle overview separately if needed
+        overview_update = None
+        if current_overview:
+            overview_update = await self._analyze_overview_lightweight(
+                commit_message, current_overview, changed_files
+            )
+
+        # Break changed files into chunks and process file descriptions
+        chunk_size = await self._calculate_optimal_chunk_size(
+            git_diff, changed_files
+        )
+
+        self.logger.info(f"Using chunk size of {chunk_size} files per chunk")
+
+        all_file_updates = {}
+
+        for i in range(0, len(changed_files), chunk_size):
+            chunk_files = changed_files[i:i + chunk_size]
+            chunk_number = (i // chunk_size) + 1
+            total_chunks = (len(changed_files) + chunk_size - 1) // chunk_size
+
+            self.logger.info(
+                f"Processing chunk {chunk_number}/{total_chunks} "
+                f"({len(chunk_files)} files)"
+            )
+
+            # Extract diff content for this chunk
+            chunk_diff = self._extract_chunk_diff(git_diff, chunk_files)
+
+            # Process this chunk
+            chunk_updates = await self._analyze_file_chunk(
+                chunk_diff, commit_message, current_descriptions, chunk_files
+            )
+
+            # Merge results
+            if chunk_updates and "file_updates" in chunk_updates:
+                all_file_updates.update(chunk_updates["file_updates"])

+        self.logger.info(
+            f"Chunked processing completed: updated {len(all_file_updates)} files"
+        )
+
+        return {
+            "file_updates": all_file_updates,
+            "overview_update": overview_update
+        }
+
     async def _analyze_overview_updates(
         self,
         git_diff: str,
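
The sizing and batching above reduce to ceil-division batching over the changed-file list; `_calculate_optimal_chunk_size` appears in full in a later hunk. A worked example with made-up numbers:

```python
# Made-up inputs: a 400,000-token diff spread across 100 changed files.
total_diff_tokens = 400_000
files = [f"src/mod_{i}.py" for i in range(100)]
chunk_limit = 100_000                               # config["chunk_token_limit"]

avg_tokens = total_diff_tokens / len(files)         # 4,000 tokens per file
size = max(1, int(chunk_limit * 0.7 / avg_tokens))  # 70,000 / 4,000 -> 17
size = max(min(size, 50), 5)                        # clamp to [5, 50]

total_chunks = (len(files) + size - 1) // size      # ceil(100 / 17) = 6
for i in range(0, len(files), size):
    chunk = files[i:i + size]
    print(f"chunk {i // size + 1}/{total_chunks}: {len(chunk)} files")
```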
@@ -632,11 +746,9 @@ Return ONLY a JSON object:
         self.logger.info(f"Stage 1 prompt: {prompt_tokens} tokens")

         if prompt_tokens > self.config["max_diff_tokens"]:
-            self.logger.warning(
-                f"Stage 1 prompt too large ({prompt_tokens} tokens), "
-                f"skipping overview analysis"
+            raise GitHookError(
+                f"Stage 1 prompt too large ({prompt_tokens} tokens)"
             )
-            return {"overview_update": None}

         # Call OpenRouter API
         result = await self._call_openrouter(prompt)
@@ -708,11 +820,9 @@ Return ONLY a JSON object:
         self.logger.info(f"Stage 2 prompt: {prompt_tokens} tokens")

         if prompt_tokens > self.config["max_diff_tokens"]:
-            self.logger.warning(
-                f"Stage 2 prompt too large ({prompt_tokens} tokens), "
-                f"skipping file analysis"
+            raise GitHookError(
+                f"Stage 2 prompt too large ({prompt_tokens} tokens)"
             )
-            return {"file_updates": {}}

         # Call OpenRouter API
         result = await self._call_openrouter(prompt)
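
Both stage gates now raise instead of logging a warning and returning an empty result, which is what lets the smart-staging caller catch the error and downgrade to chunked processing rather than silently producing no updates. In miniature (the `gate` helper is hypothetical, not part of the package):

```python
class GitHookError(Exception):
    pass

def gate(stage: int, prompt_tokens: int, max_diff_tokens: int = 136000) -> None:
    """Hypothetical helper: the new gates raise instead of warn-and-skip."""
    if prompt_tokens > max_diff_tokens:
        # Callers match on "too large" to downgrade to chunked processing.
        raise GitHookError(f"Stage {stage} prompt too large ({prompt_tokens} tokens)")

try:
    gate(2, 150_000)
except GitHookError as e:
    print(e)  # Stage 2 prompt too large (150000 tokens)
```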
@@ -720,6 +830,201 @@ Return ONLY a JSON object:

         return result

+    async def _analyze_overview_lightweight(
+        self,
+        commit_message: str,
+        current_overview: str,
+        changed_files: List[str],
+    ) -> Optional[str]:
+        """
+        Lightweight overview analysis without including full diff.
+
+        Args:
+            commit_message: Commit message explaining the changes
+            current_overview: Current project overview
+            changed_files: List of changed file paths
+
+        Returns:
+            Updated overview text or None
+        """
+        self.logger.info("Lightweight overview analysis...")
+
+        prompt = f"""Analyze this commit to determine if project overview needs updating.
+
+COMMIT MESSAGE:
+{commit_message or "No commit message available"}
+
+CURRENT PROJECT OVERVIEW:
+{current_overview or "No overview available"}
+
+CHANGED FILES:
+{', '.join(changed_files)}
+
+INSTRUCTIONS:
+Update project overview ONLY if there are major structural changes like:
+- New major features or components (indicated by commit message or new directories)
+- Architectural changes (new patterns, frameworks, or approaches)
+- Significant dependency additions (Cargo.toml, package.json, pyproject.toml changes)
+- New API endpoints or workflows
+- Changes to build/deployment processes
+
+Do NOT update for: bug fixes, small refactors, documentation updates, version bumps.
+
+Return ONLY a JSON object:
+{{
+    "overview_update": "Updated overview text" or null
+}}"""
+
+        try:
+            result = await self._call_openrouter(prompt)
+            return result.get("overview_update")
+        except Exception as e:
+            self.logger.warning(f"Lightweight overview analysis failed: {e}")
+            return None
+
+    async def _calculate_optimal_chunk_size(
+        self, git_diff: str, changed_files: List[str]
+    ) -> int:
+        """
+        Calculate optimal chunk size based on diff content.
+
+        Args:
+            git_diff: Full git diff content
+            changed_files: List of changed file paths
+
+        Returns:
+            Optimal number of files per chunk
+        """
+        if not changed_files:
+            return 10  # Default chunk size
+
+        # Estimate average diff size per file
+        total_diff_tokens = self.token_counter.count_tokens(git_diff)
+        avg_tokens_per_file = total_diff_tokens / len(changed_files)
+
+        # Target chunk token limit
+        chunk_limit = self.config.get("chunk_token_limit", 100000)
+
+        # Calculate chunk size with buffer for overhead
+        overhead_factor = 0.7  # Reserve 30% for prompt overhead
+        effective_limit = chunk_limit * overhead_factor
+
+        chunk_size = max(1, int(effective_limit / avg_tokens_per_file))
+
+        # Cap at reasonable limits
+        chunk_size = min(chunk_size, 50)  # Max 50 files per chunk
+        chunk_size = max(chunk_size, 5)   # Min 5 files per chunk
+
+        self.logger.info(
+            f"Calculated chunk size: {chunk_size} files "
+            f"(avg {avg_tokens_per_file:.0f} tokens/file, "
+            f"target {chunk_limit} tokens/chunk)"
+        )
+
+        return chunk_size
+
+    def _extract_chunk_diff(self, git_diff: str, chunk_files: List[str]) -> str:
+        """
+        Extract diff content for specific files.
+
+        Args:
+            git_diff: Full git diff content
+            chunk_files: List of files to include in chunk
+
+        Returns:
+            Filtered diff content for chunk files only
+        """
+        lines = git_diff.split('\n')
+        chunk_lines = []
+        current_file = None
+        include_section = False
+
+        for line in lines:
+            if line.startswith('diff --git'):
+                # Parse file path from diff header
+                parts = line.split(' ')
+                if len(parts) >= 4:
+                    file_path = parts[2][2:]  # Remove 'a/' prefix
+                    current_file = file_path
+                    include_section = file_path in chunk_files
+
+            if include_section:
+                chunk_lines.append(line)
+
+        return '\n'.join(chunk_lines)
+
+    async def _analyze_file_chunk(
+        self,
+        chunk_diff: str,
+        commit_message: str,
+        current_descriptions: Dict[str, str],
+        chunk_files: List[str],
+    ) -> Dict[str, Any]:
+        """
+        Analyze a chunk of files for description updates.
+
+        Args:
+            chunk_diff: Git diff for this chunk only
+            commit_message: Commit message explaining the changes
+            current_descriptions: Current file descriptions
+            chunk_files: List of files in this chunk
+
+        Returns:
+            Dict with file_updates for this chunk
+        """
+        # Only include descriptions for files in this chunk
+        relevant_descriptions = {
+            path: desc
+            for path, desc in current_descriptions.items()
+            if path in chunk_files
+        }
+
+        prompt = f"""Analyze this git commit chunk and update file descriptions.
+
+COMMIT MESSAGE:
+{commit_message or "No commit message available"}
+
+CURRENT FILE DESCRIPTIONS (for chunk files only):
+{json.dumps(relevant_descriptions, indent=2)}
+
+CHUNK FILES:
+{', '.join(chunk_files)}
+
+GIT DIFF (chunk only):
+{chunk_diff}
+
+INSTRUCTIONS:
+Use the COMMIT MESSAGE to understand the intent and context of the changes.
+Update descriptions for files that have changed significantly.
+Only include files that need actual description updates.
+
+Return ONLY a JSON object:
+{{
+    "file_updates": {{
+        "path/to/file1.py": "Updated description for file1",
+        "path/to/file2.js": "Updated description for file2"
+    }}
+}}"""
+
+        # Check token count
+        prompt_tokens = self.token_counter.count_tokens(prompt)
+        self.logger.info(f"Chunk prompt: {prompt_tokens} tokens")
+
+        if prompt_tokens > self.config.get("chunk_token_limit", 100000):
+            self.logger.warning(
+                f"Chunk still too large ({prompt_tokens} tokens), "
+                f"skipping {len(chunk_files)} files"
+            )
+            return {"file_updates": {}}
+
+        # Call OpenRouter API
+        try:
+            result = await self._call_openrouter(prompt)
+            return result
+        except Exception as e:
+            self.logger.error(f"Failed to analyze chunk: {e}")
+            return {"file_updates": {}}
+
     @retry(
         wait=wait_exponential(multiplier=1, min=4, max=60),
         stop=stop_after_attempt(5),
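
`_extract_chunk_diff` keys off `diff --git a/<path> b/<path>` headers: `parts[2]` is the `a/<path>` token, and `[2:]` strips the `a/` prefix. A standalone sketch of the same filtering logic against a tiny synthetic diff:

```python
SYNTHETIC_DIFF = """\
diff --git a/src/app.py b/src/app.py
--- a/src/app.py
+++ b/src/app.py
@@ -1 +1 @@
-old = 1
+new = 2
diff --git a/docs/README.md b/docs/README.md
--- a/docs/README.md
+++ b/docs/README.md
@@ -1 +1 @@
-Old docs
+New docs
"""

def extract_chunk_diff(git_diff: str, chunk_files: list[str]) -> str:
    chunk_lines, include = [], False
    for line in git_diff.split("\n"):
        if line.startswith("diff --git"):
            parts = line.split(" ")  # ["diff", "--git", "a/<path>", "b/<path>"]
            if len(parts) >= 4:
                include = parts[2][2:] in chunk_files  # strip the "a/" prefix
        if include:
            chunk_lines.append(line)
    return "\n".join(chunk_lines)

print(extract_chunk_diff(SYNTHETIC_DIFF, ["src/app.py"]))  # only the src/app.py hunk
```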
@@ -1,6 +1,6 @@
 Metadata-Version: 2.3
 Name: mcp-code-indexer
-Version: 3.3.0
+Version: 3.4.0
 Summary: MCP server that tracks file descriptions across codebases, enabling AI agents to efficiently navigate and understand code through searchable summaries and token-aware overviews.
 License: MIT
 Keywords: mcp,model-context-protocol,code-indexer,ai-tools,codebase-navigation,file-descriptions,llm-tools
@@ -40,8 +40,8 @@ Description-Content-Type: text/markdown

 # MCP Code Indexer 🚀

-[![PyPI version](https://badge.fury.io/py/mcp-code-indexer.svg?30)](https://badge.fury.io/py/mcp-code-indexer)
-[![Python](https://img.shields.io/pypi/pyversions/mcp-code-indexer.svg?30)](https://pypi.org/project/mcp-code-indexer/)
+[![PyPI version](https://badge.fury.io/py/mcp-code-indexer.svg?31)](https://badge.fury.io/py/mcp-code-indexer)
+[![Python](https://img.shields.io/pypi/pyversions/mcp-code-indexer.svg?31)](https://pypi.org/project/mcp-code-indexer/)
 [![License](https://img.shields.io/badge/License-MIT-blue.svg)](https://opensource.org/licenses/MIT)

 A production-ready **Model Context Protocol (MCP) server** that revolutionizes how AI agents navigate and understand codebases. Built for high-concurrency environments with advanced database resilience, the server provides instant access to intelligent descriptions, semantic search, and context-aware recommendations while maintaining 800+ writes/sec throughput.
@@ -13,7 +13,7 @@ mcp_code_indexer/database/retry_executor.py,sha256=wBIIbkU1bwQMrjM9AmDWNEQ-cw8IP
 mcp_code_indexer/deepask_handler.py,sha256=wpKMYnlsOGiaKLvuXIb62jeEb4xnYOmIcvvXjvbgdnc,18475
 mcp_code_indexer/error_handler.py,sha256=XBjjEriq1diPTGKpHcaBh9fj88_qhuNMwPeLiTWxrds,11431
 mcp_code_indexer/file_scanner.py,sha256=smY1Yfxfyqb_J5RQz5ETaSgE2_syC2SUUwzJxby3Bg8,11432
-mcp_code_indexer/git_hook_handler.py,sha256=HvD8vRcIxTpYTMyUk1JfTF8IIHFgQ3RghMcmNlXRKco,34447
+mcp_code_indexer/git_hook_handler.py,sha256=hTrtzfZcfCj2DivUb55ffFZ3heCQL0hURuLElZZ5wG0,44762
 mcp_code_indexer/logging_config.py,sha256=hexJWw7-6QQkH_2BwtKGO1CDOtQnP8F3Yss_yHKnzE4,9816
 mcp_code_indexer/main.py,sha256=GjwUtfQCxNxsCNtYpEBfYgOC0G5Q0Bcci4keVtNY3Cc,31888
 mcp_code_indexer/middleware/__init__.py,sha256=p-mP0pMsfiU2yajCPvokCUxUEkh_lu4XJP1LyyMW2ug,220
@@ -29,8 +29,8 @@ mcp_code_indexer/server/mcp_server.py,sha256=L83xi38SEqoIXH_ZJVOmMNFwTnGRHbQRuAA
 mcp_code_indexer/tiktoken_cache/9b5ad71b2ce5302211f9c61530b329a4922fc6a4,sha256=Ijkht27pm96ZW3_3OFE-7xAPtR0YyTWXoRO8_-hlsqc,1681126
 mcp_code_indexer/token_counter.py,sha256=e6WsyCEWMMSkMwLbcVtr5e8vEqh-kFqNmiJErCNdqHE,8220
 mcp_code_indexer/tools/__init__.py,sha256=m01mxML2UdD7y5rih_XNhNSCMzQTz7WQ_T1TeOcYlnE,49
-mcp_code_indexer-3.3.0.dist-info/LICENSE,sha256=JN9dyPPgYwH9C-UjYM7FLNZjQ6BF7kAzpF3_4PwY4rY,1086
-mcp_code_indexer-3.3.0.dist-info/METADATA,sha256=jSDImDDYDrtzVOROzsZMRFgNyiJ8_XXfxlNSXBPJ2Eg,19191
-mcp_code_indexer-3.3.0.dist-info/WHEEL,sha256=b4K_helf-jlQoXBBETfwnf4B04YC67LOev0jo4fX5m8,88
-mcp_code_indexer-3.3.0.dist-info/entry_points.txt,sha256=UABj7HZ0mC6rvF22gxaz2LLNLGQShTrFmp5u00iUtvo,67
-mcp_code_indexer-3.3.0.dist-info/RECORD,,
+mcp_code_indexer-3.4.0.dist-info/LICENSE,sha256=JN9dyPPgYwH9C-UjYM7FLNZjQ6BF7kAzpF3_4PwY4rY,1086
+mcp_code_indexer-3.4.0.dist-info/METADATA,sha256=zkeYyRq5xh0ResAqYIXYN-eGQ2JG3aAE3D3AtKA-kZ4,19191
+mcp_code_indexer-3.4.0.dist-info/WHEEL,sha256=b4K_helf-jlQoXBBETfwnf4B04YC67LOev0jo4fX5m8,88
+mcp_code_indexer-3.4.0.dist-info/entry_points.txt,sha256=UABj7HZ0mC6rvF22gxaz2LLNLGQShTrFmp5u00iUtvo,67
+mcp_code_indexer-3.4.0.dist-info/RECORD,,