mcp-code-indexer 3.3.0__py3-none-any.whl → 3.4.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- mcp_code_indexer/git_hook_handler.py +333 -28
- {mcp_code_indexer-3.3.0.dist-info → mcp_code_indexer-3.4.0.dist-info}/METADATA +3 -3
- {mcp_code_indexer-3.3.0.dist-info → mcp_code_indexer-3.4.0.dist-info}/RECORD +6 -6
- {mcp_code_indexer-3.3.0.dist-info → mcp_code_indexer-3.4.0.dist-info}/LICENSE +0 -0
- {mcp_code_indexer-3.3.0.dist-info → mcp_code_indexer-3.4.0.dist-info}/WHEEL +0 -0
- {mcp_code_indexer-3.3.0.dist-info → mcp_code_indexer-3.4.0.dist-info}/entry_points.txt +0 -0
mcp_code_indexer/git_hook_handler.py

```diff
@@ -76,6 +76,7 @@ class GitHookHandler:
         self.config = {
             "model": os.getenv("MCP_GITHOOK_MODEL", self.OPENROUTER_MODEL),
             "max_diff_tokens": 136000,  # Skip if diff larger than this (in tokens)
+            "chunk_token_limit": 100000,  # Target token limit per chunk
             "timeout": 300,  # 5 minutes
             "temperature": 0.3,  # Lower temperature for consistent updates
         }
```
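The only functional change in this hunk is the new `chunk_token_limit` entry, which later drives `_calculate_optimal_chunk_size`. A minimal sketch of the resolved config, assuming a placeholder class-level default model (the real `OPENROUTER_MODEL` value lives elsewhere in the handler):

```python
import os

# Hypothetical default; stands in for the handler's OPENROUTER_MODEL constant.
OPENROUTER_MODEL = "anthropic/claude-3.5-sonnet"

config = {
    "model": os.getenv("MCP_GITHOOK_MODEL", OPENROUTER_MODEL),
    "max_diff_tokens": 136000,    # whole-prompt ceiling (unchanged)
    "chunk_token_limit": 100000,  # new in 3.4.0: per-chunk token budget
    "timeout": 300,
    "temperature": 0.3,
}

# The chunk budget deliberately sits below the hard ceiling, so a diff that
# trips max_diff_tokens can still be split into chunks that fit.
assert config["chunk_token_limit"] < config["max_diff_tokens"]
```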
```diff
@@ -197,8 +198,8 @@ class GitHookHandler:
         changed_files: List[str],
     ) -> Dict[str, Any]:
         """
-        Smart staging: Try single-stage first, fall back to two-stage
-
+        Smart staging: Try single-stage first, fall back to two-stage,
+        then chunked processing if needed.
 
         Args:
             git_diff: Git diff content
```
```diff
@@ -240,24 +241,25 @@ class GitHookHandler:
                 f"falling back to two-stage analysis"
             )
 
-        #
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+        # Try two-stage analysis first
+        try:
+            return await self._analyze_with_two_stage(
+                git_diff, commit_message, current_overview,
+                current_descriptions, changed_files
+            )
+        except GitHookError as e:
+            if "too large" in str(e).lower():
+                # Fall back to chunked processing
+                self.logger.info(
+                    "Two-stage analysis failed due to size, "
+                    "falling back to chunked processing"
+                )
+                return await self._analyze_with_chunking(
+                    git_diff, commit_message, current_overview,
+                    current_descriptions, changed_files
+                )
+            else:
+                raise
 
     def _build_single_stage_prompt(
         self,
```
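With this change, two-stage analysis is expected to raise `GitHookError` on oversized prompts (see the Stage 1 and Stage 2 hunks below), and only size failures degrade to chunked processing; anything else propagates. A self-contained sketch of the same cascade, with stub analyzers standing in for the API-backed methods:

```python
import asyncio

class GitHookError(Exception):
    """Mirrors the handler's error type."""

async def two_stage(diff: str) -> dict:
    # Stub: the real method raises when a prompt exceeds max_diff_tokens.
    if len(diff) > 100:
        raise GitHookError(f"Stage 1 prompt too large ({len(diff)} tokens)")
    return {"file_updates": {}, "overview_update": None}

async def chunked(diff: str) -> dict:
    # Stub: the real method splits the diff and merges per-chunk results.
    return {"file_updates": {"a.py": "updated"}, "overview_update": None}

async def analyze(diff: str) -> dict:
    # Same shape as the handler: only size failures fall through to chunking.
    try:
        return await two_stage(diff)
    except GitHookError as e:
        if "too large" in str(e).lower():
            return await chunked(diff)
        raise

print(asyncio.run(analyze("x" * 500)))  # -> the chunked result
```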
```diff
@@ -571,6 +573,118 @@ Return ONLY a JSON object:
             self.logger.warning(f"Failed to get file descriptions: {e}")
             return {}
 
+    async def _analyze_with_two_stage(
+        self,
+        git_diff: str,
+        commit_message: str,
+        current_overview: str,
+        current_descriptions: Dict[str, str],
+        changed_files: List[str],
+    ) -> Dict[str, Any]:
+        """
+        Two-stage analysis: overview updates first, then file updates.
+
+        Args:
+            git_diff: Git diff content
+            commit_message: Commit message explaining the changes
+            current_overview: Current project overview
+            current_descriptions: Current file descriptions
+            changed_files: List of changed file paths
+
+        Returns:
+            Dict containing file_updates and overview_update
+        """
+        # Stage 1: Check if overview needs updating
+        overview_updates = await self._analyze_overview_updates(
+            git_diff, commit_message, current_overview, changed_files
+        )
+
+        # Stage 2: Update file descriptions
+        file_updates = await self._analyze_file_updates(
+            git_diff, commit_message, current_descriptions, changed_files
+        )
+
+        # Combine updates
+        updates = {
+            "file_updates": file_updates.get("file_updates", {}),
+            "overview_update": overview_updates.get("overview_update"),
+        }
+
+        self.logger.info("Two-stage analysis completed")
+        return updates
+
+    async def _analyze_with_chunking(
+        self,
+        git_diff: str,
+        commit_message: str,
+        current_overview: str,
+        current_descriptions: Dict[str, str],
+        changed_files: List[str],
+    ) -> Dict[str, Any]:
+        """
+        Chunked processing: Break large diffs into manageable chunks.
+
+        Args:
+            git_diff: Git diff content
+            commit_message: Commit message explaining the changes
+            current_overview: Current project overview
+            current_descriptions: Current file descriptions
+            changed_files: List of changed file paths
+
+        Returns:
+            Dict containing file_updates and overview_update
+        """
+        self.logger.info(
+            f"Starting chunked processing for {len(changed_files)} files"
+        )
+
+        # First, handle overview separately if needed
+        overview_update = None
+        if current_overview:
+            overview_update = await self._analyze_overview_lightweight(
+                commit_message, current_overview, changed_files
+            )
+
+        # Break changed files into chunks and process file descriptions
+        chunk_size = await self._calculate_optimal_chunk_size(
+            git_diff, changed_files
+        )
+
+        self.logger.info(f"Using chunk size of {chunk_size} files per chunk")
+
+        all_file_updates = {}
+
+        for i in range(0, len(changed_files), chunk_size):
+            chunk_files = changed_files[i:i + chunk_size]
+            chunk_number = (i // chunk_size) + 1
+            total_chunks = (len(changed_files) + chunk_size - 1) // chunk_size
+
+            self.logger.info(
+                f"Processing chunk {chunk_number}/{total_chunks} "
+                f"({len(chunk_files)} files)"
+            )
+
+            # Extract diff content for this chunk
+            chunk_diff = self._extract_chunk_diff(git_diff, chunk_files)
+
+            # Process this chunk
+            chunk_updates = await self._analyze_file_chunk(
+                chunk_diff, commit_message, current_descriptions, chunk_files
+            )
+
+            # Merge results
+            if chunk_updates and "file_updates" in chunk_updates:
+                all_file_updates.update(chunk_updates["file_updates"])
+
+        self.logger.info(
+            f"Chunked processing completed: updated {len(all_file_updates)} files"
+        )
+
+        return {
+            "file_updates": all_file_updates,
+            "overview_update": overview_update
+        }
+
     async def _analyze_overview_updates(
         self,
         git_diff: str,
```
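Inside `_analyze_with_chunking`, `total_chunks` is ceiling division written with integers. A quick worked check, with an arbitrary file count and chunk size:

```python
changed_files = [f"file_{n}.py" for n in range(23)]  # 23 files, arbitrary
chunk_size = 10

# (n + k - 1) // k is integer ceiling division: ceil(23 / 10) == 3
total_chunks = (len(changed_files) + chunk_size - 1) // chunk_size
assert total_chunks == 3

for i in range(0, len(changed_files), chunk_size):
    chunk = changed_files[i:i + chunk_size]
    print((i // chunk_size) + 1, len(chunk))  # -> 1 10 / 2 10 / 3 3
```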
```diff
@@ -632,11 +746,9 @@ Return ONLY a JSON object:
         self.logger.info(f"Stage 1 prompt: {prompt_tokens} tokens")
 
         if prompt_tokens > self.config["max_diff_tokens"]:
-            self.logger.warning(
-                f"Stage 1 prompt too large ({prompt_tokens} tokens), "
-                f"skipping overview analysis"
+            raise GitHookError(
+                f"Stage 1 prompt too large ({prompt_tokens} tokens)"
             )
-            return {"overview_update": None}
 
         # Call OpenRouter API
         result = await self._call_openrouter(prompt)
```
```diff
@@ -708,11 +820,9 @@ Return ONLY a JSON object:
         self.logger.info(f"Stage 2 prompt: {prompt_tokens} tokens")
 
         if prompt_tokens > self.config["max_diff_tokens"]:
-            self.logger.warning(
-                f"Stage 2 prompt too large ({prompt_tokens} tokens), "
-                f"skipping file analysis"
+            raise GitHookError(
+                f"Stage 2 prompt too large ({prompt_tokens} tokens)"
             )
-            return {"file_updates": {}}
 
         # Call OpenRouter API
         result = await self._call_openrouter(prompt)
```
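Both stages now fail loudly instead of silently returning empty updates (3.3.0 returned `{"overview_update": None}` and `{"file_updates": {}}` respectively), which is what lets the smart-staging caller react. A sketch of the guard in isolation, with a naive whitespace tokenizer standing in for the handler's tiktoken-backed counter:

```python
class GitHookError(Exception):
    pass

MAX_DIFF_TOKENS = 136000  # matches the handler's config default

def count_tokens(text: str) -> int:
    # Stand-in for the tiktoken-based token_counter.count_tokens()
    return len(text.split())

def check_prompt(prompt: str, stage: int) -> None:
    tokens = count_tokens(prompt)
    if tokens > MAX_DIFF_TOKENS:
        # 3.3.0 logged a warning and returned an empty result here;
        # 3.4.0 raises so the caller can fall back to chunked processing.
        raise GitHookError(f"Stage {stage} prompt too large ({tokens} tokens)")
```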
```diff
@@ -720,6 +830,201 @@ Return ONLY a JSON object:
 
         return result
 
+    async def _analyze_overview_lightweight(
+        self,
+        commit_message: str,
+        current_overview: str,
+        changed_files: List[str],
+    ) -> Optional[str]:
+        """
+        Lightweight overview analysis without including full diff.
+
+        Args:
+            commit_message: Commit message explaining the changes
+            current_overview: Current project overview
+            changed_files: List of changed file paths
+
+        Returns:
+            Updated overview text or None
+        """
+        self.logger.info("Lightweight overview analysis...")
+
+        prompt = f"""Analyze this commit to determine if project overview needs updating.
+
+COMMIT MESSAGE:
+{commit_message or "No commit message available"}
+
+CURRENT PROJECT OVERVIEW:
+{current_overview or "No overview available"}
+
+CHANGED FILES:
+{', '.join(changed_files)}
+
+INSTRUCTIONS:
+Update project overview ONLY if there are major structural changes like:
+- New major features or components (indicated by commit message or new directories)
+- Architectural changes (new patterns, frameworks, or approaches)
+- Significant dependency additions (Cargo.toml, package.json, pyproject.toml changes)
+- New API endpoints or workflows
+- Changes to build/deployment processes
+
+Do NOT update for: bug fixes, small refactors, documentation updates, version bumps.
+
+Return ONLY a JSON object:
+{{
+    "overview_update": "Updated overview text" or null
+}}"""
+
+        try:
+            result = await self._call_openrouter(prompt)
+            return result.get("overview_update")
+        except Exception as e:
+            self.logger.warning(f"Lightweight overview analysis failed: {e}")
+            return None
+
+    async def _calculate_optimal_chunk_size(
+        self, git_diff: str, changed_files: List[str]
+    ) -> int:
+        """
+        Calculate optimal chunk size based on diff content.
+
+        Args:
+            git_diff: Full git diff content
+            changed_files: List of changed file paths
+
+        Returns:
+            Optimal number of files per chunk
+        """
+        if not changed_files:
+            return 10  # Default chunk size
+
+        # Estimate average diff size per file
+        total_diff_tokens = self.token_counter.count_tokens(git_diff)
+        avg_tokens_per_file = total_diff_tokens / len(changed_files)
+
+        # Target chunk token limit
+        chunk_limit = self.config.get("chunk_token_limit", 100000)
+
+        # Calculate chunk size with buffer for overhead
+        overhead_factor = 0.7  # Reserve 30% for prompt overhead
+        effective_limit = chunk_limit * overhead_factor
+
+        chunk_size = max(1, int(effective_limit / avg_tokens_per_file))
+
+        # Cap at reasonable limits
+        chunk_size = min(chunk_size, 50)  # Max 50 files per chunk
+        chunk_size = max(chunk_size, 5)  # Min 5 files per chunk
+
+        self.logger.info(
+            f"Calculated chunk size: {chunk_size} files "
+            f"(avg {avg_tokens_per_file:.0f} tokens/file, "
+            f"target {chunk_limit} tokens/chunk)"
+        )
+
+        return chunk_size
+
+    def _extract_chunk_diff(self, git_diff: str, chunk_files: List[str]) -> str:
+        """
+        Extract diff content for specific files.
+
+        Args:
+            git_diff: Full git diff content
+            chunk_files: List of files to include in chunk
+
+        Returns:
+            Filtered diff content for chunk files only
+        """
+        lines = git_diff.split('\n')
+        chunk_lines = []
+        current_file = None
+        include_section = False
+
+        for line in lines:
+            if line.startswith('diff --git'):
+                # Parse file path from diff header
+                parts = line.split(' ')
+                if len(parts) >= 4:
+                    file_path = parts[2][2:]  # Remove 'a/' prefix
+                    current_file = file_path
+                    include_section = file_path in chunk_files
+
+            if include_section:
+                chunk_lines.append(line)
+
+        return '\n'.join(chunk_lines)
+
+    async def _analyze_file_chunk(
+        self,
+        chunk_diff: str,
+        commit_message: str,
+        current_descriptions: Dict[str, str],
+        chunk_files: List[str],
+    ) -> Dict[str, Any]:
+        """
+        Analyze a chunk of files for description updates.
+
+        Args:
+            chunk_diff: Git diff for this chunk only
+            commit_message: Commit message explaining the changes
+            current_descriptions: Current file descriptions
+            chunk_files: List of files in this chunk
+
+        Returns:
+            Dict with file_updates for this chunk
+        """
+        # Only include descriptions for files in this chunk
+        relevant_descriptions = {
+            path: desc
+            for path, desc in current_descriptions.items()
+            if path in chunk_files
+        }
+
+        prompt = f"""Analyze this git commit chunk and update file descriptions.
+
+COMMIT MESSAGE:
+{commit_message or "No commit message available"}
+
+CURRENT FILE DESCRIPTIONS (for chunk files only):
+{json.dumps(relevant_descriptions, indent=2)}
+
+CHUNK FILES:
+{', '.join(chunk_files)}
+
+GIT DIFF (chunk only):
+{chunk_diff}
+
+INSTRUCTIONS:
+Use the COMMIT MESSAGE to understand the intent and context of the changes.
+Update descriptions for files that have changed significantly.
+Only include files that need actual description updates.
+
+Return ONLY a JSON object:
+{{
+    "file_updates": {{
+        "path/to/file1.py": "Updated description for file1",
+        "path/to/file2.js": "Updated description for file2"
+    }}
+}}"""
+
+        # Check token count
+        prompt_tokens = self.token_counter.count_tokens(prompt)
+        self.logger.info(f"Chunk prompt: {prompt_tokens} tokens")
+
+        if prompt_tokens > self.config.get("chunk_token_limit", 100000):
+            self.logger.warning(
+                f"Chunk still too large ({prompt_tokens} tokens), "
+                f"skipping {len(chunk_files)} files"
+            )
+            return {"file_updates": {}}
+
+        # Call OpenRouter API
+        try:
+            result = await self._call_openrouter(prompt)
+            return result
+        except Exception as e:
+            self.logger.error(f"Failed to analyze chunk: {e}")
+            return {"file_updates": {}}
+
     @retry(
         wait=wait_exponential(multiplier=1, min=4, max=60),
         stop=stop_after_attempt(5),
```
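To put numbers through `_calculate_optimal_chunk_size`: the 100,000-token chunk budget times the 0.7 overhead factor leaves 70,000 effective tokens, and the result is clamped to between 5 and 50 files. A standalone sketch of the same arithmetic (the 180,000-token diff across 120 files is an invented example):

```python
def optimal_chunk_size(total_diff_tokens: int, n_files: int,
                       chunk_limit: int = 100000) -> int:
    if n_files == 0:
        return 10                          # default when nothing changed
    avg = total_diff_tokens / n_files      # average tokens per file
    effective = chunk_limit * 0.7          # reserve 30% for prompt overhead
    size = max(1, int(effective / avg))
    return max(5, min(size, 50))           # clamp to 5..50 files per chunk

# Invented example: a 180,000-token diff touching 120 files
# -> avg 1,500 tokens/file, 70,000 / 1,500 = 46 files per chunk
print(optimal_chunk_size(180_000, 120))  # 46
```

One caveat visible in `_extract_chunk_diff` as written: it recovers each path by splitting the `diff --git` header on spaces and stripping the `a/` prefix, so paths containing spaces, or renames where the `a/` and `b/` sides differ, may not match `chunk_files` and would be silently dropped from the chunk.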
{mcp_code_indexer-3.3.0.dist-info → mcp_code_indexer-3.4.0.dist-info}/METADATA

```diff
@@ -1,6 +1,6 @@
 Metadata-Version: 2.3
 Name: mcp-code-indexer
-Version: 3.3.0
+Version: 3.4.0
 Summary: MCP server that tracks file descriptions across codebases, enabling AI agents to efficiently navigate and understand code through searchable summaries and token-aware overviews.
 License: MIT
 Keywords: mcp,model-context-protocol,code-indexer,ai-tools,codebase-navigation,file-descriptions,llm-tools
```
```diff
@@ -40,8 +40,8 @@ Description-Content-Type: text/markdown
 
 # MCP Code Indexer 🚀
 
-[](https://badge.fury.io/py/mcp-code-indexer)
+[](https://pypi.org/project/mcp-code-indexer/)
 [](https://opensource.org/licenses/MIT)
 
 A production-ready **Model Context Protocol (MCP) server** that revolutionizes how AI agents navigate and understand codebases. Built for high-concurrency environments with advanced database resilience, the server provides instant access to intelligent descriptions, semantic search, and context-aware recommendations while maintaining 800+ writes/sec throughput.
```
{mcp_code_indexer-3.3.0.dist-info → mcp_code_indexer-3.4.0.dist-info}/RECORD

```diff
@@ -13,7 +13,7 @@ mcp_code_indexer/database/retry_executor.py,sha256=wBIIbkU1bwQMrjM9AmDWNEQ-cw8IP
 mcp_code_indexer/deepask_handler.py,sha256=wpKMYnlsOGiaKLvuXIb62jeEb4xnYOmIcvvXjvbgdnc,18475
 mcp_code_indexer/error_handler.py,sha256=XBjjEriq1diPTGKpHcaBh9fj88_qhuNMwPeLiTWxrds,11431
 mcp_code_indexer/file_scanner.py,sha256=smY1Yfxfyqb_J5RQz5ETaSgE2_syC2SUUwzJxby3Bg8,11432
-mcp_code_indexer/git_hook_handler.py,sha256=
+mcp_code_indexer/git_hook_handler.py,sha256=hTrtzfZcfCj2DivUb55ffFZ3heCQL0hURuLElZZ5wG0,44762
 mcp_code_indexer/logging_config.py,sha256=hexJWw7-6QQkH_2BwtKGO1CDOtQnP8F3Yss_yHKnzE4,9816
 mcp_code_indexer/main.py,sha256=GjwUtfQCxNxsCNtYpEBfYgOC0G5Q0Bcci4keVtNY3Cc,31888
 mcp_code_indexer/middleware/__init__.py,sha256=p-mP0pMsfiU2yajCPvokCUxUEkh_lu4XJP1LyyMW2ug,220
```
```diff
@@ -29,8 +29,8 @@ mcp_code_indexer/server/mcp_server.py,sha256=L83xi38SEqoIXH_ZJVOmMNFwTnGRHbQRuAA
 mcp_code_indexer/tiktoken_cache/9b5ad71b2ce5302211f9c61530b329a4922fc6a4,sha256=Ijkht27pm96ZW3_3OFE-7xAPtR0YyTWXoRO8_-hlsqc,1681126
 mcp_code_indexer/token_counter.py,sha256=e6WsyCEWMMSkMwLbcVtr5e8vEqh-kFqNmiJErCNdqHE,8220
 mcp_code_indexer/tools/__init__.py,sha256=m01mxML2UdD7y5rih_XNhNSCMzQTz7WQ_T1TeOcYlnE,49
-mcp_code_indexer-3.
-mcp_code_indexer-3.
-mcp_code_indexer-3.
-mcp_code_indexer-3.
-mcp_code_indexer-3.
+mcp_code_indexer-3.4.0.dist-info/LICENSE,sha256=JN9dyPPgYwH9C-UjYM7FLNZjQ6BF7kAzpF3_4PwY4rY,1086
+mcp_code_indexer-3.4.0.dist-info/METADATA,sha256=zkeYyRq5xh0ResAqYIXYN-eGQ2JG3aAE3D3AtKA-kZ4,19191
+mcp_code_indexer-3.4.0.dist-info/WHEEL,sha256=b4K_helf-jlQoXBBETfwnf4B04YC67LOev0jo4fX5m8,88
+mcp_code_indexer-3.4.0.dist-info/entry_points.txt,sha256=UABj7HZ0mC6rvF22gxaz2LLNLGQShTrFmp5u00iUtvo,67
+mcp_code_indexer-3.4.0.dist-info/RECORD,,
```
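Each RECORD row is `path,sha256=<digest>,<size>`, where the digest is the urlsafe-base64 SHA-256 of the file with `=` padding stripped (the wheel RECORD convention from PEP 427). A minimal sketch for verifying an installed file against its row:

```python
import base64
import hashlib

def record_digest(path: str) -> str:
    # Wheel RECORD convention: urlsafe base64 of the SHA-256, '=' padding stripped
    with open(path, "rb") as f:
        digest = hashlib.sha256(f.read()).digest()
    return "sha256=" + base64.urlsafe_b64encode(digest).rstrip(b"=").decode("ascii")

# e.g. record_digest("mcp_code_indexer/git_hook_handler.py") should print
# sha256=hTrtzfZcfCj2DivUb55ffFZ3heCQL0hURuLElZZ5wG0 for the 3.4.0 file
```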