mcp-code-indexer 3.3.0__tar.gz → 3.4.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {mcp_code_indexer-3.3.0 → mcp_code_indexer-3.4.1}/PKG-INFO +3 -3
- {mcp_code_indexer-3.3.0 → mcp_code_indexer-3.4.1}/README.md +2 -2
- {mcp_code_indexer-3.3.0 → mcp_code_indexer-3.4.1}/pyproject.toml +1 -1
- {mcp_code_indexer-3.3.0 → mcp_code_indexer-3.4.1}/src/mcp_code_indexer/git_hook_handler.py +375 -53
- {mcp_code_indexer-3.3.0 → mcp_code_indexer-3.4.1}/LICENSE +0 -0
- {mcp_code_indexer-3.3.0 → mcp_code_indexer-3.4.1}/src/mcp_code_indexer/__init__.py +0 -0
- {mcp_code_indexer-3.3.0 → mcp_code_indexer-3.4.1}/src/mcp_code_indexer/__main__.py +0 -0
- {mcp_code_indexer-3.3.0 → mcp_code_indexer-3.4.1}/src/mcp_code_indexer/ask_handler.py +0 -0
- {mcp_code_indexer-3.3.0 → mcp_code_indexer-3.4.1}/src/mcp_code_indexer/claude_api_handler.py +0 -0
- {mcp_code_indexer-3.3.0 → mcp_code_indexer-3.4.1}/src/mcp_code_indexer/cleanup_manager.py +0 -0
- {mcp_code_indexer-3.3.0 → mcp_code_indexer-3.4.1}/src/mcp_code_indexer/data/stop_words_english.txt +0 -0
- {mcp_code_indexer-3.3.0 → mcp_code_indexer-3.4.1}/src/mcp_code_indexer/database/__init__.py +0 -0
- {mcp_code_indexer-3.3.0 → mcp_code_indexer-3.4.1}/src/mcp_code_indexer/database/connection_health.py +0 -0
- {mcp_code_indexer-3.3.0 → mcp_code_indexer-3.4.1}/src/mcp_code_indexer/database/database.py +0 -0
- {mcp_code_indexer-3.3.0 → mcp_code_indexer-3.4.1}/src/mcp_code_indexer/database/exceptions.py +0 -0
- {mcp_code_indexer-3.3.0 → mcp_code_indexer-3.4.1}/src/mcp_code_indexer/database/models.py +0 -0
- {mcp_code_indexer-3.3.0 → mcp_code_indexer-3.4.1}/src/mcp_code_indexer/database/retry_executor.py +0 -0
- {mcp_code_indexer-3.3.0 → mcp_code_indexer-3.4.1}/src/mcp_code_indexer/deepask_handler.py +0 -0
- {mcp_code_indexer-3.3.0 → mcp_code_indexer-3.4.1}/src/mcp_code_indexer/error_handler.py +0 -0
- {mcp_code_indexer-3.3.0 → mcp_code_indexer-3.4.1}/src/mcp_code_indexer/file_scanner.py +0 -0
- {mcp_code_indexer-3.3.0 → mcp_code_indexer-3.4.1}/src/mcp_code_indexer/logging_config.py +0 -0
- {mcp_code_indexer-3.3.0 → mcp_code_indexer-3.4.1}/src/mcp_code_indexer/main.py +0 -0
- {mcp_code_indexer-3.3.0 → mcp_code_indexer-3.4.1}/src/mcp_code_indexer/middleware/__init__.py +0 -0
- {mcp_code_indexer-3.3.0 → mcp_code_indexer-3.4.1}/src/mcp_code_indexer/middleware/error_middleware.py +0 -0
- {mcp_code_indexer-3.3.0 → mcp_code_indexer-3.4.1}/src/mcp_code_indexer/migrations/001_initial.sql +0 -0
- {mcp_code_indexer-3.3.0 → mcp_code_indexer-3.4.1}/src/mcp_code_indexer/migrations/002_performance_indexes.sql +0 -0
- {mcp_code_indexer-3.3.0 → mcp_code_indexer-3.4.1}/src/mcp_code_indexer/migrations/003_project_overviews.sql +0 -0
- {mcp_code_indexer-3.3.0 → mcp_code_indexer-3.4.1}/src/mcp_code_indexer/migrations/004_remove_branch_dependency.sql +0 -0
- {mcp_code_indexer-3.3.0 → mcp_code_indexer-3.4.1}/src/mcp_code_indexer/migrations/005_remove_git_remotes.sql +0 -0
- {mcp_code_indexer-3.3.0 → mcp_code_indexer-3.4.1}/src/mcp_code_indexer/query_preprocessor.py +0 -0
- {mcp_code_indexer-3.3.0 → mcp_code_indexer-3.4.1}/src/mcp_code_indexer/server/__init__.py +0 -0
- {mcp_code_indexer-3.3.0 → mcp_code_indexer-3.4.1}/src/mcp_code_indexer/server/mcp_server.py +0 -0
- {mcp_code_indexer-3.3.0 → mcp_code_indexer-3.4.1}/src/mcp_code_indexer/tiktoken_cache/9b5ad71b2ce5302211f9c61530b329a4922fc6a4 +0 -0
- {mcp_code_indexer-3.3.0 → mcp_code_indexer-3.4.1}/src/mcp_code_indexer/token_counter.py +0 -0
- {mcp_code_indexer-3.3.0 → mcp_code_indexer-3.4.1}/src/mcp_code_indexer/tools/__init__.py +0 -0

{mcp_code_indexer-3.3.0 → mcp_code_indexer-3.4.1}/PKG-INFO

@@ -1,6 +1,6 @@
 Metadata-Version: 2.3
 Name: mcp-code-indexer
-Version: 3.3.0
+Version: 3.4.1
 Summary: MCP server that tracks file descriptions across codebases, enabling AI agents to efficiently navigate and understand code through searchable summaries and token-aware overviews.
 License: MIT
 Keywords: mcp,model-context-protocol,code-indexer,ai-tools,codebase-navigation,file-descriptions,llm-tools
@@ -40,8 +40,8 @@ Description-Content-Type: text/markdown
 
 # MCP Code Indexer 🚀
 
-[](https://badge.fury.io/py/mcp-code-indexer)
+[](https://pypi.org/project/mcp-code-indexer/)
 [](https://opensource.org/licenses/MIT)
 
 A production-ready **Model Context Protocol (MCP) server** that revolutionizes how AI agents navigate and understand codebases. Built for high-concurrency environments with advanced database resilience, the server provides instant access to intelligent descriptions, semantic search, and context-aware recommendations while maintaining 800+ writes/sec throughput.

{mcp_code_indexer-3.3.0 → mcp_code_indexer-3.4.1}/README.md

@@ -1,7 +1,7 @@
 # MCP Code Indexer 🚀
 
-[](https://badge.fury.io/py/mcp-code-indexer)
+[](https://pypi.org/project/mcp-code-indexer/)
 [](https://opensource.org/licenses/MIT)
 
 A production-ready **Model Context Protocol (MCP) server** that revolutionizes how AI agents navigate and understand codebases. Built for high-concurrency environments with advanced database resilience, the server provides instant access to intelligent descriptions, semantic search, and context-aware recommendations while maintaining 800+ writes/sec throughput.

{mcp_code_indexer-3.3.0 → mcp_code_indexer-3.4.1}/pyproject.toml

@@ -4,7 +4,7 @@ build-backend = "poetry.core.masonry.api"
 
 [tool.poetry]
 name = "mcp-code-indexer"
-version = "3.3.0"
+version = "3.4.1"
 description = "MCP server that tracks file descriptions across codebases, enabling AI agents to efficiently navigate and understand code through searchable summaries and token-aware overviews."
 authors = ["MCP Code Indexer Contributors"]
 maintainers = ["MCP Code Indexer Contributors"]

{mcp_code_indexer-3.3.0 → mcp_code_indexer-3.4.1}/src/mcp_code_indexer/git_hook_handler.py

@@ -76,6 +76,7 @@ class GitHookHandler:
         self.config = {
             "model": os.getenv("MCP_GITHOOK_MODEL", self.OPENROUTER_MODEL),
             "max_diff_tokens": 136000,  # Skip if diff larger than this (in tokens)
+            "chunk_token_limit": 100000,  # Target token limit per chunk
             "timeout": 300,  # 5 minutes
             "temperature": 0.3,  # Lower temperature for consistent updates
        }
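
The new `chunk_token_limit` sits deliberately below `max_diff_tokens`, so chunks built to the target budget should also clear the per-prompt ceiling. A minimal sketch of how these knobs relate (the model fallback string here is a placeholder; the real default lives in `GitHookHandler.OPENROUTER_MODEL`):

```python
import os

# Sketch of the config block above; values come from the diff, the
# fallback model name is hypothetical.
config = {
    "model": os.getenv("MCP_GITHOOK_MODEL", "example/default-model"),
    "max_diff_tokens": 136000,    # skip analysis above this size
    "chunk_token_limit": 100000,  # target budget per chunk (new in 3.4.x)
    "timeout": 300,               # 5 minutes
    "temperature": 0.3,           # lower temperature for consistent updates
}

# The chunk budget stays under the hard ceiling, so chunked prompts
# are not themselves rejected as too large.
assert config["chunk_token_limit"] < config["max_diff_tokens"]
```
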
@@ -87,6 +88,21 @@ class GitHookHandler:
                 "OPENROUTER_API_KEY environment variable is required for git hook mode"
             )
 
+    def _log_and_print(self, message: str, level: str = "info") -> None:
+        """
+        Log message and also print to stdout for user visibility.
+
+        Args:
+            message: Message to log and print
+            level: Log level (info, warning, error)
+        """
+        # Log to logger
+        getattr(self.logger, level)(message)
+
+        # Also print to stdout with prefix for visibility
+        prefix = "🔍" if level == "info" else "⚠️" if level == "warning" else "❌"
+        print(f"{prefix} {message}")
+
     async def run_githook_mode(
         self,
         commit_hash: Optional[str] = None,
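
Because git hooks run in a terminal, this helper mirrors every log line to stdout so the committer sees progress without tailing a log file. A standalone sketch of the same pattern, assuming a hypothetical logger name:

```python
import logging

logger = logging.getLogger("githook-demo")  # hypothetical name for the sketch
logging.basicConfig(level=logging.INFO)

def log_and_print(message: str, level: str = "info") -> None:
    # getattr(logger, "info") resolves to logger.info, and likewise for
    # "warning" and "error", so one helper covers all three levels.
    getattr(logger, level)(message)
    prefix = "🔍" if level == "info" else "⚠️" if level == "warning" else "❌"
    print(f"{prefix} {message}")

log_and_print("Analyzing diff: 12,345 tokens")
log_and_print("Chunk still too large", "warning")
log_and_print("Git hook analysis failed: timeout", "error")
```
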
@@ -102,21 +118,20 @@ class GitHookHandler:
         This is the main entry point for git hook functionality.
         """
         try:
-            self.logger.info("=== Git Hook Analysis Started ===")
+            self._log_and_print("=== Git Hook Analysis Started ===")
             if commit_hash:
-                self.logger.info(f"Mode: Single commit ({commit_hash})")
+                self._log_and_print(f"Mode: Single commit ({commit_hash})")
             elif commit_range:
-                self.logger.info(
+                self._log_and_print(
                     f"Mode: Commit range ({commit_range[0]}..{commit_range[1]})"
                 )
             else:
-                self.logger.info("Mode: Staged changes")
+                self._log_and_print("Mode: Staged changes")
 
             # Get git info from current directory
             project_info = await self._identify_project_from_git()
-            self.logger.info(
-                f"Project: {project_info.get('name', 'Unknown')} "
-                f"at {project_info.get('folderPath', 'Unknown')}"
+            self._log_and_print(
+                f"Project: {project_info.get('name', 'Unknown')}"
             )
 
             # Get git diff and commit message based on mode
@@ -136,25 +151,23 @@ class GitHookHandler:
 
             # Log diff details
             if not git_diff:
-                self.logger.info("No changes detected, skipping analysis")
+                self._log_and_print("No changes detected, skipping analysis")
                 return
 
             diff_tokens = self.token_counter.count_tokens(git_diff)
-            self.logger.info(f"Analyzing diff: {diff_tokens:,} tokens")
+            self._log_and_print(f"Analyzing diff: {diff_tokens:,} tokens")
 
             # Fetch current state
-            self.logger.info("Fetching current project state...")
+            self._log_and_print("Fetching current project state...")
             current_overview = await self._get_project_overview(project_info)
             current_descriptions = await self._get_all_descriptions(project_info)
             changed_files = self._extract_changed_files(git_diff)
 
             if not changed_files:
-                self.logger.info("No files changed, skipping analysis")
+                self._log_and_print("No files changed, skipping analysis")
                 return
 
-            self.logger.info(
-                f"Found {len(changed_files)} changed files: {', '.join(changed_files)}"
-            )
+            self._log_and_print(f"Found {len(changed_files)} changed files")
             overview_tokens = (
                 self.token_counter.count_tokens(current_overview)
                 if current_overview
@@ -174,13 +187,23 @@ class GitHookHandler:
 
             # Apply updates to database
             await self._apply_updates(project_info, updates)
-            self.logger.info(
-                …
-                …
-            )
+
+            # Count actual updates
+            file_update_count = len(updates.get("file_updates", {}))
+            overview_updated = bool(updates.get("overview_update"))
+
+            if file_update_count > 0 or overview_updated:
+                update_parts = []
+                if file_update_count > 0:
+                    update_parts.append(f"{file_update_count} file descriptions")
+                if overview_updated:
+                    update_parts.append("project overview")
+                self._log_and_print(f"✅ Updated {' and '.join(update_parts)}")
+            else:
+                self._log_and_print("✅ Analysis complete, no updates needed")
 
         except Exception as e:
-            self.logger.error(f"Git hook analysis failed: {e}")
+            self._log_and_print(f"Git hook analysis failed: {e}", "error")
             self.logger.error(f"Exception details: {type(e).__name__}: {str(e)}")
             import traceback
|
|
197
220
|
changed_files: List[str],
|
198
221
|
) -> Dict[str, Any]:
|
199
222
|
"""
|
200
|
-
Smart staging: Try single-stage first, fall back to two-stage
|
201
|
-
|
223
|
+
Smart staging: Try single-stage first, fall back to two-stage,
|
224
|
+
then chunked processing if needed.
|
202
225
|
|
203
226
|
Args:
|
204
227
|
git_diff: Git diff content
|
@@ -229,35 +252,31 @@ class GitHookHandler:
 
         if prompt_tokens <= token_limit:
             # Use single-stage approach
-            self.logger.info("Using single-stage analysis")
+            self._log_and_print("Using single-stage analysis")
             result = await self._call_openrouter(single_stage_prompt)
-            self.logger.info("Single-stage analysis completed")
             return result
         else:
             # Fall back to two-stage approach
-            self.logger.info(
-                f"Single-stage prompt too large ({prompt_tokens} tokens), "
-                f"falling back to two-stage analysis"
-            )
-
-            # Stage 1: Check if overview needs updating
-            overview_updates = await self._analyze_overview_updates(
-                git_diff, commit_message, current_overview, changed_files
-            )
-
-            # Stage 2: Update file descriptions
-            file_updates = await self._analyze_file_updates(
-                git_diff, commit_message, current_descriptions, changed_files
-            )
-
-            # Combine updates
-            updates = {
-                "file_updates": file_updates.get("file_updates", {}),
-                "overview_update": overview_updates.get("overview_update"),
-            }
+            self._log_and_print("Using two-stage analysis (large diff)")
 
-            self.logger.info("Two-stage analysis completed")
-            return updates
+            # Try two-stage analysis first
+            try:
+                return await self._analyze_with_two_stage(
+                    git_diff, commit_message, current_overview,
+                    current_descriptions, changed_files
+                )
+            except GitHookError as e:
+                if "too large" in str(e).lower():
+                    # Fall back to chunked processing
+                    self._log_and_print(
+                        "Using chunked processing (very large diff)"
+                    )
+                    return await self._analyze_with_chunking(
+                        git_diff, commit_message, current_overview,
+                        current_descriptions, changed_files
+                    )
+                else:
+                    raise
 
     def _build_single_stage_prompt(
         self,
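
The net effect is a three-tier ladder: single-stage when the whole prompt fits, two-stage when it does not, and chunked processing when even a stage prompt exceeds `max_diff_tokens` and raises `GitHookError`. A self-contained sketch of that control flow; the `*_stub` coroutines and this copy of the exception class are stand-ins, not the package's real API surface:

```python
import asyncio

class GitHookError(Exception):
    """Stand-in for the handler's GitHookError."""

async def single_stage_stub() -> dict:
    return {"strategy": "single-stage"}

async def two_stage_stub(prompt_tokens: int) -> dict:
    if prompt_tokens > 136_000:  # mirrors max_diff_tokens
        raise GitHookError(f"Stage 1 prompt too large ({prompt_tokens} tokens)")
    return {"strategy": "two-stage"}

async def chunked_stub() -> dict:
    return {"strategy": "chunked"}

async def analyze(prompt_tokens: int, token_limit: int = 100_000) -> dict:
    if prompt_tokens <= token_limit:
        return await single_stage_stub()
    try:
        return await two_stage_stub(prompt_tokens)
    except GitHookError as e:
        if "too large" in str(e).lower():
            return await chunked_stub()  # last resort for very large diffs
        raise

for tokens in (50_000, 120_000, 500_000):
    print(tokens, asyncio.run(analyze(tokens))["strategy"])
# 50000 single-stage, 120000 two-stage, 500000 chunked
```
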
@@ -571,6 +590,118 @@ Return ONLY a JSON object:
             self.logger.warning(f"Failed to get file descriptions: {e}")
             return {}
 
+    async def _analyze_with_two_stage(
+        self,
+        git_diff: str,
+        commit_message: str,
+        current_overview: str,
+        current_descriptions: Dict[str, str],
+        changed_files: List[str],
+    ) -> Dict[str, Any]:
+        """
+        Two-stage analysis: overview updates first, then file updates.
+
+        Args:
+            git_diff: Git diff content
+            commit_message: Commit message explaining the changes
+            current_overview: Current project overview
+            current_descriptions: Current file descriptions
+            changed_files: List of changed file paths
+
+        Returns:
+            Dict containing file_updates and overview_update
+        """
+        # Stage 1: Check if overview needs updating
+        overview_updates = await self._analyze_overview_updates(
+            git_diff, commit_message, current_overview, changed_files
+        )
+
+        # Stage 2: Update file descriptions
+        file_updates = await self._analyze_file_updates(
+            git_diff, commit_message, current_descriptions, changed_files
+        )
+
+        # Combine updates
+        updates = {
+            "file_updates": file_updates.get("file_updates", {}),
+            "overview_update": overview_updates.get("overview_update"),
+        }
+
+        self.logger.info("Two-stage analysis completed")
+        return updates
+
+    async def _analyze_with_chunking(
+        self,
+        git_diff: str,
+        commit_message: str,
+        current_overview: str,
+        current_descriptions: Dict[str, str],
+        changed_files: List[str],
+    ) -> Dict[str, Any]:
+        """
+        Chunked processing: Break large diffs into manageable chunks.
+
+        Args:
+            git_diff: Git diff content
+            commit_message: Commit message explaining the changes
+            current_overview: Current project overview
+            current_descriptions: Current file descriptions
+            changed_files: List of changed file paths
+
+        Returns:
+            Dict containing file_updates and overview_update
+        """
+        self._log_and_print(
+            f"Starting chunked processing for {len(changed_files)} files"
+        )
+
+        # First, handle overview separately if needed
+        overview_update = None
+        if current_overview:
+            overview_update = await self._analyze_overview_lightweight(
+                commit_message, current_overview, changed_files
+            )
+
+        # Break changed files into chunks and process file descriptions
+        chunk_size = await self._calculate_optimal_chunk_size(
+            git_diff, changed_files
+        )
+
+        self._log_and_print(f"Processing in {chunk_size}-file chunks")
+
+        all_file_updates = {}
+
+        for i in range(0, len(changed_files), chunk_size):
+            chunk_files = changed_files[i:i + chunk_size]
+            chunk_number = (i // chunk_size) + 1
+            total_chunks = (len(changed_files) + chunk_size - 1) // chunk_size
+
+            self._log_and_print(
+                f"Processing chunk {chunk_number}/{total_chunks} "
+                f"({len(chunk_files)} files)"
+            )
+
+            # Extract diff content for this chunk
+            chunk_diff = self._extract_chunk_diff(git_diff, chunk_files)
+
+            # Process this chunk
+            chunk_updates = await self._analyze_file_chunk(
+                chunk_diff, commit_message, current_descriptions, chunk_files
+            )
+
+            # Merge results
+            if chunk_updates and "file_updates" in chunk_updates:
+                all_file_updates.update(chunk_updates["file_updates"])
+
+        self.logger.info(
+            f"Chunked processing completed: updated {len(all_file_updates)} files"
+        )
+
+        return {
+            "file_updates": all_file_updates,
+            "overview_update": overview_update
+        }
+
     async def _analyze_overview_updates(
         self,
         git_diff: str,
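
The chunk loop strides through `changed_files` with plain slicing, so the last chunk simply comes up short, and `total_chunks` uses the ceiling-division idiom `(n + k - 1) // k`. A quick worked pass with illustrative numbers:

```python
changed_files = [f"src/file_{i}.py" for i in range(12)]  # 12 hypothetical paths
chunk_size = 5

total_chunks = (len(changed_files) + chunk_size - 1) // chunk_size  # ceil(12/5) = 3

for i in range(0, len(changed_files), chunk_size):
    chunk_files = changed_files[i:i + chunk_size]
    chunk_number = (i // chunk_size) + 1
    print(f"chunk {chunk_number}/{total_chunks}: {len(chunk_files)} files")
# chunk 1/3: 5 files
# chunk 2/3: 5 files
# chunk 3/3: 2 files
```
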
@@ -632,11 +763,9 @@ Return ONLY a JSON object:
         self.logger.info(f"Stage 1 prompt: {prompt_tokens} tokens")
 
         if prompt_tokens > self.config["max_diff_tokens"]:
-            self.logger.warning(
-                f"Stage 1 prompt too large ({prompt_tokens} tokens), "
-                f"skipping overview analysis"
+            raise GitHookError(
+                f"Stage 1 prompt too large ({prompt_tokens} tokens)"
             )
-            return {"overview_update": None}
 
         # Call OpenRouter API
         result = await self._call_openrouter(prompt)
@@ -708,11 +837,9 @@ Return ONLY a JSON object:
         self.logger.info(f"Stage 2 prompt: {prompt_tokens} tokens")
 
         if prompt_tokens > self.config["max_diff_tokens"]:
-            self.logger.warning(
-                f"Stage 2 prompt too large ({prompt_tokens} tokens), "
-                f"skipping file analysis"
+            raise GitHookError(
+                f"Stage 2 prompt too large ({prompt_tokens} tokens)"
             )
-            return {"file_updates": {}}
 
         # Call OpenRouter API
         result = await self._call_openrouter(prompt)
@@ -720,6 +847,201 @@ Return ONLY a JSON object:
 
         return result
 
+    async def _analyze_overview_lightweight(
+        self,
+        commit_message: str,
+        current_overview: str,
+        changed_files: List[str],
+    ) -> Optional[str]:
+        """
+        Lightweight overview analysis without including full diff.
+
+        Args:
+            commit_message: Commit message explaining the changes
+            current_overview: Current project overview
+            changed_files: List of changed file paths
+
+        Returns:
+            Updated overview text or None
+        """
+        self.logger.info("Lightweight overview analysis...")
+
+        prompt = f"""Analyze this commit to determine if project overview needs updating.
+
+COMMIT MESSAGE:
+{commit_message or "No commit message available"}
+
+CURRENT PROJECT OVERVIEW:
+{current_overview or "No overview available"}
+
+CHANGED FILES:
+{', '.join(changed_files)}
+
+INSTRUCTIONS:
+Update project overview ONLY if there are major structural changes like:
+- New major features or components (indicated by commit message or new directories)
+- Architectural changes (new patterns, frameworks, or approaches)
+- Significant dependency additions (Cargo.toml, package.json, pyproject.toml changes)
+- New API endpoints or workflows
+- Changes to build/deployment processes
+
+Do NOT update for: bug fixes, small refactors, documentation updates, version bumps.
+
+Return ONLY a JSON object:
+{{
+    "overview_update": "Updated overview text" or null
+}}"""
+
+        try:
+            result = await self._call_openrouter(prompt)
+            return result.get("overview_update")
+        except Exception as e:
+            self.logger.warning(f"Lightweight overview analysis failed: {e}")
+            return None
+
+    async def _calculate_optimal_chunk_size(
+        self, git_diff: str, changed_files: List[str]
+    ) -> int:
+        """
+        Calculate optimal chunk size based on diff content.
+
+        Args:
+            git_diff: Full git diff content
+            changed_files: List of changed file paths
+
+        Returns:
+            Optimal number of files per chunk
+        """
+        if not changed_files:
+            return 10  # Default chunk size
+
+        # Estimate average diff size per file
+        total_diff_tokens = self.token_counter.count_tokens(git_diff)
+        avg_tokens_per_file = total_diff_tokens / len(changed_files)
+
+        # Target chunk token limit
+        chunk_limit = self.config.get("chunk_token_limit", 100000)
+
+        # Calculate chunk size with buffer for overhead
+        overhead_factor = 0.7  # Reserve 30% for prompt overhead
+        effective_limit = chunk_limit * overhead_factor
+
+        chunk_size = max(1, int(effective_limit / avg_tokens_per_file))
+
+        # Cap at reasonable limits
+        chunk_size = min(chunk_size, 50)  # Max 50 files per chunk
+        chunk_size = max(chunk_size, 5)  # Min 5 files per chunk
+
+        self.logger.info(
+            f"Calculated chunk size: {chunk_size} files "
+            f"(avg {avg_tokens_per_file:.0f} tokens/file, "
+            f"target {chunk_limit} tokens/chunk)"
+        )
+
+        return chunk_size
+
+    def _extract_chunk_diff(self, git_diff: str, chunk_files: List[str]) -> str:
+        """
+        Extract diff content for specific files.
+
+        Args:
+            git_diff: Full git diff content
+            chunk_files: List of files to include in chunk
+
+        Returns:
+            Filtered diff content for chunk files only
+        """
+        lines = git_diff.split('\n')
+        chunk_lines = []
+        current_file = None
+        include_section = False
+
+        for line in lines:
+            if line.startswith('diff --git'):
+                # Parse file path from diff header
+                parts = line.split(' ')
+                if len(parts) >= 4:
+                    file_path = parts[2][2:]  # Remove 'a/' prefix
+                    current_file = file_path
+                    include_section = file_path in chunk_files
+
+            if include_section:
+                chunk_lines.append(line)
+
+        return '\n'.join(chunk_lines)
+
+    async def _analyze_file_chunk(
+        self,
+        chunk_diff: str,
+        commit_message: str,
+        current_descriptions: Dict[str, str],
+        chunk_files: List[str],
+    ) -> Dict[str, Any]:
+        """
+        Analyze a chunk of files for description updates.
+
+        Args:
+            chunk_diff: Git diff for this chunk only
+            commit_message: Commit message explaining the changes
+            current_descriptions: Current file descriptions
+            chunk_files: List of files in this chunk
+
+        Returns:
+            Dict with file_updates for this chunk
+        """
+        # Only include descriptions for files in this chunk
+        relevant_descriptions = {
+            path: desc
+            for path, desc in current_descriptions.items()
+            if path in chunk_files
+        }
+
+        prompt = f"""Analyze this git commit chunk and update file descriptions.
+
+COMMIT MESSAGE:
+{commit_message or "No commit message available"}
+
+CURRENT FILE DESCRIPTIONS (for chunk files only):
+{json.dumps(relevant_descriptions, indent=2)}
+
+CHUNK FILES:
+{', '.join(chunk_files)}
+
+GIT DIFF (chunk only):
+{chunk_diff}
+
+INSTRUCTIONS:
+Use the COMMIT MESSAGE to understand the intent and context of the changes.
+Update descriptions for files that have changed significantly.
+Only include files that need actual description updates.
+
+Return ONLY a JSON object:
+{{
+    "file_updates": {{
+        "path/to/file1.py": "Updated description for file1",
+        "path/to/file2.js": "Updated description for file2"
+    }}
+}}"""
+
+        # Check token count
+        prompt_tokens = self.token_counter.count_tokens(prompt)
+        self.logger.info(f"Chunk prompt: {prompt_tokens} tokens")
+
+        if prompt_tokens > self.config.get("chunk_token_limit", 100000):
+            self.logger.warning(
+                f"Chunk still too large ({prompt_tokens} tokens), "
+                f"skipping {len(chunk_files)} files"
+            )
+            return {"file_updates": {}}
+
+        # Call OpenRouter API
+        try:
+            result = await self._call_openrouter(prompt)
+            return result
+        except Exception as e:
+            self.logger.error(f"Failed to analyze chunk: {e}")
+            return {"file_updates": {}}
+
     @retry(
         wait=wait_exponential(multiplier=1, min=4, max=60),
         stop=stop_after_attempt(5),
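
To see the sizing heuristic concretely, here is the same arithmetic as `_calculate_optimal_chunk_size` run on made-up numbers (600k diff tokens across 120 files, purely illustrative):

```python
total_diff_tokens = 600_000  # illustrative, not a real measurement
changed_files_count = 120

avg_tokens_per_file = total_diff_tokens / changed_files_count    # 5000.0
effective_limit = 100_000 * 0.7                                  # 70000.0 after 30% overhead reserve
chunk_size = max(1, int(effective_limit / avg_tokens_per_file))  # 14
chunk_size = max(5, min(chunk_size, 50))                         # clamp to [5, 50], stays 14
print(chunk_size)  # 14, i.e. ~9 chunks for 120 files
```

And `_extract_chunk_diff` keys its filtering off the `diff --git a/<path> b/<path>` header line; every line up to the next header is attributed to the current file. A toy run of the same parsing logic, as a free-function copy of the method over a hypothetical two-file diff:

```python
def extract_chunk_diff(git_diff: str, chunk_files: list) -> str:
    lines = git_diff.split('\n')
    chunk_lines = []
    include_section = False
    for line in lines:
        if line.startswith('diff --git'):
            parts = line.split(' ')
            if len(parts) >= 4:
                include_section = parts[2][2:] in chunk_files  # strip 'a/' prefix
        if include_section:
            chunk_lines.append(line)
    return '\n'.join(chunk_lines)

sample = (
    "diff --git a/src/app.py b/src/app.py\n"
    "@@ -1 +1 @@\n-old\n+new\n"
    "diff --git a/README.md b/README.md\n"
    "@@ -1 +1 @@\n-x\n+y"
)
print(extract_chunk_diff(sample, ["README.md"]))
# diff --git a/README.md b/README.md
# @@ -1 +1 @@
# -x
# +y
```
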