mcp-code-indexer 1.8.0__tar.gz → 1.9.0__tar.gz

This diff shows the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (41)
  1. {mcp_code_indexer-1.8.0/src/mcp_code_indexer.egg-info → mcp_code_indexer-1.9.0}/PKG-INFO +1 -1
  2. {mcp_code_indexer-1.8.0 → mcp_code_indexer-1.9.0}/pyproject.toml +1 -1
  3. {mcp_code_indexer-1.8.0 → mcp_code_indexer-1.9.0}/src/mcp_code_indexer/git_hook_handler.py +177 -23
  4. {mcp_code_indexer-1.8.0 → mcp_code_indexer-1.9.0/src/mcp_code_indexer.egg-info}/PKG-INFO +1 -1
  5. {mcp_code_indexer-1.8.0 → mcp_code_indexer-1.9.0}/LICENSE +0 -0
  6. {mcp_code_indexer-1.8.0 → mcp_code_indexer-1.9.0}/MANIFEST.in +0 -0
  7. {mcp_code_indexer-1.8.0 → mcp_code_indexer-1.9.0}/README.md +0 -0
  8. {mcp_code_indexer-1.8.0 → mcp_code_indexer-1.9.0}/docs/api-reference.md +0 -0
  9. {mcp_code_indexer-1.8.0 → mcp_code_indexer-1.9.0}/docs/architecture.md +0 -0
  10. {mcp_code_indexer-1.8.0 → mcp_code_indexer-1.9.0}/docs/configuration.md +0 -0
  11. {mcp_code_indexer-1.8.0 → mcp_code_indexer-1.9.0}/docs/contributing.md +0 -0
  12. {mcp_code_indexer-1.8.0 → mcp_code_indexer-1.9.0}/docs/git-hook-setup.md +0 -0
  13. {mcp_code_indexer-1.8.0 → mcp_code_indexer-1.9.0}/migrations/001_initial.sql +0 -0
  14. {mcp_code_indexer-1.8.0 → mcp_code_indexer-1.9.0}/migrations/002_performance_indexes.sql +0 -0
  15. {mcp_code_indexer-1.8.0 → mcp_code_indexer-1.9.0}/migrations/003_project_overviews.sql +0 -0
  16. {mcp_code_indexer-1.8.0 → mcp_code_indexer-1.9.0}/requirements.txt +0 -0
  17. {mcp_code_indexer-1.8.0 → mcp_code_indexer-1.9.0}/setup.cfg +0 -0
  18. {mcp_code_indexer-1.8.0 → mcp_code_indexer-1.9.0}/setup.py +0 -0
  19. {mcp_code_indexer-1.8.0 → mcp_code_indexer-1.9.0}/src/mcp_code_indexer/__init__.py +0 -0
  20. {mcp_code_indexer-1.8.0 → mcp_code_indexer-1.9.0}/src/mcp_code_indexer/__main__.py +0 -0
  21. {mcp_code_indexer-1.8.0 → mcp_code_indexer-1.9.0}/src/mcp_code_indexer/data/stop_words_english.txt +0 -0
  22. {mcp_code_indexer-1.8.0 → mcp_code_indexer-1.9.0}/src/mcp_code_indexer/database/__init__.py +0 -0
  23. {mcp_code_indexer-1.8.0 → mcp_code_indexer-1.9.0}/src/mcp_code_indexer/database/database.py +0 -0
  24. {mcp_code_indexer-1.8.0 → mcp_code_indexer-1.9.0}/src/mcp_code_indexer/database/models.py +0 -0
  25. {mcp_code_indexer-1.8.0 → mcp_code_indexer-1.9.0}/src/mcp_code_indexer/error_handler.py +0 -0
  26. {mcp_code_indexer-1.8.0 → mcp_code_indexer-1.9.0}/src/mcp_code_indexer/file_scanner.py +0 -0
  27. {mcp_code_indexer-1.8.0 → mcp_code_indexer-1.9.0}/src/mcp_code_indexer/logging_config.py +0 -0
  28. {mcp_code_indexer-1.8.0 → mcp_code_indexer-1.9.0}/src/mcp_code_indexer/main.py +0 -0
  29. {mcp_code_indexer-1.8.0 → mcp_code_indexer-1.9.0}/src/mcp_code_indexer/merge_handler.py +0 -0
  30. {mcp_code_indexer-1.8.0 → mcp_code_indexer-1.9.0}/src/mcp_code_indexer/middleware/__init__.py +0 -0
  31. {mcp_code_indexer-1.8.0 → mcp_code_indexer-1.9.0}/src/mcp_code_indexer/middleware/error_middleware.py +0 -0
  32. {mcp_code_indexer-1.8.0 → mcp_code_indexer-1.9.0}/src/mcp_code_indexer/server/__init__.py +0 -0
  33. {mcp_code_indexer-1.8.0 → mcp_code_indexer-1.9.0}/src/mcp_code_indexer/server/mcp_server.py +0 -0
  34. {mcp_code_indexer-1.8.0 → mcp_code_indexer-1.9.0}/src/mcp_code_indexer/tiktoken_cache/9b5ad71b2ce5302211f9c61530b329a4922fc6a4 +0 -0
  35. {mcp_code_indexer-1.8.0 → mcp_code_indexer-1.9.0}/src/mcp_code_indexer/token_counter.py +0 -0
  36. {mcp_code_indexer-1.8.0 → mcp_code_indexer-1.9.0}/src/mcp_code_indexer/tools/__init__.py +0 -0
  37. {mcp_code_indexer-1.8.0 → mcp_code_indexer-1.9.0}/src/mcp_code_indexer.egg-info/SOURCES.txt +0 -0
  38. {mcp_code_indexer-1.8.0 → mcp_code_indexer-1.9.0}/src/mcp_code_indexer.egg-info/dependency_links.txt +0 -0
  39. {mcp_code_indexer-1.8.0 → mcp_code_indexer-1.9.0}/src/mcp_code_indexer.egg-info/entry_points.txt +0 -0
  40. {mcp_code_indexer-1.8.0 → mcp_code_indexer-1.9.0}/src/mcp_code_indexer.egg-info/requires.txt +0 -0
  41. {mcp_code_indexer-1.8.0 → mcp_code_indexer-1.9.0}/src/mcp_code_indexer.egg-info/top_level.txt +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: mcp-code-indexer
3
- Version: 1.8.0
3
+ Version: 1.9.0
4
4
  Summary: MCP server that tracks file descriptions across codebases, enabling AI agents to efficiently navigate and understand code through searchable summaries and token-aware overviews.
5
5
  Author: MCP Code Indexer Contributors
6
6
  Maintainer: MCP Code Indexer Contributors
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "mcp-code-indexer"
7
- version = "1.8.0"
7
+ version = "1.9.0"
8
8
  description = "MCP server that tracks file descriptions across codebases, enabling AI agents to efficiently navigate and understand code through searchable summaries and token-aware overviews."
9
9
  readme = "README.md"
10
10
  license = {text = "MIT"}
@@ -119,9 +119,8 @@ class GitHookHandler:
119
119
  self.logger.info(f"Skipping git hook update - no git diff")
120
120
  return
121
121
 
122
- diff_chars = len(git_diff)
123
122
  diff_tokens = self.token_counter.count_tokens(git_diff)
124
- self.logger.info(f"Git diff: {diff_chars} characters, {diff_tokens} tokens")
123
+ self.logger.info(f"Git diff: {diff_tokens} tokens")
125
124
 
126
125
  # Fetch current state
127
126
  self.logger.info("Fetching current project state...")
@@ -134,11 +133,69 @@ class GitHookHandler:
134
133
  return
135
134
 
136
135
  self.logger.info(f"Found {len(changed_files)} changed files: {', '.join(changed_files)}")
137
- self.logger.info(f"Current overview length: {len(current_overview) if current_overview else 0} characters")
136
+ overview_tokens = self.token_counter.count_tokens(current_overview) if current_overview else 0
137
+ self.logger.info(f"Current overview: {overview_tokens} tokens")
138
138
  self.logger.info(f"Current descriptions count: {len(current_descriptions)}")
139
139
 
140
- # Use two-stage approach for large codebases
141
- self.logger.info("Starting two-stage analysis approach...")
140
+ # Try single-stage first, fall back to two-stage if needed
141
+ updates = await self._analyze_with_smart_staging(
142
+ git_diff, commit_message, current_overview, current_descriptions, changed_files
143
+ )
144
+
145
+ # Apply updates to database
146
+ await self._apply_updates(project_info, updates)
147
+
148
+ self.logger.info(f"Git hook update completed successfully for {len(changed_files)} files")
149
+
150
+ except Exception as e:
151
+ self.logger.error(f"Git hook mode failed: {e}")
152
+ self.logger.error(f"Exception details: {type(e).__name__}: {str(e)}")
153
+ import traceback
154
+ self.logger.error(f"Full traceback:\n{traceback.format_exc()}")
155
+ # Don't fail the git operation - just log the error
156
+ raise GitHookError(f"Git hook processing failed: {e}")
157
+
158
+ async def _analyze_with_smart_staging(
159
+ self,
160
+ git_diff: str,
161
+ commit_message: str,
162
+ current_overview: str,
163
+ current_descriptions: Dict[str, str],
164
+ changed_files: List[str]
165
+ ) -> Dict[str, Any]:
166
+ """
167
+ Smart staging: Try single-stage first, fall back to two-stage if token limit exceeded.
168
+
169
+ Args:
170
+ git_diff: Git diff content
171
+ commit_message: Commit message explaining the changes
172
+ current_overview: Current project overview
173
+ current_descriptions: Current file descriptions
174
+ changed_files: List of changed file paths
175
+
176
+ Returns:
177
+ Dict containing file_updates and overview_update
178
+ """
179
+ # Build single-stage prompt and check token count
180
+ single_stage_prompt = self._build_single_stage_prompt(
181
+ git_diff, commit_message, current_overview, current_descriptions, changed_files
182
+ )
183
+
184
+ prompt_tokens = self.token_counter.count_tokens(single_stage_prompt)
185
+ token_limit = self.config.get("max_diff_tokens", 130000) # Conservative limit under 136k
186
+
187
+ self.logger.info(f"Single-stage prompt: {prompt_tokens} tokens")
188
+ self.logger.info(f"Token limit: {token_limit}")
189
+
190
+ if prompt_tokens <= token_limit:
191
+ # Use single-stage approach
192
+ self.logger.info("Using single-stage analysis (within token limit)")
193
+ result = await self._call_openrouter(single_stage_prompt)
194
+ self.logger.info("Single-stage analysis completed")
195
+ return result
196
+ else:
197
+ # Fall back to two-stage approach
198
+ self.logger.info(f"Single-stage prompt too large ({prompt_tokens} tokens), falling back to two-stage analysis")
142
199
 
143
200
  # Stage 1: Check if overview needs updating
144
201
  overview_updates = await self._analyze_overview_updates(
@@ -156,20 +213,76 @@ class GitHookHandler:
156
213
  "overview_update": overview_updates.get("overview_update")
157
214
  }
158
215
 
159
- self.logger.info(f"Two-stage analysis completed, processing updates...")
160
-
161
- # Apply updates to database
162
- await self._apply_updates(project_info, updates)
163
-
164
- self.logger.info(f"Git hook update completed successfully for {len(changed_files)} files")
216
+ self.logger.info("Two-stage analysis completed")
217
+ return updates
218
+
219
+ def _build_single_stage_prompt(
220
+ self,
221
+ git_diff: str,
222
+ commit_message: str,
223
+ current_overview: str,
224
+ current_descriptions: Dict[str, str],
225
+ changed_files: List[str]
226
+ ) -> str:
227
+ """
228
+ Build single-stage prompt that handles both overview and file updates.
229
+
230
+ Args:
231
+ git_diff: Git diff content
232
+ commit_message: Commit message explaining the changes
233
+ current_overview: Current project overview
234
+ current_descriptions: Current file descriptions
235
+ changed_files: List of changed file paths
165
236
 
166
- except Exception as e:
167
- self.logger.error(f"Git hook mode failed: {e}")
168
- self.logger.error(f"Exception details: {type(e).__name__}: {str(e)}")
169
- import traceback
170
- self.logger.error(f"Full traceback:\n{traceback.format_exc()}")
171
- # Don't fail the git operation - just log the error
172
- raise GitHookError(f"Git hook processing failed: {e}")
237
+ Returns:
238
+ Complete single-stage prompt
239
+ """
240
+ # Only include descriptions for changed files to reduce token usage
241
+ relevant_descriptions = {
242
+ path: desc for path, desc in current_descriptions.items()
243
+ if path in changed_files
244
+ }
245
+
246
+ return f"""Analyze this git commit and update both the project overview (if needed) and file descriptions.
247
+
248
+ COMMIT MESSAGE:
249
+ {commit_message or "No commit message available"}
250
+
251
+ CURRENT PROJECT OVERVIEW:
252
+ {current_overview or "No overview available"}
253
+
254
+ CURRENT FILE DESCRIPTIONS (for changed files only):
255
+ {json.dumps(relevant_descriptions, indent=2)}
256
+
257
+ CHANGED FILES:
258
+ {', '.join(changed_files)}
259
+
260
+ GIT DIFF:
261
+ {git_diff}
262
+
263
+ INSTRUCTIONS:
264
+
265
+ 1. OVERVIEW UPDATE: Update project overview ONLY if there are major structural changes like:
266
+ - New major features or components (indicated by commit message or new directories)
267
+ - Architectural changes (new patterns, frameworks, or approaches)
268
+ - Significant dependency additions (Cargo.toml, package.json, requirements.txt changes)
269
+ - New API endpoints or workflows
270
+ - Changes to build/deployment processes
271
+
272
+ Do NOT update for: bug fixes, small refactors, documentation updates, version bumps.
273
+
274
+ If updating, provide comprehensive narrative (10-20 pages of text) with directory structure, architecture, components, and workflows.
275
+
276
+ 2. FILE UPDATES: Update descriptions for files that have changed significantly. Consider both the diff content and commit message context. Only include files that need actual description updates.
277
+
278
+ Return ONLY a JSON object:
279
+ {{
280
+ "overview_update": "Updated overview text" or null,
281
+ "file_updates": {{
282
+ "path/to/file1.py": "Updated description for file1",
283
+ "path/to/file2.js": "Updated description for file2"
284
+ }}
285
+ }}"""
173
286
 
174
287
  async def _identify_project_from_git(self) -> Dict[str, Any]:
175
288
  """
@@ -489,9 +602,8 @@ Return ONLY a JSON object:
489
602
  }}"""
490
603
 
491
604
  # Log prompt details
492
- prompt_chars = len(prompt)
493
605
  prompt_tokens = self.token_counter.count_tokens(prompt)
494
- self.logger.info(f"Stage 1 prompt: {prompt_chars} characters, {prompt_tokens} tokens")
606
+ self.logger.info(f"Stage 1 prompt: {prompt_tokens} tokens")
495
607
 
496
608
  if prompt_tokens > self.config["max_diff_tokens"]:
497
609
  self.logger.warning(f"Stage 1 prompt too large ({prompt_tokens} tokens), skipping overview analysis")
@@ -559,9 +671,8 @@ Return ONLY a JSON object:
559
671
  }}"""
560
672
 
561
673
  # Log prompt details
562
- prompt_chars = len(prompt)
563
674
  prompt_tokens = self.token_counter.count_tokens(prompt)
564
- self.logger.info(f"Stage 2 prompt: {prompt_chars} characters, {prompt_tokens} tokens")
675
+ self.logger.info(f"Stage 2 prompt: {prompt_tokens} tokens")
565
676
 
566
677
  if prompt_tokens > self.config["max_diff_tokens"]:
567
678
  self.logger.warning(f"Stage 2 prompt too large ({prompt_tokens} tokens), skipping file analysis")
@@ -667,8 +778,49 @@ Return ONLY a JSON object:
667
778
  Returns:
668
779
  Validated response data
669
780
  """
781
+ def extract_json_from_response(text: str) -> str:
782
+ """Extract JSON from response that might have extra text before/after."""
783
+ text = text.strip()
784
+
785
+ # Try to find JSON in the response
786
+ json_start = -1
787
+ json_end = -1
788
+
789
+ # Look for opening brace
790
+ for i, char in enumerate(text):
791
+ if char == '{':
792
+ json_start = i
793
+ break
794
+
795
+ if json_start == -1:
796
+ return text # No JSON found, return original
797
+
798
+ # Find matching closing brace
799
+ brace_count = 0
800
+ for i in range(json_start, len(text)):
801
+ if text[i] == '{':
802
+ brace_count += 1
803
+ elif text[i] == '}':
804
+ brace_count -= 1
805
+ if brace_count == 0:
806
+ json_end = i + 1
807
+ break
808
+
809
+ if json_end == -1:
810
+ return text # No matching brace found, return original
811
+
812
+ return text[json_start:json_end]
813
+
670
814
  try:
671
- data = json.loads(response_text.strip())
815
+ # First try parsing as-is
816
+ try:
817
+ data = json.loads(response_text.strip())
818
+ except json.JSONDecodeError:
819
+ # Try extracting JSON from response
820
+ extracted_json = extract_json_from_response(response_text)
821
+ if extracted_json != response_text.strip():
822
+ self.logger.debug(f"Extracted JSON from response: {extracted_json}")
823
+ data = json.loads(extracted_json)
672
824
 
673
825
  # Handle both single-stage and two-stage responses
674
826
  if "file_updates" in data and "overview_update" in data:
@@ -700,8 +852,10 @@ Return ONLY a JSON object:
700
852
  return data
701
853
 
702
854
  except json.JSONDecodeError as e:
855
+ self.logger.error(f"Raw response content: {repr(response_text)}")
703
856
  raise GitHookError(f"Invalid JSON response from API: {e}")
704
857
  except ValueError as e:
858
+ self.logger.error(f"Raw response content: {repr(response_text)}")
705
859
  raise GitHookError(f"Invalid response structure: {e}")
706
860
 
707
861
  async def _apply_updates(self, project_info: Dict[str, Any], updates: Dict[str, Any]) -> None:
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: mcp-code-indexer
3
- Version: 1.8.0
3
+ Version: 1.9.0
4
4
  Summary: MCP server that tracks file descriptions across codebases, enabling AI agents to efficiently navigate and understand code through searchable summaries and token-aware overviews.
5
5
  Author: MCP Code Indexer Contributors
6
6
  Maintainer: MCP Code Indexer Contributors