mcp-code-indexer 3.1.4__py3-none-any.whl → 3.1.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -11,48 +11,57 @@ import json
11
11
  import logging
12
12
  import os
13
13
  import subprocess
14
- import tempfile
15
14
  from pathlib import Path
16
15
  from typing import Dict, List, Optional, Tuple, Any
17
16
 
18
17
 
19
18
  import aiohttp
20
- from tenacity import retry, wait_exponential, stop_after_attempt, retry_if_exception_type
19
+ from tenacity import (
20
+ retry,
21
+ wait_exponential,
22
+ stop_after_attempt,
23
+ retry_if_exception_type,
24
+ )
21
25
 
22
26
  from .database.database import DatabaseManager
23
- from .database.models import Project, FileDescription
24
- from .error_handler import ValidationError
25
27
  from .token_counter import TokenCounter
26
28
 
27
29
 
28
30
  class GitHookError(Exception):
29
31
  """Custom exception for git hook operations."""
32
+
30
33
  pass
31
34
 
32
35
 
33
36
  class ThrottlingError(Exception):
34
37
  """Exception for rate limiting scenarios."""
38
+
35
39
  pass
36
40
 
37
41
 
38
42
  class GitHookHandler:
39
43
  """
40
44
  Handles git hook integration for automated code indexing.
41
-
45
+
42
46
  This class provides functionality to:
43
47
  - Analyze git diffs to identify changed files
44
48
  - Use OpenRouter API to update file descriptions
45
49
  - Update project overview when structural changes occur
46
50
  """
47
-
51
+
48
52
  # OpenRouter configuration
49
53
  OPENROUTER_API_URL = "https://openrouter.ai/api/v1/chat/completions"
50
54
  OPENROUTER_MODEL = "anthropic/claude-sonnet-4"
51
-
52
- def __init__(self, db_manager: DatabaseManager, cache_dir: Path, logger: Optional[logging.Logger] = None):
55
+
56
+ def __init__(
57
+ self,
58
+ db_manager: DatabaseManager,
59
+ cache_dir: Path,
60
+ logger: Optional[logging.Logger] = None,
61
+ ):
53
62
  """
54
63
  Initialize GitHookHandler.
55
-
64
+
56
65
  Args:
57
66
  db_manager: Database manager instance
58
67
  cache_dir: Cache directory for temporary files
@@ -62,7 +71,7 @@ class GitHookHandler:
62
71
  self.cache_dir = cache_dir
63
72
  self.logger = logger if logger is not None else logging.getLogger(__name__)
64
73
  self.token_counter = TokenCounter()
65
-
74
+
66
75
  # Git hook specific settings
67
76
  self.config = {
68
77
  "model": os.getenv("MCP_GITHOOK_MODEL", self.OPENROUTER_MODEL),
@@ -70,123 +79,154 @@ class GitHookHandler:
70
79
  "timeout": 300, # 5 minutes
71
80
  "temperature": 0.3, # Lower temperature for consistent updates
72
81
  }
73
-
82
+
74
83
  # Validate OpenRouter API key
75
84
  self.api_key = os.getenv("OPENROUTER_API_KEY")
76
85
  if not self.api_key:
77
- raise GitHookError("OPENROUTER_API_KEY environment variable is required for git hook mode")
78
-
86
+ raise GitHookError(
87
+ "OPENROUTER_API_KEY environment variable is required for git hook mode"
88
+ )
89
+
79
90
  async def run_githook_mode(
80
- self,
81
- commit_hash: Optional[str] = None,
82
- commit_range: Optional[Tuple[str, str]] = None
91
+ self,
92
+ commit_hash: Optional[str] = None,
93
+ commit_range: Optional[Tuple[str, str]] = None,
83
94
  ) -> None:
84
95
  """
85
96
  Run in git hook mode - analyze changes and update descriptions.
86
-
97
+
87
98
  Args:
88
99
  commit_hash: Process a specific commit by hash
89
100
  commit_range: Process commits in range (start_hash, end_hash)
90
-
101
+
91
102
  This is the main entry point for git hook functionality.
92
103
  """
93
104
  try:
94
- self.logger.info(f"=== Git Hook Analysis Started ===")
105
+ self.logger.info("=== Git Hook Analysis Started ===")
95
106
  if commit_hash:
96
107
  self.logger.info(f"Mode: Single commit ({commit_hash})")
97
108
  elif commit_range:
98
- self.logger.info(f"Mode: Commit range ({commit_range[0]}..{commit_range[1]})")
109
+ self.logger.info(
110
+ f"Mode: Commit range ({commit_range[0]}..{commit_range[1]})"
111
+ )
99
112
  else:
100
- self.logger.info(f"Mode: Staged changes")
101
-
113
+ self.logger.info("Mode: Staged changes")
114
+
102
115
  # Get git info from current directory
103
116
  project_info = await self._identify_project_from_git()
104
- self.logger.info(f"Project identified: {project_info.get('name', 'Unknown')} at {project_info.get('folderPath', 'Unknown')}")
105
-
117
+ self.logger.info(
118
+ f"Project identified: {project_info.get('name', 'Unknown')} "
119
+ f"at {project_info.get('folderPath', 'Unknown')}"
120
+ )
121
+
106
122
  # Get git diff and commit message based on mode
107
123
  if commit_hash:
108
124
  git_diff = await self._get_git_diff_for_commit(commit_hash)
109
125
  commit_message = await self._get_commit_message_for_commit(commit_hash)
110
126
  elif commit_range:
111
- git_diff = await self._get_git_diff_for_range(commit_range[0], commit_range[1])
112
- commit_message = await self._get_commit_messages_for_range(commit_range[0], commit_range[1])
127
+ git_diff = await self._get_git_diff_for_range(
128
+ commit_range[0], commit_range[1]
129
+ )
130
+ commit_message = await self._get_commit_messages_for_range(
131
+ commit_range[0], commit_range[1]
132
+ )
113
133
  else:
114
134
  git_diff = await self._get_git_diff()
115
135
  commit_message = await self._get_commit_message()
116
-
136
+
117
137
  # Log diff details
118
138
  if not git_diff:
119
- self.logger.info(f"Skipping git hook update - no git diff")
139
+ self.logger.info("Skipping git hook update - no git diff")
120
140
  return
121
-
141
+
122
142
  diff_tokens = self.token_counter.count_tokens(git_diff)
123
143
  self.logger.info(f"Git diff: {diff_tokens} tokens")
124
-
144
+
125
145
  # Fetch current state
126
146
  self.logger.info("Fetching current project state...")
127
147
  current_overview = await self._get_project_overview(project_info)
128
148
  current_descriptions = await self._get_all_descriptions(project_info)
129
149
  changed_files = self._extract_changed_files(git_diff)
130
-
150
+
131
151
  if not changed_files:
132
152
  self.logger.info("No changed files detected in git diff")
133
153
  return
134
-
135
- self.logger.info(f"Found {len(changed_files)} changed files: {', '.join(changed_files)}")
136
- overview_tokens = self.token_counter.count_tokens(current_overview) if current_overview else 0
154
+
155
+ self.logger.info(
156
+ f"Found {len(changed_files)} changed files: {', '.join(changed_files)}"
157
+ )
158
+ overview_tokens = (
159
+ self.token_counter.count_tokens(current_overview)
160
+ if current_overview
161
+ else 0
162
+ )
137
163
  self.logger.info(f"Current overview: {overview_tokens} tokens")
138
164
  self.logger.info(f"Current descriptions count: {len(current_descriptions)}")
139
-
165
+
140
166
  # Try single-stage first, fall back to two-stage if needed
141
167
  updates = await self._analyze_with_smart_staging(
142
- git_diff, commit_message, current_overview, current_descriptions, changed_files
168
+ git_diff,
169
+ commit_message,
170
+ current_overview,
171
+ current_descriptions,
172
+ changed_files,
143
173
  )
144
-
174
+
145
175
  # Apply updates to database
146
176
  await self._apply_updates(project_info, updates)
147
-
148
- self.logger.info(f"Git hook update completed successfully for {len(changed_files)} files")
149
-
177
+
178
+ self.logger.info(
179
+ f"Git hook update completed successfully for {len(changed_files)} files"
180
+ )
181
+
150
182
  except Exception as e:
151
183
  self.logger.error(f"Git hook mode failed: {e}")
152
184
  self.logger.error(f"Exception details: {type(e).__name__}: {str(e)}")
153
185
  import traceback
186
+
154
187
  self.logger.error(f"Full traceback:\n{traceback.format_exc()}")
155
188
  # Don't fail the git operation - just log the error
156
189
  raise GitHookError(f"Git hook processing failed: {e}")
157
-
190
+
158
191
  async def _analyze_with_smart_staging(
159
192
  self,
160
193
  git_diff: str,
161
194
  commit_message: str,
162
195
  current_overview: str,
163
196
  current_descriptions: Dict[str, str],
164
- changed_files: List[str]
197
+ changed_files: List[str],
165
198
  ) -> Dict[str, Any]:
166
199
  """
167
- Smart staging: Try single-stage first, fall back to two-stage if token limit exceeded.
168
-
200
+ Smart staging: Try single-stage first, fall back to two-stage if
201
+ token limit exceeded.
202
+
169
203
  Args:
170
204
  git_diff: Git diff content
171
205
  commit_message: Commit message explaining the changes
172
206
  current_overview: Current project overview
173
207
  current_descriptions: Current file descriptions
174
208
  changed_files: List of changed file paths
175
-
209
+
176
210
  Returns:
177
211
  Dict containing file_updates and overview_update
178
212
  """
179
213
  # Build single-stage prompt and check token count
180
214
  single_stage_prompt = self._build_single_stage_prompt(
181
- git_diff, commit_message, current_overview, current_descriptions, changed_files
215
+ git_diff,
216
+ commit_message,
217
+ current_overview,
218
+ current_descriptions,
219
+ changed_files,
182
220
  )
183
-
221
+
184
222
  prompt_tokens = self.token_counter.count_tokens(single_stage_prompt)
185
- token_limit = self.config.get("max_diff_tokens", 130000) # Conservative limit under 136k
186
-
223
+ token_limit = self.config.get(
224
+ "max_diff_tokens", 130000
225
+ ) # Conservative limit under 136k
226
+
187
227
  self.logger.info(f"Single-stage prompt: {prompt_tokens} tokens")
188
228
  self.logger.info(f"Token limit: {token_limit}")
189
-
229
+
190
230
  if prompt_tokens <= token_limit:
191
231
  # Use single-stage approach
192
232
  self.logger.info("Using single-stage analysis (within token limit)")
@@ -195,55 +235,60 @@ class GitHookHandler:
195
235
  return result
196
236
  else:
197
237
  # Fall back to two-stage approach
198
- self.logger.info(f"Single-stage prompt too large ({prompt_tokens} tokens), falling back to two-stage analysis")
199
-
238
+ self.logger.info(
239
+ f"Single-stage prompt too large ({prompt_tokens} tokens), "
240
+ f"falling back to two-stage analysis"
241
+ )
242
+
200
243
  # Stage 1: Check if overview needs updating
201
244
  overview_updates = await self._analyze_overview_updates(
202
245
  git_diff, commit_message, current_overview, changed_files
203
246
  )
204
-
205
- # Stage 2: Update file descriptions
247
+
248
+ # Stage 2: Update file descriptions
206
249
  file_updates = await self._analyze_file_updates(
207
250
  git_diff, commit_message, current_descriptions, changed_files
208
251
  )
209
-
252
+
210
253
  # Combine updates
211
254
  updates = {
212
255
  "file_updates": file_updates.get("file_updates", {}),
213
- "overview_update": overview_updates.get("overview_update")
256
+ "overview_update": overview_updates.get("overview_update"),
214
257
  }
215
-
258
+
216
259
  self.logger.info("Two-stage analysis completed")
217
260
  return updates
218
-
261
+
219
262
  def _build_single_stage_prompt(
220
263
  self,
221
264
  git_diff: str,
222
265
  commit_message: str,
223
266
  current_overview: str,
224
267
  current_descriptions: Dict[str, str],
225
- changed_files: List[str]
268
+ changed_files: List[str],
226
269
  ) -> str:
227
270
  """
228
271
  Build single-stage prompt that handles both overview and file updates.
229
-
272
+
230
273
  Args:
231
274
  git_diff: Git diff content
232
275
  commit_message: Commit message explaining the changes
233
276
  current_overview: Current project overview
234
277
  current_descriptions: Current file descriptions
235
278
  changed_files: List of changed file paths
236
-
279
+
237
280
  Returns:
238
281
  Complete single-stage prompt
239
282
  """
240
283
  # Only include descriptions for changed files to reduce token usage
241
284
  relevant_descriptions = {
242
- path: desc for path, desc in current_descriptions.items()
285
+ path: desc
286
+ for path, desc in current_descriptions.items()
243
287
  if path in changed_files
244
288
  }
245
-
246
- return f"""Analyze this git commit and update both the project overview (if needed) and file descriptions.
289
+
290
+ return f"""Analyze this git commit and update both the project overview
291
+ (if needed) and file descriptions.
247
292
 
248
293
  COMMIT MESSAGE:
249
294
  {commit_message or "No commit message available"}
@@ -262,18 +307,24 @@ GIT DIFF:
262
307
 
263
308
  INSTRUCTIONS:
264
309
 
265
- 1. OVERVIEW UPDATE: Update project overview ONLY if there are major structural changes like:
266
- - New major features or components (indicated by commit message or new directories)
310
+ 1. OVERVIEW UPDATE: Update project overview ONLY if there are major
311
+ structural changes like:
312
+ - New major features or components (indicated by commit message or new
313
+ directories)
267
314
  - Architectural changes (new patterns, frameworks, or approaches)
268
- - Significant dependency additions (Cargo.toml, package.json, requirements.txt changes)
315
+ - Significant dependency additions (Cargo.toml, package.json,
316
+ requirements.txt changes)
269
317
  - New API endpoints or workflows
270
318
  - Changes to build/deployment processes
271
-
319
+
272
320
  Do NOT update for: bug fixes, small refactors, documentation updates, version bumps.
273
-
274
- If updating, provide comprehensive narrative (10-20 pages of text) with directory structure, architecture, components, and workflows.
275
321
 
276
- 2. FILE UPDATES: Update descriptions for files that have changed significantly. Consider both the diff content and commit message context. Only include files that need actual description updates.
322
+ If updating, provide comprehensive narrative (10-20 pages of text) with
323
+ directory structure, architecture, components, and workflows.
324
+
325
+ 2. FILE UPDATES: Update descriptions for files that have changed
326
+ significantly. Consider both the diff content and commit message context.
327
+ Only include files that need actual description updates.
277
328
 
278
329
  Return ONLY a JSON object:
279
330
  {{
@@ -283,51 +334,46 @@ Return ONLY a JSON object:
283
334
  "path/to/file2.js": "Updated description for file2"
284
335
  }}
285
336
  }}"""
286
-
337
+
287
338
  async def _identify_project_from_git(self) -> Dict[str, Any]:
288
339
  """
289
340
  Identify project information from git repository.
290
-
341
+
291
342
  Returns:
292
343
  Dict containing project identification info
293
344
  """
294
345
  try:
295
346
  # Get current working directory as project root
296
347
  project_root = Path.cwd()
297
-
348
+
298
349
  # Use directory name as project name
299
350
  project_name = project_root.name
300
-
301
- return {
302
- "projectName": project_name,
303
- "folderPath": str(project_root)
304
- }
305
-
351
+
352
+ return {"projectName": project_name, "folderPath": str(project_root)}
353
+
306
354
  except Exception as e:
307
355
  raise GitHookError(f"Failed to identify project from git: {e}")
308
-
309
356
 
310
-
311
357
  async def _get_git_diff(self) -> str:
312
358
  """
313
359
  Get git diff for recent changes.
314
-
360
+
315
361
  Returns:
316
362
  Git diff content as string
317
363
  """
318
364
  try:
319
365
  # Get diff from last commit
320
- diff_result = await self._run_git_command([
321
- "diff", "--no-color", "--no-ext-diff", "HEAD~1..HEAD"
322
- ])
366
+ diff_result = await self._run_git_command(
367
+ ["diff", "--no-color", "--no-ext-diff", "HEAD~1..HEAD"]
368
+ )
323
369
  return diff_result
324
-
370
+
325
371
  except subprocess.CalledProcessError:
326
372
  # If HEAD~1 doesn't exist (first commit), get diff against empty tree
327
373
  try:
328
- diff_result = await self._run_git_command([
329
- "diff", "--no-color", "--no-ext-diff", "--cached"
330
- ])
374
+ diff_result = await self._run_git_command(
375
+ ["diff", "--no-color", "--no-ext-diff", "--cached"]
376
+ )
331
377
  return diff_result
332
378
  except subprocess.CalledProcessError as e:
333
379
  raise GitHookError(f"Failed to get git diff: {e}")
@@ -335,17 +381,15 @@ Return ONLY a JSON object:
335
381
  async def _get_commit_message(self) -> str:
336
382
  """
337
383
  Get the commit message for context about what was changed.
338
-
384
+
339
385
  Returns:
340
386
  Commit message as string
341
387
  """
342
388
  try:
343
389
  # Get the commit message from the latest commit
344
- message_result = await self._run_git_command([
345
- "log", "-1", "--pretty=%B"
346
- ])
390
+ message_result = await self._run_git_command(["log", "-1", "--pretty=%B"])
347
391
  return message_result.strip()
348
-
392
+
349
393
  except subprocess.CalledProcessError:
350
394
  # If no commits exist yet, return empty string
351
395
  return ""
@@ -353,182 +397,203 @@ Return ONLY a JSON object:
353
397
  async def _get_git_diff_for_commit(self, commit_hash: str) -> str:
354
398
  """
355
399
  Get git diff for a specific commit.
356
-
400
+
357
401
  Args:
358
402
  commit_hash: The commit hash to analyze
359
-
403
+
360
404
  Returns:
361
405
  Git diff content as string
362
406
  """
363
407
  try:
364
408
  # Get diff for the specific commit compared to its parent
365
- diff_result = await self._run_git_command([
366
- "diff", "--no-color", "--no-ext-diff", f"{commit_hash}~1..{commit_hash}"
367
- ])
409
+ diff_result = await self._run_git_command(
410
+ [
411
+ "diff",
412
+ "--no-color",
413
+ "--no-ext-diff",
414
+ f"{commit_hash}~1..{commit_hash}",
415
+ ]
416
+ )
368
417
  return diff_result
369
-
418
+
370
419
  except subprocess.CalledProcessError:
371
420
  # If parent doesn't exist (first commit), diff against empty tree
372
421
  try:
373
- diff_result = await self._run_git_command([
374
- "diff", "--no-color", "--no-ext-diff", "4b825dc642cb6eb9a060e54bf8d69288fbee4904", commit_hash
375
- ])
422
+ diff_result = await self._run_git_command(
423
+ [
424
+ "diff",
425
+ "--no-color",
426
+ "--no-ext-diff",
427
+ "4b825dc642cb6eb9a060e54bf8d69288fbee4904",
428
+ commit_hash,
429
+ ]
430
+ )
376
431
  return diff_result
377
432
  except subprocess.CalledProcessError as e:
378
- raise GitHookError(f"Failed to get git diff for commit {commit_hash}: {e}")
433
+ raise GitHookError(
434
+ f"Failed to get git diff for commit {commit_hash}: {e}"
435
+ )
379
436
 
380
437
  async def _get_git_diff_for_range(self, start_hash: str, end_hash: str) -> str:
381
438
  """
382
439
  Get git diff for a range of commits.
383
-
440
+
384
441
  Args:
385
442
  start_hash: Starting commit hash (exclusive)
386
443
  end_hash: Ending commit hash (inclusive)
387
-
444
+
388
445
  Returns:
389
446
  Git diff content as string
390
447
  """
391
448
  try:
392
- diff_result = await self._run_git_command([
393
- "diff", "--no-color", "--no-ext-diff", f"{start_hash}..{end_hash}"
394
- ])
449
+ diff_result = await self._run_git_command(
450
+ ["diff", "--no-color", "--no-ext-diff", f"{start_hash}..{end_hash}"]
451
+ )
395
452
  return diff_result
396
453
  except subprocess.CalledProcessError as e:
397
- raise GitHookError(f"Failed to get git diff for range {start_hash}..{end_hash}: {e}")
454
+ raise GitHookError(
455
+ f"Failed to get git diff for range {start_hash}..{end_hash}: {e}"
456
+ )
398
457
 
399
458
  async def _get_commit_message_for_commit(self, commit_hash: str) -> str:
400
459
  """
401
460
  Get the commit message for a specific commit.
402
-
461
+
403
462
  Args:
404
463
  commit_hash: The commit hash
405
-
464
+
406
465
  Returns:
407
466
  Commit message as string
408
467
  """
409
468
  try:
410
- message_result = await self._run_git_command([
411
- "log", "-1", "--pretty=%B", commit_hash
412
- ])
469
+ message_result = await self._run_git_command(
470
+ ["log", "-1", "--pretty=%B", commit_hash]
471
+ )
413
472
  return message_result.strip()
414
473
  except subprocess.CalledProcessError as e:
415
474
  raise GitHookError(f"Failed to get commit message for {commit_hash}: {e}")
416
475
 
417
- async def _get_commit_messages_for_range(self, start_hash: str, end_hash: str) -> str:
476
+ async def _get_commit_messages_for_range(
477
+ self, start_hash: str, end_hash: str
478
+ ) -> str:
418
479
  """
419
480
  Get commit messages for a range of commits.
420
-
481
+
421
482
  Args:
422
483
  start_hash: Starting commit hash (exclusive)
423
484
  end_hash: Ending commit hash (inclusive)
424
-
485
+
425
486
  Returns:
426
487
  Combined commit messages as string
427
488
  """
428
489
  try:
429
490
  # Get all commit messages in the range
430
- message_result = await self._run_git_command([
431
- "log", "--pretty=%B", f"{start_hash}..{end_hash}"
432
- ])
433
-
491
+ message_result = await self._run_git_command(
492
+ ["log", "--pretty=%B", f"{start_hash}..{end_hash}"]
493
+ )
494
+
434
495
  # Clean up and format the messages
435
496
  messages = message_result.strip()
436
497
  if messages:
437
- return f"Combined commit messages for range {start_hash}..{end_hash}:\n\n{messages}"
498
+ return (
499
+ f"Combined commit messages for range "
500
+ f"{start_hash}..{end_hash}:\n\n{messages}"
501
+ )
438
502
  else:
439
503
  return f"No commits found in range {start_hash}..{end_hash}"
440
-
504
+
441
505
  except subprocess.CalledProcessError as e:
442
- raise GitHookError(f"Failed to get commit messages for range {start_hash}..{end_hash}: {e}")
443
-
506
+ raise GitHookError(
507
+ f"Failed to get commit messages for range {start_hash}..{end_hash}: {e}"
508
+ )
509
+
444
510
  def _extract_changed_files(self, git_diff: str) -> List[str]:
445
511
  """
446
512
  Extract list of changed files from git diff.
447
-
513
+
448
514
  Args:
449
515
  git_diff: Git diff content
450
-
516
+
451
517
  Returns:
452
518
  List of file paths that changed
453
519
  """
454
520
  changed_files = []
455
- lines = git_diff.split('\n')
456
-
521
+ lines = git_diff.split("\n")
522
+
457
523
  for line in lines:
458
- if line.startswith('diff --git a/'):
524
+ if line.startswith("diff --git a/"):
459
525
  # Parse file path from diff header
460
526
  # Format: diff --git a/path/to/file b/path/to/file
461
- parts = line.split(' ')
527
+ parts = line.split(" ")
462
528
  if len(parts) >= 4:
463
529
  file_path = parts[2][2:] # Remove 'a/' prefix
464
530
  changed_files.append(file_path)
465
-
531
+
466
532
  return changed_files
467
-
533
+
468
534
  async def _get_project_overview(self, project_info: Dict[str, Any]) -> str:
469
535
  """Get current project overview from database."""
470
536
  try:
471
537
  # Try to find existing project
472
538
  project = await self.db_manager.find_matching_project(
473
- project_info["projectName"],
474
- project_info["folderPath"]
539
+ project_info["projectName"], project_info["folderPath"]
475
540
  )
476
-
541
+
477
542
  if project:
478
- overview = await self.db_manager.get_project_overview(
479
- project.id
480
- )
543
+ overview = await self.db_manager.get_project_overview(project.id)
481
544
  return overview.overview if overview else ""
482
-
545
+
483
546
  return ""
484
-
547
+
485
548
  except Exception as e:
486
549
  self.logger.warning(f"Failed to get project overview: {e}")
487
550
  return ""
488
-
489
- async def _get_all_descriptions(self, project_info: Dict[str, Any]) -> Dict[str, str]:
551
+
552
+ async def _get_all_descriptions(
553
+ self, project_info: Dict[str, Any]
554
+ ) -> Dict[str, str]:
490
555
  """Get all current file descriptions from database."""
491
556
  try:
492
557
  # Try to find existing project
493
558
  project = await self.db_manager.find_matching_project(
494
- project_info["projectName"],
495
- project_info["folderPath"]
559
+ project_info["projectName"], project_info["folderPath"]
496
560
  )
497
-
561
+
498
562
  if project:
499
563
  descriptions = await self.db_manager.get_all_file_descriptions(
500
564
  project.id
501
565
  )
502
566
  return {desc.file_path: desc.description for desc in descriptions}
503
-
567
+
504
568
  return {}
505
-
569
+
506
570
  except Exception as e:
507
571
  self.logger.warning(f"Failed to get file descriptions: {e}")
508
572
  return {}
509
-
573
+
510
574
  async def _analyze_overview_updates(
511
575
  self,
512
576
  git_diff: str,
513
- commit_message: str,
577
+ commit_message: str,
514
578
  current_overview: str,
515
- changed_files: List[str]
579
+ changed_files: List[str],
516
580
  ) -> Dict[str, Any]:
517
581
  """
518
582
  Stage 1: Analyze if project overview needs updating.
519
-
583
+
520
584
  Args:
521
585
  git_diff: Git diff content
522
586
  commit_message: Commit message explaining the changes
523
587
  current_overview: Current project overview
524
588
  changed_files: List of changed file paths
525
-
589
+
526
590
  Returns:
527
591
  Dict with overview_update key
528
592
  """
529
593
  self.logger.info("Stage 1: Analyzing overview updates...")
530
-
531
- prompt = f"""Analyze this git commit to determine if the project overview needs updating.
594
+
595
+ prompt = f"""Analyze this git commit to determine if the project overview
596
+ needs updating.
532
597
 
533
598
  COMMIT MESSAGE:
534
599
  {commit_message or "No commit message available"}
@@ -547,13 +612,15 @@ INSTRUCTIONS:
547
612
  Update project overview ONLY if there are major structural changes like:
548
613
  - New major features or components (indicated by commit message or new directories)
549
614
  - Architectural changes (new patterns, frameworks, or approaches)
550
- - Significant dependency additions (Cargo.toml, package.json, requirements.txt changes)
615
+ - Significant dependency additions (Cargo.toml, package.json,
616
+ requirements.txt changes)
551
617
  - New API endpoints or workflows
552
618
  - Changes to build/deployment processes
553
619
 
554
620
  Do NOT update for: bug fixes, small refactors, documentation updates, version bumps.
555
621
 
556
- If updating, provide comprehensive narrative (10-20 pages of text) with directory structure, architecture, components, and workflows.
622
+ If updating, provide comprehensive narrative (10-20 pages of text) with
623
+ directory structure, architecture, components, and workflows.
557
624
 
558
625
  Return ONLY a JSON object:
559
626
  {{
@@ -563,15 +630,18 @@ Return ONLY a JSON object:
563
630
  # Log prompt details
564
631
  prompt_tokens = self.token_counter.count_tokens(prompt)
565
632
  self.logger.info(f"Stage 1 prompt: {prompt_tokens} tokens")
566
-
633
+
567
634
  if prompt_tokens > self.config["max_diff_tokens"]:
568
- self.logger.warning(f"Stage 1 prompt too large ({prompt_tokens} tokens), skipping overview analysis")
635
+ self.logger.warning(
636
+ f"Stage 1 prompt too large ({prompt_tokens} tokens), "
637
+ f"skipping overview analysis"
638
+ )
569
639
  return {"overview_update": None}
570
-
640
+
571
641
  # Call OpenRouter API
572
642
  result = await self._call_openrouter(prompt)
573
643
  self.logger.info("Stage 1 completed: overview analysis")
574
-
644
+
575
645
  return result
576
646
 
577
647
  async def _analyze_file_updates(
@@ -579,29 +649,31 @@ Return ONLY a JSON object:
579
649
  git_diff: str,
580
650
  commit_message: str,
581
651
  current_descriptions: Dict[str, str],
582
- changed_files: List[str]
652
+ changed_files: List[str],
583
653
  ) -> Dict[str, Any]:
584
654
  """
585
655
  Stage 2: Analyze file description updates.
586
-
656
+
587
657
  Args:
588
658
  git_diff: Git diff content
589
659
  commit_message: Commit message explaining the changes
590
660
  current_descriptions: Current file descriptions for changed files only
591
661
  changed_files: List of changed file paths
592
-
662
+
593
663
  Returns:
594
664
  Dict with file_updates key
595
665
  """
596
666
  self.logger.info("Stage 2: Analyzing file description updates...")
597
-
667
+
598
668
  # Only include descriptions for changed files to reduce token usage
599
669
  relevant_descriptions = {
600
- path: desc for path, desc in current_descriptions.items()
670
+ path: desc
671
+ for path, desc in current_descriptions.items()
601
672
  if path in changed_files
602
673
  }
603
-
604
- prompt = f"""Analyze this git commit and update file descriptions for changed files.
674
+
675
+ prompt = f"""Analyze this git commit and update file descriptions for
676
+ changed files.
605
677
 
606
678
  COMMIT MESSAGE:
607
679
  {commit_message or "No commit message available"}
@@ -619,7 +691,9 @@ INSTRUCTIONS:
619
691
 
620
692
  Use the COMMIT MESSAGE to understand the intent and context of the changes.
621
693
 
622
- Update descriptions for files that have changed significantly. Consider both the diff content and commit message context. Only include files that need actual description updates.
694
+ Update descriptions for files that have changed significantly. Consider both the
695
+ diff content and commit message context. Only include files that need actual
696
+ description updates.
623
697
 
624
698
  Return ONLY a JSON object:
625
699
  {{
@@ -629,32 +703,35 @@ Return ONLY a JSON object:
629
703
  }}
630
704
  }}"""
631
705
 
632
- # Log prompt details
706
+ # Log prompt details
633
707
  prompt_tokens = self.token_counter.count_tokens(prompt)
634
708
  self.logger.info(f"Stage 2 prompt: {prompt_tokens} tokens")
635
-
709
+
636
710
  if prompt_tokens > self.config["max_diff_tokens"]:
637
- self.logger.warning(f"Stage 2 prompt too large ({prompt_tokens} tokens), skipping file analysis")
711
+ self.logger.warning(
712
+ f"Stage 2 prompt too large ({prompt_tokens} tokens), "
713
+ f"skipping file analysis"
714
+ )
638
715
  return {"file_updates": {}}
639
-
716
+
640
717
  # Call OpenRouter API
641
718
  result = await self._call_openrouter(prompt)
642
719
  self.logger.info("Stage 2 completed: file description analysis")
643
-
720
+
644
721
  return result
645
-
722
+
646
723
  @retry(
647
724
  wait=wait_exponential(multiplier=1, min=4, max=60),
648
725
  stop=stop_after_attempt(5),
649
- retry=retry_if_exception_type(ThrottlingError)
726
+ retry=retry_if_exception_type(ThrottlingError),
650
727
  )
651
728
  async def _call_openrouter(self, prompt: str) -> Dict[str, Any]:
652
729
  """
653
730
  Call OpenRouter API to analyze changes.
654
-
731
+
655
732
  Args:
656
733
  prompt: Analysis prompt
657
-
734
+
658
735
  Returns:
659
736
  Parsed response with file updates and overview update
660
737
  """
@@ -662,114 +739,126 @@ Return ONLY a JSON object:
662
739
  "Authorization": f"Bearer {self.api_key}",
663
740
  "HTTP-Referer": "https://github.com/fluffypony/mcp-code-indexer",
664
741
  "X-Title": "MCP Code Indexer Git Hook",
665
- "Content-Type": "application/json"
742
+ "Content-Type": "application/json",
666
743
  }
667
-
744
+
668
745
  payload = {
669
746
  "model": self.config["model"],
670
747
  "messages": [
671
748
  {
672
- "role": "system",
673
- "content": "You are a technical assistant that analyzes code changes and updates file descriptions accurately and concisely."
749
+ "role": "system",
750
+ "content": (
751
+ "You are a technical assistant that analyzes code "
752
+ "changes and updates file descriptions accurately "
753
+ "and concisely."
754
+ ),
674
755
  },
675
- {
676
- "role": "user",
677
- "content": prompt
678
- }
756
+ {"role": "user", "content": prompt},
679
757
  ],
680
758
  "temperature": self.config["temperature"],
681
759
  "max_tokens": 24000,
682
760
  }
683
-
684
761
 
685
-
686
762
  timeout = aiohttp.ClientTimeout(total=self.config["timeout"])
687
-
688
- self.logger.info(f"Sending request to OpenRouter API...")
763
+
764
+ self.logger.info("Sending request to OpenRouter API...")
689
765
  self.logger.info(f" Model: {self.config['model']}")
690
766
  self.logger.info(f" Temperature: {self.config['temperature']}")
691
- self.logger.info(f" Max tokens: 24000")
767
+ self.logger.info(" Max tokens: 24000")
692
768
  self.logger.info(f" Timeout: {self.config['timeout']}s")
693
-
769
+
694
770
  try:
695
771
  async with aiohttp.ClientSession(timeout=timeout) as session:
696
772
  async with session.post(
697
- self.OPENROUTER_API_URL,
698
- headers=headers,
699
- json=payload
773
+ self.OPENROUTER_API_URL, headers=headers, json=payload
700
774
  ) as response:
701
-
702
- self.logger.info(f"OpenRouter API response status: {response.status}")
703
-
775
+
776
+ self.logger.info(
777
+ f"OpenRouter API response status: {response.status}"
778
+ )
779
+
704
780
  if response.status == 429:
705
781
  retry_after = int(response.headers.get("Retry-After", 60))
706
- self.logger.warning(f"Rate limited by OpenRouter, retry after {retry_after}s")
707
- raise ThrottlingError(f"Rate limited. Retry after {retry_after}s")
708
-
782
+ self.logger.warning(
783
+ f"Rate limited by OpenRouter, retry after {retry_after}s"
784
+ )
785
+ raise ThrottlingError(
786
+ f"Rate limited. Retry after {retry_after}s"
787
+ )
788
+
709
789
  response.raise_for_status()
710
-
790
+
711
791
  response_data = await response.json()
712
-
792
+
713
793
  if "choices" not in response_data:
714
- self.logger.error(f"Invalid API response format: {response_data}")
715
- raise GitHookError(f"Invalid API response format: {response_data}")
716
-
794
+ self.logger.error(
795
+ f"Invalid API response format: {response_data}"
796
+ )
797
+ raise GitHookError(
798
+ f"Invalid API response format: {response_data}"
799
+ )
800
+
717
801
  content = response_data["choices"][0]["message"]["content"]
718
- self.logger.info(f"OpenRouter response content length: {len(content)} characters")
719
-
802
+ self.logger.info(
803
+ f"OpenRouter response content length: {len(content)} characters"
804
+ )
805
+
720
806
  return self._validate_githook_response(content)
721
-
807
+
722
808
  except aiohttp.ClientError as e:
723
809
  self.logger.error(f"OpenRouter API request failed: {e}")
724
810
  self.logger.error(f"ClientError details: {type(e).__name__}: {str(e)}")
725
811
  raise GitHookError(f"OpenRouter API request failed: {e}")
726
- except asyncio.TimeoutError as e:
727
- self.logger.error(f"OpenRouter API request timed out after {self.config['timeout']}s")
812
+ except asyncio.TimeoutError:
813
+ self.logger.error(
814
+ f"OpenRouter API request timed out after {self.config['timeout']}s"
815
+ )
728
816
  raise GitHookError("OpenRouter API request timed out")
729
-
817
+
730
818
  def _validate_githook_response(self, response_text: str) -> Dict[str, Any]:
731
819
  """
732
820
  Validate and parse JSON response from OpenRouter.
733
-
821
+
734
822
  Args:
735
823
  response_text: Raw response content
736
-
824
+
737
825
  Returns:
738
826
  Validated response data
739
827
  """
828
+
740
829
  def extract_json_from_response(text: str) -> str:
741
830
  """Extract JSON from response that might have extra text before/after."""
742
831
  text = text.strip()
743
-
832
+
744
833
  # Try to find JSON in the response
745
834
  json_start = -1
746
835
  json_end = -1
747
-
836
+
748
837
  # Look for opening brace
749
838
  for i, char in enumerate(text):
750
- if char == '{':
839
+ if char == "{":
751
840
  json_start = i
752
841
  break
753
-
842
+
754
843
  if json_start == -1:
755
844
  return text # No JSON found, return original
756
-
845
+
757
846
  # Find matching closing brace
758
847
  brace_count = 0
759
848
  for i in range(json_start, len(text)):
760
- if text[i] == '{':
849
+ if text[i] == "{":
761
850
  brace_count += 1
762
- elif text[i] == '}':
851
+ elif text[i] == "}":
763
852
  brace_count -= 1
764
853
  if brace_count == 0:
765
854
  json_end = i + 1
766
855
  break
767
-
856
+
768
857
  if json_end == -1:
769
858
  return text # No matching brace found, return original
770
-
859
+
771
860
  return text[json_start:json_end]
772
-
861
+
773
862
  try:
774
863
  # First try parsing as-is
775
864
  try:
@@ -780,47 +869,51 @@ Return ONLY a JSON object:
780
869
  if extracted_json != response_text.strip():
781
870
  self.logger.debug(f"Extracted JSON from response: {extracted_json}")
782
871
  data = json.loads(extracted_json)
783
-
872
+
784
873
  # Handle both single-stage and two-stage responses
785
874
  if "file_updates" in data and "overview_update" in data:
786
875
  # Original single-stage format
787
876
  if not isinstance(data["file_updates"], dict):
788
877
  raise ValueError("'file_updates' must be a dictionary")
789
-
878
+
790
879
  # Validate descriptions
791
880
  for path, desc in data["file_updates"].items():
792
881
  if not isinstance(desc, str) or not desc.strip():
793
882
  raise ValueError(f"Invalid description for {path}")
794
-
883
+
795
884
  elif "file_updates" in data:
796
885
  # Stage 2 format (file updates only)
797
886
  if not isinstance(data["file_updates"], dict):
798
887
  raise ValueError("'file_updates' must be a dictionary")
799
-
888
+
800
889
  # Validate descriptions
801
890
  for path, desc in data["file_updates"].items():
802
891
  if not isinstance(desc, str) or not desc.strip():
803
892
  raise ValueError(f"Invalid description for {path}")
804
-
893
+
805
894
  elif "overview_update" in data:
806
895
  # Stage 1 format (overview only) - overview_update can be null
807
896
  pass
808
897
  else:
809
- raise ValueError("Response must contain 'file_updates' and/or 'overview_update'")
810
-
898
+ raise ValueError(
899
+ "Response must contain 'file_updates' and/or 'overview_update'"
900
+ )
901
+
811
902
  return data
812
-
903
+
813
904
  except json.JSONDecodeError as e:
814
905
  self.logger.error(f"Raw response content: {repr(response_text)}")
815
906
  raise GitHookError(f"Invalid JSON response from API: {e}")
816
907
  except ValueError as e:
817
908
  self.logger.error(f"Raw response content: {repr(response_text)}")
818
909
  raise GitHookError(f"Invalid response structure: {e}")
819
-
820
- async def _apply_updates(self, project_info: Dict[str, Any], updates: Dict[str, Any]) -> None:
910
+
911
+ async def _apply_updates(
912
+ self, project_info: Dict[str, Any], updates: Dict[str, Any]
913
+ ) -> None:
821
914
  """
822
915
  Apply updates to database.
823
-
916
+
824
917
  Args:
825
918
  project_info: Project identification info
826
919
  updates: Updates from OpenRouter API
@@ -828,77 +921,75 @@ Return ONLY a JSON object:
828
921
  try:
829
922
  # Get or create project
830
923
  project = await self.db_manager.get_or_create_project(
831
- project_info["projectName"],
832
- project_info["folderPath"]
924
+ project_info["projectName"], project_info["folderPath"]
833
925
  )
834
-
926
+
835
927
  # Update file descriptions
836
928
  file_updates = updates.get("file_updates", {})
837
929
  for file_path, description in file_updates.items():
838
930
  from mcp_code_indexer.database.models import FileDescription
839
931
  from datetime import datetime
840
-
932
+
841
933
  file_desc = FileDescription(
842
934
  project_id=project.id,
843
935
  file_path=file_path,
844
936
  description=description,
845
937
  file_hash=None,
846
938
  last_modified=datetime.utcnow(),
847
- version=1
939
+ version=1,
848
940
  )
849
941
  await self.db_manager.create_file_description(file_desc)
850
942
  self.logger.info(f"Updated description for {file_path}")
851
-
943
+
852
944
  # Update project overview if provided
853
945
  overview_update = updates.get("overview_update")
854
946
  if overview_update and overview_update.strip():
855
947
  from mcp_code_indexer.database.models import ProjectOverview
856
948
  from datetime import datetime
857
-
949
+
858
950
  overview = ProjectOverview(
859
951
  project_id=project.id,
860
952
  overview=overview_update,
861
953
  last_modified=datetime.utcnow(),
862
954
  total_files=len(file_updates),
863
- total_tokens=len(overview_update.split())
955
+ total_tokens=len(overview_update.split()),
864
956
  )
865
957
  await self.db_manager.create_project_overview(overview)
866
958
  self.logger.info("Updated project overview")
867
-
959
+
868
960
  except Exception as e:
869
961
  raise GitHookError(f"Failed to apply updates to database: {e}")
870
-
962
+
871
963
  async def _run_git_command(self, cmd: List[str]) -> str:
872
964
  """
873
965
  Run a git command and return output.
874
-
966
+
875
967
  Args:
876
968
  cmd: Git command arguments
877
-
969
+
878
970
  Returns:
879
971
  Command output as string
880
972
  """
881
973
  full_cmd = ["git"] + cmd
882
-
974
+
883
975
  try:
884
976
  process = await asyncio.create_subprocess_exec(
885
977
  *full_cmd,
886
978
  stdout=asyncio.subprocess.PIPE,
887
979
  stderr=asyncio.subprocess.PIPE,
888
- cwd=Path.cwd()
980
+ cwd=Path.cwd(),
889
981
  )
890
-
982
+
891
983
  stdout, stderr = await process.communicate()
892
-
984
+
893
985
  if process.returncode != 0:
894
986
  raise subprocess.CalledProcessError(
895
- process.returncode,
896
- full_cmd,
897
- stdout,
898
- stderr
987
+ process.returncode, full_cmd, stdout, stderr
899
988
  )
900
-
901
- return stdout.decode('utf-8')
902
-
989
+
990
+ return stdout.decode("utf-8")
991
+
903
992
  except FileNotFoundError:
904
- raise GitHookError("Git command not found - ensure git is installed and in PATH")
993
+ raise GitHookError(
994
+ "Git command not found - ensure git is installed and in PATH"
995
+ )