mcp-code-indexer 3.1.4 → 3.1.5 (py3-none-any.whl)

This diff compares the contents of two publicly released versions of the package as they appear in a supported public registry. It is provided for informational purposes only and reflects the changes between those published versions.
@@ -9,7 +9,7 @@ Handles enhanced question-answering with two-stage processing:
 
 import logging
 from pathlib import Path
-from typing import Dict, List, Optional, Any, Tuple
+from typing import Dict, List, Optional, Any
 
 from .claude_api_handler import ClaudeAPIHandler, ClaudeAPIError
 from .database.database import DatabaseManager
@@ -17,21 +17,27 @@ from .database.database import DatabaseManager
 
 class DeepAskError(ClaudeAPIError):
     """Exception specific to DeepAsk operations."""
+
     pass
 
 
 class DeepAskHandler(ClaudeAPIHandler):
     """
     Handler for enhanced Q&A operations using two-stage Claude API processing.
-
+
     Stage 1: Extract search terms and compress project overview
     Stage 2: Search file descriptions and provide enhanced answer with context
     """
-
-    def __init__(self, db_manager: DatabaseManager, cache_dir: Path, logger: Optional[logging.Logger] = None):
+
+    def __init__(
+        self,
+        db_manager: DatabaseManager,
+        cache_dir: Path,
+        logger: Optional[logging.Logger] = None,
+    ):
         """
         Initialize DeepAskHandler.
-
+
         Args:
             db_manager: Database manager instance
             cache_dir: Cache directory for temporary files
@@ -39,26 +45,28 @@ class DeepAskHandler(ClaudeAPIHandler):
         """
         super().__init__(db_manager, cache_dir, logger)
         self.logger = logger if logger is not None else logging.getLogger(__name__)
-
+
     async def find_existing_project_by_name(self, project_name: str) -> Optional[Any]:
         """
         Find existing project by name for CLI usage.
-
+
         Args:
             project_name: Name of the project to find
-
+
         Returns:
             Project object if found, None otherwise
         """
         try:
             all_projects = await self.db_manager.get_all_projects()
             normalized_name = project_name.lower()
-
+
             for project in all_projects:
                 if project.name.lower() == normalized_name:
-                    self.logger.info(f"Found existing project: {project.name} (ID: {project.id})")
+                    self.logger.info(
+                        f"Found existing project: {project.name} (ID: {project.id})"
+                    )
                     return project
-
+
             self.logger.warning(f"No existing project found with name: {project_name}")
             return None
         except Exception as e:
@@ -66,45 +74,48 @@ class DeepAskHandler(ClaudeAPIHandler):
             return None
 
     async def deepask_question(
-        self,
-        project_info: Dict[str, str],
-        question: str,
-        max_file_results: int = 10
+        self, project_info: Dict[str, str], question: str, max_file_results: int = 10
     ) -> Dict[str, Any]:
         """
-        Ask an enhanced question about the project using two-stage Claude API processing.
-
+        Ask an enhanced question about the project using two-stage Claude API
+        processing.
+
         Args:
             project_info: Project information dict with projectName, folderPath, etc.
             question: User's question about the project
             max_file_results: Maximum number of file descriptions to include
-
+
         Returns:
             Dict containing enhanced response and metadata
         """
         try:
-            self.logger.info(f"Processing deepask question for project: {project_info['projectName']}")
+            self.logger.info(
+                f"Processing deepask question for project: "
+                f"{project_info['projectName']}"
+            )
             self.logger.info(f"Question: {question}")
-
+
             # Validate inputs
             if not question or not question.strip():
                 raise DeepAskError("Question cannot be empty")
-
+
             if not project_info.get("projectName"):
                 raise DeepAskError("Project name is required")
-
+
             # Stage 1: Extract search terms and compress overview
-            stage1_result = await self._stage1_extract_search_terms(project_info, question)
-
+            stage1_result = await self._stage1_extract_search_terms(
+                project_info, question
+            )
+
             # Stage 2: Search files and provide enhanced answer
             stage2_result = await self._stage2_enhanced_answer(
-                project_info,
-                question,
+                project_info,
+                question,
                 stage1_result["search_terms"],
                 stage1_result["compressed_overview"],
-                max_file_results
+                max_file_results,
             )
-
+
             # Combine results
             result = {
                 "answer": stage2_result["answer"],
@@ -118,17 +129,17 @@ class DeepAskHandler(ClaudeAPIHandler):
                     "stage1_tokens": stage1_result["token_usage"],
                     "stage2_tokens": stage2_result["token_usage"],
                     "total_files_found": stage2_result["total_files_found"],
-                    "files_included": len(stage2_result["relevant_files"])
-                }
+                    "files_included": len(stage2_result["relevant_files"]),
+                },
             }
-
-            self.logger.info(f"DeepAsk question completed successfully")
+
+            self.logger.info("DeepAsk question completed successfully")
             self.logger.info(f"Search terms: {stage1_result['search_terms']}")
             self.logger.info(f"Files found: {stage2_result['total_files_found']}")
             self.logger.info(f"Files included: {len(stage2_result['relevant_files'])}")
-
+
             return result
-
+
         except Exception as e:
             error_msg = f"Failed to process deepask question: {str(e)}"
             self.logger.error(error_msg)
@@ -136,192 +147,207 @@ class DeepAskHandler(ClaudeAPIHandler):
                 raise
             else:
                 raise DeepAskError(error_msg)
-
+
     async def _stage1_extract_search_terms(
-        self,
-        project_info: Dict[str, str],
-        question: str
+        self, project_info: Dict[str, str], question: str
     ) -> Dict[str, Any]:
         """
         Stage 1: Extract search terms and compress project overview.
-
+
         Args:
             project_info: Project information
             question: User's question
-
+
         Returns:
             Dict with search_terms, compressed_overview, and token_usage
         """
         self.logger.info("Stage 1: Extracting search terms and compressing overview")
-
+
         # Get project overview
         overview = await self.get_project_overview(project_info)
         if not overview:
             overview = "No project overview available."
-
+
         # Build stage 1 prompt
         prompt = self._build_stage1_prompt(project_info, question, overview)
-
+
         # Validate token limits for stage 1
         if not self.validate_token_limit(prompt):
             raise DeepAskError(
                 f"Stage 1 prompt exceeds token limit of {self.config.token_limit}. "
                 "Project overview may be too large."
             )
-
+
         # Call Claude API for stage 1
         system_prompt = self._get_stage1_system_prompt()
         response = await self._call_claude_api(prompt, system_prompt)
-
+
         # Parse and validate response
         response_data = self.validate_json_response(
-            response.content,
-            required_keys=["search_terms", "compressed_overview"]
+            response.content, required_keys=["search_terms", "compressed_overview"]
         )
-
+
         token_usage = {
             "prompt_tokens": self.get_token_count(prompt),
-            "response_tokens": response.usage.get("completion_tokens") if response.usage else None,
-            "total_tokens": response.usage.get("total_tokens") if response.usage else None
+            "response_tokens": (
+                response.usage.get("completion_tokens") if response.usage else None
+            ),
+            "total_tokens": (
+                response.usage.get("total_tokens") if response.usage else None
+            ),
         }
-
+
         return {
             "search_terms": response_data["search_terms"],
             "compressed_overview": response_data["compressed_overview"],
-            "token_usage": token_usage
+            "token_usage": token_usage,
         }
-
+
     async def _stage2_enhanced_answer(
         self,
         project_info: Dict[str, str],
         question: str,
         search_terms: List[str],
         compressed_overview: str,
-        max_file_results: int
+        max_file_results: int,
     ) -> Dict[str, Any]:
         """
         Stage 2: Search file descriptions and provide enhanced answer.
-
+
         Args:
             project_info: Project information
             question: User's question
             search_terms: Search terms from stage 1
             compressed_overview: Compressed overview from stage 1
             max_file_results: Maximum number of files to include
-
+
         Returns:
             Dict with answer, relevant_files, total_files_found, and token_usage
         """
-        self.logger.info(f"Stage 2: Searching files and generating enhanced answer")
+        self.logger.info("Stage 2: Searching files and generating enhanced answer")
         self.logger.info(f"Search terms: {search_terms}")
-
+
         # Search for relevant files
         relevant_files = []
         total_files_found = 0
-
+
         try:
             # Find existing project by name only (don't create new ones for Q&A)
-            project = await self.find_existing_project_by_name(project_info["projectName"])
-
+            project = await self.find_existing_project_by_name(
+                project_info["projectName"]
+            )
+
             if not project:
-                self.logger.warning(f"Project '{project_info['projectName']}' not found in database")
+                self.logger.warning(
+                    f"Project '{project_info['projectName']}' not found in database"
+                )
                 return {
-                    "answer": f"Project '{project_info['projectName']}' not found in database. Please check the project name.",
+                    "answer": (
+                        f"Project '{project_info['projectName']}' not found in "
+                        f"database. Please check the project name."
+                    ),
                     "relevant_files": [],
                     "total_files_found": 0,
-                    "token_usage": {"prompt_tokens": 0, "response_tokens": 0, "total_tokens": 0}
+                    "token_usage": {
+                        "prompt_tokens": 0,
+                        "response_tokens": 0,
+                        "total_tokens": 0,
+                    },
                 }
-
+
             for search_term in search_terms:
                 try:
                     search_results = await self.db_manager.search_file_descriptions(
                         project_id=project.id,
                         query=search_term,
-                        max_results=max_file_results
+                        max_results=max_file_results,
                     )
-
+
                     total_files_found += len(search_results)
-
+
                     # Add unique files to relevant_files
                     for result in search_results:
-                        if not any(f["filePath"] == result.file_path for f in relevant_files):
-                            relevant_files.append({
-                                "filePath": result.file_path,
-                                "description": result.description,
-                                "search_term": search_term,
-                                "relevance_score": result.relevance_score
-                            })
-
+                        if not any(
+                            f["filePath"] == result.file_path for f in relevant_files
+                        ):
+                            relevant_files.append(
+                                {
+                                    "filePath": result.file_path,
+                                    "description": result.description,
+                                    "search_term": search_term,
+                                    "relevance_score": result.relevance_score,
+                                }
+                            )
+
                         # Stop if we have enough files
                         if len(relevant_files) >= max_file_results:
                             break
-
+
                     if len(relevant_files) >= max_file_results:
                         break
-
+
                 except Exception as e:
                     self.logger.warning(f"Search failed for term '{search_term}': {e}")
                     continue
-
+
         except Exception as e:
             self.logger.warning(f"Failed to search files: {e}")
             # Continue with empty relevant_files list
-
+
         # Build stage 2 prompt with file context
         prompt = self._build_stage2_prompt(
-            project_info,
-            question,
-            compressed_overview,
-            relevant_files
+            project_info, question, compressed_overview, relevant_files
         )
-
+
         # Validate token limits for stage 2
         if not self.validate_token_limit(prompt):
             # Try reducing file context
-            self.logger.warning("Stage 2 prompt exceeds token limit, reducing file context")
-            reduced_files = relevant_files[:max_file_results//2]
+            self.logger.warning(
+                "Stage 2 prompt exceeds token limit, reducing file context"
+            )
+            reduced_files = relevant_files[: max_file_results // 2]
             prompt = self._build_stage2_prompt(
-                project_info,
-                question,
-                compressed_overview,
-                reduced_files
+                project_info, question, compressed_overview, reduced_files
             )
-
+
             if not self.validate_token_limit(prompt):
                 raise DeepAskError(
-                    f"Stage 2 prompt still exceeds token limit even with reduced context. "
-                    "Try a more specific question."
+                    "Stage 2 prompt still exceeds token limit even with reduced "
+                    "context. Try a more specific question."
                 )
-
+
             relevant_files = reduced_files
-
+
         # Call Claude API for stage 2
         system_prompt = self._get_stage2_system_prompt()
         response = await self._call_claude_api(prompt, system_prompt)
-
+
         token_usage = {
             "prompt_tokens": self.get_token_count(prompt),
-            "response_tokens": response.usage.get("completion_tokens") if response.usage else None,
-            "total_tokens": response.usage.get("total_tokens") if response.usage else None
+            "response_tokens": (
+                response.usage.get("completion_tokens") if response.usage else None
+            ),
+            "total_tokens": (
+                response.usage.get("total_tokens") if response.usage else None
+            ),
         }
-
+
         return {
             "answer": response.content,
             "relevant_files": relevant_files,
             "total_files_found": total_files_found,
-            "token_usage": token_usage
+            "token_usage": token_usage,
         }
-
+
     def _build_stage1_prompt(
-        self,
-        project_info: Dict[str, str],
-        question: str,
-        overview: str
+        self, project_info: Dict[str, str], question: str, overview: str
     ) -> str:
         """Build stage 1 prompt for extracting search terms."""
         project_name = project_info["projectName"]
-
-        return f"""I need to answer a question about the codebase "{project_name}". To provide the best answer, I need to search for relevant files and then answer the question.
+
+        return f"""I need to answer a question about the codebase "{project_name}".
+To provide the best answer, I need to search for relevant files and then answer
+the question.
 
 PROJECT OVERVIEW:
 {overview}
@@ -331,25 +357,27 @@ QUESTION:
 
 Please analyze the question and project overview, then provide:
 
-1. A list of 3-5 search terms that would help find relevant files to answer this question
-2. A compressed version of the project overview (2-3 sentences max) that captures the most relevant information for this question
+1. A list of 3-5 search terms that would help find relevant files to answer
+this question
+2. A compressed version of the project overview (2-3 sentences max) that
+captures the most relevant information for this question
 
 Respond with valid JSON in this format:
 {{
     "search_terms": ["term1", "term2", "term3"],
     "compressed_overview": "Brief summary focusing on aspects relevant to the question..."
 }}"""
-
+
     def _build_stage2_prompt(
         self,
         project_info: Dict[str, str],
         question: str,
         compressed_overview: str,
-        relevant_files: List[Dict[str, Any]]
+        relevant_files: List[Dict[str, Any]],
     ) -> str:
         """Build stage 2 prompt for enhanced answer."""
         project_name = project_info["projectName"]
-
+
         # Format file descriptions
         file_context = ""
         if relevant_files:
@@ -360,21 +388,23 @@ Respond with valid JSON in this format:
                 file_context += f" Found via search: {file_info['search_term']}\n"
         else:
             file_context = "\n\nNo relevant files found in the search."
-
-        return f"""Please answer the following question about the codebase "{project_name}".
-
-PROJECT OVERVIEW (COMPRESSED):
-{compressed_overview}
-{file_context}
 
-QUESTION:
-{question}
+        return (
+            f"Please answer the following question about the codebase "
+            f'"{project_name}".\n\n'
+            f"PROJECT OVERVIEW (COMPRESSED):\n{compressed_overview}\n{file_context}\n\n"
+            f"QUESTION:\n{question}\n\n"
+            "Please provide a comprehensive answer based on the project overview and "
+            "relevant file descriptions above. Reference specific files when "
+            "appropriate and explain how they relate to the question. If the "
+            "available information is insufficient, clearly state what "
+            "additional details would be needed."
+        )
 
-Please provide a comprehensive answer based on the project overview and relevant file descriptions above. Reference specific files when appropriate and explain how they relate to the question. If the available information is insufficient, clearly state what additional details would be needed."""
-
     def _get_stage1_system_prompt(self) -> str:
         """Get system prompt for stage 1."""
-        return """You are a technical assistant that analyzes software projects to extract relevant search terms and compress information.
+        return """You are a technical assistant that analyzes software projects to
+extract relevant search terms and compress information.
@@ -392,40 +422,43 @@ The compressed overview should:
 - Preserve the most important architectural or functional details
 
 Always respond with valid JSON matching the requested format."""
-
+
     def _get_stage2_system_prompt(self) -> str:
         """Get system prompt for stage 2."""
-        return """You are a software engineering expert that provides detailed answers about codebases using available context.
+        return """You are a software engineering expert that provides detailed
+answers about codebases using available context.
 
 When answering:
 1. Use the compressed project overview for high-level context
-2. Reference specific files from the relevant files list when they relate to the question
+2. Reference specific files from the relevant files list when they
+relate to the question
 3. Explain how different files work together if relevant
 4. Be specific and technical when appropriate
 5. If information is incomplete, clearly state what's missing and suggest next steps
 6. Provide actionable insights when possible
 
 Your answer should be comprehensive but focused on the specific question asked."""
-
+
     def format_response(self, result: Dict[str, Any], format_type: str = "text") -> str:
         """
         Format response for CLI output.
-
+
         Args:
             result: Result from deepask_question
             format_type: Output format ("text" or "json")
-
+
         Returns:
             Formatted response string
         """
         if format_type == "json":
             import json
+
             return json.dumps(result, indent=2)
-
+
         # Text format
         answer = result["answer"]
         metadata = result["metadata"]
-
+
         output = []
         output.append(f"Question: {result['question']}")
         output.append(f"Project: {result['project_name']}")
@@ -433,11 +466,11 @@ Your answer should be comprehensive but focused on the specific question asked."
         output.append("Answer:")
         output.append(answer)
         output.append("")
-
+
         # Show search terms used
         output.append(f"Search terms: {', '.join(result['search_terms'])}")
         output.append("")
-
+
         # Show relevant files
         if result["relevant_files"]:
             output.append("Relevant files analyzed:")
@@ -446,16 +479,21 @@ Your answer should be comprehensive but focused on the specific question asked."
         else:
             output.append("No relevant files found.")
         output.append("")
-
+
         # Show metadata
         output.append("Metadata:")
         output.append(f" Model: {metadata['model']}")
         output.append(f" Total files found: {metadata['total_files_found']}")
         output.append(f" Files included: {metadata['files_included']}")
-
-        stage1_tokens = metadata['stage1_tokens']['total_tokens']
-        stage2_tokens = metadata['stage2_tokens']['total_tokens']
+
+        stage1_tokens = metadata["stage1_tokens"]["total_tokens"]
+        stage2_tokens = metadata["stage2_tokens"]["total_tokens"]
         if stage1_tokens and stage2_tokens:
-            output.append(f" Total tokens: {stage1_tokens + stage2_tokens} (Stage 1: {stage1_tokens}, Stage 2: {stage2_tokens})")
-
+            output.append(
+                (
+                    f" Total tokens: {stage1_tokens + stage2_tokens} "
+                    f"(Stage 1: {stage1_tokens}, Stage 2: {stage2_tokens})"
+                )
+            )
+
         return "\n".join(output)