jarvis-ai-assistant 0.1.125__py3-none-any.whl → 0.1.128__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of jarvis-ai-assistant might be problematic.

Files changed (49)
  1. jarvis/__init__.py +1 -1
  2. jarvis/jarvis_agent/__init__.py +205 -187
  3. jarvis/jarvis_code_agent/code_agent.py +116 -109
  4. jarvis/jarvis_code_agent/patch.py +157 -138
  5. jarvis/jarvis_code_agent/shell_input_handler.py +22 -0
  6. jarvis/jarvis_codebase/main.py +314 -288
  7. jarvis/jarvis_dev/main.py +695 -716
  8. jarvis/jarvis_lsp/base.py +0 -12
  9. jarvis/jarvis_lsp/cpp.py +0 -9
  10. jarvis/jarvis_lsp/go.py +0 -9
  11. jarvis/jarvis_lsp/python.py +0 -28
  12. jarvis/jarvis_lsp/registry.py +0 -1
  13. jarvis/jarvis_lsp/rust.py +0 -9
  14. jarvis/jarvis_multi_agent/__init__.py +52 -52
  15. jarvis/jarvis_platform/base.py +6 -5
  16. jarvis/jarvis_platform_manager/main.py +1 -1
  17. jarvis/jarvis_rag/main.py +250 -186
  18. jarvis/jarvis_smart_shell/main.py +0 -1
  19. jarvis/jarvis_tools/ask_codebase.py +10 -9
  20. jarvis/jarvis_tools/ask_user.py +2 -2
  21. jarvis/jarvis_tools/base.py +4 -4
  22. jarvis/jarvis_tools/chdir.py +28 -28
  23. jarvis/jarvis_tools/code_review.py +44 -39
  24. jarvis/jarvis_tools/create_code_agent.py +4 -4
  25. jarvis/jarvis_tools/create_sub_agent.py +7 -7
  26. jarvis/jarvis_tools/execute_shell.py +53 -23
  27. jarvis/jarvis_tools/execute_shell_script.py +3 -3
  28. jarvis/jarvis_tools/file_operation.py +70 -41
  29. jarvis/jarvis_tools/git_commiter.py +61 -51
  30. jarvis/jarvis_tools/lsp_find_definition.py +7 -7
  31. jarvis/jarvis_tools/lsp_prepare_rename.py +7 -7
  32. jarvis/jarvis_tools/methodology.py +6 -6
  33. jarvis/jarvis_tools/rag.py +5 -5
  34. jarvis/jarvis_tools/read_webpage.py +52 -32
  35. jarvis/jarvis_tools/registry.py +167 -180
  36. jarvis/jarvis_tools/search_web.py +66 -41
  37. jarvis/jarvis_tools/select_code_files.py +3 -3
  38. jarvis/jarvis_tools/tool_generator.py +68 -55
  39. jarvis/jarvis_utils/methodology.py +77 -59
  40. jarvis/jarvis_utils/output.py +1 -0
  41. {jarvis_ai_assistant-0.1.125.dist-info → jarvis_ai_assistant-0.1.128.dist-info}/METADATA +31 -17
  42. jarvis_ai_assistant-0.1.128.dist-info/RECORD +74 -0
  43. {jarvis_ai_assistant-0.1.125.dist-info → jarvis_ai_assistant-0.1.128.dist-info}/WHEEL +1 -1
  44. jarvis/jarvis_tools/lsp_validate_edit.py +0 -141
  45. jarvis/jarvis_tools/read_code.py +0 -192
  46. jarvis_ai_assistant-0.1.125.dist-info/RECORD +0 -75
  47. {jarvis_ai_assistant-0.1.125.dist-info → jarvis_ai_assistant-0.1.128.dist-info}/LICENSE +0 -0
  48. {jarvis_ai_assistant-0.1.125.dist-info → jarvis_ai_assistant-0.1.128.dist-info}/entry_points.txt +0 -0
  49. {jarvis_ai_assistant-0.1.125.dist-info → jarvis_ai_assistant-0.1.128.dist-info}/top_level.txt +0 -0
jarvis/jarvis_codebase/main.py

@@ -4,6 +4,8 @@ import numpy as np
 import faiss
 from typing import List, Tuple, Optional, Dict
 
+from yaspin import yaspin
+
 from jarvis.jarvis_platform.registry import PlatformRegistry
 import concurrent.futures
 from concurrent.futures import ThreadPoolExecutor
@@ -21,7 +23,11 @@ from jarvis.jarvis_utils.utils import get_file_md5, init_env, user_confirm
 
 class CodeBase:
     def __init__(self, root_dir: str):
-        init_env()
+        with yaspin(text="正在初始化环境...", color="cyan") as spinner:
+            init_env()
+            spinner.text = "环境初始化完成"
+            spinner.ok("✅")
+
         self.root_dir = root_dir
         os.chdir(self.root_dir)
         self.thread_count = get_thread_count()
@@ -29,22 +35,28 @@ class CodeBase:
         self.index = None
 
         # 初始化数据目录
-        self.data_dir = os.path.join(self.root_dir, ".jarvis/codebase")
-        self.cache_dir = os.path.join(self.data_dir, "cache")
-        if not os.path.exists(self.cache_dir):
-            os.makedirs(self.cache_dir)
-
+        with yaspin(text="正在初始化数据目录...", color="cyan") as spinner:
+            self.data_dir = os.path.join(self.root_dir, ".jarvis/codebase")
+            self.cache_dir = os.path.join(self.data_dir, "cache")
+            if not os.path.exists(self.cache_dir):
+                os.makedirs(self.cache_dir)
+            spinner.text = "数据目录初始化完成"
+            spinner.ok("✅")
+
+        with yaspin("正在初始化嵌入模型...", color="cyan") as spinner:
         # 初始化嵌入模型
-        try:
-            self.embedding_model = load_embedding_model()
-            test_text = """This is a test text"""
-            self.embedding_model.encode([test_text],
-                                        convert_to_tensor=True,
-                                        normalize_embeddings=True)
-            PrettyOutput.print("模型加载成功", output_type=OutputType.SUCCESS)
-        except Exception as e:
-            PrettyOutput.print(f"加载模型失败: {str(e)}", output_type=OutputType.ERROR)
-            raise
+            try:
+                self.embedding_model = load_embedding_model()
+                test_text = """This is a test text"""
+                self.embedding_model.encode([test_text],
+                                            convert_to_tensor=True,
+                                            normalize_embeddings=True)
+                spinner.text = "嵌入模型初始化完成"
+                spinner.ok("✅")
+            except Exception as e:
+                spinner.text = "嵌入模型初始化失败"
+                spinner.fail("❌")
+                raise
 
         self.vector_dim = self.embedding_model.get_sentence_embedding_dimension()
         self.git_file_list = self.get_git_file_list()
@@ -55,7 +67,8 @@ class CodeBase:
         self.file_paths = []
 
         # 加载所有缓存文件
-        self._load_all_cache()
+        with spinner.hidden():
+            self._load_all_cache()
 
     def get_git_file_list(self):
         """Get the list of files in the git repository, excluding the .jarvis-codebase directory"""
@@ -72,17 +85,13 @@ class CodeBase:
 
     def make_description(self, file_path: str, content: str) -> str:
         model = PlatformRegistry.get_global_platform_registry().get_cheap_platform()
-        if self.thread_count > 1:
-            model.set_suppress_output(True)
-        else:
-            PrettyOutput.print(f"为 {file_path} 生成描述 ...", output_type=OutputType.PROGRESS)
-        prompt = f"""Please analyze the following code file and generate a detailed description. The description should include:
-1. Overall file functionality description
-2. description for each global variable, function, type definition, class, method, and other code elements
-
-Please use concise and professional language, emphasizing technical functionality to facilitate subsequent code retrieval.
-File path: {file_path}
-Code content:
+        prompt = f"""请分析以下代码文件并生成详细描述。描述应包含:
+1. 文件整体功能描述
+2. 对每个全局变量、函数、类型定义、类、方法和其他代码元素的描述
+
+请使用简洁专业的语言,强调技术功能,以便于后续代码检索。
+文件路径: {file_path}
+代码内容:
 {content}
 """
         response = model.chat_until_success(prompt)
@@ -114,50 +123,52 @@ Code content:
 
     def _load_all_cache(self):
         """Load all cache files"""
-        try:
-            # 清空现有缓存和文件路径
-            self.vector_cache = {}
-            self.file_paths = []
-            vectors = []
-
-            for cache_file in os.listdir(self.cache_dir):
-                if not cache_file.endswith('.cache'):
-                    continue
-
-                cache_path = os.path.join(self.cache_dir, cache_file)
-                try:
-                    with lzma.open(cache_path, 'rb') as f:
-                        cache_data = pickle.load(f)
-                        file_path = cache_data["path"]
-                        self.vector_cache[file_path] = cache_data
-                        self.file_paths.append(file_path)
-                        vectors.append(cache_data["vector"])
-                except Exception as e:
-                    PrettyOutput.print(f"加载缓存文件 {cache_file} 失败: {str(e)}",
-                                       output_type=OutputType.WARNING)
-                    continue
-
-            if vectors:
-                # 重建索引
-                vectors_array = np.vstack(vectors)
-                hnsw_index = faiss.IndexHNSWFlat(self.vector_dim, 16)
-                hnsw_index.hnsw.efConstruction = 40
-                hnsw_index.hnsw.efSearch = 16
-                self.index = faiss.IndexIDMap(hnsw_index)
-                self.index.add_with_ids(vectors_array, np.array(range(len(vectors)))) # type: ignore
+        with yaspin(text="正在加载缓存文件...", color="cyan") as spinner:
+            try:
+                # 清空现有缓存和文件路径
+                self.vector_cache = {}
+                self.file_paths = []
+                vectors = []
 
-                PrettyOutput.print(f"加载 {len(self.vector_cache)} 个向量缓存并重建索引",
-                                   output_type=OutputType.INFO)
-            else:
-                self.index = None
-                PrettyOutput.print("没有找到有效的缓存文件", output_type=OutputType.WARNING)
+                for cache_file in os.listdir(self.cache_dir):
+                    if not cache_file.endswith('.cache'):
+                        continue
+
+                    cache_path = os.path.join(self.cache_dir, cache_file)
+                    try:
+                        with lzma.open(cache_path, 'rb') as f:
+                            cache_data = pickle.load(f)
+                            file_path = cache_data["path"]
+                            self.vector_cache[file_path] = cache_data
+                            self.file_paths.append(file_path)
+                            vectors.append(cache_data["vector"])
+                            spinner.write(f"✅ 加载缓存文件成功 {file_path}")
+                    except Exception as e:
+                        spinner.write(f"❌ 加载缓存文件失败 {cache_file} {str(e)}")
+                        continue
 
-        except Exception as e:
-            PrettyOutput.print(f"加载缓存目录失败: {str(e)}",
-                               output_type=OutputType.WARNING)
-            self.vector_cache = {}
-            self.file_paths = []
-            self.index = None
+                if vectors:
+                    # 重建索引
+                    vectors_array = np.vstack(vectors)
+                    hnsw_index = faiss.IndexHNSWFlat(self.vector_dim, 16)
+                    hnsw_index.hnsw.efConstruction = 40
+                    hnsw_index.hnsw.efSearch = 16
+                    self.index = faiss.IndexIDMap(hnsw_index)
+                    self.index.add_with_ids(vectors_array, np.array(range(len(vectors)))) # type: ignore
+
+                    spinner.text = f"加载 {len(self.vector_cache)} 个向量缓存并重建索引"
+                    spinner.ok("✅")
+                else:
+                    self.index = None
+                    spinner.text = "没有找到有效的缓存文件"
+                    spinner.ok("✅")
+
+            except Exception as e:
+                spinner.text = f"加载缓存目录失败: {str(e)}"
+                spinner.fail("❌")
+                self.vector_cache = {}
+                self.file_paths = []
+                self.index = None
 
     def cache_vector(self, file_path: str, vector: np.ndarray, description: str):
         """Cache the vector representation of a file"""
@@ -320,7 +331,7 @@ Content: {content}
         ids = []
         self.file_paths = [] # Reset the file path list
 
-        for i, (file_path, data) in enumerate(self.vector_cache.items()):
+        for i, ( file_path, data) in enumerate(self.vector_cache.items()):
             if "vector" not in data:
                 PrettyOutput.print(f"无效的缓存数据 {file_path}: 缺少向量",
                                    output_type=OutputType.WARNING)
@@ -450,7 +461,6 @@ Content: {content}
         # If force is True, continue directly
         if not force:
             if not user_confirm("重建索引?", False):
-                PrettyOutput.print("取消重建索引", output_type=OutputType.INFO)
                 return
 
         # Clean deleted files
@@ -464,7 +474,7 @@ Content: {content}
         files_to_process = new_files + modified_files
         processed_files = []
 
-        with tqdm(total=len(files_to_process), desc="Processing files") as pbar:
+        with yaspin(text="正在处理文件...", color="cyan") as spinner:
             # Use a thread pool to process files
             with ThreadPoolExecutor(max_workers=self.thread_count) as executor:
                 # Submit all tasks
@@ -480,16 +490,18 @@ Content: {content}
                         result = future.result()
                         if result:
                             processed_files.append(result)
+                            spinner.write(f"✅ 处理文件成功 {file}")
                     except Exception as e:
-                        PrettyOutput.print(f"Failed to process file {file}: {str(e)}",
-                                           output_type=OutputType.ERROR)
-                    pbar.update(1)
+                        spinner.write(f" 处理文件失败 {file}: {str(e)}")
+
+            spinner.text = f"处理完成"
+            spinner.ok("✅")
 
         if processed_files:
-            PrettyOutput.print("重建向量数据库...", output_type=OutputType.INFO)
-            self.gen_vector_db_from_cache()
-            PrettyOutput.print(f"成功生成了 {len(processed_files)} 个文件的索引",
-                               output_type=OutputType.SUCCESS)
+            with yaspin(text="重建向量数据库...", color="cyan") as spinner:
+                self.gen_vector_db_from_cache()
+                spinner.text = f"成功生成了 {len(processed_files)} 个文件的索引"
+                spinner.ok("✅")
         else:
             PrettyOutput.print("没有检测到文件变化, 不需要重建索引", output_type=OutputType.INFO)
 
@@ -540,79 +552,79 @@ Content: {content}
         """
         if not initial_results:
             return []
-
-        try:
-            PrettyOutput.print(f"Picking results ...", output_type=OutputType.INFO)
-
-            # Maximum content length per batch
-            max_batch_length = self.max_token_count - 1000 # Reserve space for prompt
-            max_file_length = max_batch_length // 3 # Limit individual file size
-
-            # Process files in batches
-            all_selected_files = []
-            current_batch = []
-            current_token_count = 0
-
-            for path in initial_results:
-                try:
-                    content = open(path, "r", encoding="utf-8").read()
-                    # Truncate large files
-                    if get_context_token_count(content) > max_file_length:
-                        PrettyOutput.print(f"Truncating large file: {path}", OutputType.WARNING)
-                        content = content[:max_file_length] + "\n... (content truncated)"
-
-                    file_info = f"File: {path}\nContent: {content}\n\n"
-                    tokens_count = get_context_token_count(file_info)
-
-                    # If adding this file would exceed batch limit
-                    if current_token_count + tokens_count > max_batch_length:
-                        # Process current batch
-                        if current_batch:
-                            selected = self._process_batch('\n'.join(query), current_batch)
-                            all_selected_files.extend(selected)
-                        # Start new batch
-                        current_batch = [file_info]
-                        current_token_count = tokens_count
-                    else:
-                        current_batch.append(file_info)
-                        current_token_count += tokens_count
+        with yaspin(text="正在筛选结果...", color="cyan") as spinner:
+            try:
+                # Maximum content length per batch
+                max_batch_length = self.max_token_count - 1000 # Reserve space for prompt
+                max_file_length = max_batch_length // 3 # Limit individual file size
+
+                # Process files in batches
+                all_selected_files = []
+                current_batch = []
+                current_token_count = 0
+
+                for path in initial_results:
+                    try:
+                        content = open(path, "r", encoding="utf-8").read()
+                        # Truncate large files
+                        if get_context_token_count(content) > max_file_length:
+                            spinner.write(f"❌ 截断大文件: {path}")
+                            content = content[:max_file_length] + "\n... (content truncated)"
 
-                except Exception as e:
-                    PrettyOutput.print(f"读取 {path} 失败: {str(e)}", OutputType.ERROR)
-                    continue
-
-            # Process final batch
-            if current_batch:
-                selected = self._process_batch('\n'.join(query), current_batch)
-                all_selected_files.extend(selected)
-
-            # Convert set to list and maintain original order
-            return all_selected_files
+                        file_info = f"File: {path}\nContent: {content}\n\n"
+                        tokens_count = get_context_token_count(file_info)
+
+                        # If adding this file would exceed batch limit
+                        if current_token_count + tokens_count > max_batch_length:
+                            # Process current batch
+                            if current_batch:
+                                selected = self._process_batch('\n'.join(query), current_batch)
+                                all_selected_files.extend(selected)
+                            # Start new batch
+                            current_batch = [file_info]
+                            current_token_count = tokens_count
+                        else:
+                            current_batch.append(file_info)
+                            current_token_count += tokens_count
+
+                    except Exception as e:
+                        spinner.write(f"❌ 读取 {path} 失败: {str(e)}")
+                        continue
+
+                # Process final batch
+                if current_batch:
+                    selected = self._process_batch('\n'.join(query), current_batch)
+                    all_selected_files.extend(selected)
+
+                spinner.write("✅ 结果筛选完成")
+                # Convert set to list and maintain original order
+                return all_selected_files
 
-        except Exception as e:
-            PrettyOutput.print(f"选择失败: {str(e)}", OutputType.ERROR)
-            return [{"file": f, "reason": "" } for f in initial_results]
+            except Exception as e:
+                spinner.text = f"选择失败: {str(e)}"
+                spinner.fail("")
+                return [{"file": f, "reason": "" } for f in initial_results]
 
     def _process_batch(self, query: str, files_info: List[str]) -> List[Dict[str, str]]:
         """Process a batch of files"""
-        prompt = f"""As a code analysis expert, please help identify the most relevant files for the given query using chain-of-thought reasoning.
+        prompt = f"""作为一名代码分析专家,请使用链式思维推理帮助识别与给定查询最相关的文件。
 
-Query: {query}
+查询: {query}
 
-Available files:
+可用文件:
 {''.join(files_info)}
 
-Think through this step by step:
-1. First, analyze the query to identify key requirements and technical concepts
-2. For each file:
-   - Examine its path and content
-   - Assess how it relates to the query's requirements
-   - Consider both direct and indirect relationships
-   - Rate its relevance (high/medium/low)
-3. Select only files with clear relevance to the query
-4. Order files by relevance, with most relevant first
-
-Please output your selection in YAML format:
+请按以下步骤思考:
+1. 首先,分析查询以识别关键需求和技术概念
+2. 对于每个文件:
+   - 检查其路径和内容
+   - 评估其与查询需求的关系
+   - 考虑直接和间接关系
+   - 评估其相关性(高/中/低)
+3. 仅选择与查询明确相关的文件
+4. 按相关性排序,最相关的文件在前
+
+请以YAML格式输出您的选择:
 <FILES>
 - file: path/to/most/relevant.py
   reason: xxxxxxxxxx
@@ -620,17 +632,16 @@ Please output your selection in YAML format:
   reason: yyyyyyyyyy
 </FILES>
 
-Important:
-- Only include files that are truly relevant
-- Exclude files with weak or unclear connections
-- Focus on implementation rather than test files
-- Consider both file paths and content
-- Only output the file paths, no other text
+重要提示:
+- 仅包含真正相关的文件
+- 排除连接不明确或较弱的文件
+- 重点关注实现文件而非测试文件
+- 同时考虑文件路径和内容
+- 仅输出文件路径,不要包含其他文本
 """
 
         # Use a large model to evaluate
         model = PlatformRegistry.get_global_platform_registry().get_normal_platform()
-        model.set_suppress_output(True)
         response = model.chat_until_success(prompt)
 
         # Parse the response
@@ -656,30 +667,28 @@ Important:
             List[str]: The query variants list
         """
         model = PlatformRegistry.get_global_platform_registry().get_normal_platform()
-        model.set_suppress_output(True)
-        prompt = f"""Please generate 10 different expressions optimized for vector search based on the following query. Each expression should:
-
-1. Focus on key technical concepts and terminology
-2. Use clear and specific language
-3. Include important contextual terms
-4. Avoid general or ambiguous words
-5. Maintain semantic similarity with original query
-6. Be suitable for embedding-based search
-
-Original query:
+        prompt = f"""请基于以下查询生成10个针对向量搜索优化的不同表达。每个表达应满足:
+1. 聚焦关键技术概念和术语
+2. 使用清晰明确的语言
+3. 包含重要的上下文术语
+4. 避免使用通用或模糊的词语
+5. 保持与原始查询的语义相似性
+6. 适合基于嵌入的搜索
+
+原始查询:
 {query}
 
-Example transformations:
-Query: "How to handle user login?"
-Output format:
+示例转换:
+查询: "如何处理用户登录?"
+输出格式:
 <QUESTION>
-- user authentication implementation and flow
-- login system architecture and components
-- credential validation and session management
+- 用户认证的实现与流程
+- 登录系统架构与组件
+- 凭证验证与会话管理
 - ...
 </QUESTION>
 
-Please provide 10 search-optimized expressions in the specified format.
+请以指定格式提供10个搜索优化的表达。
 """
         response = model.chat_until_success(prompt)
 
@@ -733,57 +742,70 @@ Please provide 10 search-optimized expressions in the specified format.
 
     def search_similar(self, query: str, top_k: int = 30) -> List[Dict[str, str]]:
         """Search related files with optimized retrieval"""
-        try:
-            self.generate_codebase()
-            if self.index is None:
-                return []
-
-            # Generate query variants for better coverage
-            query_variants = self._generate_query_variants(query)
-
-            # Collect results from all variants
-            all_results = []
-            seen_files = set()
-
-            for variant in query_variants:
-                # Get vector for each variant
-                query_vector = get_embedding(self.embedding_model, variant)
-                query_vector = query_vector.reshape(1, -1)
+        with yaspin(text="正在搜索相关文件...", color="cyan") as spinner:
+            try:
+                with spinner.hidden():
+                    self.generate_codebase()
+                if self.index is None:
+                    spinner.text = "没有找到有效的缓存文件"
+                    spinner.ok("✅")
+                    return []
+
+                # Generate query variants for better coverage
+                spinner.text = "生成查询变体..."
+                query_variants = self._generate_query_variants(query)
+                spinner.write("✅ 查询变体生成完成")
 
-                # Search with current variant
-                initial_k = min(top_k * 2, len(self.file_paths))
-                distances, indices = self.index.search(query_vector, initial_k) # type: ignore
+                # Collect results from all variants
+                spinner.text = "收集结果..."
+                all_results = []
+                seen_files = set()
 
-                # Process results
-                for idx, dist in zip(indices[0], distances[0]):
-                    if idx != -1:
-                        file_path = self.file_paths[idx]
-                        if file_path not in seen_files:
-                            similarity = 1.0 / (1.0 + float(dist))
-                            if similarity > 0.3: # Lower threshold for better recall
-                                seen_files.add(file_path)
-                                all_results.append((file_path, similarity, self.vector_cache[file_path]["description"]))
-
-            if not all_results:
-                return []
+                for variant in query_variants:
+                    # Get vector for each variant
+                    query_vector = get_embedding(self.embedding_model, variant)
+                    query_vector = query_vector.reshape(1, -1)
+
+                    # Search with current variant
+                    initial_k = min(top_k * 2, len(self.file_paths))
+                    distances, indices = self.index.search(query_vector, initial_k) # type: ignore
+
+                    # Process results
+                    for idx, dist in zip(indices[0], distances[0]):
+                        if idx != -1:
+                            file_path = self.file_paths[idx]
+                            if file_path not in seen_files:
+                                similarity = 1.0 / (1.0 + float(dist))
+                                if similarity > 0.3: # Lower threshold for better recall
+                                    seen_files.add(file_path)
+                                    all_results.append((file_path, similarity, self.vector_cache[file_path]["description"]))
+                spinner.write("✅ 结果收集完成")
+                if not all_results:
+                    spinner.text = "没有找到相关文件"
+                    spinner.ok("✅")
+                    return []
+
+                spinner.text = "排序..."
+                # Sort by similarity and take top_k
+                all_results.sort(key=lambda x: x[1], reverse=True)
+                results = all_results[:top_k]
+                spinner.write("✅ 排序完成")
 
-            # Sort by similarity and take top_k
-            all_results.sort(key=lambda x: x[1], reverse=True)
-            results = all_results[:top_k]
-
-            results = self.pick_results(query_variants, [path for path, _, _ in results])
+                with spinner.hidden():
+                    results = self.pick_results(query_variants, [path for path, _, _ in results])
 
-            output = "Found related files:\n"
-            for file in results:
-                output += f'''- {file['file']} ({file['reason']})\n'''
-            PrettyOutput.print(output, output_type=OutputType.INFO, lang="markdown")
+                output = "Found related files:\n"
+                for file in results:
+                    output += f'''- {file['file']} ({file['reason']})\n'''
 
-
-            return results
-
-        except Exception as e:
-            PrettyOutput.print(f"搜索失败: {str(e)}", output_type=OutputType.ERROR)
-            return []
+                spinner.text="结果输出完成"
+                spinner.ok("✅")
+                return results
+
+            except Exception as e:
+                spinner.text = f"搜索失败: {str(e)}"
+                spinner.fail("❌")
+                return []
 
     def ask_codebase(self, query: str, top_k: int=20) -> Tuple[List[Dict[str, str]], str]:
         """Query the codebase with enhanced context building"""
@@ -794,85 +816,86 @@ Please provide 10 search-optimized expressions in the specified format.
             return [], ""
 
         prompt = f"""
-# 🤖 Role Definition
-You are a code analysis expert who provides comprehensive and accurate answers about codebases.
-
-# 🎯 Core Responsibilities
-- Analyze code files thoroughly
-- Explain technical concepts clearly
-- Provide relevant code examples
-- Identify missing information
-- Answer in user's language
-
-# 📋 Response Requirements
-## Content Quality
-- Focus on implementation details
-- Be technically precise
-- Include relevant code snippets
-- Indicate any missing information
-- Use professional terminology
-
-## Response Format
-- question: [Restate the question]
+# 🤖 角色定义
+您是一位代码分析专家,能够提供关于代码库的全面且准确的回答。
+
+# 🎯 核心职责
+- 深入分析代码文件
+- 清晰解释技术概念
+- 提供相关代码示例
+- 识别缺失的信息
+- 使用用户的语言进行回答
+
+# 📋 回答要求
+## 内容质量
+- 关注实现细节
+- 保持技术准确性
+- 包含相关代码片段
+- 指出任何缺失的信息
+- 使用专业术语
+
+## 回答格式
+- question: [重述问题]
   answer: |
-    [Detailed technical answer with:
-    - Implementation details
-    - Code examples (if relevant)
-    - Missing information (if any)
-    - Related technical concepts]
+    [详细的技术回答,包含:
+    - 实现细节
+    - 代码示例(如果相关)
+    - 缺失的信息(如果有)
+    - 相关技术概念]
 
-- question: [Follow-up question if needed]
+- question: [如果需要,提出后续问题]
   answer: |
-    [Additional technical details]
+    [额外的技术细节]
 
-# 🔍 Analysis Context
-Question: {query}
+# 🔍 分析上下文
+问题: {query}
 
-Relevant Code Files (by relevance):
+相关代码文件(按相关性排序):
 """
 
-        # Add context with length control
-        available_count = self.max_token_count - get_context_token_count(prompt) - 1000 # Reserve space for answer
-        current_count = 0
-
-        for path in files_from_codebase:
-            try:
-                content = open(path["file"], "r", encoding="utf-8").read()
-                file_content = f"""
-## File: {path["file"]}
-```
-{content}
-```
----
-"""
-                if current_count + get_context_token_count(file_content) > available_count:
-                    PrettyOutput.print(
-                        "由于上下文长度限制, 一些文件被省略",
-                        output_type=OutputType.WARNING
-                    )
-                    break
+        with yaspin(text="正在生成回答...", color="cyan") as spinner:
+            # 添加上下文,控制长度
+            spinner.text = "添加上下文..."
+            available_count = self.max_token_count - get_context_token_count(prompt) - 1000 # 为回答预留空间
+            current_count = 0
+
+            for path in files_from_codebase:
+                try:
+                    content = open(path["file"], "r", encoding="utf-8").read()
+                    file_content = f"""
+## 文件: {path["file"]}
+```
+{content}
+```
+---
+"""
+                    if current_count + get_context_token_count(file_content) > available_count:
+                        spinner.write("⚠️ 由于上下文长度限制, 一些文件被省略")
+                        break
+
+                    prompt += file_content
+                    current_count += get_context_token_count(file_content)
 
-                prompt += file_content
-                current_count += get_context_token_count(file_content)
-
-            except Exception as e:
-                PrettyOutput.print(f"读取 {path} 失败: {str(e)}",
-                                   output_type=OutputType.ERROR)
-                continue
-
-        prompt += """
-# ❗ Important Rules
-1. Always base answers on provided code
-2. Use technical precision
-3. Include code examples when relevant
-4. Indicate any missing information
-5. Maintain professional language
-6. Answer in user's language
-"""
-
-        model = PlatformRegistry.get_global_platform_registry().get_thinking_platform()
+                except Exception as e:
+                    spinner.write(f"❌ 读取 {path} 失败: {str(e)}")
+                    continue
 
-        return files_from_codebase, model.chat_until_success(prompt)
+            prompt += """
+# ❗ 重要规则
+1. 始终基于提供的代码进行回答
+2. 保持技术准确性
+3. 在相关时包含代码示例
+4. 指出任何缺失的信息
+5. 保持专业语言
+6. 使用用户的语言进行回答
+"""
+
+            model = PlatformRegistry.get_global_platform_registry().get_thinking_platform()
+            spinner.text = "生成回答..."
+            ret = files_from_codebase, model.chat_until_success(prompt)
+            spinner.text = "回答生成完成"
+            spinner.ok("✅")
+            return ret
 
     def is_index_generated(self) -> bool:
         """Check if the index has been generated"""
@@ -973,9 +996,12 @@ def main():
             PrettyOutput.print(output, output_type=OutputType.INFO, lang="markdown")
 
         elif args.command == 'ask':
-            response = codebase.ask_codebase(args.question, args.top_k)
-            output = f"""{response}"""
-            PrettyOutput.print(output, output_type=OutputType.INFO)
+            files, answer = codebase.ask_codebase(args.question, args.top_k)
+            output = f"# 相关文件:\n"
+            for file in files:
+                output += f"""- {file['file']} ({file['reason']})\n"""
+            output += f"# 回答:\n{answer}"
+            PrettyOutput.print(output, output_type=OutputType.SYSTEM, lang="markdown")
 
         else:
            parser.print_help()
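
Throughout these hunks the release replaces PrettyOutput and tqdm progress reporting with yaspin spinners. For reference, a minimal sketch of the yaspin API as used above (illustrative only; the file names and messages are made up, not taken from the diff):

# Sketch of the yaspin spinner pattern adopted in this release (illustrative, not from the diff).
from yaspin import yaspin

with yaspin(text="processing files...", color="cyan") as spinner:
    for name in ["a.py", "b.py"]:
        spinner.write(f"✅ processed {name}")   # prints a permanent log line above the live spinner
    spinner.text = "done"                       # replaces the spinner's trailing text
    spinner.ok("✅")                            # stops the spinner with a success mark

with yaspin(text="outer task...", color="cyan") as spinner:
    with spinner.hidden():                      # temporarily clears the spinner line so other
        print("nested output")                  # output (or a nested spinner) can render cleanly
    spinner.fail("❌")                          # stops the spinner with a failure mark
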