jarvis-ai-assistant 0.1.132__py3-none-any.whl → 0.1.138__py3-none-any.whl

This diff covers publicly released versions of the package as they appear in their public registry. It is provided for informational purposes only and reflects the changes between those published versions.

Potentially problematic release: this version of jarvis-ai-assistant might be problematic.

Files changed (82)
  1. jarvis/__init__.py +1 -1
  2. jarvis/jarvis_agent/__init__.py +330 -347
  3. jarvis/jarvis_agent/builtin_input_handler.py +16 -6
  4. jarvis/jarvis_agent/file_input_handler.py +9 -9
  5. jarvis/jarvis_agent/jarvis.py +143 -0
  6. jarvis/jarvis_agent/main.py +12 -13
  7. jarvis/jarvis_agent/output_handler.py +3 -3
  8. jarvis/jarvis_agent/patch.py +92 -64
  9. jarvis/jarvis_agent/shell_input_handler.py +5 -3
  10. jarvis/jarvis_code_agent/code_agent.py +263 -177
  11. jarvis/jarvis_code_agent/file_select.py +24 -24
  12. jarvis/jarvis_dev/main.py +45 -59
  13. jarvis/jarvis_git_details/__init__.py +0 -0
  14. jarvis/jarvis_git_details/main.py +179 -0
  15. jarvis/jarvis_git_squash/main.py +7 -7
  16. jarvis/jarvis_lsp/base.py +11 -53
  17. jarvis/jarvis_lsp/cpp.py +13 -28
  18. jarvis/jarvis_lsp/go.py +13 -28
  19. jarvis/jarvis_lsp/python.py +8 -27
  20. jarvis/jarvis_lsp/registry.py +21 -83
  21. jarvis/jarvis_lsp/rust.py +15 -30
  22. jarvis/jarvis_methodology/main.py +101 -0
  23. jarvis/jarvis_multi_agent/__init__.py +10 -51
  24. jarvis/jarvis_multi_agent/main.py +43 -0
  25. jarvis/jarvis_platform/__init__.py +1 -1
  26. jarvis/jarvis_platform/ai8.py +67 -89
  27. jarvis/jarvis_platform/base.py +14 -13
  28. jarvis/jarvis_platform/kimi.py +25 -28
  29. jarvis/jarvis_platform/ollama.py +24 -26
  30. jarvis/jarvis_platform/openai.py +15 -19
  31. jarvis/jarvis_platform/oyi.py +48 -50
  32. jarvis/jarvis_platform/registry.py +29 -44
  33. jarvis/jarvis_platform/yuanbao.py +39 -43
  34. jarvis/jarvis_platform_manager/main.py +81 -81
  35. jarvis/jarvis_platform_manager/openai_test.py +21 -21
  36. jarvis/jarvis_rag/file_processors.py +18 -18
  37. jarvis/jarvis_rag/main.py +262 -278
  38. jarvis/jarvis_smart_shell/main.py +12 -12
  39. jarvis/jarvis_tools/ask_codebase.py +85 -78
  40. jarvis/jarvis_tools/ask_user.py +8 -8
  41. jarvis/jarvis_tools/base.py +4 -4
  42. jarvis/jarvis_tools/chdir.py +9 -9
  43. jarvis/jarvis_tools/code_review.py +40 -21
  44. jarvis/jarvis_tools/create_code_agent.py +15 -15
  45. jarvis/jarvis_tools/create_sub_agent.py +0 -1
  46. jarvis/jarvis_tools/execute_python_script.py +3 -3
  47. jarvis/jarvis_tools/execute_shell.py +11 -11
  48. jarvis/jarvis_tools/execute_shell_script.py +3 -3
  49. jarvis/jarvis_tools/file_analyzer.py +116 -105
  50. jarvis/jarvis_tools/file_operation.py +22 -20
  51. jarvis/jarvis_tools/find_caller.py +105 -40
  52. jarvis/jarvis_tools/find_methodolopy.py +65 -0
  53. jarvis/jarvis_tools/find_symbol.py +123 -39
  54. jarvis/jarvis_tools/function_analyzer.py +140 -57
  55. jarvis/jarvis_tools/git_commiter.py +10 -10
  56. jarvis/jarvis_tools/lsp_get_diagnostics.py +19 -19
  57. jarvis/jarvis_tools/methodology.py +22 -67
  58. jarvis/jarvis_tools/project_analyzer.py +137 -53
  59. jarvis/jarvis_tools/rag.py +15 -20
  60. jarvis/jarvis_tools/read_code.py +25 -23
  61. jarvis/jarvis_tools/read_webpage.py +31 -31
  62. jarvis/jarvis_tools/registry.py +72 -52
  63. jarvis/jarvis_tools/search_web.py +23 -353
  64. jarvis/jarvis_tools/tool_generator.py +19 -19
  65. jarvis/jarvis_utils/config.py +36 -96
  66. jarvis/jarvis_utils/embedding.py +83 -83
  67. jarvis/jarvis_utils/git_utils.py +20 -20
  68. jarvis/jarvis_utils/globals.py +18 -6
  69. jarvis/jarvis_utils/input.py +10 -9
  70. jarvis/jarvis_utils/methodology.py +141 -140
  71. jarvis/jarvis_utils/output.py +13 -13
  72. jarvis/jarvis_utils/utils.py +23 -71
  73. {jarvis_ai_assistant-0.1.132.dist-info → jarvis_ai_assistant-0.1.138.dist-info}/METADATA +6 -15
  74. jarvis_ai_assistant-0.1.138.dist-info/RECORD +85 -0
  75. {jarvis_ai_assistant-0.1.132.dist-info → jarvis_ai_assistant-0.1.138.dist-info}/entry_points.txt +4 -3
  76. jarvis/jarvis_tools/lsp_find_definition.py +0 -150
  77. jarvis/jarvis_tools/lsp_find_references.py +0 -127
  78. jarvis/jarvis_tools/select_code_files.py +0 -62
  79. jarvis_ai_assistant-0.1.132.dist-info/RECORD +0 -82
  80. {jarvis_ai_assistant-0.1.132.dist-info → jarvis_ai_assistant-0.1.138.dist-info}/LICENSE +0 -0
  81. {jarvis_ai_assistant-0.1.132.dist-info → jarvis_ai_assistant-0.1.138.dist-info}/WHEEL +0 -0
  82. {jarvis_ai_assistant-0.1.132.dist-info → jarvis_ai_assistant-0.1.138.dist-info}/top_level.txt +0 -0
Only two of the 82 changed files are reproduced in the diff below: jarvis/jarvis_utils/embedding.py and jarvis/jarvis_utils/git_utils.py. The visible changes in both appear to be whitespace cleanups; each "-"/"+" pair differs only in trailing whitespace.

jarvis/jarvis_utils/embedding.py

@@ -15,10 +15,10 @@ _global_tokenizers = {}
 
 def get_context_token_count(text: str) -> int:
     """使用分词器获取文本的token数量。
-
+
     参数:
         text: 要计算token的输入文本
-
+
     返回:
         int: 文本中的token数量
     """
@@ -27,7 +27,7 @@ def get_context_token_count(text: str) -> int:
         tokenizer = load_tokenizer()
         chunks = split_text_into_chunks(text, 512)
         return sum([len(tokenizer.encode(chunk)) for chunk in chunks]) # type: ignore
-
+
     except Exception as e:
         PrettyOutput.print(f"计算token失败: {str(e)}", OutputType.WARNING)
         # 回退到基于字符的粗略估计
@@ -37,17 +37,17 @@ def get_context_token_count(text: str) -> int:
 def load_embedding_model() -> SentenceTransformer:
     """
     加载句子嵌入模型,使用缓存避免重复加载。
-
+
     返回:
         SentenceTransformer: 加载的嵌入模型
     """
     model_name = "BAAI/bge-m3"
     cache_dir = os.path.expanduser("~/.cache/huggingface/hub")
-
+
     # 检查全局缓存中是否已有模型
     if model_name in _global_models:
         return _global_models[model_name]
-
+
     try:
         embedding_model = SentenceTransformer(
             model_name,
@@ -60,28 +60,28 @@ def load_embedding_model() -> SentenceTransformer:
             cache_folder=cache_dir,
             local_files_only=False
         )
-
+
         # 如果可用,将模型移到GPU上
         if torch.cuda.is_available():
             embedding_model.to(torch.device("cuda"))
-
+
         # 保存到全局缓存
         _global_models[model_name] = embedding_model
-
+
         return embedding_model
 
 def get_embedding(embedding_model: Any, text: str) -> np.ndarray:
     """
     为给定文本生成嵌入向量。
-
+
     参数:
         embedding_model: 使用的嵌入模型
         text: 要嵌入的输入文本
-
+
     返回:
         np.ndarray: 嵌入向量
     """
-    embedding = embedding_model.encode(text,
+    embedding = embedding_model.encode(text,
                                        normalize_embeddings=True,
                                        show_progress_bar=False)
     return np.array(embedding, dtype=np.float32)
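For orientation, a minimal usage sketch of the two helpers changed above. This is illustrative only: it assumes jarvis-ai-assistant and its model dependencies are installed and that the BAAI/bge-m3 weights can be downloaded or found in ~/.cache/huggingface/hub; the sample text is made up.

    # Illustrative usage of the helpers from the diff above; not part of the diff itself.
    from jarvis.jarvis_utils.embedding import load_embedding_model, get_embedding

    model = load_embedding_model()              # cached in _global_models after the first call
    vec = get_embedding(model, "检索增强生成")    # normalized float32 vector
    print(vec.shape, float((vec ** 2).sum()) ** 0.5)  # dimension and a norm close to 1.0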
@@ -89,53 +89,53 @@ def get_embedding(embedding_model: Any, text: str) -> np.ndarray:
 def get_embedding_batch(embedding_model: Any, prefix: str, texts: List[str], spinner: Optional[Yaspin] = None, batch_size: int = 8) -> np.ndarray:
     """
     为一批文本生成嵌入向量,使用高效的批处理,针对RAG优化。
-
+
     参数:
         embedding_model: 使用的嵌入模型
         prefix: 进度条前缀
         texts: 要嵌入的文本列表
         spinner: 可选的进度指示器
         batch_size: 批处理大小,更大的值可能更快但需要更多内存
-
+
     返回:
         np.ndarray: 堆叠的嵌入向量
     """
     # 简单嵌入缓存,避免重复计算相同文本块
     embedding_cache = {}
     cache_hits = 0
-
+
     try:
         # 预处理:将所有文本分块
         all_chunks = []
         chunk_indices = [] # 跟踪每个原始文本对应的块索引
-
+
         for i, text in enumerate(texts):
             if spinner:
                 spinner.text = f"{prefix} 预处理中 ({i+1}/{len(texts)}) ..."
-
+
             # 预处理文本:移除多余空白,规范化
             text = ' '.join(text.split()) if text else ""
-
+
             # 使用更优化的分块函数
             chunks = split_text_into_chunks(text, 512)
             start_idx = len(all_chunks)
             all_chunks.extend(chunks)
             end_idx = len(all_chunks)
             chunk_indices.append((start_idx, end_idx))
-
+
         if not all_chunks:
             return np.zeros((0, embedding_model.get_sentence_embedding_dimension()), dtype=np.float32)
-
+
         # 批量处理所有块
         all_vectors = []
         for i in range(0, len(all_chunks), batch_size):
             if spinner:
                 spinner.text = f"{prefix} 批量处理嵌入 ({i+1}/{len(all_chunks)}) ..."
-
+
             batch = all_chunks[i:i+batch_size]
             batch_to_process = []
             batch_indices = []
-
+
             # 检查缓存,避免重复计算
             for j, chunk in enumerate(batch):
                 chunk_hash = hash(chunk)
@@ -145,16 +145,16 @@ def get_embedding_batch(embedding_model: Any, prefix: str, texts: List[str], spi
                 else:
                     batch_to_process.append(chunk)
                     batch_indices.append(j)
-
+
             if batch_to_process:
                 # 对未缓存的块处理
                 batch_vectors = embedding_model.encode(
-                    batch_to_process,
+                    batch_to_process,
                     normalize_embeddings=True,
                     show_progress_bar=False,
                     convert_to_numpy=True,
                 )
-
+
                 # 处理结果并更新缓存
                 if len(batch_to_process) == 1:
                     vec = batch_vectors
@@ -166,7 +166,7 @@ def get_embedding_batch(embedding_model: Any, prefix: str, texts: List[str], spi
                     chunk_hash = hash(batch_to_process[j])
                     embedding_cache[chunk_hash] = vec
                     all_vectors.append(vec)
-
+
         # 组织结果到原始文本顺序
         result_vectors = []
         for start_idx, end_idx in chunk_indices:
@@ -174,73 +174,73 @@ def get_embedding_batch(embedding_model: Any, prefix: str, texts: List[str], spi
             for j in range(start_idx, end_idx):
                 if j < len(all_vectors):
                     text_vectors.append(all_vectors[j])
-
+
             if text_vectors:
                 # 当一个文本被分成多个块时,采用加权平均
                 if len(text_vectors) > 1:
                     # 针对RAG优化:对多个块进行加权平均,前面的块权重略高
                     weights = np.linspace(1.0, 0.8, len(text_vectors))
                     weights = weights / weights.sum() # 归一化权重
-
+
                     # 应用权重并求和
                     weighted_sum = np.zeros_like(text_vectors[0])
                     for i, vec in enumerate(text_vectors):
                         # 确保向量形状一致,处理可能的维度不匹配问题
                         vec_array = np.asarray(vec).reshape(weighted_sum.shape)
                         weighted_sum += vec_array * weights[i]
-
+
                     # 归一化结果向量
                     norm = np.linalg.norm(weighted_sum)
                     if norm > 0:
                         weighted_sum = weighted_sum / norm
-
+
                     result_vectors.append(weighted_sum)
                 else:
                     # 单块直接使用
                     result_vectors.append(text_vectors[0])
-
+
         if spinner and cache_hits > 0:
             spinner.text = f"{prefix} 缓存命中: {cache_hits}/{len(all_chunks)} 块"
-
+
         return np.vstack(result_vectors)
-
+
     except Exception as e:
         PrettyOutput.print(f"批量嵌入失败: {str(e)}", OutputType.ERROR)
         return np.zeros((0, embedding_model.get_sentence_embedding_dimension()), dtype=np.float32)
-
+
 def split_text_into_chunks(text: str, max_length: int = 512, min_length: int = 50) -> List[str]:
     """将文本分割成带重叠窗口的块,优化RAG检索效果。
-
+
     参数:
         text: 要分割的输入文本
         max_length: 每个块的最大长度
         min_length: 每个块的最小长度(除了最后一块可能较短)
-
+
     返回:
         List[str]: 文本块列表,每个块的长度尽可能接近但不超过max_length
     """
     if not text:
         return []
-
+
     # 如果文本长度小于最大长度,直接返回整个文本
     if len(text) <= max_length:
         return [text]
-
+
     # 预处理:规范化文本,移除多余空白字符
     text = ' '.join(text.split())
-
+
     # 中英文标点符号集合,优化RAG召回的句子边界
     primary_punctuation = {'.', '!', '?', '\n', '。', '!', '?'} # 主要句末标点
     secondary_punctuation = {';', ':', '…', ';', ':'} # 次级分隔符
     tertiary_punctuation = {',', ',', '、', ')', ')', ']', '】', '}', '》', '"', "'"} # 最低优先级
-
+
     chunks = []
     start = 0
-
+
     while start < len(text):
         # 初始化结束位置为最大可能长度
         end = min(start + max_length, len(text))
-
+
         # 只有当不是最后一块且结束位置等于最大长度时,才尝试寻找句子边界
         if end < len(text) and end == start + max_length:
             # 优先查找段落边界,这对RAG特别重要
@@ -251,17 +251,17 @@ def split_text_into_chunks(text: str, max_length: int = 512, min_length: int = 5
             # 寻找句子边界,从end-1位置开始
             found_boundary = False
             best_boundary = -1
-
+
             # 扩大搜索范围以找到更好的语义边界
             search_range = min(120, end - start - min_length) # 扩大搜索范围,但确保新块不小于min_length
-
+
             # 先尝试找主要标点(句号等)
             for i in range(end-1, max(start, end-search_range), -1):
                 if text[i] in primary_punctuation:
                     best_boundary = i
                     found_boundary = True
                     break
-
+
             # 如果没找到主要标点,再找次要标点(分号、冒号等)
             if not found_boundary:
                 for i in range(end-1, max(start, end-search_range), -1):
@@ -269,7 +269,7 @@ def split_text_into_chunks(text: str, max_length: int = 512, min_length: int = 5
                         best_boundary = i
                         found_boundary = True
                         break
-
+
             # 最后考虑逗号和其他可能的边界
             if not found_boundary:
                 for i in range(end-1, max(start, end-search_range), -1):
@@ -277,11 +277,11 @@ def split_text_into_chunks(text: str, max_length: int = 512, min_length: int = 5
                         best_boundary = i
                         found_boundary = True
                         break
-
+
             # 如果找到了合适的边界且不会导致太短的块,使用它
             if found_boundary and (best_boundary - start) >= min_length:
                 end = best_boundary + 1
-
+
         # 添加当前块,并确保删除开头和结尾的空白字符
         chunk = text[start:end].strip()
         if chunk and len(chunk) >= min_length: # 只添加符合最小长度的非空块
@@ -295,16 +295,16 @@ def split_text_into_chunks(text: str, max_length: int = 512, min_length: int = 5
             else:
                 # 如果合并会导致太长,添加这个小块(特殊情况)
                 chunks.append(chunk)
-
+
         # 计算下一块的开始位置,调整重叠窗口大小以提高RAG检索质量
         next_start = end - int(max_length * 0.2) # 20%的重叠窗口大小
-
+
         # 确保总是有前进,避免无限循环
         if next_start <= start:
             next_start = start + max(1, min_length // 2)
-
+
         start = next_start
-
+
     # 最后检查是否有太短的块,尝试合并相邻的短块
     if len(chunks) > 1:
         merged_chunks = []
@@ -321,7 +321,7 @@ def split_text_into_chunks(text: str, max_length: int = 512, min_length: int = 5
                 merged_chunks.append(current)
                 i += 1
         chunks = merged_chunks
-
+
     return chunks
 
 
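The chunking helper above cuts at sentence punctuation where possible, caps each chunk at max_length, and advances with a roughly 20% overlapping window (next_start = end - int(max_length * 0.2)). A hedged usage sketch, assuming the package is installed; the sample text is invented:

    # Illustrative usage of split_text_into_chunks from the diff above.
    from jarvis.jarvis_utils.embedding import split_text_into_chunks

    text = "这是一个用于演示的句子。" * 300   # long synthetic text with sentence punctuation
    chunks = split_text_into_chunks(text, max_length=512, min_length=50)

    # Chunks are cut near sentence boundaries, capped at max_length, and
    # consecutive chunks overlap by about 20% of max_length for better RAG recall.
    for i, chunk in enumerate(chunks):
        print(i, len(chunk))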
@@ -329,17 +329,17 @@ def split_text_into_chunks(text: str, max_length: int = 512, min_length: int = 5
 def load_tokenizer() -> AutoTokenizer:
     """
     加载用于文本处理的分词器,使用缓存避免重复加载。
-
+
     返回:
         AutoTokenizer: 加载的分词器
     """
     model_name = "gpt2"
     cache_dir = os.path.expanduser("~/.cache/huggingface/hub")
-
+
     # 检查全局缓存
     if model_name in _global_tokenizers:
         return _global_tokenizers[model_name]
-
+
     try:
         tokenizer = AutoTokenizer.from_pretrained(
             model_name,
@@ -352,28 +352,28 @@ def load_tokenizer() -> AutoTokenizer:
             cache_dir=cache_dir,
             local_files_only=False
         )
-
+
         # 保存到全局缓存
         _global_tokenizers[model_name] = tokenizer
-
+
         return tokenizer # type: ignore
 
 @functools.lru_cache(maxsize=1)
 def load_rerank_model() -> Tuple[AutoModelForSequenceClassification, AutoTokenizer]:
     """
     加载重排序模型和分词器,使用缓存避免重复加载。
-
+
     返回:
         Tuple[AutoModelForSequenceClassification, AutoTokenizer]: 加载的模型和分词器
     """
     model_name = "BAAI/bge-reranker-v2-m3"
     cache_dir = os.path.expanduser("~/.cache/huggingface/hub")
-
+
     # 检查全局缓存
     key = f"rerank_{model_name}"
     if key in _global_models and f"{key}_tokenizer" in _global_tokenizers:
         return _global_models[key], _global_tokenizers[f"{key}_tokenizer"]
-
+
     try:
         tokenizer = AutoTokenizer.from_pretrained(
             model_name,
@@ -396,53 +396,53 @@ def load_rerank_model() -> Tuple[AutoModelForSequenceClassification, AutoTokeniz
             cache_dir=cache_dir,
             local_files_only=False
         )
-
+
         if torch.cuda.is_available():
             model = model.cuda()
         model.eval()
-
+
         # 保存到全局缓存
         _global_models[key] = model
         _global_tokenizers[f"{key}_tokenizer"] = tokenizer
-
+
         return model, tokenizer # type: ignore
 
-def rerank_results(query: str, documents: List[str], initial_scores: Optional[List[float]] = None,
+def rerank_results(query: str, documents: List[str], initial_scores: Optional[List[float]] = None,
                    batch_size: int = 8, spinner: Optional[Yaspin] = None) -> List[float]:
     """
     使用交叉编码器重排序检索结果,提高RAG精度。
-
+
     参数:
         query: 查询文本
         documents: 要重排序的文档内容列表
         initial_scores: 初始检索分数,可选。如果提供,将与重排序分数融合
         batch_size: 批处理大小
         spinner: 可选的进度指示器
-
+
     返回:
         List[float]: 重排序后的分数列表,与输入文档对应
     """
     try:
         if not documents:
             return []
-
+
         # 加载重排序模型
         if spinner:
             spinner.text = "加载重排序模型..."
         model, tokenizer = load_rerank_model()
-
+
         # 准备评分
         all_scores = []
-
+
         # 批量处理
         for i in range(0, len(documents), batch_size):
             if spinner:
                 spinner.text = f"重排序进度: {i}/{len(documents)}..."
-
+
             # 准备当前批次
             batch_docs = documents[i:i+batch_size]
             pairs = [(query, doc) for doc in batch_docs]
-
+
             # 编码输入
             with torch.no_grad():
                 # 使用类型忽略以避免mypy错误
453
453
  return_tensors="pt",
454
454
  max_length=512
455
455
  )
456
-
456
+
457
457
  # 使用GPU加速(如果可用)
458
458
  if torch.cuda.is_available():
459
459
  inputs = {k: v.cuda() for k, v in inputs.items()}
460
-
460
+
461
461
  # 获取分数
462
462
  outputs = model(**inputs) # type: ignore
463
463
  scores = outputs.logits.squeeze(-1).cpu().tolist()
464
-
464
+
465
465
  # 如果只有一个文档,确保返回列表
466
466
  if len(batch_docs) == 1:
467
467
  all_scores.append(float(scores))
468
468
  else:
469
469
  all_scores.extend(scores)
470
-
470
+
471
471
  # 归一化分数到0-1范围
472
472
  if all_scores:
473
473
  min_score = min(all_scores)
@@ -476,26 +476,26 @@ def rerank_results(query: str, documents: List[str], initial_scores: Optional[Li
476
476
  normalized_scores = [(score - min_score) / (max_score - min_score) for score in all_scores]
477
477
  else:
478
478
  normalized_scores = [0.5] * len(all_scores)
479
-
479
+
480
480
  # 融合初始分数(如果提供)
481
481
  if initial_scores and len(initial_scores) == len(normalized_scores):
482
482
  # 使用加权平均融合分数:初始分数权重0.3,重排序分数权重0.7
483
- final_scores = [0.3 * init_score + 0.7 * rerank_score
483
+ final_scores = [0.3 * init_score + 0.7 * rerank_score
484
484
  for init_score, rerank_score in zip(initial_scores, normalized_scores)]
485
485
  return final_scores
486
-
486
+
487
487
  return normalized_scores
488
-
488
+
489
489
  if spinner:
490
490
  spinner.text = "重排序完成"
491
-
491
+
492
492
  # 如果重排序失败,返回初始分数或默认分数
493
493
  return initial_scores if initial_scores else [0.5] * len(documents)
494
-
494
+
495
495
  except Exception as e:
496
496
  PrettyOutput.print(f"重排序失败: {str(e)}", OutputType.ERROR)
497
497
  if spinner:
498
498
  spinner.text = f"重排序失败: {str(e)}"
499
-
499
+
500
500
  # 发生错误时回退到初始分数
501
501
  return initial_scores if initial_scores else [0.5] * len(documents)
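The reranking path above min-max normalizes the cross-encoder scores to 0-1 and, when initial retrieval scores are supplied, blends them with fixed weights (0.3 initial, 0.7 rerank). A standalone sketch of just that fusion step; the example scores are invented, only the weighting scheme comes from the code above:

    from typing import List, Optional

    def fuse_scores(rerank_scores: List[float],
                    initial_scores: Optional[List[float]] = None) -> List[float]:
        """Mirror of the normalization and 0.3/0.7 fusion step in rerank_results above."""
        if not rerank_scores:
            return []
        lo, hi = min(rerank_scores), max(rerank_scores)
        # Min-max normalize to 0-1; fall back to 0.5 when all scores are equal.
        normalized = ([(s - lo) / (hi - lo) for s in rerank_scores]
                      if hi > lo else [0.5] * len(rerank_scores))
        if initial_scores and len(initial_scores) == len(normalized):
            return [0.3 * a + 0.7 * b for a, b in zip(initial_scores, normalized)]
        return normalized

    print(fuse_scores([2.1, -0.4, 0.9], initial_scores=[0.8, 0.2, 0.5]))  # invented values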
jarvis/jarvis_utils/git_utils.py

@@ -16,10 +16,10 @@ from jarvis.jarvis_utils.output import PrettyOutput, OutputType
 def find_git_root(start_dir="."):
     """
     切换到给定路径的Git根目录,如果不是Git仓库则初始化。
-
+
     参数:
         start_dir (str): 起始查找目录,默认为当前目录。
-
+
     返回:
         str: Git仓库根目录路径。如果目录不是Git仓库,则会初始化一个新的Git仓库。
     """
@@ -39,28 +39,28 @@ def has_uncommitted_changes():
     """检查Git仓库中是否有未提交的更改"""
     # 静默添加所有更改
     subprocess.run(["git", "add", "."], stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)
-
+
     # 检查工作目录更改
-    working_changes = subprocess.run(["git", "diff", "--exit-code"],
-                                     stdout=subprocess.DEVNULL,
+    working_changes = subprocess.run(["git", "diff", "--exit-code"],
+                                     stdout=subprocess.DEVNULL,
                                      stderr=subprocess.DEVNULL).returncode != 0
-
+
     # 检查暂存区更改
-    staged_changes = subprocess.run(["git", "diff", "--cached", "--exit-code"],
-                                    stdout=subprocess.DEVNULL,
+    staged_changes = subprocess.run(["git", "diff", "--cached", "--exit-code"],
+                                    stdout=subprocess.DEVNULL,
                                     stderr=subprocess.DEVNULL).returncode != 0
-
+
     # 静默重置更改
     subprocess.run(["git", "reset"], stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)
-
+
     return working_changes or staged_changes
 def get_commits_between(start_hash: str, end_hash: str) -> List[Tuple[str, str]]:
     """获取两个提交哈希值之间的提交列表
-
+
     参数:
         start_hash: 起始提交哈希值(不包含)
         end_hash: 结束提交哈希值(包含)
-
+
     返回:
         List[Tuple[str, str]]: (提交哈希值, 提交信息) 元组列表
     """
@@ -75,20 +75,20 @@ def get_commits_between(start_hash: str, end_hash: str) -> List[Tuple[str, str]]
         if result.returncode != 0:
             PrettyOutput.print(f"获取commit历史失败: {result.stderr}", OutputType.ERROR)
             return []
-
+
         commits = []
         for line in result.stdout.splitlines():
             if '|' in line:
                 commit_hash, message = line.split('|', 1)
                 commits.append((commit_hash, message))
         return commits
-
+
     except Exception as e:
         PrettyOutput.print(f"获取commit历史异常: {str(e)}", OutputType.ERROR)
         return []
 def get_latest_commit_hash() -> str:
     """获取当前Git仓库的最新提交哈希值
-
+
     返回:
         str: 提交哈希值,如果不在Git仓库或发生错误则返回空字符串
     """
@@ -106,25 +106,25 @@ def get_latest_commit_hash() -> str:
         return ""
 def get_modified_line_ranges() -> Dict[str, Tuple[int, int]]:
     """从Git差异中获取所有更改文件的修改行范围
-
+
     返回:
         字典,将文件路径映射到包含修改部分的(起始行, 结束行)范围元组。
         行号从1开始。
     """
     # 获取所有文件的Git差异
     diff_output = os.popen("git show").read()
-
+
     # 解析差异以获取修改的文件及其行范围
     result = {}
     current_file = None
-
+
     for line in diff_output.splitlines():
         # 匹配类似"+++ b/path/to/file"的行
         file_match = re.match(r"^\+\+\+ b/(.*)", line)
         if file_match:
             current_file = file_match.group(1)
             continue
-
+
         # 匹配类似"@@ -100,5 +100,7 @@"的行,其中+部分显示新行
         range_match = re.match(r"^@@ -\d+(?:,\d+)? \+(\d+)(?:,(\d+))? @@", line)
         if range_match and current_file:
@@ -132,5 +132,5 @@ def get_modified_line_ranges() -> Dict[str, Tuple[int, int]]:
             line_count = int(range_match.group(2)) if range_match.group(2) else 1
             end_line = start_line + line_count - 1
             result[current_file] = (start_line, end_line)
-
+
     return result
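get_modified_line_ranges recovers the new-file line range from hunk headers such as "@@ -100,5 +100,7 @@". A minimal standalone check of that parsing logic; the sample diff lines are invented, while the two regular expressions and the start/end arithmetic come from the code above:

    import re

    # Invented two-line sample in unified-diff format.
    sample_lines = [
        "+++ b/jarvis/jarvis_utils/embedding.py",
        "@@ -100,5 +100,7 @@ def get_embedding_batch(...):",
    ]

    result = {}
    current_file = None
    for line in sample_lines:
        file_match = re.match(r"^\+\+\+ b/(.*)", line)
        if file_match:
            current_file = file_match.group(1)
            continue
        # Same hunk-header pattern as get_modified_line_ranges above.
        range_match = re.match(r"^@@ -\d+(?:,\d+)? \+(\d+)(?:,(\d+))? @@", line)
        if range_match and current_file:
            start_line = int(range_match.group(1))
            line_count = int(range_match.group(2)) if range_match.group(2) else 1
            result[current_file] = (start_line, start_line + line_count - 1)

    print(result)  # {'jarvis/jarvis_utils/embedding.py': (100, 106)}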