jarvis-ai-assistant 0.1.126__py3-none-any.whl → 0.1.129__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of jarvis-ai-assistant might be problematic. Click here for more details.
- jarvis/__init__.py +1 -1
- jarvis/jarvis_agent/__init__.py +108 -95
- jarvis/jarvis_agent/main.py +77 -0
- jarvis/jarvis_code_agent/builtin_input_handler.py +43 -0
- jarvis/jarvis_code_agent/code_agent.py +17 -81
- jarvis/jarvis_code_agent/file_input_handler.py +88 -0
- jarvis/jarvis_code_agent/patch.py +142 -114
- jarvis/jarvis_code_agent/shell_input_handler.py +8 -2
- jarvis/jarvis_codebase/main.py +240 -213
- jarvis/jarvis_dev/main.py +4 -3
- jarvis/jarvis_multi_agent/__init__.py +51 -40
- jarvis/jarvis_platform/base.py +6 -5
- jarvis/jarvis_platform_manager/main.py +1 -1
- jarvis/jarvis_rag/main.py +250 -186
- jarvis/jarvis_smart_shell/main.py +0 -1
- jarvis/jarvis_tools/ask_codebase.py +4 -3
- jarvis/jarvis_tools/chdir.py +22 -22
- jarvis/jarvis_tools/code_review.py +38 -33
- jarvis/jarvis_tools/execute_shell.py +0 -3
- jarvis/jarvis_tools/file_operation.py +56 -55
- jarvis/jarvis_tools/git_commiter.py +60 -50
- jarvis/jarvis_tools/read_code.py +143 -0
- jarvis/jarvis_tools/read_webpage.py +50 -30
- jarvis/jarvis_tools/registry.py +4 -21
- jarvis/jarvis_tools/search_web.py +61 -36
- jarvis/jarvis_tools/tool_generator.py +78 -36
- jarvis/jarvis_utils/__init__.py +17 -17
- jarvis/jarvis_utils/config.py +87 -51
- jarvis/jarvis_utils/embedding.py +49 -48
- jarvis/jarvis_utils/git_utils.py +34 -34
- jarvis/jarvis_utils/globals.py +26 -26
- jarvis/jarvis_utils/input.py +61 -45
- jarvis/jarvis_utils/methodology.py +94 -76
- jarvis/jarvis_utils/output.py +63 -62
- jarvis/jarvis_utils/utils.py +2 -2
- {jarvis_ai_assistant-0.1.126.dist-info → jarvis_ai_assistant-0.1.129.dist-info}/METADATA +1 -1
- jarvis_ai_assistant-0.1.129.dist-info/RECORD +78 -0
- {jarvis_ai_assistant-0.1.126.dist-info → jarvis_ai_assistant-0.1.129.dist-info}/entry_points.txt +2 -0
- jarvis_ai_assistant-0.1.126.dist-info/RECORD +0 -74
- {jarvis_ai_assistant-0.1.126.dist-info → jarvis_ai_assistant-0.1.129.dist-info}/LICENSE +0 -0
- {jarvis_ai_assistant-0.1.126.dist-info → jarvis_ai_assistant-0.1.129.dist-info}/WHEEL +0 -0
- {jarvis_ai_assistant-0.1.126.dist-info → jarvis_ai_assistant-0.1.129.dist-info}/top_level.txt +0 -0
jarvis/jarvis_rag/main.py
CHANGED
|
@@ -8,6 +8,8 @@ from tqdm import tqdm
|
|
|
8
8
|
import fitz # PyMuPDF for PDF files
|
|
9
9
|
from docx import Document as DocxDocument # python-docx for DOCX files
|
|
10
10
|
from pathlib import Path
|
|
11
|
+
|
|
12
|
+
from yaspin import yaspin
|
|
11
13
|
from jarvis.jarvis_platform.registry import PlatformRegistry
|
|
12
14
|
import lzma # 添加 lzma 导入
|
|
13
15
|
from threading import Lock
|
|
@@ -138,56 +140,80 @@ class RAGTool:
|
|
|
138
140
|
Args:
|
|
139
141
|
root_dir: Project root directory
|
|
140
142
|
"""
|
|
141
|
-
|
|
142
|
-
|
|
143
|
-
|
|
143
|
+
with yaspin(text="初始化环境...", color="cyan") as spinner:
|
|
144
|
+
init_env()
|
|
145
|
+
self.root_dir = root_dir
|
|
146
|
+
os.chdir(self.root_dir)
|
|
147
|
+
spinner.text = "环境初始化完成"
|
|
148
|
+
spinner.ok("✅")
|
|
144
149
|
|
|
145
150
|
# Initialize configuration
|
|
146
|
-
|
|
147
|
-
|
|
148
|
-
|
|
149
|
-
|
|
151
|
+
with yaspin(text="初始化配置...", color="cyan") as spinner:
|
|
152
|
+
self.min_paragraph_length = get_min_paragraph_length() # Minimum paragraph length
|
|
153
|
+
self.max_paragraph_length = get_max_paragraph_length() # Maximum paragraph length
|
|
154
|
+
self.context_window = 5 # Fixed context window size
|
|
155
|
+
self.max_token_count = int(get_max_token_count() * 0.8)
|
|
156
|
+
spinner.text = "配置初始化完成"
|
|
157
|
+
spinner.ok("✅")
|
|
150
158
|
|
|
151
159
|
# Initialize data directory
|
|
152
|
-
|
|
153
|
-
|
|
154
|
-
os.
|
|
160
|
+
with yaspin(text="初始化数据目录...", color="cyan") as spinner:
|
|
161
|
+
self.data_dir = os.path.join(self.root_dir, ".jarvis/rag")
|
|
162
|
+
if not os.path.exists(self.data_dir):
|
|
163
|
+
os.makedirs(self.data_dir)
|
|
164
|
+
spinner.text = "数据目录初始化完成"
|
|
165
|
+
spinner.ok("✅")
|
|
155
166
|
|
|
156
167
|
# Initialize embedding model
|
|
157
|
-
|
|
158
|
-
|
|
159
|
-
|
|
160
|
-
|
|
161
|
-
|
|
162
|
-
|
|
163
|
-
|
|
164
|
-
|
|
165
|
-
|
|
166
|
-
|
|
167
|
-
|
|
168
|
-
|
|
169
|
-
|
|
170
|
-
|
|
171
|
-
|
|
172
|
-
|
|
173
|
-
|
|
168
|
+
with yaspin(text="初始化模型...", color="cyan") as spinner:
|
|
169
|
+
try:
|
|
170
|
+
self.embedding_model = load_embedding_model()
|
|
171
|
+
self.vector_dim = self.embedding_model.get_sentence_embedding_dimension()
|
|
172
|
+
spinner.text = "模型加载完成"
|
|
173
|
+
spinner.ok("✅")
|
|
174
|
+
except Exception as e:
|
|
175
|
+
spinner.text = "模型加载失败"
|
|
176
|
+
spinner.fail("❌")
|
|
177
|
+
raise
|
|
178
|
+
|
|
179
|
+
with yaspin(text="初始化缓存目录...", color="cyan") as spinner:
|
|
180
|
+
self.cache_dir = os.path.join(self.data_dir, "cache")
|
|
181
|
+
if not os.path.exists(self.cache_dir):
|
|
182
|
+
os.makedirs(self.cache_dir)
|
|
183
|
+
|
|
184
|
+
self.documents: List[Document] = []
|
|
185
|
+
self.index = None
|
|
186
|
+
self.flat_index = None
|
|
187
|
+
self.file_md5_cache = {}
|
|
188
|
+
spinner.text = "缓存目录初始化完成"
|
|
189
|
+
spinner.ok("✅")
|
|
174
190
|
|
|
175
191
|
# 加载缓存索引
|
|
176
192
|
self._load_cache_index()
|
|
177
193
|
|
|
178
194
|
# Register file processors
|
|
179
|
-
|
|
180
|
-
|
|
181
|
-
|
|
182
|
-
|
|
183
|
-
|
|
195
|
+
with yaspin(text="初始化文件处理器...", color="cyan") as spinner:
|
|
196
|
+
self.file_processors = [
|
|
197
|
+
TextFileProcessor(),
|
|
198
|
+
PDFProcessor(),
|
|
199
|
+
DocxProcessor()
|
|
200
|
+
]
|
|
201
|
+
spinner.text = "文件处理器初始化完成"
|
|
202
|
+
spinner.ok("✅")
|
|
203
|
+
|
|
184
204
|
|
|
185
205
|
# Add thread related configuration
|
|
186
|
-
|
|
187
|
-
|
|
206
|
+
with yaspin(text="初始化线程配置...", color="cyan") as spinner:
|
|
207
|
+
self.thread_count = get_thread_count()
|
|
208
|
+
self.vector_lock = Lock() # Protect vector list concurrency
|
|
209
|
+
spinner.text = "线程配置初始化完成"
|
|
210
|
+
spinner.ok("✅")
|
|
188
211
|
|
|
189
212
|
# 初始化 GPU 内存配置
|
|
190
|
-
|
|
213
|
+
with yaspin(text="初始化 GPU 内存配置...", color="cyan") as spinner:
|
|
214
|
+
self.gpu_config = init_gpu_config()
|
|
215
|
+
spinner.text = "GPU 内存配置初始化完成"
|
|
216
|
+
spinner.ok("✅")
|
|
191
217
|
|
|
192
218
|
|
|
193
219
|
def _get_cache_path(self, file_path: str) -> str:
|
|
@@ -208,41 +234,48 @@ class RAGTool:
|
|
|
208
234
|
index_path = os.path.join(self.data_dir, "index.pkl")
|
|
209
235
|
if os.path.exists(index_path):
|
|
210
236
|
try:
|
|
211
|
-
with
|
|
212
|
-
|
|
213
|
-
|
|
214
|
-
|
|
237
|
+
with yaspin(text="加载缓存索引...", color="cyan") as spinner:
|
|
238
|
+
with lzma.open(index_path, 'rb') as f:
|
|
239
|
+
cache_data = pickle.load(f)
|
|
240
|
+
self.file_md5_cache = cache_data.get("file_md5_cache", {})
|
|
241
|
+
spinner.text = "缓存索引加载完成"
|
|
242
|
+
spinner.ok("✅")
|
|
243
|
+
|
|
215
244
|
# 从各个缓存文件加载文档
|
|
216
|
-
|
|
217
|
-
|
|
218
|
-
|
|
219
|
-
|
|
220
|
-
|
|
221
|
-
|
|
222
|
-
|
|
223
|
-
|
|
224
|
-
|
|
225
|
-
|
|
245
|
+
with yaspin(text="加载缓存文件...", color="cyan") as spinner:
|
|
246
|
+
for file_path in self.file_md5_cache:
|
|
247
|
+
cache_path = self._get_cache_path(file_path)
|
|
248
|
+
if os.path.exists(cache_path):
|
|
249
|
+
try:
|
|
250
|
+
with lzma.open(cache_path, 'rb') as f:
|
|
251
|
+
file_cache = pickle.load(f)
|
|
252
|
+
self.documents.extend(file_cache["documents"])
|
|
253
|
+
spinner.write(f"✅ 加载缓存文件: {file_path}")
|
|
254
|
+
except Exception as e:
|
|
255
|
+
spinner.write(f"❌ 加载缓存文件失败: {file_path}: {str(e)}")
|
|
256
|
+
spinner.text = "缓存文件加载完成"
|
|
257
|
+
spinner.ok("✅")
|
|
226
258
|
|
|
227
259
|
# 重建向量索引
|
|
260
|
+
|
|
228
261
|
if self.documents:
|
|
229
|
-
|
|
230
|
-
|
|
231
|
-
|
|
232
|
-
|
|
233
|
-
|
|
234
|
-
|
|
235
|
-
|
|
236
|
-
|
|
237
|
-
|
|
238
|
-
|
|
239
|
-
|
|
240
|
-
if vectors:
|
|
241
|
-
vectors = np.vstack(vectors)
|
|
242
|
-
self._build_index(vectors)
|
|
262
|
+
with yaspin(text="重建向量索引...", color="cyan") as spinner:
|
|
263
|
+
vectors = []
|
|
264
|
+
for doc in self.documents:
|
|
265
|
+
cache_path = self._get_cache_path(doc.metadata['file_path'])
|
|
266
|
+
if os.path.exists(cache_path):
|
|
267
|
+
with lzma.open(cache_path, 'rb') as f:
|
|
268
|
+
file_cache = pickle.load(f)
|
|
269
|
+
doc_idx = next((i for i, d in enumerate(file_cache["documents"])
|
|
270
|
+
if d.metadata['chunk_index'] == doc.metadata['chunk_index']), None)
|
|
271
|
+
if doc_idx is not None:
|
|
272
|
+
vectors.append(file_cache["vectors"][doc_idx])
|
|
243
273
|
|
|
244
|
-
|
|
245
|
-
|
|
274
|
+
if vectors:
|
|
275
|
+
vectors = np.vstack(vectors)
|
|
276
|
+
self._build_index(vectors)
|
|
277
|
+
spinner.text = "向量索引重建完成,加载 {len(self.documents)} 个文档片段"
|
|
278
|
+
spinner.ok("✅")
|
|
246
279
|
|
|
247
280
|
except Exception as e:
|
|
248
281
|
PrettyOutput.print(f"加载缓存索引失败: {str(e)}",
|
|
@@ -446,37 +479,42 @@ class RAGTool:
|
|
|
446
479
|
def build_index(self, dir: str):
|
|
447
480
|
"""Build document index with optimized processing"""
|
|
448
481
|
# Get all files
|
|
449
|
-
|
|
450
|
-
|
|
451
|
-
|
|
452
|
-
|
|
453
|
-
|
|
454
|
-
|
|
455
|
-
|
|
456
|
-
for file in files:
|
|
457
|
-
# Skip .jarvis files
|
|
458
|
-
if '.jarvis' in root:
|
|
482
|
+
with yaspin(text="获取所有文件...", color="cyan") as spinner:
|
|
483
|
+
all_files = []
|
|
484
|
+
for root, _, files in os.walk(dir):
|
|
485
|
+
# Skip .jarvis directories and other ignored paths
|
|
486
|
+
if any(ignored in root for ignored in ['.git', '__pycache__', 'node_modules', '.jarvis']) or \
|
|
487
|
+
any(part.startswith('.jarvis-') for part in root.split(os.sep)):
|
|
459
488
|
continue
|
|
460
489
|
|
|
461
|
-
|
|
462
|
-
|
|
463
|
-
|
|
464
|
-
|
|
465
|
-
|
|
466
|
-
|
|
490
|
+
for file in files:
|
|
491
|
+
# Skip .jarvis files
|
|
492
|
+
if '.jarvis' in root:
|
|
493
|
+
continue
|
|
494
|
+
|
|
495
|
+
file_path = os.path.join(root, file)
|
|
496
|
+
if os.path.getsize(file_path) > 100 * 1024 * 1024: # 100MB
|
|
497
|
+
PrettyOutput.print(f"Skip large file: {file_path}",
|
|
498
|
+
output_type=OutputType.WARNING)
|
|
499
|
+
continue
|
|
500
|
+
all_files.append(file_path)
|
|
501
|
+
spinner.text = f"获取所有文件完成,共 {len(all_files)} 个文件"
|
|
502
|
+
spinner.ok("✅")
|
|
467
503
|
|
|
468
504
|
# Clean up cache for deleted files
|
|
469
|
-
|
|
470
|
-
|
|
471
|
-
|
|
472
|
-
|
|
473
|
-
|
|
505
|
+
with yaspin(text="清理缓存...", color="cyan") as spinner:
|
|
506
|
+
deleted_files = set(self.file_md5_cache.keys()) - set(all_files)
|
|
507
|
+
for file_path in deleted_files:
|
|
508
|
+
del self.file_md5_cache[file_path]
|
|
509
|
+
# Remove related documents
|
|
510
|
+
self.documents = [doc for doc in self.documents if doc.metadata['file_path'] != file_path]
|
|
511
|
+
spinner.text = f"清理缓存完成,共 {len(deleted_files)} 个文件"
|
|
512
|
+
spinner.ok("✅")
|
|
474
513
|
|
|
475
514
|
# Check file changes
|
|
476
|
-
|
|
477
|
-
|
|
478
|
-
|
|
479
|
-
with tqdm(total=len(all_files), desc="Check file status") as pbar:
|
|
515
|
+
with yaspin(text="检查文件变化...", color="cyan") as spinner:
|
|
516
|
+
files_to_process = []
|
|
517
|
+
unchanged_files = []
|
|
480
518
|
for file_path in all_files:
|
|
481
519
|
current_md5 = get_file_md5(file_path)
|
|
482
520
|
if current_md5: # Only process files that can successfully calculate MD5
|
|
@@ -486,7 +524,9 @@ class RAGTool:
|
|
|
486
524
|
else:
|
|
487
525
|
# New file or modified file
|
|
488
526
|
files_to_process.append(file_path)
|
|
489
|
-
|
|
527
|
+
spinner.write(f"⚠️ 文件变化: {file_path}")
|
|
528
|
+
spinner.text = f"检查文件变化完成,共 {len(files_to_process)} 个文件需要处理"
|
|
529
|
+
spinner.ok("✅")
|
|
490
530
|
|
|
491
531
|
# Keep documents for unchanged files
|
|
492
532
|
unchanged_documents = [doc for doc in self.documents
|
|
@@ -494,13 +534,12 @@ class RAGTool:
|
|
|
494
534
|
|
|
495
535
|
# Process files one by one with optimized vectorization
|
|
496
536
|
if files_to_process:
|
|
497
|
-
PrettyOutput.print(f"Processing {len(files_to_process)} files...", OutputType.INFO)
|
|
498
|
-
|
|
499
537
|
new_documents = []
|
|
500
538
|
new_vectors = []
|
|
501
539
|
|
|
502
|
-
|
|
503
|
-
|
|
540
|
+
|
|
541
|
+
for file_path in files_to_process:
|
|
542
|
+
with yaspin(text=f"处理文件 {file_path} ...", color="cyan") as spinner:
|
|
504
543
|
try:
|
|
505
544
|
# Process single file
|
|
506
545
|
file_docs = self._process_file(file_path)
|
|
@@ -518,31 +557,38 @@ class RAGTool:
|
|
|
518
557
|
# Accumulate documents and vectors
|
|
519
558
|
new_documents.extend(file_docs)
|
|
520
559
|
new_vectors.append(file_vectors)
|
|
560
|
+
|
|
561
|
+
spinner.text = f"处理文件 {file_path} 完成"
|
|
562
|
+
spinner.ok("✅")
|
|
521
563
|
|
|
522
564
|
except Exception as e:
|
|
523
|
-
|
|
565
|
+
spinner.text = f"处理文件失败: {file_path}: {str(e)}"
|
|
566
|
+
spinner.fail("❌")
|
|
567
|
+
|
|
524
568
|
|
|
525
|
-
pbar.update(1)
|
|
526
|
-
|
|
527
569
|
# Update documents list
|
|
528
570
|
self.documents.extend(new_documents)
|
|
529
571
|
|
|
530
572
|
# Build final index
|
|
531
573
|
if new_vectors:
|
|
532
|
-
|
|
533
|
-
|
|
534
|
-
|
|
535
|
-
|
|
536
|
-
|
|
537
|
-
|
|
538
|
-
|
|
574
|
+
with yaspin(text="构建最终索引...", color="cyan") as spinner:
|
|
575
|
+
all_new_vectors = np.vstack(new_vectors)
|
|
576
|
+
|
|
577
|
+
if self.flat_index is not None:
|
|
578
|
+
# Get vectors for unchanged documents
|
|
579
|
+
unchanged_vectors = self._get_unchanged_vectors(unchanged_documents)
|
|
580
|
+
if unchanged_vectors is not None:
|
|
581
|
+
final_vectors = np.vstack([unchanged_vectors, all_new_vectors])
|
|
582
|
+
else:
|
|
583
|
+
final_vectors = all_new_vectors
|
|
539
584
|
else:
|
|
540
585
|
final_vectors = all_new_vectors
|
|
541
|
-
else:
|
|
542
|
-
final_vectors = all_new_vectors
|
|
543
586
|
|
|
544
|
-
|
|
545
|
-
|
|
587
|
+
# Build index
|
|
588
|
+
spinner.text = f"构建索引..."
|
|
589
|
+
self._build_index(final_vectors)
|
|
590
|
+
spinner.text = f"索引构建完成,共 {len(self.documents)} 个文档 "
|
|
591
|
+
spinner.ok("✅")
|
|
546
592
|
|
|
547
593
|
PrettyOutput.print(
|
|
548
594
|
f"索引 {len(self.documents)} 个文档 "
|
|
@@ -575,58 +621,69 @@ class RAGTool:
|
|
|
575
621
|
def search(self, query: str, top_k: int = 30) -> List[Tuple[Document, float]]:
|
|
576
622
|
"""Search documents with context window"""
|
|
577
623
|
if not self.index:
|
|
578
|
-
PrettyOutput.print("索引未构建,正在构建...", output_type=OutputType.INFO)
|
|
579
624
|
self.build_index(self.root_dir)
|
|
580
625
|
|
|
581
626
|
# Get query vector
|
|
582
|
-
|
|
583
|
-
|
|
627
|
+
with yaspin(text="获取查询向量...", color="cyan") as spinner:
|
|
628
|
+
query_vector = get_embedding(self.embedding_model, query)
|
|
629
|
+
query_vector = query_vector.reshape(1, -1)
|
|
630
|
+
spinner.text = "查询向量获取完成"
|
|
631
|
+
spinner.ok("✅")
|
|
584
632
|
|
|
585
633
|
# Search with more candidates
|
|
586
|
-
|
|
587
|
-
|
|
634
|
+
with yaspin(text="搜索...", color="cyan") as spinner:
|
|
635
|
+
initial_k = min(top_k * 4, len(self.documents))
|
|
636
|
+
distances, indices = self.index.search(query_vector, initial_k) # type: ignore
|
|
637
|
+
spinner.text = "搜索完成"
|
|
638
|
+
spinner.ok("✅")
|
|
588
639
|
|
|
589
640
|
# Process results with context window
|
|
590
|
-
|
|
591
|
-
|
|
592
|
-
|
|
593
|
-
|
|
594
|
-
|
|
595
|
-
|
|
596
|
-
|
|
597
|
-
|
|
598
|
-
|
|
599
|
-
|
|
600
|
-
seen_files
|
|
601
|
-
|
|
602
|
-
|
|
603
|
-
|
|
604
|
-
|
|
605
|
-
|
|
606
|
-
|
|
607
|
-
|
|
608
|
-
|
|
609
|
-
|
|
610
|
-
|
|
611
|
-
|
|
612
|
-
|
|
613
|
-
|
|
614
|
-
|
|
615
|
-
|
|
616
|
-
|
|
617
|
-
|
|
641
|
+
with yaspin(text="处理结果...", color="cyan") as spinner:
|
|
642
|
+
results = []
|
|
643
|
+
seen_files = set()
|
|
644
|
+
|
|
645
|
+
for idx, dist in zip(indices[0], distances[0]):
|
|
646
|
+
if idx != -1:
|
|
647
|
+
doc = self.documents[idx]
|
|
648
|
+
similarity = 1.0 / (1.0 + float(dist))
|
|
649
|
+
if similarity > 0.3:
|
|
650
|
+
file_path = doc.metadata['file_path']
|
|
651
|
+
if file_path not in seen_files:
|
|
652
|
+
seen_files.add(file_path)
|
|
653
|
+
|
|
654
|
+
# Get full context from original document
|
|
655
|
+
original_doc = next((d for d in self.documents
|
|
656
|
+
if d.metadata['file_path'] == file_path), None)
|
|
657
|
+
if original_doc:
|
|
658
|
+
window_docs = [] # Add this line to initialize the list
|
|
659
|
+
full_content = original_doc.content
|
|
660
|
+
# Find all chunks from this file
|
|
661
|
+
file_chunks = [d for d in self.documents
|
|
662
|
+
if d.metadata['file_path'] == file_path]
|
|
663
|
+
# Add all related chunks
|
|
664
|
+
for chunk_doc in file_chunks:
|
|
665
|
+
window_docs.append((chunk_doc, similarity * 0.9))
|
|
666
|
+
|
|
667
|
+
results.extend(window_docs)
|
|
668
|
+
if len(results) >= top_k * (2 * self.context_window + 1):
|
|
669
|
+
break
|
|
670
|
+
spinner.text = "处理结果完成"
|
|
671
|
+
spinner.ok("✅")
|
|
618
672
|
|
|
619
673
|
# Sort by similarity and deduplicate
|
|
620
|
-
|
|
621
|
-
|
|
622
|
-
|
|
623
|
-
|
|
624
|
-
|
|
625
|
-
|
|
626
|
-
seen
|
|
627
|
-
|
|
628
|
-
|
|
629
|
-
|
|
674
|
+
with yaspin(text="排序...", color="cyan") as spinner:
|
|
675
|
+
results.sort(key=lambda x: x[1], reverse=True)
|
|
676
|
+
seen = set()
|
|
677
|
+
final_results = []
|
|
678
|
+
for doc, score in results:
|
|
679
|
+
key = (doc.metadata['file_path'], doc.metadata['chunk_index'])
|
|
680
|
+
if key not in seen:
|
|
681
|
+
seen.add(key)
|
|
682
|
+
final_results.append((doc, score))
|
|
683
|
+
if len(final_results) >= top_k:
|
|
684
|
+
break
|
|
685
|
+
spinner.text = "排序完成"
|
|
686
|
+
spinner.ok("✅")
|
|
630
687
|
|
|
631
688
|
return final_results
|
|
632
689
|
|
|
@@ -691,40 +748,47 @@ Relevant Documents (by relevance):
|
|
|
691
748
|
"""
|
|
692
749
|
|
|
693
750
|
# Add context with length control
|
|
694
|
-
|
|
695
|
-
|
|
696
|
-
|
|
697
|
-
|
|
698
|
-
|
|
699
|
-
|
|
700
|
-
|
|
701
|
-
|
|
702
|
-
|
|
703
|
-
|
|
704
|
-
|
|
705
|
-
|
|
706
|
-
|
|
707
|
-
|
|
708
|
-
|
|
709
|
-
|
|
710
|
-
|
|
711
|
-
|
|
712
|
-
|
|
713
|
-
|
|
714
|
-
|
|
715
|
-
|
|
716
|
-
|
|
717
|
-
|
|
718
|
-
|
|
719
|
-
|
|
720
|
-
|
|
721
|
-
|
|
722
|
-
|
|
723
|
-
|
|
724
|
-
|
|
725
|
-
|
|
726
|
-
|
|
727
|
-
|
|
751
|
+
with yaspin(text="添加上下文...", color="cyan") as spinner:
|
|
752
|
+
available_count = self.max_token_count - get_context_token_count(prompt) - 1000
|
|
753
|
+
current_count = 0
|
|
754
|
+
|
|
755
|
+
for doc, score in results:
|
|
756
|
+
doc_content = f"""
|
|
757
|
+
## Document Fragment [Score: {score:.3f}]
|
|
758
|
+
Source: {doc.metadata['file_path']}
|
|
759
|
+
```
|
|
760
|
+
{doc.content}
|
|
761
|
+
```
|
|
762
|
+
---
|
|
763
|
+
"""
|
|
764
|
+
if current_count + get_context_token_count(doc_content) > available_count:
|
|
765
|
+
PrettyOutput.print(
|
|
766
|
+
"由于上下文长度限制,部分内容被省略",
|
|
767
|
+
output_type=OutputType.WARNING
|
|
768
|
+
)
|
|
769
|
+
break
|
|
770
|
+
|
|
771
|
+
prompt += doc_content
|
|
772
|
+
current_count += get_context_token_count(doc_content)
|
|
773
|
+
|
|
774
|
+
prompt += """
|
|
775
|
+
# ❗ Important Rules
|
|
776
|
+
1. Only use provided documents
|
|
777
|
+
2. Be precise and accurate
|
|
778
|
+
3. Quote sources when relevant
|
|
779
|
+
4. Indicate missing information
|
|
780
|
+
5. Maintain professional tone
|
|
781
|
+
6. Answer in user's language
|
|
782
|
+
"""
|
|
783
|
+
spinner.text = "添加上下文完成"
|
|
784
|
+
spinner.ok("✅")
|
|
785
|
+
|
|
786
|
+
with yaspin(text="回答...", color="cyan") as spinner:
|
|
787
|
+
model = PlatformRegistry.get_global_platform_registry().get_normal_platform()
|
|
788
|
+
response = model.chat_until_success(prompt)
|
|
789
|
+
spinner.text = "回答完成"
|
|
790
|
+
spinner.ok("✅")
|
|
791
|
+
return response
|
|
728
792
|
|
|
729
793
|
except Exception as e:
|
|
730
794
|
PrettyOutput.print(f"回答失败:{str(e)}", OutputType.ERROR)
|
|
@@ -43,7 +43,6 @@ def process_request(request: str) -> Optional[str]:
|
|
|
43
43
|
try:
|
|
44
44
|
# Get language model instance
|
|
45
45
|
model = PlatformRegistry.get_global_platform_registry().get_normal_platform()
|
|
46
|
-
model.set_suppress_output(True)
|
|
47
46
|
|
|
48
47
|
shell = get_shell_name()
|
|
49
48
|
current_path = os.getcwd()
|
|
@@ -1,4 +1,6 @@
|
|
|
1
1
|
from typing import Dict, Any
|
|
2
|
+
|
|
3
|
+
from yaspin import yaspin
|
|
2
4
|
from jarvis.jarvis_codebase.main import CodeBase
|
|
3
5
|
from jarvis.jarvis_utils.config import dont_use_local_model
|
|
4
6
|
from jarvis.jarvis_utils.git_utils import find_git_root
|
|
@@ -46,16 +48,15 @@ class AskCodebaseTool:
|
|
|
46
48
|
try:
|
|
47
49
|
question = args["question"]
|
|
48
50
|
top_k = args.get("top_k", 20)
|
|
49
|
-
|
|
50
|
-
PrettyOutput.print(f"正在分析代码库以回答问题: {question}", OutputType.INFO)
|
|
51
|
-
|
|
52
51
|
# Create new CodeBase instance
|
|
53
52
|
git_root = find_git_root()
|
|
54
53
|
codebase = CodeBase(git_root)
|
|
55
54
|
|
|
56
55
|
# Use ask_codebase method
|
|
56
|
+
|
|
57
57
|
files, response = codebase.ask_codebase(question, top_k)
|
|
58
58
|
|
|
59
|
+
|
|
59
60
|
# Print found files
|
|
60
61
|
if files:
|
|
61
62
|
output = "找到的相关文件:\n"
|