jarvis-ai-assistant: jarvis_ai_assistant-0.1.96-py3-none-any.whl → jarvis_ai_assistant-0.1.98-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of jarvis-ai-assistant might be problematic. Click here for more details.
- jarvis/__init__.py +1 -1
- jarvis/agent.py +138 -144
- jarvis/jarvis_codebase/main.py +87 -54
- jarvis/jarvis_coder/git_utils.py +22 -25
- jarvis/jarvis_coder/main.py +166 -171
- jarvis/jarvis_coder/patch_handler.py +153 -453
- jarvis/jarvis_coder/plan_generator.py +76 -48
- jarvis/jarvis_platform/main.py +39 -39
- jarvis/jarvis_rag/main.py +182 -182
- jarvis/jarvis_smart_shell/main.py +34 -34
- jarvis/main.py +24 -24
- jarvis/models/ai8.py +22 -22
- jarvis/models/base.py +17 -13
- jarvis/models/kimi.py +31 -31
- jarvis/models/ollama.py +28 -28
- jarvis/models/openai.py +22 -24
- jarvis/models/oyi.py +25 -25
- jarvis/models/registry.py +33 -34
- jarvis/tools/ask_user.py +5 -5
- jarvis/tools/base.py +2 -2
- jarvis/tools/chdir.py +9 -9
- jarvis/tools/codebase_qa.py +4 -4
- jarvis/tools/coder.py +4 -4
- jarvis/tools/file_ops.py +1 -1
- jarvis/tools/generator.py +23 -23
- jarvis/tools/methodology.py +4 -4
- jarvis/tools/rag.py +4 -4
- jarvis/tools/registry.py +38 -38
- jarvis/tools/search.py +42 -42
- jarvis/tools/shell.py +13 -13
- jarvis/tools/sub_agent.py +16 -16
- jarvis/tools/thinker.py +41 -41
- jarvis/tools/webpage.py +17 -17
- jarvis/utils.py +59 -60
- {jarvis_ai_assistant-0.1.96.dist-info → jarvis_ai_assistant-0.1.98.dist-info}/METADATA +1 -1
- jarvis_ai_assistant-0.1.98.dist-info/RECORD +47 -0
- jarvis_ai_assistant-0.1.96.dist-info/RECORD +0 -47
- {jarvis_ai_assistant-0.1.96.dist-info → jarvis_ai_assistant-0.1.98.dist-info}/LICENSE +0 -0
- {jarvis_ai_assistant-0.1.96.dist-info → jarvis_ai_assistant-0.1.98.dist-info}/WHEEL +0 -0
- {jarvis_ai_assistant-0.1.96.dist-info → jarvis_ai_assistant-0.1.98.dist-info}/entry_points.txt +0 -0
- {jarvis_ai_assistant-0.1.96.dist-info → jarvis_ai_assistant-0.1.98.dist-info}/top_level.txt +0 -0
jarvis/jarvis_codebase/main.py
CHANGED
|
@@ -3,6 +3,8 @@ import os
|
|
|
3
3
|
import numpy as np
|
|
4
4
|
import faiss
|
|
5
5
|
from typing import List, Tuple, Optional, Dict
|
|
6
|
+
|
|
7
|
+
import yaml
|
|
6
8
|
from jarvis.models.registry import PlatformRegistry
|
|
7
9
|
import concurrent.futures
|
|
8
10
|
from threading import Lock
|
|
@@ -10,10 +12,10 @@ from concurrent.futures import ThreadPoolExecutor
|
|
|
10
12
|
from jarvis.utils import OutputType, PrettyOutput, find_git_root, get_file_md5, get_max_context_length, get_thread_count, load_embedding_model, load_rerank_model
|
|
11
13
|
from jarvis.utils import load_env_from_file
|
|
12
14
|
import argparse
|
|
13
|
-
from sentence_transformers import SentenceTransformer
|
|
14
15
|
import pickle
|
|
15
16
|
import lzma # 添加 lzma 导入
|
|
16
17
|
from tqdm import tqdm
|
|
18
|
+
import re
|
|
17
19
|
|
|
18
20
|
class CodeBase:
|
|
19
21
|
def __init__(self, root_dir: str):
|
|
@@ -91,19 +93,28 @@ class CodeBase:
|
|
|
91
93
|
|
|
92
94
|
def make_description(self, file_path: str, content: str) -> str:
|
|
93
95
|
model = PlatformRegistry.get_global_platform_registry().get_cheap_platform()
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
96
|
+
if self.thread_count > 1:
|
|
97
|
+
model.set_suppress_output(True)
|
|
98
|
+
prompt = f"""Please analyze the following code file and generate a detailed description. The description should include:
|
|
99
|
+
1. Overall file functionality description, no more than 100 characters
|
|
100
|
+
2. One-sentence description (max 50 characters) for each global variable, function, type definition, class, method, and other code elements
|
|
101
|
+
3. 5 potential questions users might ask about this file
|
|
102
|
+
|
|
103
|
+
Please use concise and professional language, emphasizing technical functionality to facilitate subsequent code retrieval.
|
|
104
|
+
File path: {file_path}
|
|
105
|
+
Code content:
|
|
102
106
|
{content}
|
|
103
107
|
"""
|
|
104
|
-
response = model.
|
|
108
|
+
response = model.chat_until_success(prompt)
|
|
105
109
|
return response
|
|
106
110
|
|
|
111
|
+
def export(self):
|
|
112
|
+
"""导出当前索引数据到标准输出"""
|
|
113
|
+
for file_path, data in self.vector_cache.items():
|
|
114
|
+
print(f"## {file_path}")
|
|
115
|
+
print(f"- path: {file_path}")
|
|
116
|
+
print(f"- description: {data['description']}")
|
|
117
|
+
|
|
107
118
|
def _save_cache(self):
|
|
108
119
|
"""保存缓存数据"""
|
|
109
120
|
try:
|
|
@@ -187,14 +198,13 @@ class CodeBase:
|
|
|
187
198
|
return cached_vector
|
|
188
199
|
|
|
189
200
|
# 读取文件内容并组合信息
|
|
190
|
-
|
|
191
|
-
content = f.read()[:self.max_context_length] # 限制文件内容长度
|
|
201
|
+
content = open(file_path, "r", encoding="utf-8").read()[:self.max_context_length] # 限制文件内容长度
|
|
192
202
|
|
|
193
203
|
# 组合文件信息,包含文件内容
|
|
194
204
|
combined_text = f"""
|
|
195
|
-
|
|
196
|
-
|
|
197
|
-
|
|
205
|
+
{file_path}
|
|
206
|
+
{description}
|
|
207
|
+
{content}
|
|
198
208
|
"""
|
|
199
209
|
vector = self.get_embedding(combined_text)
|
|
200
210
|
|
|
@@ -449,7 +459,7 @@ class CodeBase:
|
|
|
449
459
|
score = len(matched_keywords) / len(keywords)
|
|
450
460
|
return score
|
|
451
461
|
|
|
452
|
-
def rerank_results(self, query: str, initial_results: List[Tuple[str, float, str]]) -> List[Tuple[str, float
|
|
462
|
+
def rerank_results(self, query: str, initial_results: List[Tuple[str, float, str]]) -> List[Tuple[str, float]]:
|
|
453
463
|
"""使用多种策略对搜索结果重新排序"""
|
|
454
464
|
if not initial_results:
|
|
455
465
|
return []
|
|
@@ -465,16 +475,15 @@ class CodeBase:
|
|
|
465
475
|
|
|
466
476
|
for path, _, desc in initial_results:
|
|
467
477
|
try:
|
|
468
|
-
|
|
469
|
-
content = f.read()[:512] # 限制内容长度
|
|
478
|
+
content = open(path, "r", encoding="utf-8").read()[:512] # 限制内容长度
|
|
470
479
|
|
|
471
480
|
# 组合文件信息
|
|
472
|
-
doc_content = f"
|
|
481
|
+
doc_content = f"File path: {path}\nDescription: {desc}\nContent: {content}"
|
|
473
482
|
pairs.append([query, doc_content])
|
|
474
483
|
except Exception as e:
|
|
475
484
|
PrettyOutput.print(f"读取文件失败 {path}: {str(e)}",
|
|
476
485
|
output_type=OutputType.ERROR)
|
|
477
|
-
doc_content = f"
|
|
486
|
+
doc_content = f"File path: {path}\nDescription: {desc}"
|
|
478
487
|
pairs.append([query, doc_content])
|
|
479
488
|
|
|
480
489
|
# 使用更大的batch size提高处理速度
|
|
@@ -507,11 +516,9 @@ class CodeBase:
|
|
|
507
516
|
|
|
508
517
|
# 将重排序分数与原始分数结合
|
|
509
518
|
scored_results = []
|
|
510
|
-
for (path,
|
|
511
|
-
|
|
512
|
-
|
|
513
|
-
if combined_score >= 0.5: # 只保留相关度较高的结果
|
|
514
|
-
scored_results.append((path, combined_score, desc))
|
|
519
|
+
for (path,_, desc), rerank_score in zip(initial_results, batch_scores):
|
|
520
|
+
if rerank_score >= 0.5: # 只保留相关度较高的结果
|
|
521
|
+
scored_results.append((path, rerank_score))
|
|
515
522
|
|
|
516
523
|
# 按综合分数降序排序
|
|
517
524
|
scored_results.sort(key=lambda x: x[1], reverse=True)
|
|
@@ -521,7 +528,7 @@ class CodeBase:
|
|
|
521
528
|
except Exception as e:
|
|
522
529
|
PrettyOutput.print(f"重排序失败: {str(e)}",
|
|
523
530
|
output_type=OutputType.ERROR)
|
|
524
|
-
return initial_results # 发生错误时返回原始结果
|
|
531
|
+
return [(path, score) for path, score, _ in initial_results] # 发生错误时返回原始结果
|
|
525
532
|
|
|
526
533
|
def _generate_query_variants(self, query: str) -> List[str]:
|
|
527
534
|
"""生成查询的不同表述变体
|
|
@@ -533,12 +540,12 @@ class CodeBase:
|
|
|
533
540
|
List[str]: 查询变体列表
|
|
534
541
|
"""
|
|
535
542
|
model = PlatformRegistry.get_global_platform_registry().get_normal_platform()
|
|
536
|
-
prompt = f"""
|
|
537
|
-
|
|
543
|
+
prompt = f"""Please generate 3 different expressions based on the following query, each expression should fully convey the meaning of the original query. These expressions will be used for code search, maintain professionalism and accuracy.
|
|
544
|
+
Original query: {query}
|
|
538
545
|
|
|
539
|
-
|
|
546
|
+
Please output 3 expressions directly, separated by two line breaks, without numbering or other markers.
|
|
540
547
|
"""
|
|
541
|
-
variants = model.
|
|
548
|
+
variants = model.chat_until_success(prompt).strip().split('\n\n')
|
|
542
549
|
variants.append(query) # 添加原始查询
|
|
543
550
|
return variants
|
|
544
551
|
|
|
@@ -574,7 +581,7 @@ class CodeBase:
|
|
|
574
581
|
return results
|
|
575
582
|
|
|
576
583
|
|
|
577
|
-
def search_similar(self, query: str, top_k: int = 30) -> List[Tuple[str, float
|
|
584
|
+
def search_similar(self, query: str, top_k: int = 30) -> List[Tuple[str, float]]:
|
|
578
585
|
"""搜索关联文件"""
|
|
579
586
|
try:
|
|
580
587
|
if self.index is None:
|
|
@@ -594,6 +601,9 @@ class CodeBase:
|
|
|
594
601
|
# 如果没有找到结果,直接返回
|
|
595
602
|
if not initial_results:
|
|
596
603
|
return []
|
|
604
|
+
|
|
605
|
+
# 过滤低分结果
|
|
606
|
+
initial_results = [(path, score, desc) for path, score, desc in initial_results if score >= 0.5]
|
|
597
607
|
|
|
598
608
|
# 对初步结果进行重排序
|
|
599
609
|
return self.rerank_results(query, initial_results)
|
|
@@ -610,21 +620,21 @@ class CodeBase:
|
|
|
610
620
|
return ""
|
|
611
621
|
|
|
612
622
|
PrettyOutput.print(f"找到的关联文件: ", output_type=OutputType.SUCCESS)
|
|
613
|
-
for path, score
|
|
623
|
+
for path, score in results:
|
|
614
624
|
PrettyOutput.print(f"文件: {path} 关联度: {score:.3f}",
|
|
615
625
|
output_type=OutputType.INFO)
|
|
616
626
|
|
|
617
627
|
prompt = f"""你是一个代码专家,请根据以下文件信息回答用户的问题:
|
|
618
628
|
"""
|
|
619
|
-
for path, _
|
|
629
|
+
for path, _ in results:
|
|
620
630
|
try:
|
|
621
631
|
if len(prompt) > self.max_context_length:
|
|
622
632
|
PrettyOutput.print(f"避免上下文超限,丢弃低相关度文件:{path}", OutputType.WARNING)
|
|
623
633
|
continue
|
|
624
634
|
content = open(path, "r", encoding="utf-8").read()
|
|
625
635
|
prompt += f"""
|
|
626
|
-
|
|
627
|
-
|
|
636
|
+
File path: {path}prompt
|
|
637
|
+
File content:
|
|
628
638
|
{content}
|
|
629
639
|
========================================
|
|
630
640
|
"""
|
|
@@ -634,12 +644,12 @@ class CodeBase:
|
|
|
634
644
|
continue
|
|
635
645
|
|
|
636
646
|
prompt += f"""
|
|
637
|
-
|
|
647
|
+
User question: {query}
|
|
638
648
|
|
|
639
|
-
|
|
649
|
+
Please answer the user's question in Chinese using professional language. If the provided file content is insufficient to answer the user's question, please inform the user. Never make up information.
|
|
640
650
|
"""
|
|
641
651
|
model = PlatformRegistry.get_global_platform_registry().get_codegen_platform()
|
|
642
|
-
response = model.
|
|
652
|
+
response = model.chat_until_success(prompt)
|
|
643
653
|
return response
|
|
644
654
|
|
|
645
655
|
def is_index_generated(self) -> bool:
|
|
@@ -669,46 +679,69 @@ class CodeBase:
|
|
|
669
679
|
|
|
670
680
|
|
|
671
681
|
|
|
682
|
+
|
|
683
|
+
|
|
672
684
|
def main():
|
|
685
|
+
|
|
673
686
|
parser = argparse.ArgumentParser(description='Codebase management and search tool')
|
|
674
|
-
parser.
|
|
675
|
-
|
|
676
|
-
|
|
677
|
-
|
|
687
|
+
subparsers = parser.add_subparsers(dest='command', help='Available commands')
|
|
688
|
+
|
|
689
|
+
# Generate command
|
|
690
|
+
generate_parser = subparsers.add_parser('generate', help='Generate codebase index')
|
|
691
|
+
generate_parser.add_argument('--force', action='store_true', help='Force rebuild index')
|
|
692
|
+
|
|
693
|
+
# Search command
|
|
694
|
+
search_parser = subparsers.add_parser('search', help='Search similar code files')
|
|
695
|
+
search_parser.add_argument('query', type=str, help='Search query')
|
|
696
|
+
search_parser.add_argument('--top-k', type=int, default=20, help='Number of results to return (default: 20)')
|
|
697
|
+
|
|
698
|
+
# Ask command
|
|
699
|
+
ask_parser = subparsers.add_parser('ask', help='Ask a question about the codebase')
|
|
700
|
+
ask_parser.add_argument('question', type=str, help='Question to ask')
|
|
701
|
+
ask_parser.add_argument('--top-k', type=int, default=20, help='Number of results to use (default: 20)')
|
|
702
|
+
|
|
703
|
+
export_parser = subparsers.add_parser('export', help='Export current index data')
|
|
678
704
|
args = parser.parse_args()
|
|
679
705
|
|
|
680
706
|
current_dir = find_git_root()
|
|
681
707
|
codebase = CodeBase(current_dir)
|
|
682
708
|
|
|
683
|
-
|
|
684
|
-
|
|
685
|
-
PrettyOutput.print("索引尚未生成,请先运行 --generate 生成索引", output_type=OutputType.WARNING)
|
|
709
|
+
if args.command == 'export':
|
|
710
|
+
codebase.export()
|
|
686
711
|
return
|
|
687
712
|
|
|
713
|
+
# 如果没有生成索引,且不是生成命令,提示用户先生成索引
|
|
714
|
+
if not codebase.is_index_generated() and args.command != 'generate':
|
|
715
|
+
PrettyOutput.print("索引尚未生成,请先运行 'generate' 命令生成索引", output_type=OutputType.WARNING)
|
|
716
|
+
return
|
|
688
717
|
|
|
689
|
-
if args.generate:
|
|
718
|
+
if args.command == 'generate':
|
|
690
719
|
try:
|
|
691
|
-
codebase.generate_codebase(force=
|
|
720
|
+
codebase.generate_codebase(force=args.force)
|
|
692
721
|
PrettyOutput.print("\nCodebase generation completed", output_type=OutputType.SUCCESS)
|
|
693
722
|
except Exception as e:
|
|
694
723
|
PrettyOutput.print(f"Error during codebase generation: {str(e)}", output_type=OutputType.ERROR)
|
|
695
724
|
|
|
696
|
-
|
|
697
|
-
results = codebase.search_similar(args.
|
|
725
|
+
elif args.command == 'search':
|
|
726
|
+
results = codebase.search_similar(args.query, args.top_k)
|
|
698
727
|
if not results:
|
|
699
728
|
PrettyOutput.print("No similar files found", output_type=OutputType.WARNING)
|
|
700
729
|
return
|
|
701
730
|
|
|
702
731
|
PrettyOutput.print("\nSearch Results:", output_type=OutputType.INFO)
|
|
703
|
-
for path, score
|
|
732
|
+
for path, score in results:
|
|
704
733
|
PrettyOutput.print("\n" + "="*50, output_type=OutputType.INFO)
|
|
705
734
|
PrettyOutput.print(f"File: {path}", output_type=OutputType.INFO)
|
|
706
735
|
PrettyOutput.print(f"Similarity: {score:.3f}", output_type=OutputType.INFO)
|
|
707
|
-
PrettyOutput.print(f"Description: {desc[100:]}", output_type=OutputType.INFO)
|
|
708
736
|
|
|
709
|
-
|
|
710
|
-
codebase.ask_codebase(args.
|
|
737
|
+
elif args.command == 'ask':
|
|
738
|
+
response = codebase.ask_codebase(args.question, args.top_k)
|
|
739
|
+
PrettyOutput.print("\nAnswer:", output_type=OutputType.INFO)
|
|
740
|
+
PrettyOutput.print(response, output_type=OutputType.INFO)
|
|
741
|
+
|
|
742
|
+
else:
|
|
743
|
+
parser.print_help()
|
|
711
744
|
|
|
712
745
|
|
|
713
746
|
if __name__ == "__main__":
|
|
714
|
-
exit(main())
|
|
747
|
+
exit(main())
|
jarvis/jarvis_coder/git_utils.py
CHANGED
|
@@ -6,51 +6,48 @@ from jarvis.utils import OutputType, PrettyOutput, while_success
|
|
|
6
6
|
from jarvis.models.registry import PlatformRegistry
|
|
7
7
|
|
|
8
8
|
def has_uncommitted_files() -> bool:
|
|
9
|
-
"""
|
|
10
|
-
#
|
|
9
|
+
"""Check if there are uncommitted files in the repository"""
|
|
10
|
+
# Get unstaged modifications
|
|
11
11
|
unstaged = os.popen("git diff --name-only").read()
|
|
12
|
-
#
|
|
12
|
+
# Get staged but uncommitted modifications
|
|
13
13
|
staged = os.popen("git diff --cached --name-only").read()
|
|
14
|
-
#
|
|
14
|
+
# Get untracked files
|
|
15
15
|
untracked = os.popen("git ls-files --others --exclude-standard").read()
|
|
16
16
|
|
|
17
17
|
return bool(unstaged or staged or untracked)
|
|
18
18
|
|
|
19
|
-
def generate_commit_message(git_diff: str
|
|
20
|
-
"""
|
|
21
|
-
prompt = f"""
|
|
19
|
+
def generate_commit_message(git_diff: str) -> str:
|
|
20
|
+
"""Generate commit message based on git diff and feature description"""
|
|
21
|
+
prompt = f"""You are an experienced programmer, please generate a concise and clear commit message based on the following code changes and feature description:
|
|
22
22
|
|
|
23
|
-
|
|
24
|
-
{feature}
|
|
25
|
-
|
|
26
|
-
代码变更:
|
|
23
|
+
Code changes:
|
|
27
24
|
Git Diff:
|
|
28
25
|
{git_diff}
|
|
29
26
|
|
|
30
|
-
|
|
31
|
-
1.
|
|
32
|
-
2.
|
|
33
|
-
3.
|
|
34
|
-
4.
|
|
35
|
-
5.
|
|
36
|
-
6.
|
|
27
|
+
Please follow these rules:
|
|
28
|
+
1. Write in English
|
|
29
|
+
2. Use conventional commit message format: <type>(<scope>): <subject>
|
|
30
|
+
3. Keep it concise, no more than 50 characters
|
|
31
|
+
4. Accurately describe the main content of code changes
|
|
32
|
+
5. Prioritize feature description and changes in git diff
|
|
33
|
+
6. Only generate the commit message text, do not output anything else
|
|
37
34
|
"""
|
|
38
35
|
|
|
39
|
-
model = PlatformRegistry().get_global_platform_registry().
|
|
40
|
-
response =
|
|
36
|
+
model = PlatformRegistry().get_global_platform_registry().get_normal_platform()
|
|
37
|
+
response = model.chat_until_success(prompt)
|
|
41
38
|
|
|
42
|
-
return response.strip().split("\n")
|
|
39
|
+
return ';'.join(response.strip().split("\n"))
|
|
43
40
|
|
|
44
41
|
def save_edit_record(record_dir: str, commit_message: str, git_diff: str) -> None:
|
|
45
|
-
"""
|
|
46
|
-
#
|
|
42
|
+
"""Save code modification record"""
|
|
43
|
+
# Get next sequence number
|
|
47
44
|
existing_records = [f for f in os.listdir(record_dir) if f.endswith('.yaml')]
|
|
48
45
|
next_num = 1
|
|
49
46
|
if existing_records:
|
|
50
47
|
last_num = max(int(f[:4]) for f in existing_records)
|
|
51
48
|
next_num = last_num + 1
|
|
52
49
|
|
|
53
|
-
#
|
|
50
|
+
# Create record file
|
|
54
51
|
record = {
|
|
55
52
|
"timestamp": time.strftime("%Y-%m-%d %H:%M:%S"),
|
|
56
53
|
"commit_message": commit_message,
|
|
@@ -61,4 +58,4 @@ def save_edit_record(record_dir: str, commit_message: str, git_diff: str) -> Non
|
|
|
61
58
|
with open(record_path, "w", encoding="utf-8") as f:
|
|
62
59
|
yaml.safe_dump(record, f, allow_unicode=True)
|
|
63
60
|
|
|
64
|
-
PrettyOutput.print(f"
|
|
61
|
+
PrettyOutput.print(f"Modification record saved: {record_path}", OutputType.SUCCESS)
|