jarvis-ai-assistant 0.1.96__py3-none-any.whl → 0.1.98__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of jarvis-ai-assistant might be problematic. Click here for more details.

Files changed (41) hide show
  1. jarvis/__init__.py +1 -1
  2. jarvis/agent.py +138 -144
  3. jarvis/jarvis_codebase/main.py +87 -54
  4. jarvis/jarvis_coder/git_utils.py +22 -25
  5. jarvis/jarvis_coder/main.py +166 -171
  6. jarvis/jarvis_coder/patch_handler.py +153 -453
  7. jarvis/jarvis_coder/plan_generator.py +76 -48
  8. jarvis/jarvis_platform/main.py +39 -39
  9. jarvis/jarvis_rag/main.py +182 -182
  10. jarvis/jarvis_smart_shell/main.py +34 -34
  11. jarvis/main.py +24 -24
  12. jarvis/models/ai8.py +22 -22
  13. jarvis/models/base.py +17 -13
  14. jarvis/models/kimi.py +31 -31
  15. jarvis/models/ollama.py +28 -28
  16. jarvis/models/openai.py +22 -24
  17. jarvis/models/oyi.py +25 -25
  18. jarvis/models/registry.py +33 -34
  19. jarvis/tools/ask_user.py +5 -5
  20. jarvis/tools/base.py +2 -2
  21. jarvis/tools/chdir.py +9 -9
  22. jarvis/tools/codebase_qa.py +4 -4
  23. jarvis/tools/coder.py +4 -4
  24. jarvis/tools/file_ops.py +1 -1
  25. jarvis/tools/generator.py +23 -23
  26. jarvis/tools/methodology.py +4 -4
  27. jarvis/tools/rag.py +4 -4
  28. jarvis/tools/registry.py +38 -38
  29. jarvis/tools/search.py +42 -42
  30. jarvis/tools/shell.py +13 -13
  31. jarvis/tools/sub_agent.py +16 -16
  32. jarvis/tools/thinker.py +41 -41
  33. jarvis/tools/webpage.py +17 -17
  34. jarvis/utils.py +59 -60
  35. {jarvis_ai_assistant-0.1.96.dist-info → jarvis_ai_assistant-0.1.98.dist-info}/METADATA +1 -1
  36. jarvis_ai_assistant-0.1.98.dist-info/RECORD +47 -0
  37. jarvis_ai_assistant-0.1.96.dist-info/RECORD +0 -47
  38. {jarvis_ai_assistant-0.1.96.dist-info → jarvis_ai_assistant-0.1.98.dist-info}/LICENSE +0 -0
  39. {jarvis_ai_assistant-0.1.96.dist-info → jarvis_ai_assistant-0.1.98.dist-info}/WHEEL +0 -0
  40. {jarvis_ai_assistant-0.1.96.dist-info → jarvis_ai_assistant-0.1.98.dist-info}/entry_points.txt +0 -0
  41. {jarvis_ai_assistant-0.1.96.dist-info → jarvis_ai_assistant-0.1.98.dist-info}/top_level.txt +0 -0
@@ -3,6 +3,8 @@ import os
3
3
  import numpy as np
4
4
  import faiss
5
5
  from typing import List, Tuple, Optional, Dict
6
+
7
+ import yaml
6
8
  from jarvis.models.registry import PlatformRegistry
7
9
  import concurrent.futures
8
10
  from threading import Lock
@@ -10,10 +12,10 @@ from concurrent.futures import ThreadPoolExecutor
10
12
  from jarvis.utils import OutputType, PrettyOutput, find_git_root, get_file_md5, get_max_context_length, get_thread_count, load_embedding_model, load_rerank_model
11
13
  from jarvis.utils import load_env_from_file
12
14
  import argparse
13
- from sentence_transformers import SentenceTransformer
14
15
  import pickle
15
16
  import lzma # 添加 lzma 导入
16
17
  from tqdm import tqdm
18
+ import re
17
19
 
18
20
  class CodeBase:
19
21
  def __init__(self, root_dir: str):
@@ -91,19 +93,28 @@ class CodeBase:
91
93
 
92
94
  def make_description(self, file_path: str, content: str) -> str:
93
95
  model = PlatformRegistry.get_global_platform_registry().get_cheap_platform()
94
- model.set_suppress_output(True)
95
- prompt = f"""请分析以下代码文件,并生成一个详细的描述。描述应该包含以下要点:
96
- 1. 整个文件的功能描述,不超过100个字
97
- 2. 每个全局变量的函数、类型定义、类、方法等代码元素的一句话描述,不超过50字
98
-
99
- 请用简洁专业的语言描述,突出代码的技术功能,以便后续进行关联代码检索。
100
- 文件路径:{file_path}
101
- 代码内容:
96
+ if self.thread_count > 1:
97
+ model.set_suppress_output(True)
98
+ prompt = f"""Please analyze the following code file and generate a detailed description. The description should include:
99
+ 1. Overall file functionality description, no more than 100 characters
100
+ 2. One-sentence description (max 50 characters) for each global variable, function, type definition, class, method, and other code elements
101
+ 3. 5 potential questions users might ask about this file
102
+
103
+ Please use concise and professional language, emphasizing technical functionality to facilitate subsequent code retrieval.
104
+ File path: {file_path}
105
+ Code content:
102
106
  {content}
103
107
  """
104
- response = model.chat(prompt)
108
+ response = model.chat_until_success(prompt)
105
109
  return response
106
110
 
111
+ def export(self):
112
+ """导出当前索引数据到标准输出"""
113
+ for file_path, data in self.vector_cache.items():
114
+ print(f"## {file_path}")
115
+ print(f"- path: {file_path}")
116
+ print(f"- description: {data['description']}")
117
+
107
118
  def _save_cache(self):
108
119
  """保存缓存数据"""
109
120
  try:
@@ -187,14 +198,13 @@ class CodeBase:
187
198
  return cached_vector
188
199
 
189
200
  # 读取文件内容并组合信息
190
- with open(file_path, "r", encoding="utf-8") as f:
191
- content = f.read()[:self.max_context_length] # 限制文件内容长度
201
+ content = open(file_path, "r", encoding="utf-8").read()[:self.max_context_length] # 限制文件内容长度
192
202
 
193
203
  # 组合文件信息,包含文件内容
194
204
  combined_text = f"""
195
- 文件路径: {file_path}
196
- 文件描述: {description}
197
- 文件内容: {content}
205
+ {file_path}
206
+ {description}
207
+ {content}
198
208
  """
199
209
  vector = self.get_embedding(combined_text)
200
210
 
@@ -449,7 +459,7 @@ class CodeBase:
449
459
  score = len(matched_keywords) / len(keywords)
450
460
  return score
451
461
 
452
- def rerank_results(self, query: str, initial_results: List[Tuple[str, float, str]]) -> List[Tuple[str, float, str]]:
462
+ def rerank_results(self, query: str, initial_results: List[Tuple[str, float, str]]) -> List[Tuple[str, float]]:
453
463
  """使用多种策略对搜索结果重新排序"""
454
464
  if not initial_results:
455
465
  return []
@@ -465,16 +475,15 @@ class CodeBase:
465
475
 
466
476
  for path, _, desc in initial_results:
467
477
  try:
468
- with open(path, "r", encoding="utf-8") as f:
469
- content = f.read()[:512] # 限制内容长度
478
+ content = open(path, "r", encoding="utf-8").read()[:512] # 限制内容长度
470
479
 
471
480
  # 组合文件信息
472
- doc_content = f"文件: {path}\n描述: {desc}\n内容: {content}"
481
+ doc_content = f"File path: {path}\nDescription: {desc}\nContent: {content}"
473
482
  pairs.append([query, doc_content])
474
483
  except Exception as e:
475
484
  PrettyOutput.print(f"读取文件失败 {path}: {str(e)}",
476
485
  output_type=OutputType.ERROR)
477
- doc_content = f"文件: {path}\n描述: {desc}"
486
+ doc_content = f"File path: {path}\nDescription: {desc}"
478
487
  pairs.append([query, doc_content])
479
488
 
480
489
  # 使用更大的batch size提高处理速度
@@ -507,11 +516,9 @@ class CodeBase:
507
516
 
508
517
  # 将重排序分数与原始分数结合
509
518
  scored_results = []
510
- for (path, orig_score, desc), rerank_score in zip(initial_results, batch_scores):
511
- # 综合分数 = 0.3 * 原始分数 + 0.7 * 重排序分数
512
- combined_score = 0.3 * float(orig_score) + 0.7 * float(rerank_score)
513
- if combined_score >= 0.5: # 只保留相关度较高的结果
514
- scored_results.append((path, combined_score, desc))
519
+ for (path,_, desc), rerank_score in zip(initial_results, batch_scores):
520
+ if rerank_score >= 0.5: # 只保留相关度较高的结果
521
+ scored_results.append((path, rerank_score))
515
522
 
516
523
  # 按综合分数降序排序
517
524
  scored_results.sort(key=lambda x: x[1], reverse=True)
@@ -521,7 +528,7 @@ class CodeBase:
521
528
  except Exception as e:
522
529
  PrettyOutput.print(f"重排序失败: {str(e)}",
523
530
  output_type=OutputType.ERROR)
524
- return initial_results # 发生错误时返回原始结果
531
+ return [(path, score) for path, score, _ in initial_results] # 发生错误时返回原始结果
525
532
 
526
533
  def _generate_query_variants(self, query: str) -> List[str]:
527
534
  """生成查询的不同表述变体
@@ -533,12 +540,12 @@ class CodeBase:
533
540
  List[str]: 查询变体列表
534
541
  """
535
542
  model = PlatformRegistry.get_global_platform_registry().get_normal_platform()
536
- prompt = f"""请根据以下查询,生成3个不同的表述,每个表述都要完整表达原始查询的意思。这些表述将用于代码搜索,要保持专业性和准确性。
537
- 原始查询: {query}
543
+ prompt = f"""Please generate 3 different expressions based on the following query, each expression should fully convey the meaning of the original query. These expressions will be used for code search, maintain professionalism and accuracy.
544
+ Original query: {query}
538
545
 
539
- 请直接输出3个表述,用换行分隔,不要有编号或其他标记。
546
+ Please output 3 expressions directly, separated by two line breaks, without numbering or other markers.
540
547
  """
541
- variants = model.chat(prompt).strip().split('\n')
548
+ variants = model.chat_until_success(prompt).strip().split('\n\n')
542
549
  variants.append(query) # 添加原始查询
543
550
  return variants
544
551
 
@@ -574,7 +581,7 @@ class CodeBase:
574
581
  return results
575
582
 
576
583
 
577
- def search_similar(self, query: str, top_k: int = 30) -> List[Tuple[str, float, str]]:
584
+ def search_similar(self, query: str, top_k: int = 30) -> List[Tuple[str, float]]:
578
585
  """搜索关联文件"""
579
586
  try:
580
587
  if self.index is None:
@@ -594,6 +601,9 @@ class CodeBase:
594
601
  # 如果没有找到结果,直接返回
595
602
  if not initial_results:
596
603
  return []
604
+
605
+ # 过滤低分结果
606
+ initial_results = [(path, score, desc) for path, score, desc in initial_results if score >= 0.5]
597
607
 
598
608
  # 对初步结果进行重排序
599
609
  return self.rerank_results(query, initial_results)
@@ -610,21 +620,21 @@ class CodeBase:
610
620
  return ""
611
621
 
612
622
  PrettyOutput.print(f"找到的关联文件: ", output_type=OutputType.SUCCESS)
613
- for path, score, _ in results:
623
+ for path, score in results:
614
624
  PrettyOutput.print(f"文件: {path} 关联度: {score:.3f}",
615
625
  output_type=OutputType.INFO)
616
626
 
617
627
  prompt = f"""你是一个代码专家,请根据以下文件信息回答用户的问题:
618
628
  """
619
- for path, _, _ in results:
629
+ for path, _ in results:
620
630
  try:
621
631
  if len(prompt) > self.max_context_length:
622
632
  PrettyOutput.print(f"避免上下文超限,丢弃低相关度文件:{path}", OutputType.WARNING)
623
633
  continue
624
634
  content = open(path, "r", encoding="utf-8").read()
625
635
  prompt += f"""
626
- 文件路径: {path}prompt
627
- 文件内容:
636
+ File path: {path}prompt
637
+ File content:
628
638
  {content}
629
639
  ========================================
630
640
  """
@@ -634,12 +644,12 @@ class CodeBase:
634
644
  continue
635
645
 
636
646
  prompt += f"""
637
- 用户问题: {query}
647
+ User question: {query}
638
648
 
639
- 请用专业的语言回答用户的问题,如果给出的文件内容不足以回答用户的问题,请告诉用户,绝对不要胡编乱造。
649
+ Please answer the user's question in Chinese using professional language. If the provided file content is insufficient to answer the user's question, please inform the user. Never make up information.
640
650
  """
641
651
  model = PlatformRegistry.get_global_platform_registry().get_codegen_platform()
642
- response = model.chat(prompt)
652
+ response = model.chat_until_success(prompt)
643
653
  return response
644
654
 
645
655
  def is_index_generated(self) -> bool:
@@ -669,46 +679,69 @@ class CodeBase:
669
679
 
670
680
 
671
681
 
682
+
683
+
672
684
  def main():
685
+
673
686
  parser = argparse.ArgumentParser(description='Codebase management and search tool')
674
- parser.add_argument('--search', type=str, help='Search query to find similar code files')
675
- parser.add_argument('--top-k', type=int, default=20, help='Number of results to return (default: 20)')
676
- parser.add_argument('--ask', type=str, help='Ask a question about the codebase')
677
- parser.add_argument('--generate', action='store_true', help='Generate codebase index')
687
+ subparsers = parser.add_subparsers(dest='command', help='Available commands')
688
+
689
+ # Generate command
690
+ generate_parser = subparsers.add_parser('generate', help='Generate codebase index')
691
+ generate_parser.add_argument('--force', action='store_true', help='Force rebuild index')
692
+
693
+ # Search command
694
+ search_parser = subparsers.add_parser('search', help='Search similar code files')
695
+ search_parser.add_argument('query', type=str, help='Search query')
696
+ search_parser.add_argument('--top-k', type=int, default=20, help='Number of results to return (default: 20)')
697
+
698
+ # Ask command
699
+ ask_parser = subparsers.add_parser('ask', help='Ask a question about the codebase')
700
+ ask_parser.add_argument('question', type=str, help='Question to ask')
701
+ ask_parser.add_argument('--top-k', type=int, default=20, help='Number of results to use (default: 20)')
702
+
703
+ export_parser = subparsers.add_parser('export', help='Export current index data')
678
704
  args = parser.parse_args()
679
705
 
680
706
  current_dir = find_git_root()
681
707
  codebase = CodeBase(current_dir)
682
708
 
683
- # 如果没有生成索引,且不是生成命令,提示用户先生成索引
684
- if not codebase.is_index_generated() and not args.generate:
685
- PrettyOutput.print("索引尚未生成,请先运行 --generate 生成索引", output_type=OutputType.WARNING)
709
+ if args.command == 'export':
710
+ codebase.export()
686
711
  return
687
712
 
713
+ # 如果没有生成索引,且不是生成命令,提示用户先生成索引
714
+ if not codebase.is_index_generated() and args.command != 'generate':
715
+ PrettyOutput.print("索引尚未生成,请先运行 'generate' 命令生成索引", output_type=OutputType.WARNING)
716
+ return
688
717
 
689
- if args.generate:
718
+ if args.command == 'generate':
690
719
  try:
691
- codebase.generate_codebase(force=True)
720
+ codebase.generate_codebase(force=args.force)
692
721
  PrettyOutput.print("\nCodebase generation completed", output_type=OutputType.SUCCESS)
693
722
  except Exception as e:
694
723
  PrettyOutput.print(f"Error during codebase generation: {str(e)}", output_type=OutputType.ERROR)
695
724
 
696
- if args.search:
697
- results = codebase.search_similar(args.search, args.top_k)
725
+ elif args.command == 'search':
726
+ results = codebase.search_similar(args.query, args.top_k)
698
727
  if not results:
699
728
  PrettyOutput.print("No similar files found", output_type=OutputType.WARNING)
700
729
  return
701
730
 
702
731
  PrettyOutput.print("\nSearch Results:", output_type=OutputType.INFO)
703
- for path, score, desc in results:
732
+ for path, score in results:
704
733
  PrettyOutput.print("\n" + "="*50, output_type=OutputType.INFO)
705
734
  PrettyOutput.print(f"File: {path}", output_type=OutputType.INFO)
706
735
  PrettyOutput.print(f"Similarity: {score:.3f}", output_type=OutputType.INFO)
707
- PrettyOutput.print(f"Description: {desc[100:]}", output_type=OutputType.INFO)
708
736
 
709
- if args.ask:
710
- codebase.ask_codebase(args.ask, args.top_k)
737
+ elif args.command == 'ask':
738
+ response = codebase.ask_codebase(args.question, args.top_k)
739
+ PrettyOutput.print("\nAnswer:", output_type=OutputType.INFO)
740
+ PrettyOutput.print(response, output_type=OutputType.INFO)
741
+
742
+ else:
743
+ parser.print_help()
711
744
 
712
745
 
713
746
  if __name__ == "__main__":
714
- exit(main())
747
+ exit(main())
@@ -6,51 +6,48 @@ from jarvis.utils import OutputType, PrettyOutput, while_success
6
6
  from jarvis.models.registry import PlatformRegistry
7
7
 
8
8
  def has_uncommitted_files() -> bool:
9
- """判断代码库是否有未提交的文件"""
10
- # 获取未暂存的修改
9
+ """Check if there are uncommitted files in the repository"""
10
+ # Get unstaged modifications
11
11
  unstaged = os.popen("git diff --name-only").read()
12
- # 获取已暂存但未提交的修改
12
+ # Get staged but uncommitted modifications
13
13
  staged = os.popen("git diff --cached --name-only").read()
14
- # 获取未跟踪的文件
14
+ # Get untracked files
15
15
  untracked = os.popen("git ls-files --others --exclude-standard").read()
16
16
 
17
17
  return bool(unstaged or staged or untracked)
18
18
 
19
- def generate_commit_message(git_diff: str, feature: str) -> str:
20
- """根据git diff和功能描述生成commit信息"""
21
- prompt = f"""你是一个经验丰富的程序员,请根据以下代码变更和功能描述生成简洁明了的commit信息:
19
+ def generate_commit_message(git_diff: str) -> str:
20
+ """Generate commit message based on git diff and feature description"""
21
+ prompt = f"""You are an experienced programmer, please generate a concise and clear commit message based on the following code changes and feature description:
22
22
 
23
- 功能描述:
24
- {feature}
25
-
26
- 代码变更:
23
+ Code changes:
27
24
  Git Diff:
28
25
  {git_diff}
29
26
 
30
- 请遵循以下规则:
31
- 1. 使用英文编写
32
- 2. 采用常规的commit message格式:<type>(<scope>): <subject>
33
- 3. 保持简洁,不超过50个字符
34
- 4. 准确描述代码变更的主要内容
35
- 5. 优先考虑功能描述和git diff中的变更内容
36
- 6. 仅生成commit信息的文本,不要输出任何其他内容
27
+ Please follow these rules:
28
+ 1. Write in English
29
+ 2. Use conventional commit message format: <type>(<scope>): <subject>
30
+ 3. Keep it concise, no more than 50 characters
31
+ 4. Accurately describe the main content of code changes
32
+ 5. Prioritize feature description and changes in git diff
33
+ 6. Only generate the commit message text, do not output anything else
37
34
  """
38
35
 
39
- model = PlatformRegistry().get_global_platform_registry().get_codegen_platform()
40
- response = while_success(lambda: model.chat(prompt), 5)
36
+ model = PlatformRegistry().get_global_platform_registry().get_normal_platform()
37
+ response = model.chat_until_success(prompt)
41
38
 
42
- return response.strip().split("\n")[0]
39
+ return ';'.join(response.strip().split("\n"))
43
40
 
44
41
  def save_edit_record(record_dir: str, commit_message: str, git_diff: str) -> None:
45
- """保存代码修改记录"""
46
- # 获取下一个序号
42
+ """Save code modification record"""
43
+ # Get next sequence number
47
44
  existing_records = [f for f in os.listdir(record_dir) if f.endswith('.yaml')]
48
45
  next_num = 1
49
46
  if existing_records:
50
47
  last_num = max(int(f[:4]) for f in existing_records)
51
48
  next_num = last_num + 1
52
49
 
53
- # 创建记录文件
50
+ # Create record file
54
51
  record = {
55
52
  "timestamp": time.strftime("%Y-%m-%d %H:%M:%S"),
56
53
  "commit_message": commit_message,
@@ -61,4 +58,4 @@ def save_edit_record(record_dir: str, commit_message: str, git_diff: str) -> Non
61
58
  with open(record_path, "w", encoding="utf-8") as f:
62
59
  yaml.safe_dump(record, f, allow_unicode=True)
63
60
 
64
- PrettyOutput.print(f"已保存修改记录: {record_path}", OutputType.SUCCESS)
61
+ PrettyOutput.print(f"Modification record saved: {record_path}", OutputType.SUCCESS)