auto-coder 0.1.283__py3-none-any.whl → 0.1.284__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of auto-coder might be problematic. Click here for more details.

auto_coder-0.1.284.dist-info/METADATA CHANGED
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: auto-coder
3
- Version: 0.1.283
3
+ Version: 0.1.284
4
4
  Summary: AutoCoder: AutoCoder
5
5
  Author: allwefantasy
6
6
  Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
auto_coder-0.1.284.dist-info/RECORD CHANGED
@@ -12,7 +12,7 @@ autocoder/chat_auto_coder_lang.py,sha256=ShOQVOnMA-WlT-fB9OrOer-xQkbcWxJGl-WMPuZ
12
12
  autocoder/command_args.py,sha256=9aYJ-AmPxP1sQh6ciw04FWHjSn31f2W9afXFwo8wgx4,30441
13
13
  autocoder/lang.py,sha256=U6AjVV8Rs1uLyjFCZ8sT6WWuNUxMBqkXXIOs4S120uk,14511
14
14
  autocoder/models.py,sha256=PlG1tKHSHwB57cKLOl5gTl5yTzFUDzCgeHPJU3N9F6Q,9106
15
- autocoder/version.py,sha256=gD3sSROI4mWkMlhRoIZLn--lc2LLLHyqeGIDWZ8UCTM,23
15
+ autocoder/version.py,sha256=CdPfaa9UyiMW7CWw6BaV5azX5klLdPvg_B_GcYjlyFk,23
16
16
  autocoder/agent/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
17
17
  autocoder/agent/auto_demand_organizer.py,sha256=NWSAEsEk94vT3lGjfo25kKLMwYdPcpy9e-i21txPasQ,6942
18
18
  autocoder/agent/auto_filegroup.py,sha256=CW7bqp0FW1GIEMnl-blyAc2UGT7O9Mom0q66ITz1ckM,6635
@@ -30,7 +30,7 @@ autocoder/commands/auto_command.py,sha256=3ZQvG_JX2oWxTv_xiXQDQwMfTAVK-Tynqo6mC9
30
30
  autocoder/commands/tools.py,sha256=lanjoBGR6H8HDJSY3KrM6ibrtHZbgKX6mKJHSSE66dg,20493
31
31
  autocoder/common/JupyterClient.py,sha256=O-wi6pXeAEYhAY24kDa0BINrLYvKS6rKyWe98pDClS0,2816
32
32
  autocoder/common/ShellClient.py,sha256=fM1q8t_XMSbLBl2zkCNC2J9xuyKN3eXzGm6hHhqL2WY,2286
33
- autocoder/common/__init__.py,sha256=Z6gvzhzLWYnXGVCnek2UoWic5DRiqWGQh4AiGZL3XVQ,12989
33
+ autocoder/common/__init__.py,sha256=nmvI1UImcPzPMrO1E6_5H7rXFA8bP8i1qGBYYDD5kBc,13182
34
34
  autocoder/common/anything2images.py,sha256=0ILBbWzY02M-CiWB-vzuomb_J1hVdxRcenAfIrAXq9M,25283
35
35
  autocoder/common/anything2img.py,sha256=4TREa-sOA-iargieUy7MpyCYVUE-9Mmq0wJtwomPqnE,7662
36
36
  autocoder/common/audio.py,sha256=Kn9nWKQddWnUrAz0a_ZUgjcu4VUU_IcZBigT7n3N3qc,7439
@@ -110,9 +110,10 @@ autocoder/rag/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
110
110
  autocoder/rag/api_server.py,sha256=xiypCkdbclY0Z3Cmq5FTvtKrfQUV7yKcDaFFUttA2n0,7242
111
111
  autocoder/rag/doc_filter.py,sha256=UduVO2mlrngwJICrefjDJTYfdmQ4GcRXrfWDQ7xXksk,14206
112
112
  autocoder/rag/document_retriever.py,sha256=MGn6oIPo49BbRC99xmLMFkZrpHfcDfKoGYqWxXF554U,8051
113
- autocoder/rag/lang.py,sha256=TVNx5m7OtBcdfahzI29tMj9m1yrEm32G1c1zc4ZNIPs,3130
113
+ autocoder/rag/lang.py,sha256=_jmUtxZDG1fmF4b2mhMJbYS1YQDb2ZE8nyAn5_vrvjA,3350
114
114
  autocoder/rag/llm_wrapper.py,sha256=Ht5GF5yJtrztoliujsZzx_ooWZmHkd5xLZKcGEiicZw,4303
115
- autocoder/rag/long_context_rag.py,sha256=mI7X_UT_QgL9uGmX1K5jSiRGC0K5o6m3CgtQESaG6Vk,40581
115
+ autocoder/rag/long_context_rag.py,sha256=Q-kVwfauaLcPtlVlHS5smOG07gyL-8uDg6ewwIfw13A,40121
116
+ autocoder/rag/qa_conversation_strategy.py,sha256=bWFSMcAsacEgvV7nTHtCroia2mstxqhWj8nz7k4HECI,4898
116
117
  autocoder/rag/rag_config.py,sha256=8LwFcTd8OJWWwi1_WY4IzjqgtT6RyE2j4PjxS5cCTDE,802
117
118
  autocoder/rag/rag_entry.py,sha256=6TKtErZ0Us9XSV6HgRKXA6yR3SiZGPHpynOKSaR1wgE,2463
118
119
  autocoder/rag/raw_rag.py,sha256=BOr0YGf3umjqXOIDVO1LXQ0bIHx8hzBdiubND2ezyxc,2946
@@ -168,9 +169,9 @@ autocoder/utils/types.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
168
169
  autocoder/utils/auto_coder_utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
169
170
  autocoder/utils/auto_coder_utils/chat_stream_out.py,sha256=lkJ_A-sYU36JMzjFWkk3pR6uos8oZHYt9GPsPe_CPAo,11766
170
171
  autocoder/utils/chat_auto_coder_utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
171
- auto_coder-0.1.283.dist-info/LICENSE,sha256=HrhfyXIkWY2tGFK11kg7vPCqhgh5DcxleloqdhrpyMY,11558
172
- auto_coder-0.1.283.dist-info/METADATA,sha256=pLzj-iE-hpBIpDnMabXu-4cpgkQmR3qSrOMruAEY098,2643
173
- auto_coder-0.1.283.dist-info/WHEEL,sha256=GV9aMThwP_4oNCtvEC2ec3qUYutgWeAzklro_0m4WJQ,91
174
- auto_coder-0.1.283.dist-info/entry_points.txt,sha256=0nzHtHH4pNcM7xq4EBA2toS28Qelrvcbrr59GqD_0Ak,350
175
- auto_coder-0.1.283.dist-info/top_level.txt,sha256=Jqc0_uJSw2GwoFQAa9iJxYns-2mWla-9ok_Y3Gcznjk,10
176
- auto_coder-0.1.283.dist-info/RECORD,,
172
+ auto_coder-0.1.284.dist-info/LICENSE,sha256=HrhfyXIkWY2tGFK11kg7vPCqhgh5DcxleloqdhrpyMY,11558
173
+ auto_coder-0.1.284.dist-info/METADATA,sha256=NYN7m8jbf2aikPm1nXpD_hTBHMJOmVcpb0-Y8DbhveE,2643
174
+ auto_coder-0.1.284.dist-info/WHEEL,sha256=GV9aMThwP_4oNCtvEC2ec3qUYutgWeAzklro_0m4WJQ,91
175
+ auto_coder-0.1.284.dist-info/entry_points.txt,sha256=0nzHtHH4pNcM7xq4EBA2toS28Qelrvcbrr59GqD_0Ak,350
176
+ auto_coder-0.1.284.dist-info/top_level.txt,sha256=Jqc0_uJSw2GwoFQAa9iJxYns-2mWla-9ok_Y3Gcznjk,10
177
+ auto_coder-0.1.284.dist-info/RECORD,,
autocoder/common/__init__.py CHANGED
@@ -295,7 +295,13 @@ class AutoCoderArgs(pydantic.BaseModel):
295
295
  rag_type: Optional[str] = "storage"
296
296
  rag_params_max_tokens: Optional[int] = 4096
297
297
  rag_doc_filter_relevance: Optional[int] = 5
298
- rag_context_window_limit: Optional[int] = 120000
298
+ rag_context_window_limit: Optional[int] = 120000
299
+
300
+ # 回答用户问题时,使用哪种对话历史策略
301
+ # single_round: 单轮对话
302
+ # multi_round: 多轮对话
303
+ rag_qa_conversation_strategy: Optional[str] = "multi_round"
304
+
299
305
  verify_file_relevance_score: int = 6
300
306
  enable_rag_search: Optional[Union[bool, str]] = False
301
307
  enable_rag_context: Optional[Union[bool, str]] = False
autocoder/rag/lang.py CHANGED
@@ -13,7 +13,8 @@ MESSAGES = {
13
13
  "doc_filter_start": "Document filtering start, total {{total}} documents",
14
14
  "doc_filter_progress": "Document filtering progress: {{progress_percent}}% processed {{relevant_count}}/{{total}} documents",
15
15
  "doc_filter_error": "Document filtering error: {{error}}",
16
- "doc_filter_complete": "Document filtering complete, cost {{total_time}} seconds, found {{relevant_count}} relevant documents"
16
+ "doc_filter_complete": "Document filtering complete, cost {{total_time}} seconds, found {{relevant_count}} relevant documents",
17
+ "context_docs_names": "The following are the documents related to the user's question: {{context_docs_names}}",
17
18
  },
18
19
  "zh": {
19
20
  "rag_error_title": "RAG 错误",
@@ -26,7 +27,8 @@ MESSAGES = {
26
27
  "doc_filter_start": "开始过滤文档,共 {{total}} 个文档",
27
28
  "doc_filter_progress": "文档过滤进度:{{progress_percent}}%,处理了 {{relevant_count}}/{{total}} 个文档",
28
29
  "doc_filter_error": "文档过滤错误:{{error}}",
29
- "doc_filter_complete": "文档过滤完成,耗时 {{total_time}} 秒,找到 {{relevant_count}} 个相关文档"
30
+ "doc_filter_complete": "文档过滤完成,耗时 {{total_time}} 秒,找到 {{relevant_count}} 个相关文档",
31
+ "context_docs_names": "以下是和用户问题相关的文档:{{context_docs_names}}",
30
32
  }
31
33
  }
32
34
 
autocoder/rag/long_context_rag.py CHANGED
@@ -37,6 +37,7 @@ from autocoder.rag.relevant_utils import DocFilterResult
37
37
  from pydantic import BaseModel
38
38
  from byzerllm.utils.types import SingleOutputMeta
39
39
  from autocoder.rag.lang import get_message_with_format_and_newline
40
+ from autocoder.rag.qa_conversation_strategy import get_qa_strategy
40
41
 
41
42
  try:
42
43
  from autocoder_pro.rag.llm_compute import LLMComputeEngine
@@ -173,10 +174,11 @@ class LongContextRAG:
173
174
 
174
175
  self.token_limit = self.args.rag_context_window_limit or 120000
175
176
  retriever_class = self._get_document_retriever_class()
176
-
177
+
177
178
  if self.args.enable_hybrid_index and not self.on_ray:
178
179
  if self.emb_llm is None:
179
- raise ValueError("emb_llm is required for local byzer storage cache")
180
+ raise ValueError(
181
+ "emb_llm is required for local byzer storage cache")
180
182
 
181
183
  self.document_retriever = retriever_class(
182
184
  self.path,
@@ -255,36 +257,7 @@ class LongContextRAG:
255
257
  请根据提供的文档内容、用户对话历史以及最后一个问题,提取并总结文档中与问题相关的重要信息。
256
258
  如果文档中没有相关信息,请回复"该文档中没有与问题相关的信息"。
257
259
  提取的信息尽量保持和原文中的一样,并且只输出这些信息。
258
- """
259
-
260
- @byzerllm.prompt()
261
- def _answer_question(
262
- self, query: str, relevant_docs: List[str]
263
- ) -> Generator[str, None, None]:
264
- """
265
- 文档:
266
- <documents>
267
- {% for doc in relevant_docs %}
268
- {{ doc }}
269
- {% endfor %}
270
- </documents>
271
-
272
- 使用以上文档来回答用户的问题。回答要求:
273
-
274
- 1. 严格基于文档内容回答
275
- - 如果文档提供的信息无法回答问题,请明确回复:"抱歉,文档中没有足够的信息来回答这个问题。"
276
- - 不要添加、推测或扩展文档未提及的信息
277
-
278
- 2. 格式如 ![image](./path.png) 的 Markdown 图片处理
279
- - 根据Markdown 图片前后文本内容推测改图片与问题的相关性,有相关性则在回答中输出该Markdown图片路径
280
- - 根据相关图片在文档中的位置,自然融入答复内容,保持上下文连贯
281
- - 完整保留原始图片路径,不省略任何部分
282
-
283
- 3. 回答格式要求
284
- - 使用markdown格式提升可读性
285
-
286
- 问题:{{ query }}
287
- """
260
+ """
288
261
 
289
262
  def _get_document_retriever_class(self):
290
263
  """Get the document retriever class based on configuration."""
@@ -627,13 +600,22 @@ class LongContextRAG:
627
600
 
628
601
  if not relevant_docs:
629
602
  yield ("没有找到可以回答你问题的相关文档", SingleOutputMeta(input_tokens_count=rag_stat.recall_stat.total_input_tokens + rag_stat.chunk_stat.total_input_tokens,
630
- generated_tokens_count=rag_stat.recall_stat.total_generated_tokens +
631
- rag_stat.chunk_stat.total_generated_tokens,
632
- ))
603
+ generated_tokens_count=rag_stat.recall_stat.total_generated_tokens +
604
+ rag_stat.chunk_stat.total_generated_tokens,
605
+ ))
633
606
  return
634
607
 
635
608
  context = [doc.source_code.module_name for doc in relevant_docs]
636
609
 
610
+ yield ("", SingleOutputMeta(input_tokens_count=rag_stat.recall_stat.total_input_tokens + rag_stat.chunk_stat.total_input_tokens,
611
+ generated_tokens_count=rag_stat.recall_stat.total_generated_tokens +
612
+ rag_stat.chunk_stat.total_generated_tokens,
613
+ reasoning_content=get_message_with_format_and_newline(
614
+ "context_docs_names",
615
+ context_docs_names=",".join(
616
+ context))
617
+ ))
618
+
637
619
  # 将 FilterDoc 转化为 SourceCode 方便后续的逻辑继续做处理
638
620
  relevant_docs = [doc.source_code for doc in relevant_docs]
639
621
 
@@ -792,7 +774,7 @@ class LongContextRAG:
792
774
  tokens=request_tokens
793
775
  )
794
776
  ))
795
-
777
+
796
778
  yield ("", SingleOutputMeta(input_tokens_count=rag_stat.recall_stat.total_input_tokens + rag_stat.chunk_stat.total_input_tokens,
797
779
  generated_tokens_count=rag_stat.recall_stat.total_generated_tokens +
798
780
  rag_stat.chunk_stat.total_generated_tokens,
@@ -834,16 +816,12 @@ class LongContextRAG:
834
816
 
835
817
  self._print_rag_stats(rag_stat)
836
818
  else:
837
- new_conversations = conversations[:-1] + [
838
- {
839
- "role": "user",
840
- "content": self._answer_question.prompt(
841
- query=query,
842
- relevant_docs=[
843
- doc.source_code for doc in relevant_docs],
844
- ),
845
- }
846
- ]
819
+
820
+ qa_strategy = get_qa_strategy(self.args.rag_qa_conversation_strategy)
821
+ new_conversations = qa_strategy.create_conversation(
822
+ documents=[doc.source_code for doc in relevant_docs],
823
+ conversations=conversations
824
+ )
847
825
 
848
826
  chunks = target_llm.stream_chat_oai(
849
827
  conversations=new_conversations,
@@ -864,7 +842,7 @@ class LongContextRAG:
864
842
  chunk[1].generated_tokens_count = rag_stat.recall_stat.total_generated_tokens + \
865
843
  rag_stat.chunk_stat.total_generated_tokens + \
866
844
  rag_stat.answer_stat.total_generated_tokens
867
-
845
+
868
846
  yield chunk
869
847
 
870
848
  self._print_rag_stats(rag_stat)
autocoder/rag/qa_conversation_strategy.py ADDED
@@ -0,0 +1,132 @@
1
+ from abc import ABC, abstractmethod
2
+ from typing import List, Dict, Any,Generator
3
+ import byzerllm
4
+
5
+ class QAConversationStrategy(ABC):
6
+ """
7
+ Abstract base class for conversation strategies.
8
+ Different strategies organize documents and conversations differently.
9
+ """
10
+ @abstractmethod
11
+ def create_conversation(self, documents: List[Any], conversations: List[Dict[str,str]]) -> List[Dict]:
12
+ """
13
+ Create a conversation structure based on documents and history
14
+
15
+ Args:
16
+ documents: List of retrieved documents
17
+ conversations: conversation turns
18
+
19
+ Returns:
20
+ List of message dictionaries representing the conversation to send to the model
21
+ """
22
+ pass
23
+
24
+ class MultiRoundStrategy(QAConversationStrategy):
25
+ """
26
+ Multi-round strategy: First let the model read documents, then do Q&A.
27
+ Creates multiple conversation turns.
28
+ """
29
+ def create_conversation(self, documents: List[Any], conversations: List[Dict[str,str]]) -> List[Dict]:
30
+ messages = []
31
+ messages.extend([
32
+ {"role": "user", "content": self._read_docs_prompt.prompt(documents)},
33
+ {"role": "assistant", "content": "好的"}
34
+ ])
35
+ messages.extend(conversations)
36
+ return messages
37
+
38
+ @byzerllm.prompt()
39
+ def _read_docs_prompt(
40
+ self, relevant_docs: List[str]
41
+ ) -> Generator[str, None, None]:
42
+ """
43
+ 请阅读以下:
44
+ <documents>
45
+ {% for doc in relevant_docs %}
46
+ {{ doc }}
47
+ {% endfor %}
48
+ </documents>
49
+
50
+ 阅读完成后,使用以上文档来回答用户的问题。回答要求:
51
+
52
+ 1. 严格基于文档内容回答
53
+ - 如果文档提供的信息无法回答问题,请明确回复:"抱歉,文档中没有足够的信息来回答这个问题。"
54
+ - 不要添加、推测或扩展文档未提及的信息
55
+
56
+ 2. 格式如 ![image](./path.png) 的 Markdown 图片处理
57
+ - 根据Markdown 图片前后文本内容推测改图片与问题的相关性,有相关性则在回答中输出该Markdown图片路径
58
+ - 根据相关图片在文档中的位置,自然融入答复内容,保持上下文连贯
59
+ - 完整保留原始图片路径,不省略任何部分
60
+
61
+ 3. 回答格式要求
62
+ - 使用markdown格式提升可读性
63
+ """
64
+
65
+ class SingleRoundStrategy(QAConversationStrategy):
66
+ """
67
+ Single-round strategy: Put documents and conversation history in a single round.
68
+ """
69
+ def create_conversation(self, documents: List[Any], conversations: List[Dict[str,str]]) -> List[Dict]:
70
+ messages = []
71
+ messages.extend([
72
+ {"role": "user", "content": self._single_round_answer_question.prompt(documents, conversations)}
73
+ ])
74
+ return messages
75
+
76
+ @byzerllm.prompt()
77
+ def _single_round_answer_question(
78
+ self, relevant_docs: List[str], conversations: List[Dict[str, str]]
79
+ ) -> Generator[str, None, None]:
80
+ """
81
+ 文档:
82
+ <documents>
83
+ {% for doc in relevant_docs %}
84
+ {{ doc }}
85
+ {% endfor %}
86
+ </documents>
87
+
88
+ 用户历史对话:
89
+ <conversations>
90
+ {% for msg in conversations %}
91
+ <{{ msg.role }}>: {{ msg.content }}
92
+ {% endfor %}
93
+ </conversations>
94
+
95
+ 使用以上文档来回答用户最后的问题。回答要求:
96
+
97
+ 1. 严格基于文档内容回答
98
+ - 如果文档提供的信息无法回答问题,请明确回复:"抱歉,文档中没有足够的信息来回答这个问题。"
99
+ - 不要添加、推测或扩展文档未提及的信息
100
+
101
+ 2. 格式如 ![image](./path.png) 的 Markdown 图片处理
102
+ - 根据Markdown 图片前后文本内容推测改图片与问题的相关性,有相关性则在回答中输出该Markdown图片路径
103
+ - 根据相关图片在文档中的位置,自然融入答复内容,保持上下文连贯
104
+ - 完整保留原始图片路径,不省略任何部分
105
+
106
+ 3. 回答格式要求
107
+ - 使用markdown格式提升可读性
108
+
109
+ """
110
+
111
+ def get_qa_strategy(strategy_name: str) -> QAConversationStrategy:
112
+ """
113
+ Factory method to get the appropriate conversation strategy
114
+
115
+ Args:
116
+ strategy_name: Name of the strategy to use
117
+
118
+ Returns:
119
+ An instance of the requested strategy
120
+
121
+ Raises:
122
+ ValueError: If the requested strategy doesn't exist
123
+ """
124
+ strategies = {
125
+ "multi_round": MultiRoundStrategy,
126
+ "single_round": SingleRoundStrategy,
127
+ }
128
+
129
+ if strategy_name not in strategies:
130
+ raise ValueError(f"Unknown strategy: {strategy_name}. Available strategies: {list(strategies.keys())}")
131
+
132
+ return strategies[strategy_name]()
autocoder/version.py CHANGED
@@ -1 +1 @@
1
- __version__ = "0.1.283"
1
+ __version__ = "0.1.284"