auto-coder 0.1.289__py3-none-any.whl → 0.1.291__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of auto-coder might be problematic. Click here for more details.
- {auto_coder-0.1.289.dist-info → auto_coder-0.1.291.dist-info}/METADATA +2 -2
- {auto_coder-0.1.289.dist-info → auto_coder-0.1.291.dist-info}/RECORD +19 -16
- autocoder/auto_coder_rag.py +10 -0
- autocoder/common/__init__.py +4 -0
- autocoder/rag/api_server.py +48 -0
- autocoder/rag/cache/byzer_storage_cache.py +254 -44
- autocoder/rag/cache/cache_result_merge.py +265 -0
- autocoder/rag/cache/file_monitor_cache.py +117 -4
- autocoder/rag/cache/local_byzer_storage_cache.py +286 -58
- autocoder/rag/cache/rag_file_meta.py +494 -0
- autocoder/rag/cache/simple_cache.py +80 -8
- autocoder/rag/conversation_to_queries.py +139 -0
- autocoder/rag/long_context_rag.py +9 -3
- autocoder/rag/qa_conversation_strategy.py +21 -10
- autocoder/version.py +1 -1
- {auto_coder-0.1.289.dist-info → auto_coder-0.1.291.dist-info}/LICENSE +0 -0
- {auto_coder-0.1.289.dist-info → auto_coder-0.1.291.dist-info}/WHEEL +0 -0
- {auto_coder-0.1.289.dist-info → auto_coder-0.1.291.dist-info}/entry_points.txt +0 -0
- {auto_coder-0.1.289.dist-info → auto_coder-0.1.291.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,139 @@
|
|
|
1
|
+
from typing import List, Dict, Any, Optional, Union
|
|
2
|
+
import logging
|
|
3
|
+
import byzerllm
|
|
4
|
+
from pydantic import BaseModel
|
|
5
|
+
from autocoder.common import AutoCoderArgs
|
|
6
|
+
|
|
7
|
+
logger = logging.getLogger(__name__)
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
class SearchQuery(BaseModel):
    """A single search query derived from a conversation."""

    # Text to submit to the search backend.
    query: str
    # Relative importance on a 1-10 scale; defaults to the midpoint.
    importance: int = 5
    # Short explanation of what this query is meant to find.
    purpose: str = ""
|
|
15
|
+
|
|
16
|
+
class ConversationToQueries:
    """Turn a conversation history into search queries by prompting an LLM."""

    def __init__(self, llm: Union[byzerllm.ByzerLLM, byzerllm.SimpleByzerLLM]):
        """Store the LLM client used to execute the prompt function.

        Args:
            llm: ByzerLLM (or SimpleByzerLLM) instance that runs the prompt.
        """
        self.llm = llm

    # NOTE: the docstring below is the prompt template itself (it carries
    # Jinja placeholders such as ``{{ max_queries }}``), i.e. runtime text
    # sent to the model. Do not translate or reword it as if it were
    # ordinary documentation.
    @byzerllm.prompt()
    def generate_search_queries(self, conversations: List[Dict[str, Any]], max_queries: int = 3) -> str:
        """
        根据历史对话生成搜索查询。

        参数:
            conversations: 历史对话列表,每个对话是一个字典,包含 'role' 和 'content' 字段
            max_queries: 最大生成的查询数量,默认为 3

        返回:
            生成的搜索查询列表的 JSON 字符串

        任务说明:
        你是一个专业的对话分析助手。你的任务是分析用户与 AI 的对话历史,从中提取关键信息,
        并生成用于搜索引擎的查询,以便获取与对话相关的知识和信息。

        具体要求:
        1. 仔细分析对话历史,特别是最近的几轮对话
        2. 识别用户可能需要更多信息或知识的关键问题和主题
        3. 将这些关键问题转化为明确、简洁的搜索查询
        4. 每个查询应该足够具体,能够通过搜索引擎找到有用的结果
        5. 为每个查询提供重要性评分(1-10 分)和用途说明
        6. 最多生成 {{ max_queries }} 个查询,按重要性排序
        7. 返回符合指定格式的 JSON 数据

        可能的场景:
        - 用户询问特定技术或概念,需要进一步的解释或示例
        - 用户遇到编程问题,需要查找解决方案或最佳实践
        - 用户讨论的话题涉及多个方面,需要查找不同角度的信息
        - 用户想了解某个领域的最新发展或趋势

        ---

        对话历史:
        <conversations>
        {% for msg in conversations %}
        {{ msg.role }}: {{ msg.content }}
        {% endfor %}
        </conversations>

        请分析上述对话,提取关键问题并生成最多 {{ max_queries }} 个搜索查询。

        输出格式:
        ```json
        [
            {
                "query": "搜索查询1",
                "importance": 评分(1-10),
                "purpose": "该查询的目的说明"
            },
            {
                "query": "搜索查询2",
                "importance": 评分(1-10),
                "purpose": "该查询的目的说明"
            }
        ]
        ```
        """

    def extract_queries(self, conversations: List[Dict[str, Any]], max_queries: int = 3) -> List[SearchQuery]:
        """Extract search queries from a conversation history.

        This is a best-effort helper: any failure while prompting the model
        or handling its answer is logged and an empty list is returned
        instead of raising.

        Args:
            conversations: Conversation history; the prompt template reads
                the ``role`` and ``content`` of each message.
            max_queries: Upper bound on the number of queries returned.
                A non-positive value returns ``[]`` without calling the LLM.

        Returns:
            ``SearchQuery`` objects sorted by descending ``importance`` and
            truncated to ``max_queries`` entries.
        """
        # Nothing was asked for, so skip the LLM round-trip entirely.
        # ``None`` is let through unchanged (it means "no cap" for the slice).
        if max_queries is not None and max_queries <= 0:
            return []

        try:
            result = (
                self.generate_search_queries
                .with_llm(self.llm)
                .with_return_type(SearchQuery)
                .run(conversations=conversations, max_queries=max_queries)
            )

            if result is None:
                return []
            # NOTE(review): the prompt runner appears to hand back a bare
            # SearchQuery when the model answers with a single JSON object
            # instead of an array; wrap it so one valid query is not lost.
            if isinstance(result, SearchQuery):
                result = [result]

            ranked = sorted(result, key=lambda item: item.importance, reverse=True)
            # The prompt only *asks* the model to respect the limit; enforce it.
            return ranked[:max_queries]
        except Exception as e:
            logger.error(f"Error extracting queries from conversation: {str(e)}")
            return []
|
|
114
|
+
|
|
115
|
+
def extract_search_queries(
    conversations: List[Dict[str, Any]],
    args: AutoCoderArgs,
    llm: Union[byzerllm.ByzerLLM, byzerllm.SimpleByzerLLM],
    max_queries: int = 3,
) -> List[SearchQuery]:
    """Convenience wrapper that extracts search queries from a conversation.

    Never raises: any failure is logged and an empty list is returned.

    Args:
        conversations: Conversation history (list of message dicts).
        args: AutoCoder configuration. Not read by this function at present;
            kept so existing callers keep working.
        llm: ByzerLLM (or SimpleByzerLLM) instance used to run the prompt.
        max_queries: Maximum number of queries to generate. A value of zero
            or below disables extraction.

    Returns:
        A list of ``SearchQuery`` objects; empty when extraction is disabled,
        there is no conversation to analyse, or extraction fails.
    """
    # Avoid a pointless LLM call when extraction is disabled (zero or a
    # negative limit) or when there is no history to analyse. ``None`` is
    # deliberately not treated as "disabled" so it keeps flowing through.
    if max_queries is not None and max_queries <= 0:
        return []
    if not conversations:
        return []

    try:
        return ConversationToQueries(llm).extract_queries(conversations, max_queries)
    except Exception as e:
        logger.error(f"Error extracting search queries from conversation: {str(e)}")
        return []
|
|
@@ -39,6 +39,7 @@ from byzerllm.utils.types import SingleOutputMeta
|
|
|
39
39
|
from autocoder.rag.lang import get_message_with_format_and_newline
|
|
40
40
|
from autocoder.rag.qa_conversation_strategy import get_qa_strategy
|
|
41
41
|
from autocoder.rag.searchable import SearchableResults
|
|
42
|
+
from autocoder.rag.conversation_to_queries import extract_search_queries
|
|
42
43
|
try:
|
|
43
44
|
from autocoder_pro.rag.llm_compute import LLMComputeEngine
|
|
44
45
|
pro_version = version("auto-coder-pro")
|
|
@@ -333,7 +334,9 @@ class LongContextRAG:
|
|
|
333
334
|
|
|
334
335
|
def _filter_docs(self, conversations: List[Dict[str, str]]) -> DocFilterResult:
|
|
335
336
|
query = conversations[-1]["content"]
|
|
336
|
-
|
|
337
|
+
queries = extract_search_queries(conversations=conversations, args=self.args, llm=self.llm, max_queries=self.args.rag_recall_max_queries)
|
|
338
|
+
documents = self._retrieve_documents(
|
|
339
|
+
options={"queries": [query] + [query.query for query in queries]})
|
|
337
340
|
return self.doc_filter.filter_docs(
|
|
338
341
|
conversations=conversations, documents=documents
|
|
339
342
|
)
|
|
@@ -546,7 +549,10 @@ class LongContextRAG:
|
|
|
546
549
|
model_name=rag_stat.recall_stat.model_name
|
|
547
550
|
)
|
|
548
551
|
query = conversations[-1]["content"]
|
|
549
|
-
|
|
552
|
+
queries = extract_search_queries(
|
|
553
|
+
conversations=conversations, args=self.args, llm=self.llm, max_queries=self.args.rag_recall_max_queries)
|
|
554
|
+
documents = self._retrieve_documents(
|
|
555
|
+
options={"queries": [query] + [query.query for query in queries]})
|
|
550
556
|
|
|
551
557
|
# 使用带进度报告的过滤方法
|
|
552
558
|
for progress_update, result in self.doc_filter.filter_docs_with_progress(conversations, documents):
|
|
@@ -833,7 +839,7 @@ class LongContextRAG:
|
|
|
833
839
|
self.args.rag_qa_conversation_strategy)
|
|
834
840
|
new_conversations = qa_strategy.create_conversation(
|
|
835
841
|
documents=[doc.source_code for doc in relevant_docs],
|
|
836
|
-
conversations=conversations
|
|
842
|
+
conversations=conversations, local_image_host=self.args.local_image_host
|
|
837
843
|
)
|
|
838
844
|
|
|
839
845
|
chunks = target_llm.stream_chat_oai(
|
|
@@ -8,7 +8,7 @@ class QAConversationStrategy(ABC):
|
|
|
8
8
|
Different strategies organize documents and conversations differently.
|
|
9
9
|
"""
|
|
10
10
|
@abstractmethod
|
|
11
|
-
def create_conversation(self, documents: List[Any], conversations: List[Dict[str,str]]) -> List[Dict]:
|
|
11
|
+
def create_conversation(self, documents: List[Any], conversations: List[Dict[str,str]], local_image_host: str) -> List[Dict]:
|
|
12
12
|
"""
|
|
13
13
|
Create a conversation structure based on documents and history
|
|
14
14
|
|
|
@@ -26,10 +26,10 @@ class MultiRoundStrategy(QAConversationStrategy):
|
|
|
26
26
|
Multi-round strategy: First let the model read documents, then do Q&A.
|
|
27
27
|
Creates multiple conversation turns.
|
|
28
28
|
"""
|
|
29
|
-
def create_conversation(self, documents: List[Any], conversations: List[Dict[str,str]]) -> List[Dict]:
|
|
29
|
+
def create_conversation(self, documents: List[Any], conversations: List[Dict[str,str]], local_image_host: str) -> List[Dict]:
|
|
30
30
|
messages = []
|
|
31
31
|
messages.extend([
|
|
32
|
-
{"role": "user", "content": self._read_docs_prompt.prompt(documents)},
|
|
32
|
+
{"role": "user", "content": self._read_docs_prompt.prompt(documents, local_image_host)},
|
|
33
33
|
{"role": "assistant", "content": "好的"}
|
|
34
34
|
])
|
|
35
35
|
messages.extend(conversations)
|
|
@@ -37,7 +37,7 @@ class MultiRoundStrategy(QAConversationStrategy):
|
|
|
37
37
|
|
|
38
38
|
@byzerllm.prompt()
|
|
39
39
|
def _read_docs_prompt(
|
|
40
|
-
self, relevant_docs: List[str]
|
|
40
|
+
self, relevant_docs: List[str], local_image_host: str
|
|
41
41
|
) -> Generator[str, None, None]:
|
|
42
42
|
"""
|
|
43
43
|
请阅读以下:
|
|
@@ -53,29 +53,35 @@ class MultiRoundStrategy(QAConversationStrategy):
|
|
|
53
53
|
- 如果文档提供的信息无法回答问题,请明确回复:"抱歉,文档中没有足够的信息来回答这个问题。"
|
|
54
54
|
- 不要添加、推测或扩展文档未提及的信息
|
|
55
55
|
|
|
56
|
-
2. 格式如  的 Markdown 图片处理
|
|
57
57
|
- 根据Markdown 图片前后文本内容推测改图片与问题的相关性,有相关性则在回答中输出该Markdown图片路径
|
|
58
58
|
- 根据相关图片在文档中的位置,自然融入答复内容,保持上下文连贯
|
|
59
59
|
- 完整保留原始图片路径,不省略任何部分
|
|
60
60
|
|
|
61
61
|
3. 回答格式要求
|
|
62
62
|
- 使用markdown格式提升可读性
|
|
63
|
+
{% if local_image_host %}
|
|
64
|
+
4. 图片路径处理
|
|
65
|
+
- 图片地址需返回绝对路径,
|
|
66
|
+
- 为请求图片资源 需增加 http://{{ local_image_host }}/static/ 作为前缀
|
|
67
|
+
例如:/path/to/images/image.png, 返回 http://{{ local_image_host }}/static/path/to/images/image.png
|
|
68
|
+
{% endif %}
|
|
63
69
|
"""
|
|
64
70
|
|
|
65
71
|
class SingleRoundStrategy(QAConversationStrategy):
|
|
66
72
|
"""
|
|
67
73
|
Single-round strategy: Put documents and conversation history in a single round.
|
|
68
74
|
"""
|
|
69
|
-
def create_conversation(self, documents: List[Any], conversations: List[Dict[str,str]]) -> List[Dict]:
|
|
75
|
+
def create_conversation(self, documents: List[Any], conversations: List[Dict[str,str]], local_image_host: str) -> List[Dict]:
|
|
70
76
|
messages = []
|
|
71
77
|
messages.extend([
|
|
72
|
-
{"role": "user", "content": self._single_round_answer_question.prompt(documents, conversations)}
|
|
78
|
+
{"role": "user", "content": self._single_round_answer_question.prompt(documents, conversations, local_image_host)}
|
|
73
79
|
])
|
|
74
80
|
return messages
|
|
75
81
|
|
|
76
82
|
@byzerllm.prompt()
|
|
77
83
|
def _single_round_answer_question(
|
|
78
|
-
self, relevant_docs: List[str], conversations: List[Dict[str, str]]
|
|
84
|
+
self, relevant_docs: List[str], conversations: List[Dict[str, str]], local_image_host: str
|
|
79
85
|
) -> Generator[str, None, None]:
|
|
80
86
|
"""
|
|
81
87
|
文档:
|
|
@@ -98,14 +104,19 @@ class SingleRoundStrategy(QAConversationStrategy):
|
|
|
98
104
|
- 如果文档提供的信息无法回答问题,请明确回复:"抱歉,文档中没有足够的信息来回答这个问题。"
|
|
99
105
|
- 不要添加、推测或扩展文档未提及的信息
|
|
100
106
|
|
|
101
|
-
2. 格式如  的 Markdown 图片处理
|
|
102
108
|
- 根据Markdown 图片前后文本内容推测改图片与问题的相关性,有相关性则在回答中输出该Markdown图片路径
|
|
103
109
|
- 根据相关图片在文档中的位置,自然融入答复内容,保持上下文连贯
|
|
104
110
|
- 完整保留原始图片路径,不省略任何部分
|
|
105
111
|
|
|
106
112
|
3. 回答格式要求
|
|
107
113
|
- 使用markdown格式提升可读性
|
|
108
|
-
|
|
114
|
+
{% if local_image_host %}
|
|
115
|
+
4. 图片路径处理
|
|
116
|
+
- 图片地址需返回绝对路径,
|
|
117
|
+
- 为请求图片资源 需增加 http://{{ local_image_host }}/static/ 作为前缀
|
|
118
|
+
例如:/path/to/images/image.png, 返回 http://{{ local_image_host }}/static/path/to/images/image.png
|
|
119
|
+
{% endif %}
|
|
109
120
|
"""
|
|
110
121
|
|
|
111
122
|
def get_qa_strategy(strategy_name: str) -> QAConversationStrategy:
|
autocoder/version.py
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
__version__ = "0.1.289"
|
|
1
|
+
__version__ = "0.1.291"
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|