flowllm 0.1.1__py3-none-any.whl → 0.1.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- flowllm/__init__.py +19 -6
- flowllm/app.py +4 -14
- flowllm/client/__init__.py +25 -0
- flowllm/client/async_http_client.py +81 -0
- flowllm/client/http_client.py +81 -0
- flowllm/client/mcp_client.py +133 -0
- flowllm/client/sync_mcp_client.py +116 -0
- flowllm/config/__init__.py +1 -0
- flowllm/config/{default_config.yaml → default.yaml} +3 -8
- flowllm/config/empty.yaml +37 -0
- flowllm/config/pydantic_config_parser.py +17 -17
- flowllm/context/base_context.py +27 -7
- flowllm/context/flow_context.py +6 -18
- flowllm/context/registry.py +5 -1
- flowllm/context/service_context.py +83 -37
- flowllm/embedding_model/__init__.py +1 -1
- flowllm/embedding_model/base_embedding_model.py +91 -0
- flowllm/embedding_model/openai_compatible_embedding_model.py +63 -5
- flowllm/flow/__init__.py +1 -0
- flowllm/flow/base_flow.py +74 -0
- flowllm/flow/base_tool_flow.py +15 -0
- flowllm/flow/gallery/__init__.py +8 -0
- flowllm/flow/gallery/cmd_flow.py +11 -0
- flowllm/flow/gallery/code_tool_flow.py +30 -0
- flowllm/flow/gallery/dashscope_search_tool_flow.py +34 -0
- flowllm/flow/gallery/deepsearch_tool_flow.py +39 -0
- flowllm/flow/gallery/expression_tool_flow.py +18 -0
- flowllm/flow/gallery/mock_tool_flow.py +62 -0
- flowllm/flow/gallery/tavily_search_tool_flow.py +30 -0
- flowllm/flow/gallery/terminate_tool_flow.py +30 -0
- flowllm/flow/parser/__init__.py +0 -0
- flowllm/{flow_engine/simple_flow_engine.py → flow/parser/expression_parser.py} +25 -67
- flowllm/llm/__init__.py +2 -1
- flowllm/llm/base_llm.py +94 -4
- flowllm/llm/litellm_llm.py +456 -0
- flowllm/llm/openai_compatible_llm.py +205 -5
- flowllm/op/__init__.py +12 -3
- flowllm/op/agent/__init__.py +1 -0
- flowllm/op/agent/react_v1_op.py +109 -0
- flowllm/op/agent/react_v1_prompt.yaml +54 -0
- flowllm/op/agent/react_v2_op.py +86 -0
- flowllm/op/agent/react_v2_prompt.yaml +35 -0
- flowllm/op/akshare/__init__.py +3 -0
- flowllm/op/akshare/get_ak_a_code_op.py +14 -22
- flowllm/op/akshare/get_ak_a_info_op.py +17 -20
- flowllm/op/{llm_base_op.py → base_llm_op.py} +7 -5
- flowllm/op/base_op.py +40 -44
- flowllm/op/base_ray_op.py +313 -0
- flowllm/op/code/__init__.py +1 -0
- flowllm/op/code/execute_code_op.py +42 -0
- flowllm/op/gallery/__init__.py +2 -0
- flowllm/op/{mock_op.py → gallery/mock_op.py} +4 -4
- flowllm/op/gallery/terminate_op.py +29 -0
- flowllm/op/parallel_op.py +2 -9
- flowllm/op/search/__init__.py +3 -0
- flowllm/op/search/dashscope_deep_research_op.py +267 -0
- flowllm/op/search/dashscope_search_op.py +186 -0
- flowllm/op/search/dashscope_search_prompt.yaml +13 -0
- flowllm/op/search/tavily_search_op.py +109 -0
- flowllm/op/sequential_op.py +1 -9
- flowllm/schema/flow_request.py +12 -0
- flowllm/schema/message.py +2 -0
- flowllm/schema/service_config.py +12 -16
- flowllm/schema/tool_call.py +20 -8
- flowllm/schema/vector_node.py +1 -0
- flowllm/service/__init__.py +3 -2
- flowllm/service/base_service.py +50 -41
- flowllm/service/cmd_service.py +15 -0
- flowllm/service/http_service.py +34 -42
- flowllm/service/mcp_service.py +13 -11
- flowllm/storage/cache/__init__.py +1 -0
- flowllm/storage/cache/cache_data_handler.py +104 -0
- flowllm/{utils/dataframe_cache.py → storage/cache/data_cache.py} +136 -92
- flowllm/storage/vector_store/__init__.py +3 -3
- flowllm/storage/vector_store/base_vector_store.py +3 -0
- flowllm/storage/vector_store/es_vector_store.py +4 -5
- flowllm/storage/vector_store/local_vector_store.py +0 -1
- flowllm/utils/common_utils.py +9 -21
- flowllm/utils/fetch_url.py +16 -12
- flowllm/utils/llm_utils.py +28 -0
- flowllm/utils/logger_utils.py +28 -0
- flowllm/utils/ridge_v2.py +54 -0
- {flowllm-0.1.1.dist-info → flowllm-0.1.3.dist-info}/METADATA +43 -390
- flowllm-0.1.3.dist-info/RECORD +102 -0
- flowllm-0.1.3.dist-info/entry_points.txt +2 -0
- flowllm/flow_engine/__init__.py +0 -1
- flowllm/flow_engine/base_flow_engine.py +0 -34
- flowllm-0.1.1.dist-info/RECORD +0 -62
- flowllm-0.1.1.dist-info/entry_points.txt +0 -4
- {flowllm-0.1.1.dist-info → flowllm-0.1.3.dist-info}/WHEEL +0 -0
- {flowllm-0.1.1.dist-info → flowllm-0.1.3.dist-info}/licenses/LICENSE +0 -0
- {flowllm-0.1.1.dist-info → flowllm-0.1.3.dist-info}/top_level.txt +0 -0
flowllm/op/parallel_op.py
CHANGED
@@ -4,13 +4,6 @@ from flowllm.op.base_op import BaseOp
 
 
 class ParallelOp(BaseOp):
-    """Container class for parallel operation execution
-
-    Executes multiple operations in parallel, all operations use the same input,
-    returns a list of results from all operations.
-    Supports parallel calls: op1 | op2 | op3
-    Falls back to sequential execution if no thread pool is available.
-    """
 
     def __init__(self, ops: List[BaseOp], **kwargs):
         super().__init__(**kwargs)
@@ -18,9 +11,9 @@ class ParallelOp(BaseOp):
 
     def execute(self):
         for op in self.ops:
-            self.submit_task(op.__call__)
+            self.submit_task(op.__call__, context=self.context)
 
-
+        self.join_task(task_desc="parallel execution")
 
     def __or__(self, op: BaseOp):
        if isinstance(op, ParallelOp):
flowllm/op/search/dashscope_deep_research_op.py
ADDED
@@ -0,0 +1,267 @@
+import os
+
+import dashscope
+from loguru import logger
+
+from flowllm.context.flow_context import FlowContext
+from flowllm.context.service_context import C
+from flowllm.op.base_llm_op import BaseLLMOp
+from flowllm.storage.cache.data_cache import DataCache
+
+
+@C.register_op()
+class DashscopeDeepResearchOp(BaseLLMOp):
+    file_path: str = __file__
+
+    """
+    Dashscope deep research operation using Alibaba's Qwen-deep-research model.
+
+    This operation performs deep research using Dashscope's Generation API with the
+    qwen-deep-research model. It handles the multi-phase research process including
+    model questioning, web research, and result generation.
+    """
+
+    def __init__(self,
+                 model: str = "qwen-deep-research",
+                 enable_print: bool = True,
+                 enable_cache: bool = True,
+                 cache_path: str = "./dashscope_deep_research_cache",
+                 cache_expire_hours: float = 24,
+                 max_retries: int = 3,
+                 return_only_content: bool = True,
+                 **kwargs):
+        super().__init__(**kwargs)
+
+        self.model = model
+        self.enable_print = enable_print
+        self.enable_cache = enable_cache
+        self.cache_expire_hours = cache_expire_hours
+        self.max_retries = max_retries
+        self.return_only_content = return_only_content
+
+        # Ensure API key is available
+        self.api_key = os.environ["FLOW_DASHSCOPE_API_KEY"]
+        self.cache_path: str = cache_path
+        self._cache: DataCache | None = None
+
+    @property
+    def cache(self):
+        if self.enable_cache and self._cache is None:
+            self._cache = DataCache(self.cache_path)
+        return self._cache
+
+    def process_responses(self, responses, step_name):
+        """Process streaming responses from the deep research model"""
+        current_phase = None
+        phase_content = ""
+        research_goal = ""
+        web_sites = []
+        keepalive_shown = False  # whether the KeepAlive notice has already been shown
+
+        for response in responses:
+            # Check the response status code
+            if hasattr(response, 'status_code') and response.status_code != 200:
+                logger.warning(f"HTTP status code: {response.status_code}")
+                if hasattr(response, 'code'):
+                    logger.warning(f"Error code: {response.code}")
+                if hasattr(response, 'message'):
+                    logger.warning(f"Error message: {response.message}")
+                continue
+
+            if hasattr(response, 'output') and response.output:
+                message = response.output.get('message', {})
+                phase = message.get('phase')
+                content = message.get('content', '')
+                status = message.get('status')
+                extra = message.get('extra', {})
+
+                # Detect phase changes
+                if phase != current_phase:
+                    if current_phase and phase_content:
+                        # Show a different completion message depending on the phase and step name
+                        if step_name == "Step 1: model clarification" and current_phase == "answer":
+                            logger.info("Model clarification phase complete")
+                        else:
+                            logger.info(f"{current_phase} phase complete")
+                    current_phase = phase
+                    phase_content = ""
+                    keepalive_shown = False  # reset the KeepAlive notice flag
+
+                    # Show a different message depending on the phase and step name
+                    if step_name == "Step 1: model clarification" and phase == "answer":
+                        logger.info("Entering the model clarification phase")
+                    else:
+                        logger.info(f"Entering the {phase} phase")
+
+                # Handle the extra information of the WebResearch phase
+                if phase == "WebResearch":
+                    if extra.get('deep_research', {}).get('research'):
+                        research_info = extra['deep_research']['research']
+
+                        # Handle the streamingQueries status
+                        if status == "streamingQueries":
+                            if 'researchGoal' in research_info:
+                                goal = research_info['researchGoal']
+                                if goal:
+                                    research_goal += goal
+                                    if self.enable_print:
+                                        print(f" Research goal: {goal}", end='', flush=True)
+
+                        # Handle the streamingWebResult status
+                        elif status == "streamingWebResult":
+                            if 'webSites' in research_info:
+                                sites = research_info['webSites']
+                                if sites and sites != web_sites:  # avoid showing duplicates
+                                    web_sites = sites
+                                    if self.enable_print:
+                                        print(f" Found {len(sites)} relevant websites:")
+                                        for i, site in enumerate(sites, 1):
+                                            print(f" {i}. {site.get('title', 'no title')}")
+                                            print(f" Description: {site.get('description', 'no description')[:100]}...")
+                                            print(f" URL: {site.get('url', 'no url')}")
+                                            if site.get('favicon'):
+                                                print(f" Favicon: {site['favicon']}")
+                                            print()
+
+                        # Handle the WebResultFinished status
+                        elif status == "WebResultFinished":
+                            if self.enable_print:
+                                print(f" Web search complete, {len(web_sites)} reference sources found")
+                                if research_goal:
+                                    print(f" Research goal: {research_goal}")
+
+                # Accumulate content and display it
+                if content:
+                    phase_content += content
+                    # Show the content in real time
+                    if self.enable_print:
+                        print(content, end='', flush=True)
+
+                # Show phase status changes
+                if status and status != "typing":
+                    if self.enable_print:
+                        print(f" Status: {status}")
+
+                    # Show a status description
+                    if status == "streamingQueries":
+                        if self.enable_print:
+                            print(" → generating research goals and search queries (WebResearch phase)")
+                    elif status == "streamingWebResult":
+                        if self.enable_print:
+                            print(" → running search, web page reading and code execution (WebResearch phase)")
+                    elif status == "WebResultFinished":
+                        if self.enable_print:
+                            print(" → web search phase complete (WebResearch phase)")
+
+                # When the status is finished, report token usage
+                if status == "finished":
+                    if hasattr(response, 'usage') and response.usage:
+                        usage = response.usage
+                        if self.enable_print:
+                            print(" Token usage:")
+                            print(f" input tokens: {usage.get('input_tokens', 0)}")
+                            print(f" output tokens: {usage.get('output_tokens', 0)}")
+                            print(f" request ID: {response.get('request_id', 'unknown')}")
+
+                if phase == "KeepAlive":
+                    # Show the notice only on the first KeepAlive chunk
+                    if not keepalive_shown:
+                        if self.enable_print:
+                            print("Current step complete, preparing to start the next step")
+                        keepalive_shown = True
+                    continue
+
+        if current_phase and phase_content:
+            if step_name == "Step 1: model clarification" and current_phase == "answer":
+                logger.info("Model clarification phase complete")
+            else:
+                logger.info(f"{current_phase} phase complete")
+
+        return phase_content
+
+    def call_deep_research_model(self, messages, step_name):
+        """Call the deep research model with the given messages"""
+        if self.enable_print:
+            print(f"\n=== {step_name} ===")
+
+        try:
+            responses = dashscope.Generation.call(
+                api_key=self.api_key,
+                model=self.model,
+                messages=messages,
+                # the qwen-deep-research model currently only supports streaming output
+                stream=True
+                # incremental_output=True: add this parameter to use incremental output
+            )
+
+            return self.process_responses(responses, step_name)
+
+        except Exception as e:
+            logger.error(f"Error while calling the API: {e}")
+            return ""
+
+    def execute(self):
+        """Execute the Dashscope deep research operation"""
+        # Get query from context
+        query = self.context.query
+
+        # Check cache first
+        if self.enable_cache and self.cache:
+            cached_result = self.cache.load(query)
+            if cached_result:
+                if self.return_only_content:
+                    self.context.dashscope_deep_research_result = cached_result.get("final_result", "")  # key matches the dict saved below
+                else:
+                    self.context.dashscope_deep_research_result = cached_result
+                return
+
+        # Step 1: model clarification
+        # The model analyses the user's question and asks follow-up questions to pin down the research direction
+        messages = [{'role': 'user', 'content': query}]
+        step1_content = self.call_deep_research_model(messages, "Step 1: model clarification")
+
+        # Step 2: deep research
+        # Based on step 1's clarification, the model runs the full research workflow
+        messages = [
+            {'role': 'user', 'content': query},
+            {'role': 'assistant', 'content': step1_content},  # the model's clarification from step 1
+            {'role': 'user', 'content': 'Help me generate a complete and logically structured report'}
+        ]
+
+        result_content = self.call_deep_research_model(messages, "Step 2: deep research")
+
+        if self.enable_print:
+            print(result_content)
+            print("\n Research complete!")
+
+        # Prepare final result
+        final_result = {
+            "query": query,
+            "step1_content": step1_content,
+            "final_result": result_content,
+            "model": self.model
+        }
+
+        # Cache the result if enabled
+        if self.enable_cache and self.cache:
+            self.cache.save(query, final_result, expire_hours=self.cache_expire_hours)
+
+        # Set context
+        if self.return_only_content:
+            self.context.dashscope_deep_research_result = result_content
+        else:
+            self.context.dashscope_deep_research_result = final_result
+
+
+def main():
+    C.set_default_service_config().init_by_service_config()
+
+    op = DashscopeDeepResearchOp(enable_print=True, enable_cache=True)
+
+    context = FlowContext(query="Is China's electrolytic aluminum industry worth investing in, which listed names are worth buying, and how do they compare?")
+    op(context=context)
+    print(context.dashscope_deep_research_result)
+
+
+if __name__ == "__main__":
+    main()
flowllm/op/search/dashscope_search_op.py
ADDED
@@ -0,0 +1,186 @@
+import os
+import time
+from typing import Dict, Any, List
+
+import dashscope
+from loguru import logger
+
+from flowllm.context.flow_context import FlowContext
+from flowllm.context.service_context import C
+from flowllm.op.base_llm_op import BaseLLMOp
+from flowllm.storage.cache.data_cache import DataCache
+
+
+@C.register_op()
+class DashscopeSearchOp(BaseLLMOp):
+    file_path: str = __file__
+
+    """
+    Dashscope search operation using Alibaba's Qwen model with search capabilities.
+
+    This operation performs web search using Dashscope's Generation API with search enabled.
+    It extracts search results and provides formatted responses with citations.
+    """
+
+    def __init__(self,
+                 model: str = "qwen-plus",
+                 enable_print: bool = True,
+                 enable_cache: bool = False,
+                 cache_path: str = "./dashscope_search_cache",
+                 cache_expire_hours: float = 0.1,
+                 max_retries: int = 3,
+                 search_strategy: str = "max",
+                 return_only_content: bool = True,
+                 enable_role_prompt: bool = True,
+                 **kwargs):
+        super().__init__(**kwargs)
+
+        self.model = model
+        self.enable_print = enable_print
+        self.enable_cache = enable_cache
+        self.cache_expire_hours = cache_expire_hours
+        self.max_retries = max_retries
+        self.search_strategy = search_strategy
+        self.return_only_content = return_only_content
+        self.enable_role_prompt = enable_role_prompt
+
+        # Ensure API key is available
+        self.api_key = os.environ["FLOW_DASHSCOPE_API_KEY"]
+        self.cache_path: str = cache_path
+        self._cache: DataCache | None = None
+
+    @property
+    def cache(self):
+        if self.enable_cache and self._cache is None:
+            self._cache = DataCache(self.cache_path)
+        return self._cache
+
+    @staticmethod
+    def format_search_results(search_results: List[Dict[str, Any]]) -> str:
+        """Format search results for display"""
+        formatted_results = ["=" * 20 + " Search Results " + "=" * 20]
+
+        for web in search_results:
+            formatted_results.append(f"[{web['index']}]: [{web['title']}]({web['url']})")
+
+        return "\n".join(formatted_results)
+
+    def post_process(self, response_data: dict) -> dict:
+        """Post-process the response and optionally print results"""
+        if self.enable_print:
+            # Print search information
+            if "search_results" in response_data:
+                search_info = self.format_search_results(response_data["search_results"])
+                logger.info(f"Search Information:\n{search_info}")
+
+            # Print response content
+            if "response_content" in response_data:
+                logger.info("=" * 20 + " Response Content " + "=" * 20)
+                logger.info(response_data["response_content"])
+
+        return response_data
+
+    def execute(self):
+        """Execute the Dashscope search operation"""
+        # Get query from context
+        query = self.context.query
+
+        # Check cache first
+        if self.enable_cache and self.cache:
+            cached_result = self.cache.load(query)
+            if cached_result:
+                result = self.post_process(cached_result)
+                if self.return_only_content:
+                    self.context.dashscope_search_result = result["response_content"]
+                else:
+                    self.context.dashscope_search_result = result
+
+                return
+
+        if self.enable_role_prompt:
+            user_query = self.prompt_format(prompt_name="role_prompt", query=query)
+        else:
+            user_query = query
+        messages: list = [{"role": "user", "content": user_query}]
+
+        # Retry logic for API calls
+        for attempt in range(self.max_retries):
+            try:
+                # Call Dashscope Generation API with search enabled
+                response = dashscope.Generation.call(
+                    api_key=self.api_key,
+                    model=self.model,
+                    messages=messages,
+                    enable_search=True,  # Enable web search
+                    search_options={
+                        "forced_search": True,  # Force web search
+                        "enable_source": True,  # Include search source information
+                        "enable_citation": False,  # Citation markers disabled
+                        "search_strategy": self.search_strategy,  # Search strategy
+                    },
+                    result_format="message",
+                )
+
+                # Extract search results and response content
+                search_results = []
+                response_content = ""
+
+                if hasattr(response, 'output') and response.output:
+                    # Extract search information
+                    if hasattr(response.output, 'search_info') and response.output.search_info:
+                        search_results = response.output.search_info.get("search_results", [])
+
+                    # Extract response content
+                    if (hasattr(response.output, 'choices') and
+                            response.output.choices and
+                            len(response.output.choices) > 0):
+                        response_content = response.output.choices[0].message.content
+
+                # Prepare final result
+                final_result = {
+                    "query": query,
+                    "search_results": search_results,
+                    "response_content": response_content,
+                    "model": self.model,
+                    "search_strategy": self.search_strategy
+                }
+
+                # Cache the result if enabled
+                if self.enable_cache and self.cache:
+                    self.cache.save(query, final_result, expire_hours=self.cache_expire_hours)
+
+                # Post-process and set context
+                result = self.post_process(final_result)
+                if self.return_only_content:
+                    self.context.dashscope_search_result = result["response_content"]
+                else:
+                    self.context.dashscope_search_result = result
+
+                return
+
+            except Exception as e:
+                logger.warning(f"Dashscope search attempt {attempt + 1} failed for query='{query}': {e}")
+                if attempt < self.max_retries - 1:
+                    time.sleep(attempt + 1)  # linearly increasing backoff
+                else:
+                    logger.error(f"All {self.max_retries} attempts failed for Dashscope search")
+
+        self.context.dashscope_search_result = "dashscope_search failed"
+
+
+def main():
+    from flowllm.utils.common_utils import load_env
+
+    load_env()
+
+    C.set_default_service_config().init_by_service_config()
+
+    op = DashscopeSearchOp(enable_print=True, enable_cache=False)
+
+    context = FlowContext(query="Hangzhou weather tomorrow")
+    op(context=context)
+    print(context.dashscope_search_result)
+
+
+if __name__ == "__main__":
+    main()
flowllm/op/search/dashscope_search_prompt.yaml
ADDED
@@ -0,0 +1,13 @@
+role_prompt: |
+  # user's question
+  {query}
+
+  # task
+  Extract the original content related to the user's query directly from the context, maintain accuracy, and avoid excessive processing.
+
+role_prompt_zh: |
+  # 用户问题
+  {query}
+
+  # task
+  直接从上下文中提取与用户问题相关的原始内容,保持准确性,避免过度处理。
flowllm/op/search/tavily_search_op.py
ADDED
@@ -0,0 +1,109 @@
+import json
+import os
+import time
+from typing import Literal
+
+from loguru import logger
+from tavily import TavilyClient
+
+from flowllm.context.flow_context import FlowContext
+from flowllm.context.service_context import C
+from flowllm.op.base_op import BaseOp
+from flowllm.storage.cache.data_cache import DataCache
+
+
+@C.register_op()
+class TavilySearchOp(BaseOp):
+    def __init__(self,
+                 enable_print: bool = True,
+                 enable_cache: bool = True,
+                 cache_path: str = "./tavily_search_cache",
+                 cache_expire_hours: float = 0.1,
+                 topic: Literal["general", "news", "finance"] = "general",
+                 max_retries: int = 3,
+                 return_only_content: bool = True,
+                 **kwargs):
+        super().__init__(**kwargs)
+
+        self.enable_print = enable_print
+        self.enable_cache = enable_cache
+        self.cache_expire_hours = cache_expire_hours
+        self.topic = topic
+        self.max_retries = max_retries
+        self.return_only_content = return_only_content
+
+        # Initialize the Tavily client; the DataCache is created lazily via the property below
+        self._client = TavilyClient(api_key=os.getenv("FLOW_TAVILY_API_KEY", ""))
+        self.cache_path: str = cache_path
+        self._cache: DataCache | None = None
+
+    @property
+    def cache(self):
+        if self.enable_cache and self._cache is None:
+            self._cache = DataCache(self.cache_path)
+        return self._cache
+
+    def post_process(self, response):
+        if self.enable_print:
+            logger.info("response=\n" + json.dumps(response, indent=2, ensure_ascii=False))
+
+        return response
+
+    def execute(self):
+        # Get query from context
+        query: str = self.context.query
+
+        # Check cache first
+        if self.enable_cache and self.cache:
+            cached_result = self.cache.load(query)
+            if cached_result:
+                final_result = self.post_process(cached_result)
+                if self.return_only_content:
+                    self.context.tavily_search_result = json.dumps(final_result, ensure_ascii=False, indent=2)
+                else:
+                    self.context.tavily_search_result = final_result
+                return
+
+        for i in range(self.max_retries):
+            try:
+                response = self._client.search(query=query, topic=self.topic)
+                url_info_dict = {item["url"]: item for item in response["results"]}
+                response_extract = self._client.extract(urls=[item["url"] for item in response["results"]],
+                                                        format="text")
+
+                final_result = {}
+                for item in response_extract["results"]:
+                    url = item["url"]
+                    final_result[url] = url_info_dict[url]
+                    final_result[url]["raw_content"] = item["raw_content"]
+
+                # Cache the result if enabled
+                if self.enable_cache and self.cache:
+                    self.cache.save(query, final_result, expire_hours=self.cache_expire_hours)
+
+                final_result = self.post_process(final_result)
+
+                if self.return_only_content:
+                    self.context.tavily_search_result = json.dumps(final_result, ensure_ascii=False, indent=2)
+                else:
+                    self.context.tavily_search_result = final_result
+                return
+
+            except Exception as e:
+                logger.exception(f"tavily search with query={query} encountered an error: {e.args}")
+                time.sleep(i + 1)
+
+        self.context.tavily_search_result = "tavily search failed!"
+
+
+if __name__ == "__main__":
+    from flowllm.utils.common_utils import load_env
+
+    load_env()
+
+    C.set_default_service_config().init_by_service_config()
+
+    op = TavilySearchOp(enable_cache=True)
+    context = FlowContext(query="Why do A-share pharma stocks keep rising")
+    op(context=context)
+    print(context.tavily_search_result)
flowllm/op/sequential_op.py
CHANGED
@@ -4,22 +4,14 @@ from flowllm.op.base_op import BaseOp
 
 
 class SequentialOp(BaseOp):
-    """Container class for sequential operation execution
-
-    Executes multiple operations in sequence, where the output of the previous operation
-    becomes the input of the next operation.
-    Supports chaining: op1 >> op2 >> op3
-    """
 
     def __init__(self, ops: List[BaseOp], **kwargs):
         super().__init__(**kwargs)
         self.ops = ops
 
     def execute(self):
-        result = None
         for op in self.ops:
-
-        return result
+            op.__call__(self.context)
 
     def __rshift__(self, op: BaseOp):
         if isinstance(op, SequentialOp):
flowllm/schema/flow_request.py
ADDED
@@ -0,0 +1,12 @@
+from typing import List
+
+from pydantic import Field, BaseModel
+
+from flowllm.schema.message import Message
+
+
+class FlowRequest(BaseModel, extra="allow"):
+    query: str = Field(default="")
+    messages: List[Message] = Field(default_factory=list)
+    workspace_id: str = Field(default="")
+    metadata: dict = Field(default_factory=dict)
flowllm/schema/message.py
CHANGED
@@ -1,3 +1,4 @@
+import datetime
 from typing import List
 
 from pydantic import BaseModel, Field
@@ -12,6 +13,7 @@ class Message(BaseModel):
     reasoning_content: str = Field(default="")
     tool_calls: List[ToolCall] = Field(default_factory=list)
     tool_call_id: str = Field(default="")
+    time_created: str = Field(default_factory=lambda: datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S"))
     metadata: dict = Field(default_factory=dict)
 
     def simple_dump(self, add_reason_content: bool = True) -> dict:
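
The new time_created field is stamped when the Message is constructed. A sketch, assuming the Message fields not shown in this hunk also carry defaults:

m = Message()
print(m.time_created)  # e.g. "2025-01-01 12:00:00"
print(m.tool_call_id)  # ""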
|