flowllm 0.1.1__py3-none-any.whl → 0.1.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (92)
  1. flowllm/__init__.py +19 -6
  2. flowllm/app.py +4 -14
  3. flowllm/client/__init__.py +25 -0
  4. flowllm/client/async_http_client.py +81 -0
  5. flowllm/client/http_client.py +81 -0
  6. flowllm/client/mcp_client.py +133 -0
  7. flowllm/client/sync_mcp_client.py +116 -0
  8. flowllm/config/__init__.py +1 -0
  9. flowllm/config/{default_config.yaml → default.yaml} +3 -8
  10. flowllm/config/empty.yaml +37 -0
  11. flowllm/config/pydantic_config_parser.py +17 -17
  12. flowllm/context/base_context.py +27 -7
  13. flowllm/context/flow_context.py +6 -18
  14. flowllm/context/registry.py +5 -1
  15. flowllm/context/service_context.py +83 -37
  16. flowllm/embedding_model/__init__.py +1 -1
  17. flowllm/embedding_model/base_embedding_model.py +91 -0
  18. flowllm/embedding_model/openai_compatible_embedding_model.py +63 -5
  19. flowllm/flow/__init__.py +1 -0
  20. flowllm/flow/base_flow.py +74 -0
  21. flowllm/flow/base_tool_flow.py +15 -0
  22. flowllm/flow/gallery/__init__.py +8 -0
  23. flowllm/flow/gallery/cmd_flow.py +11 -0
  24. flowllm/flow/gallery/code_tool_flow.py +30 -0
  25. flowllm/flow/gallery/dashscope_search_tool_flow.py +34 -0
  26. flowllm/flow/gallery/deepsearch_tool_flow.py +39 -0
  27. flowllm/flow/gallery/expression_tool_flow.py +18 -0
  28. flowllm/flow/gallery/mock_tool_flow.py +62 -0
  29. flowllm/flow/gallery/tavily_search_tool_flow.py +30 -0
  30. flowllm/flow/gallery/terminate_tool_flow.py +30 -0
  31. flowllm/flow/parser/__init__.py +0 -0
  32. flowllm/{flow_engine/simple_flow_engine.py → flow/parser/expression_parser.py} +25 -67
  33. flowllm/llm/__init__.py +2 -1
  34. flowllm/llm/base_llm.py +94 -4
  35. flowllm/llm/litellm_llm.py +456 -0
  36. flowllm/llm/openai_compatible_llm.py +205 -5
  37. flowllm/op/__init__.py +12 -3
  38. flowllm/op/agent/__init__.py +1 -0
  39. flowllm/op/agent/react_v1_op.py +109 -0
  40. flowllm/op/agent/react_v1_prompt.yaml +54 -0
  41. flowllm/op/agent/react_v2_op.py +86 -0
  42. flowllm/op/agent/react_v2_prompt.yaml +35 -0
  43. flowllm/op/akshare/__init__.py +3 -0
  44. flowllm/op/akshare/get_ak_a_code_op.py +14 -22
  45. flowllm/op/akshare/get_ak_a_info_op.py +17 -20
  46. flowllm/op/{llm_base_op.py → base_llm_op.py} +7 -5
  47. flowllm/op/base_op.py +40 -44
  48. flowllm/op/base_ray_op.py +313 -0
  49. flowllm/op/code/__init__.py +1 -0
  50. flowllm/op/code/execute_code_op.py +42 -0
  51. flowllm/op/gallery/__init__.py +2 -0
  52. flowllm/op/{mock_op.py → gallery/mock_op.py} +4 -4
  53. flowllm/op/gallery/terminate_op.py +29 -0
  54. flowllm/op/parallel_op.py +2 -9
  55. flowllm/op/search/__init__.py +3 -0
  56. flowllm/op/search/dashscope_deep_research_op.py +267 -0
  57. flowllm/op/search/dashscope_search_op.py +186 -0
  58. flowllm/op/search/dashscope_search_prompt.yaml +13 -0
  59. flowllm/op/search/tavily_search_op.py +109 -0
  60. flowllm/op/sequential_op.py +1 -9
  61. flowllm/schema/flow_request.py +12 -0
  62. flowllm/schema/message.py +2 -0
  63. flowllm/schema/service_config.py +12 -16
  64. flowllm/schema/tool_call.py +20 -8
  65. flowllm/schema/vector_node.py +1 -0
  66. flowllm/service/__init__.py +3 -2
  67. flowllm/service/base_service.py +50 -41
  68. flowllm/service/cmd_service.py +15 -0
  69. flowllm/service/http_service.py +34 -42
  70. flowllm/service/mcp_service.py +13 -11
  71. flowllm/storage/cache/__init__.py +1 -0
  72. flowllm/storage/cache/cache_data_handler.py +104 -0
  73. flowllm/{utils/dataframe_cache.py → storage/cache/data_cache.py} +136 -92
  74. flowllm/storage/vector_store/__init__.py +3 -3
  75. flowllm/storage/vector_store/base_vector_store.py +3 -0
  76. flowllm/storage/vector_store/es_vector_store.py +4 -5
  77. flowllm/storage/vector_store/local_vector_store.py +0 -1
  78. flowllm/utils/common_utils.py +9 -21
  79. flowllm/utils/fetch_url.py +16 -12
  80. flowllm/utils/llm_utils.py +28 -0
  81. flowllm/utils/logger_utils.py +28 -0
  82. flowllm/utils/ridge_v2.py +54 -0
  83. {flowllm-0.1.1.dist-info → flowllm-0.1.3.dist-info}/METADATA +43 -390
  84. flowllm-0.1.3.dist-info/RECORD +102 -0
  85. flowllm-0.1.3.dist-info/entry_points.txt +2 -0
  86. flowllm/flow_engine/__init__.py +0 -1
  87. flowllm/flow_engine/base_flow_engine.py +0 -34
  88. flowllm-0.1.1.dist-info/RECORD +0 -62
  89. flowllm-0.1.1.dist-info/entry_points.txt +0 -4
  90. {flowllm-0.1.1.dist-info → flowllm-0.1.3.dist-info}/WHEEL +0 -0
  91. {flowllm-0.1.1.dist-info → flowllm-0.1.3.dist-info}/licenses/LICENSE +0 -0
  92. {flowllm-0.1.1.dist-info → flowllm-0.1.3.dist-info}/top_level.txt +0 -0
flowllm/op/parallel_op.py CHANGED
@@ -4,13 +4,6 @@ from flowllm.op.base_op import BaseOp
 
 
 class ParallelOp(BaseOp):
-    """Container class for parallel operation execution
-
-    Executes multiple operations in parallel, all operations use the same input,
-    returns a list of results from all operations.
-    Supports parallel calls: op1 | op2 | op3
-    Falls back to sequential execution if no thread pool is available.
-    """
 
     def __init__(self, ops: List[BaseOp], **kwargs):
         super().__init__(**kwargs)
@@ -18,9 +11,9 @@ class ParallelOp(BaseOp):
 
     def execute(self):
        for op in self.ops:
-            self.submit_task(op.__call__)
+            self.submit_task(op.__call__, context=self.context)
 
-        return self.join_task(task_desc="Parallel execution")
+        self.join_task(task_desc="parallel execution")
 
    def __or__(self, op: BaseOp):
        if isinstance(op, ParallelOp):
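
Note on the change above: execute() no longer returns joined results; each op now receives the shared context and writes its output there. A minimal sketch of that contract, assuming BaseOp's | operator builds a ParallelOp (as the removed docstring described) and using a hypothetical AddFieldOp helper:

# Hypothetical sketch: two toy ops run in parallel over one shared context.
from flowllm.context.flow_context import FlowContext
from flowllm.op.base_op import BaseOp


class AddFieldOp(BaseOp):  # hypothetical helper, not part of the package
    def __init__(self, field: str, **kwargs):
        super().__init__(**kwargs)
        self.field = field

    def execute(self):
        # Results go onto the shared context instead of being returned.
        setattr(self.context, self.field, f"{self.field} done")


parallel = AddFieldOp("a") | AddFieldOp("b")  # op1 | op2 builds a ParallelOp
context = FlowContext(query="demo")
parallel(context=context)
print(context.a, context.b)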
flowllm/op/search/__init__.py ADDED
@@ -0,0 +1,3 @@
+from .dashscope_deep_research_op import DashscopeDeepResearchOp
+from .dashscope_search_op import DashscopeSearchOp
+from .tavily_search_op import TavilySearchOp
flowllm/op/search/dashscope_deep_research_op.py ADDED
@@ -0,0 +1,267 @@
+import os
+
+import dashscope
+from loguru import logger
+
+from flowllm.context.flow_context import FlowContext
+from flowllm.context.service_context import C
+from flowllm.op.base_llm_op import BaseLLMOp
+from flowllm.storage.cache.data_cache import DataCache
+
+
+@C.register_op()
+class DashscopeDeepResearchOp(BaseLLMOp):
+    file_path: str = __file__
+
+    """
+    Dashscope deep research operation using Alibaba's qwen-deep-research model.
+
+    This operation performs deep research using Dashscope's Generation API with the
+    qwen-deep-research model. It handles the multi-phase research process, including
+    model clarification questions, web research, and result generation.
+    """
+
+    def __init__(self,
+                 model: str = "qwen-deep-research",
+                 enable_print: bool = True,
+                 enable_cache: bool = True,
+                 cache_path: str = "./dashscope_deep_research_cache",
+                 cache_expire_hours: float = 24,
+                 max_retries: int = 3,
+                 return_only_content: bool = True,
+                 **kwargs):
+        super().__init__(**kwargs)
+
+        self.model = model
+        self.enable_print = enable_print
+        self.enable_cache = enable_cache
+        self.cache_expire_hours = cache_expire_hours
+        self.max_retries = max_retries
+        self.return_only_content = return_only_content
+
+        # Ensure the API key is available
+        self.api_key = os.environ["FLOW_DASHSCOPE_API_KEY"]
+        self.cache_path: str = cache_path
+        self._cache: DataCache | None = None
+
+    @property
+    def cache(self):
+        if self.enable_cache and self._cache is None:
+            self._cache = DataCache(self.cache_path)
+        return self._cache
+
+    def process_responses(self, responses, step_name):
+        """Process streaming responses from the deep research model"""
+        current_phase = None
+        phase_content = ""
+        research_goal = ""
+        web_sites = []
+        keepalive_shown = False  # whether the KeepAlive notice has already been shown
+
+        for response in responses:
+            # Check the response status code
+            if hasattr(response, 'status_code') and response.status_code != 200:
+                logger.warning(f"HTTP status code: {response.status_code}")
+                if hasattr(response, 'code'):
+                    logger.warning(f"error code: {response.code}")
+                if hasattr(response, 'message'):
+                    logger.warning(f"error message: {response.message}")
+                continue
+
+            if hasattr(response, 'output') and response.output:
+                message = response.output.get('message', {})
+                phase = message.get('phase')
+                content = message.get('content', '')
+                status = message.get('status')
+                extra = message.get('extra', {})
+
+                # Detect phase changes
+                if phase != current_phase:
+                    if current_phase and phase_content:
+                        # Report completion differently depending on the phase and step names
+                        if step_name == "Step 1: model clarification" and current_phase == "answer":
+                            logger.info("model clarification phase finished")
+                        else:
+                            logger.info(f"{current_phase} phase finished")
+                    current_phase = phase
+                    phase_content = ""
+                    keepalive_shown = False  # reset the KeepAlive notice flag
+
+                    # Report the new phase differently depending on the phase and step names
+                    if step_name == "Step 1: model clarification" and phase == "answer":
+                        logger.info("entering model clarification phase")
+                    else:
+                        logger.info(f"entering {phase} phase")
+
+                # Handle the extra information of the WebResearch phase
+                if phase == "WebResearch":
+                    if extra.get('deep_research', {}).get('research'):
+                        research_info = extra['deep_research']['research']
+
+                        # Handle the streamingQueries status
+                        if status == "streamingQueries":
+                            if 'researchGoal' in research_info:
+                                goal = research_info['researchGoal']
+                                if goal:
+                                    research_goal += goal
+                                    if self.enable_print:
+                                        print(f" research goal: {goal}", end='', flush=True)
+
+                        # Handle the streamingWebResult status
+                        elif status == "streamingWebResult":
+                            if 'webSites' in research_info:
+                                sites = research_info['webSites']
+                                if sites and sites != web_sites:  # avoid printing duplicates
+                                    web_sites = sites
+                                    if self.enable_print:
+                                        print(f" found {len(sites)} related websites:")
+                                        for i, site in enumerate(sites, 1):
+                                            print(f" {i}. {site.get('title', 'no title')}")
+                                            print(f"    description: {site.get('description', 'no description')[:100]}...")
+                                            print(f"    URL: {site.get('url', 'no url')}")
+                                            if site.get('favicon'):
+                                                print(f"    favicon: {site['favicon']}")
+                                            print()
+
+                        # Handle the WebResultFinished status
+                        elif status == "WebResultFinished":
+                            if self.enable_print:
+                                print(f" web search finished, {len(web_sites)} reference sources found")
+                                if research_goal:
+                                    print(f" research goal: {research_goal}")
+
+                # Accumulate the content and display it
+                if content:
+                    phase_content += content
+                    # Stream the content as it arrives
+                    if self.enable_print:
+                        print(content, end='', flush=True)
+
+                # Report phase status changes
+                if status and status != "typing":
+                    if self.enable_print:
+                        print(f" status: {status}")
+
+                    # Explain the status
+                    if status == "streamingQueries":
+                        if self.enable_print:
+                            print(" -> generating research goals and search queries (WebResearch phase)")
+                    elif status == "streamingWebResult":
+                        if self.enable_print:
+                            print(" -> searching, reading web pages and executing code (WebResearch phase)")
+                    elif status == "WebResultFinished":
+                        if self.enable_print:
+                            print(" -> web search finished (WebResearch phase)")
+
+                # When the status is finished, report the token usage
+                if status == "finished":
+                    if hasattr(response, 'usage') and response.usage:
+                        usage = response.usage
+                        if self.enable_print:
+                            print(" token usage:")
+                            print(f"   input tokens: {usage.get('input_tokens', 0)}")
+                            print(f"   output tokens: {usage.get('output_tokens', 0)}")
+                            print(f"   request id: {response.get('request_id', 'unknown')}")
+
+                if phase == "KeepAlive":
+                    # Show the notice only on the first KeepAlive
+                    if not keepalive_shown:
+                        if self.enable_print:
+                            print("current step finished, preparing to start the next step")
+                        keepalive_shown = True
+                    continue
+
+        if current_phase and phase_content:
+            if step_name == "Step 1: model clarification" and current_phase == "answer":
+                logger.info("model clarification phase finished")
+            else:
+                logger.info(f"{current_phase} phase finished")
+
+        return phase_content
+
+    def call_deep_research_model(self, messages, step_name):
+        """Call the deep research model with the given messages"""
+        if self.enable_print:
+            print(f"\n=== {step_name} ===")
+
+        try:
+            responses = dashscope.Generation.call(
+                api_key=self.api_key,
+                model=self.model,
+                messages=messages,
+                # the qwen-deep-research model currently only supports streaming output
+                stream=True
+                # add incremental_output=True for incremental output
+            )
+
+            return self.process_responses(responses, step_name)
+
+        except Exception as e:
+            logger.error(f"error while calling the API: {e}")
+            return ""
+
+    def execute(self):
+        """Execute the Dashscope deep research operation"""
+        # Get the query from the context
+        query = self.context.query
+
+        # Check the cache first
+        if self.enable_cache and self.cache:
+            cached_result = self.cache.load(query)
+            if cached_result:
+                if self.return_only_content:
+                    self.context.dashscope_deep_research_result = cached_result.get("final_result", "")
+                else:
+                    self.context.dashscope_deep_research_result = cached_result
+                return
+
+        # Step 1: model clarification.
+        # The model analyzes the user's question and asks follow-up questions to narrow the research direction.
+        messages = [{'role': 'user', 'content': query}]
+        step1_content = self.call_deep_research_model(messages, "Step 1: model clarification")
+
+        # Step 2: deep research.
+        # Based on the clarification from step 1, the model runs the full research pipeline.
+        messages = [
+            {'role': 'user', 'content': query},
+            {'role': 'assistant', 'content': step1_content},  # the model's clarification questions
+            {'role': 'user', 'content': 'Please generate a complete and well-structured report'}
+        ]
+
+        result_content = self.call_deep_research_model(messages, "Step 2: deep research")
+
+        if self.enable_print:
+            print(result_content)
+            print("\nResearch finished!")
+
+        # Prepare the final result
+        final_result = {
+            "query": query,
+            "step1_content": step1_content,
+            "final_result": result_content,
+            "model": self.model
+        }
+
+        # Cache the result if enabled
+        if self.enable_cache and self.cache:
+            self.cache.save(query, final_result, expire_hours=self.cache_expire_hours)
+
+        # Set the context
+        if self.return_only_content:
+            self.context.dashscope_deep_research_result = result_content
+        else:
+            self.context.dashscope_deep_research_result = final_result
+
+
+def main():
+    C.set_default_service_config().init_by_service_config()
+
+    op = DashscopeDeepResearchOp(enable_print=True, enable_cache=True)
+
+    context = FlowContext(query="Is China's electrolytic aluminum industry worth investing in? "
+                                "Which listed companies are worth considering, and how do they compare?")
+    op(context=context)
+    print(context.dashscope_deep_research_result)
+
+
+if __name__ == "__main__":
+    main()
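
All three new search ops share the same lazy DataCache pattern. A minimal sketch of the cache calls they rely on, assuming DataCache keys entries by the query string and that load() returns a falsy value on a miss (which the ops' `if cached_result:` checks suggest):

from flowllm.storage.cache.data_cache import DataCache

cache = DataCache("./dashscope_deep_research_cache")

result = cache.load("some query")  # falsy on a cache miss
if not result:
    result = {"final_result": "..."}  # expensive model call goes here
    cache.save("some query", result, expire_hours=24)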
flowllm/op/search/dashscope_search_op.py ADDED
@@ -0,0 +1,186 @@
+import os
+import time
+from typing import Dict, Any, List
+
+import dashscope
+from loguru import logger
+
+from flowllm.context.flow_context import FlowContext
+from flowllm.context.service_context import C
+from flowllm.op.base_llm_op import BaseLLMOp
+from flowllm.storage.cache.data_cache import DataCache
+
+
+@C.register_op()
+class DashscopeSearchOp(BaseLLMOp):
+    file_path: str = __file__
+
+    """
+    Dashscope search operation using Alibaba's Qwen model with search capabilities.
+
+    This operation performs web search using Dashscope's Generation API with search enabled.
+    It extracts search results and provides formatted responses with citations.
+    """
+
+    def __init__(self,
+                 model: str = "qwen-plus",
+                 enable_print: bool = True,
+                 enable_cache: bool = False,
+                 cache_path: str = "./dashscope_search_cache",
+                 cache_expire_hours: float = 0.1,
+                 max_retries: int = 3,
+                 search_strategy: str = "max",
+                 return_only_content: bool = True,
+                 enable_role_prompt: bool = True,
+                 **kwargs):
+        super().__init__(**kwargs)
+
+        self.model = model
+        self.enable_print = enable_print
+        self.enable_cache = enable_cache
+        self.cache_expire_hours = cache_expire_hours
+        self.max_retries = max_retries
+        self.search_strategy = search_strategy
+        self.return_only_content = return_only_content
+        self.enable_role_prompt = enable_role_prompt
+
+        # Ensure the API key is available
+        self.api_key = os.environ["FLOW_DASHSCOPE_API_KEY"]
+        self.cache_path: str = cache_path
+        self._cache: DataCache | None = None
+
+    @property
+    def cache(self):
+        if self.enable_cache and self._cache is None:
+            self._cache = DataCache(self.cache_path)
+        return self._cache
+
+    @staticmethod
+    def format_search_results(search_results: List[Dict[str, Any]]) -> str:
+        """Format search results for display"""
+        formatted_results = ["=" * 20 + " Search Results " + "=" * 20]
+
+        for web in search_results:
+            formatted_results.append(f"[{web['index']}]: [{web['title']}]({web['url']})")
+
+        return "\n".join(formatted_results)
+
+    def post_process(self, response_data: dict) -> dict:
+        """Post-process the response and optionally print results"""
+        if self.enable_print:
+            # Print search information
+            if "search_results" in response_data:
+                search_info = self.format_search_results(response_data["search_results"])
+                logger.info(f"Search Information:\n{search_info}")
+
+            # Print response content
+            if "response_content" in response_data:
+                logger.info("=" * 20 + " Response Content " + "=" * 20)
+                logger.info(response_data["response_content"])
+
+        return response_data
+
+    def execute(self):
+        """Execute the Dashscope search operation"""
+        # Get the query from the context
+        query = self.context.query
+
+        # Check the cache first
+        if self.enable_cache and self.cache:
+            cached_result = self.cache.load(query)
+            if cached_result:
+                result = self.post_process(cached_result)
+                if self.return_only_content:
+                    self.context.dashscope_search_result = result["response_content"]
+                else:
+                    self.context.dashscope_search_result = result
+
+                return
+
+        if self.enable_role_prompt:
+            user_query = self.prompt_format(prompt_name="role_prompt", query=query)
+        else:
+            user_query = query
+        messages: list = [{"role": "user", "content": user_query}]
+
+        # Retry logic for API calls
+        for attempt in range(self.max_retries):
+            try:
+                # Call the Dashscope Generation API with search enabled
+                response = dashscope.Generation.call(
+                    api_key=self.api_key,
+                    model=self.model,
+                    messages=messages,
+                    enable_search=True,  # enable web search
+                    search_options={
+                        "forced_search": True,  # force web search
+                        "enable_source": True,  # include search source information
+                        "enable_citation": False,  # disable citation markers
+                        "search_strategy": self.search_strategy,  # search strategy
+                    },
+                    result_format="message",
+                )
+
+                # Extract search results and response content
+                search_results = []
+                response_content = ""
+
+                if hasattr(response, 'output') and response.output:
+                    # Extract search information
+                    if hasattr(response.output, 'search_info') and response.output.search_info:
+                        search_results = response.output.search_info.get("search_results", [])
+
+                    # Extract response content
+                    if (hasattr(response.output, 'choices') and
+                            response.output.choices and
+                            len(response.output.choices) > 0):
+                        response_content = response.output.choices[0].message.content
+
+                # Prepare the final result
+                final_result = {
+                    "query": query,
+                    "search_results": search_results,
+                    "response_content": response_content,
+                    "model": self.model,
+                    "search_strategy": self.search_strategy
+                }
+
+                # Cache the result if enabled
+                if self.enable_cache and self.cache:
+                    self.cache.save(query, final_result, expire_hours=self.cache_expire_hours)
+
+                # Post-process and set the context
+                result = self.post_process(final_result)
+                if self.return_only_content:
+                    self.context.dashscope_search_result = result["response_content"]
+                else:
+                    self.context.dashscope_search_result = result
+
+                return
+
+            except Exception as e:
+                logger.warning(f"Dashscope search attempt {attempt + 1} failed for query='{query}': {e}")
+                if attempt < self.max_retries - 1:
+                    time.sleep(attempt + 1)  # linear backoff
+                else:
+                    logger.error(f"All {self.max_retries} attempts failed for Dashscope search")
+
+        self.context.dashscope_search_result = "dashscope_search failed"
+
+
+def main():
+    from flowllm.utils.common_utils import load_env
+
+    load_env()
+
+    C.set_default_service_config().init_by_service_config()
+
+    op = DashscopeSearchOp(enable_print=True, enable_cache=False)
+
+    context = FlowContext(query="Weather in Hangzhou tomorrow")
+    op(context=context)
+    print(context.dashscope_search_result)
+
+
+if __name__ == "__main__":
+    main()
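
The prompt_format call in execute() presumably resolves role_prompt from the sibling dashscope_search_prompt.yaml shown next; BaseLLMOp's file_path attribute appears to exist for exactly this kind of lookup. A hypothetical illustration of the substitution it performs, under that assumption:

# Assumption: prompt_format loads the sibling *_prompt.yaml and fills the {query} placeholder.
import yaml

with open("flowllm/op/search/dashscope_search_prompt.yaml") as f:
    prompts = yaml.safe_load(f)

user_query = prompts["role_prompt"].format(query="Weather in Hangzhou tomorrow")
print(user_query)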
flowllm/op/search/dashscope_search_prompt.yaml ADDED
@@ -0,0 +1,13 @@
+role_prompt: |
+  # user's question
+  {query}
+
+  # task
+  Extract the original content related to the user's query directly from the context, maintain accuracy, and avoid excessive processing.
+
+role_prompt_zh: |
+  # 用户问题
+  {query}
+
+  # 任务
+  直接从上下文中提取与用户问题相关的原始内容,保持准确性,避免过度处理。
flowllm/op/search/tavily_search_op.py ADDED
@@ -0,0 +1,109 @@
+import json
+import os
+import time
+from typing import Literal
+
+from loguru import logger
+from tavily import TavilyClient
+
+from flowllm.context.flow_context import FlowContext
+from flowllm.context.service_context import C
+from flowllm.op.base_op import BaseOp
+from flowllm.storage.cache.data_cache import DataCache
+
+
+@C.register_op()
+class TavilySearchOp(BaseOp):
+    def __init__(self,
+                 enable_print: bool = True,
+                 enable_cache: bool = True,
+                 cache_path: str = "./tavily_search_cache",
+                 cache_expire_hours: float = 0.1,
+                 topic: Literal["general", "news", "finance"] = "general",
+                 max_retries: int = 3,
+                 return_only_content: bool = True,
+                 **kwargs):
+        super().__init__(**kwargs)
+
+        self.enable_print = enable_print
+        self.enable_cache = enable_cache
+        self.cache_expire_hours = cache_expire_hours
+        self.topic = topic
+        self.max_retries = max_retries
+        self.return_only_content = return_only_content
+
+        # Initialize the Tavily client; the DataCache is created lazily when caching is enabled
+        self._client = TavilyClient(api_key=os.getenv("FLOW_TAVILY_API_KEY", ""))
+        self.cache_path: str = cache_path
+        self._cache: DataCache | None = None
+
+    @property
+    def cache(self):
+        if self.enable_cache and self._cache is None:
+            self._cache = DataCache(self.cache_path)
+        return self._cache
+
+    def post_process(self, response):
+        if self.enable_print:
+            logger.info("response=\n" + json.dumps(response, indent=2, ensure_ascii=False))
+
+        return response
+
+    def execute(self):
+        # Get the query from the context
+        query: str = self.context.query
+
+        # Check the cache first
+        if self.enable_cache and self.cache:
+            cached_result = self.cache.load(query)
+            if cached_result:
+                final_result = self.post_process(cached_result)
+                if self.return_only_content:
+                    self.context.tavily_search_result = json.dumps(final_result, ensure_ascii=False, indent=2)
+                else:
+                    self.context.tavily_search_result = final_result
+                return
+
+        for i in range(self.max_retries):
+            try:
+                response = self._client.search(query=query, topic=self.topic)
+                url_info_dict = {item["url"]: item for item in response["results"]}
+                response_extract = self._client.extract(urls=[item["url"] for item in response["results"]],
+                                                        format="text")
+
+                final_result = {}
+                for item in response_extract["results"]:
+                    url = item["url"]
+                    final_result[url] = url_info_dict[url]
+                    final_result[url]["raw_content"] = item["raw_content"]
+
+                # Cache the result if enabled
+                if self.enable_cache and self.cache:
+                    self.cache.save(query, final_result, expire_hours=self.cache_expire_hours)
+
+                final_result = self.post_process(final_result)
+
+                if self.return_only_content:
+                    self.context.tavily_search_result = json.dumps(final_result, ensure_ascii=False, indent=2)
+                else:
+                    self.context.tavily_search_result = final_result
+                return
+
+            except Exception as e:
+                logger.exception(f"tavily search with query={query} encountered an error: {e.args}")
+                time.sleep(i + 1)
+
+        self.context.tavily_search_result = "tavily search failed!"
+
+
+if __name__ == "__main__":
+    from flowllm.utils.common_utils import load_env
+
+    load_env()
+
+    C.set_default_service_config().init_by_service_config()
+
+    op = TavilySearchOp(enable_cache=True)
+    context = FlowContext(query="Why do A-share pharmaceutical stocks keep rising?")
+    op(context=context)
+    print(context.tavily_search_result)
flowllm/op/sequential_op.py CHANGED
@@ -4,22 +4,14 @@ from flowllm.op.base_op import BaseOp
 
 
 class SequentialOp(BaseOp):
-    """Container class for sequential operation execution
-
-    Executes multiple operations in sequence, where the output of the previous operation
-    becomes the input of the next operation.
-    Supports chaining: op1 >> op2 >> op3
-    """
 
     def __init__(self, ops: List[BaseOp], **kwargs):
        super().__init__(**kwargs)
        self.ops = ops
 
    def execute(self):
-        result = None
        for op in self.ops:
-            result = op.execute()
-        return result
+            op.__call__(self.context)
 
    def __rshift__(self, op: BaseOp):
        if isinstance(op, SequentialOp):
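
The sequential contract mirrors the parallel one: no more return-value threading, each op reads what its predecessor wrote onto the shared context. Reusing the hypothetical AddFieldOp from the parallel sketch above, and assuming >> builds a SequentialOp as the removed docstring described:

pipeline = AddFieldOp("first") >> AddFieldOp("second")
context = FlowContext(query="demo")
pipeline(context=context)
print(context.first, context.second)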
flowllm/schema/flow_request.py ADDED
@@ -0,0 +1,12 @@
+from typing import List
+
+from pydantic import Field, BaseModel
+
+from flowllm.schema.message import Message
+
+
+class FlowRequest(BaseModel, extra="allow"):
+    query: str = Field(default="")
+    messages: List[Message] = Field(default_factory=list)
+    workspace_id: str = Field(default="")
+    metadata: dict = Field(default_factory=dict)
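
Because FlowRequest is declared with extra="allow", unrecognized request fields survive validation instead of raising. A quick sketch (pydantic v2 exposes the extras via model_extra):

from flowllm.schema.flow_request import FlowRequest

req = FlowRequest(query="hello", workspace_id="ws-1", top_k=5)  # top_k is not a declared field
print(req.query)        # hello
print(req.model_extra)  # {'top_k': 5}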
flowllm/schema/message.py CHANGED
@@ -1,3 +1,4 @@
+import datetime
 from typing import List
 
 from pydantic import BaseModel, Field
@@ -12,6 +13,7 @@ class Message(BaseModel):
     reasoning_content: str = Field(default="")
     tool_calls: List[ToolCall] = Field(default_factory=list)
     tool_call_id: str = Field(default="")
+    time_created: str = Field(default_factory=lambda: datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S"))
     metadata: dict = Field(default_factory=dict)
 
     def simple_dump(self, add_reason_content: bool = True) -> dict:
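
The new time_created field uses default_factory, so the timestamp is taken when each Message is constructed, not when the class is defined. A quick sketch, assuming the fields not shown in this hunk (such as role and content) also have defaults like the ones above:

from flowllm.schema.message import Message

m = Message()          # assumption: remaining fields default, as the visible ones do
print(m.time_created)  # e.g. 2025-01-01 12:00:00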