flowllm 0.1.1__py3-none-any.whl → 0.1.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- flowllm/__init__.py +15 -6
- flowllm/app.py +4 -14
- flowllm/client/__init__.py +25 -0
- flowllm/client/async_http_client.py +81 -0
- flowllm/client/http_client.py +81 -0
- flowllm/client/mcp_client.py +133 -0
- flowllm/client/sync_mcp_client.py +116 -0
- flowllm/config/__init__.py +1 -0
- flowllm/config/{default_config.yaml → default.yaml} +3 -8
- flowllm/config/empty.yaml +37 -0
- flowllm/config/pydantic_config_parser.py +17 -17
- flowllm/context/base_context.py +27 -7
- flowllm/context/flow_context.py +6 -18
- flowllm/context/registry.py +5 -1
- flowllm/context/service_context.py +81 -37
- flowllm/embedding_model/__init__.py +1 -1
- flowllm/embedding_model/base_embedding_model.py +91 -0
- flowllm/embedding_model/openai_compatible_embedding_model.py +63 -5
- flowllm/flow/__init__.py +1 -0
- flowllm/flow/base_flow.py +72 -0
- flowllm/flow/base_tool_flow.py +15 -0
- flowllm/flow/gallery/__init__.py +8 -0
- flowllm/flow/gallery/cmd_flow.py +11 -0
- flowllm/flow/gallery/code_tool_flow.py +30 -0
- flowllm/flow/gallery/dashscope_search_tool_flow.py +34 -0
- flowllm/flow/gallery/deepsearch_tool_flow.py +39 -0
- flowllm/flow/gallery/expression_tool_flow.py +18 -0
- flowllm/flow/gallery/mock_tool_flow.py +67 -0
- flowllm/flow/gallery/tavily_search_tool_flow.py +30 -0
- flowllm/flow/gallery/terminate_tool_flow.py +30 -0
- flowllm/flow/parser/__init__.py +0 -0
- flowllm/{flow_engine/simple_flow_engine.py → flow/parser/expression_parser.py} +25 -67
- flowllm/llm/__init__.py +2 -1
- flowllm/llm/base_llm.py +94 -4
- flowllm/llm/litellm_llm.py +455 -0
- flowllm/llm/openai_compatible_llm.py +205 -5
- flowllm/op/__init__.py +11 -3
- flowllm/op/agent/__init__.py +0 -0
- flowllm/op/agent/react_op.py +83 -0
- flowllm/op/agent/react_prompt.yaml +28 -0
- flowllm/op/akshare/__init__.py +3 -0
- flowllm/op/akshare/get_ak_a_code_op.py +14 -22
- flowllm/op/akshare/get_ak_a_info_op.py +17 -20
- flowllm/op/{llm_base_op.py → base_llm_op.py} +6 -5
- flowllm/op/base_op.py +14 -35
- flowllm/op/base_ray_op.py +313 -0
- flowllm/op/code/__init__.py +1 -0
- flowllm/op/code/execute_code_op.py +42 -0
- flowllm/op/gallery/__init__.py +2 -0
- flowllm/op/{mock_op.py → gallery/mock_op.py} +4 -4
- flowllm/op/gallery/terminate_op.py +29 -0
- flowllm/op/parallel_op.py +2 -9
- flowllm/op/search/__init__.py +3 -0
- flowllm/op/search/dashscope_deep_research_op.py +260 -0
- flowllm/op/search/dashscope_search_op.py +179 -0
- flowllm/op/search/dashscope_search_prompt.yaml +13 -0
- flowllm/op/search/tavily_search_op.py +102 -0
- flowllm/op/sequential_op.py +1 -9
- flowllm/schema/flow_request.py +12 -0
- flowllm/schema/service_config.py +12 -16
- flowllm/schema/tool_call.py +13 -5
- flowllm/schema/vector_node.py +1 -0
- flowllm/service/__init__.py +3 -2
- flowllm/service/base_service.py +50 -41
- flowllm/service/cmd_service.py +15 -0
- flowllm/service/http_service.py +34 -42
- flowllm/service/mcp_service.py +13 -11
- flowllm/storage/cache/__init__.py +1 -0
- flowllm/storage/cache/cache_data_handler.py +104 -0
- flowllm/{utils/dataframe_cache.py → storage/cache/data_cache.py} +136 -92
- flowllm/storage/vector_store/__init__.py +3 -3
- flowllm/storage/vector_store/es_vector_store.py +1 -2
- flowllm/storage/vector_store/local_vector_store.py +0 -1
- flowllm/utils/common_utils.py +9 -21
- flowllm/utils/fetch_url.py +16 -12
- flowllm/utils/llm_utils.py +28 -0
- flowllm/utils/ridge_v2.py +54 -0
- {flowllm-0.1.1.dist-info → flowllm-0.1.2.dist-info}/METADATA +43 -390
- flowllm-0.1.2.dist-info/RECORD +99 -0
- flowllm-0.1.2.dist-info/entry_points.txt +2 -0
- flowllm/flow_engine/__init__.py +0 -1
- flowllm/flow_engine/base_flow_engine.py +0 -34
- flowllm-0.1.1.dist-info/RECORD +0 -62
- flowllm-0.1.1.dist-info/entry_points.txt +0 -4
- {flowllm-0.1.1.dist-info → flowllm-0.1.2.dist-info}/WHEEL +0 -0
- {flowllm-0.1.1.dist-info → flowllm-0.1.2.dist-info}/licenses/LICENSE +0 -0
- {flowllm-0.1.1.dist-info → flowllm-0.1.2.dist-info}/top_level.txt +0 -0
flowllm/llm/openai_compatible_llm.py
CHANGED
@@ -2,7 +2,7 @@ import os
 from typing import List, Dict
 
 from loguru import logger
-from openai import OpenAI
+from openai import OpenAI, AsyncOpenAI
 from openai.types import CompletionUsage
 from pydantic import Field, PrivateAttr, model_validator
 
@@ -33,19 +33,21 @@ class OpenAICompatibleBaseLLM(BaseLLM):
     base_url: str = Field(default_factory=lambda: os.getenv("FLOW_LLM_BASE_URL"),
                           description="Base URL for the API endpoint")
     _client: OpenAI = PrivateAttr()
+    _aclient: AsyncOpenAI = PrivateAttr()
 
     @model_validator(mode="after")
     def init_client(self):
         """
-        Initialize the OpenAI
+        Initialize the OpenAI clients after model validation.
 
         This validator runs after all field validation is complete,
-        ensuring we have valid API credentials before creating the
+        ensuring we have valid API credentials before creating the clients.
 
         Returns:
             Self for method chaining
         """
         self._client = OpenAI(api_key=self.api_key, base_url=self.base_url)
+        self._aclient = AsyncOpenAI(api_key=self.api_key, base_url=self.base_url)
         return self
 
     def stream_chat(self, messages: List[Message], tools: List[ToolCall] = None, **kwargs):
@@ -151,6 +153,109 @@
                 else:
                     yield e.args, ChunkEnum.ERROR
 
+    async def astream_chat(self, messages: List[Message], tools: List[ToolCall] = None, **kwargs):
+        """
+        Async stream chat completions from OpenAI-compatible API.
+
+        This method handles async streaming responses and categorizes chunks into different types:
+        - THINK: Reasoning/thinking content from the model
+        - ANSWER: Regular response content
+        - TOOL: Tool calls that need to be executed
+        - USAGE: Token usage statistics
+        - ERROR: Error information
+
+        Args:
+            messages: List of conversation messages
+            tools: Optional list of tools available to the model
+            **kwargs: Additional parameters
+
+        Yields:
+            Tuple of (chunk_content, ChunkEnum) for each streaming piece
+        """
+        for i in range(self.max_retries):
+            try:
+                extra_body = {}
+                if self.enable_thinking:
+                    extra_body["enable_thinking"] = True  # qwen3 params
+
+                completion = await self._aclient.chat.completions.create(
+                    model=self.model_name,
+                    messages=[x.simple_dump() for x in messages],
+                    seed=self.seed,
+                    top_p=self.top_p,
+                    stream=True,
+                    stream_options=self.stream_options,
+                    temperature=self.temperature,
+                    extra_body=extra_body,
+                    tools=[x.simple_input_dump() for x in tools] if tools else None,
+                    parallel_tool_calls=self.parallel_tool_calls)
+
+                # Initialize tool call tracking
+                ret_tools: List[ToolCall] = []  # Accumulate tool calls across chunks
+                is_answering: bool = False  # Track when model starts answering
+
+                # Process each chunk in the streaming response
+                async for chunk in completion:
+                    # Handle chunks without choices (usually usage info)
+                    if not chunk.choices:
+                        yield chunk.usage, ChunkEnum.USAGE
+
+                    else:
+                        delta = chunk.choices[0].delta
+
+                        # Handle reasoning/thinking content (model's internal thoughts)
+                        if hasattr(delta, 'reasoning_content') and delta.reasoning_content is not None:
+                            yield delta.reasoning_content, ChunkEnum.THINK
+
+                        else:
+                            # Mark transition from thinking to answering
+                            if not is_answering:
+                                is_answering = True
+
+                            # Handle regular response content
+                            if delta.content is not None:
+                                yield delta.content, ChunkEnum.ANSWER
+
+                            # Handle tool calls (function calling)
+                            if delta.tool_calls is not None:
+                                for tool_call in delta.tool_calls:
+                                    index = tool_call.index
+
+                                    # Ensure we have enough tool call slots
+                                    while len(ret_tools) <= index:
+                                        ret_tools.append(ToolCall(index=index))
+
+                                    # Accumulate tool call information across chunks
+                                    if tool_call.id:
+                                        ret_tools[index].id += tool_call.id
+
+                                    if tool_call.function and tool_call.function.name:
+                                        ret_tools[index].name += tool_call.function.name
+
+                                    if tool_call.function and tool_call.function.arguments:
+                                        ret_tools[index].arguments += tool_call.function.arguments
+
+                # Yield completed tool calls after streaming finishes
+                if ret_tools:
+                    tool_dict: Dict[str, ToolCall] = {x.name: x for x in tools} if tools else {}
+                    for tool in ret_tools:
+                        # Only yield tool calls that correspond to available tools
+                        if tool.name not in tool_dict:
+                            continue
+
+                        yield tool, ChunkEnum.TOOL
+
+                return
+
+            except Exception as e:
+                logger.exception(f"async stream chat with model={self.model_name} encounter error with e={e.args}")
+
+                # Handle retry logic
+                if i == self.max_retries - 1 and self.raise_exception:
+                    raise e
+                else:
+                    yield e.args, ChunkEnum.ERROR
+
     def _chat(self, messages: List[Message], tools: List[ToolCall] = None, enable_stream_print: bool = False,
               **kwargs) -> Message:
         """
@@ -224,6 +329,95 @@
                        content=answer_content,
                        tool_calls=tool_calls)
 
+    async def _achat(self, messages: List[Message], tools: List[ToolCall] = None, enable_stream_print: bool = False,
+                     **kwargs) -> Message:
+        """
+        Perform an async complete chat completion by aggregating streaming chunks.
+
+        This method consumes the entire async streaming response and combines all
+        chunks into a single Message object. It separates reasoning content,
+        regular answer content, and tool calls.
+
+        Args:
+            messages: List of conversation messages
+            tools: Optional list of tools available to the model
+            enable_stream_print: Whether to print streaming response to console
+            **kwargs: Additional parameters
+
+        Returns:
+            Complete Message with all content aggregated
+        """
+
+        enter_think = False  # Whether we've started printing thinking content
+        enter_answer = False  # Whether we've started printing answer content
+        reasoning_content = ""  # Model's internal reasoning
+        answer_content = ""  # Final response content
+        tool_calls = []  # List of tool calls to execute
+
+        # Consume async streaming response and aggregate chunks by type
+        async for chunk, chunk_enum in self.astream_chat(messages, tools, **kwargs):
+            if chunk_enum is ChunkEnum.USAGE:
+                # Display token usage statistics
+                if enable_stream_print:
+                    if isinstance(chunk, CompletionUsage):
+                        print(f"\n<usage>{chunk.model_dump_json(indent=2)}</usage>")
+                    else:
+                        print(f"\n<usage>{chunk}</usage>")
+
+            elif chunk_enum is ChunkEnum.THINK:
+                if enable_stream_print:
+                    # Format thinking/reasoning content
+                    if not enter_think:
+                        enter_think = True
+                        print("<think>\n", end="")
+                    print(chunk, end="")
+
+                reasoning_content += chunk
+
+            elif chunk_enum is ChunkEnum.ANSWER:
+                if enable_stream_print:
+                    if not enter_answer:
+                        enter_answer = True
+                        # Close thinking section if we were in it
+                        if enter_think:
+                            print("\n</think>")
+                    print(chunk, end="")
+
+                answer_content += chunk
+
+            elif chunk_enum is ChunkEnum.TOOL:
+                if enable_stream_print:
+                    print(f"\n<tool>{chunk.model_dump_json()}</tool>", end="")
+
+                tool_calls.append(chunk)
+
+            elif chunk_enum is ChunkEnum.ERROR:
+                if enable_stream_print:
+                    # Display error information
+                    print(f"\n<error>{chunk}</error>", end="")
+
+        # Construct complete response message
+        return Message(role=Role.ASSISTANT,
+                       reasoning_content=reasoning_content,
+                       content=answer_content,
+                       tool_calls=tool_calls)
+
+
+async def async_main():
+    from flowllm.utils.common_utils import load_env
+
+    load_env()
+
+    # model_name = "qwen-max-2025-01-25"
+    model_name = "qwen3-30b-a3b-thinking-2507"
+    llm = OpenAICompatibleBaseLLM(model_name=model_name)
+
+    # Test async chat
+    message: Message = await llm.achat([Message(role=Role.USER, content="hello")], [],
+                                       enable_stream_print=True)
+    print("Async result:", message)
+
+
 def main():
     from flowllm.utils.common_utils import load_env
 
@@ -231,9 +425,15 @@ def main():
 
     model_name = "qwen-max-2025-01-25"
     llm = OpenAICompatibleBaseLLM(model_name=model_name)
+
+    # Test sync chat
     message: Message = llm.chat([Message(role=Role.USER, content="hello")], [],
                                 enable_stream_print=False)
-    print(message)
+    print("Sync result:", message)
+
 
 if __name__ == "__main__":
-    main()
+    # main()
+
+    import asyncio
+    asyncio.run(async_main())
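Taken together, these hunks give the OpenAI-compatible backend an async path that mirrors the sync one: astream_chat yields (chunk, ChunkEnum) pairs and _achat aggregates them into a single Message. A minimal consumer sketch, assuming achat is the public wrapper around _achat (as async_main above suggests) and that ChunkEnum is importable from flowllm.enumeration.chunk_enum, an assumed path since the diff only shows flowllm.enumeration.role:

import asyncio

from flowllm.enumeration.chunk_enum import ChunkEnum  # assumed module path
from flowllm.llm.openai_compatible_llm import OpenAICompatibleBaseLLM
from flowllm.schema.message import Message, Role
from flowllm.utils.common_utils import load_env


async def demo():
    load_env()  # populate FLOW_LLM_BASE_URL / API key from the environment
    llm = OpenAICompatibleBaseLLM(model_name="qwen3-30b-a3b-thinking-2507")

    # Dispatch on chunk type, mirroring what _achat does internally.
    async for chunk, chunk_enum in llm.astream_chat([Message(role=Role.USER, content="hello")]):
        if chunk_enum is ChunkEnum.ANSWER:
            print(chunk, end="")
        elif chunk_enum is ChunkEnum.ERROR:
            print(f"\nerror: {chunk}")


asyncio.run(demo())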
flowllm/op/__init__.py
CHANGED
@@ -1,3 +1,11 @@
-from
-from
-from
+from .base_llm_op import BaseLLMOp
+from .base_op import BaseOp
+from .base_ray_op import BaseRayOp
+
+"""
+op folder
+"""
+from . import akshare
+from . import code
+from . import gallery
+from . import search
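After this rewrite the base classes are re-exported at package level, and importing flowllm.op also pulls in the akshare/code/gallery/search sub-packages so their @C.register_op() decorators run as a side effect. For example:

from flowllm.op import BaseLLMOp, BaseOp, BaseRayOp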
flowllm/op/agent/__init__.py
File without changes
flowllm/op/agent/react_op.py
ADDED
@@ -0,0 +1,83 @@
+import datetime
+import time
+from typing import List, Dict
+
+from loguru import logger
+
+from flowllm import C, BaseLLMOp
+from flowllm.flow.base_tool_flow import BaseToolFlow
+from flowllm.flow.gallery import DashscopeSearchToolFlow, CodeToolFlow, TerminateToolFlow
+from flowllm.schema.message import Message, Role
+
+
+@C.register_op()
+class ReactOp(BaseLLMOp):
+    # TODO: test react op
+    file_path: str = __file__
+
+    def execute(self):
+        query: str = self.context.query
+
+        max_steps: int = int(self.op_params.get("max_steps", 10))
+        tools: List[BaseToolFlow] = [DashscopeSearchToolFlow(), CodeToolFlow(), TerminateToolFlow()]
+        tool_dict: Dict[str, BaseToolFlow] = {x.name: x for x in tools}
+        now_time = datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')
+        has_terminate_tool = False
+
+        user_prompt = self.prompt_format(prompt_name="role_prompt",
+                                         time=now_time,
+                                         tools=",".join([x.name for x in tools]),
+                                         query=query)
+        messages: List[Message] = [Message(role=Role.USER, content=user_prompt)]
+        logger.info(f"step.0 user_prompt={user_prompt}")
+
+        for i in range(max_steps):
+            if has_terminate_tool:
+                assistant_message: Message = self.llm.chat(messages)
+            else:
+                assistant_message: Message = self.llm.chat(messages, tools=[x.tool_call for x in tools])
+
+            messages.append(assistant_message)
+            logger.info(f"assistant.{i}.reasoning_content={assistant_message.reasoning_content}\n"
+                        f"content={assistant_message.content}\n"
+                        f"tool.size={len(assistant_message.tool_calls)}")
+
+            if has_terminate_tool:
+                break
+
+            for tool in assistant_message.tool_calls:
+                if tool.name == "terminate":
+                    has_terminate_tool = True
+                    logger.info(f"step={i} find terminate tool, break.")
+                    break
+
+            if not has_terminate_tool and not assistant_message.tool_calls:
+                logger.warning(f"【bugfix】step={i} no tools, break.")
+                has_terminate_tool = True
+
+            for j, tool_call in enumerate(assistant_message.tool_calls):
+                logger.info(f"submit step={i} tool_calls.name={tool_call.name} argument_dict={tool_call.argument_dict}")
+
+                if tool_call.name not in tool_dict:
+                    continue
+
+                self.submit_task(tool_dict[tool_call.name].__call__, **tool_call.argument_dict)
+                time.sleep(1)
+
+            if not has_terminate_tool:
+                user_content_list = []
+                for tool_result, tool_call in zip(self.join_task(), assistant_message.tool_calls):
+                    logger.info(f"submit step={i} tool_calls.name={tool_call.name} tool_result={tool_result}")
+                    assert isinstance(tool_result, str)
+                    user_content_list.append(f"<tool_response>\n{tool_result}\n</tool_response>")
+                user_content_list.append(self.prompt_format(prompt_name="next_prompt"))
+                assistant_message.tool_calls.clear()
+                messages.append(Message(role=Role.USER, content="\n".join(user_content_list)))
+
+            else:
+                assistant_message.tool_calls.clear()
+                messages.append(Message(role=Role.USER, content=self.prompt_format(prompt_name="final_prompt")))
+
+        # Store results in context instead of response
+        self.context.messages = messages
+        self.context.answer = messages[-1].content
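ReactOp is still marked "TODO: test" in the hunk above. A sketch of driving it, following the __main__ pattern the akshare ops in this same diff use (the query string here is illustrative):

from flowllm.context.flow_context import FlowContext
from flowllm.context.service_context import C
from flowllm.op.agent.react_op import ReactOp

C.set_default_service_config().init_by_service_config()
context = FlowContext(query="Compare the populations of Paris and Rome.")

op = ReactOp()
op(context=context)
print(context.answer)  # final assistant content written by execute()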
flowllm/op/agent/react_prompt.yaml
ADDED
@@ -0,0 +1,28 @@
+role_prompt: |
+  You are a helpful assistant.
+  The current time is {time}.
+  Please proactively choose the most suitable tool or combination of tools based on the user's question, including {tools} etc.
+  Please first think about how to break down the problem into subtasks, what tools and parameters should be used for each subtask, and finally provide the tool call name and parameters.
+  Try calling the same tool multiple times with different parameters to obtain information from various perspectives.
+  Please determine the response language based on the language of the user's question.
+
+  {query}
+
+# write a complete and rigorous report to answer user's questions based on the context.
+next_prompt: |
+  Think based on the current content and the user's question: Is the current context sufficient to answer the user's question?
+
+  - If the current context is not sufficient to answer the user's question, consider what information is missing.
+    Re-plan and think about how to break down the missing information into subtasks.
+    For each subtask, determine what tools and parameters should be used for the query.
+    Please first provide the reasoning process, then give the tool call name and parameters.
+
+  - If the current context is sufficient to answer the user's question, use the **terminate** tool.
+
+# Please determine the response language based on the language of the user's question.
+final_prompt: |
+  Please integrate the context and provide a complete answer to the user's question.
+
+  # User's Question
+  {query}
+
flowllm/op/akshare/__init__.py
CHANGED
flowllm/op/akshare/get_ak_a_code_op.py
CHANGED
@@ -6,13 +6,12 @@ import akshare as ak
 import pandas as pd
 from loguru import logger
 
-from flowllm.config.pydantic_config_parser import get_default_config
 from flowllm.context.flow_context import FlowContext
 from flowllm.context.service_context import C
 from flowllm.enumeration.role import Role
-from flowllm.op.
+from flowllm.op.base_llm_op import BaseLLMOp
 from flowllm.schema.message import Message
-from flowllm.
+from flowllm.storage.cache.data_cache import DataCache
 from flowllm.utils.timer import timer
 
 
@@ -25,9 +24,9 @@ class GetAkACodeOp(BaseLLMOp):
 
     @staticmethod
    def download_a_stock_df():
-
+        cache = DataCache()
         save_df_key: str = "all_a_stock_name_code"
-        if not
+        if not cache.exists(save_df_key):
             stock_sh_a_spot_em_df = ak.stock_sh_a_spot_em()
             stock_sz_a_spot_em_df = ak.stock_sz_a_spot_em()
             stock_bj_a_spot_em_df = ak.stock_bj_a_spot_em()
@@ -36,9 +35,9 @@ class GetAkACodeOp(BaseLLMOp):
             df = df.drop(columns=["序号"])
             df = df.reset_index(drop=True)
             df = df.sort_values(by="代码")
-
+            cache.save(save_df_key, df, expire_hours=0.25)
 
-        df =
+        df = cache.load(save_df_key, dtype={"代码": str})
         return df
 
     def get_name_code_dict(self) -> dict:
@@ -73,7 +72,7 @@ class GetAkACodeOp(BaseLLMOp):
         stock_names = "\n".join([x.strip() for x in stock_names if x])
         prompt = self.prompt_format(prompt_name="find_stock_name",
                                     stock_names=stock_names,
-                                    query=self.
+                                    query=self.context.query)
         logger.info(f"prompt={prompt}")
 
         def callback_fn(msg: Message):
@@ -97,20 +96,13 @@ class GetAkACodeOp(BaseLLMOp):
             time.sleep(1)
 
         stock_names = sorted(set(self.join_task()))
-        self.
-        logger.info(f"code_infos={self.
+        self.context.code_infos = {name_code_dict[n]: {"股票名称": n} for n in stock_names}
+        logger.info(f"code_infos={self.context.code_infos}")
 
 
 if __name__ == "__main__":
-
-
-
-
-
-    flow_context.query = "茅台和五粮现在价格多少?"
-    flow_context.service_config = service_config
-
-    op = GetAkACodeOp(flow_context=flow_context)
-    # for x in op.split_list(list(range(10)), 3):
-    #     print(x)
-    op.execute()
+    C.set_default_service_config().init_by_service_config()
+    context = FlowContext(query="茅台和五粮现在价格多少?")
+
+    op = GetAkACodeOp()
+    op(context=context)
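The cache migration replaces the old utils/dataframe_cache helper with flowllm.storage.cache.data_cache.DataCache (see the renamed file in the listing above). A sketch of the exists/save/load pattern exactly as the hunks use it; the sample dataframe rows are made up:

import pandas as pd

from flowllm.storage.cache.data_cache import DataCache

cache = DataCache()
key = "all_a_stock_name_code"

if not cache.exists(key):
    df = pd.DataFrame({"代码": ["600519"], "名称": ["贵州茅台"]})  # illustrative rows
    cache.save(key, df, expire_hours=0.25)  # entry expires after ~15 minutes

df = cache.load(key, dtype={"代码": str})  # keep stock codes as strings, not ints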
flowllm/op/akshare/get_ak_a_info_op.py
CHANGED
@@ -4,8 +4,8 @@ import time
 import akshare as ak
 import pandas as pd
 from loguru import logger
+from tqdm import tqdm
 
-from flowllm.config.pydantic_config_parser import get_default_config
 from flowllm.context.flow_context import FlowContext
 from flowllm.context.service_context import C
 from flowllm.op.base_op import BaseOp
@@ -24,7 +24,7 @@ class GetAkAInfoOp(BaseOp):
 
     def execute(self):
         max_retries: int = self.op_params.get("max_retries", 3)
-        for code, info_dict in self.
+        for code, info_dict in self.context.code_infos.items():
             result = {}
             for i in range(max_retries):
                 try:
@@ -39,14 +39,14 @@ class GetAkAInfoOp(BaseOp):
             info_dict.update(result)
 
             time.sleep(1)
-        logger.info(f"code_infos={json.dumps(self.
+        logger.info(f"code_infos={json.dumps(self.context.code_infos, ensure_ascii=False, indent=2)}")
 
 
 @C.register_op()
 class GetAkASpotOp(GetAkAInfoOp):
 
     def execute_code(self, code: str) -> dict:
-        from flowllm.op import GetAkACodeOp
+        from flowllm.op.akshare import GetAkACodeOp
 
         df: pd.DataFrame = GetAkACodeOp.download_a_stock_df()
         df = df.loc[df["代码"] == code, :]
@@ -87,7 +87,7 @@ class GetAkANewsOp(GetAkAInfoOp):
         top_n_news: int = self.op_params.get("top_n_news", 1)
 
         news_content_list = []
-        for i, line in enumerate(stock_news_em_df.to_dict(orient="records")[:top_n_news]):
+        for i, line in enumerate(tqdm(stock_news_em_df.to_dict(orient="records")[:top_n_news])):
             url = line["新闻链接"]
             # http://finance.eastmoney.com/a/202508133482756869.html
             ts = url.split("/")[-1].split(".")[0]
@@ -104,7 +104,7 @@ class MergeAkAInfoOp(BaseOp):
 
     def execute(self):
         code_content = {}
-        for code, info_dict in self.
+        for code, info_dict in self.context.code_infos.items():
             content_list = [f"\n\n### {code}"]
             for key, value in info_dict.items():
                 content_list.append(f"\n#### {code}-{key}")
@@ -120,24 +120,21 @@ class MergeAkAInfoOp(BaseOp):
 
         answer = "\n".join(code_content.values())
         logger.info(f"answer=\n{answer}")
-        self.
+        self.context.response.answer = answer.strip()
 
 
 if __name__ == "__main__":
-
+    C.set_default_service_config().init_by_service_config()
 
     code_infos = {"000858": {}, "600519": {}}
-
-
-
-
-
-
-
-
-    op4 = GetAkAFinancialInfoOp(flow_context=flow_context)
-    op5 = GetAkANewsOp(flow_context=flow_context)
-    op6 = MergeAkAInfoOp(flow_context=flow_context)
+    context = FlowContext(code_infos=code_infos, query="茅台和五粮现在价格多少?")
+
+    op1 = GetAkAInfoOp()
+    op2 = GetAkASpotOp()
+    op3 = GetAkAMoneyFlowOp()
+    op4 = GetAkAFinancialInfoOp()
+    op5 = GetAkANewsOp()
+    op6 = MergeAkAInfoOp()
 
     op = op1 >> op2 >> op3 >> op4 >> op5 >> op6
-    op
+    op(context=context)
flowllm/op/{llm_base_op.py → base_llm_op.py}
CHANGED
@@ -14,26 +14,27 @@ class BaseLLMOp(BaseOp, ABC):
     file_path: str = __file__
 
     def __init__(self,
+                 language: str = "",
                  prompt_path: str = "",
                  llm: str = "default",
                  embedding_model: str = "default",
                  vector_store: str = "default",
                  **kwargs):
-
         super().__init__(**kwargs)
 
+        self.language: str = language or C.language
+        self.prompt_path: Path = Path(prompt_path) if prompt_path else \
+            Path(self.file_path).parent / self.name.replace("_op", "_prompt.yaml")
         self._llm: BaseLLM | str = llm
         self._embedding_model: BaseEmbeddingModel | str = embedding_model
         self._vector_store: BaseVectorStore | str = vector_store
 
-        default_prompt_path: Path = Path(self.file_path).parent / self.name.replace("_op", "_prompt.yaml")
-        self.prompt_path: Path = Path(prompt_path) if prompt_path else default_prompt_path
         self.prompt = PromptHandler(language=self.language).load_prompt_by_file(self.prompt_path)
 
     @property
     def llm(self) -> BaseLLM:
         if isinstance(self._llm, str):
-            llm_config: LLMConfig =
+            llm_config: LLMConfig = C.service_config.llm[self._llm]
             llm_cls = C.resolve_llm(llm_config.backend)
             self._llm = llm_cls(model_name=llm_config.model_name, **llm_config.params)
 
@@ -43,7 +44,7 @@ class BaseLLMOp(BaseOp, ABC):
     def embedding_model(self) -> BaseEmbeddingModel:
         if isinstance(self._embedding_model, str):
             embedding_model_config: EmbeddingModelConfig = \
-
+                C.service_config.embedding_model[self._embedding_model]
             embedding_model_cls = C.resolve_embedding_model(embedding_model_config.backend)
             self._embedding_model = embedding_model_cls(model_name=embedding_model_config.model_name,
                                                         **embedding_model_config.params)
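The reworked constructor derives a default prompt file from the op's own module: the directory of file_path joined with the op name after replacing "_op" with "_prompt.yaml", which is exactly how react_op.py picks up react_prompt.yaml above. A small illustration of the rule (the install path is hypothetical, and the derivation of self.name from the class name is an assumption not shown in this diff):

from pathlib import Path

file_path = "/site-packages/flowllm/op/agent/react_op.py"  # hypothetical location
name = "react_op"  # assumed to be derived from the class name by BaseOp

prompt_path = Path(file_path).parent / name.replace("_op", "_prompt.yaml")
print(prompt_path)  # /site-packages/flowllm/op/agent/react_prompt.yaml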