flowllm 0.1.0__py3-none-any.whl → 0.1.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- flowllm/__init__.py +21 -0
- flowllm/app.py +15 -0
- flowllm/client/__init__.py +25 -0
- flowllm/client/async_http_client.py +81 -0
- flowllm/client/http_client.py +81 -0
- flowllm/client/mcp_client.py +133 -0
- flowllm/client/sync_mcp_client.py +116 -0
- flowllm/config/__init__.py +1 -0
- flowllm/config/default.yaml +77 -0
- flowllm/config/empty.yaml +37 -0
- flowllm/config/pydantic_config_parser.py +242 -0
- flowllm/context/base_context.py +79 -0
- flowllm/context/flow_context.py +16 -0
- llmflow/op/prompt_mixin.py → flowllm/context/prompt_handler.py +25 -14
- flowllm/context/registry.py +30 -0
- flowllm/context/service_context.py +147 -0
- flowllm/embedding_model/__init__.py +1 -0
- {llmflow → flowllm}/embedding_model/base_embedding_model.py +93 -2
- {llmflow → flowllm}/embedding_model/openai_compatible_embedding_model.py +71 -13
- flowllm/flow/__init__.py +1 -0
- flowllm/flow/base_flow.py +72 -0
- flowllm/flow/base_tool_flow.py +15 -0
- flowllm/flow/gallery/__init__.py +8 -0
- flowllm/flow/gallery/cmd_flow.py +11 -0
- flowllm/flow/gallery/code_tool_flow.py +30 -0
- flowllm/flow/gallery/dashscope_search_tool_flow.py +34 -0
- flowllm/flow/gallery/deepsearch_tool_flow.py +39 -0
- flowllm/flow/gallery/expression_tool_flow.py +18 -0
- flowllm/flow/gallery/mock_tool_flow.py +67 -0
- flowllm/flow/gallery/tavily_search_tool_flow.py +30 -0
- flowllm/flow/gallery/terminate_tool_flow.py +30 -0
- flowllm/flow/parser/expression_parser.py +171 -0
- flowllm/llm/__init__.py +2 -0
- {llmflow → flowllm}/llm/base_llm.py +100 -18
- flowllm/llm/litellm_llm.py +455 -0
- flowllm/llm/openai_compatible_llm.py +439 -0
- flowllm/op/__init__.py +11 -0
- llmflow/op/react/react_v1_op.py → flowllm/op/agent/react_op.py +17 -22
- flowllm/op/akshare/__init__.py +3 -0
- flowllm/op/akshare/get_ak_a_code_op.py +108 -0
- flowllm/op/akshare/get_ak_a_code_prompt.yaml +21 -0
- flowllm/op/akshare/get_ak_a_info_op.py +140 -0
- flowllm/op/base_llm_op.py +64 -0
- flowllm/op/base_op.py +148 -0
- flowllm/op/base_ray_op.py +313 -0
- flowllm/op/code/__init__.py +1 -0
- flowllm/op/code/execute_code_op.py +42 -0
- flowllm/op/gallery/__init__.py +2 -0
- flowllm/op/gallery/mock_op.py +42 -0
- flowllm/op/gallery/terminate_op.py +29 -0
- flowllm/op/parallel_op.py +23 -0
- flowllm/op/search/__init__.py +3 -0
- flowllm/op/search/dashscope_deep_research_op.py +260 -0
- flowllm/op/search/dashscope_search_op.py +179 -0
- flowllm/op/search/dashscope_search_prompt.yaml +13 -0
- flowllm/op/search/tavily_search_op.py +102 -0
- flowllm/op/sequential_op.py +21 -0
- flowllm/schema/flow_request.py +12 -0
- flowllm/schema/flow_response.py +12 -0
- flowllm/schema/message.py +35 -0
- flowllm/schema/service_config.py +72 -0
- flowllm/schema/tool_call.py +118 -0
- {llmflow → flowllm}/schema/vector_node.py +1 -0
- flowllm/service/__init__.py +3 -0
- flowllm/service/base_service.py +68 -0
- flowllm/service/cmd_service.py +15 -0
- flowllm/service/http_service.py +79 -0
- flowllm/service/mcp_service.py +47 -0
- flowllm/storage/__init__.py +1 -0
- flowllm/storage/cache/__init__.py +1 -0
- flowllm/storage/cache/cache_data_handler.py +104 -0
- flowllm/storage/cache/data_cache.py +375 -0
- flowllm/storage/vector_store/__init__.py +3 -0
- flowllm/storage/vector_store/base_vector_store.py +44 -0
- {llmflow → flowllm/storage}/vector_store/chroma_vector_store.py +11 -10
- {llmflow → flowllm/storage}/vector_store/es_vector_store.py +11 -11
- llmflow/vector_store/file_vector_store.py → flowllm/storage/vector_store/local_vector_store.py +110 -11
- flowllm/utils/common_utils.py +52 -0
- flowllm/utils/fetch_url.py +117 -0
- flowllm/utils/llm_utils.py +28 -0
- flowllm/utils/ridge_v2.py +54 -0
- {llmflow → flowllm}/utils/timer.py +5 -4
- {flowllm-0.1.0.dist-info → flowllm-0.1.2.dist-info}/METADATA +45 -388
- flowllm-0.1.2.dist-info/RECORD +99 -0
- flowllm-0.1.2.dist-info/entry_points.txt +2 -0
- {flowllm-0.1.0.dist-info → flowllm-0.1.2.dist-info}/licenses/LICENSE +1 -1
- flowllm-0.1.2.dist-info/top_level.txt +1 -0
- flowllm-0.1.0.dist-info/RECORD +0 -66
- flowllm-0.1.0.dist-info/entry_points.txt +0 -3
- flowllm-0.1.0.dist-info/top_level.txt +0 -1
- llmflow/app.py +0 -53
- llmflow/config/config_parser.py +0 -80
- llmflow/config/mock_config.yaml +0 -58
- llmflow/embedding_model/__init__.py +0 -5
- llmflow/enumeration/agent_state.py +0 -8
- llmflow/llm/__init__.py +0 -5
- llmflow/llm/openai_compatible_llm.py +0 -283
- llmflow/mcp_server.py +0 -110
- llmflow/op/__init__.py +0 -10
- llmflow/op/base_op.py +0 -125
- llmflow/op/mock_op.py +0 -40
- llmflow/op/vector_store/__init__.py +0 -13
- llmflow/op/vector_store/recall_vector_store_op.py +0 -48
- llmflow/op/vector_store/update_vector_store_op.py +0 -28
- llmflow/op/vector_store/vector_store_action_op.py +0 -46
- llmflow/pipeline/pipeline.py +0 -94
- llmflow/pipeline/pipeline_context.py +0 -37
- llmflow/schema/app_config.py +0 -69
- llmflow/schema/experience.py +0 -144
- llmflow/schema/message.py +0 -68
- llmflow/schema/request.py +0 -32
- llmflow/schema/response.py +0 -29
- llmflow/service/__init__.py +0 -0
- llmflow/service/llmflow_service.py +0 -96
- llmflow/tool/__init__.py +0 -9
- llmflow/tool/base_tool.py +0 -80
- llmflow/tool/code_tool.py +0 -43
- llmflow/tool/dashscope_search_tool.py +0 -162
- llmflow/tool/mcp_tool.py +0 -77
- llmflow/tool/tavily_search_tool.py +0 -109
- llmflow/tool/terminate_tool.py +0 -23
- llmflow/utils/__init__.py +0 -0
- llmflow/utils/common_utils.py +0 -17
- llmflow/utils/file_handler.py +0 -25
- llmflow/utils/http_client.py +0 -156
- llmflow/utils/op_utils.py +0 -102
- llmflow/utils/registry.py +0 -33
- llmflow/vector_store/__init__.py +0 -7
- llmflow/vector_store/base_vector_store.py +0 -136
- {llmflow → flowllm/context}/__init__.py +0 -0
- {llmflow/config → flowllm/enumeration}/__init__.py +0 -0
- {llmflow → flowllm}/enumeration/chunk_enum.py +0 -0
- {llmflow → flowllm}/enumeration/http_enum.py +0 -0
- {llmflow → flowllm}/enumeration/role.py +0 -0
- {llmflow/enumeration → flowllm/flow/parser}/__init__.py +0 -0
- {llmflow/op/react → flowllm/op/agent}/__init__.py +0 -0
- /llmflow/op/react/react_v1_prompt.yaml → /flowllm/op/agent/react_prompt.yaml +0 -0
- {llmflow/pipeline → flowllm/schema}/__init__.py +0 -0
- {llmflow/schema → flowllm/utils}/__init__.py +0 -0
- {llmflow → flowllm}/utils/singleton.py +0 -0
- {flowllm-0.1.0.dist-info → flowllm-0.1.2.dist-info}/WHEEL +0 -0
@@ -0,0 +1,140 @@
|
|
1
|
+
import json
|
2
|
+
import time
|
3
|
+
|
4
|
+
import akshare as ak
|
5
|
+
import pandas as pd
|
6
|
+
from loguru import logger
|
7
|
+
from tqdm import tqdm
|
8
|
+
|
9
|
+
from flowllm.context.flow_context import FlowContext
|
10
|
+
from flowllm.context.service_context import C
|
11
|
+
from flowllm.op.base_op import BaseOp
|
12
|
+
from flowllm.utils.fetch_url import fetch_webpage_text
|
13
|
+
|
14
|
+
|
15
|
+
@C.register_op()
class GetAkAInfoOp(BaseOp):
    """Fetch one section of akshare data per stock code and merge it into the context.

    ``execute`` walks every ``code -> info_dict`` pair in
    ``self.context.code_infos``, calls ``execute_code(code)`` with retries and
    updates ``info_dict`` in place with whatever that call returned.
    Subclasses override ``execute_code`` to contribute a different section.
    """

    def execute_code(self, code: str) -> dict:
        """Return the basic-information section for one stock code.

        Args:
            code: Stock code, passed to ``ak.stock_individual_info_em`` as ``symbol``.

        Returns:
            ``{"基本信息": {item: value, ...}}`` built from the ``item`` and
            ``value`` fields of every row of the returned frame.
        """
        df = ak.stock_individual_info_em(symbol=code)
        result = {}
        for line in df.to_dict(orient="records"):
            result[line["item"].strip()] = line["value"]
        return {"基本信息": result}

    def execute(self):
        """Run ``execute_code`` for every code in the context, retrying on failure.

        Reads ``max_retries`` from ``op_params`` (default 3). A failed attempt
        is logged and retried after ``i * 2 + 1`` seconds; when every attempt
        fails the code is skipped and its info dict is left untouched, so one
        bad code does not abort the whole run.
        """
        max_retries: int = self.op_params.get("max_retries", 3)
        for code, info_dict in self.context.code_infos.items():
            result = {}
            for i in range(max_retries):
                try:
                    result = self.execute_code(code)
                    break

                except Exception as e:
                    # Still best-effort, but no longer silent: without this log a
                    # permanently failing code is indistinguishable from "no data".
                    logger.warning(f"op={self.name} code={code} attempt={i + 1}/{max_retries} failed, "
                                   f"error={e!r}")
                    if i != max_retries - 1:
                        time.sleep(i * 2 + 1)

            if result:
                info_dict.update(result)

            # Fixed pause between codes (presumably to throttle upstream requests).
            time.sleep(1)

        # default=str keeps this log line from raising on values that are not
        # JSON-native (e.g. numpy scalars or dates coming out of a DataFrame).
        logger.info(f"code_infos={json.dumps(self.context.code_infos, ensure_ascii=False, indent=2, default=str)}")
|
43
|
+
|
44
|
+
|
45
|
+
@C.register_op()
class GetAkASpotOp(GetAkAInfoOp):
    """Adds the "实时行情" section: the row of the A-share table matching the code."""

    def execute_code(self, code: str) -> dict:
        """Look up ``code`` in the frame returned by ``GetAkACodeOp.download_a_stock_df``.

        Args:
            code: Stock code compared against the ``代码`` column.

        Returns:
            ``{"实时行情": <last matching row as a dict>}``, or ``{}`` when no
            row matches.
        """
        # Function-scope import, exactly as in the original implementation.
        from flowllm.op.akshare import GetAkACodeOp

        spot_df: pd.DataFrame = GetAkACodeOp.download_a_stock_df()
        matched = spot_df.loc[spot_df["代码"] == code, :]
        if len(matched) == 0:
            return {}

        return {"实时行情": matched.to_dict(orient="records")[-1]}
|
58
|
+
|
59
|
+
|
60
|
+
@C.register_op()
class GetAkAMoneyFlowOp(GetAkAInfoOp):
    """Adds the "资金流入流出" section: the last row of the per-stock fund-flow table."""

    @staticmethod
    def _infer_market(code: str) -> str:
        """Map a stock code to an exchange tag (``sh`` / ``sz`` / ``bj``) by its prefix.

        NOTE(review): the prefix table follows the usual A-share numbering
        (4xxxxx / 8xxxxx / 92xxxx Beijing, 0xxxxx / 2xxxxx / 3xxxxx Shenzhen,
        everything else Shanghai) - confirm it covers the codes you query.
        """
        if code.startswith(("4", "8", "92")):
            return "bj"
        if code.startswith(("0", "2", "3")):
            return "sz"
        # Falls back to "sh", i.e. what the call did before `market` was passed.
        return "sh"

    def execute_code(self, code: str) -> dict:
        """Return the latest fund-flow row for one stock code.

        Args:
            code: Stock code, passed to ``ak.stock_individual_fund_flow`` as ``stock``.

        Returns:
            ``{"资金流入流出": {column: str(value), ...}}`` for the last row of
            the returned frame, or ``{}`` when the frame is empty.
        """
        # The call previously omitted `market`, so every code was looked up with
        # akshare's default exchange; non-Shanghai codes such as "000858" were
        # then queried on the wrong market. An explicit op_params["market"]
        # still takes precedence over the inferred value.
        market: str = self.op_params.get("market") or self._infer_market(code)
        df = ak.stock_individual_fund_flow(stock=code, market=market)
        result = {}
        if len(df) > 0:
            # Values are stringified, as before.
            result["资金流入流出"] = {k: str(v) for k, v in df.to_dict(orient="records")[-1].items()}
        return result
|
69
|
+
|
70
|
+
|
71
|
+
@C.register_op()
class GetAkAFinancialInfoOp(GetAkAInfoOp):
    """Adds the "财务信息" section: the last row of the financial-abstract table."""

    def execute_code(self, code: str) -> dict:
        """Return the last financial-abstract row for one stock code.

        Args:
            code: Stock code, passed to ``ak.stock_financial_abstract_ths`` as ``symbol``.

        Returns:
            ``{"财务信息": {column: str(value), ...}}`` for the last row of the
            returned frame, or ``{}`` when the frame is empty.
        """
        report_df = ak.stock_financial_abstract_ths(symbol=code, indicator="按报告期")
        if len(report_df) == 0:
            return {}

        last_row = report_df.to_dict(orient="records")[-1]
        return {"财务信息": {field: str(cell) for field, cell in last_row.items()}}
|
80
|
+
|
81
|
+
|
82
|
+
@C.register_op()
class GetAkANewsOp(GetAkAInfoOp):
    """Adds the "新闻" section: the text of the first ``top_n_news`` news pages."""

    def execute_code(self, code: str) -> dict:
        """Download and concatenate the leading news articles for one stock code.

        Reads ``top_n_news`` from ``op_params`` (default 1).

        Args:
            code: Stock code, passed to ``ak.stock_news_em`` as ``symbol``.

        Returns:
            ``{"新闻": text}`` where ``text`` joins one entry per article with
            blank lines; each entry carries its index, the first 8 characters
            of the URL's file name as the time, and the stripped page text.
        """
        limit: int = self.op_params.get("top_n_news", 1)
        records = ak.stock_news_em(symbol=code).to_dict(orient="records")[:limit]

        sections = []
        for idx, record in enumerate(tqdm(records)):
            link = record["新闻链接"]
            # e.g. http://finance.eastmoney.com/a/202508133482756869.html
            file_stem = link.split("/")[-1].split(".")[0]
            day = file_stem[:8]
            body = fetch_webpage_text(link).strip()
            sections.append(f"新闻{idx}\n时间{day}\n{body}")

        return {"新闻": "\n\n".join(sections)}
|
100
|
+
|
101
|
+
|
102
|
+
@C.register_op()
class MergeAkAInfoOp(BaseOp):
    """Render ``context.code_infos`` as markdown-style text and store it as the answer."""

    @staticmethod
    def _render_value(value) -> list:
        """Return the text lines for one section value (str, dict or list; others give none)."""
        if isinstance(value, str):
            return [value]
        if isinstance(value, dict):
            return [f"{attr_name}: {attr_value}" for attr_name, attr_value in value.items()]
        if isinstance(value, list):
            return [item.strip() for item in value if item]
        return []

    def execute(self):
        """Build one text block per stock code and write the result to ``context.response.answer``.

        Every code gets a ``### code`` heading and every section a
        ``#### code-section`` heading followed by the rendered value. The
        joined text is logged and then stored with surrounding whitespace
        stripped.
        """
        blocks = []
        for code, info_dict in self.context.code_infos.items():
            lines = [f"\n\n### {code}"]
            for key, value in info_dict.items():
                lines.append(f"\n#### {code}-{key}")
                lines.extend(self._render_value(value))
            blocks.append("\n".join(lines))

        answer = "\n".join(blocks)
        logger.info(f"answer=\n{answer}")
        self.context.response.answer = answer.strip()
|
124
|
+
|
125
|
+
|
126
|
+
if __name__ == "__main__":
    # Manual smoke run: chain every op in this module over two stock codes.
    C.set_default_service_config().init_by_service_config()

    # Each code starts with an empty info dict that the ops below fill in place.
    code_infos = {"000858": {}, "600519": {}}
    context = FlowContext(code_infos=code_infos, query="茅台和五粮现在价格多少?")

    op1 = GetAkAInfoOp()
    op2 = GetAkASpotOp()
    op3 = GetAkAMoneyFlowOp()
    op4 = GetAkAFinancialInfoOp()
    op5 = GetAkANewsOp()
    op6 = MergeAkAInfoOp()

    # `>>` composes the ops sequentially; the merge op runs last and writes the answer.
    op = op1 >> op2 >> op3 >> op4 >> op5 >> op6
    op(context=context)
|
@@ -0,0 +1,64 @@
|
|
1
|
+
from abc import ABC
|
2
|
+
from pathlib import Path
|
3
|
+
|
4
|
+
from flowllm.context.prompt_handler import PromptHandler
|
5
|
+
from flowllm.context.service_context import C
|
6
|
+
from flowllm.embedding_model.base_embedding_model import BaseEmbeddingModel
|
7
|
+
from flowllm.llm.base_llm import BaseLLM
|
8
|
+
from flowllm.op.base_op import BaseOp
|
9
|
+
from flowllm.schema.service_config import LLMConfig, EmbeddingModelConfig
|
10
|
+
from flowllm.storage.vector_store.base_vector_store import BaseVectorStore
|
11
|
+
|
12
|
+
|
13
|
+
class BaseLLMOp(BaseOp, ABC):
    """Base op with a prompt file and lazily resolved LLM / embedding model / vector store.

    The three resources are stored as config keys (strings) and resolved on
    first access of the matching property; the resolved object replaces the key
    so later accesses reuse it.
    """

    # Module path used to locate the default prompt file (its parent directory
    # is searched). It defaults to this module's path.
    file_path: str = __file__

    def __init__(self,
                 language: str = "",
                 prompt_path: str = "",
                 llm: str = "default",
                 embedding_model: str = "default",
                 vector_store: str = "default",
                 **kwargs):
        """Store the resource keys and load the prompt file.

        Args:
            language: Prompt language; falls back to ``C.language`` when empty.
            prompt_path: Explicit prompt file. When empty, the file is looked up
                next to ``file_path`` under a name derived from the op name
                (``foo_op`` -> ``foo_prompt.yaml``).
            llm: Key into ``C.service_config.llm``.
            embedding_model: Key into ``C.service_config.embedding_model``.
            vector_store: Name passed to ``C.get_vector_store``.
            **kwargs: Forwarded to ``BaseOp.__init__``.
        """
        super().__init__(**kwargs)

        self.language: str = language or C.language
        self.prompt_path: Path = Path(prompt_path) if prompt_path else \
            Path(self.file_path).parent / self._default_prompt_file_name(self.name)
        self._llm: BaseLLM | str = llm
        self._embedding_model: BaseEmbeddingModel | str = embedding_model
        self._vector_store: BaseVectorStore | str = vector_store

        self.prompt = PromptHandler(language=self.language).load_prompt_by_file(self.prompt_path)

    @staticmethod
    def _default_prompt_file_name(op_name: str) -> str:
        """Derive the default prompt file name from an op name.

        Only a trailing ``_op`` is swapped for ``_prompt.yaml``. The previous
        ``str.replace`` rewrote every occurrence, so ``get_option_op`` became
        ``get_prompt.yamltion_prompt.yaml``. Names that do not end in ``_op``
        are returned unchanged.
        """
        suffix = "_op"
        if op_name.endswith(suffix):
            return op_name[:-len(suffix)] + "_prompt.yaml"
        return op_name

    @property
    def llm(self) -> BaseLLM:
        """LLM instance, built from ``C.service_config.llm`` on first access."""
        if isinstance(self._llm, str):
            llm_config: LLMConfig = C.service_config.llm[self._llm]
            llm_cls = C.resolve_llm(llm_config.backend)
            self._llm = llm_cls(model_name=llm_config.model_name, **llm_config.params)

        return self._llm

    @property
    def embedding_model(self) -> BaseEmbeddingModel:
        """Embedding model instance, built from ``C.service_config.embedding_model`` on first access."""
        if isinstance(self._embedding_model, str):
            embedding_model_config: EmbeddingModelConfig = \
                C.service_config.embedding_model[self._embedding_model]
            embedding_model_cls = C.resolve_embedding_model(embedding_model_config.backend)
            self._embedding_model = embedding_model_cls(model_name=embedding_model_config.model_name,
                                                        **embedding_model_config.params)

        return self._embedding_model

    @property
    def vector_store(self) -> BaseVectorStore:
        """Vector store obtained from ``C.get_vector_store`` on first access."""
        if isinstance(self._vector_store, str):
            self._vector_store = C.get_vector_store(self._vector_store)
        return self._vector_store

    def prompt_format(self, prompt_name: str, **kwargs) -> str:
        """Delegate to ``self.prompt.prompt_format`` for ``prompt_name`` with ``kwargs``."""
        return self.prompt.prompt_format(prompt_name=prompt_name, **kwargs)

    def get_prompt(self, prompt_name: str) -> str:
        """Delegate to ``self.prompt.get_prompt`` for ``prompt_name``."""
        return self.prompt.get_prompt(prompt_name=prompt_name)
|
flowllm/op/base_op.py
ADDED
@@ -0,0 +1,148 @@
|
|
1
|
+
from abc import abstractmethod, ABC
|
2
|
+
from concurrent.futures import Future
|
3
|
+
from typing import List
|
4
|
+
|
5
|
+
from loguru import logger
|
6
|
+
from tqdm import tqdm
|
7
|
+
|
8
|
+
from flowllm.context.flow_context import FlowContext
|
9
|
+
from flowllm.context.service_context import C
|
10
|
+
from flowllm.utils.common_utils import camel_to_snake
|
11
|
+
from flowllm.utils.timer import Timer
|
12
|
+
|
13
|
+
|
14
|
+
class BaseOp(ABC):
    """Abstract unit of work that runs against a ``FlowContext``.

    Subclasses implement ``execute``. Instances are callable, can fan work out
    to ``C.thread_pool`` (``submit_task`` / ``join_task``) and compose with
    ``>>`` (``SequentialOp``) and ``|`` (``ParallelOp``).
    """

    def __init__(self,
                 name: str = "",
                 raise_exception: bool = True,
                 **kwargs):
        """Initialise the op.

        Args:
            name: Op name; defaults to ``camel_to_snake`` of the class name.
            raise_exception: When False, ``__call__`` logs exceptions raised by
                ``execute`` instead of propagating them.
            **kwargs: Stored unchanged as ``op_params``.
        """
        super().__init__()

        self.name: str = name or camel_to_snake(self.__class__.__name__)
        self.raise_exception: bool = raise_exception
        self.op_params: dict = kwargs

        self.task_list: List[Future] = []
        self.ray_task_list: List = []  # Ray ObjectRef list
        self.timer = Timer(name=self.name)
        self.context: FlowContext | None = None

    @abstractmethod
    def execute(self):
        """Do the op's work; reads and writes ``self.context``."""
        ...

    def __call__(self, context: FlowContext = None):
        """Bind ``context``, run ``execute`` inside the timer and return the response.

        Returns:
            ``self.context.response`` when a (truthy) context is bound, else ``None``.
        """
        self.context = context
        with self.timer:
            if self.raise_exception:
                self.execute()

            else:

                try:
                    self.execute()
                except Exception as e:
                    logger.exception(f"op={self.name} execute failed, error={e.args}")

        return self.context.response if self.context else None

    def submit_task(self, fn, *args, **kwargs):
        """Submit ``fn(*args, **kwargs)`` to ``C.thread_pool``; returns ``self`` for chaining."""
        task = C.thread_pool.submit(fn, *args, **kwargs)
        self.task_list.append(task)
        return self

    def join_task(self, task_desc: str = None) -> list:
        """Wait for every submitted task and flatten the results.

        Falsy results are dropped; list results are extended into the output,
        anything else is appended.

        Args:
            task_desc: Progress-bar label; defaults to the op name.

        Returns:
            The combined results in submission order.

        Raises:
            Whatever a task raised, propagated by ``Future.result()``.
        """
        result = []
        try:
            for task in tqdm(self.task_list, desc=task_desc or self.name):
                t_result = task.result()
                if t_result:
                    if isinstance(t_result, list):
                        result.extend(t_result)
                    else:
                        result.append(t_result)
        finally:
            # Clear even when a task raised: a failed future left in the list
            # would re-raise on every later join_task call of this op.
            self.task_list.clear()
        return result

    def __rshift__(self, op: "BaseOp"):
        """``self >> op``: build a ``SequentialOp`` of ``self`` followed by ``op`` (flattened if sequential)."""
        from flowllm.op.sequential_op import SequentialOp

        sequential_op = SequentialOp(ops=[self])

        if isinstance(op, SequentialOp):
            sequential_op.ops.extend(op.ops)
        else:
            sequential_op.ops.append(op)
        return sequential_op

    def __or__(self, op: "BaseOp"):
        """``self | op``: build a ``ParallelOp`` of ``self`` and ``op`` (flattened if parallel)."""
        from flowllm.op.parallel_op import ParallelOp

        parallel_op = ParallelOp(ops=[self])

        if isinstance(op, ParallelOp):
            parallel_op.ops.extend(op.ops)
        else:
            parallel_op.ops.append(op)

        return parallel_op
|
89
|
+
|
90
|
+
|
91
|
+
def run1():
    """Smoke test: define a trivial op, instantiate it and call it without a context."""

    class MockOp(BaseOp):
        def execute(self):
            logger.info(f"op={self.name} execute")

    MockOp()()
|
100
|
+
|
101
|
+
|
102
|
+
def run2():
    """Exercise the ``>>`` and ``|`` operator overloads on four small pipelines."""
    import time
    from concurrent.futures import ThreadPoolExecutor

    class TestOp(BaseOp):

        def execute(self):
            time.sleep(0.1)
            label = f"{self.name}"
            logger.info(f"Executing {label}")
            return label

    # Register a thread pool on the service context for the parallel cases.
    C["thread_pool"] = ThreadPoolExecutor(max_workers=4)

    # Four named ops shared by every scenario below.
    op1 = TestOp("op1")
    op2 = TestOp("op2")
    op3 = TestOp("op3")
    op4 = TestOp("op4")

    # (banner, pipeline factory, result formatter); the factory is called only
    # after the banner is logged, so composition happens where it did before.
    cases = [
        ("=== Testing sequential execution op1 >> op2 ===",
         lambda: op1 >> op2,
         lambda r: f"Sequential result: {r}"),
        ("=== Testing parallel execution op1 | op2 ===",
         lambda: op1 | op2,
         lambda r: f"Parallel result: {r}"),
        ("=== Testing mixed calls op1 >> (op2 | op3) >> op4 ===",
         lambda: op1 >> (op2 | op3) >> op4,
         lambda r: f"Mixed result: {r}"),
        ("=== Testing complex mixed calls op1 >> (op1 | (op2 >> op3)) >> op4 ===",
         lambda: op1 >> (op1 | (op2 >> op3)) >> op4,
         lambda r: f"Complex mixed result: {r}"),
    ]

    for banner, build, describe in cases:
        logger.info(banner)
        pipeline = build()
        outcome = pipeline()
        logger.info(describe(outcome))
|
143
|
+
|
144
|
+
|
145
|
+
if __name__ == "__main__":
    # Manual demo: run the basic test, print a separator line, then the operator test.
    run1()
    print("\n" + "=" * 50 + "\n")
    run2()
|
@@ -0,0 +1,313 @@
|
|
1
|
+
from abc import ABC
|
2
|
+
|
3
|
+
import pandas as pd
|
4
|
+
import ray
|
5
|
+
from loguru import logger
|
6
|
+
from tqdm import tqdm
|
7
|
+
|
8
|
+
from flowllm.context.service_context import C
|
9
|
+
from flowllm.op.base_op import BaseOp
|
10
|
+
|
11
|
+
|
12
|
+
class BaseRayOp(BaseOp, ABC):
    """
    Base class for Ray-based operations that provides parallel task execution capabilities.
    Inherits from BaseOp and provides methods for submitting and joining Ray tasks.
    """

    def submit_and_join_ray_task(self, fn, parallel_key: str = "", task_desc: str = "",
                                 enable_test: bool = False, **kwargs):
        """
        Submit multiple Ray tasks in parallel and wait for all results.

        One list-valued keyword argument is split across
        ``C.service_config.ray_max_workers`` Ray tasks; ``fn`` is called once
        per list element with that element under ``parallel_key`` plus an
        ``actor_index`` keyword.

        Args:
            fn: Function to execute in parallel
            parallel_key: Key of the parameter to parallelize over (the first list-valued
                kwarg is used if empty)
            task_desc: Description for logging and progress bars
            enable_test: Run ``fn`` sequentially in the current process (``actor_index`` 0)
                instead of submitting Ray tasks
            **kwargs: Arguments to pass to the function, including the list to parallelize over

        Returns:
            List of results from all parallel tasks (falsy results dropped, list results flattened)

        Raises:
            KeyError: If ``parallel_key`` is not in ``kwargs`` or no list-valued kwarg exists
        """
        max_workers = C.service_config.ray_max_workers
        self.ray_task_list.clear()

        # Auto-detect parallel key if not provided
        if not parallel_key:
            for key, value in kwargs.items():
                if isinstance(value, list):
                    parallel_key = key
                    logger.info(f"using first list parallel_key={parallel_key}")
                    break

        # Fail with a readable message; previously a missing key surfaced as a
        # bare KeyError('') from kwargs.pop when no list argument was passed.
        if parallel_key not in kwargs:
            raise KeyError(f"parallel_key={parallel_key!r} not found in kwargs={sorted(kwargs.keys())}; "
                           f"pass a list-valued keyword argument to parallelize over")

        # Extract the list to parallelize over
        parallel_list = kwargs.pop(parallel_key)
        assert isinstance(parallel_list, list), f"kwargs[{parallel_key!r}] must be a list"

        # Convert pandas DataFrames to Ray objects for efficient sharing
        # NOTE(review): fn is then called with whatever ray.put returned for these
        # keys (in enable_test mode as well) - confirm fn resolves it itself.
        for key in sorted(kwargs.keys()):
            value = kwargs[key]
            if isinstance(value, pd.DataFrame):
                kwargs[key] = ray.put(value)

        if enable_test:
            test_result_list = []
            for value in parallel_list:
                kwargs.update({"actor_index": 0, parallel_key: value})
                t_result = fn(**kwargs)
                if t_result:
                    if isinstance(t_result, list):
                        test_result_list.extend(t_result)
                    else:
                        test_result_list.append(t_result)
            return test_result_list

        # Create and submit tasks for each worker
        for i in range(max_workers):
            # worker_index is bound as a default argument so the wrapper does not
            # depend on the loop variable's value at the time it actually runs.
            def fn_wrapper(worker_index: int = i):
                result_list = []
                # Distribute work using stride: worker i-th processes items [i, i+max_workers, i+2*max_workers, ...]
                for parallel_value in parallel_list[worker_index::max_workers]:
                    kwargs.update({
                        "actor_index": worker_index,
                        parallel_key: parallel_value,
                    })
                    part_result = fn(**kwargs)
                    if part_result:
                        if isinstance(part_result, list):
                            result_list.extend(part_result)
                        else:
                            result_list.append(part_result)
                return result_list

            self.submit_ray_task(fn=fn_wrapper)
            logger.info(f"ray.submit task_desc={task_desc} id={i}")

        # Wait for all tasks to complete and collect results
        result = self.join_ray_task(task_desc=task_desc)
        logger.info(f"{task_desc} complete. result_size={len(result)} resources={ray.available_resources()}")
        return result

    def submit_ray_task(self, fn, *args, **kwargs):
        """
        Submit a single Ray task for asynchronous execution.

        Args:
            fn: Function to execute remotely
            *args: Positional arguments for the function
            **kwargs: Keyword arguments for the function

        Returns:
            Self for method chaining

        Raises:
            RuntimeError: If Ray is not configured (ray_max_workers <= 1)
        """
        if C.service_config.ray_max_workers <= 1:
            raise RuntimeError("Ray is not configured. Please set ray_max_workers > 1 in service config.")

        # Initialize Ray if not already done
        if not ray.is_initialized():
            logger.warning(f"Ray is not initialized. Initializing Ray with {C.service_config.ray_max_workers} workers.")
            ray.init(num_cpus=C.service_config.ray_max_workers)

        # Create remote function and submit task
        remote_fn = ray.remote(fn)
        task = remote_fn.remote(*args, **kwargs)
        self.ray_task_list.append(task)
        return self

    def join_ray_task(self, task_desc: str = None) -> list:
        """
        Wait for all submitted Ray tasks to complete and collect their results.

        Args:
            task_desc: Description for the progress bar

        Returns:
            Combined list of results from all completed tasks

        Raises:
            Whatever ``ray.get`` raises for a task; the pending list is cleared either way.
        """
        result = []
        try:
            # Process each task and collect results with progress bar
            for task in tqdm(self.ray_task_list, desc=task_desc or f"{self.name}_ray"):
                t_result = ray.get(task)
                if t_result:
                    if isinstance(t_result, list):
                        result.extend(t_result)
                    else:
                        result.append(t_result)
        finally:
            # Always drop the refs so a failure here does not leak stale tasks
            # into the next join_ray_task call on this op.
            self.ray_task_list.clear()
        return result
|
145
|
+
|
146
|
+
|
147
|
+
def run():
    """Test Ray multiprocessing functionality.

    Manual demo with four parts: basic Ray task submission, a timing
    comparison between the thread pool and Ray, error handling for a failing
    task, and the configuration check in ``submit_ray_task``.
    """
    import time
    import math

    # CPU intensive task for testing
    def cpu_intensive_task(n: int, task_id: str):
        """CPU intensive task: calculate prime numbers"""
        start_t = time.time()

        def is_prime(num):
            # Trial division up to sqrt(num).
            if num < 2:
                return False
            for j in range(2, int(math.sqrt(num)) + 1):
                if num % j == 0:
                    return False
            return True

        primes = [x for x in range(2, n) if is_prime(x)]
        end_t = time.time()

        result = {
            'task_id': task_id,
            'prime_count': len(primes),
            'max_prime': max(primes) if primes else 0,
            'execution_time': end_t - start_t
        }
        logger.info(f"Task {task_id} completed: found {len(primes)} primes, time: {result['execution_time']:.2f}s")
        return result

    class TestRayOp(BaseRayOp):
        def execute(self):
            logger.info(f"Executing {self.name}")
            return f"Result from {self.name}"

    # Initialize service config for Ray
    from flowllm.schema.service_config import ServiceConfig

    # Create a test service config with Ray enabled
    test_config = ServiceConfig()
    test_config.ray_max_workers = 4  # Enable Ray with 4 workers
    test_config.thread_pool_max_workers = 4

    # Set the service config
    C.init_by_service_config(test_config)

    logger.info("=== Testing Ray multiprocessing ===")

    # Create test operation
    ray_op = TestRayOp("ray_test_op")

    logger.info("--- Testing submit_ray_task and join_ray_task ---")

    # Test 1: Basic Ray task submission
    task_size = 50000  # Find primes up to 50000 (more CPU intensive)
    num_tasks = 4

    try:
        # Submit multiple CPU-intensive tasks

        logger.info(f"Submitting {num_tasks} Ray tasks (finding primes up to {task_size})")
        start_time = time.time()

        for i in range(num_tasks):
            ray_op.submit_ray_task(cpu_intensive_task, task_size, f"ray_task_{i}")

        # Wait for all tasks to complete
        results = ray_op.join_ray_task("Processing Ray tasks")
        end_time = time.time()

        logger.info(f"Ray tasks completed in {end_time - start_time:.2f}s")
        logger.info(f"Ray results: {results}")

    except Exception as e:
        # Failures are only logged so the remaining tests still run.
        logger.error(f"Ray task execution failed: {e}")

    # Test 2: Compare Ray vs ThreadPool performance
    logger.info("\n--- Performance Comparison: Ray vs ThreadPool ---")

    try:
        # Test with ThreadPool
        thread_op = TestRayOp("thread_test_op")

        logger.info(f"Testing ThreadPool with {num_tasks} tasks")
        start_time = time.time()

        for i in range(num_tasks):
            thread_op.submit_task(cpu_intensive_task, task_size, f"thread_task_{i}")

        thread_results = thread_op.join_task("Processing ThreadPool tasks")
        print(thread_results)
        thread_time = time.time() - start_time

        logger.info(f"ThreadPool completed in {thread_time:.2f}s")

        # Test with Ray again for comparison
        ray_op2 = TestRayOp("ray_test_op2")

        logger.info(f"Testing Ray with {num_tasks} tasks")
        start_time = time.time()

        for i in range(num_tasks):
            ray_op2.submit_ray_task(cpu_intensive_task, task_size, f"ray_task2_{i}")

        ray_results2 = ray_op2.join_ray_task("Processing Ray tasks (comparison)")
        print(ray_results2)
        ray_time = time.time() - start_time

        logger.info(f"Ray completed in {ray_time:.2f}s")

        # Performance comparison
        # Guard against a zero Ray time so the ratio never divides by zero.
        speedup = thread_time / ray_time if ray_time > 0 else 0
        logger.info(f"\n=== Performance Summary ===")
        logger.info(f"ThreadPool time: {thread_time:.2f}s")
        logger.info(f"Ray time: {ray_time:.2f}s")
        logger.info(f"Ray speedup: {speedup:.2f}x")

    except Exception as e:
        logger.error(f"Performance comparison failed: {e}")

    # Test 3: Error handling
    logger.info("\n--- Testing Error Handling ---")

    def failing_task(task_id: str):
        # Raises only for the id "fail_task"; every other id succeeds.
        if task_id == "fail_task":
            raise ValueError(f"Intentional error in {task_id}")
        return f"Success: {task_id}"

    try:
        error_op = TestRayOp("error_test_op")

        # Submit mix of successful and failing tasks
        error_op.submit_ray_task(failing_task, "success_task_1")
        error_op.submit_ray_task(failing_task, "fail_task")
        error_op.submit_ray_task(failing_task, "success_task_2")

        # NOTE(review): presumably the failing task's error surfaces here when
        # its result is fetched - confirm against Ray's behaviour.
        error_results = error_op.join_ray_task("Testing error handling")
        logger.info(f"Error handling results: {error_results}")

    except Exception as e:
        logger.error(f"Expected error occurred: {e}")

    # Test 4: Ray without proper configuration (should fail)
    logger.info("\n--- Testing Ray Configuration Validation ---")

    original_workers = C.service_config.ray_max_workers
    try:
        # Temporarily disable Ray in config
        C.service_config.ray_max_workers = 1  # Disable Ray

        config_test_op = TestRayOp("config_test_op")
        # submit_ray_task raises RuntimeError when ray_max_workers <= 1.
        config_test_op.submit_ray_task(cpu_intensive_task, 100, "config_test")

        logger.error("This should not be reached - Ray should be disabled")

    except RuntimeError as e:
        logger.info(f"✓ Correctly caught configuration error: {e}")

    finally:
        # Restore original configuration
        C.service_config.ray_max_workers = original_workers

    logger.info("\n=== Ray testing completed ===")
|
310
|
+
|
311
|
+
|
312
|
+
if __name__ == "__main__":
    # Run the Ray demo only when this module is executed as a script.
    run()
|
@@ -0,0 +1 @@
|
|
1
|
+
from .execute_code_op import ExecuteCodeOp
|