flowllm 0.1.1__py3-none-any.whl → 0.1.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- flowllm/__init__.py +19 -6
- flowllm/app.py +4 -14
- flowllm/client/__init__.py +25 -0
- flowllm/client/async_http_client.py +81 -0
- flowllm/client/http_client.py +81 -0
- flowllm/client/mcp_client.py +133 -0
- flowllm/client/sync_mcp_client.py +116 -0
- flowllm/config/__init__.py +1 -0
- flowllm/config/{default_config.yaml → default.yaml} +3 -8
- flowllm/config/empty.yaml +37 -0
- flowllm/config/pydantic_config_parser.py +17 -17
- flowllm/context/base_context.py +27 -7
- flowllm/context/flow_context.py +6 -18
- flowllm/context/registry.py +5 -1
- flowllm/context/service_context.py +83 -37
- flowllm/embedding_model/__init__.py +1 -1
- flowllm/embedding_model/base_embedding_model.py +91 -0
- flowllm/embedding_model/openai_compatible_embedding_model.py +63 -5
- flowllm/flow/__init__.py +1 -0
- flowllm/flow/base_flow.py +74 -0
- flowllm/flow/base_tool_flow.py +15 -0
- flowllm/flow/gallery/__init__.py +8 -0
- flowllm/flow/gallery/cmd_flow.py +11 -0
- flowllm/flow/gallery/code_tool_flow.py +30 -0
- flowllm/flow/gallery/dashscope_search_tool_flow.py +34 -0
- flowllm/flow/gallery/deepsearch_tool_flow.py +39 -0
- flowllm/flow/gallery/expression_tool_flow.py +18 -0
- flowllm/flow/gallery/mock_tool_flow.py +62 -0
- flowllm/flow/gallery/tavily_search_tool_flow.py +30 -0
- flowllm/flow/gallery/terminate_tool_flow.py +30 -0
- flowllm/flow/parser/__init__.py +0 -0
- flowllm/{flow_engine/simple_flow_engine.py → flow/parser/expression_parser.py} +25 -67
- flowllm/llm/__init__.py +2 -1
- flowllm/llm/base_llm.py +94 -4
- flowllm/llm/litellm_llm.py +456 -0
- flowllm/llm/openai_compatible_llm.py +205 -5
- flowllm/op/__init__.py +12 -3
- flowllm/op/agent/__init__.py +1 -0
- flowllm/op/agent/react_v1_op.py +109 -0
- flowllm/op/agent/react_v1_prompt.yaml +54 -0
- flowllm/op/agent/react_v2_op.py +86 -0
- flowllm/op/agent/react_v2_prompt.yaml +35 -0
- flowllm/op/akshare/__init__.py +3 -0
- flowllm/op/akshare/get_ak_a_code_op.py +14 -22
- flowllm/op/akshare/get_ak_a_info_op.py +17 -20
- flowllm/op/{llm_base_op.py → base_llm_op.py} +7 -5
- flowllm/op/base_op.py +40 -44
- flowllm/op/base_ray_op.py +313 -0
- flowllm/op/code/__init__.py +1 -0
- flowllm/op/code/execute_code_op.py +42 -0
- flowllm/op/gallery/__init__.py +2 -0
- flowllm/op/{mock_op.py → gallery/mock_op.py} +4 -4
- flowllm/op/gallery/terminate_op.py +29 -0
- flowllm/op/parallel_op.py +2 -9
- flowllm/op/search/__init__.py +3 -0
- flowllm/op/search/dashscope_deep_research_op.py +267 -0
- flowllm/op/search/dashscope_search_op.py +186 -0
- flowllm/op/search/dashscope_search_prompt.yaml +13 -0
- flowllm/op/search/tavily_search_op.py +109 -0
- flowllm/op/sequential_op.py +1 -9
- flowllm/schema/flow_request.py +12 -0
- flowllm/schema/message.py +2 -0
- flowllm/schema/service_config.py +12 -16
- flowllm/schema/tool_call.py +20 -8
- flowllm/schema/vector_node.py +1 -0
- flowllm/service/__init__.py +3 -2
- flowllm/service/base_service.py +50 -41
- flowllm/service/cmd_service.py +15 -0
- flowllm/service/http_service.py +34 -42
- flowllm/service/mcp_service.py +13 -11
- flowllm/storage/cache/__init__.py +1 -0
- flowllm/storage/cache/cache_data_handler.py +104 -0
- flowllm/{utils/dataframe_cache.py → storage/cache/data_cache.py} +136 -92
- flowllm/storage/vector_store/__init__.py +3 -3
- flowllm/storage/vector_store/base_vector_store.py +3 -0
- flowllm/storage/vector_store/es_vector_store.py +4 -5
- flowllm/storage/vector_store/local_vector_store.py +0 -1
- flowllm/utils/common_utils.py +9 -21
- flowllm/utils/fetch_url.py +16 -12
- flowllm/utils/llm_utils.py +28 -0
- flowllm/utils/logger_utils.py +28 -0
- flowllm/utils/ridge_v2.py +54 -0
- {flowllm-0.1.1.dist-info → flowllm-0.1.3.dist-info}/METADATA +43 -390
- flowllm-0.1.3.dist-info/RECORD +102 -0
- flowllm-0.1.3.dist-info/entry_points.txt +2 -0
- flowllm/flow_engine/__init__.py +0 -1
- flowllm/flow_engine/base_flow_engine.py +0 -34
- flowllm-0.1.1.dist-info/RECORD +0 -62
- flowllm-0.1.1.dist-info/entry_points.txt +0 -4
- {flowllm-0.1.1.dist-info → flowllm-0.1.3.dist-info}/WHEEL +0 -0
- {flowllm-0.1.1.dist-info → flowllm-0.1.3.dist-info}/licenses/LICENSE +0 -0
- {flowllm-0.1.1.dist-info → flowllm-0.1.3.dist-info}/top_level.txt +0 -0
@@ -4,8 +4,8 @@ import time
|
|
4
4
|
import akshare as ak
|
5
5
|
import pandas as pd
|
6
6
|
from loguru import logger
|
7
|
+
from tqdm import tqdm
|
7
8
|
|
8
|
-
from flowllm.config.pydantic_config_parser import get_default_config
|
9
9
|
from flowllm.context.flow_context import FlowContext
|
10
10
|
from flowllm.context.service_context import C
|
11
11
|
from flowllm.op.base_op import BaseOp
|
@@ -24,7 +24,7 @@ class GetAkAInfoOp(BaseOp):
|
|
24
24
|
|
25
25
|
def execute(self):
|
26
26
|
max_retries: int = self.op_params.get("max_retries", 3)
|
27
|
-
for code, info_dict in self.
|
27
|
+
for code, info_dict in self.context.code_infos.items():
|
28
28
|
result = {}
|
29
29
|
for i in range(max_retries):
|
30
30
|
try:
|
@@ -39,14 +39,14 @@ class GetAkAInfoOp(BaseOp):
|
|
39
39
|
info_dict.update(result)
|
40
40
|
|
41
41
|
time.sleep(1)
|
42
|
-
logger.info(f"code_infos={json.dumps(self.
|
42
|
+
logger.info(f"code_infos={json.dumps(self.context.code_infos, ensure_ascii=False, indent=2)}")
|
43
43
|
|
44
44
|
|
45
45
|
@C.register_op()
|
46
46
|
class GetAkASpotOp(GetAkAInfoOp):
|
47
47
|
|
48
48
|
def execute_code(self, code: str) -> dict:
|
49
|
-
from flowllm.op import GetAkACodeOp
|
49
|
+
from flowllm.op.akshare import GetAkACodeOp
|
50
50
|
|
51
51
|
df: pd.DataFrame = GetAkACodeOp.download_a_stock_df()
|
52
52
|
df = df.loc[df["代码"] == code, :]
|
@@ -87,7 +87,7 @@ class GetAkANewsOp(GetAkAInfoOp):
|
|
87
87
|
top_n_news: int = self.op_params.get("top_n_news", 1)
|
88
88
|
|
89
89
|
news_content_list = []
|
90
|
-
for i, line in enumerate(stock_news_em_df.to_dict(orient="records")[:top_n_news]):
|
90
|
+
for i, line in enumerate(tqdm(stock_news_em_df.to_dict(orient="records")[:top_n_news])):
|
91
91
|
url = line["新闻链接"]
|
92
92
|
# http://finance.eastmoney.com/a/202508133482756869.html
|
93
93
|
ts = url.split("/")[-1].split(".")[0]
|
@@ -104,7 +104,7 @@ class MergeAkAInfoOp(BaseOp):
|
|
104
104
|
|
105
105
|
def execute(self):
|
106
106
|
code_content = {}
|
107
|
-
for code, info_dict in self.
|
107
|
+
for code, info_dict in self.context.code_infos.items():
|
108
108
|
content_list = [f"\n\n### {code}"]
|
109
109
|
for key, value in info_dict.items():
|
110
110
|
content_list.append(f"\n#### {code}-{key}")
|
@@ -120,24 +120,21 @@ class MergeAkAInfoOp(BaseOp):
|
|
120
120
|
|
121
121
|
answer = "\n".join(code_content.values())
|
122
122
|
logger.info(f"answer=\n{answer}")
|
123
|
-
self.
|
123
|
+
self.context.response.answer = answer.strip()
|
124
124
|
|
125
125
|
|
126
126
|
if __name__ == "__main__":
|
127
|
-
|
127
|
+
C.set_default_service_config().init_by_service_config()
|
128
128
|
|
129
129
|
code_infos = {"000858": {}, "600519": {}}
|
130
|
-
|
131
|
-
|
132
|
-
|
133
|
-
|
134
|
-
|
135
|
-
|
136
|
-
|
137
|
-
|
138
|
-
op4 = GetAkAFinancialInfoOp(flow_context=flow_context)
|
139
|
-
op5 = GetAkANewsOp(flow_context=flow_context)
|
140
|
-
op6 = MergeAkAInfoOp(flow_context=flow_context)
|
130
|
+
context = FlowContext(code_infos=code_infos, query="茅台和五粮现在价格多少?")
|
131
|
+
|
132
|
+
op1 = GetAkAInfoOp()
|
133
|
+
op2 = GetAkASpotOp()
|
134
|
+
op3 = GetAkAMoneyFlowOp()
|
135
|
+
op4 = GetAkAFinancialInfoOp()
|
136
|
+
op5 = GetAkANewsOp()
|
137
|
+
op6 = MergeAkAInfoOp()
|
141
138
|
|
142
139
|
op = op1 >> op2 >> op3 >> op4 >> op5 >> op6
|
143
|
-
op
|
140
|
+
op(context=context)
|
@@ -14,26 +14,28 @@ class BaseLLMOp(BaseOp, ABC):
|
|
14
14
|
file_path: str = __file__
|
15
15
|
|
16
16
|
def __init__(self,
             language: str = "",
             prompt_path: str = "",
             llm: str = "default",
             embedding_model: str = "default",
             vector_store: str = "default",
             **kwargs):
    """
    Set up an LLM-backed op and load its prompt file.

    Args:
        language: Prompt language; falls back to ``C.language`` when empty.
        prompt_path: Explicit prompt file. When empty, the prompt file is
            looked up next to ``file_path`` by replacing the ``op.py`` suffix
            of the file name with ``prompt.yaml``.
        llm: Key into ``C.service_config.llm`` (resolved lazily by the
            ``llm`` property).
        embedding_model: Key into ``C.service_config.embedding_model``
            (resolved lazily by the ``embedding_model`` property).
        vector_store: Vector store key, stored as given.
        **kwargs: Forwarded unchanged to the parent initializer.
    """
    super().__init__(**kwargs)

    self.language: str = language or C.language

    # Only rewrite the file *name*: a plain str.replace over the whole path
    # would also rewrite any directory component that contains "op.py".
    # Keeping the value a Path makes both branches match the annotation.
    op_file = Path(self.file_path)
    default_prompt_path: Path = op_file.with_name(op_file.name.replace("op.py", "prompt.yaml"))
    self.prompt_path: Path = Path(prompt_path) if prompt_path else default_prompt_path

    # These hold config keys until the matching property swaps in an instance.
    self._llm: BaseLLM | str = llm
    self._embedding_model: BaseEmbeddingModel | str = embedding_model
    self._vector_store: BaseVectorStore | str = vector_store

    self.prompt = PromptHandler(language=self.language).load_prompt_by_file(self.prompt_path)
|
32
34
|
|
33
35
|
@property
|
34
36
|
def llm(self) -> BaseLLM:
|
35
37
|
if isinstance(self._llm, str):
|
36
|
-
llm_config: LLMConfig =
|
38
|
+
llm_config: LLMConfig = C.service_config.llm[self._llm]
|
37
39
|
llm_cls = C.resolve_llm(llm_config.backend)
|
38
40
|
self._llm = llm_cls(model_name=llm_config.model_name, **llm_config.params)
|
39
41
|
|
@@ -43,7 +45,7 @@ class BaseLLMOp(BaseOp, ABC):
|
|
43
45
|
def embedding_model(self) -> BaseEmbeddingModel:
|
44
46
|
if isinstance(self._embedding_model, str):
|
45
47
|
embedding_model_config: EmbeddingModelConfig = \
|
46
|
-
|
48
|
+
C.service_config.embedding_model[self._embedding_model]
|
47
49
|
embedding_model_cls = C.resolve_embedding_model(embedding_model_config.backend)
|
48
50
|
self._embedding_model = embedding_model_cls(model_name=embedding_model_config.model_name,
|
49
51
|
**embedding_model_config.params)
|
flowllm/op/base_op.py
CHANGED
@@ -1,23 +1,3 @@
|
|
1
|
-
"""
|
2
|
-
BaseOp operator overloading implementation
|
3
|
-
|
4
|
-
Supported operators:
|
5
|
-
- op1 >> op2: Sequential execution, output of op1 becomes input of op2
|
6
|
-
- op1 | op2: Parallel execution, both operations use the same input, returns list of results
|
7
|
-
- Mixed calls: op1 >> (op2 | op3) >> op4
|
8
|
-
|
9
|
-
Usage examples:
|
10
|
-
# Sequential execution
|
11
|
-
result = op1 >> op2 >> op3
|
12
|
-
|
13
|
-
# Parallel execution
|
14
|
-
results = op1 | op2 | op3
|
15
|
-
|
16
|
-
# Mixed calls
|
17
|
-
result = op1 >> (op2 | op3) >> op4
|
18
|
-
result = op1 >> (op1 | (op2 >> op3)) >> op4
|
19
|
-
"""
|
20
|
-
|
21
1
|
from abc import abstractmethod, ABC
|
22
2
|
from concurrent.futures import Future
|
23
3
|
from typing import List
|
@@ -35,60 +15,76 @@ class BaseOp(ABC):
|
|
35
15
|
|
36
16
|
def __init__(self,
             name: str = "",
             raise_exception: bool = True,
             enable_multithread: bool = True,
             **kwargs):
    """
    Initialise the op's configuration and per-run bookkeeping.

    Args:
        name: Op name; when empty it is derived from the class name via
            ``camel_to_snake``.
        raise_exception: Whether ``__call__`` lets errors from ``execute``
            propagate (True) or logs and swallows them (False).
        enable_multithread: Whether ``submit_task`` uses ``C.thread_pool``
            or runs the callable inline.
        **kwargs: Kept verbatim as ``op_params``.
    """
    super().__init__()

    # Configuration.
    self.name: str = name if name else camel_to_snake(type(self).__name__)
    self.raise_exception: bool = raise_exception
    self.enable_multithread: bool = enable_multithread
    self.op_params: dict = kwargs

    # Per-run state.
    self.task_list: List[Future] = []
    self.ray_task_list: List = []  # Ray ObjectRef list
    self.timer = Timer(name=self.name)
    self.context: FlowContext | None = None
|
54
32
|
|
55
33
|
@abstractmethod
def execute(self):
    """Perform the op's work; subclasses must override this.

    ``__call__`` assigns ``self.context`` before invoking this method.
    """
    ...
|
58
36
|
|
59
|
-
def __call__(self,
|
37
|
+
def __call__(self, context: FlowContext = None):
    """
    Bind ``context`` to the op, run ``execute`` under the op's timer and
    return ``context.response``.

    When ``raise_exception`` is False, an ``Exception`` raised by ``execute``
    is logged with its traceback and swallowed; otherwise it propagates.

    Returns:
        ``self.context.response``, or None when the context is falsy.
    """
    self.context = context

    with self.timer:
        try:
            self.execute()
        except Exception as e:
            if self.raise_exception:
                raise
            logger.exception(f"op={self.name} execute failed, error={e.args}")

    if not self.context:
        return None
    return self.context.response
|
51
|
+
|
71
52
|
def submit_task(self, fn, *args, **kwargs):
    """
    Queue ``fn(*args, **kwargs)`` so that ``join_task`` can collect its result.

    With ``enable_multithread`` set, the call is submitted to
    ``C.thread_pool`` and the future is appended to ``task_list``.
    Otherwise ``fn`` runs immediately and its truthy result is stored in
    ``task_list`` (a list result is flattened one level), which is what the
    synchronous branch of ``join_task`` returns.

    Returns:
        ``self``, to allow chaining.
    """
    if self.enable_multithread:
        task = C.thread_pool.submit(fn, *args, **kwargs)
        self.task_list.append(task)

    else:
        result = fn(*args, **kwargs)
        if result:
            # Store into task_list, not into `result` itself: extending the
            # result with itself lost the value (and raised AttributeError
            # for non-list results).
            if isinstance(result, list):
                self.task_list.extend(result)
            else:
                self.task_list.append(result)

    return self
|
75
66
|
|
76
67
|
def join_task(self, task_desc: str = None) -> list:
    """
    Collect the results of everything queued by ``submit_task``.

    With ``enable_multithread`` set, each queued future is waited on (with a
    tqdm progress bar labelled ``task_desc`` or the op name); truthy results
    are gathered and list results are flattened one level. Otherwise
    ``task_list`` already holds plain results and is returned as a new list.

    ``task_list`` is always emptied, even when a future raises, so a reused
    op never re-joins stale futures from a failed run.

    Returns:
        The gathered results.
    """
    result = []
    try:
        if self.enable_multithread:
            for task in tqdm(self.task_list, desc=task_desc or self.name):
                t_result = task.result()
                if t_result:
                    if isinstance(t_result, list):
                        result.extend(t_result)
                    else:
                        result.append(t_result)

        else:
            result.extend(self.task_list)

    finally:
        self.task_list.clear()

    return result
|
87
83
|
|
88
84
|
def __rshift__(self, op: "BaseOp"):
|
89
85
|
from flowllm.op.sequential_op import SequentialOp
|
90
86
|
|
91
|
-
sequential_op = SequentialOp(ops=[self]
|
87
|
+
sequential_op = SequentialOp(ops=[self])
|
92
88
|
|
93
89
|
if isinstance(op, SequentialOp):
|
94
90
|
sequential_op.ops.extend(op.ops)
|
@@ -99,7 +95,7 @@ class BaseOp(ABC):
|
|
99
95
|
def __or__(self, op: "BaseOp"):
|
100
96
|
from flowllm.op.parallel_op import ParallelOp
|
101
97
|
|
102
|
-
parallel_op = ParallelOp(ops=[self]
|
98
|
+
parallel_op = ParallelOp(ops=[self])
|
103
99
|
|
104
100
|
if isinstance(op, ParallelOp):
|
105
101
|
parallel_op.ops.extend(op.ops)
|
@@ -127,9 +123,9 @@ def run2():
|
|
127
123
|
|
128
124
|
class TestOp(BaseOp):
|
129
125
|
|
130
|
-
def execute(self
|
131
|
-
time.sleep(0.1)
|
132
|
-
op_result = f"{self.name}
|
126
|
+
def execute(self):
|
127
|
+
time.sleep(0.1)
|
128
|
+
op_result = f"{self.name}"
|
133
129
|
logger.info(f"Executing {op_result}")
|
134
130
|
return op_result
|
135
131
|
|
@@ -0,0 +1,313 @@
|
|
1
|
+
from abc import ABC
|
2
|
+
|
3
|
+
import pandas as pd
|
4
|
+
import ray
|
5
|
+
from loguru import logger
|
6
|
+
from tqdm import tqdm
|
7
|
+
|
8
|
+
from flowllm.context.service_context import C
|
9
|
+
from flowllm.op.base_op import BaseOp
|
10
|
+
|
11
|
+
|
12
|
+
class BaseRayOp(BaseOp, ABC):
    """
    Base class for ops that fan work out to Ray tasks.

    Adds ``submit_ray_task`` / ``join_ray_task`` for individual remote calls
    and ``submit_and_join_ray_task`` for splitting one list argument across
    ``C.service_config.ray_max_workers`` tasks.
    """

    def submit_and_join_ray_task(self, fn, parallel_key: str = "", task_desc: str = "",
                                 enable_test: bool = False, **kwargs):
        """
        Call ``fn`` once per item of one list argument and merge the results.

        Args:
            fn: Callable invoked as ``fn(**kwargs)``. For every call the
                current item is passed under ``parallel_key`` and the task
                index under ``actor_index``.
            parallel_key: Name of the keyword argument holding the list to
                split. When empty, the first keyword argument whose value is
                a list is used.
            task_desc: Label for log lines and the progress bar.
            enable_test: When True, ``fn`` is called sequentially in this
                process with ``actor_index=0`` and no Ray task is submitted.
            **kwargs: Arguments forwarded to ``fn``; must contain the list
                named by ``parallel_key``.

        Returns:
            Flat list of the truthy results (list results are concatenated).

        Raises:
            KeyError: If no list argument can be found under ``parallel_key``.
            AssertionError: If the selected argument is not a list.
            RuntimeError: From ``submit_ray_task`` when ``ray_max_workers <= 1``.
        """
        max_workers = C.service_config.ray_max_workers
        self.ray_task_list.clear()

        # Auto-detect parallel key if not provided
        if not parallel_key:
            for key, value in kwargs.items():
                if isinstance(value, list):
                    parallel_key = key
                    logger.info(f"using first list parallel_key={parallel_key}")
                    break

        # Extract the list to parallelize over. The exception types match
        # what the bare pop/assert raised, with a usable message; explicit
        # raises keep the check alive under `python -O`.
        if parallel_key not in kwargs:
            raise KeyError(f"no list argument found for parallel_key={parallel_key!r}")
        parallel_list = kwargs.pop(parallel_key)
        if not isinstance(parallel_list, list):
            raise AssertionError(f"argument {parallel_key!r} must be a list, got {type(parallel_list).__name__}")

        # Put pandas DataFrames into the Ray object store once.
        # NOTE(review): fn then receives the value returned by ray.put, not
        # the DataFrame (also in test mode) - presumably fn resolves it
        # itself; confirm against callers.
        for key in sorted(kwargs.keys()):
            value = kwargs[key]
            if isinstance(value, pd.DataFrame):
                kwargs[key] = ray.put(value)

        if enable_test:
            test_result_list = []
            for value in parallel_list:
                # Fresh dict per call so `kwargs` itself is never mutated.
                t_result = fn(**{**kwargs, "actor_index": 0, parallel_key: value})
                if t_result:
                    if isinstance(t_result, list):
                        test_result_list.extend(t_result)
                    else:
                        test_result_list.append(t_result)
            return test_result_list

        # Create and submit one task per worker
        for i in range(max_workers):
            # `worker_index=i` binds the index at definition time, so the
            # closure does not depend on when the loop variable is read.
            def fn_wrapper(worker_index=i):
                result_list = []
                # Stride split: task i handles items [i, i+max_workers, i+2*max_workers, ...]
                for parallel_value in parallel_list[worker_index::max_workers]:
                    part_result = fn(**{
                        **kwargs,
                        "actor_index": worker_index,
                        parallel_key: parallel_value,
                    })
                    if part_result:
                        if isinstance(part_result, list):
                            result_list.extend(part_result)
                        else:
                            result_list.append(part_result)
                return result_list

            self.submit_ray_task(fn=fn_wrapper)
            logger.info(f"ray.submit task_desc={task_desc} id={i}")

        # Wait for all tasks to complete and collect results
        result = self.join_ray_task(task_desc=task_desc)
        logger.info(f"{task_desc} complete. result_size={len(result)} resources={ray.available_resources()}")
        return result

    def submit_ray_task(self, fn, *args, **kwargs):
        """
        Submit ``fn(*args, **kwargs)`` as one Ray task and queue its handle.

        Ray is initialised on first use with
        ``num_cpus=C.service_config.ray_max_workers``.

        Returns:
            ``self``, to allow chaining.

        Raises:
            RuntimeError: If ``C.service_config.ray_max_workers <= 1``.
        """
        if C.service_config.ray_max_workers <= 1:
            raise RuntimeError("Ray is not configured. Please set ray_max_workers > 1 in service config.")

        # Initialize Ray if not already done
        if not ray.is_initialized():
            logger.warning(f"Ray is not initialized. Initializing Ray with {C.service_config.ray_max_workers} workers.")
            ray.init(num_cpus=C.service_config.ray_max_workers)

        # Create remote function and submit task
        remote_fn = ray.remote(fn)
        task = remote_fn.remote(*args, **kwargs)
        self.ray_task_list.append(task)
        return self

    def join_ray_task(self, task_desc: str = None) -> list:
        """
        Wait for every queued Ray task and merge the results.

        Truthy results are gathered; list results are flattened one level.
        ``ray_task_list`` is always emptied, even when a task fails, so the
        next join does not see handles from a failed run.

        Args:
            task_desc: Progress-bar label; defaults to ``"<op name>_ray"``.

        Returns:
            Combined list of results from all tasks.
        """
        result = []
        try:
            for task in tqdm(self.ray_task_list, desc=task_desc or f"{self.name}_ray"):
                t_result = ray.get(task)
                if t_result:
                    if isinstance(t_result, list):
                        result.extend(t_result)
                    else:
                        result.append(t_result)
        finally:
            self.ray_task_list.clear()
        return result
|
145
|
+
|
146
|
+
|
147
|
+
def run():
|
148
|
+
"""Test Ray multiprocessing functionality"""
|
149
|
+
import time
|
150
|
+
import math
|
151
|
+
|
152
|
+
# CPU intensive task for testing
|
153
|
+
def cpu_intensive_task(n: int, task_id: str):
|
154
|
+
"""CPU intensive task: calculate prime numbers"""
|
155
|
+
start_t = time.time()
|
156
|
+
|
157
|
+
def is_prime(num):
|
158
|
+
if num < 2:
|
159
|
+
return False
|
160
|
+
for j in range(2, int(math.sqrt(num)) + 1):
|
161
|
+
if num % j == 0:
|
162
|
+
return False
|
163
|
+
return True
|
164
|
+
|
165
|
+
primes = [x for x in range(2, n) if is_prime(x)]
|
166
|
+
end_t = time.time()
|
167
|
+
|
168
|
+
result = {
|
169
|
+
'task_id': task_id,
|
170
|
+
'prime_count': len(primes),
|
171
|
+
'max_prime': max(primes) if primes else 0,
|
172
|
+
'execution_time': end_t - start_t
|
173
|
+
}
|
174
|
+
logger.info(f"Task {task_id} completed: found {len(primes)} primes, time: {result['execution_time']:.2f}s")
|
175
|
+
return result
|
176
|
+
|
177
|
+
class TestRayOp(BaseRayOp):
|
178
|
+
def execute(self):
|
179
|
+
logger.info(f"Executing {self.name}")
|
180
|
+
return f"Result from {self.name}"
|
181
|
+
|
182
|
+
# Initialize service config for Ray
|
183
|
+
from flowllm.schema.service_config import ServiceConfig
|
184
|
+
|
185
|
+
# Create a test service config with Ray enabled
|
186
|
+
test_config = ServiceConfig()
|
187
|
+
test_config.ray_max_workers = 4 # Enable Ray with 4 workers
|
188
|
+
test_config.thread_pool_max_workers = 4
|
189
|
+
|
190
|
+
# Set the service config
|
191
|
+
C.init_by_service_config(test_config)
|
192
|
+
|
193
|
+
logger.info("=== Testing Ray multiprocessing ===")
|
194
|
+
|
195
|
+
# Create test operation
|
196
|
+
ray_op = TestRayOp("ray_test_op")
|
197
|
+
|
198
|
+
logger.info("--- Testing submit_ray_task and join_ray_task ---")
|
199
|
+
|
200
|
+
# Test 1: Basic Ray task submission
|
201
|
+
task_size = 50000 # Find primes up to 50000 (more CPU intensive)
|
202
|
+
num_tasks = 4
|
203
|
+
|
204
|
+
try:
|
205
|
+
# Submit multiple CPU-intensive tasks
|
206
|
+
|
207
|
+
logger.info(f"Submitting {num_tasks} Ray tasks (finding primes up to {task_size})")
|
208
|
+
start_time = time.time()
|
209
|
+
|
210
|
+
for i in range(num_tasks):
|
211
|
+
ray_op.submit_ray_task(cpu_intensive_task, task_size, f"ray_task_{i}")
|
212
|
+
|
213
|
+
# Wait for all tasks to complete
|
214
|
+
results = ray_op.join_ray_task("Processing Ray tasks")
|
215
|
+
end_time = time.time()
|
216
|
+
|
217
|
+
logger.info(f"Ray tasks completed in {end_time - start_time:.2f}s")
|
218
|
+
logger.info(f"Ray results: {results}")
|
219
|
+
|
220
|
+
except Exception as e:
|
221
|
+
logger.error(f"Ray task execution failed: {e}")
|
222
|
+
|
223
|
+
# Test 2: Compare Ray vs ThreadPool performance
|
224
|
+
logger.info("\n--- Performance Comparison: Ray vs ThreadPool ---")
|
225
|
+
|
226
|
+
try:
|
227
|
+
# Test with ThreadPool
|
228
|
+
thread_op = TestRayOp("thread_test_op")
|
229
|
+
|
230
|
+
logger.info(f"Testing ThreadPool with {num_tasks} tasks")
|
231
|
+
start_time = time.time()
|
232
|
+
|
233
|
+
for i in range(num_tasks):
|
234
|
+
thread_op.submit_task(cpu_intensive_task, task_size, f"thread_task_{i}")
|
235
|
+
|
236
|
+
thread_results = thread_op.join_task("Processing ThreadPool tasks")
|
237
|
+
print(thread_results)
|
238
|
+
thread_time = time.time() - start_time
|
239
|
+
|
240
|
+
logger.info(f"ThreadPool completed in {thread_time:.2f}s")
|
241
|
+
|
242
|
+
# Test with Ray again for comparison
|
243
|
+
ray_op2 = TestRayOp("ray_test_op2")
|
244
|
+
|
245
|
+
logger.info(f"Testing Ray with {num_tasks} tasks")
|
246
|
+
start_time = time.time()
|
247
|
+
|
248
|
+
for i in range(num_tasks):
|
249
|
+
ray_op2.submit_ray_task(cpu_intensive_task, task_size, f"ray_task2_{i}")
|
250
|
+
|
251
|
+
ray_results2 = ray_op2.join_ray_task("Processing Ray tasks (comparison)")
|
252
|
+
print(ray_results2)
|
253
|
+
ray_time = time.time() - start_time
|
254
|
+
|
255
|
+
logger.info(f"Ray completed in {ray_time:.2f}s")
|
256
|
+
|
257
|
+
# Performance comparison
|
258
|
+
speedup = thread_time / ray_time if ray_time > 0 else 0
|
259
|
+
logger.info(f"\n=== Performance Summary ===")
|
260
|
+
logger.info(f"ThreadPool time: {thread_time:.2f}s")
|
261
|
+
logger.info(f"Ray time: {ray_time:.2f}s")
|
262
|
+
logger.info(f"Ray speedup: {speedup:.2f}x")
|
263
|
+
|
264
|
+
except Exception as e:
|
265
|
+
logger.error(f"Performance comparison failed: {e}")
|
266
|
+
|
267
|
+
# Test 3: Error handling
|
268
|
+
logger.info("\n--- Testing Error Handling ---")
|
269
|
+
|
270
|
+
def failing_task(task_id: str):
|
271
|
+
if task_id == "fail_task":
|
272
|
+
raise ValueError(f"Intentional error in {task_id}")
|
273
|
+
return f"Success: {task_id}"
|
274
|
+
|
275
|
+
try:
|
276
|
+
error_op = TestRayOp("error_test_op")
|
277
|
+
|
278
|
+
# Submit mix of successful and failing tasks
|
279
|
+
error_op.submit_ray_task(failing_task, "success_task_1")
|
280
|
+
error_op.submit_ray_task(failing_task, "fail_task")
|
281
|
+
error_op.submit_ray_task(failing_task, "success_task_2")
|
282
|
+
|
283
|
+
error_results = error_op.join_ray_task("Testing error handling")
|
284
|
+
logger.info(f"Error handling results: {error_results}")
|
285
|
+
|
286
|
+
except Exception as e:
|
287
|
+
logger.error(f"Expected error occurred: {e}")
|
288
|
+
|
289
|
+
# Test 4: Ray without proper configuration (should fail)
|
290
|
+
logger.info("\n--- Testing Ray Configuration Validation ---")
|
291
|
+
|
292
|
+
original_workers = C.service_config.ray_max_workers
|
293
|
+
try:
|
294
|
+
# Temporarily disable Ray in config
|
295
|
+
C.service_config.ray_max_workers = 1 # Disable Ray
|
296
|
+
|
297
|
+
config_test_op = TestRayOp("config_test_op")
|
298
|
+
config_test_op.submit_ray_task(cpu_intensive_task, 100, "config_test")
|
299
|
+
|
300
|
+
logger.error("This should not be reached - Ray should be disabled")
|
301
|
+
|
302
|
+
except RuntimeError as e:
|
303
|
+
logger.info(f"✓ Correctly caught configuration error: {e}")
|
304
|
+
|
305
|
+
finally:
|
306
|
+
# Restore original configuration
|
307
|
+
C.service_config.ray_max_workers = original_workers
|
308
|
+
|
309
|
+
logger.info("\n=== Ray testing completed ===")
|
310
|
+
|
311
|
+
|
312
|
+
if __name__ == "__main__":
|
313
|
+
run()
|
@@ -0,0 +1 @@
|
|
1
|
+
from .execute_code_op import ExecuteCodeOp
|
@@ -0,0 +1,42 @@
|
|
1
|
+
import sys
|
2
|
+
from io import StringIO
|
3
|
+
|
4
|
+
from loguru import logger
|
5
|
+
|
6
|
+
from flowllm.context.flow_context import FlowContext
|
7
|
+
from flowllm.context.service_context import C
|
8
|
+
from flowllm.op.base_op import BaseOp
|
9
|
+
|
10
|
+
|
11
|
+
@C.register_op()
class ExecuteCodeOp(BaseOp):
    """Run a Python snippet taken from the flow context and capture what it prints."""

    def execute(self):
        """
        Execute ``context[code_key]`` and store the outcome in ``context.code_result``.

        ``code_key`` is read from ``op_params`` (default ``"code"``). On
        success the result is everything the snippet wrote to stdout; if
        reading or running the snippet raises an ``Exception``, the result
        is ``str(exception)``.
        """
        # NOTE: sys.stdout is process-global, so anything other threads print
        # while the snippet runs is captured as well.
        old_stdout = sys.stdout
        redirected_output = sys.stdout = StringIO()

        try:
            code_key: str = self.op_params.get("code_key", "code")
            code_str: str = self.context[code_key]
            # SECURITY: exec runs the snippet with the full privileges of this
            # process; only feed it code you are prepared to run.
            # A single namespace dict is passed on purpose: with the implicit
            # globals/locals split of a bare exec() inside a method, functions
            # and imports defined by the snippet cannot see each other.
            # "__main__" lets `if __name__ == "__main__":` guards run.
            exec(code_str, {"__name__": "__main__"})
            code_result = redirected_output.getvalue()

        except Exception as e:
            logger.info(f"{self.name} encounter exception! error={e.args}")
            code_result = str(e)

        finally:
            # Restore stdout even for SystemExit / KeyboardInterrupt, which
            # `except Exception` does not catch.
            sys.stdout = old_stdout

        self.context.code_result = code_result
|
30
|
+
|
31
|
+
|
32
|
+
if __name__ == "__main__":
|
33
|
+
C.set_default_service_config().init_by_service_config()
|
34
|
+
op = ExecuteCodeOp()
|
35
|
+
|
36
|
+
context = FlowContext(code="print('Hello World')")
|
37
|
+
op(context=context)
|
38
|
+
print(context.code_result)
|
39
|
+
|
40
|
+
context.code = "print('Hello World!'"
|
41
|
+
op(context=context)
|
42
|
+
print(context.code_result)
|
@@ -3,18 +3,18 @@ import time
|
|
3
3
|
from loguru import logger
|
4
4
|
|
5
5
|
from flowllm.context.service_context import C
|
6
|
-
from flowllm.op.
|
6
|
+
from flowllm.op.base_llm_op import BaseLLMOp
|
7
7
|
|
8
8
|
|
9
9
|
@C.register_op()
class Mock1Op(BaseLLMOp):
    """Mock op: waits one second, then writes a fixed answer built from context ``a`` and ``b``."""

    def execute(self):
        """Sleep 1s, log the inputs and set ``context.response.answer``."""
        time.sleep(1)

        ctx = self.context
        first, second = ctx.a, ctx.b
        logger.info(f"enter class={self.name}. a={first} b={second}")

        ctx.response.answer = f"{self.name} {first} {second} answer=47"
|
18
18
|
|
19
19
|
|
20
20
|
@C.register_op()
|
@@ -0,0 +1,29 @@
|
|
1
|
+
from flowllm.context.service_context import C
|
2
|
+
from flowllm.op.base_op import BaseOp
|
3
|
+
|
4
|
+
|
5
|
+
@C.register_op()
class TerminateOp(BaseOp):
    """Record a closing message for the interaction based on ``context.status``."""

    def execute(self):
        """
        Validate ``context.status`` and write ``context.terminate_answer``.

        Raises:
            AssertionError: If the status is neither "success" nor "failure".
        """
        # Get status from context
        status = self.context.status
        # Explicit raise instead of `assert`: assert statements are removed
        # under `python -O`, which would let any status through. The exception
        # type is kept so existing handlers behave the same.
        if status not in ("success", "failure"):
            raise AssertionError(f"Invalid status: {status}")
        self.context.terminate_answer = f"The interaction has been completed with status: {status}"
|
13
|
+
|
14
|
+
|
15
|
+
if __name__ == "__main__":
|
16
|
+
from flowllm.context.flow_context import FlowContext
|
17
|
+
|
18
|
+
C.set_default_service_config().init_by_service_config()
|
19
|
+
|
20
|
+
# Test success termination
|
21
|
+
op = TerminateOp()
|
22
|
+
context = FlowContext(status="success")
|
23
|
+
result = op(context=context)
|
24
|
+
print(f"Result: {context.terminate_answer}")
|
25
|
+
|
26
|
+
# Test failure termination
|
27
|
+
context.status = "failure"
|
28
|
+
op(context=context)
|
29
|
+
print(f"Result: {context.terminate_answer}")
|