flowllm-0.1.0-py3-none-any.whl → flowllm-0.1.2-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (141)
  1. flowllm/__init__.py +21 -0
  2. flowllm/app.py +15 -0
  3. flowllm/client/__init__.py +25 -0
  4. flowllm/client/async_http_client.py +81 -0
  5. flowllm/client/http_client.py +81 -0
  6. flowllm/client/mcp_client.py +133 -0
  7. flowllm/client/sync_mcp_client.py +116 -0
  8. flowllm/config/__init__.py +1 -0
  9. flowllm/config/default.yaml +77 -0
  10. flowllm/config/empty.yaml +37 -0
  11. flowllm/config/pydantic_config_parser.py +242 -0
  12. flowllm/context/base_context.py +79 -0
  13. flowllm/context/flow_context.py +16 -0
  14. llmflow/op/prompt_mixin.py → flowllm/context/prompt_handler.py +25 -14
  15. flowllm/context/registry.py +30 -0
  16. flowllm/context/service_context.py +147 -0
  17. flowllm/embedding_model/__init__.py +1 -0
  18. {llmflow → flowllm}/embedding_model/base_embedding_model.py +93 -2
  19. {llmflow → flowllm}/embedding_model/openai_compatible_embedding_model.py +71 -13
  20. flowllm/flow/__init__.py +1 -0
  21. flowllm/flow/base_flow.py +72 -0
  22. flowllm/flow/base_tool_flow.py +15 -0
  23. flowllm/flow/gallery/__init__.py +8 -0
  24. flowllm/flow/gallery/cmd_flow.py +11 -0
  25. flowllm/flow/gallery/code_tool_flow.py +30 -0
  26. flowllm/flow/gallery/dashscope_search_tool_flow.py +34 -0
  27. flowllm/flow/gallery/deepsearch_tool_flow.py +39 -0
  28. flowllm/flow/gallery/expression_tool_flow.py +18 -0
  29. flowllm/flow/gallery/mock_tool_flow.py +67 -0
  30. flowllm/flow/gallery/tavily_search_tool_flow.py +30 -0
  31. flowllm/flow/gallery/terminate_tool_flow.py +30 -0
  32. flowllm/flow/parser/expression_parser.py +171 -0
  33. flowllm/llm/__init__.py +2 -0
  34. {llmflow → flowllm}/llm/base_llm.py +100 -18
  35. flowllm/llm/litellm_llm.py +455 -0
  36. flowllm/llm/openai_compatible_llm.py +439 -0
  37. flowllm/op/__init__.py +11 -0
  38. llmflow/op/react/react_v1_op.py → flowllm/op/agent/react_op.py +17 -22
  39. flowllm/op/akshare/__init__.py +3 -0
  40. flowllm/op/akshare/get_ak_a_code_op.py +108 -0
  41. flowllm/op/akshare/get_ak_a_code_prompt.yaml +21 -0
  42. flowllm/op/akshare/get_ak_a_info_op.py +140 -0
  43. flowllm/op/base_llm_op.py +64 -0
  44. flowllm/op/base_op.py +148 -0
  45. flowllm/op/base_ray_op.py +313 -0
  46. flowllm/op/code/__init__.py +1 -0
  47. flowllm/op/code/execute_code_op.py +42 -0
  48. flowllm/op/gallery/__init__.py +2 -0
  49. flowllm/op/gallery/mock_op.py +42 -0
  50. flowllm/op/gallery/terminate_op.py +29 -0
  51. flowllm/op/parallel_op.py +23 -0
  52. flowllm/op/search/__init__.py +3 -0
  53. flowllm/op/search/dashscope_deep_research_op.py +260 -0
  54. flowllm/op/search/dashscope_search_op.py +179 -0
  55. flowllm/op/search/dashscope_search_prompt.yaml +13 -0
  56. flowllm/op/search/tavily_search_op.py +102 -0
  57. flowllm/op/sequential_op.py +21 -0
  58. flowllm/schema/flow_request.py +12 -0
  59. flowllm/schema/flow_response.py +12 -0
  60. flowllm/schema/message.py +35 -0
  61. flowllm/schema/service_config.py +72 -0
  62. flowllm/schema/tool_call.py +118 -0
  63. {llmflow → flowllm}/schema/vector_node.py +1 -0
  64. flowllm/service/__init__.py +3 -0
  65. flowllm/service/base_service.py +68 -0
  66. flowllm/service/cmd_service.py +15 -0
  67. flowllm/service/http_service.py +79 -0
  68. flowllm/service/mcp_service.py +47 -0
  69. flowllm/storage/__init__.py +1 -0
  70. flowllm/storage/cache/__init__.py +1 -0
  71. flowllm/storage/cache/cache_data_handler.py +104 -0
  72. flowllm/storage/cache/data_cache.py +375 -0
  73. flowllm/storage/vector_store/__init__.py +3 -0
  74. flowllm/storage/vector_store/base_vector_store.py +44 -0
  75. {llmflow → flowllm/storage}/vector_store/chroma_vector_store.py +11 -10
  76. {llmflow → flowllm/storage}/vector_store/es_vector_store.py +11 -11
  77. llmflow/vector_store/file_vector_store.py → flowllm/storage/vector_store/local_vector_store.py +110 -11
  78. flowllm/utils/common_utils.py +52 -0
  79. flowllm/utils/fetch_url.py +117 -0
  80. flowllm/utils/llm_utils.py +28 -0
  81. flowllm/utils/ridge_v2.py +54 -0
  82. {llmflow → flowllm}/utils/timer.py +5 -4
  83. {flowllm-0.1.0.dist-info → flowllm-0.1.2.dist-info}/METADATA +45 -388
  84. flowllm-0.1.2.dist-info/RECORD +99 -0
  85. flowllm-0.1.2.dist-info/entry_points.txt +2 -0
  86. {flowllm-0.1.0.dist-info → flowllm-0.1.2.dist-info}/licenses/LICENSE +1 -1
  87. flowllm-0.1.2.dist-info/top_level.txt +1 -0
  88. flowllm-0.1.0.dist-info/RECORD +0 -66
  89. flowllm-0.1.0.dist-info/entry_points.txt +0 -3
  90. flowllm-0.1.0.dist-info/top_level.txt +0 -1
  91. llmflow/app.py +0 -53
  92. llmflow/config/config_parser.py +0 -80
  93. llmflow/config/mock_config.yaml +0 -58
  94. llmflow/embedding_model/__init__.py +0 -5
  95. llmflow/enumeration/agent_state.py +0 -8
  96. llmflow/llm/__init__.py +0 -5
  97. llmflow/llm/openai_compatible_llm.py +0 -283
  98. llmflow/mcp_server.py +0 -110
  99. llmflow/op/__init__.py +0 -10
  100. llmflow/op/base_op.py +0 -125
  101. llmflow/op/mock_op.py +0 -40
  102. llmflow/op/vector_store/__init__.py +0 -13
  103. llmflow/op/vector_store/recall_vector_store_op.py +0 -48
  104. llmflow/op/vector_store/update_vector_store_op.py +0 -28
  105. llmflow/op/vector_store/vector_store_action_op.py +0 -46
  106. llmflow/pipeline/pipeline.py +0 -94
  107. llmflow/pipeline/pipeline_context.py +0 -37
  108. llmflow/schema/app_config.py +0 -69
  109. llmflow/schema/experience.py +0 -144
  110. llmflow/schema/message.py +0 -68
  111. llmflow/schema/request.py +0 -32
  112. llmflow/schema/response.py +0 -29
  113. llmflow/service/__init__.py +0 -0
  114. llmflow/service/llmflow_service.py +0 -96
  115. llmflow/tool/__init__.py +0 -9
  116. llmflow/tool/base_tool.py +0 -80
  117. llmflow/tool/code_tool.py +0 -43
  118. llmflow/tool/dashscope_search_tool.py +0 -162
  119. llmflow/tool/mcp_tool.py +0 -77
  120. llmflow/tool/tavily_search_tool.py +0 -109
  121. llmflow/tool/terminate_tool.py +0 -23
  122. llmflow/utils/__init__.py +0 -0
  123. llmflow/utils/common_utils.py +0 -17
  124. llmflow/utils/file_handler.py +0 -25
  125. llmflow/utils/http_client.py +0 -156
  126. llmflow/utils/op_utils.py +0 -102
  127. llmflow/utils/registry.py +0 -33
  128. llmflow/vector_store/__init__.py +0 -7
  129. llmflow/vector_store/base_vector_store.py +0 -136
  130. {llmflow → flowllm/context}/__init__.py +0 -0
  131. {llmflow/config → flowllm/enumeration}/__init__.py +0 -0
  132. {llmflow → flowllm}/enumeration/chunk_enum.py +0 -0
  133. {llmflow → flowllm}/enumeration/http_enum.py +0 -0
  134. {llmflow → flowllm}/enumeration/role.py +0 -0
  135. {llmflow/enumeration → flowllm/flow/parser}/__init__.py +0 -0
  136. {llmflow/op/react → flowllm/op/agent}/__init__.py +0 -0
  137. llmflow/op/react/react_v1_prompt.yaml → flowllm/op/agent/react_prompt.yaml +0 -0
  138. {llmflow/pipeline → flowllm/schema}/__init__.py +0 -0
  139. {llmflow/schema → flowllm/utils}/__init__.py +0 -0
  140. {llmflow → flowllm}/utils/singleton.py +0 -0
  141. {flowllm-0.1.0.dist-info → flowllm-0.1.2.dist-info}/WHEEL +0 -0
flowllm/op/akshare/get_ak_a_info_op.py ADDED
@@ -0,0 +1,140 @@
1
+ import json
2
+ import time
3
+
4
+ import akshare as ak
5
+ import pandas as pd
6
+ from loguru import logger
7
+ from tqdm import tqdm
8
+
9
+ from flowllm.context.flow_context import FlowContext
10
+ from flowllm.context.service_context import C
11
+ from flowllm.op.base_op import BaseOp
12
+ from flowllm.utils.fetch_url import fetch_webpage_text
13
+
14
+
15
@C.register_op()
class GetAkAInfoOp(BaseOp):
    """Enrich every entry of ``context.code_infos`` with data fetched per stock code.

    ``execute`` walks ``self.context.code_infos`` (a mapping of stock code to an
    info dict), calls ``execute_code`` for each code with retries, and merges
    the returned dict into that code's info dict. Subclasses override
    ``execute_code`` to contribute a different section.
    """

    def execute_code(self, code: str) -> dict:
        """Fetch the basic-information table for one stock code.

        Args:
            code: Stock code passed to ``ak.stock_individual_info_em`` as ``symbol``.

        Returns:
            ``{"基本信息": {...}}`` where the inner dict maps each stripped
            ``item`` cell of the returned frame to its ``value`` cell.
        """
        df = ak.stock_individual_info_em(symbol=code)
        basic_info = {row["item"].strip(): row["value"] for row in df.to_dict(orient="records")}
        return {"基本信息": basic_info}

    def execute(self):
        """Run ``execute_code`` for every code, retrying on failure.

        The retry count comes from the ``max_retries`` op parameter (default 3).
        A code whose attempts all fail is left unchanged; each failure is
        logged instead of being silently discarded.
        """
        max_retries: int = self.op_params.get("max_retries", 3)
        for code, info_dict in self.context.code_infos.items():
            result = {}
            for attempt in range(max_retries):
                try:
                    result = self.execute_code(code)
                    break

                except Exception as e:
                    # Best-effort enrichment: keep going, but leave a trace of what failed.
                    logger.warning(f"op={self.name} code={code} attempt={attempt + 1}/{max_retries} "
                                   f"failed, error={e.args}")
                    if attempt != max_retries - 1:
                        # Linear back-off between attempts: 1s, 3s, 5s, ...
                        time.sleep(attempt * 2 + 1)

            if result:
                info_dict.update(result)

        # NOTE(review): the source was recovered from a whitespace-stripped diff; the
        # pause and the summary log are placed after the loop here - confirm against
        # the packaged file.
        time.sleep(1)
        logger.info(f"code_infos={json.dumps(self.context.code_infos, ensure_ascii=False, indent=2)}")
43
+
44
+
45
@C.register_op()
class GetAkASpotOp(GetAkAInfoOp):
    """Variant of ``GetAkAInfoOp`` that contributes the spot-quote row of a stock."""

    def execute_code(self, code: str) -> dict:
        """Return ``{"实时行情": row}`` for ``code``, or ``{}`` when no row matches.

        The quote table comes from ``GetAkACodeOp.download_a_stock_df()``; rows
        are matched on the ``代码`` column and the last match is used.
        """
        # Imported at call time, exactly as the original does.
        from flowllm.op.akshare import GetAkACodeOp

        spot_df: pd.DataFrame = GetAkACodeOp.download_a_stock_df()
        matched = spot_df.loc[spot_df["代码"] == code, :]
        if len(matched) == 0:
            return {}

        return {"实时行情": matched.to_dict(orient="records")[-1]}
58
+
59
+
60
@C.register_op()
class GetAkAMoneyFlowOp(GetAkAInfoOp):
    """Variant of ``GetAkAInfoOp`` that contributes the latest fund-flow record."""

    def execute_code(self, code: str) -> dict:
        """Return the last row of ``ak.stock_individual_fund_flow`` with values stringified.

        Yields ``{}`` when the returned frame is empty.
        """
        flow_df = ak.stock_individual_fund_flow(stock=code)
        if len(flow_df) == 0:
            return {}

        latest_row = flow_df.to_dict(orient="records")[-1]
        return {"资金流入流出": {column: str(cell) for column, cell in latest_row.items()}}
69
+
70
+
71
@C.register_op()
class GetAkAFinancialInfoOp(GetAkAInfoOp):
    """Variant of ``GetAkAInfoOp`` that contributes the latest financial-abstract record."""

    def execute_code(self, code: str) -> dict:
        """Return the last row of ``ak.stock_financial_abstract_ths`` with values stringified.

        The query uses the ``按报告期`` indicator; an empty frame yields ``{}``.
        """
        abstract_df = ak.stock_financial_abstract_ths(symbol=code, indicator="按报告期")
        if len(abstract_df) == 0:
            return {}

        latest_row = abstract_df.to_dict(orient="records")[-1]
        return {"财务信息": {column: str(cell) for column, cell in latest_row.items()}}
80
+
81
+
82
@C.register_op()
class GetAkANewsOp(GetAkAInfoOp):
    """Variant of ``GetAkAInfoOp`` that contributes the text of recent news articles."""

    def execute_code(self, code: str) -> dict:
        """Download the first ``top_n_news`` articles listed by ``ak.stock_news_em``.

        ``top_n_news`` is read from the op parameters (default 1). Each article
        becomes a section holding its index, the first eight characters of the
        URL's file stem as the date, and the fetched page text; sections are
        joined by blank lines under the ``新闻`` key.
        """
        news_df = ak.stock_news_em(symbol=code)
        top_n_news: int = self.op_params.get("top_n_news", 1)
        selected_rows = news_df.to_dict(orient="records")[:top_n_news]

        sections = []
        for idx, row in enumerate(tqdm(selected_rows)):
            link = row["新闻链接"]
            # Example link: http://finance.eastmoney.com/a/202508133482756869.html
            stem = link.split("/")[-1].split(".")[0]
            day = stem[:8]
            body = fetch_webpage_text(link).strip()
            sections.append(f"新闻{idx}\n时间{day}\n{body}")

        return {"新闻": "\n\n".join(sections)}
100
+
101
+
102
@C.register_op()
class MergeAkAInfoOp(BaseOp):
    """Render ``context.code_infos`` as markdown-style text and store it as the answer."""

    def execute(self):
        """Build one text section per stock code and write the result to ``context.response.answer``.

        String values are emitted as-is, dict values as ``name: value`` lines,
        and list values as their non-empty items stripped of whitespace. Values
        of any other type only get their heading.
        """
        rendered_by_code = {}
        for code, info_dict in self.context.code_infos.items():
            lines = [f"\n\n### {code}"]
            for key, value in info_dict.items():
                lines.append(f"\n#### {code}-{key}")
                if isinstance(value, str):
                    lines.append(value)
                elif isinstance(value, dict):
                    lines.extend(f"{attr_name}: {attr_value}" for attr_name, attr_value in value.items())
                elif isinstance(value, list):
                    lines.extend(item.strip() for item in value if item)

            rendered_by_code[code] = "\n".join(lines)

        answer = "\n".join(rendered_by_code.values())
        logger.info(f"answer=\n{answer}")
        self.context.response.answer = answer.strip()
124
+
125
+
126
if __name__ == "__main__":
    # Manual smoke run: chain every akshare op and merge the results for two codes.
    C.set_default_service_config().init_by_service_config()

    demo_context = FlowContext(code_infos={"000858": {}, "600519": {}},
                               query="茅台和五粮现在价格多少?")

    # Instantiate all stages first, then fold them left-to-right with ``>>``.
    stages = [
        GetAkAInfoOp(),
        GetAkASpotOp(),
        GetAkAMoneyFlowOp(),
        GetAkAFinancialInfoOp(),
        GetAkANewsOp(),
        MergeAkAInfoOp(),
    ]
    pipeline = stages[0]
    for stage in stages[1:]:
        pipeline = pipeline >> stage

    pipeline(context=demo_context)
flowllm/op/base_llm_op.py ADDED
@@ -0,0 +1,64 @@
1
+ from abc import ABC
2
+ from pathlib import Path
3
+
4
+ from flowllm.context.prompt_handler import PromptHandler
5
+ from flowllm.context.service_context import C
6
+ from flowllm.embedding_model.base_embedding_model import BaseEmbeddingModel
7
+ from flowllm.llm.base_llm import BaseLLM
8
+ from flowllm.op.base_op import BaseOp
9
+ from flowllm.schema.service_config import LLMConfig, EmbeddingModelConfig
10
+ from flowllm.storage.vector_store.base_vector_store import BaseVectorStore
11
+
12
+
13
class BaseLLMOp(BaseOp, ABC):
    """Base class for ops that use a prompt file, an LLM, an embedding model or a vector store.

    The three backends are given as config keys (strings) or ready instances;
    a key is resolved on first property access and the instance is then cached.
    """

    # Subclasses set this to their own ``__file__`` so the default prompt file
    # is looked up next to the subclass module.
    file_path: str = __file__

    def __init__(self,
                 language: str = "",
                 prompt_path: str = "",
                 llm: str = "default",
                 embedding_model: str = "default",
                 vector_store: str = "default",
                 **kwargs):
        """
        Args:
            language: Prompt language; falls back to ``C.language`` when empty.
            prompt_path: Explicit prompt file. When empty, the file is derived from
                the op name and looked up in the directory of ``file_path``.
            llm: Key into ``C.service_config.llm`` (or an instance).
            embedding_model: Key into ``C.service_config.embedding_model`` (or an instance).
            vector_store: Name passed to ``C.get_vector_store`` (or an instance).
            **kwargs: Forwarded to ``BaseOp.__init__``.
        """
        super().__init__(**kwargs)

        self.language: str = language or C.language
        self.prompt_path: Path = Path(prompt_path) if prompt_path else \
            Path(self.file_path).parent / self._default_prompt_file_name(self.name)
        self._llm: BaseLLM | str = llm
        self._embedding_model: BaseEmbeddingModel | str = embedding_model
        self._vector_store: BaseVectorStore | str = vector_store

        self.prompt = PromptHandler(language=self.language).load_prompt_by_file(self.prompt_path)

    @staticmethod
    def _default_prompt_file_name(op_name: str) -> str:
        """Map an op name to its prompt file name (``react_op`` -> ``react_prompt.yaml``).

        Only the trailing ``_op`` is swapped. A plain ``str.replace`` would also
        rewrite an inner ``_op`` (for example in ``get_options_op``) and corrupt
        the path. Names that do not end in ``_op`` keep the previous mapping.
        """
        if op_name.endswith("_op"):
            return op_name.removesuffix("_op") + "_prompt.yaml"
        return op_name.replace("_op", "_prompt.yaml")

    @property
    def llm(self) -> BaseLLM:
        """LLM instance, built from the service config on first access."""
        if isinstance(self._llm, str):
            llm_config: LLMConfig = C.service_config.llm[self._llm]
            llm_cls = C.resolve_llm(llm_config.backend)
            self._llm = llm_cls(model_name=llm_config.model_name, **llm_config.params)

        return self._llm

    @property
    def embedding_model(self) -> BaseEmbeddingModel:
        """Embedding model instance, built from the service config on first access."""
        if isinstance(self._embedding_model, str):
            embedding_model_config: EmbeddingModelConfig = \
                C.service_config.embedding_model[self._embedding_model]
            embedding_model_cls = C.resolve_embedding_model(embedding_model_config.backend)
            self._embedding_model = embedding_model_cls(model_name=embedding_model_config.model_name,
                                                        **embedding_model_config.params)

        return self._embedding_model

    @property
    def vector_store(self) -> BaseVectorStore:
        """Vector store instance, fetched via ``C.get_vector_store`` on first access."""
        if isinstance(self._vector_store, str):
            self._vector_store = C.get_vector_store(self._vector_store)
        return self._vector_store

    def prompt_format(self, prompt_name: str, **kwargs) -> str:
        """Delegate to ``self.prompt.prompt_format`` with the given name and arguments."""
        return self.prompt.prompt_format(prompt_name=prompt_name, **kwargs)

    def get_prompt(self, prompt_name: str) -> str:
        """Delegate to ``self.prompt.get_prompt`` for the given name."""
        return self.prompt.get_prompt(prompt_name=prompt_name)
flowllm/op/base_op.py ADDED
@@ -0,0 +1,148 @@
1
+ from abc import abstractmethod, ABC
2
+ from concurrent.futures import Future
3
+ from typing import List
4
+
5
+ from loguru import logger
6
+ from tqdm import tqdm
7
+
8
+ from flowllm.context.flow_context import FlowContext
9
+ from flowllm.context.service_context import C
10
+ from flowllm.utils.common_utils import camel_to_snake
11
+ from flowllm.utils.timer import Timer
12
+
13
+
14
class BaseOp(ABC):
    """Abstract unit of work executed against a shared ``FlowContext``.

    Subclasses implement ``execute``. Instances are callable, can fan work out
    to ``C.thread_pool`` via ``submit_task`` / ``join_task``, and compose with
    ``>>`` (into a ``SequentialOp``) and ``|`` (into a ``ParallelOp``).
    """

    def __init__(self,
                 name: str = "",
                 raise_exception: bool = True,
                 **kwargs):
        """
        Args:
            name: Op name; defaults to the snake_case form of the class name.
            raise_exception: When False, ``__call__`` logs an exception raised by
                ``execute`` instead of propagating it.
            **kwargs: Free-form parameters stored on ``self.op_params``.
        """
        super().__init__()

        self.name: str = name or camel_to_snake(self.__class__.__name__)
        self.raise_exception: bool = raise_exception
        self.op_params: dict = kwargs

        self.task_list: List[Future] = []
        self.ray_task_list: List = []  # Ray ObjectRef list
        self.timer = Timer(name=self.name)
        self.context: FlowContext | None = None

    @abstractmethod
    def execute(self):
        """Do the op's work; read inputs from and write outputs to ``self.context``."""
        ...

    def __call__(self, context: FlowContext = None):
        """Bind ``context``, run ``execute`` inside the timer and return ``context.response``.

        Returns ``None`` when no (truthy) context was supplied.
        """
        self.context = context
        with self.timer:
            if self.raise_exception:
                self.execute()

            else:

                try:
                    self.execute()
                except Exception as e:
                    logger.exception(f"op={self.name} execute failed, error={e.args}")

        return self.context.response if self.context else None

    def submit_task(self, fn, *args, **kwargs):
        """Submit ``fn(*args, **kwargs)`` to ``C.thread_pool`` and remember the future.

        Returns:
            ``self``, so calls can be chained.
        """
        task = C.thread_pool.submit(fn, *args, **kwargs)
        self.task_list.append(task)
        return self

    def join_task(self, task_desc: str = None) -> list:
        """Wait for every submitted future and flatten their results into one list.

        Falsy results are dropped; list results are spliced in, anything else
        is appended.

        Args:
            task_desc: Progress-bar label; defaults to the op name.

        Raises:
            Whatever a task raised, re-raised by ``Future.result()``.
        """
        result = []
        try:
            for task in tqdm(self.task_list, desc=task_desc or self.name):
                t_result = task.result()
                if t_result:
                    if isinstance(t_result, list):
                        result.extend(t_result)
                    else:
                        result.append(t_result)
        finally:
            # Clear even when a task raised. Otherwise the failed future stays
            # queued and every later join_task() call re-raises the same error.
            self.task_list.clear()
        return result

    def __rshift__(self, op: "BaseOp"):
        """``self >> op``: build a ``SequentialOp`` running ``self`` and then ``op``.

        When ``op`` is itself a ``SequentialOp`` its members are spliced in
        rather than nested.
        """
        # Imported here because sequential_op imports this module.
        from flowllm.op.sequential_op import SequentialOp

        sequential_op = SequentialOp(ops=[self])

        if isinstance(op, SequentialOp):
            sequential_op.ops.extend(op.ops)
        else:
            sequential_op.ops.append(op)
        return sequential_op

    def __or__(self, op: "BaseOp"):
        """``self | op``: build a ``ParallelOp`` holding ``self`` and ``op``.

        When ``op`` is itself a ``ParallelOp`` its members are spliced in
        rather than nested.
        """
        # Imported here because parallel_op imports this module.
        from flowllm.op.parallel_op import ParallelOp

        parallel_op = ParallelOp(ops=[self])

        if isinstance(op, ParallelOp):
            parallel_op.ops.extend(op.ops)
        else:
            parallel_op.ops.append(op)

        return parallel_op
89
+
90
+
91
def run1():
    """Basic test"""

    class MockOp(BaseOp):
        """Minimal op that only logs its own name."""

        def execute(self):
            logger.info(f"op={self.name} execute")

    # Instantiate with defaults and invoke without a context.
    MockOp()()
100
+
101
+
102
def run2():
    """Test operator overloading functionality"""
    import time
    from concurrent.futures import ThreadPoolExecutor

    class TestOp(BaseOp):
        """Op that sleeps briefly, logs its name and returns it."""

        def execute(self):
            time.sleep(0.1)
            label = f"{self.name}"
            logger.info(f"Executing {label}")
            return label

    # Parallel composition needs a thread pool registered on the service context.
    C["thread_pool"] = ThreadPoolExecutor(max_workers=4)

    op1, op2, op3, op4 = (TestOp(op_name) for op_name in ("op1", "op2", "op3", "op4"))

    # (banner, result label, factory). Each composite is built just before it runs.
    scenarios = [
        ("=== Testing sequential execution op1 >> op2 ===",
         "Sequential result",
         lambda: op1 >> op2),
        ("=== Testing parallel execution op1 | op2 ===",
         "Parallel result",
         lambda: op1 | op2),
        ("=== Testing mixed calls op1 >> (op2 | op3) >> op4 ===",
         "Mixed result",
         lambda: op1 >> (op2 | op3) >> op4),
        ("=== Testing complex mixed calls op1 >> (op1 | (op2 >> op3)) >> op4 ===",
         "Complex mixed result",
         lambda: op1 >> (op1 | (op2 >> op3)) >> op4),
    ]

    for banner, result_label, build in scenarios:
        logger.info(banner)
        composite = build()
        outcome = composite()
        logger.info(f"{result_label}: {outcome}")
143
+
144
+
145
if __name__ == "__main__":
    # Run both manual checks, separated by a visual divider.
    divider = "=" * 50
    run1()
    print("\n" + divider + "\n")
    run2()
flowllm/op/base_ray_op.py ADDED
@@ -0,0 +1,313 @@
1
+ from abc import ABC
2
+
3
+ import pandas as pd
4
+ import ray
5
+ from loguru import logger
6
+ from tqdm import tqdm
7
+
8
+ from flowllm.context.service_context import C
9
+ from flowllm.op.base_op import BaseOp
10
+
11
+
12
class BaseRayOp(BaseOp, ABC):
    """
    Base class for Ray-based operations that provides parallel task execution capabilities.
    Inherits from BaseOp and provides methods for submitting and joining Ray tasks.
    """

    def submit_and_join_ray_task(self, fn, parallel_key: str = "", task_desc: str = "",
                                 enable_test: bool = False, **kwargs):
        """
        Split one list argument across Ray tasks, run ``fn`` per item and collect the results.

        ``fn`` is always called with keyword arguments only: the remaining
        ``kwargs``, the current list item under ``parallel_key`` and an
        ``actor_index`` (the worker number, or 0 in test mode).

        Args:
            fn: Function to execute for every item of the parallel list
            parallel_key: Name of the list-valued kwarg to split; when empty, the
                first kwarg whose value is a list is used
            task_desc: Description for logging and progress bars
            enable_test: Run every item serially in the current process instead of
                submitting Ray tasks
            **kwargs: Arguments to pass to the function, including the list to parallelize over

        Returns:
            List of results from all calls (falsy results dropped, list results flattened)

        Raises:
            KeyError: If ``parallel_key`` is not in ``kwargs`` (or no list argument exists)
            AssertionError: If the selected argument is not a list
        """
        max_workers = C.service_config.ray_max_workers
        self.ray_task_list.clear()

        # Auto-detect parallel key if not provided
        if not parallel_key:
            for key, value in kwargs.items():
                if isinstance(value, list):
                    parallel_key = key
                    logger.info(f"using first list parallel_key={parallel_key}")
                    break

        # Extract the list to parallelize over. Same exception type as a failing
        # ``pop``, but with a message that explains what is missing.
        if parallel_key not in kwargs:
            raise KeyError(f"parallel_key={parallel_key!r} not found in kwargs; "
                           f"pass a list argument to parallelize over")
        parallel_list = kwargs.pop(parallel_key)
        assert isinstance(parallel_list, list), f"kwargs[{parallel_key!r}] must be a list"

        # Put pandas DataFrames into the Ray object store; fn receives the
        # resulting reference in place of the frame.
        for key in sorted(kwargs.keys()):
            value = kwargs[key]
            if isinstance(value, pd.DataFrame):
                kwargs[key] = ray.put(value)

        if enable_test:
            test_result_list = []
            for value in parallel_list:
                kwargs.update({"actor_index": 0, parallel_key: value})
                t_result = fn(**kwargs)
                if t_result:
                    if isinstance(t_result, list):
                        test_result_list.extend(t_result)
                    else:
                        test_result_list.append(t_result)
            return test_result_list

        # Create and submit one task per worker
        for i in range(max_workers):
            # ``worker_index=i`` binds the loop value at definition time, so the
            # wrapper does not depend on when its closure is serialized.
            def fn_wrapper(worker_index=i):
                result_list = []
                # Worker k handles items k, k + max_workers, k + 2 * max_workers, ...
                for parallel_value in parallel_list[worker_index::max_workers]:
                    call_kwargs = {**kwargs, "actor_index": worker_index, parallel_key: parallel_value}
                    part_result = fn(**call_kwargs)
                    if part_result:
                        if isinstance(part_result, list):
                            result_list.extend(part_result)
                        else:
                            result_list.append(part_result)
                return result_list

            self.submit_ray_task(fn=fn_wrapper)
            logger.info(f"ray.submit task_desc={task_desc} id={i}")

        # Wait for all tasks to complete and collect results
        result = self.join_ray_task(task_desc=task_desc)
        logger.info(f"{task_desc} complete. result_size={len(result)} resources={ray.available_resources()}")
        return result

    def submit_ray_task(self, fn, *args, **kwargs):
        """
        Submit a single Ray task for asynchronous execution.

        Args:
            fn: Function to execute remotely
            *args: Positional arguments for the function
            **kwargs: Keyword arguments for the function

        Returns:
            Self for method chaining

        Raises:
            RuntimeError: If Ray is not configured (ray_max_workers <= 1)
        """
        if C.service_config.ray_max_workers <= 1:
            raise RuntimeError("Ray is not configured. Please set ray_max_workers > 1 in service config.")

        # Initialize Ray if not already done
        if not ray.is_initialized():
            logger.warning(f"Ray is not initialized. Initializing Ray with {C.service_config.ray_max_workers} workers.")
            ray.init(num_cpus=C.service_config.ray_max_workers)

        # Create remote function and submit task
        remote_fn = ray.remote(fn)
        task = remote_fn.remote(*args, **kwargs)
        self.ray_task_list.append(task)
        return self

    def join_ray_task(self, task_desc: str = None) -> list:
        """
        Wait for all submitted Ray tasks to complete and collect their results.

        Falsy results are dropped; list results are flattened into the output.

        Args:
            task_desc: Description for the progress bar

        Returns:
            Combined list of results from all completed tasks

        Raises:
            Whatever ``ray.get`` raises for a failed task.
        """
        result = []
        try:
            # Process each task and collect results with progress bar
            for task in tqdm(self.ray_task_list, desc=task_desc or f"{self.name}_ray"):
                t_result = ray.get(task)
                if t_result:
                    if isinstance(t_result, list):
                        result.extend(t_result)
                    else:
                        result.append(t_result)
        finally:
            # Clear even when a task failed. Otherwise the failed reference stays
            # queued and every later join_ray_task() call re-raises the same error.
            self.ray_task_list.clear()
        return result
145
+
146
+
147
def run():
    """Test Ray multiprocessing functionality"""
    import math
    import time

    def cpu_intensive_task(n: int, task_id: str):
        """CPU intensive task: calculate prime numbers"""
        began = time.time()

        def is_prime(num):
            # Trial division up to the integer square root.
            return num >= 2 and all(num % j != 0 for j in range(2, int(math.sqrt(num)) + 1))

        primes = [x for x in range(2, n) if is_prime(x)]
        finished = time.time()

        result = {
            'task_id': task_id,
            'prime_count': len(primes),
            'max_prime': max(primes, default=0),
            'execution_time': finished - began,
        }
        logger.info(f"Task {task_id} completed: found {len(primes)} primes, time: {result['execution_time']:.2f}s")
        return result

    class TestRayOp(BaseRayOp):
        """Concrete op used only to reach the submit/join helpers."""

        def execute(self):
            logger.info(f"Executing {self.name}")
            return f"Result from {self.name}"

    # Service config with Ray (4 workers) and a 4-thread pool enabled.
    from flowllm.schema.service_config import ServiceConfig

    test_config = ServiceConfig()
    test_config.ray_max_workers = 4
    test_config.thread_pool_max_workers = 4
    C.init_by_service_config(test_config)

    logger.info("=== Testing Ray multiprocessing ===")
    ray_op = TestRayOp("ray_test_op")
    logger.info("--- Testing submit_ray_task and join_ray_task ---")

    # Test 1: basic Ray task submission
    task_size = 50000  # upper bound of the prime search
    num_tasks = 4

    try:
        logger.info(f"Submitting {num_tasks} Ray tasks (finding primes up to {task_size})")
        started = time.time()
        for idx in range(num_tasks):
            ray_op.submit_ray_task(cpu_intensive_task, task_size, f"ray_task_{idx}")

        results = ray_op.join_ray_task("Processing Ray tasks")
        elapsed = time.time() - started

        logger.info(f"Ray tasks completed in {elapsed:.2f}s")
        logger.info(f"Ray results: {results}")

    except Exception as e:
        logger.error(f"Ray task execution failed: {e}")

    # Test 2: Ray vs ThreadPool on the same workload
    logger.info("\n--- Performance Comparison: Ray vs ThreadPool ---")

    def timed_batch(submit, join, id_prefix, join_desc):
        """Submit ``num_tasks`` prime tasks, join, print the results and return the wall time."""
        batch_started = time.time()
        for idx in range(num_tasks):
            submit(cpu_intensive_task, task_size, f"{id_prefix}{idx}")
        batch_results = join(join_desc)
        print(batch_results)
        return time.time() - batch_started

    try:
        thread_op = TestRayOp("thread_test_op")
        logger.info(f"Testing ThreadPool with {num_tasks} tasks")
        thread_time = timed_batch(thread_op.submit_task, thread_op.join_task,
                                  "thread_task_", "Processing ThreadPool tasks")
        logger.info(f"ThreadPool completed in {thread_time:.2f}s")

        ray_op2 = TestRayOp("ray_test_op2")
        logger.info(f"Testing Ray with {num_tasks} tasks")
        ray_time = timed_batch(ray_op2.submit_ray_task, ray_op2.join_ray_task,
                               "ray_task2_", "Processing Ray tasks (comparison)")
        logger.info(f"Ray completed in {ray_time:.2f}s")

        if ray_time > 0:
            speedup = thread_time / ray_time
        else:
            speedup = 0
        logger.info(f"\n=== Performance Summary ===")
        logger.info(f"ThreadPool time: {thread_time:.2f}s")
        logger.info(f"Ray time: {ray_time:.2f}s")
        logger.info(f"Ray speedup: {speedup:.2f}x")

    except Exception as e:
        logger.error(f"Performance comparison failed: {e}")

    # Test 3: a failing task among successful ones
    logger.info("\n--- Testing Error Handling ---")

    def failing_task(task_id: str):
        if task_id != "fail_task":
            return f"Success: {task_id}"
        raise ValueError(f"Intentional error in {task_id}")

    try:
        error_op = TestRayOp("error_test_op")
        for task_name in ("success_task_1", "fail_task", "success_task_2"):
            error_op.submit_ray_task(failing_task, task_name)

        error_results = error_op.join_ray_task("Testing error handling")
        logger.info(f"Error handling results: {error_results}")

    except Exception as e:
        logger.error(f"Expected error occurred: {e}")

    # Test 4: submitting with Ray disabled in the config must raise RuntimeError
    logger.info("\n--- Testing Ray Configuration Validation ---")

    original_workers = C.service_config.ray_max_workers
    try:
        C.service_config.ray_max_workers = 1  # disables Ray for submit_ray_task

        config_test_op = TestRayOp("config_test_op")
        config_test_op.submit_ray_task(cpu_intensive_task, 100, "config_test")

        logger.error("This should not be reached - Ray should be disabled")

    except RuntimeError as e:
        logger.info(f"✓ Correctly caught configuration error: {e}")

    finally:
        # Put the worker count back for anything that runs afterwards.
        C.service_config.ray_max_workers = original_workers

    logger.info("\n=== Ray testing completed ===")


if __name__ == "__main__":
    run()
flowllm/op/code/__init__.py ADDED
@@ -0,0 +1 @@
1
+ from .execute_code_op import ExecuteCodeOp