celestialflow 3.0.6__tar.gz → 3.0.7__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (44)
  1. {celestialflow-3.0.6 → celestialflow-3.0.7}/PKG-INFO +3 -2
  2. {celestialflow-3.0.6 → celestialflow-3.0.7}/README.md +2 -1
  3. {celestialflow-3.0.6 → celestialflow-3.0.7}/pyproject.toml +1 -1
  4. {celestialflow-3.0.6 → celestialflow-3.0.7}/src/celestialflow/__init__.py +4 -0
  5. {celestialflow-3.0.6 → celestialflow-3.0.7}/src/celestialflow/adapters/celestialtree/client.py +28 -14
  6. {celestialflow-3.0.6 → celestialflow-3.0.7}/src/celestialflow/adapters/celestialtree/tools.py +6 -1
  7. {celestialflow-3.0.6 → celestialflow-3.0.7}/src/celestialflow/task_graph.py +16 -14
  8. {celestialflow-3.0.6 → celestialflow-3.0.7}/src/celestialflow/task_logging.py +7 -0
  9. {celestialflow-3.0.6 → celestialflow-3.0.7}/src/celestialflow/task_manage.py +27 -176
  10. {celestialflow-3.0.6 → celestialflow-3.0.7}/src/celestialflow/task_nodes.py +76 -17
  11. {celestialflow-3.0.6 → celestialflow-3.0.7}/src/celestialflow/task_queue.py +3 -0
  12. celestialflow-3.0.7/src/celestialflow/task_stage.py +186 -0
  13. {celestialflow-3.0.6 → celestialflow-3.0.7}/src/celestialflow/task_tools.py +3 -3
  14. {celestialflow-3.0.6 → celestialflow-3.0.7}/src/celestialflow/task_types.py +0 -1
  15. {celestialflow-3.0.6 → celestialflow-3.0.7}/src/celestialflow.egg-info/PKG-INFO +3 -2
  16. {celestialflow-3.0.6 → celestialflow-3.0.7}/src/celestialflow.egg-info/SOURCES.txt +1 -0
  17. {celestialflow-3.0.6 → celestialflow-3.0.7}/tests/test_graph.py +12 -12
  18. {celestialflow-3.0.6 → celestialflow-3.0.7}/tests/test_nodes.py +49 -15
  19. {celestialflow-3.0.6 → celestialflow-3.0.7}/tests/test_structure.py +57 -55
  20. {celestialflow-3.0.6 → celestialflow-3.0.7}/setup.cfg +0 -0
  21. {celestialflow-3.0.6 → celestialflow-3.0.7}/src/celestialflow/adapters/__init__.py +0 -0
  22. {celestialflow-3.0.6 → celestialflow-3.0.7}/src/celestialflow/adapters/celestialtree/__init__.py +0 -0
  23. {celestialflow-3.0.6 → celestialflow-3.0.7}/src/celestialflow/static/css/base.css +0 -0
  24. {celestialflow-3.0.6 → celestialflow-3.0.7}/src/celestialflow/static/css/dashboard.css +0 -0
  25. {celestialflow-3.0.6 → celestialflow-3.0.7}/src/celestialflow/static/css/errors.css +0 -0
  26. {celestialflow-3.0.6 → celestialflow-3.0.7}/src/celestialflow/static/css/inject.css +0 -0
  27. {celestialflow-3.0.6 → celestialflow-3.0.7}/src/celestialflow/static/favicon.ico +0 -0
  28. {celestialflow-3.0.6 → celestialflow-3.0.7}/src/celestialflow/static/js/main.js +0 -0
  29. {celestialflow-3.0.6 → celestialflow-3.0.7}/src/celestialflow/static/js/task_errors.js +0 -0
  30. {celestialflow-3.0.6 → celestialflow-3.0.7}/src/celestialflow/static/js/task_injection.js +0 -0
  31. {celestialflow-3.0.6 → celestialflow-3.0.7}/src/celestialflow/static/js/task_statuses.js +0 -0
  32. {celestialflow-3.0.6 → celestialflow-3.0.7}/src/celestialflow/static/js/task_structure.js +0 -0
  33. {celestialflow-3.0.6 → celestialflow-3.0.7}/src/celestialflow/static/js/task_topology.js +0 -0
  34. {celestialflow-3.0.6 → celestialflow-3.0.7}/src/celestialflow/static/js/utils.js +0 -0
  35. {celestialflow-3.0.6 → celestialflow-3.0.7}/src/celestialflow/task_progress.py +0 -0
  36. {celestialflow-3.0.6 → celestialflow-3.0.7}/src/celestialflow/task_report.py +0 -0
  37. {celestialflow-3.0.6 → celestialflow-3.0.7}/src/celestialflow/task_structure.py +0 -0
  38. {celestialflow-3.0.6 → celestialflow-3.0.7}/src/celestialflow/task_web.py +0 -0
  39. {celestialflow-3.0.6 → celestialflow-3.0.7}/src/celestialflow/templates/index.html +0 -0
  40. {celestialflow-3.0.6 → celestialflow-3.0.7}/src/celestialflow.egg-info/dependency_links.txt +0 -0
  41. {celestialflow-3.0.6 → celestialflow-3.0.7}/src/celestialflow.egg-info/entry_points.txt +0 -0
  42. {celestialflow-3.0.6 → celestialflow-3.0.7}/src/celestialflow.egg-info/requires.txt +0 -0
  43. {celestialflow-3.0.6 → celestialflow-3.0.7}/src/celestialflow.egg-info/top_level.txt +0 -0
  44. {celestialflow-3.0.6 → celestialflow-3.0.7}/tests/test_manage.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: celestialflow
3
- Version: 3.0.6
3
+ Version: 3.0.7
4
4
  Summary: A flexible GRAPH-based task orchestration framework.
5
5
  Author-email: Mr-xiaotian <mingxiaomingtian@gmail.com>
6
6
  License: MIT
@@ -326,7 +326,7 @@ flowchart TD
326
326
  - 12/12/2024-12/16/2024: 在原有链式结构基础上允许节点有复数下级节点, 实现Tree结构; 将原有TaskChain改名为TaskTree
327
327
  - 3/16/2025: 支持Web端任务完成情况可视化
328
328
  - 6/9/2025: 支持节点拥有复数上级节点, 脱离纯Tree结构, 为之后循环图做准备
329
- - 6/11/2025: 自[CelestialVault](https://github.com/Mr-xiaotian/CelestialVault)项目instances.inst_task迁入
329
+ - 6/11/2025: 自[CelestialVault](https://github.com/Mr-xiaotian/CelestialVault)项目instances.inst_task迁出
330
330
  - 6/12/2025: 支持循环图, 下级节点可指向上级节点
331
331
  - 6/13/2025: 支持loop结构, 即节点可指向自己
332
332
  - 6/14/2025: 支持forest结构, 即可有多个根节点
@@ -335,6 +335,7 @@ flowchart TD
335
335
  - 3.0.4: 新增一个抽象结构TaskQueue, 用于表示节点的所有"入边"与"出边"; 恢复未消费任务的保存功能
336
336
  - 3.0.5: 删除原有的TaskRedisTransfer节点, 并增添三种新的redis交互节点TaskRedisSink TaskRedisSource TaskRedisAck, 用于跨语言 跨进程 跨设备处理任务; 并在Web页面添加展示拓扑信息的卡片
337
337
  - 3.0.6: 添加对[CelestialTree](https://github.com/Mr-xiaotian/CelestialTree)系统的支持, 现在可以追踪单个任务的流向
338
+ - 3.0.7: 将TaskStage从TaskManager中单独抽出来作为一个子类; 增加新节点TaskRouter, 可以将传入的任务选择的传给不同的下游节点, 而不是进行广播
338
339
 
339
340
  ## Star 历史趋势(Star History)
340
341
 
@@ -301,7 +301,7 @@ flowchart TD
301
301
  - 12/12/2024-12/16/2024: 在原有链式结构基础上允许节点有复数下级节点, 实现Tree结构; 将原有TaskChain改名为TaskTree
302
302
  - 3/16/2025: 支持Web端任务完成情况可视化
303
303
  - 6/9/2025: 支持节点拥有复数上级节点, 脱离纯Tree结构, 为之后循环图做准备
304
- - 6/11/2025: 自[CelestialVault](https://github.com/Mr-xiaotian/CelestialVault)项目instances.inst_task迁入
304
+ - 6/11/2025: 自[CelestialVault](https://github.com/Mr-xiaotian/CelestialVault)项目instances.inst_task迁出
305
305
  - 6/12/2025: 支持循环图, 下级节点可指向上级节点
306
306
  - 6/13/2025: 支持loop结构, 即节点可指向自己
307
307
  - 6/14/2025: 支持forest结构, 即可有多个根节点
@@ -310,6 +310,7 @@ flowchart TD
310
310
  - 3.0.4: 新增一个抽象结构TaskQueue, 用于表示节点的所有"入边"与"出边"; 恢复未消费任务的保存功能
311
311
  - 3.0.5: 删除原有的TaskRedisTransfer节点, 并增添三种新的redis交互节点TaskRedisSink TaskRedisSource TaskRedisAck, 用于跨语言 跨进程 跨设备处理任务; 并在Web页面添加展示拓扑信息的卡片
312
312
  - 3.0.6: 添加对[CelestialTree](https://github.com/Mr-xiaotian/CelestialTree)系统的支持, 现在可以追踪单个任务的流向
313
+ - 3.0.7: 将TaskStage从TaskManager中单独抽出来作为一个子类; 增加新节点TaskRouter, 可以将传入的任务选择的传给不同的下游节点, 而不是进行广播
313
314
 
314
315
  ## Star 历史趋势(Star History)
315
316
 
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "celestialflow"
7
- version = "3.0.6"
7
+ version = "3.0.7"
8
8
  description = "A flexible GRAPH-based task orchestration framework."
9
9
  readme = "README.md"
10
10
  license = { text = "MIT" }
@@ -1,10 +1,12 @@
1
1
  from .task_graph import TaskGraph
2
2
  from .task_manage import TaskManager
3
+ from .task_stage import TaskStage
3
4
  from .task_nodes import (
4
5
  TaskSplitter,
5
6
  TaskRedisSink,
6
7
  TaskRedisSource,
7
8
  TaskRedisAck,
9
+ TaskRouter,
8
10
  )
9
11
  from .task_structure import (
10
12
  TaskChain,
@@ -33,10 +35,12 @@ __all__ = [
33
35
  "TaskWheel",
34
36
  "TaskGrid",
35
37
  "TaskManager",
38
+ "TaskStage",
36
39
  "TaskSplitter",
37
40
  "TaskRedisSink",
38
41
  "TaskRedisSource",
39
42
  "TaskRedisAck",
43
+ "TaskRouter",
40
44
  "TerminationSignal",
41
45
  "TaskWebServer",
42
46
  "CelestialTreeClient",
@@ -9,8 +9,8 @@ class Client:
9
9
  Python client for CelestialTree HTTP API.
10
10
  """
11
11
 
12
- def __init__(self, base_url: str, timeout: float = 5.0):
13
- self.base_url = base_url.rstrip("/")
12
+ def __init__(self, host: str = "127.0.0.1", port: int = 7777, timeout: float = 5.0):
13
+ self.base_url = f"http://{host}:{port}"
14
14
  self.timeout = timeout
15
15
 
16
16
  def init_session(self):
@@ -60,7 +60,9 @@ class Client:
60
60
  data=json.dumps(body),
61
61
  timeout=self.timeout,
62
62
  )
63
- r.raise_for_status()
63
+
64
+ if not (200 <= r.status_code < 300):
65
+ raise RuntimeError(r.json()["error"])
64
66
  return r.json()["id"]
65
67
 
66
68
  def get_event(self, event_id: int) -> Dict[str, Any]:
@@ -70,7 +72,9 @@ class Client:
70
72
  f"{self.base_url}/event/{event_id}",
71
73
  timeout=self.timeout,
72
74
  )
73
- r.raise_for_status()
75
+
76
+ if not (200 <= r.status_code < 300):
77
+ raise RuntimeError(r.json()["error"])
74
78
  return r.json()
75
79
 
76
80
  def children(self, event_id: int) -> List[int]:
@@ -80,7 +84,9 @@ class Client:
80
84
  f"{self.base_url}/children/{event_id}",
81
85
  timeout=self.timeout,
82
86
  )
83
- r.raise_for_status()
87
+
88
+ if not (200 <= r.status_code < 300):
89
+ raise RuntimeError(r.json()["error"])
84
90
  return r.json()["children"]
85
91
 
86
92
  def descendants(self, event_id: int) -> Dict[str, Any]:
@@ -90,7 +96,9 @@ class Client:
90
96
  f"{self.base_url}/descendants/{event_id}",
91
97
  timeout=self.timeout,
92
98
  )
93
- r.raise_for_status()
99
+
100
+ if not (200 <= r.status_code < 300):
101
+ raise RuntimeError(r.json()["error"])
94
102
  return r.json()
95
103
 
96
104
  def heads(self) -> List[int]:
@@ -100,17 +108,21 @@ class Client:
100
108
  f"{self.base_url}/heads",
101
109
  timeout=self.timeout,
102
110
  )
103
- r.raise_for_status()
111
+
112
+ if not (200 <= r.status_code < 300):
113
+ raise RuntimeError(r.json()["error"])
104
114
  return r.json()["heads"]
105
115
 
106
116
  def health(self) -> bool:
107
117
  self.init_session()
108
-
109
- r = self.session.get(
110
- f"{self.base_url}/healthz",
111
- timeout=self.timeout,
112
- )
113
- return r.status_code == 200
118
+ try:
119
+ r = self.session.get(
120
+ f"{self.base_url}/healthz",
121
+ timeout=self.timeout,
122
+ )
123
+ return r.status_code == 200
124
+ except Exception:
125
+ return False
114
126
 
115
127
  def version(self) -> Dict[str, Any]:
116
128
  self.init_session()
@@ -119,7 +131,9 @@ class Client:
119
131
  f"{self.base_url}/version",
120
132
  timeout=self.timeout,
121
133
  )
122
- r.raise_for_status()
134
+
135
+ if not (200 <= r.status_code < 300):
136
+ raise RuntimeError(r.json()["error"])
123
137
  return r.json()
124
138
 
125
139
  # ---------- SSE Subscribe ----------
@@ -10,7 +10,12 @@ def format_tree(node: dict, prefix: str = "", is_last: bool = True) -> str:
10
10
  lines = []
11
11
 
12
12
  connector = "└── " if is_last else "├── "
13
- lines.append(f"{prefix}{connector}{node['id']}")
13
+
14
+ label = str(node["id"])
15
+ if node.get("is_ref"):
16
+ label += " (ref)"
17
+
18
+ lines.append(f"{prefix}{connector}{label}")
14
19
 
15
20
  children = node.get("children", [])
16
21
  if children:
@@ -5,7 +5,7 @@ from datetime import datetime
5
5
  from multiprocessing import Queue as MPQueue
6
6
  from typing import Any, Dict, List
7
7
 
8
- from .task_manage import TaskManager
8
+ from .task_stage import TaskStage
9
9
  from .task_report import TaskReporter, NullTaskReporter
10
10
  from .task_logging import LogListener, TaskLogger
11
11
  from .task_queue import TaskQueue
@@ -32,16 +32,16 @@ from .adapters.celestialtree import (
32
32
 
33
33
 
34
34
  class TaskGraph:
35
- def __init__(self, root_stages: List[TaskManager], layout_mode: str = "process"):
35
+ def __init__(self, root_stages: List[TaskStage], layout_mode: str = "process"):
36
36
  """
37
37
  初始化 TaskGraph 实例。
38
38
 
39
- TaskGraph 表示一组 TaskManager 节点所构成的任务图,可用于构建并行、串行、
39
+ TaskGraph 表示一组 TaskStage 节点所构成的任务图,可用于构建并行、串行、
40
40
  分层等多种形式的任务执行流程。通过分析图结构和调度布局策略,实现灵活的
41
41
  DAG 任务调度控制。
42
42
 
43
- :param root_stages : List[TaskManager]
44
- 根节点 TaskManager 列表,用于构建任务图的入口节点。
43
+ :param root_stages : List[TaskStage]
44
+ 根节点 TaskStage 列表,用于构建任务图的入口节点。
45
45
  支持多根节点(森林结构),系统将自动构建整个任务依赖图。
46
46
 
47
47
  :param layout_mode : str, optional, default = 'process'
@@ -130,7 +130,7 @@ class TaskGraph:
130
130
  queue.extend(stage.next_stages)
131
131
 
132
132
  for stage_tag in self.stages_status_dict:
133
- stage: TaskManager = self.stages_status_dict[stage_tag]["stage"]
133
+ stage: TaskStage = self.stages_status_dict[stage_tag]["stage"]
134
134
  in_queue: TaskQueue = self.stages_status_dict[stage_tag]["in_queue"]
135
135
 
136
136
  # 遍历每个前驱,创建边队列
@@ -162,7 +162,7 @@ class TaskGraph:
162
162
  """
163
163
  self.structure_json = build_structure_graph(self.root_stages)
164
164
 
165
- def set_root_stages(self, root_stages: List[TaskManager]):
165
+ def set_root_stages(self, root_stages: List[TaskStage]):
166
166
  """
167
167
  设置根节点
168
168
 
@@ -212,8 +212,10 @@ class TaskGraph:
212
212
  self._ctree_port = port
213
213
 
214
214
  if use_ctree:
215
- base_url = f"http://{host}:{port}"
216
- self.ctree_client = CelestialTreeClient(base_url)
215
+ self.ctree_client = CelestialTreeClient(host=host, port=port)
216
+ if not self.ctree_client.health():
217
+ self._use_ctree = False
218
+ self.ctree_client = NullCelestialTreeClient()
217
219
  else:
218
220
  self.ctree_client = NullCelestialTreeClient()
219
221
 
@@ -225,7 +227,7 @@ class TaskGraph:
225
227
  :param execution_mode: 节点内部执行模式, 可选值为 'serial' 或 'thread''
226
228
  """
227
229
 
228
- def set_subsequent_stage_mode(stage: TaskManager):
230
+ def set_subsequent_stage_mode(stage: TaskStage):
229
231
  stage.set_stage_mode(stage_mode)
230
232
  stage.set_execution_mode(execution_mode)
231
233
  visited_stages.add(stage)
@@ -248,7 +250,7 @@ class TaskGraph:
248
250
  :param put_termination_signal: 是否放入终止信号
249
251
  """
250
252
  for tag, tasks in tasks_dict.items():
251
- stage: TaskManager = self.stages_status_dict[tag]["stage"]
253
+ stage: TaskStage = self.stages_status_dict[tag]["stage"]
252
254
  in_queue: TaskQueue = self.stages_status_dict[tag]["in_queue"]
253
255
 
254
256
  for task in tasks:
@@ -324,7 +326,7 @@ class TaskGraph:
324
326
 
325
327
  processes = []
326
328
  for stage_tag in layer:
327
- stage: TaskManager = self.stages_status_dict[stage_tag]["stage"]
329
+ stage: TaskStage = self.stages_status_dict[stage_tag]["stage"]
328
330
  self._execute_stage(stage)
329
331
  if stage.stage_mode == "process":
330
332
  processes.append(self.processes[-1]) # 最新的进程
@@ -337,7 +339,7 @@ class TaskGraph:
337
339
 
338
340
  self.task_logger.end_layer(layer, time.time() - start_time)
339
341
 
340
- def _execute_stage(self, stage: TaskManager):
342
+ def _execute_stage(self, stage: TaskStage):
341
343
  """
342
344
  执行单个节点
343
345
 
@@ -497,7 +499,7 @@ class TaskGraph:
497
499
  interval = self.reporter.interval
498
500
 
499
501
  for tag, stage_status_dict in self.stages_status_dict.items():
500
- stage: TaskManager = stage_status_dict["stage"]
502
+ stage: TaskStage = stage_status_dict["stage"]
501
503
  last_stage_status_dict: dict = self.last_status_dict.get(tag, {})
502
504
 
503
505
  status = stage_status_dict.get("status", StageStatus.NOT_STARTED)
@@ -163,6 +163,13 @@ class TaskLogger:
163
163
  f"In '{func_name}', Task {task_info} has split into {split_count} parts. Used {use_time:.2f} seconds.",
164
164
  )
165
165
 
166
+ # ==== router ====
167
+ def router_success(self, func_name, task_info, target_node, use_time):
168
+ self._log(
169
+ "SUCCESS",
170
+ f"In '{func_name}', Task {task_info} has routed to {target_node}. Used {use_time:.2f} seconds.",
171
+ )
172
+
166
173
  # ==== queue ====
167
174
  def put_source(self, source, queue_tag, stage_tag, direction):
168
175
  if isinstance(source, TerminationSignal):
@@ -9,7 +9,6 @@ from multiprocessing import Value as MPValue
9
9
  from multiprocessing import Queue as MPQueue
10
10
  from queue import Queue as ThreadQueue
11
11
  from threading import Event, Lock
12
- from typing import List
13
12
 
14
13
  from .task_progress import ProgressManager, NullProgress
15
14
  from .task_logging import LogListener, TaskLogger
@@ -71,10 +70,6 @@ class TaskManager:
71
70
  self.thread_pool = None
72
71
  self.process_pool = None
73
72
 
74
- self.next_stages: List[TaskManager] = []
75
- self.prev_stages: List[TaskManager] = []
76
- self.set_stage_name()
77
-
78
73
  self.retry_exceptions = tuple() # 需要重试的异常类型
79
74
  self.ctree_client = NullCelestialTreeClient()
80
75
 
@@ -163,14 +158,14 @@ class TaskManager:
163
158
  [queue_map[self.execution_mode]()],
164
159
  [None],
165
160
  self.logger_queue,
166
- self.get_stage_tag(),
161
+ self.get_manager_tag(),
167
162
  "in",
168
163
  )
169
164
  self.result_queues: TaskQueue = result_queues or TaskQueue(
170
165
  [queue_map[self.execution_mode]()],
171
166
  [None],
172
167
  self.logger_queue,
173
- self.get_stage_tag(),
168
+ self.get_manager_tag(),
174
169
  "out",
175
170
  )
176
171
  self.fail_queue: ThreadQueue | MPQueue | AsyncQueue = (
@@ -217,69 +212,6 @@ class TaskManager:
217
212
  else "serial"
218
213
  )
219
214
 
220
- def set_graph_context(
221
- self,
222
- next_stages: List[TaskManager] = None,
223
- stage_mode: str = None,
224
- stage_name: str = None,
225
- ):
226
- """
227
- 设置链式上下文(仅限组成graph时)
228
-
229
- :param next_stages: 后续节点列表
230
- :param stage_mode: 当前节点执行模式, 可以是 'serial'(串行)或 'process'(并行)
231
- :param name: 当前节点名称
232
- """
233
- self.set_next_stages(next_stages)
234
- self.set_stage_mode(stage_mode)
235
- self.set_stage_name(stage_name)
236
-
237
- def set_next_stages(self, next_stages: List[TaskManager]):
238
- """
239
- 设置后续节点列表, 并为后续节点添加本节点为前置节点
240
-
241
- :param next_stages: 后续节点列表
242
- """
243
- self.next_stages = next_stages
244
- for next_stage in self.next_stages:
245
- next_stage.add_prev_stages(self)
246
-
247
- def set_stage_mode(self, stage_mode: str):
248
- """
249
- 设置当前节点在graph中的执行模式, 可以是 'serial'(串行)或 'process'(并行)
250
-
251
- :param stage_mode: 当前节点执行模式
252
- """
253
- self.stage_mode = stage_mode if stage_mode == "process" else "serial"
254
-
255
- def set_stage_name(self, name: str = None):
256
- """
257
- 设置当前节点名称
258
-
259
- :param name: 当前节点名称
260
- """
261
- self.stage_name = name or id(self)
262
-
263
- def add_prev_stages(self, prev_stage: TaskManager):
264
- """
265
- 添加前置节点
266
-
267
- :param prev_stage: 前置节点
268
- """
269
- from .task_nodes import TaskSplitter
270
-
271
- if prev_stage in self.prev_stages:
272
- return
273
- self.prev_stages.append(prev_stage)
274
-
275
- if prev_stage is None:
276
- return
277
-
278
- if isinstance(prev_stage, TaskSplitter):
279
- self.task_counter.add_counter(prev_stage.split_output_counter)
280
- else:
281
- self.task_counter.add_counter(prev_stage.success_counter)
282
-
283
215
  def set_ctree(self, host="127.0.0.1", port=7777):
284
216
  """
285
217
  设置CelestialTreeClient
@@ -287,8 +219,9 @@ class TaskManager:
287
219
  :param host: CelestialTreeClient host
288
220
  :param port: CelestialTreeClient port
289
221
  """
290
- base_url = f"http://{host}:{port}"
291
- self.ctree_client = CelestialTreeClient(base_url)
222
+ self.ctree_client = CelestialTreeClient(host=host, port=port)
223
+ if not self.ctree_client.health():
224
+ self.ctree_client = NullCelestialTreeClient()
292
225
 
293
226
  def reset_counter(self):
294
227
  """
@@ -304,34 +237,6 @@ class TaskManager:
304
237
  if isinstance(self, TaskSplitter):
305
238
  self.split_output_counter.value = 0
306
239
 
307
- def get_stage_tag(self) -> str:
308
- """
309
- 获取当前节点在graph中的标签
310
-
311
- :return: 当前节点标签
312
- """
313
- if hasattr(self, "_stage_tag"):
314
- return self._stage_tag
315
- self._stage_tag = f"{self.stage_name}[{self.func.__name__}]"
316
- return self._stage_tag
317
-
318
- def get_stage_summary(self) -> dict:
319
- """
320
- 获取当前节点的状态快照
321
-
322
- :return: 当前节点状态快照
323
- """
324
- return {
325
- "stage_mode": self.stage_mode,
326
- "execution_mode": (
327
- self.execution_mode
328
- if self.execution_mode == "serial"
329
- else f"{self.execution_mode}-{self.worker_limit}"
330
- ),
331
- "func_name": self.get_stage_tag(),
332
- "class_name": self.__class__.__name__,
333
- }
334
-
335
240
  def get_func_name(self) -> str:
336
241
  """
337
242
  获取当前节点函数名
@@ -340,6 +245,17 @@ class TaskManager:
340
245
  """
341
246
  return self.func.__name__
342
247
 
248
+ def get_manager_tag(self) -> str:
249
+ """
250
+ 获取当前节点管理器标签
251
+
252
+ :return: 当前节点管理器标签
253
+ """
254
+ if hasattr(self, "_manager_tag"):
255
+ return self._manager_tag
256
+ self._manager_tag = f"Manager[{self.func.__name__}]"
257
+ return self._manager_tag
258
+
343
259
  def add_retry_exceptions(self, *exceptions):
344
260
  """
345
261
  添加需要重试的异常类型
@@ -356,16 +272,14 @@ class TaskManager:
356
272
  """
357
273
  progress_num = 0
358
274
  for task in task_source:
359
- task_id = self.ctree_client.emit(
360
- "task.input", message=f"In '{self.get_stage_tag()}'"
361
- )
275
+ task_id = self.ctree_client.emit("task.input")
362
276
  envelope = TaskEnvelope.wrap(task, task_id)
363
277
  self.task_queues.put_first(envelope)
364
278
  self.update_task_counter()
365
279
  self.task_logger.task_inject(
366
280
  self.get_func_name(),
367
281
  self.get_task_info(task),
368
- self.get_stage_tag(),
282
+ self.get_manager_tag(),
369
283
  f"[{task_id}]",
370
284
  )
371
285
 
@@ -382,16 +296,14 @@ class TaskManager:
382
296
  """
383
297
  progress_num = 0
384
298
  for task in task_source:
385
- task_id = self.ctree_client.emit(
386
- "task.input", message=f"In '{self.get_stage_tag()}'"
387
- )
299
+ task_id = self.ctree_client.emit("task.input")
388
300
  envelope = TaskEnvelope.wrap(task, task_id)
389
301
  await self.task_queues.put_first_async(envelope)
390
302
  self.update_task_counter()
391
303
  self.task_logger.task_inject(
392
304
  self.get_func_name(),
393
305
  self.get_task_info(task),
394
- self.get_stage_tag(),
306
+ self.get_manager_tag(),
395
307
  f"[{task_id}]",
396
308
  )
397
309
 
@@ -409,7 +321,6 @@ class TaskManager:
409
321
  """
410
322
  self.fail_queue.put(
411
323
  {
412
- "stage_tag": self.get_stage_tag(),
413
324
  "task": str(task),
414
325
  "error_info": f"{type(error).__name__}({error})",
415
326
  "timestamp": time.time(),
@@ -425,7 +336,6 @@ class TaskManager:
425
336
  """
426
337
  await self.fail_queue.put(
427
338
  {
428
- "stage_tag": self.get_stage_tag(),
429
339
  "task": str(task),
430
340
  "error_info": f"{type(error).__name__}({error})",
431
341
  "timestamp": time.time(),
@@ -577,9 +487,7 @@ class TaskManager:
577
487
  if self.enable_result_cache:
578
488
  self.success_dict[task] = processed_result
579
489
 
580
- result_id = self.ctree_client.emit(
581
- "task.success", parents=[task_id], message=f"In '{self.get_stage_tag()}'"
582
- )
490
+ result_id = self.ctree_client.emit("task.success", parents=[task_id])
583
491
  result_envelope = TaskEnvelope.wrap(result, result_id)
584
492
 
585
493
  # ✅ 清理 retry_time_dict
@@ -614,9 +522,7 @@ class TaskManager:
614
522
  if self.enable_result_cache:
615
523
  self.success_dict[task] = processed_result
616
524
 
617
- result_id = self.ctree_client.emit(
618
- "task.success", parents=[task_id], message=f"In '{self.get_stage_tag()}'"
619
- )
525
+ result_id = self.ctree_client.emit("task.success", parents=[task_id])
620
526
  result_envelope = TaskEnvelope.wrap(result, result_id)
621
527
 
622
528
  # ✅ 清理 retry_time_dict
@@ -660,7 +566,6 @@ class TaskManager:
660
566
  retry_id = self.ctree_client.emit(
661
567
  f"task.retry.{self.retry_time_dict[task_hash]}",
662
568
  parents=[task_id],
663
- message=f"In '{self.get_stage_tag()}'",
664
569
  )
665
570
 
666
571
  self.task_logger.task_retry(
@@ -675,9 +580,7 @@ class TaskManager:
675
580
  if self.enable_result_cache:
676
581
  self.error_dict[task] = exception
677
582
 
678
- error_id = self.ctree_client.emit(
679
- "task.error", parents=[task_id], message=f"In '{self.get_stage_tag()}'"
680
- )
583
+ error_id = self.ctree_client.emit("task.error", parents=[task_id])
681
584
 
682
585
  # ✅ 清理 retry_time_dict
683
586
  self.retry_time_dict.pop(task_hash, None)
@@ -722,7 +625,6 @@ class TaskManager:
722
625
  retry_id = self.ctree_client.emit(
723
626
  f"task.retry.{self.retry_time_dict[task_hash]}",
724
627
  parents=[task_id],
725
- message=f"In '{self.get_stage_tag()}'",
726
628
  )
727
629
 
728
630
  self.task_logger.task_retry(
@@ -737,9 +639,7 @@ class TaskManager:
737
639
  if self.enable_result_cache:
738
640
  self.error_dict[task] = exception
739
641
 
740
- error_id = self.ctree_client.emit(
741
- "task.error", parents=[task_id], message=f"In '{self.get_stage_tag()}'"
742
- )
642
+ error_id = self.ctree_client.emit("task.error", parents=[task_id])
743
643
 
744
644
  # ✅ 清理 retry_time_dict
745
645
  self.retry_time_dict.pop(task_hash, None)
@@ -764,7 +664,6 @@ class TaskManager:
764
664
  duplicate_id = self.ctree_client.emit(
765
665
  "task.duplicate",
766
666
  parents=[task_envelope.id],
767
- message=f"In '{self.get_stage_tag()}'",
768
667
  )
769
668
  self.task_logger.task_duplicate(
770
669
  self.get_func_name(),
@@ -854,48 +753,6 @@ class TaskManager:
854
753
  )
855
754
  self.log_listener.stop()
856
755
 
857
- def start_stage(
858
- self,
859
- input_queues: List[MPQueue],
860
- output_queues: List[MPQueue],
861
- fail_queue: MPQueue,
862
- logger_queue: MPQueue,
863
- ):
864
- """
865
- 根据 start_type 的值,选择串行、并行执行任务
866
-
867
- :param input_queues: 输入队列
868
- :param output_queue: 输出队列
869
- :param fail_queue: 失败队列
870
- """
871
- start_time = time.time()
872
- self.active = True
873
- self.init_progress()
874
- self.init_env(input_queues, output_queues, fail_queue, logger_queue)
875
- self.task_logger.start_stage(
876
- self.get_stage_tag(), self.execution_mode, self.worker_limit
877
- )
878
-
879
- # 根据模式运行对应的任务处理函数
880
- if self.execution_mode == "thread":
881
- self.run_with_executor(self.thread_pool)
882
- else:
883
- self.run_in_serial()
884
-
885
- # cleanup_mpqueue(input_queues) # 会影响之后finalize_nodes
886
- self.release_pool()
887
- self.result_queues.put(TERMINATION_SIGNAL)
888
-
889
- self.progress_manager.close()
890
- self.task_logger.end_stage(
891
- self.get_stage_tag(),
892
- self.execution_mode,
893
- time.time() - start_time,
894
- self.success_counter.value,
895
- self.error_counter.value,
896
- self.duplicate_counter.value,
897
- )
898
-
899
756
  def run_in_serial(self):
900
757
  """
901
758
  串行地执行任务
@@ -925,9 +782,7 @@ class TaskManager:
925
782
  self.task_queues.reset()
926
783
 
927
784
  if not self.is_tasks_finished():
928
- self.task_logger._log(
929
- "DEBUG", f"Retrying tasks for '{self.get_stage_tag()}'"
930
- )
785
+ self.task_logger._log("DEBUG", f"Retrying tasks")
931
786
  self.task_queues.put(TERMINATION_SIGNAL)
932
787
  self.run_in_serial()
933
788
 
@@ -1003,9 +858,7 @@ class TaskManager:
1003
858
  self.task_queues.reset()
1004
859
 
1005
860
  if not self.is_tasks_finished():
1006
- self.task_logger._log(
1007
- "DEBUG", f"Retrying tasks for '{self.get_stage_tag()}'"
1008
- )
861
+ self.task_logger._log("DEBUG", f"Retrying tasks")
1009
862
  self.task_queues.put(TERMINATION_SIGNAL)
1010
863
  self.run_with_executor(executor)
1011
864
 
@@ -1052,9 +905,7 @@ class TaskManager:
1052
905
  self.task_queues.reset()
1053
906
 
1054
907
  if not self.is_tasks_finished():
1055
- self.task_logger._log(
1056
- "DEBUG", f"Retrying tasks for '{self.get_stage_tag()}'"
1057
- )
908
+ self.task_logger._log("DEBUG", f"Retrying tasks")
1058
909
  await self.task_queues.put_async(TERMINATION_SIGNAL)
1059
910
  await self.run_in_async()
1060
911