celestialflow 3.0.3__tar.gz → 3.0.4__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (39) hide show
  1. {celestialflow-3.0.3 → celestialflow-3.0.4}/PKG-INFO +3 -3
  2. {celestialflow-3.0.3 → celestialflow-3.0.4}/README.md +2 -2
  3. {celestialflow-3.0.3 → celestialflow-3.0.4}/pyproject.toml +1 -1
  4. {celestialflow-3.0.3 → celestialflow-3.0.4}/src/celestialflow/__init__.py +6 -1
  5. {celestialflow-3.0.3 → celestialflow-3.0.4}/src/celestialflow/task_graph.py +106 -187
  6. {celestialflow-3.0.3 → celestialflow-3.0.4}/src/celestialflow/task_logging.py +35 -7
  7. {celestialflow-3.0.3 → celestialflow-3.0.4}/src/celestialflow/task_manage.py +114 -247
  8. {celestialflow-3.0.3 → celestialflow-3.0.4}/src/celestialflow/task_nodes.py +3 -3
  9. celestialflow-3.0.4/src/celestialflow/task_queue.py +227 -0
  10. {celestialflow-3.0.3 → celestialflow-3.0.4}/src/celestialflow/task_report.py +5 -18
  11. {celestialflow-3.0.3 → celestialflow-3.0.4}/src/celestialflow/task_structure.py +1 -0
  12. {celestialflow-3.0.3 → celestialflow-3.0.4}/src/celestialflow/task_tools.py +4 -6
  13. {celestialflow-3.0.3 → celestialflow-3.0.4}/src/celestialflow/task_types.py +8 -0
  14. {celestialflow-3.0.3 → celestialflow-3.0.4}/src/celestialflow/task_web.py +2 -2
  15. {celestialflow-3.0.3 → celestialflow-3.0.4}/src/celestialflow.egg-info/PKG-INFO +3 -3
  16. {celestialflow-3.0.3 → celestialflow-3.0.4}/src/celestialflow.egg-info/SOURCES.txt +1 -0
  17. {celestialflow-3.0.3 → celestialflow-3.0.4}/tests/test_graph.py +8 -6
  18. {celestialflow-3.0.3 → celestialflow-3.0.4}/tests/test_nodes.py +16 -48
  19. {celestialflow-3.0.3 → celestialflow-3.0.4}/tests/test_structure.py +39 -8
  20. {celestialflow-3.0.3 → celestialflow-3.0.4}/setup.cfg +0 -0
  21. {celestialflow-3.0.3 → celestialflow-3.0.4}/src/celestialflow/static/css/base.css +0 -0
  22. {celestialflow-3.0.3 → celestialflow-3.0.4}/src/celestialflow/static/css/dashboard.css +0 -0
  23. {celestialflow-3.0.3 → celestialflow-3.0.4}/src/celestialflow/static/css/errors.css +0 -0
  24. {celestialflow-3.0.3 → celestialflow-3.0.4}/src/celestialflow/static/css/inject.css +0 -0
  25. {celestialflow-3.0.3 → celestialflow-3.0.4}/src/celestialflow/static/favicon.ico +0 -0
  26. {celestialflow-3.0.3 → celestialflow-3.0.4}/src/celestialflow/static/js/main.js +0 -0
  27. {celestialflow-3.0.3 → celestialflow-3.0.4}/src/celestialflow/static/js/task_errors.js +0 -0
  28. {celestialflow-3.0.3 → celestialflow-3.0.4}/src/celestialflow/static/js/task_injection.js +0 -0
  29. {celestialflow-3.0.3 → celestialflow-3.0.4}/src/celestialflow/static/js/task_statuses.js +0 -0
  30. {celestialflow-3.0.3 → celestialflow-3.0.4}/src/celestialflow/static/js/task_structure.js +0 -0
  31. {celestialflow-3.0.3 → celestialflow-3.0.4}/src/celestialflow/static/js/task_topology.js +0 -0
  32. {celestialflow-3.0.3 → celestialflow-3.0.4}/src/celestialflow/static/js/utils.js +0 -0
  33. {celestialflow-3.0.3 → celestialflow-3.0.4}/src/celestialflow/task_progress.py +0 -0
  34. {celestialflow-3.0.3 → celestialflow-3.0.4}/src/celestialflow/templates/index.html +0 -0
  35. {celestialflow-3.0.3 → celestialflow-3.0.4}/src/celestialflow.egg-info/dependency_links.txt +0 -0
  36. {celestialflow-3.0.3 → celestialflow-3.0.4}/src/celestialflow.egg-info/entry_points.txt +0 -0
  37. {celestialflow-3.0.3 → celestialflow-3.0.4}/src/celestialflow.egg-info/requires.txt +0 -0
  38. {celestialflow-3.0.3 → celestialflow-3.0.4}/src/celestialflow.egg-info/top_level.txt +0 -0
  39. {celestialflow-3.0.3 → celestialflow-3.0.4}/tests/test_manage.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: celestialflow
3
- Version: 3.0.3
3
+ Version: 3.0.4
4
4
  Summary: A flexible GRAPH-based task orchestration framework.
5
5
  Author-email: Mr-xiaotian <mingxiaomingtian@gmail.com>
6
6
  License: MIT
@@ -26,7 +26,7 @@ Requires-Dist: jinja2
26
26
  # CelestialFlow ——一个轻量级、可并行、基于图结构的 Python 任务调度框架
27
27
 
28
28
  <p align="center">
29
- <img src="img/startup.png" width="1080" alt="CelestialFlow Logo">
29
+ <img src="https://raw.githubusercontent.com/Mr-xiaotian/CelestialFlow/main/img/logo.png" width="1080" alt="CelestialFlow Logo">
30
30
  </p>
31
31
 
32
32
  <p align="center">
@@ -111,7 +111,7 @@ python src/celestialflow/task_web.py 5005
111
111
 
112
112
  可查看任务结构、执行状态、错误日志、以及实时注入任务等功能。
113
113
 
114
- ![](img/web_display.png)
114
+ ![web_display.png](https://raw.githubusercontent.com/Mr-xiaotian/CelestialFlow/main/img/web_display.png)
115
115
 
116
116
  ### 运行测试示例
117
117
 
@@ -1,7 +1,7 @@
1
1
  # CelestialFlow ——一个轻量级、可并行、基于图结构的 Python 任务调度框架
2
2
 
3
3
  <p align="center">
4
- <img src="img/startup.png" width="1080" alt="CelestialFlow Logo">
4
+ <img src="https://raw.githubusercontent.com/Mr-xiaotian/CelestialFlow/main/img/logo.png" width="1080" alt="CelestialFlow Logo">
5
5
  </p>
6
6
 
7
7
  <p align="center">
@@ -86,7 +86,7 @@ python src/celestialflow/task_web.py 5005
86
86
 
87
87
  可查看任务结构、执行状态、错误日志、以及实时注入任务等功能。
88
88
 
89
- ![](img/web_display.png)
89
+ ![web_display.png](https://raw.githubusercontent.com/Mr-xiaotian/CelestialFlow/main/img/web_display.png)
90
90
 
91
91
  ### 运行测试示例
92
92
 
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "celestialflow"
7
- version = "3.0.3"
7
+ version = "3.0.4"
8
8
  description = "A flexible GRAPH-based task orchestration framework."
9
9
  readme = "README.md"
10
10
  license = { text = "MIT" }
@@ -10,7 +10,12 @@ from .task_structure import (
10
10
  TaskGrid,
11
11
  )
12
12
  from .task_types import TerminationSignal
13
- from .task_tools import load_task_by_stage, load_task_by_error, make_hashable, format_table
13
+ from .task_tools import (
14
+ load_task_by_stage,
15
+ load_task_by_error,
16
+ make_hashable,
17
+ format_table,
18
+ )
14
19
  from .task_web import TaskWebServer
15
20
 
16
21
  __all__ = [
@@ -2,21 +2,14 @@ import time
2
2
  import multiprocessing
3
3
  from collections import defaultdict, deque
4
4
  from datetime import datetime
5
- from multiprocessing import Value as MPValue, Lock as MPLock
6
5
  from multiprocessing import Queue as MPQueue
7
- from typing import Any, Dict, List, Tuple
6
+ from typing import Any, Dict, List
8
7
 
9
8
  from .task_manage import TaskManager
10
- from .task_nodes import TaskSplitter
11
9
  from .task_report import TaskReporter
12
10
  from .task_logging import LogListener, TaskLogger
13
- from .task_types import (
14
- StageStatus,
15
- ValueWrapper,
16
- SumCounter,
17
- TerminationSignal,
18
- TERMINATION_SIGNAL
19
- )
11
+ from .task_queue import TaskQueue
12
+ from .task_types import StageStatus, TerminationSignal, TERMINATION_SIGNAL
20
13
  from .task_tools import (
21
14
  format_duration,
22
15
  format_timestamp,
@@ -29,6 +22,8 @@ from .task_tools import (
29
22
  is_directed_acyclic_graph,
30
23
  compute_node_levels,
31
24
  cluster_by_value_sorted,
25
+ load_task_by_stage,
26
+ load_task_by_error
32
27
  )
33
28
 
34
29
 
@@ -73,8 +68,8 @@ class TaskGraph:
73
68
  self.processes: List[multiprocessing.Process] = []
74
69
 
75
70
  self.init_dict()
76
- self.init_resources()
77
71
  self.init_log()
72
+ self.init_resources()
78
73
 
79
74
  def init_dict(self):
80
75
  """
@@ -83,25 +78,11 @@ class TaskGraph:
83
78
  self.stages_status_dict: Dict[str, dict] = defaultdict(
84
79
  dict
85
80
  ) # 用于保存每个节点的状态信息
86
- self.stage_extra_stats = defaultdict(dict) # 用于保存每个阶段的额外统计信息
87
- self.last_status_dict = {} # 用于保存每个节点的最后状态信息
88
-
89
- self.edge_queue_map: Dict[Tuple[str, str], MPQueue] = (
90
- {}
91
- ) # 用于保存每个节点到下一个节点的队列
92
-
93
- self.stage_locks = {} # 锁,用于控制每个阶段success_counter的并发
94
- self.stage_task_counter = {} # 用于保存每个阶段处理的任务数
95
- self.stage_success_counter = {} # 用于保存每个阶段成功处理的任务数
96
- self.stage_error_counter = {} # 用于保存每个阶段失败处理的任务数
97
- self.stage_duplicate_counter = {} # 用于保存每个阶段重复处理的任务数
98
-
99
- self.error_timeline_dict: Dict[str, list] = defaultdict(
100
- list
101
- ) # 用于保存错误到出现该错误任务的映射
102
- self.all_stage_error_dict: Dict[str, dict] = defaultdict(
81
+ self.last_status_dict: Dict[str, dict] = defaultdict(
103
82
  dict
104
- ) # 用于保存节点到节点失败任务的映射
83
+ ) # 用于保存每个节点的上一次状态信息
84
+
85
+ self.error_data: List[dict] = []
105
86
 
106
87
  def init_resources(self):
107
88
  """
@@ -110,66 +91,56 @@ class TaskGraph:
110
91
  self.fail_queue = MPQueue()
111
92
 
112
93
  visited_stages = set()
113
- queue = deque(self.root_stages) # BFS 用队列代替递归
94
+ queue = deque(self.root_stages)
114
95
 
96
+ # BFS 连接
115
97
  while queue:
116
98
  stage = queue.popleft()
117
99
  stage_tag = stage.get_stage_tag()
118
100
  if stage_tag in visited_stages:
119
101
  continue
120
102
 
103
+ # 刷新所有 counter
104
+ stage.reset_counter()
105
+
121
106
  # 记录节点
122
107
  self.stages_status_dict[stage_tag]["stage"] = stage
123
-
124
- # 初始化 counters(全部用 MPValue)
125
- self.stage_task_counter[stage_tag] = SumCounter()
126
- self.stage_success_counter[stage_tag] = self.stage_success_counter.get(
127
- stage_tag, MPValue("i", 0)
108
+ self.stages_status_dict[stage_tag]["in_queue"] = TaskQueue(
109
+ queue_list=[],
110
+ queue_tag=[],
111
+ logger_queue=self.log_listener.get_queue(),
112
+ stage_tag=stage_tag,
113
+ direction="in",
128
114
  )
129
- self.stage_error_counter[stage_tag] = MPValue("i", 0)
130
- self.stage_duplicate_counter[stage_tag] = MPValue("i", 0)
131
- self.stage_locks[stage_tag] = MPLock()
132
115
 
133
- self.stage_extra_stats[stage_tag] = self.stage_extra_stats.get(
134
- stage_tag, {}
116
+ self.stages_status_dict[stage_tag]["out_queue"] = TaskQueue(
117
+ queue_list=[],
118
+ queue_tag=[],
119
+ logger_queue=self.log_listener.get_queue(),
120
+ stage_tag=stage_tag,
121
+ direction="out",
135
122
  )
136
- if isinstance(stage, TaskSplitter):
137
- self.stage_extra_stats[stage_tag].setdefault(
138
- "split_output_count", MPValue("i", 0)
139
- )
123
+ visited_stages.add(stage_tag)
140
124
 
141
- # 为每个边 (prev -> stage) 创建队列
142
- for prev_stage in stage.prev_stages:
143
- prev_tag = prev_stage.get_stage_tag() if prev_stage else None
144
- self.edge_queue_map[(prev_tag, stage_tag)] = MPQueue()
125
+ queue.extend(stage.next_stages)
145
126
 
146
- if isinstance(prev_stage, TaskSplitter):
147
- self.stage_extra_stats[prev_tag] = self.stage_extra_stats.get(
148
- prev_tag, {}
149
- )
150
- self.stage_extra_stats[prev_tag].setdefault(
151
- "split_output_count", MPValue("i", 0)
152
- )
153
- self.stage_task_counter[stage_tag].add_counter(
154
- self.stage_extra_stats[prev_tag]["split_output_count"]
155
- )
156
- else:
157
- # 确保上游 success_counter 已存在
158
- self.stage_success_counter[prev_tag] = (
159
- self.stage_success_counter.get(prev_tag, MPValue("i", 0))
160
- )
161
- self.stage_task_counter[stage_tag].add_counter(
162
- self.stage_success_counter[prev_tag]
163
- )
127
+ for stage_tag in self.stages_status_dict:
128
+ stage: TaskManager = self.stages_status_dict[stage_tag]["stage"]
129
+ in_queue: TaskQueue = self.stages_status_dict[stage_tag]["in_queue"]
164
130
 
165
- if not stage.prev_stages:
166
- # 起点节点
167
- self.edge_queue_map[(None, stage_tag)] = MPQueue()
131
+ # 遍历每个前驱,创建边队列
132
+ for prev_stage in stage.prev_stages:
133
+ prev_stage_tag = prev_stage.get_stage_tag() if prev_stage else None
134
+ q = MPQueue()
168
135
 
169
- visited_stages.add(stage_tag)
136
+ # sink side
137
+ in_queue.add_queue(q, prev_stage_tag)
170
138
 
171
- for next_stage in stage.next_stages:
172
- queue.append(next_stage)
139
+ # source side
140
+ if prev_stage is not None:
141
+ self.stages_status_dict[prev_stage_tag]["out_queue"].add_queue(
142
+ q, stage_tag
143
+ )
173
144
 
174
145
  def init_log(self, level="INFO"):
175
146
  """
@@ -184,7 +155,7 @@ class TaskGraph:
184
155
  """
185
156
  初始化任务图结构
186
157
  """
187
- self.structure_graph = build_structure_graph(self.root_stages)
158
+ self.structure_json = build_structure_graph(self.root_stages)
188
159
 
189
160
  def set_root_stages(self, root_stages: List[TaskManager]):
190
161
  """
@@ -242,19 +213,6 @@ class TaskGraph:
242
213
  set_subsequent_stage_mode(root_stage)
243
214
  self.init_structure_graph()
244
215
 
245
- def put_termination(self, tag):
246
- """
247
- 放入终止信号
248
-
249
- :param tag: 阶段标签
250
- """
251
- preg_stages: List[TaskManager] = self.stages_status_dict[tag]["stage"].prev_stages
252
-
253
- for prev_stage in preg_stages:
254
- prev_tag = prev_stage.get_stage_tag() if prev_stage else None
255
- self.edge_queue_map[(prev_tag, tag)].put(TERMINATION_SIGNAL)
256
- self.task_logger._log("TRACE", f"TERMINATION_SIGNAL put into {(prev_tag, tag)}")
257
-
258
216
  def put_stage_queue(self, tasks_dict: dict, put_termination_signal=True):
259
217
  """
260
218
  将任务放入队列
@@ -263,26 +221,24 @@ class TaskGraph:
263
221
  :param put_termination_signal: 是否放入终止信号
264
222
  """
265
223
  for tag, tasks in tasks_dict.items():
266
- prev_stage: TaskManager = self.stages_status_dict[tag]["stage"].prev_stages[
267
- 0
268
- ]
269
- prev_tag = prev_stage.get_stage_tag() if prev_stage else None
224
+ stage: TaskManager = self.stages_status_dict[tag]["stage"]
225
+ in_queue: TaskQueue = self.stages_status_dict[tag]["in_queue"]
226
+
270
227
  for task in tasks:
271
228
  if isinstance(task, TerminationSignal):
272
- self.put_termination(tag)
229
+ in_queue.put(TERMINATION_SIGNAL)
273
230
  continue
274
231
 
275
- self.edge_queue_map[(prev_tag, tag)].put(make_hashable(task))
276
- self.task_logger._log("TRACE", f"{task} put into {(prev_tag, tag)}")
277
- self.stage_task_counter[tag] = self.stage_task_counter.get(
278
- tag, SumCounter()
279
- )
280
- self.stage_task_counter[tag].add_init_value(1)
232
+ in_queue.put_first(make_hashable(task))
233
+ stage.task_counter.add_init_value(1)
281
234
 
282
235
  if put_termination_signal:
283
236
  for root_stage in self.root_stages:
284
237
  root_stage_tag = root_stage.get_stage_tag()
285
- self.put_termination(root_stage_tag)
238
+ root_in_queue: TaskQueue = self.stages_status_dict[root_stage_tag][
239
+ "in_queue"
240
+ ]
241
+ root_in_queue.put(TERMINATION_SIGNAL)
286
242
 
287
243
  def start_graph(self, init_tasks_dict: dict, put_termination_signal: bool = True):
288
244
  """
@@ -347,40 +303,20 @@ class TaskGraph:
347
303
  def _execute_stage(self, stage: TaskManager):
348
304
  """
349
305
  执行单个节点
350
-
306
+
351
307
  :param stage: 节点
352
308
  """
353
309
  stage_tag = stage.get_stage_tag()
354
310
 
355
- # 输入输出队列
356
- input_queues = [
357
- self.edge_queue_map[(prev.get_stage_tag() if prev else None, stage_tag)]
358
- for prev in stage.prev_stages
359
- ]
360
- output_queues = (
361
- [
362
- self.edge_queue_map[(stage_tag, next_stage.get_stage_tag())]
363
- for next_stage in stage.next_stages
364
- ]
365
- if stage.next_stages
366
- else []
367
- )
368
-
369
311
  logger_queue = self.log_listener.get_queue()
370
312
 
313
+ # 输入输出队列
314
+ input_queues = self.stages_status_dict[stage_tag]["in_queue"]
315
+ output_queues = self.stages_status_dict[stage_tag]["out_queue"]
316
+
371
317
  self.stages_status_dict[stage_tag]["status"] = StageStatus.RUNNING
372
318
  self.stages_status_dict[stage_tag]["start_time"] = time.time()
373
319
 
374
- # counter 都在 init_resources 里初始化完了,这里直接用
375
- stage.init_counter(
376
- self.stage_task_counter[stage_tag],
377
- self.stage_success_counter[stage_tag],
378
- self.stage_error_counter[stage_tag],
379
- self.stage_duplicate_counter[stage_tag],
380
- self.stage_locks[stage_tag],
381
- self.stage_extra_stats[stage_tag],
382
- )
383
-
384
320
  if stage.stage_mode == "process":
385
321
  p = multiprocessing.Process(
386
322
  target=stage.start_stage,
@@ -416,16 +352,23 @@ class TaskGraph:
416
352
  stage_status["status"] = StageStatus.STOPPED # 已停止
417
353
 
418
354
  # 3️⃣ 收集并持久化每个 stage 中未消费的任务
419
- # for stage_tag, stage_status in self.stages_status_dict.items():
420
- # queue: MPQueue = stage_status["task_queue"]
421
- # while not queue.empty():
422
- # try:
423
- # task = queue.get_nowait()
424
- # self.task_logger._log("DEBUG", f"获取 {stage_tag} 剩余任务: {task}")
355
+ for stage_tag, stage_status in self.stages_status_dict.items():
356
+ in_queue: TaskQueue = stage_status["in_queue"]
357
+
358
+ # 用你刚才统一的 drain() 提取当前剩余任务
359
+ remaining_sources = in_queue.drain()
360
+
361
+ # 如无剩余,跳过
362
+ if not remaining_sources:
363
+ continue
364
+
365
+ # 持久化逻辑(写日志 / 存储到全局 structure)
366
+ for source in remaining_sources:
367
+ task_str = str(source)
368
+ error_info = f"(UnconsumeError)"
369
+ timestamp = time.time()
425
370
 
426
- # self._persist_unconsumed_task(stage_tag, task)
427
- # except Exception as e:
428
- # self.task_logger._log("WARNING", f"获取 {stage_tag} 剩余任务失败: {e}")
371
+ self._persist_single_failure(task_str, error_info, stage_tag, timestamp)
429
372
 
430
373
  def release_resources(self):
431
374
  """
@@ -446,13 +389,13 @@ class TaskGraph:
446
389
  task_str = item["task"]
447
390
  error_info = item["error_info"]
448
391
  timestamp = item["timestamp"]
449
- error_key = (error_info, stage_tag)
450
392
 
451
- if task_str not in self.error_timeline_dict[error_key]:
452
- self.error_timeline_dict[error_key].append((task_str, timestamp))
453
-
454
- if task_str not in self.all_stage_error_dict[stage_tag]:
455
- self.all_stage_error_dict[stage_tag][task_str] = error_key
393
+ self.error_data.append({
394
+ "timestamp": timestamp,
395
+ "node": stage_tag,
396
+ "error": error_info,
397
+ "task_id": task_str if len(task_str) < 100 else task_str[:100] + "...",
398
+ })
456
399
 
457
400
  self._persist_single_failure(task_str, error_info, stage_tag, timestamp)
458
401
 
@@ -460,17 +403,21 @@ class TaskGraph:
460
403
  """
461
404
  在运行开始时写入任务结构元信息到 jsonl 文件
462
405
  """
406
+ date_str = datetime.fromtimestamp(self.start_time).strftime("%Y-%m-%d")
407
+ time_str = datetime.fromtimestamp(self.start_time).strftime("%H-%M-%S-%f")[:-3]
408
+ self.error_jsonl_path = f"./fallback/{date_str}/realtime_errors({time_str}).jsonl"
409
+
463
410
  log_item = {
464
411
  "timestamp": datetime.now().isoformat(),
465
412
  "structure": self.get_structure_json(),
466
413
  }
467
414
  append_jsonl_log(
468
- log_item, self.start_time, "./fallback", "realtime_errors", self.task_logger
415
+ log_item, self.error_jsonl_path, self.task_logger
469
416
  )
470
417
 
471
418
  def _persist_single_failure(self, task_str, error_info, stage_tag, timestamp):
472
419
  """
473
- 增量写入单条错误日志到每日文件中
420
+ 增量写入单条错误日志到 jsonl 文件中
474
421
 
475
422
  :param task_str: 任务字符串
476
423
  :param error_info: 错误信息
@@ -484,48 +431,20 @@ class TaskGraph:
484
431
  "task": task_str,
485
432
  }
486
433
  append_jsonl_log(
487
- log_item, self.start_time, "./fallback", "realtime_errors", self.task_logger
434
+ log_item, self.error_jsonl_path, self.task_logger
488
435
  )
489
436
 
490
- def _persist_unconsumed_task(self, stage_tag, task):
437
+ def get_error_data(self):
491
438
  """
492
- 写入单个未消费任务到 JSONL 文件
493
-
494
- :param stage_tag: 阶段标签
495
- :param task: 任务对象
439
+ 返回错误数据
496
440
  """
497
- log_item = {
498
- "timestamp": datetime.now().isoformat(),
499
- "stage": stage_tag,
500
- "task": str(task),
501
- }
502
- append_jsonl_log(
503
- log_item, self.start_time, "./fallback", "leftover_tasks", self.task_logger
504
- )
441
+ return self.error_data
505
442
 
506
- def get_error_timeline_dict(self):
507
- """
508
- 返回最终错误字典
509
- """
510
- return dict(self.error_timeline_dict)
511
-
512
- def get_all_stage_error_dict(self):
513
- """
514
- 返回最终失败字典
515
- """
516
- return dict(self.all_stage_error_dict)
443
+ def get_fail_by_stage_dict(self):
444
+ return load_task_by_stage(self.error_jsonl_path)
517
445
 
518
446
  def get_fail_by_error_dict(self):
519
- return {
520
- key: [a for a, _ in tuple_list]
521
- for key, tuple_list in self.get_error_timeline_dict().items()
522
- }
523
-
524
- def get_fail_by_stage_dict(self):
525
- return {
526
- stage: list(inner_dict.keys())
527
- for stage, inner_dict in self.get_all_stage_error_dict().items()
528
- }
447
+ return load_task_by_error(self.error_jsonl_path)
529
448
 
530
449
  def get_status_dict(self) -> Dict[str, dict]:
531
450
  """
@@ -543,10 +462,10 @@ class TaskGraph:
543
462
 
544
463
  status = stage_status_dict.get("status", StageStatus.NOT_STARTED)
545
464
 
546
- input = self.stage_task_counter.get(tag, ValueWrapper()).value
547
- successed = self.stage_success_counter.get(tag, ValueWrapper()).value
548
- failed = self.stage_error_counter.get(tag, ValueWrapper()).value
549
- duplicated = self.stage_duplicate_counter.get(tag, ValueWrapper()).value
465
+ input = stage.task_counter.value
466
+ successed = stage.success_counter.value
467
+ failed = stage.error_counter.value
468
+ duplicated = stage.duplicate_counter.value
550
469
  processed = successed + failed + duplicated
551
470
  pending = max(0, input - processed)
552
471
 
@@ -633,13 +552,13 @@ class TaskGraph:
633
552
  }
634
553
 
635
554
  def get_structure_json(self):
636
- return self.structure_graph
555
+ return self.structure_json
637
556
 
638
557
  def get_structure_list(self):
639
- return format_structure_list_from_graph(self.structure_graph)
558
+ return format_structure_list_from_graph(self.structure_json)
640
559
 
641
560
  def get_networkx_graph(self):
642
- return format_networkx_graph(self.structure_graph)
561
+ return format_networkx_graph(self.structure_json)
643
562
 
644
563
  def analyze_graph(self):
645
564
  """
@@ -650,8 +569,8 @@ class TaskGraph:
650
569
 
651
570
  self.isDAG = is_directed_acyclic_graph(networkx_graph)
652
571
  if self.isDAG:
653
- self.stage_level_dict = compute_node_levels(networkx_graph)
654
- self.layers_dict = cluster_by_value_sorted(self.stage_level_dict)
572
+ stage_level_dict = compute_node_levels(networkx_graph)
573
+ self.layers_dict = cluster_by_value_sorted(stage_level_dict)
655
574
 
656
575
  def test_methods(
657
576
  self,
@@ -681,10 +600,10 @@ class TaskGraph:
681
600
  self.init_env()
682
601
  self.set_graph_mode(stage_mode, execution_mode)
683
602
  self.start_graph(init_tasks_dict)
603
+ fail_by_stage_dict.update(self.get_fail_by_stage_dict())
604
+ fail_by_error_dict.update(self.get_fail_by_error_dict())
684
605
 
685
606
  time_list.append(time.time() - start_time)
686
- fail_by_error_dict.update(self.get_fail_by_error_dict())
687
- fail_by_stage_dict.update(self.get_fail_by_stage_dict())
688
607
 
689
608
  test_table_list.append(time_list)
690
609
 
@@ -694,6 +613,6 @@ class TaskGraph:
694
613
  execution_modes,
695
614
  r"stage\execution",
696
615
  )
697
- results["Fail error dict"] = fail_by_error_dict
698
616
  results["Fail stage dict"] = fail_by_stage_dict
617
+ results["Fail error dict"] = fail_by_error_dict
699
618
  return results
@@ -13,6 +13,7 @@ class LogListener:
13
13
  """
14
14
  日志监听进程,用于将日志写入文件
15
15
  """
16
+
16
17
  def __init__(self, level="INFO"):
17
18
  now = strftime("%Y-%m-%d", localtime())
18
19
  self.log_path = f"logs/task_logger({now}).log"
@@ -66,7 +67,7 @@ class TaskLogger:
66
67
 
67
68
  # ==== manager ====
68
69
  def start_manager(self, func_name, task_num, execution_mode, worker_limit):
69
- text = f"'{func_name}' start {task_num} tasks by {execution_mode}"
70
+ text = f"'Manager[{func_name}]' start {task_num} tasks by {execution_mode}"
70
71
  text += f"({worker_limit} workers)." if execution_mode != "serial" else "."
71
72
  self._log("INFO", text)
72
73
 
@@ -81,20 +82,19 @@ class TaskLogger:
81
82
  ):
82
83
  self._log(
83
84
  "INFO",
84
- f"'{func_name}' end tasks by {execution_mode}. Use {use_time:.2f} second. "
85
+ f"'Manager[{func_name}]' end tasks by {execution_mode}. Use {use_time:.2f} second. "
85
86
  f"{success_num} tasks successed, {failed_num} tasks failed, {duplicated_num} tasks duplicated.",
86
87
  )
87
88
 
88
89
  # ==== stage ====
89
- def start_stage(self, stage_name, func_name, execution_mode, worker_limit):
90
- text = f"The {stage_name} in '{func_name}' start tasks by {execution_mode}"
90
+ def start_stage(self, stage_tag, execution_mode, worker_limit):
91
+ text = f"'{stage_tag}' start tasks by {execution_mode}"
91
92
  text += f"({worker_limit} workers)." if execution_mode != "serial" else "."
92
93
  self._log("INFO", text)
93
94
 
94
95
  def end_stage(
95
96
  self,
96
- stage_name,
97
- func_name,
97
+ stage_tag,
98
98
  execution_mode,
99
99
  use_time,
100
100
  success_num,
@@ -103,7 +103,7 @@ class TaskLogger:
103
103
  ):
104
104
  self._log(
105
105
  "INFO",
106
- f"The {stage_name} in '{func_name}' end tasks by {execution_mode}. Use {use_time:.2f} second. "
106
+ f"'{stage_tag}' end tasks by {execution_mode}. Use {use_time:.2f} second. "
107
107
  f"{success_num} tasks successed, {failed_num} tasks failed, {duplicated_num} tasks duplicated.",
108
108
  )
109
109
 
@@ -152,3 +152,31 @@ class TaskLogger:
152
152
  "SUCCESS",
153
153
  f"In '{func_name}', Task {task_info} has split into {split_count} parts. Used {use_time:.2f} seconds.",
154
154
  )
155
+
156
+ # ==== queue ====
157
+ def put_source(self, source, queue_tag, stage_tag, direction):
158
+ if isinstance(source, TerminationSignal):
159
+ source = "TerminationSignal"
160
+
161
+ edge = f"'{queue_tag}' -> '{stage_tag}'" if direction == "in" else f"'{stage_tag}' -> '{queue_tag}'"
162
+ self._log(
163
+ "TRACE",
164
+ f"Put {source} into Edge({edge})."
165
+ )
166
+
167
+ def get_source(self, source, queue_tag, stage_tag):
168
+ if isinstance(source, TerminationSignal):
169
+ source = "TerminationSignal"
170
+
171
+ edge = f"'{queue_tag}' -> '{stage_tag}'"
172
+ self._log(
173
+ "TRACE",
174
+ f"Get {source} from Edge({edge})"
175
+ )
176
+
177
+ def get_source_error(self, queue_tag, stage_tag, exception):
178
+ exception_text = str(exception).replace("\n", " ")
179
+ self._log(
180
+ "WARNING",
181
+ f"Error get from Edge({queue_tag} -> {stage_tag}): ({type(exception).__name__}){exception_text}",
182
+ )