celestialflow 3.0.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,39 @@
1
+ # -*- coding: utf-8 -*-
2
+ # 版本 3.01
3
+ # 作者:Mr-xiaotian, GPT-4o, GPT-5
4
+ # 时间:11/5/2025
5
+ # Github: https://github.com/Mr-xiaotian/CelestialFlow
6
+
7
+ from .task_graph import TaskGraph
8
+ from .task_manage import TaskManager
9
+ from .task_nodes import TaskSplitter, TaskRedisTransfer
10
+ from .task_structure import (
11
+ TaskChain,
12
+ TaskLoop,
13
+ TaskCross,
14
+ TaskComplete,
15
+ TaskWheel,
16
+ TaskGrid,
17
+ )
18
+ from .task_types import TerminationSignal
19
+ from .task_tools import load_task_by_stage, load_task_by_error, make_hashable, format_table
20
+ from .task_web import TaskWebServer
21
+
22
+ __all__ = [
23
+ "TaskGraph",
24
+ "TaskChain",
25
+ "TaskLoop",
26
+ "TaskCross",
27
+ "TaskComplete",
28
+ "TaskWheel",
29
+ "TaskGrid",
30
+ "TaskManager",
31
+ "TaskSplitter",
32
+ "TaskRedisTransfer",
33
+ "TerminationSignal",
34
+ "TaskWebServer",
35
+ "load_task_by_stage",
36
+ "load_task_by_error",
37
+ "make_hashable",
38
+ "format_table",
39
+ ]
@@ -0,0 +1,665 @@
1
+ import time
2
+ import multiprocessing
3
+ from collections import defaultdict, deque
4
+ from datetime import datetime
5
+ from multiprocessing import Value as MPValue, Lock as MPLock
6
+ from multiprocessing import Queue as MPQueue
7
+ from typing import Any, Dict, List, Tuple
8
+
9
+ from .task_manage import TaskManager
10
+ from .task_nodes import TaskSplitter
11
+ from .task_report import TaskReporter
12
+ from .task_logging import LogListener, TaskLogger
13
+ from .task_types import (
14
+ StageStatus,
15
+ ValueWrapper,
16
+ SumCounter,
17
+ TerminationSignal,
18
+ TERMINATION_SIGNAL
19
+ )
20
+ from .task_tools import (
21
+ format_duration,
22
+ format_timestamp,
23
+ cleanup_mpqueue,
24
+ make_hashable,
25
+ build_structure_graph,
26
+ format_structure_list_from_graph,
27
+ append_jsonl_log,
28
+ format_networkx_graph,
29
+ is_directed_acyclic_graph,
30
+ compute_node_levels,
31
+ cluster_by_value_sorted,
32
+ )
33
+
34
+
35
+ class TaskGraph:
36
def __init__(self, root_stages: List[TaskManager], layout_mode: str = "process"):
    """
    Build a TaskGraph from its entry stages.

    A TaskGraph is the set of TaskManager nodes reachable from the given
    roots. It can describe parallel, serial or layered execution flows.

    Parameters
    ----------
    root_stages : List[TaskManager]
        Entry nodes of the graph. Several roots (a forest) are allowed;
        the rest of the graph is discovered by following ``next_stages``.

    layout_mode : str, optional, default = 'process'
        Scheduling layout for the graph:

        - 'process': every node is launched at once and the data flow
          between nodes is driven by the edge queues.
        - 'serial': nodes are launched layer by layer; a layer is started
          only after the previous layer has finished. Requires a DAG.

    Notes
    -----
    ``set_layout_mode`` as written does not raise for an unknown mode or
    for 'serial' on a cyclic graph; it falls back to 'process'.
    """
    # Roots come first: every later step walks the graph starting from them.
    self.set_root_stages(root_stages)

    # Runtime state (counters, queues, logging), then the structure view.
    self.init_env()
    self.init_structure_graph()

    # analyze_graph() sets self.isDAG, which set_layout_mode() consults.
    self.analyze_graph()
    self.set_layout_mode(layout_mode)

    # Reporter is created with its defaults (reporting switched off).
    self.set_reporter()
74
+
75
def init_env(self):
    """
    Reset the per-run environment.

    Clears the list of spawned stage processes, then rebuilds the
    bookkeeping dictionaries, the shared resources and the logging objects,
    in that order.
    """
    self.processes: List[multiprocessing.Process] = []

    # Order matters: resources are stored in the dictionaries created first.
    for prepare in (self.init_dict, self.init_resources, self.init_log):
        prepare()
84
+
85
def init_dict(self):
    """
    Create the empty bookkeeping containers used during a run.
    """
    # --- per-stage state -------------------------------------------------
    # stage tag -> status information for that node
    self.stages_status_dict: Dict[str, dict] = defaultdict(dict)
    # stage tag -> extra statistics for that stage
    self.stage_extra_stats = defaultdict(dict)
    # most recent status snapshot of every node
    self.last_status_dict = {}

    # --- edges -----------------------------------------------------------
    # (upstream tag, downstream tag) -> queue carrying tasks along that edge
    self.edge_queue_map: Dict[Tuple[str, str], MPQueue] = {}

    # --- counters --------------------------------------------------------
    # locks guarding concurrent updates of each stage's success counter
    self.stage_locks = {}
    # number of tasks handed to each stage
    self.stage_task_counter = {}
    # number of tasks each stage processed successfully
    self.stage_success_counter = {}
    # number of tasks each stage failed on
    self.stage_error_counter = {}
    # number of duplicate tasks seen by each stage
    self.stage_duplicate_counter = {}

    # --- failures --------------------------------------------------------
    # error -> tasks on which that error occurred
    self.error_timeline_dict: Dict[str, list] = defaultdict(list)
    # stage -> failed tasks of that stage
    self.all_stage_error_dict: Dict[str, dict] = defaultdict(dict)
111
+
112
def init_resources(self):
    """
    Allocate the shared resources of every stage reachable from the roots.

    The graph is walked breadth-first. For each stage this creates its
    counters and lock, one queue per incoming edge, and wires the stage's
    task counter to the counter that measures its upstream output: the
    ``split_output_count`` of a TaskSplitter, otherwise the upstream
    success counter.
    """
    self.fail_queue = MPQueue()

    def shared_int(mapping, key):
        # Return mapping[key], creating a zeroed shared integer if missing.
        if key not in mapping:
            mapping[key] = MPValue("i", 0)
        return mapping[key]

    seen_tags = set()
    pending = deque(self.root_stages)  # breadth-first, no recursion

    while pending:
        stage = pending.popleft()
        stage_tag = stage.get_stage_tag()
        if stage_tag in seen_tags:
            continue

        # Register the node itself
        self.stages_status_dict[stage_tag]["stage"] = stage

        # Counters and lock for this stage. The success counter may already
        # exist if a downstream stage was visited first.
        task_counter = SumCounter()
        self.stage_task_counter[stage_tag] = task_counter
        shared_int(self.stage_success_counter, stage_tag)
        self.stage_error_counter[stage_tag] = MPValue("i", 0)
        self.stage_duplicate_counter[stage_tag] = MPValue("i", 0)
        self.stage_locks[stage_tag] = MPLock()

        own_stats = self.stage_extra_stats.setdefault(stage_tag, {})
        if isinstance(stage, TaskSplitter):
            shared_int(own_stats, "split_output_count")

        # One queue per incoming edge (prev -> stage)
        for prev_stage in stage.prev_stages:
            prev_tag = prev_stage.get_stage_tag() if prev_stage else None
            self.edge_queue_map[(prev_tag, stage_tag)] = MPQueue()

            if isinstance(prev_stage, TaskSplitter):
                # A splitter feeds downstream with its split output count
                prev_stats = self.stage_extra_stats.setdefault(prev_tag, {})
                upstream_counter = shared_int(prev_stats, "split_output_count")
            else:
                # Make sure the upstream success counter exists
                upstream_counter = shared_int(self.stage_success_counter, prev_tag)
            task_counter.add_counter(upstream_counter)

        if not stage.prev_stages:
            # Entry node without any registered upstream
            self.edge_queue_map[(None, stage_tag)] = MPQueue()

        seen_tags.add(stage_tag)
        pending.extend(stage.next_stages)
179
+
180
def init_log(self):
    """
    Create the log listener (level "INFO") and a task logger bound to the
    listener's queue.
    """
    listener = LogListener(level="INFO")
    self.log_listener = listener
    self.task_logger = TaskLogger(listener.get_queue())
186
+
187
def init_structure_graph(self):
    """
    Build the structure description of the graph from the root stages and
    store it on ``self.structure_graph``.
    """
    roots = self.root_stages
    self.structure_graph = build_structure_graph(roots)
192
+
193
def set_root_stages(self, root_stages: List[TaskManager]):
    """
    Store the root stages of the graph.

    :param root_stages: list of entry stages
    """
    self.root_stages = root_stages
    for root in root_stages:
        if root.prev_stages:
            continue
        # A root without upstream gets ``None`` registered as its upstream,
        # so the rest of the class can treat it like any other stage.
        root.add_prev_stages(None)
202
+
203
def set_layout_mode(self, layout_mode: str):
    """
    Choose the scheduling layout of the graph.

    'serial' is honoured only when the graph is a DAG. Any other value,
    or 'serial' on a non-DAG, results in 'process'.

    :param layout_mode: requested layout, 'serial' or 'process'
    """
    serial_allowed = layout_mode == "serial" and self.isDAG
    self.layout_mode = "serial" if serial_allowed else "process"
212
+
213
def set_reporter(self, is_report=False, host="127.0.0.1", port=5000):
    """
    Configure the reporter.

    :param is_report: whether start_graph should start the reporter
    :param host: host passed to TaskReporter
    :param port: port passed to TaskReporter
    """
    self.is_report = is_report
    log_queue = self.log_listener.get_queue()
    self.reporter = TaskReporter(self, log_queue, host, port)
219
+
220
def set_graph_mode(self, stage_mode: str, execution_mode: str):
    """
    Apply one stage mode and one execution mode to every reachable stage.

    :param stage_mode: how a stage is run, 'serial' or 'process'
    :param execution_mode: how a stage runs its tasks, 'serial' or 'thread'
    """
    seen = set()

    def apply_from(node):
        # Depth-first: configure this node, then every unseen successor.
        node.set_stage_mode(stage_mode)
        node.set_execution_mode(execution_mode)
        seen.add(node)
        for successor in node.next_stages:
            if successor not in seen:
                apply_from(successor)

    for root in self.root_stages:
        apply_from(root)

    # Rebuild the structure description after the modes changed.
    # NOTE(review): assumed to run once after the loop — confirm.
    self.init_structure_graph()
241
+
242
def put_stage_queue(self, tasks_dict: dict, put_termination_signal=True):
    """
    Feed tasks into the input queues of the given stages.

    Tasks for a stage are put on the edge coming from that stage's first
    upstream entry. Each task that is not a termination signal adds one to
    the stage's task counter.

    :param tasks_dict: mapping of stage tag -> iterable of tasks
    :param put_termination_signal: when true, a termination signal is put
        on the first input edge of every root stage afterwards
    """
    for tag, tasks in tasks_dict.items():
        upstream = self.stages_status_dict[tag]["stage"].prev_stages[0]
        edge = (upstream.get_stage_tag() if upstream else None, tag)

        for task in tasks:
            self.edge_queue_map[edge].put(make_hashable(task))

            if isinstance(task, TerminationSignal):
                # Termination signals are logged but not counted as tasks
                self.task_logger._log(
                    "TRACE", f"TERMINATION_SIGNAL put into {edge}"
                )
                continue

            self.task_logger._log("TRACE", f"{task} put into {edge}")
            if tag not in self.stage_task_counter:
                self.stage_task_counter[tag] = SumCounter()
            self.stage_task_counter[tag].add_init_value(1)

    if not put_termination_signal:
        return

    for root in self.root_stages:
        first_prev = root.prev_stages[0]
        edge_key = (
            first_prev.get_stage_tag() if first_prev else None,
            root.get_stage_tag(),
        )
        self.edge_queue_map[edge_key].put(TERMINATION_SIGNAL)
        self.task_logger._log("TRACE", f"TERMINATION_SIGNAL put into {edge_key}")
278
+
279
def start_graph(self, init_tasks_dict: dict, put_termination_signal: bool = True):
    """
    Run the whole graph once.

    Starts logging (and the reporter when ``is_report`` is set), feeds the
    initial tasks and executes the stages. Cleanup always runs, whether or
    not an error occurred: processes are finalized, the reporter is stopped,
    the failure queue is consumed, resources are released and the logger is
    shut down.

    :param init_tasks_dict: mapping of stage tag -> initial tasks
    :param put_termination_signal: forwarded to put_stage_queue
    """
    # Set before the try so the cleanup path can always compute a duration;
    # otherwise an early failure would be masked by an error on
    # ``self.start_time``.
    self.start_time = time.time()
    try:
        self.log_listener.start()
        self.task_logger.start_graph(self.get_structure_list())
        self._persist_structure_metadata()
        if self.is_report:
            self.reporter.start()

        self.put_stage_queue(init_tasks_dict, put_termination_signal)
        self._excute_stages()

    finally:
        self.finalize_nodes()
        self.reporter.stop()
        self.handle_fail_queue()
        self.release_resources()

        # Kept inside ``finally`` so the listener is stopped on failure too.
        self.task_logger.end_graph(time.time() - self.start_time)
        self.log_listener.stop()
301
+
302
def _excute_stages(self):
    """
    Launch the stages according to ``self.layout_mode``.

    With the 'process' layout every stage is launched, then all spawned
    processes are joined. Otherwise the layers of ``self.layers_dict`` are
    run one after another; the processes spawned for a layer are joined
    before the next layer starts.
    """

    def wait_for(procs):
        # Join each process, mark its stage stopped and log the exit code.
        for proc in procs:
            proc.join()
            self.stages_status_dict[proc.name]["status"] = StageStatus.STOPPED
            self.task_logger._log("DEBUG", f"{proc.name} exitcode: {proc.exitcode}")

    if self.layout_mode == "process":
        # Default: launch every node in one go
        for stage_state in self.stages_status_dict.values():
            self._execute_stage(stage_state["stage"])
        wait_for(self.processes)
        return

    # Serial layout: run the graph layer by layer
    for layer_level, layer in self.layers_dict.items():
        self.task_logger.start_layer(layer, layer_level)
        layer_started = time.time()

        layer_procs = []
        for stage_tag in layer:
            stage = self.stages_status_dict[stage_tag]["stage"]
            self._execute_stage(stage)
            if stage.stage_mode == "process":
                # _execute_stage just appended this stage's process
                layer_procs.append(self.processes[-1])

        wait_for(layer_procs)
        self.task_logger.end_layer(layer, time.time() - layer_started)
332
+
333
def _execute_stage(self, stage: TaskManager):
    """
    Start a single stage.

    Collects the stage's input and output edge queues, marks it RUNNING,
    hands it its counters, and then either spawns a process for it (stage
    mode "process") or runs it inline and marks it STOPPED afterwards.

    :param stage: the stage to start
    """
    stage_tag = stage.get_stage_tag()

    # Queues on the incoming edges (a ``None`` upstream maps to tag None)
    input_queues = []
    for prev in stage.prev_stages:
        prev_tag = prev.get_stage_tag() if prev else None
        input_queues.append(self.edge_queue_map[(prev_tag, stage_tag)])

    # Queues on the outgoing edges; empty when there is no successor
    output_queues = []
    if stage.next_stages:
        for successor in stage.next_stages:
            output_queues.append(
                self.edge_queue_map[(stage_tag, successor.get_stage_tag())]
            )

    logger_queue = self.log_listener.get_queue()

    stage_state = self.stages_status_dict[stage_tag]
    stage_state["status"] = StageStatus.RUNNING
    stage_state["start_time"] = time.time()

    # All counters were created in init_resources; just hand them over
    stage.init_counter(
        self.stage_task_counter[stage_tag],
        self.stage_success_counter[stage_tag],
        self.stage_error_counter[stage_tag],
        self.stage_duplicate_counter[stage_tag],
        self.stage_locks[stage_tag],
        self.stage_extra_stats[stage_tag],
    )

    worker_args = (input_queues, output_queues, self.fail_queue, logger_queue)

    if stage.stage_mode != "process":
        # Inline execution: returns only when the stage is done
        stage.start_stage(*worker_args)
        stage_state["status"] = StageStatus.STOPPED
        return

    proc = multiprocessing.Process(
        target=stage.start_stage,
        args=worker_args,
        name=stage_tag,
    )
    proc.start()
    self.processes.append(proc)
378
+
379
def finalize_nodes(self):
    """
    Make sure no stage process keeps running and mark every stage stopped.

    A process that is still alive is terminated and given up to five
    seconds to exit. Returns None.
    """
    # 1) Stop whatever is still running. Terminating is only a fallback
    #    for processes that did not finish on their own.
    # NOTE(review): the exit-code DEBUG line is assumed to belong to the
    # "still alive" branch — confirm against the original indentation.
    for proc in self.processes:
        if not proc.is_alive():
            continue
        self.task_logger._log(
            "WARNING", f"检测到进程 {proc.name} 仍在运行, 尝试终止"
        )
        proc.terminate()
        proc.join(timeout=5)
        if proc.is_alive():
            self.task_logger._log("WARNING", f"进程 {proc.name} 仍未完全退出")
        self.task_logger._log("DEBUG", f"{proc.name} exitcode: {proc.exitcode}")

    # 2) Every node is now considered stopped
    for stage_state in self.stages_status_dict.values():
        stage_state["status"] = StageStatus.STOPPED

    # 3) Collecting and persisting the tasks left unconsumed in each stage
    #    queue is disabled; the earlier implementation was commented out.
411
+
412
def release_resources(self):
    """
    Release the queues: each stage releases its own, then the failure
    queue is cleaned up.
    """
    for stage_state in self.stages_status_dict.values():
        stage = stage_state["stage"]
        stage.release_queue()

    cleanup_mpqueue(self.fail_queue)
420
+
421
def handle_fail_queue(self):
    """
    Drain ``fail_queue`` and build the failure dictionaries.

    Each queued item is a dict with the keys ``stage_tag``, ``task``,
    ``error_info`` and ``timestamp``. For every item:

    - ``error_timeline_dict[(error_info, stage_tag)]`` gets one
      ``(task, timestamp)`` entry per distinct task;
    - ``all_stage_error_dict[stage_tag][task]`` records the first error key
      seen for that task;
    - the failure is persisted through ``_persist_single_failure``.
    """
    from queue import Empty

    # error key -> task strings already present in the timeline. The
    # timeline stores (task, timestamp) tuples, so comparing a bare task
    # string against the list would never find a duplicate.
    seen_by_error = {}

    while True:
        # ``empty()`` followed by ``get_nowait()`` is racy on a
        # multiprocessing queue; read until the queue reports Empty.
        try:
            item: dict = self.fail_queue.get_nowait()
        except Empty:
            break

        stage_tag = item["stage_tag"]
        task_str = item["task"]
        error_info = item["error_info"]
        timestamp = item["timestamp"]
        error_key = (error_info, stage_tag)

        timeline = self.error_timeline_dict[error_key]
        seen = seen_by_error.get(error_key)
        if seen is None:
            seen = seen_by_error[error_key] = {task for task, _ in timeline}
        if task_str not in seen:
            seen.add(task_str)
            timeline.append((task_str, timestamp))

        stage_errors = self.all_stage_error_dict[stage_tag]
        if task_str not in stage_errors:
            stage_errors[task_str] = error_key

        self._persist_single_failure(task_str, error_info, stage_tag, timestamp)
440
+
441
def _persist_structure_metadata(self):
    """
    Append the graph structure, stamped with the current time, to the
    "realtime_errors" JSONL log under ./fallback.
    """
    record = dict(
        timestamp=datetime.now().isoformat(),
        structure=self.get_structure_json(),
    )
    append_jsonl_log(
        record, self.start_time, "./fallback", "realtime_errors", self.task_logger
    )
452
+
453
def _persist_single_failure(self, task_str, error_info, stage_tag, timestamp):
    """
    Append one failure record to the "realtime_errors" JSONL log.

    :param task_str: the failed task
    :param error_info: the error that occurred
    :param stage_tag: tag of the stage that failed
    :param timestamp: time of the failure, converted with
        ``datetime.fromtimestamp``
    """
    occurred_at = datetime.fromtimestamp(timestamp).isoformat()
    record = dict(
        timestamp=occurred_at,
        stage=stage_tag,
        error=error_info,
        task=task_str,
    )
    append_jsonl_log(
        record, self.start_time, "./fallback", "realtime_errors", self.task_logger
    )
466
+
467
def _persist_unconsumed_task(self, stage_tag, task):
    """
    Append one unconsumed task to the "leftover_tasks" JSONL log.

    :param stage_tag: tag of the stage the task was waiting for
    :param task: the task; stored as ``str(task)``
    """
    record = dict(
        timestamp=datetime.now().isoformat(),
        stage=stage_tag,
        task=str(task),
    )
    append_jsonl_log(
        record, self.start_time, "./fallback", "leftover_tasks", self.task_logger
    )
479
+
480
def get_error_timeline_dict(self):
    """
    Return the error timeline as a plain dict (shallow copy).
    """
    return {key: entries for key, entries in self.error_timeline_dict.items()}
485
+
486
def get_all_stage_error_dict(self):
    """
    Return the per-stage failure mapping as a plain dict (shallow copy).
    """
    return {stage: errors for stage, errors in self.all_stage_error_dict.items()}
491
+
492
def get_fail_by_error_dict(self):
    """
    Return error key -> list of failed tasks, dropping the timestamps.
    """
    tasks_by_error = {}
    for error_key, entries in self.get_error_timeline_dict().items():
        tasks_by_error[error_key] = [task for task, _timestamp in entries]
    return tasks_by_error
497
+
498
def get_fail_by_stage_dict(self):
    """
    Return stage -> list of failed tasks of that stage.
    """
    tasks_by_stage = {}
    for stage, errors_by_task in self.get_all_stage_error_dict().items():
        tasks_by_stage[stage] = list(errors_by_task)
    return tasks_by_stage
503
+
504
def get_status_dict(self) -> Dict[str, dict]:
    """
    Build a status snapshot for every stage of the graph.

    The snapshot holds, per stage tag, the stage summary, the current
    counters, the change of each counter since the previous snapshot, timing
    figures and a history of at most 20 (timestamp, processed) points. The
    snapshot is also stored as ``self.last_status_dict``.

    Elapsed time is not measured with a clock: it grows by the reporter
    interval on each call where the previous snapshot still had pending
    tasks. NOTE(review): presumably this is called once per reporter
    interval — confirm against the reporter.
    """

    def read(counters, tag):
        # Missing counters fall back to a default ValueWrapper
        return counters.get(tag, ValueWrapper()).value

    snapshot = {}
    now = time.time()
    interval = self.reporter.interval

    for tag, stage_state in self.stages_status_dict.items():
        stage = stage_state["stage"]
        previous = self.last_status_dict.get(tag, {})

        status = stage_state.get("status", StageStatus.NOT_STARTED)

        received = read(self.stage_task_counter, tag)
        successed = read(self.stage_success_counter, tag)
        failed = read(self.stage_error_counter, tag)
        duplicated = read(self.stage_duplicate_counter, tag)
        processed = successed + failed + duplicated
        pending = max(0, received - processed)

        current = {
            "tasks_successed": successed,
            "tasks_failed": failed,
            "tasks_duplicated": duplicated,
            "tasks_processed": processed,
            "tasks_pending": pending,
        }
        # Change of every counter since the previous snapshot
        deltas = {
            f"add_{key}": value - previous.get(key, 0)
            for key, value in current.items()
        }

        # Elapsed time exists only once the stage has started, and grows
        # only if the previous snapshot still had pending tasks.
        started_at = stage_state.get("start_time", 0)
        elapsed = 0
        if started_at:
            elapsed = stage_state.get("elapsed_time", 0)
            if previous.get("tasks_pending", 0):
                elapsed += interval
        stage_state["elapsed_time"] = elapsed

        # Estimated remaining time
        if processed and pending:
            remaining = pending / processed * elapsed
        else:
            remaining = 0

        # Average pace: seconds per task when slow, tasks per second otherwise
        if not processed:
            avg_time_str = "N/A"
        elif elapsed / processed >= 1.0:
            avg_time_str = f"{elapsed / processed:.2f}s/it"
        else:
            rate = processed / elapsed if elapsed else 0
            avg_time_str = f"{rate:.2f}it/s"

        # Rolling history, capped at 20 points
        history = stage_state.get("history", [])
        history.append({"timestamp": now, "tasks_processed": processed})
        if len(history) > 20:
            history.pop(0)
        stage_state["history"] = history

        snapshot[tag] = {
            **stage.get_stage_summary(),
            "status": status,
            **current,
            **deltas,
            "start_time": format_timestamp(started_at),
            "elapsed_time": format_duration(elapsed),
            "remaining_time": format_duration(remaining),
            "task_avg_time": avg_time_str,
            "history": history,
        }

    self.last_status_dict = snapshot

    return snapshot
595
+
596
def get_graph_topology(self):
    """
    Return the topology facts of the graph: whether it is a DAG, the layout
    mode, the class name and the layer assignment.
    """
    topology = {"isDAG": self.isDAG, "layout_mode": self.layout_mode}
    topology["class_name"] = self.__class__.__name__
    topology["layers_dict"] = self.layers_dict
    return topology
603
+
604
def get_structure_json(self):
    """Return the stored structure description (the same object, not a copy)."""
    structure = self.structure_graph
    return structure
606
+
607
def get_structure_list(self):
    """Return the structure description formatted as a list."""
    structure = self.structure_graph
    return format_structure_list_from_graph(structure)
609
+
610
def get_networkx_graph(self):
    """Return the structure description converted by format_networkx_graph."""
    structure = self.structure_graph
    return format_networkx_graph(structure)
612
+
613
def analyze_graph(self):
    """
    Work out whether the graph is a DAG and, if so, its layers.

    Sets ``self.isDAG``. ``self.layers_dict`` is reset to an empty dict and
    filled only for a DAG, where ``self.stage_level_dict`` is set as well.
    """
    graph = self.get_networkx_graph()
    self.layers_dict = {}

    self.isDAG = is_directed_acyclic_graph(graph)
    if not self.isDAG:
        return

    self.stage_level_dict = compute_node_levels(graph)
    self.layers_dict = cluster_by_value_sorted(self.stage_level_dict)
621
+
622
def test_methods(
    self,
    init_tasks_dict: Dict[str, List],
    stage_modes: list = None,
    execution_modes: list = None,
) -> Dict[str, Any]:
    """
    Time the graph under every combination of stage mode and execution mode.

    For each combination the environment is re-initialised, the modes are
    applied and the graph is run once with ``init_tasks_dict``.

    :param init_tasks_dict: initial tasks, stage tag -> list of tasks
    :param stage_modes: stage modes to try, default ['serial', 'process']
    :param execution_modes: execution modes to try, default ['serial', 'thread']
    :return: dict with "Time table" (one row of durations per stage mode,
        plus the row labels, column labels and a corner label),
        "Fail error dict" and "Fail stage dict" (merged over all runs)
    """
    stage_modes = stage_modes or ["serial", "process"]
    execution_modes = execution_modes or ["serial", "thread"]

    timings = []
    failures_by_error = {}
    failures_by_stage = {}

    for stage_mode in stage_modes:
        row = []
        for execution_mode in execution_modes:
            began = time.time()
            self.init_env()
            self.set_graph_mode(stage_mode, execution_mode)
            self.start_graph(init_tasks_dict)
            row.append(time.time() - began)

            failures_by_error.update(self.get_fail_by_error_dict())
            failures_by_stage.update(self.get_fail_by_stage_dict())
        timings.append(row)

    return {
        "Time table": (
            timings,
            stage_modes,
            execution_modes,
            r"stage\execution",
        ),
        "Fail error dict": failures_by_error,
        "Fail stage dict": failures_by_stage,
    }