celestialflow 3.1.4__tar.gz → 3.1.5__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {celestialflow-3.1.4 → celestialflow-3.1.5}/PKG-INFO +13 -20
- {celestialflow-3.1.4 → celestialflow-3.1.5}/README.md +12 -19
- {celestialflow-3.1.4 → celestialflow-3.1.5}/pyproject.toml +2 -3
- {celestialflow-3.1.4 → celestialflow-3.1.5}/src/celestialflow/graph/core_graph.py +1 -4
- {celestialflow-3.1.4 → celestialflow-3.1.5}/src/celestialflow/graph/core_structure.py +10 -3
- {celestialflow-3.1.4 → celestialflow-3.1.5}/src/celestialflow/observability/core_report.py +16 -12
- {celestialflow-3.1.4 → celestialflow-3.1.5}/src/celestialflow/persistence/core_log.py +2 -2
- {celestialflow-3.1.4 → celestialflow-3.1.5}/src/celestialflow/runtime/core_runner.py +6 -6
- {celestialflow-3.1.4 → celestialflow-3.1.5}/src/celestialflow/runtime/util_estimators.py +9 -5
- {celestialflow-3.1.4 → celestialflow-3.1.5}/src/celestialflow/stage/core_executor.py +6 -6
- {celestialflow-3.1.4 → celestialflow-3.1.5}/src/celestialflow/stage/core_stage.py +1 -1
- {celestialflow-3.1.4 → celestialflow-3.1.5}/src/celestialflow/stage/core_stages.py +15 -1
- {celestialflow-3.1.4 → celestialflow-3.1.5}/src/celestialflow/utils/util_clone.py +1 -1
- celestialflow-3.1.5/src/celestialflow/web/config.json +35 -0
- {celestialflow-3.1.4 → celestialflow-3.1.5}/src/celestialflow/web/server.py +80 -15
- celestialflow-3.1.5/src/celestialflow/web/static/css/_colors.css +130 -0
- {celestialflow-3.1.4 → celestialflow-3.1.5}/src/celestialflow/web/static/css/base.css +302 -305
- {celestialflow-3.1.4 → celestialflow-3.1.5}/src/celestialflow/web/static/css/dashboard.css +346 -368
- {celestialflow-3.1.4 → celestialflow-3.1.5}/src/celestialflow/web/static/css/errors.css +60 -37
- {celestialflow-3.1.4 → celestialflow-3.1.5}/src/celestialflow/web/static/css/inject.css +599 -638
- {celestialflow-3.1.4 → celestialflow-3.1.5}/src/celestialflow/web/static/js/main.js +2 -21
- {celestialflow-3.1.4 → celestialflow-3.1.5}/src/celestialflow/web/static/js/task_errors.js +23 -9
- {celestialflow-3.1.4 → celestialflow-3.1.5}/src/celestialflow/web/static/js/task_history.js +9 -3
- {celestialflow-3.1.4 → celestialflow-3.1.5}/src/celestialflow/web/static/js/task_injection.js +1 -1
- {celestialflow-3.1.4 → celestialflow-3.1.5}/src/celestialflow/web/static/js/task_statuses.js +58 -36
- {celestialflow-3.1.4 → celestialflow-3.1.5}/src/celestialflow/web/static/js/task_structure.js +9 -3
- {celestialflow-3.1.4 → celestialflow-3.1.5}/src/celestialflow/web/static/js/task_summary.js +14 -8
- {celestialflow-3.1.4 → celestialflow-3.1.5}/src/celestialflow/web/static/js/task_topology.js +9 -3
- {celestialflow-3.1.4 → celestialflow-3.1.5}/src/celestialflow/web/static/js/utils.js +20 -3
- {celestialflow-3.1.4 → celestialflow-3.1.5}/src/celestialflow/web/static/ts/main.ts +3 -29
- {celestialflow-3.1.4 → celestialflow-3.1.5}/src/celestialflow/web/static/ts/task_errors.ts +24 -10
- {celestialflow-3.1.4 → celestialflow-3.1.5}/src/celestialflow/web/static/ts/task_history.ts +9 -4
- {celestialflow-3.1.4 → celestialflow-3.1.5}/src/celestialflow/web/static/ts/task_injection.ts +379 -379
- {celestialflow-3.1.4 → celestialflow-3.1.5}/src/celestialflow/web/static/ts/task_statuses.ts +174 -149
- {celestialflow-3.1.4 → celestialflow-3.1.5}/src/celestialflow/web/static/ts/task_structure.ts +9 -4
- {celestialflow-3.1.4 → celestialflow-3.1.5}/src/celestialflow/web/static/ts/task_summary.ts +14 -9
- {celestialflow-3.1.4 → celestialflow-3.1.5}/src/celestialflow/web/static/ts/task_topology.ts +9 -4
- {celestialflow-3.1.4 → celestialflow-3.1.5}/src/celestialflow/web/static/ts/utils.ts +186 -169
- {celestialflow-3.1.4 → celestialflow-3.1.5}/src/celestialflow/web/templates/index.html +16 -16
- {celestialflow-3.1.4 → celestialflow-3.1.5}/src/celestialflow.egg-info/PKG-INFO +13 -20
- {celestialflow-3.1.4 → celestialflow-3.1.5}/src/celestialflow.egg-info/SOURCES.txt +3 -1
- {celestialflow-3.1.4 → celestialflow-3.1.5}/tests/test_executor.py +5 -38
- {celestialflow-3.1.4 → celestialflow-3.1.5}/tests/test_stages.py +35 -115
- {celestialflow-3.1.4 → celestialflow-3.1.5}/tests/test_structure.py +39 -110
- celestialflow-3.1.5/tests/test_utils.py +237 -0
- celestialflow-3.1.4/src/celestialflow/web/static/css/_colors.css +0 -183
- {celestialflow-3.1.4 → celestialflow-3.1.5}/setup.cfg +0 -0
- {celestialflow-3.1.4 → celestialflow-3.1.5}/src/celestialflow/__init__.py +0 -0
- {celestialflow-3.1.4 → celestialflow-3.1.5}/src/celestialflow/graph/__init__.py +0 -0
- {celestialflow-3.1.4 → celestialflow-3.1.5}/src/celestialflow/graph/util_analysis.py +0 -0
- {celestialflow-3.1.4 → celestialflow-3.1.5}/src/celestialflow/graph/util_serialize.py +0 -0
- {celestialflow-3.1.4 → celestialflow-3.1.5}/src/celestialflow/observability/__init__.py +0 -0
- {celestialflow-3.1.4 → celestialflow-3.1.5}/src/celestialflow/persistence/__init__.py +0 -0
- {celestialflow-3.1.4 → celestialflow-3.1.5}/src/celestialflow/persistence/core_base.py +0 -0
- {celestialflow-3.1.4 → celestialflow-3.1.5}/src/celestialflow/persistence/core_fail.py +0 -0
- {celestialflow-3.1.4 → celestialflow-3.1.5}/src/celestialflow/persistence/util_constant.py +0 -0
- {celestialflow-3.1.4 → celestialflow-3.1.5}/src/celestialflow/persistence/util_jsonl.py +0 -0
- {celestialflow-3.1.4 → celestialflow-3.1.5}/src/celestialflow/runtime/__init__.py +0 -0
- {celestialflow-3.1.4 → celestialflow-3.1.5}/src/celestialflow/runtime/core_envelope.py +0 -0
- {celestialflow-3.1.4 → celestialflow-3.1.5}/src/celestialflow/runtime/core_metrics.py +0 -0
- {celestialflow-3.1.4 → celestialflow-3.1.5}/src/celestialflow/runtime/core_progress.py +0 -0
- {celestialflow-3.1.4 → celestialflow-3.1.5}/src/celestialflow/runtime/core_queue.py +0 -0
- {celestialflow-3.1.4 → celestialflow-3.1.5}/src/celestialflow/runtime/util_errors.py +0 -0
- {celestialflow-3.1.4 → celestialflow-3.1.5}/src/celestialflow/runtime/util_factories.py +0 -0
- {celestialflow-3.1.4 → celestialflow-3.1.5}/src/celestialflow/runtime/util_hash.py +0 -0
- {celestialflow-3.1.4 → celestialflow-3.1.5}/src/celestialflow/runtime/util_queue.py +0 -0
- {celestialflow-3.1.4 → celestialflow-3.1.5}/src/celestialflow/runtime/util_types.py +0 -0
- {celestialflow-3.1.4 → celestialflow-3.1.5}/src/celestialflow/stage/__init__.py +0 -0
- {celestialflow-3.1.4 → celestialflow-3.1.5}/src/celestialflow/utils/__init__.py +0 -0
- {celestialflow-3.1.4 → celestialflow-3.1.5}/src/celestialflow/utils/util_benchmark.py +0 -0
- {celestialflow-3.1.4 → celestialflow-3.1.5}/src/celestialflow/utils/util_collections.py +0 -0
- {celestialflow-3.1.4 → celestialflow-3.1.5}/src/celestialflow/utils/util_debug.py +0 -0
- {celestialflow-3.1.4 → celestialflow-3.1.5}/src/celestialflow/utils/util_format.py +0 -0
- {celestialflow-3.1.4 → celestialflow-3.1.5}/src/celestialflow/web/static/favicon.ico +0 -0
- {celestialflow-3.1.4 → celestialflow-3.1.5}/src/celestialflow/web/static/js/task_config.js +0 -0
- {celestialflow-3.1.4 → celestialflow-3.1.5}/src/celestialflow/web/static/js/web_config.js +0 -0
- {celestialflow-3.1.4 → celestialflow-3.1.5}/src/celestialflow/web/static/ts/globals.d.ts +0 -0
- {celestialflow-3.1.4 → celestialflow-3.1.5}/src/celestialflow/web/static/ts/web_config.ts +0 -0
- {celestialflow-3.1.4 → celestialflow-3.1.5}/src/celestialflow.egg-info/dependency_links.txt +0 -0
- {celestialflow-3.1.4 → celestialflow-3.1.5}/src/celestialflow.egg-info/entry_points.txt +0 -0
- {celestialflow-3.1.4 → celestialflow-3.1.5}/src/celestialflow.egg-info/requires.txt +0 -0
- {celestialflow-3.1.4 → celestialflow-3.1.5}/src/celestialflow.egg-info/top_level.txt +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: celestialflow
|
|
3
|
-
Version: 3.1.4
|
|
3
|
+
Version: 3.1.5
|
|
4
4
|
Summary: A flexible GRAPH-based task orchestration framework.
|
|
5
5
|
Author-email: Mr-xiaotian <mingxiaomingtian@gmail.com>
|
|
6
6
|
License: MIT
|
|
@@ -254,35 +254,28 @@ flowchart TD
|
|
|
254
254
|
<p align="center">
|
|
255
255
|
<img src="https://raw.githubusercontent.com/Mr-xiaotian/CelestialFlow/main/img/file_structure.svg" alt="FileStructure" />
|
|
256
256
|
<br/>
|
|
257
|
-
<em>celestial-flow 3.1.4</em>
|
|
257
|
+
<em>celestial-flow 3.1.5</em>
|
|
258
258
|
</p>
|
|
259
259
|
|
|
260
260
|
(该视图由我的另一个项目[CelestialVault](https://github.com/Mr-xiaotian/CelestialVault)中inst_file.FileTree.print_tree()生成。转换为图片则借助[Carbon](https://carbon.now.sh)。)
|
|
261
261
|
|
|
262
262
|
## 版本日志(Version Log)
|
|
263
|
-
- 3.1.4
|
|
264
|
-
- feat
|
|
265
|
-
-
|
|
266
|
-
-
|
|
267
|
-
|
|
268
|
-
-
|
|
269
|
-
-
|
|
270
|
-
-
|
|
271
|
-
-
|
|
272
|
-
-
|
|
273
|
-
- 将run_*函数分离并移入TaskRunner类, 同时将pool管理也迁入;
|
|
274
|
-
- 将TaskQueue分离为更具体的TaskInQueue与TaskOutQueue, 同时TaskInQueue只接受一个MPQueue以避免原有的轮询逻辑, 减少CPU运算消耗;
|
|
275
|
-
- 前端代码换用ts;
|
|
276
|
-
- 重命名所有代码文件, 现在用core_与util_前缀来区分核心代码与辅助代码;
|
|
277
|
-
- 将history数据从status中移出, 使用单独的/api/*_history端口;
|
|
278
|
-
- fix:
|
|
279
|
-
- TaskRedisTransport节点在mermaid中没有展示为parallelogram;
|
|
263
|
+
- 3.1.5
|
|
264
|
+
- feat
|
|
265
|
+
- 大幅修改节点状态卡片的色彩视觉设计. 包括: 取消原本的悬浮设计, 改为平面化设计; 让进度条更直接地显示不同任务完成状态的比例; 使用左边框来显示节点是否在运行中, 而非原本的badge;
|
|
266
|
+
- 前端的error数据拉取不再每次都全量拉取, 而是只拉取自己当前没有的数据;
|
|
267
|
+
- refactor
|
|
268
|
+
- 大幅重构前端代码中的色彩管理, 现在色彩的一致性更好;
|
|
269
|
+
- 修改前端代码中的数据更新判断, 现在相关判断交给server.py;
|
|
270
|
+
- fix
|
|
271
|
+
- 修复节点已运行时间在节点完成后显示为0的问题
|
|
272
|
+
- 修复在最新fastapi版本下TemplateResponse参数改变导致的问题
|
|
280
273
|
|
|
281
274
|
## Star 历史趋势(Star History)
|
|
282
275
|
|
|
283
276
|
如果对项目感兴趣的话,欢迎star。如果有问题或者建议的话, 欢迎提交[Issues](https://github.com/Mr-xiaotian/CelestialFlow/issues)或者在[Discussion](https://github.com/Mr-xiaotian/CelestialFlow/discussions)中告诉我。
|
|
284
277
|
|
|
285
|
-
|
|
278
|
+

|
|
286
279
|
|
|
287
280
|
## 许可(License)
|
|
288
281
|
This project is licensed under the MIT License - see the [LICENSE](LICENSE) file for details.
|
|
@@ -229,35 +229,28 @@ flowchart TD
|
|
|
229
229
|
<p align="center">
|
|
230
230
|
<img src="https://raw.githubusercontent.com/Mr-xiaotian/CelestialFlow/main/img/file_structure.svg" alt="FileStructure" />
|
|
231
231
|
<br/>
|
|
232
|
-
<em>celestial-flow 3.1.4</em>
|
|
232
|
+
<em>celestial-flow 3.1.5</em>
|
|
233
233
|
</p>
|
|
234
234
|
|
|
235
235
|
(该视图由我的另一个项目[CelestialVault](https://github.com/Mr-xiaotian/CelestialVault)中inst_file.FileTree.print_tree()生成。转换为图片则借助[Carbon](https://carbon.now.sh)。)
|
|
236
236
|
|
|
237
237
|
## 版本日志(Version Log)
|
|
238
|
-
- 3.1.4
|
|
239
|
-
- feat
|
|
240
|
-
-
|
|
241
|
-
-
|
|
242
|
-
|
|
243
|
-
-
|
|
244
|
-
-
|
|
245
|
-
-
|
|
246
|
-
-
|
|
247
|
-
-
|
|
248
|
-
- 将run_*函数分离并移入TaskRunner类, 同时将pool管理也迁入;
|
|
249
|
-
- 将TaskQueue分离为更具体的TaskInQueue与TaskOutQueue, 同时TaskInQueue只接受一个MPQueue以避免原有的轮询逻辑, 减少CPU运算消耗;
|
|
250
|
-
- 前端代码换用ts;
|
|
251
|
-
- 重命名所有代码文件, 现在用core_与util_前缀来区分核心代码与辅助代码;
|
|
252
|
-
- 将history数据从status中移出, 使用单独的/api/*_history端口;
|
|
253
|
-
- fix:
|
|
254
|
-
- TaskRedisTransport节点在mermaid中没有展示为parallelogram;
|
|
238
|
+
- 3.1.5
|
|
239
|
+
- feat
|
|
240
|
+
- 大幅修改节点状态卡片的色彩视觉设计. 包括: 取消原本的悬浮设计, 改为平面化设计; 让进度条更直接地显示不同任务完成状态的比例; 使用左边框来显示节点是否在运行中, 而非原本的badge;
|
|
241
|
+
- 前端的error数据拉取不再每次都全量拉取, 而是只拉取自己当前没有的数据;
|
|
242
|
+
- refactor
|
|
243
|
+
- 大幅重构前端代码中的色彩管理, 现在色彩的一致性更好;
|
|
244
|
+
- 修改前端代码中的数据更新判断, 现在相关判断交给server.py;
|
|
245
|
+
- fix
|
|
246
|
+
- 修复节点已运行时间在节点完成后显示为0的问题
|
|
247
|
+
- 修复在最新fastapi版本下TemplateResponse参数改变导致的问题
|
|
255
248
|
|
|
256
249
|
## Star 历史趋势(Star History)
|
|
257
250
|
|
|
258
251
|
如果对项目感兴趣的话,欢迎star。如果有问题或者建议的话, 欢迎提交[Issues](https://github.com/Mr-xiaotian/CelestialFlow/issues)或者在[Discussion](https://github.com/Mr-xiaotian/CelestialFlow/discussions)中告诉我。
|
|
259
252
|
|
|
260
|
-
|
|
253
|
+

|
|
261
254
|
|
|
262
255
|
## 许可(License)
|
|
263
256
|
This project is licensed under the MIT License - see the [LICENSE](LICENSE) file for details.
|
|
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
|
|
|
4
4
|
|
|
5
5
|
[project]
|
|
6
6
|
name = "celestialflow"
|
|
7
|
-
version = "3.1.4"
|
|
7
|
+
version = "3.1.5"
|
|
8
8
|
description = "A flexible GRAPH-based task orchestration framework."
|
|
9
9
|
readme = "README.md"
|
|
10
10
|
license = { text = "MIT" }
|
|
@@ -43,10 +43,9 @@ license-files = []
|
|
|
43
43
|
|
|
44
44
|
[tool.setuptools.package-data]
|
|
45
45
|
celestialflow = [
|
|
46
|
-
"web/templates/*.html",
|
|
47
46
|
"web/templates/**/*.html",
|
|
48
|
-
"web/static/*",
|
|
49
47
|
"web/static/**/*",
|
|
48
|
+
"web/config.json"
|
|
50
49
|
]
|
|
51
50
|
|
|
52
51
|
[tool.pytest.ini_options]
|
|
@@ -22,7 +22,6 @@ from ..runtime.util_errors import UnconsumedError
|
|
|
22
22
|
from ..runtime.util_types import (
|
|
23
23
|
StageStatus,
|
|
24
24
|
TerminationSignal,
|
|
25
|
-
NullPrevStage,
|
|
26
25
|
STAGE_STYLE,
|
|
27
26
|
NULL_PREV_STAGE,
|
|
28
27
|
)
|
|
@@ -585,9 +584,7 @@ class TaskGraph:
|
|
|
585
584
|
start_time = stage_runtime.get("start_time", 0)
|
|
586
585
|
last_elapsed = last_stage_status_dict.get("elapsed_time", 0)
|
|
587
586
|
last_pending = last_stage_status_dict.get("tasks_pending", 0)
|
|
588
|
-
elapsed = calc_elapsed(
|
|
589
|
-
status, start_time, last_elapsed, last_pending, interval
|
|
590
|
-
)
|
|
587
|
+
elapsed = calc_elapsed(status, last_elapsed, last_pending, interval)
|
|
591
588
|
|
|
592
589
|
# 估算剩余时间
|
|
593
590
|
remaining = calc_remaining(
|
|
@@ -17,6 +17,7 @@ class TaskChain(TaskGraph):
|
|
|
17
17
|
|
|
18
18
|
:param stages: TaskStage 列表, 每个 TaskStage 节点将连接到下一个节点
|
|
19
19
|
:param chain_mode: 控制任务链中各节点同时运行(process), 亦或者依次运行(serial)
|
|
20
|
+
:param log_level: 日志级别
|
|
20
21
|
"""
|
|
21
22
|
for num, stage in enumerate(stages):
|
|
22
23
|
stage_name = f"Stage {num + 1}"
|
|
@@ -24,7 +25,9 @@ class TaskChain(TaskGraph):
|
|
|
24
25
|
stage.set_graph_context(next_stages, chain_mode, stage_name)
|
|
25
26
|
|
|
26
27
|
root_stage = stages[0]
|
|
27
|
-
super().__init__(
|
|
28
|
+
super().__init__(
|
|
29
|
+
root_stages=[root_stage], schedule_mode="eager", log_level=log_level
|
|
30
|
+
)
|
|
28
31
|
|
|
29
32
|
def start_chain(
|
|
30
33
|
self, init_tasks_dict: dict, put_termination_signal: bool = True
|
|
@@ -53,8 +56,8 @@ class TaskCross(TaskGraph):
|
|
|
53
56
|
:param layers:
|
|
54
57
|
按层划分的任务节点列表。每个子列表代表一层,列表中的 TaskStage 将并行执行。
|
|
55
58
|
相邻层之间的所有节点将建立全连接依赖(即每个上一层节点都连接到下一层所有节点)。
|
|
56
|
-
:param schedule_mode:
|
|
57
|
-
|
|
59
|
+
:param schedule_mode: 控制任务图的调度布局模式
|
|
60
|
+
:param log_level: 日志级别
|
|
58
61
|
"""
|
|
59
62
|
for i in range(len(layers)):
|
|
60
63
|
curr_layer = layers[i]
|
|
@@ -98,6 +101,7 @@ class TaskGrid(TaskGraph):
|
|
|
98
101
|
任务网格,每个子列表代表一行,列表中的 TaskStage 将按行并行执行。
|
|
99
102
|
每个节点将连接到其右侧和下方的节点。
|
|
100
103
|
:param schedule_mode: 控制任务图的调度布局模式
|
|
104
|
+
:param log_level: 日志级别
|
|
101
105
|
"""
|
|
102
106
|
rows, cols = len(grid), len(grid[0])
|
|
103
107
|
for i in range(rows):
|
|
@@ -133,6 +137,7 @@ class TaskLoop(TaskGraph):
|
|
|
133
137
|
由于环的结构特性, 强制使用 'eager' 节点模式
|
|
134
138
|
|
|
135
139
|
:param stages: TaskStage 列表, 每个 TaskStage 节点将连接到下一个节点, 形成一个闭环
|
|
140
|
+
:param log_level: 日志级别
|
|
136
141
|
"""
|
|
137
142
|
for num, stage in enumerate(stages):
|
|
138
143
|
stage_name = f"Stage {num + 1}"
|
|
@@ -163,6 +168,7 @@ class TaskWheel(TaskGraph):
|
|
|
163
168
|
|
|
164
169
|
:param center: 中心节点
|
|
165
170
|
:param ring: 环节点
|
|
171
|
+
:param log_level: 日志级别
|
|
166
172
|
"""
|
|
167
173
|
# 中心连向环
|
|
168
174
|
center.set_graph_context(ring, "process", "Center")
|
|
@@ -190,6 +196,7 @@ class TaskComplete(TaskGraph):
|
|
|
190
196
|
TaskComplete: 完全图结构,每个节点都连向除自己以外的所有其他节点
|
|
191
197
|
|
|
192
198
|
:param stages: 所有 TaskStage 节点
|
|
199
|
+
:param log_level: 日志级别
|
|
193
200
|
"""
|
|
194
201
|
for i, stage in enumerate(stages):
|
|
195
202
|
next_stages = [s for j, s in enumerate(stages) if i != j]
|
|
@@ -44,6 +44,7 @@ class TaskReporter:
|
|
|
44
44
|
self._stop_flag = Event()
|
|
45
45
|
self._thread = None
|
|
46
46
|
self._push_errors_mode = "meta"
|
|
47
|
+
self._session = requests.Session()
|
|
47
48
|
|
|
48
49
|
self.interval = 5
|
|
49
50
|
self.history_limit = 20
|
|
@@ -62,6 +63,7 @@ class TaskReporter:
|
|
|
62
63
|
self._stop_flag.set()
|
|
63
64
|
self._thread.join(timeout=2)
|
|
64
65
|
self._thread = None
|
|
66
|
+
self._session.close()
|
|
65
67
|
self.log_sinker.stop_reporter()
|
|
66
68
|
|
|
67
69
|
def _pull_timeout(self) -> float:
|
|
@@ -88,6 +90,9 @@ class TaskReporter:
|
|
|
88
90
|
self._pull_history_limit()
|
|
89
91
|
self._pull_and_inject_tasks()
|
|
90
92
|
|
|
93
|
+
# 收集最新的任务图状态快照,确保推送的数据是最新的
|
|
94
|
+
self.task_graph.collect_runtime_snapshot()
|
|
95
|
+
|
|
91
96
|
# 推送逻辑
|
|
92
97
|
self._push_errors()
|
|
93
98
|
self._push_status()
|
|
@@ -99,7 +104,7 @@ class TaskReporter:
|
|
|
99
104
|
def _pull_interval(self) -> None:
|
|
100
105
|
"""从远程服务拉取上报间隔配置"""
|
|
101
106
|
try:
|
|
102
|
-
res =
|
|
107
|
+
res = self._session.get(
|
|
103
108
|
f"{self.base_url}/api/pull_interval", timeout=self._pull_timeout()
|
|
104
109
|
)
|
|
105
110
|
if res.ok:
|
|
@@ -111,7 +116,7 @@ class TaskReporter:
|
|
|
111
116
|
def _pull_history_limit(self) -> None:
|
|
112
117
|
"""从远程服务拉取历史记录限制配置"""
|
|
113
118
|
try:
|
|
114
|
-
res =
|
|
119
|
+
res = self._session.get(
|
|
115
120
|
f"{self.base_url}/api/pull_history_limit", timeout=self._pull_timeout()
|
|
116
121
|
)
|
|
117
122
|
if res.ok:
|
|
@@ -123,7 +128,7 @@ class TaskReporter:
|
|
|
123
128
|
def _pull_and_inject_tasks(self) -> None:
|
|
124
129
|
"""从远程服务拉取任务注入信息并注入任务"""
|
|
125
130
|
try:
|
|
126
|
-
res =
|
|
131
|
+
res = self._session.get(
|
|
127
132
|
f"{self.base_url}/api/pull_task_injection", timeout=self._pull_timeout()
|
|
128
133
|
)
|
|
129
134
|
if res.ok:
|
|
@@ -178,7 +183,7 @@ class TaskReporter:
|
|
|
178
183
|
"jsonl_path": jsonl_path,
|
|
179
184
|
"rev": rev,
|
|
180
185
|
}
|
|
181
|
-
response =
|
|
186
|
+
response = self._session.post(
|
|
182
187
|
f"{self.base_url}/api/push_errors_meta",
|
|
183
188
|
json=payload,
|
|
184
189
|
timeout=self._push_timeout(),
|
|
@@ -192,14 +197,14 @@ class TaskReporter:
|
|
|
192
197
|
|
|
193
198
|
error_store = load_jsonl_logs(
|
|
194
199
|
path=jsonl_path,
|
|
195
|
-
keys=["ts", "error_id", "error_repr", "stage", "task_repr"],
|
|
200
|
+
keys=["ts", "error_id", "error_repr", "error", "stage", "task_repr"],
|
|
196
201
|
)
|
|
197
202
|
payload = {
|
|
198
203
|
"errors": error_store,
|
|
199
204
|
"jsonl_path": jsonl_path,
|
|
200
205
|
"rev": rev,
|
|
201
206
|
}
|
|
202
|
-
response =
|
|
207
|
+
response = self._session.post(
|
|
203
208
|
f"{self.base_url}/api/push_errors_content",
|
|
204
209
|
json=payload,
|
|
205
210
|
timeout=self._push_timeout(),
|
|
@@ -209,10 +214,9 @@ class TaskReporter:
|
|
|
209
214
|
def _push_status(self) -> None:
|
|
210
215
|
"""推送状态信息"""
|
|
211
216
|
try:
|
|
212
|
-
self.task_graph.collect_runtime_snapshot()
|
|
213
217
|
status_data = self.task_graph.get_status_dict()
|
|
214
218
|
payload = {"status": status_data}
|
|
215
|
-
|
|
219
|
+
self._session.post(
|
|
216
220
|
f"{self.base_url}/api/push_status",
|
|
217
221
|
json=payload,
|
|
218
222
|
timeout=self._push_timeout(),
|
|
@@ -225,7 +229,7 @@ class TaskReporter:
|
|
|
225
229
|
try:
|
|
226
230
|
structure = self.task_graph.get_structure_json()
|
|
227
231
|
payload = {"items": structure}
|
|
228
|
-
|
|
232
|
+
self._session.post(
|
|
229
233
|
f"{self.base_url}/api/push_structure",
|
|
230
234
|
json=payload,
|
|
231
235
|
timeout=self._push_timeout(),
|
|
@@ -238,7 +242,7 @@ class TaskReporter:
|
|
|
238
242
|
try:
|
|
239
243
|
topology = self.task_graph.get_graph_topology()
|
|
240
244
|
payload = {"topology": topology}
|
|
241
|
-
|
|
245
|
+
self._session.post(
|
|
242
246
|
f"{self.base_url}/api/push_topology",
|
|
243
247
|
json=payload,
|
|
244
248
|
timeout=self._push_timeout(),
|
|
@@ -251,7 +255,7 @@ class TaskReporter:
|
|
|
251
255
|
try:
|
|
252
256
|
summary = self.task_graph.get_graph_summary()
|
|
253
257
|
payload = {"summary": summary}
|
|
254
|
-
|
|
258
|
+
self._session.post(
|
|
255
259
|
f"{self.base_url}/api/push_summary",
|
|
256
260
|
json=payload,
|
|
257
261
|
timeout=self._push_timeout(),
|
|
@@ -264,7 +268,7 @@ class TaskReporter:
|
|
|
264
268
|
try:
|
|
265
269
|
history = self.task_graph.get_stage_history()
|
|
266
270
|
payload = {"history": history}
|
|
267
|
-
|
|
271
|
+
self._session.post(
|
|
268
272
|
f"{self.base_url}/api/push_history",
|
|
269
273
|
json=payload,
|
|
270
274
|
timeout=self._push_timeout(),
|
|
@@ -87,9 +87,9 @@ class LogSinker(BaseSinker):
|
|
|
87
87
|
|
|
88
88
|
# ==== stage ====
|
|
89
89
|
def start_stage(
|
|
90
|
-
self, stage_tag: str, stage_mode: str, execution_mode: str,
|
|
90
|
+
self, stage_tag: str, stage_mode: str, execution_mode: str, max_workers: int
|
|
91
91
|
) -> None:
|
|
92
|
-
worker_repr = f"({
|
|
92
|
+
worker_repr = f"({max_workers} workers)" if execution_mode != "serial" else ""
|
|
93
93
|
text = f"'{stage_tag}' start in {stage_mode}; execute tasks by {execution_mode}{worker_repr}."
|
|
94
94
|
self._sink("INFO", text)
|
|
95
95
|
|
|
@@ -15,17 +15,17 @@ if TYPE_CHECKING:
|
|
|
15
15
|
|
|
16
16
|
|
|
17
17
|
class TaskRunner:
|
|
18
|
-
def __init__(self, task_executor: TaskExecutor, func,
|
|
18
|
+
def __init__(self, task_executor: TaskExecutor, func, max_workers: int):
|
|
19
19
|
"""
|
|
20
20
|
初始化任务运行器
|
|
21
21
|
|
|
22
22
|
:param task_executor: 任务执行器
|
|
23
23
|
:param func: 任务函数
|
|
24
|
-
:param
|
|
24
|
+
:param max_workers: 工作线程或进程数量限制
|
|
25
25
|
"""
|
|
26
26
|
self.task_executor = task_executor
|
|
27
27
|
self.func = func
|
|
28
|
-
self.
|
|
28
|
+
self.max_workers = max_workers
|
|
29
29
|
|
|
30
30
|
self._pool: ThreadPoolExecutor | ProcessPoolExecutor | None = None
|
|
31
31
|
|
|
@@ -37,9 +37,9 @@ class TaskRunner:
|
|
|
37
37
|
"""
|
|
38
38
|
# 可以复用的线程池或进程池
|
|
39
39
|
if execution_mode == "thread" and self._pool is None:
|
|
40
|
-
self._pool = ThreadPoolExecutor(max_workers=self.
|
|
40
|
+
self._pool = ThreadPoolExecutor(max_workers=self.max_workers)
|
|
41
41
|
elif execution_mode == "process" and self._pool is None:
|
|
42
|
-
self._pool = ProcessPoolExecutor(max_workers=self.
|
|
42
|
+
self._pool = ProcessPoolExecutor(max_workers=self.max_workers)
|
|
43
43
|
|
|
44
44
|
def process_termination_signal(
|
|
45
45
|
self, termination_pool: TerminationIdPool
|
|
@@ -206,7 +206,7 @@ class TaskRunner:
|
|
|
206
206
|
"""
|
|
207
207
|
while True:
|
|
208
208
|
semaphore = asyncio.Semaphore(
|
|
209
|
-
self.task_executor.
|
|
209
|
+
self.task_executor.max_workers
|
|
210
210
|
) # 限制并发数量
|
|
211
211
|
|
|
212
212
|
async def sem_task(envelope: TaskEnvelope):
|
|
@@ -6,6 +6,10 @@ from .util_types import StageStatus
|
|
|
6
6
|
|
|
7
7
|
# ==== calculate ====
|
|
8
8
|
def calc_remaining(processed: int, pending: int, elapsed: float) -> float:
|
|
9
|
+
"""
|
|
10
|
+
基于已处理任务,剩余任务以及已消耗时间来计算剩余时间.
|
|
11
|
+
不要瞧不起均值,在大规模数据下它可能是最有效的.
|
|
12
|
+
"""
|
|
9
13
|
if processed and pending:
|
|
10
14
|
return pending / processed * elapsed
|
|
11
15
|
return 0
|
|
@@ -13,15 +17,15 @@ def calc_remaining(processed: int, pending: int, elapsed: float) -> float:
|
|
|
13
17
|
|
|
14
18
|
def calc_elapsed(
|
|
15
19
|
status: StageStatus,
|
|
16
|
-
start_time: float,
|
|
17
20
|
last_elapsed: float,
|
|
18
21
|
last_pending: int,
|
|
19
22
|
interval: float,
|
|
20
23
|
) -> float:
|
|
21
|
-
"""
|
|
22
|
-
|
|
24
|
+
"""
|
|
25
|
+
更新时间消耗
|
|
26
|
+
"""
|
|
27
|
+
if status in (StageStatus.RUNNING, StageStatus.STOPPED):
|
|
23
28
|
elapsed = last_elapsed
|
|
24
|
-
# 如果上一次是 pending,则累计时间
|
|
25
29
|
if last_pending:
|
|
26
30
|
# 如果上一次活跃, 那么无论当前状况,累计一次更新时间
|
|
27
31
|
elapsed += interval
|
|
@@ -121,7 +125,7 @@ def calc_global_remain_equal_pred(
|
|
|
121
125
|
total_v += obs_each * scale.get(u, 1.0)
|
|
122
126
|
|
|
123
127
|
scale[v] = total_v / max(1.0, proc_v) # 下游放大系数
|
|
124
|
-
expect_pend_v = max(0.0, total_v - proc_v)
|
|
128
|
+
expect_pend_v = max(0.0, total_v - proc_v) # 理论上expect_pend_v >= pend_v
|
|
125
129
|
|
|
126
130
|
# 时间估算:需要 avg time(秒/任务)
|
|
127
131
|
expected_pending_map[v] = calc_remaining(proc_v, expect_pend_v, elapsed_v)
|
|
@@ -42,7 +42,7 @@ class TaskExecutor:
|
|
|
42
42
|
self,
|
|
43
43
|
func,
|
|
44
44
|
execution_mode="serial",
|
|
45
|
-
|
|
45
|
+
max_workers=20,
|
|
46
46
|
max_retries=1,
|
|
47
47
|
max_info=50,
|
|
48
48
|
unpack_task_args=False,
|
|
@@ -58,7 +58,7 @@ class TaskExecutor:
|
|
|
58
58
|
|
|
59
59
|
:param func: 可调用对象
|
|
60
60
|
:param execution_mode: 执行模式,可选 'serial', 'thread', 'process', 'async' (组合为 'TaskGraph' 时不可用 'process' 和 'async' 模式)
|
|
61
|
-
:param
|
|
61
|
+
:param max_workers: 同时处理数量
|
|
62
62
|
:param max_retries: 任务的最大重试次数
|
|
63
63
|
:param max_info: 日志中每条信息的最大长度
|
|
64
64
|
:param unpack_task_args: 是否将任务参数解包
|
|
@@ -80,7 +80,7 @@ class TaskExecutor:
|
|
|
80
80
|
|
|
81
81
|
self.set_func(func)
|
|
82
82
|
self.set_execution_mode(execution_mode)
|
|
83
|
-
self.
|
|
83
|
+
self.max_workers = max_workers
|
|
84
84
|
self.max_retries = max_retries
|
|
85
85
|
self.max_info = max_info
|
|
86
86
|
|
|
@@ -177,7 +177,7 @@ class TaskExecutor:
|
|
|
177
177
|
"""
|
|
178
178
|
初始化任务运行器
|
|
179
179
|
"""
|
|
180
|
-
self.runner = TaskRunner(self, self.func, self.
|
|
180
|
+
self.runner = TaskRunner(self, self.func, self.max_workers)
|
|
181
181
|
|
|
182
182
|
def init_listener(self) -> None:
|
|
183
183
|
"""
|
|
@@ -198,7 +198,7 @@ class TaskExecutor:
|
|
|
198
198
|
return
|
|
199
199
|
|
|
200
200
|
extra_desc = (
|
|
201
|
-
f"{self.execution_mode}-{self.
|
|
201
|
+
f"{self.execution_mode}-{self.max_workers}"
|
|
202
202
|
if self.execution_mode != "serial"
|
|
203
203
|
else "serial"
|
|
204
204
|
)
|
|
@@ -307,7 +307,7 @@ class TaskExecutor:
|
|
|
307
307
|
return (
|
|
308
308
|
self.execution_mode
|
|
309
309
|
if self.execution_mode == "serial"
|
|
310
|
-
else f"{self.execution_mode}-{self.
|
|
310
|
+
else f"{self.execution_mode}-{self.max_workers}"
|
|
311
311
|
)
|
|
312
312
|
|
|
313
313
|
def get_summary(self) -> dict:
|
|
@@ -218,7 +218,7 @@ class TaskStage(TaskExecutor):
|
|
|
218
218
|
self.init_progress()
|
|
219
219
|
self.init_env(input_queues, output_queues, fail_queue, log_queue)
|
|
220
220
|
self.log_sinker.start_stage(
|
|
221
|
-
self.get_tag(), self.stage_mode, self.execution_mode, self.
|
|
221
|
+
self.get_tag(), self.stage_mode, self.execution_mode, self.max_workers
|
|
222
222
|
)
|
|
223
223
|
self.mark_running()
|
|
224
224
|
|
|
@@ -25,12 +25,15 @@ class TaskSplitter(TaskStage):
|
|
|
25
25
|
self.init_extra_counter()
|
|
26
26
|
|
|
27
27
|
def init_extra_counter(self) -> None:
|
|
28
|
+
"""初始化额外的计数器"""
|
|
28
29
|
self.split_counter = MPValue("i", 0)
|
|
29
30
|
|
|
30
31
|
def reset_extra_counter(self) -> None:
|
|
32
|
+
"""重置额外的计数器"""
|
|
31
33
|
self.split_counter.value = 0
|
|
32
34
|
|
|
33
35
|
def update_split_counter(self, add_value: int) -> None:
|
|
36
|
+
"""更新 split 计数器"""
|
|
34
37
|
self.split_counter.value += add_value
|
|
35
38
|
|
|
36
39
|
def _split(self, *task: Any) -> tuple:
|
|
@@ -40,6 +43,13 @@ class TaskSplitter(TaskStage):
|
|
|
40
43
|
return task
|
|
41
44
|
|
|
42
45
|
def put_split_result(self, result: tuple, task_id: int) -> int:
|
|
46
|
+
"""
|
|
47
|
+
将 split 结果放入队列,并发出对应事件
|
|
48
|
+
|
|
49
|
+
:param result: split 的结果,必须是一个可迭代对象
|
|
50
|
+
:param task_id: 原始任务 ID,用于事件关联
|
|
51
|
+
:return: split 的子任务数量
|
|
52
|
+
"""
|
|
43
53
|
split_count = len(result)
|
|
44
54
|
for idx, item in enumerate(result):
|
|
45
55
|
split_id = self.ctree_client.emit(
|
|
@@ -94,6 +104,7 @@ class TaskSplitter(TaskStage):
|
|
|
94
104
|
|
|
95
105
|
class TaskRouter(TaskStage):
|
|
96
106
|
def __init__(self):
|
|
107
|
+
"""初始化 TaskRouter"""
|
|
97
108
|
super().__init__(
|
|
98
109
|
func=self._route,
|
|
99
110
|
execution_mode="serial",
|
|
@@ -103,14 +114,17 @@ class TaskRouter(TaskStage):
|
|
|
103
114
|
self.init_extra_counter()
|
|
104
115
|
|
|
105
116
|
def init_extra_counter(self) -> None:
|
|
117
|
+
"""初始化额外的计数器"""
|
|
106
118
|
# 每个 target_tag 一个计数器:用于让不同下游 stage 的 task_counter 统计正确
|
|
107
119
|
self.route_counters: dict = {}
|
|
108
120
|
|
|
109
121
|
def reset_extra_counter(self) -> None:
|
|
122
|
+
"""重置额外的计数器"""
|
|
110
123
|
for counter in self.route_counters.values():
|
|
111
124
|
counter.value = 0
|
|
112
125
|
|
|
113
126
|
def update_route_counter(self, target: str) -> None:
|
|
127
|
+
"""更新 route 计数器"""
|
|
114
128
|
self.route_counters[target].value += 1
|
|
115
129
|
|
|
116
130
|
def _route(self, routed: tuple) -> tuple:
|
|
@@ -189,7 +203,7 @@ class TaskRedisTransport(TaskStage):
|
|
|
189
203
|
super().__init__(
|
|
190
204
|
func=self._transport,
|
|
191
205
|
execution_mode="thread",
|
|
192
|
-
|
|
206
|
+
max_workers=4, # 允许 1~2 个线程偶发阻塞,但不会导致整体阻塞
|
|
193
207
|
unpack_task_args=unpack_task_args,
|
|
194
208
|
)
|
|
195
209
|
self.key = key
|
|
@@ -17,7 +17,7 @@ def _get_clone_init_kwargs(executor: TaskExecutor) -> dict:
|
|
|
17
17
|
return {
|
|
18
18
|
"func": executor.func,
|
|
19
19
|
"execution_mode": executor.execution_mode,
|
|
20
|
-
"
|
|
20
|
+
"max_workers": executor.max_workers,
|
|
21
21
|
"max_retries": executor.max_retries,
|
|
22
22
|
"max_info": executor.max_info,
|
|
23
23
|
"unpack_task_args": executor.unpack_task_args,
|
|
@@ -0,0 +1,35 @@
|
|
|
1
|
+
{
|
|
2
|
+
"theme": "dark",
|
|
3
|
+
"refreshInterval": 2000,
|
|
4
|
+
"historyLimit": 20,
|
|
5
|
+
"dashboard": {
|
|
6
|
+
"left": [
|
|
7
|
+
"mermaid",
|
|
8
|
+
"topology"
|
|
9
|
+
],
|
|
10
|
+
"middle": [
|
|
11
|
+
"status"
|
|
12
|
+
],
|
|
13
|
+
"right": [
|
|
14
|
+
"progress",
|
|
15
|
+
"summary"
|
|
16
|
+
]
|
|
17
|
+
},
|
|
18
|
+
"cards": {
|
|
19
|
+
"mermaid": {
|
|
20
|
+
"title": "任务结构图"
|
|
21
|
+
},
|
|
22
|
+
"topology": {
|
|
23
|
+
"title": "图拓扑信息"
|
|
24
|
+
},
|
|
25
|
+
"status": {
|
|
26
|
+
"title": "节点运行状态"
|
|
27
|
+
},
|
|
28
|
+
"progress": {
|
|
29
|
+
"title": "节点完成走向"
|
|
30
|
+
},
|
|
31
|
+
"summary": {
|
|
32
|
+
"title": "总体状态摘要"
|
|
33
|
+
}
|
|
34
|
+
}
|
|
35
|
+
}
|