celestialflow 3.1.4__tar.gz → 3.1.5__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (82) hide show
  1. {celestialflow-3.1.4 → celestialflow-3.1.5}/PKG-INFO +13 -20
  2. {celestialflow-3.1.4 → celestialflow-3.1.5}/README.md +12 -19
  3. {celestialflow-3.1.4 → celestialflow-3.1.5}/pyproject.toml +2 -3
  4. {celestialflow-3.1.4 → celestialflow-3.1.5}/src/celestialflow/graph/core_graph.py +1 -4
  5. {celestialflow-3.1.4 → celestialflow-3.1.5}/src/celestialflow/graph/core_structure.py +10 -3
  6. {celestialflow-3.1.4 → celestialflow-3.1.5}/src/celestialflow/observability/core_report.py +16 -12
  7. {celestialflow-3.1.4 → celestialflow-3.1.5}/src/celestialflow/persistence/core_log.py +2 -2
  8. {celestialflow-3.1.4 → celestialflow-3.1.5}/src/celestialflow/runtime/core_runner.py +6 -6
  9. {celestialflow-3.1.4 → celestialflow-3.1.5}/src/celestialflow/runtime/util_estimators.py +9 -5
  10. {celestialflow-3.1.4 → celestialflow-3.1.5}/src/celestialflow/stage/core_executor.py +6 -6
  11. {celestialflow-3.1.4 → celestialflow-3.1.5}/src/celestialflow/stage/core_stage.py +1 -1
  12. {celestialflow-3.1.4 → celestialflow-3.1.5}/src/celestialflow/stage/core_stages.py +15 -1
  13. {celestialflow-3.1.4 → celestialflow-3.1.5}/src/celestialflow/utils/util_clone.py +1 -1
  14. celestialflow-3.1.5/src/celestialflow/web/config.json +35 -0
  15. {celestialflow-3.1.4 → celestialflow-3.1.5}/src/celestialflow/web/server.py +80 -15
  16. celestialflow-3.1.5/src/celestialflow/web/static/css/_colors.css +130 -0
  17. {celestialflow-3.1.4 → celestialflow-3.1.5}/src/celestialflow/web/static/css/base.css +302 -305
  18. {celestialflow-3.1.4 → celestialflow-3.1.5}/src/celestialflow/web/static/css/dashboard.css +346 -368
  19. {celestialflow-3.1.4 → celestialflow-3.1.5}/src/celestialflow/web/static/css/errors.css +60 -37
  20. {celestialflow-3.1.4 → celestialflow-3.1.5}/src/celestialflow/web/static/css/inject.css +599 -638
  21. {celestialflow-3.1.4 → celestialflow-3.1.5}/src/celestialflow/web/static/js/main.js +2 -21
  22. {celestialflow-3.1.4 → celestialflow-3.1.5}/src/celestialflow/web/static/js/task_errors.js +23 -9
  23. {celestialflow-3.1.4 → celestialflow-3.1.5}/src/celestialflow/web/static/js/task_history.js +9 -3
  24. {celestialflow-3.1.4 → celestialflow-3.1.5}/src/celestialflow/web/static/js/task_injection.js +1 -1
  25. {celestialflow-3.1.4 → celestialflow-3.1.5}/src/celestialflow/web/static/js/task_statuses.js +58 -36
  26. {celestialflow-3.1.4 → celestialflow-3.1.5}/src/celestialflow/web/static/js/task_structure.js +9 -3
  27. {celestialflow-3.1.4 → celestialflow-3.1.5}/src/celestialflow/web/static/js/task_summary.js +14 -8
  28. {celestialflow-3.1.4 → celestialflow-3.1.5}/src/celestialflow/web/static/js/task_topology.js +9 -3
  29. {celestialflow-3.1.4 → celestialflow-3.1.5}/src/celestialflow/web/static/js/utils.js +20 -3
  30. {celestialflow-3.1.4 → celestialflow-3.1.5}/src/celestialflow/web/static/ts/main.ts +3 -29
  31. {celestialflow-3.1.4 → celestialflow-3.1.5}/src/celestialflow/web/static/ts/task_errors.ts +24 -10
  32. {celestialflow-3.1.4 → celestialflow-3.1.5}/src/celestialflow/web/static/ts/task_history.ts +9 -4
  33. {celestialflow-3.1.4 → celestialflow-3.1.5}/src/celestialflow/web/static/ts/task_injection.ts +379 -379
  34. {celestialflow-3.1.4 → celestialflow-3.1.5}/src/celestialflow/web/static/ts/task_statuses.ts +174 -149
  35. {celestialflow-3.1.4 → celestialflow-3.1.5}/src/celestialflow/web/static/ts/task_structure.ts +9 -4
  36. {celestialflow-3.1.4 → celestialflow-3.1.5}/src/celestialflow/web/static/ts/task_summary.ts +14 -9
  37. {celestialflow-3.1.4 → celestialflow-3.1.5}/src/celestialflow/web/static/ts/task_topology.ts +9 -4
  38. {celestialflow-3.1.4 → celestialflow-3.1.5}/src/celestialflow/web/static/ts/utils.ts +186 -169
  39. {celestialflow-3.1.4 → celestialflow-3.1.5}/src/celestialflow/web/templates/index.html +16 -16
  40. {celestialflow-3.1.4 → celestialflow-3.1.5}/src/celestialflow.egg-info/PKG-INFO +13 -20
  41. {celestialflow-3.1.4 → celestialflow-3.1.5}/src/celestialflow.egg-info/SOURCES.txt +3 -1
  42. {celestialflow-3.1.4 → celestialflow-3.1.5}/tests/test_executor.py +5 -38
  43. {celestialflow-3.1.4 → celestialflow-3.1.5}/tests/test_stages.py +35 -115
  44. {celestialflow-3.1.4 → celestialflow-3.1.5}/tests/test_structure.py +39 -110
  45. celestialflow-3.1.5/tests/test_utils.py +237 -0
  46. celestialflow-3.1.4/src/celestialflow/web/static/css/_colors.css +0 -183
  47. {celestialflow-3.1.4 → celestialflow-3.1.5}/setup.cfg +0 -0
  48. {celestialflow-3.1.4 → celestialflow-3.1.5}/src/celestialflow/__init__.py +0 -0
  49. {celestialflow-3.1.4 → celestialflow-3.1.5}/src/celestialflow/graph/__init__.py +0 -0
  50. {celestialflow-3.1.4 → celestialflow-3.1.5}/src/celestialflow/graph/util_analysis.py +0 -0
  51. {celestialflow-3.1.4 → celestialflow-3.1.5}/src/celestialflow/graph/util_serialize.py +0 -0
  52. {celestialflow-3.1.4 → celestialflow-3.1.5}/src/celestialflow/observability/__init__.py +0 -0
  53. {celestialflow-3.1.4 → celestialflow-3.1.5}/src/celestialflow/persistence/__init__.py +0 -0
  54. {celestialflow-3.1.4 → celestialflow-3.1.5}/src/celestialflow/persistence/core_base.py +0 -0
  55. {celestialflow-3.1.4 → celestialflow-3.1.5}/src/celestialflow/persistence/core_fail.py +0 -0
  56. {celestialflow-3.1.4 → celestialflow-3.1.5}/src/celestialflow/persistence/util_constant.py +0 -0
  57. {celestialflow-3.1.4 → celestialflow-3.1.5}/src/celestialflow/persistence/util_jsonl.py +0 -0
  58. {celestialflow-3.1.4 → celestialflow-3.1.5}/src/celestialflow/runtime/__init__.py +0 -0
  59. {celestialflow-3.1.4 → celestialflow-3.1.5}/src/celestialflow/runtime/core_envelope.py +0 -0
  60. {celestialflow-3.1.4 → celestialflow-3.1.5}/src/celestialflow/runtime/core_metrics.py +0 -0
  61. {celestialflow-3.1.4 → celestialflow-3.1.5}/src/celestialflow/runtime/core_progress.py +0 -0
  62. {celestialflow-3.1.4 → celestialflow-3.1.5}/src/celestialflow/runtime/core_queue.py +0 -0
  63. {celestialflow-3.1.4 → celestialflow-3.1.5}/src/celestialflow/runtime/util_errors.py +0 -0
  64. {celestialflow-3.1.4 → celestialflow-3.1.5}/src/celestialflow/runtime/util_factories.py +0 -0
  65. {celestialflow-3.1.4 → celestialflow-3.1.5}/src/celestialflow/runtime/util_hash.py +0 -0
  66. {celestialflow-3.1.4 → celestialflow-3.1.5}/src/celestialflow/runtime/util_queue.py +0 -0
  67. {celestialflow-3.1.4 → celestialflow-3.1.5}/src/celestialflow/runtime/util_types.py +0 -0
  68. {celestialflow-3.1.4 → celestialflow-3.1.5}/src/celestialflow/stage/__init__.py +0 -0
  69. {celestialflow-3.1.4 → celestialflow-3.1.5}/src/celestialflow/utils/__init__.py +0 -0
  70. {celestialflow-3.1.4 → celestialflow-3.1.5}/src/celestialflow/utils/util_benchmark.py +0 -0
  71. {celestialflow-3.1.4 → celestialflow-3.1.5}/src/celestialflow/utils/util_collections.py +0 -0
  72. {celestialflow-3.1.4 → celestialflow-3.1.5}/src/celestialflow/utils/util_debug.py +0 -0
  73. {celestialflow-3.1.4 → celestialflow-3.1.5}/src/celestialflow/utils/util_format.py +0 -0
  74. {celestialflow-3.1.4 → celestialflow-3.1.5}/src/celestialflow/web/static/favicon.ico +0 -0
  75. {celestialflow-3.1.4 → celestialflow-3.1.5}/src/celestialflow/web/static/js/task_config.js +0 -0
  76. {celestialflow-3.1.4 → celestialflow-3.1.5}/src/celestialflow/web/static/js/web_config.js +0 -0
  77. {celestialflow-3.1.4 → celestialflow-3.1.5}/src/celestialflow/web/static/ts/globals.d.ts +0 -0
  78. {celestialflow-3.1.4 → celestialflow-3.1.5}/src/celestialflow/web/static/ts/web_config.ts +0 -0
  79. {celestialflow-3.1.4 → celestialflow-3.1.5}/src/celestialflow.egg-info/dependency_links.txt +0 -0
  80. {celestialflow-3.1.4 → celestialflow-3.1.5}/src/celestialflow.egg-info/entry_points.txt +0 -0
  81. {celestialflow-3.1.4 → celestialflow-3.1.5}/src/celestialflow.egg-info/requires.txt +0 -0
  82. {celestialflow-3.1.4 → celestialflow-3.1.5}/src/celestialflow.egg-info/top_level.txt +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: celestialflow
3
- Version: 3.1.4
3
+ Version: 3.1.5
4
4
  Summary: A flexible GRAPH-based task orchestration framework.
5
5
  Author-email: Mr-xiaotian <mingxiaomingtian@gmail.com>
6
6
  License: MIT
@@ -254,35 +254,28 @@ flowchart TD
254
254
  <p align="center">
255
255
  <img src="https://raw.githubusercontent.com/Mr-xiaotian/CelestialFlow/main/img/file_structure.svg" alt="FileStructure" />
256
256
  <br/>
257
- <em>celestial-flow 3.1.4</em>
257
+ <em>celestial-flow 3.1.5</em>
258
258
  </p>
259
259
 
260
260
  (该视图由我的另一个项目[CelestialVault](https://github.com/Mr-xiaotian/CelestialVault)中inst_file.FileTree.print_tree()生成。转换为图片则借助[Carbon](https://carbon.now.sh)。)
261
261
 
262
262
  ## 版本日志(Version Log)
263
- - 3.1.4:
264
- - feat:
265
- - 添加前端设置文件config.json, 包含主题(白天与黑夜), 刷新时间, 历史长度, 卡片种类, 仪表盘布局;
266
- - 完善对termination_signal在ctree上的事件管理;
267
- - 新添termination_*系日志, 同时优化部分原有日志;
268
- - 在前端的错误数字上(包括单个stage的卡片与summary卡片)绑定跳转事件, 可以跳转到ErrorLog页面, 并显示对应的错误;
269
- - 修复部分原有的文档错误, 并添加新的前端代码文档;
270
- - refactor:
271
- - fail_sinker.task_error中不必再传时间, 方法会自己补充;
272
- - 将所有counter放入TaskMetrics管理, 断绝对TAskExecutor的调用依赖;
273
- - 将run_*函数分离并移入TaskRunner类, 同时将pool管理也迁入;
274
- - 将TaskQueue分离为更具体的TaskInQueue与TaskOutQueue, 同时TAskInQueue只接受一个MPQueue以避免原有的轮询逻辑, 减少CPU运算消耗;
275
- - 前端代码换用ts;
276
- - 重命名所有代码文件, 现在用core_与util_前缀来区分核心代码与辅助代码;
277
- - 将history数据从status中移出, 使用单独的/api/*_history端口;
278
- - fix:
279
- - TaskRedisTransport节点在mermaid中没有展示为parallelogram;
263
+ - 3.1.5
264
+ - feat
265
+ - 大幅修改节点状态卡片的色彩视觉设计. 包括: 取消原本的悬浮设计, 改为平面化设计; 让进度条更直接地显示不同任务完成状态的比例; 使用左边框(而非原本的badge)来显示节点是否在运行中;
266
+ - 前端的error数据拉取不再每次都全量拉取, 而是只拉取自己当前没有的数据;
267
+ - refactor
268
+ - 大幅重构前端代码中的色彩管理, 现在色彩的一致性更好;
269
+ - 修改前端代码中的数据更新判断, 现在相关判断交给server.py;
270
+ - fix
271
+ - 修复节点已运行时间在节点完成后显示为0的问题
272
+ - 修复在最新fastapi版本下TemplateResponse参数改变导致的问题
280
273
 
281
274
  ## Star 历史趋势(Star History)
282
275
 
283
276
  如果对项目感兴趣的话,欢迎star。如果有问题或者建议的话, 欢迎提交[Issues](https://github.com/Mr-xiaotian/CelestialFlow/issues)或者在[Discussion](https://github.com/Mr-xiaotian/CelestialFlow/discussions)中告诉我。
284
277
 
285
- [![Star History Chart](https://api.star-history.com/svg?repos=Mr-xiaotian/CelestialFlow&type=Date)](https://star-history.com/#Mr-xiaotian/CelestialFlow&Date)
278
+ ![Star History Chart](https://api.star-history.com/svg?repos=Mr-xiaotian/CelestialFlow&type=Date)
286
279
 
287
280
  ## 许可(License)
288
281
  This project is licensed under the MIT License - see the [LICENSE](LICENSE) file for details.
@@ -229,35 +229,28 @@ flowchart TD
229
229
  <p align="center">
230
230
  <img src="https://raw.githubusercontent.com/Mr-xiaotian/CelestialFlow/main/img/file_structure.svg" alt="FileStructure" />
231
231
  <br/>
232
- <em>celestial-flow 3.1.4</em>
232
+ <em>celestial-flow 3.1.5</em>
233
233
  </p>
234
234
 
235
235
  (该视图由我的另一个项目[CelestialVault](https://github.com/Mr-xiaotian/CelestialVault)中inst_file.FileTree.print_tree()生成。转换为图片则借助[Carbon](https://carbon.now.sh)。)
236
236
 
237
237
  ## 版本日志(Version Log)
238
- - 3.1.4:
239
- - feat:
240
- - 添加前端设置文件config.json, 包含主题(白天与黑夜), 刷新时间, 历史长度, 卡片种类, 仪表盘布局;
241
- - 完善对termination_signal在ctree上的事件管理;
242
- - 新添termination_*系日志, 同时优化部分原有日志;
243
- - 在前端的错误数字上(包括单个stage的卡片与summary卡片)绑定跳转事件, 可以跳转到ErrorLog页面, 并显示对应的错误;
244
- - 修复部分原有的文档错误, 并添加新的前端代码文档;
245
- - refactor:
246
- - fail_sinker.task_error中不必再传时间, 方法会自己补充;
247
- - 将所有counter放入TaskMetrics管理, 断绝对TAskExecutor的调用依赖;
248
- - 将run_*函数分离并移入TaskRunner类, 同时将pool管理也迁入;
249
- - 将TaskQueue分离为更具体的TaskInQueue与TaskOutQueue, 同时TAskInQueue只接受一个MPQueue以避免原有的轮询逻辑, 减少CPU运算消耗;
250
- - 前端代码换用ts;
251
- - 重命名所有代码文件, 现在用core_与util_前缀来区分核心代码与辅助代码;
252
- - 将history数据从status中移出, 使用单独的/api/*_history端口;
253
- - fix:
254
- - TaskRedisTransport节点在mermaid中没有展示为parallelogram;
238
+ - 3.1.5
239
+ - feat
240
+ - 大幅修改节点状态卡片的色彩视觉设计. 包括: 取消原本的悬浮设计, 改为平面化设计; 让进度条更直接地显示不同任务完成状态的比例; 使用左边框(而非原本的badge)来显示节点是否在运行中;
241
+ - 前端的error数据拉取不再每次都全量拉取, 而是只拉取自己当前没有的数据;
242
+ - refactor
243
+ - 大幅重构前端代码中的色彩管理, 现在色彩的一致性更好;
244
+ - 修改前端代码中的数据更新判断, 现在相关判断交给server.py;
245
+ - fix
246
+ - 修复节点已运行时间在节点完成后显示为0的问题
247
+ - 修复在最新fastapi版本下TemplateResponse参数改变导致的问题
255
248
 
256
249
  ## Star 历史趋势(Star History)
257
250
 
258
251
  如果对项目感兴趣的话,欢迎star。如果有问题或者建议的话, 欢迎提交[Issues](https://github.com/Mr-xiaotian/CelestialFlow/issues)或者在[Discussion](https://github.com/Mr-xiaotian/CelestialFlow/discussions)中告诉我。
259
252
 
260
- [![Star History Chart](https://api.star-history.com/svg?repos=Mr-xiaotian/CelestialFlow&type=Date)](https://star-history.com/#Mr-xiaotian/CelestialFlow&Date)
253
+ ![Star History Chart](https://api.star-history.com/svg?repos=Mr-xiaotian/CelestialFlow&type=Date)
261
254
 
262
255
  ## 许可(License)
263
256
  This project is licensed under the MIT License - see the [LICENSE](LICENSE) file for details.
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "celestialflow"
7
- version = "3.1.4"
7
+ version = "3.1.5"
8
8
  description = "A flexible GRAPH-based task orchestration framework."
9
9
  readme = "README.md"
10
10
  license = { text = "MIT" }
@@ -43,10 +43,9 @@ license-files = []
43
43
 
44
44
  [tool.setuptools.package-data]
45
45
  celestialflow = [
46
- "web/templates/*.html",
47
46
  "web/templates/**/*.html",
48
- "web/static/*",
49
47
  "web/static/**/*",
48
+ "web/config.json"
50
49
  ]
51
50
 
52
51
  [tool.pytest.ini_options]
@@ -22,7 +22,6 @@ from ..runtime.util_errors import UnconsumedError
22
22
  from ..runtime.util_types import (
23
23
  StageStatus,
24
24
  TerminationSignal,
25
- NullPrevStage,
26
25
  STAGE_STYLE,
27
26
  NULL_PREV_STAGE,
28
27
  )
@@ -585,9 +584,7 @@ class TaskGraph:
585
584
  start_time = stage_runtime.get("start_time", 0)
586
585
  last_elapsed = last_stage_status_dict.get("elapsed_time", 0)
587
586
  last_pending = last_stage_status_dict.get("tasks_pending", 0)
588
- elapsed = calc_elapsed(
589
- status, start_time, last_elapsed, last_pending, interval
590
- )
587
+ elapsed = calc_elapsed(status, last_elapsed, last_pending, interval)
591
588
 
592
589
  # 估算剩余时间
593
590
  remaining = calc_remaining(
@@ -17,6 +17,7 @@ class TaskChain(TaskGraph):
17
17
 
18
18
  :param stages: TaskStage 列表, 每个 TaskStage 节点将连接到下一个节点
19
19
  :param chain_mode: 控制任务链中各节点同时运行(process), 亦或者依次运行(serial)
20
+ :param log_level: 日志级别
20
21
  """
21
22
  for num, stage in enumerate(stages):
22
23
  stage_name = f"Stage {num + 1}"
@@ -24,7 +25,9 @@ class TaskChain(TaskGraph):
24
25
  stage.set_graph_context(next_stages, chain_mode, stage_name)
25
26
 
26
27
  root_stage = stages[0]
27
- super().__init__(root_stages=[root_stage], log_level=log_level)
28
+ super().__init__(
29
+ root_stages=[root_stage], schedule_mode="eager", log_level=log_level
30
+ )
28
31
 
29
32
  def start_chain(
30
33
  self, init_tasks_dict: dict, put_termination_signal: bool = True
@@ -53,8 +56,8 @@ class TaskCross(TaskGraph):
53
56
  :param layers:
54
57
  按层划分的任务节点列表。每个子列表代表一层,列表中的 TaskStage 将并行执行。
55
58
  相邻层之间的所有节点将建立全连接依赖(即每个上一层节点都连接到下一层所有节点)。
56
- :param schedule_mode:
57
- 控制任务图的调度布局模式
59
+ :param schedule_mode: 控制任务图的调度布局模式
60
+ :param log_level: 日志级别
58
61
  """
59
62
  for i in range(len(layers)):
60
63
  curr_layer = layers[i]
@@ -98,6 +101,7 @@ class TaskGrid(TaskGraph):
98
101
  任务网格,每个子列表代表一行,列表中的 TaskStage 将按行并行执行。
99
102
  每个节点将连接到其右侧和下方的节点。
100
103
  :param schedule_mode: 控制任务图的调度布局模式
104
+ :param log_level: 日志级别
101
105
  """
102
106
  rows, cols = len(grid), len(grid[0])
103
107
  for i in range(rows):
@@ -133,6 +137,7 @@ class TaskLoop(TaskGraph):
133
137
  由于环的结构特性, 强制使用 'eager' 节点模式
134
138
 
135
139
  :param stages: TaskStage 列表, 每个 TaskStage 节点将连接到下一个节点, 形成一个闭环
140
+ :param log_level: 日志级别
136
141
  """
137
142
  for num, stage in enumerate(stages):
138
143
  stage_name = f"Stage {num + 1}"
@@ -163,6 +168,7 @@ class TaskWheel(TaskGraph):
163
168
 
164
169
  :param center: 中心节点
165
170
  :param ring: 环节点
171
+ :param log_level: 日志级别
166
172
  """
167
173
  # 中心连向环
168
174
  center.set_graph_context(ring, "process", "Center")
@@ -190,6 +196,7 @@ class TaskComplete(TaskGraph):
190
196
  TaskComplete: 完全图结构,每个节点都连向除自己以外的所有其他节点
191
197
 
192
198
  :param stages: 所有 TaskStage 节点
199
+ :param log_level: 日志级别
193
200
  """
194
201
  for i, stage in enumerate(stages):
195
202
  next_stages = [s for j, s in enumerate(stages) if i != j]
@@ -44,6 +44,7 @@ class TaskReporter:
44
44
  self._stop_flag = Event()
45
45
  self._thread = None
46
46
  self._push_errors_mode = "meta"
47
+ self._session = requests.Session()
47
48
 
48
49
  self.interval = 5
49
50
  self.history_limit = 20
@@ -62,6 +63,7 @@ class TaskReporter:
62
63
  self._stop_flag.set()
63
64
  self._thread.join(timeout=2)
64
65
  self._thread = None
66
+ self._session.close()
65
67
  self.log_sinker.stop_reporter()
66
68
 
67
69
  def _pull_timeout(self) -> float:
@@ -88,6 +90,9 @@ class TaskReporter:
88
90
  self._pull_history_limit()
89
91
  self._pull_and_inject_tasks()
90
92
 
93
+ # 收集最新的任务图状态快照,确保推送的数据是最新的
94
+ self.task_graph.collect_runtime_snapshot()
95
+
91
96
  # 推送逻辑
92
97
  self._push_errors()
93
98
  self._push_status()
@@ -99,7 +104,7 @@ class TaskReporter:
99
104
  def _pull_interval(self) -> None:
100
105
  """从远程服务拉取上报间隔配置"""
101
106
  try:
102
- res = requests.get(
107
+ res = self._session.get(
103
108
  f"{self.base_url}/api/pull_interval", timeout=self._pull_timeout()
104
109
  )
105
110
  if res.ok:
@@ -111,7 +116,7 @@ class TaskReporter:
111
116
  def _pull_history_limit(self) -> None:
112
117
  """从远程服务拉取历史记录限制配置"""
113
118
  try:
114
- res = requests.get(
119
+ res = self._session.get(
115
120
  f"{self.base_url}/api/pull_history_limit", timeout=self._pull_timeout()
116
121
  )
117
122
  if res.ok:
@@ -123,7 +128,7 @@ class TaskReporter:
123
128
  def _pull_and_inject_tasks(self) -> None:
124
129
  """从远程服务拉取任务注入信息并注入任务"""
125
130
  try:
126
- res = requests.get(
131
+ res = self._session.get(
127
132
  f"{self.base_url}/api/pull_task_injection", timeout=self._pull_timeout()
128
133
  )
129
134
  if res.ok:
@@ -178,7 +183,7 @@ class TaskReporter:
178
183
  "jsonl_path": jsonl_path,
179
184
  "rev": rev,
180
185
  }
181
- response = requests.post(
186
+ response = self._session.post(
182
187
  f"{self.base_url}/api/push_errors_meta",
183
188
  json=payload,
184
189
  timeout=self._push_timeout(),
@@ -192,14 +197,14 @@ class TaskReporter:
192
197
 
193
198
  error_store = load_jsonl_logs(
194
199
  path=jsonl_path,
195
- keys=["ts", "error_id", "error_repr", "stage", "task_repr"],
200
+ keys=["ts", "error_id", "error_repr", "error", "stage", "task_repr"],
196
201
  )
197
202
  payload = {
198
203
  "errors": error_store,
199
204
  "jsonl_path": jsonl_path,
200
205
  "rev": rev,
201
206
  }
202
- response = requests.post(
207
+ response = self._session.post(
203
208
  f"{self.base_url}/api/push_errors_content",
204
209
  json=payload,
205
210
  timeout=self._push_timeout(),
@@ -209,10 +214,9 @@ class TaskReporter:
209
214
  def _push_status(self) -> None:
210
215
  """推送状态信息"""
211
216
  try:
212
- self.task_graph.collect_runtime_snapshot()
213
217
  status_data = self.task_graph.get_status_dict()
214
218
  payload = {"status": status_data}
215
- requests.post(
219
+ self._session.post(
216
220
  f"{self.base_url}/api/push_status",
217
221
  json=payload,
218
222
  timeout=self._push_timeout(),
@@ -225,7 +229,7 @@ class TaskReporter:
225
229
  try:
226
230
  structure = self.task_graph.get_structure_json()
227
231
  payload = {"items": structure}
228
- requests.post(
232
+ self._session.post(
229
233
  f"{self.base_url}/api/push_structure",
230
234
  json=payload,
231
235
  timeout=self._push_timeout(),
@@ -238,7 +242,7 @@ class TaskReporter:
238
242
  try:
239
243
  topology = self.task_graph.get_graph_topology()
240
244
  payload = {"topology": topology}
241
- requests.post(
245
+ self._session.post(
242
246
  f"{self.base_url}/api/push_topology",
243
247
  json=payload,
244
248
  timeout=self._push_timeout(),
@@ -251,7 +255,7 @@ class TaskReporter:
251
255
  try:
252
256
  summary = self.task_graph.get_graph_summary()
253
257
  payload = {"summary": summary}
254
- requests.post(
258
+ self._session.post(
255
259
  f"{self.base_url}/api/push_summary",
256
260
  json=payload,
257
261
  timeout=self._push_timeout(),
@@ -264,7 +268,7 @@ class TaskReporter:
264
268
  try:
265
269
  history = self.task_graph.get_stage_history()
266
270
  payload = {"history": history}
267
- requests.post(
271
+ self._session.post(
268
272
  f"{self.base_url}/api/push_history",
269
273
  json=payload,
270
274
  timeout=self._push_timeout(),
@@ -87,9 +87,9 @@ class LogSinker(BaseSinker):
87
87
 
88
88
  # ==== stage ====
89
89
  def start_stage(
90
- self, stage_tag: str, stage_mode: str, execution_mode: str, worker_limit: int
90
+ self, stage_tag: str, stage_mode: str, execution_mode: str, max_workers: int
91
91
  ) -> None:
92
- worker_repr = f"({worker_limit} workers)" if execution_mode != "serial" else ""
92
+ worker_repr = f"({max_workers} workers)" if execution_mode != "serial" else ""
93
93
  text = f"'{stage_tag}' start in {stage_mode}; execute tasks by {execution_mode}{worker_repr}."
94
94
  self._sink("INFO", text)
95
95
 
@@ -15,17 +15,17 @@ if TYPE_CHECKING:
15
15
 
16
16
 
17
17
  class TaskRunner:
18
- def __init__(self, task_executor: TaskExecutor, func, worker_limit: int):
18
+ def __init__(self, task_executor: TaskExecutor, func, max_workers: int):
19
19
  """
20
20
  初始化任务运行器
21
21
 
22
22
  :param task_executor: 任务执行器
23
23
  :param func: 任务函数
24
- :param worker_limit: 工作线程或进程数量限制
24
+ :param max_workers: 工作线程或进程数量限制
25
25
  """
26
26
  self.task_executor = task_executor
27
27
  self.func = func
28
- self.worker_limit = worker_limit
28
+ self.max_workers = max_workers
29
29
 
30
30
  self._pool: ThreadPoolExecutor | ProcessPoolExecutor | None = None
31
31
 
@@ -37,9 +37,9 @@ class TaskRunner:
37
37
  """
38
38
  # 可以复用的线程池或进程池
39
39
  if execution_mode == "thread" and self._pool is None:
40
- self._pool = ThreadPoolExecutor(max_workers=self.worker_limit)
40
+ self._pool = ThreadPoolExecutor(max_workers=self.max_workers)
41
41
  elif execution_mode == "process" and self._pool is None:
42
- self._pool = ProcessPoolExecutor(max_workers=self.worker_limit)
42
+ self._pool = ProcessPoolExecutor(max_workers=self.max_workers)
43
43
 
44
44
  def process_termination_signal(
45
45
  self, termination_pool: TerminationIdPool
@@ -206,7 +206,7 @@ class TaskRunner:
206
206
  """
207
207
  while True:
208
208
  semaphore = asyncio.Semaphore(
209
- self.task_executor.worker_limit
209
+ self.task_executor.max_workers
210
210
  ) # 限制并发数量
211
211
 
212
212
  async def sem_task(envelope: TaskEnvelope):
@@ -6,6 +6,10 @@ from .util_types import StageStatus
6
6
 
7
7
  # ==== calculate ====
8
8
  def calc_remaining(processed: int, pending: int, elapsed: float) -> float:
9
+ """
10
+ 基于已处理任务,剩余任务以及已消耗时间来计算剩余时间.
11
+ 不要瞧不起均值,在大规模数据下它可能是最有效的.
12
+ """
9
13
  if processed and pending:
10
14
  return pending / processed * elapsed
11
15
  return 0
@@ -13,15 +17,15 @@ def calc_remaining(processed: int, pending: int, elapsed: float) -> float:
13
17
 
14
18
  def calc_elapsed(
15
19
  status: StageStatus,
16
- start_time: float,
17
20
  last_elapsed: float,
18
21
  last_pending: int,
19
22
  interval: float,
20
23
  ) -> float:
21
- """更新时间消耗(仅在 RUNNING 且 pending 非 0 时刷新)"""
22
- if status == StageStatus.RUNNING and start_time:
24
+ """
25
+ 更新时间消耗
26
+ """
27
+ if status in (StageStatus.RUNNING, StageStatus.STOPPED):
23
28
  elapsed = last_elapsed
24
- # 如果上一次是 pending,则累计时间
25
29
  if last_pending:
26
30
  # 如果上一次活跃, 那么无论当前状况,累计一次更新时间
27
31
  elapsed += interval
@@ -121,7 +125,7 @@ def calc_global_remain_equal_pred(
121
125
  total_v += obs_each * scale.get(u, 1.0)
122
126
 
123
127
  scale[v] = total_v / max(1.0, proc_v) # 下游放大系数
124
- expect_pend_v = max(0.0, total_v - proc_v)
128
+ expect_pend_v = max(0.0, total_v - proc_v) # 理论上expect_pend_v >= pend_v
125
129
 
126
130
  # 时间估算:需要 avg time(秒/任务)
127
131
  expected_pending_map[v] = calc_remaining(proc_v, expect_pend_v, elapsed_v)
@@ -42,7 +42,7 @@ class TaskExecutor:
42
42
  self,
43
43
  func,
44
44
  execution_mode="serial",
45
- worker_limit=20,
45
+ max_workers=20,
46
46
  max_retries=1,
47
47
  max_info=50,
48
48
  unpack_task_args=False,
@@ -58,7 +58,7 @@ class TaskExecutor:
58
58
 
59
59
  :param func: 可调用对象
60
60
  :param execution_mode: 执行模式,可选 'serial', 'thread', 'process', 'async' (组合为 'TaskGraph' 时不可用 'process' 和 'async' 模式)
61
- :param worker_limit: 同时处理数量
61
+ :param max_workers: 同时处理数量
62
62
  :param max_retries: 任务的最大重试次数
63
63
  :param max_info: 日志中每条信息的最大长度
64
64
  :param unpack_task_args: 是否将任务参数解包
@@ -80,7 +80,7 @@ class TaskExecutor:
80
80
 
81
81
  self.set_func(func)
82
82
  self.set_execution_mode(execution_mode)
83
- self.worker_limit = worker_limit
83
+ self.max_workers = max_workers
84
84
  self.max_retries = max_retries
85
85
  self.max_info = max_info
86
86
 
@@ -177,7 +177,7 @@ class TaskExecutor:
177
177
  """
178
178
  初始化任务运行器
179
179
  """
180
- self.runner = TaskRunner(self, self.func, self.worker_limit)
180
+ self.runner = TaskRunner(self, self.func, self.max_workers)
181
181
 
182
182
  def init_listener(self) -> None:
183
183
  """
@@ -198,7 +198,7 @@ class TaskExecutor:
198
198
  return
199
199
 
200
200
  extra_desc = (
201
- f"{self.execution_mode}-{self.worker_limit}"
201
+ f"{self.execution_mode}-{self.max_workers}"
202
202
  if self.execution_mode != "serial"
203
203
  else "serial"
204
204
  )
@@ -307,7 +307,7 @@ class TaskExecutor:
307
307
  return (
308
308
  self.execution_mode
309
309
  if self.execution_mode == "serial"
310
- else f"{self.execution_mode}-{self.worker_limit}"
310
+ else f"{self.execution_mode}-{self.max_workers}"
311
311
  )
312
312
 
313
313
  def get_summary(self) -> dict:
@@ -218,7 +218,7 @@ class TaskStage(TaskExecutor):
218
218
  self.init_progress()
219
219
  self.init_env(input_queues, output_queues, fail_queue, log_queue)
220
220
  self.log_sinker.start_stage(
221
- self.get_tag(), self.stage_mode, self.execution_mode, self.worker_limit
221
+ self.get_tag(), self.stage_mode, self.execution_mode, self.max_workers
222
222
  )
223
223
  self.mark_running()
224
224
 
@@ -25,12 +25,15 @@ class TaskSplitter(TaskStage):
25
25
  self.init_extra_counter()
26
26
 
27
27
  def init_extra_counter(self) -> None:
28
+ """初始化额外的计数器"""
28
29
  self.split_counter = MPValue("i", 0)
29
30
 
30
31
  def reset_extra_counter(self) -> None:
32
+ """重置额外的计数器"""
31
33
  self.split_counter.value = 0
32
34
 
33
35
  def update_split_counter(self, add_value: int) -> None:
36
+ """更新 split 计数器"""
34
37
  self.split_counter.value += add_value
35
38
 
36
39
  def _split(self, *task: Any) -> tuple:
@@ -40,6 +43,13 @@ class TaskSplitter(TaskStage):
40
43
  return task
41
44
 
42
45
  def put_split_result(self, result: tuple, task_id: int) -> int:
46
+ """
47
+ 将 split 结果放入队列,并发出对应事件
48
+
49
+ :param result: split 的结果,必须是一个可迭代对象
50
+ :param task_id: 原始任务 ID,用于事件关联
51
+ :return: split 的子任务数量
52
+ """
43
53
  split_count = len(result)
44
54
  for idx, item in enumerate(result):
45
55
  split_id = self.ctree_client.emit(
@@ -94,6 +104,7 @@ class TaskSplitter(TaskStage):
94
104
 
95
105
  class TaskRouter(TaskStage):
96
106
  def __init__(self):
107
+ """初始化 TaskRouter"""
97
108
  super().__init__(
98
109
  func=self._route,
99
110
  execution_mode="serial",
@@ -103,14 +114,17 @@ class TaskRouter(TaskStage):
103
114
  self.init_extra_counter()
104
115
 
105
116
  def init_extra_counter(self) -> None:
117
+ """初始化额外的计数器"""
106
118
  # 每个 target_tag 一个计数器:用于让不同下游 stage 的 task_counter 统计正确
107
119
  self.route_counters: dict = {}
108
120
 
109
121
  def reset_extra_counter(self) -> None:
122
+ """重置额外的计数器"""
110
123
  for counter in self.route_counters.values():
111
124
  counter.value = 0
112
125
 
113
126
  def update_route_counter(self, target: str) -> None:
127
+ """更新 route 计数器"""
114
128
  self.route_counters[target].value += 1
115
129
 
116
130
  def _route(self, routed: tuple) -> tuple:
@@ -189,7 +203,7 @@ class TaskRedisTransport(TaskStage):
189
203
  super().__init__(
190
204
  func=self._transport,
191
205
  execution_mode="thread",
192
- worker_limit=4, # 允许 1~2 个线程偶发阻塞,但不会导致整体阻塞
206
+ max_workers=4, # 允许 1~2 个线程偶发阻塞,但不会导致整体阻塞
193
207
  unpack_task_args=unpack_task_args,
194
208
  )
195
209
  self.key = key
@@ -17,7 +17,7 @@ def _get_clone_init_kwargs(executor: TaskExecutor) -> dict:
17
17
  return {
18
18
  "func": executor.func,
19
19
  "execution_mode": executor.execution_mode,
20
- "worker_limit": executor.worker_limit,
20
+ "max_workers": executor.max_workers,
21
21
  "max_retries": executor.max_retries,
22
22
  "max_info": executor.max_info,
23
23
  "unpack_task_args": executor.unpack_task_args,
@@ -0,0 +1,35 @@
1
+ {
2
+ "theme": "dark",
3
+ "refreshInterval": 2000,
4
+ "historyLimit": 20,
5
+ "dashboard": {
6
+ "left": [
7
+ "mermaid",
8
+ "topology"
9
+ ],
10
+ "middle": [
11
+ "status"
12
+ ],
13
+ "right": [
14
+ "progress",
15
+ "summary"
16
+ ]
17
+ },
18
+ "cards": {
19
+ "mermaid": {
20
+ "title": "任务结构图"
21
+ },
22
+ "topology": {
23
+ "title": "图拓扑信息"
24
+ },
25
+ "status": {
26
+ "title": "节点运行状态"
27
+ },
28
+ "progress": {
29
+ "title": "节点完成走向"
30
+ },
31
+ "summary": {
32
+ "title": "总体状态摘要"
33
+ }
34
+ }
35
+ }