celestialflow 3.1.8__tar.gz → 3.1.9__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {celestialflow-3.1.8 → celestialflow-3.1.9}/PKG-INFO +22 -31
- {celestialflow-3.1.8 → celestialflow-3.1.9}/README.md +21 -30
- {celestialflow-3.1.8 → celestialflow-3.1.9}/pyproject.toml +1 -1
- {celestialflow-3.1.8 → celestialflow-3.1.9}/src/celestialflow/__init__.py +5 -1
- {celestialflow-3.1.8 → celestialflow-3.1.9}/src/celestialflow/graph/core_graph.py +1 -1
- {celestialflow-3.1.8 → celestialflow-3.1.9}/src/celestialflow/observability/__init__.py +4 -2
- celestialflow-3.1.9/src/celestialflow/observability/core_observer.py +26 -0
- celestialflow-3.1.9/src/celestialflow/observability/core_progress.py +30 -0
- {celestialflow-3.1.8 → celestialflow-3.1.9}/src/celestialflow/observability/core_report.py +33 -26
- {celestialflow-3.1.8 → celestialflow-3.1.9}/src/celestialflow/persistence/core_log.py +11 -18
- {celestialflow-3.1.8 → celestialflow-3.1.9}/src/celestialflow/runtime/core_dispatch.py +6 -3
- celestialflow-3.1.9/src/celestialflow/runtime/core_envelope.py +58 -0
- {celestialflow-3.1.8 → celestialflow-3.1.9}/src/celestialflow/runtime/core_metrics.py +1 -12
- {celestialflow-3.1.8 → celestialflow-3.1.9}/src/celestialflow/runtime/core_queue.py +8 -3
- {celestialflow-3.1.8 → celestialflow-3.1.9}/src/celestialflow/runtime/util_factories.py +2 -2
- {celestialflow-3.1.8 → celestialflow-3.1.9}/src/celestialflow/stage/core_executor.py +48 -38
- {celestialflow-3.1.8 → celestialflow-3.1.9}/src/celestialflow/stage/core_stage.py +4 -14
- {celestialflow-3.1.8 → celestialflow-3.1.9}/src/celestialflow/stage/core_stages.py +6 -4
- {celestialflow-3.1.8 → celestialflow-3.1.9}/src/celestialflow/utils/util_benchmark.py +1 -1
- {celestialflow-3.1.8 → celestialflow-3.1.9}/src/celestialflow/utils/util_clone.py +1 -5
- celestialflow-3.1.9/src/celestialflow/web/__init__.py +6 -0
- {celestialflow-3.1.8 → celestialflow-3.1.9}/src/celestialflow/web/core_server.py +12 -0
- {celestialflow-3.1.8 → celestialflow-3.1.9}/src/celestialflow/web/static/js/task_errors.js +34 -0
- {celestialflow-3.1.8 → celestialflow-3.1.9}/src/celestialflow/web/static/js/task_statuses.js +90 -90
- {celestialflow-3.1.8 → celestialflow-3.1.9}/src/celestialflow/web/static/js/task_structure.js +130 -130
- {celestialflow-3.1.8 → celestialflow-3.1.9}/src/celestialflow/web/static/js/task_summary.js +49 -49
- {celestialflow-3.1.8 → celestialflow-3.1.9}/src/celestialflow/web/static/js/utils.js +269 -268
- {celestialflow-3.1.8 → celestialflow-3.1.9}/src/celestialflow/web/static/ts/utils.ts +3 -2
- {celestialflow-3.1.8 → celestialflow-3.1.9}/src/celestialflow.egg-info/PKG-INFO +22 -31
- {celestialflow-3.1.8 → celestialflow-3.1.9}/src/celestialflow.egg-info/SOURCES.txt +2 -0
- celestialflow-3.1.9/tests/test_envelope.py +51 -0
- {celestialflow-3.1.8 → celestialflow-3.1.9}/tests/test_executor.py +177 -17
- {celestialflow-3.1.8 → celestialflow-3.1.9}/tests/test_graph.py +197 -36
- {celestialflow-3.1.8 → celestialflow-3.1.9}/tests/test_queue.py +17 -9
- {celestialflow-3.1.8 → celestialflow-3.1.9}/tests/test_stage.py +18 -4
- celestialflow-3.1.8/src/celestialflow/observability/core_progress.py +0 -93
- celestialflow-3.1.8/src/celestialflow/runtime/core_envelope.py +0 -56
- celestialflow-3.1.8/tests/test_envelope.py +0 -45
- {celestialflow-3.1.8 → celestialflow-3.1.9}/setup.cfg +0 -0
- {celestialflow-3.1.8 → celestialflow-3.1.9}/src/celestialflow/funnel/__init__.py +0 -0
- {celestialflow-3.1.8 → celestialflow-3.1.9}/src/celestialflow/funnel/core_inlet.py +0 -0
- {celestialflow-3.1.8 → celestialflow-3.1.9}/src/celestialflow/funnel/core_spout.py +0 -0
- {celestialflow-3.1.8 → celestialflow-3.1.9}/src/celestialflow/graph/__init__.py +0 -0
- {celestialflow-3.1.8 → celestialflow-3.1.9}/src/celestialflow/graph/core_structure.py +0 -0
- {celestialflow-3.1.8 → celestialflow-3.1.9}/src/celestialflow/graph/util_analysis.py +0 -0
- {celestialflow-3.1.8 → celestialflow-3.1.9}/src/celestialflow/graph/util_serialize.py +0 -0
- {celestialflow-3.1.8 → celestialflow-3.1.9}/src/celestialflow/persistence/__init__.py +0 -0
- {celestialflow-3.1.8 → celestialflow-3.1.9}/src/celestialflow/persistence/core_fail.py +0 -0
- {celestialflow-3.1.8 → celestialflow-3.1.9}/src/celestialflow/persistence/core_success.py +0 -0
- {celestialflow-3.1.8 → celestialflow-3.1.9}/src/celestialflow/persistence/util_constant.py +0 -0
- {celestialflow-3.1.8 → celestialflow-3.1.9}/src/celestialflow/persistence/util_jsonl.py +0 -0
- {celestialflow-3.1.8 → celestialflow-3.1.9}/src/celestialflow/runtime/__init__.py +0 -0
- {celestialflow-3.1.8 → celestialflow-3.1.9}/src/celestialflow/runtime/util_errors.py +0 -0
- {celestialflow-3.1.8 → celestialflow-3.1.9}/src/celestialflow/runtime/util_estimators.py +0 -0
- {celestialflow-3.1.8 → celestialflow-3.1.9}/src/celestialflow/runtime/util_hash.py +0 -0
- {celestialflow-3.1.8 → celestialflow-3.1.9}/src/celestialflow/runtime/util_queue.py +0 -0
- {celestialflow-3.1.8 → celestialflow-3.1.9}/src/celestialflow/runtime/util_types.py +0 -0
- {celestialflow-3.1.8 → celestialflow-3.1.9}/src/celestialflow/stage/__init__.py +0 -0
- {celestialflow-3.1.8 → celestialflow-3.1.9}/src/celestialflow/utils/__init__.py +0 -0
- {celestialflow-3.1.8 → celestialflow-3.1.9}/src/celestialflow/utils/util_collections.py +0 -0
- {celestialflow-3.1.8 → celestialflow-3.1.9}/src/celestialflow/utils/util_debug.py +0 -0
- {celestialflow-3.1.8 → celestialflow-3.1.9}/src/celestialflow/utils/util_format.py +0 -0
- {celestialflow-3.1.8 → celestialflow-3.1.9}/src/celestialflow/web/config.json +0 -0
- {celestialflow-3.1.8 → celestialflow-3.1.9}/src/celestialflow/web/static/css/_colors.css +0 -0
- {celestialflow-3.1.8 → celestialflow-3.1.9}/src/celestialflow/web/static/css/base.css +0 -0
- {celestialflow-3.1.8 → celestialflow-3.1.9}/src/celestialflow/web/static/css/dashboard.css +0 -0
- {celestialflow-3.1.8 → celestialflow-3.1.9}/src/celestialflow/web/static/css/errors.css +0 -0
- {celestialflow-3.1.8 → celestialflow-3.1.9}/src/celestialflow/web/static/css/inject.css +0 -0
- {celestialflow-3.1.8 → celestialflow-3.1.9}/src/celestialflow/web/static/favicon.ico +0 -0
- {celestialflow-3.1.8 → celestialflow-3.1.9}/src/celestialflow/web/static/js/main.js +0 -0
- {celestialflow-3.1.8 → celestialflow-3.1.9}/src/celestialflow/web/static/js/task_analysis.js +0 -0
- {celestialflow-3.1.8 → celestialflow-3.1.9}/src/celestialflow/web/static/js/task_config.js +0 -0
- {celestialflow-3.1.8 → celestialflow-3.1.9}/src/celestialflow/web/static/js/task_history.js +0 -0
- {celestialflow-3.1.8 → celestialflow-3.1.9}/src/celestialflow/web/static/js/task_injection.js +0 -0
- {celestialflow-3.1.8 → celestialflow-3.1.9}/src/celestialflow/web/static/js/task_topology.js +0 -0
- {celestialflow-3.1.8 → celestialflow-3.1.9}/src/celestialflow/web/static/js/web_config.js +0 -0
- {celestialflow-3.1.8 → celestialflow-3.1.9}/src/celestialflow/web/static/ts/globals.d.ts +0 -0
- {celestialflow-3.1.8 → celestialflow-3.1.9}/src/celestialflow/web/static/ts/main.ts +0 -0
- {celestialflow-3.1.8 → celestialflow-3.1.9}/src/celestialflow/web/static/ts/task_analysis.ts +0 -0
- {celestialflow-3.1.8 → celestialflow-3.1.9}/src/celestialflow/web/static/ts/task_errors.ts +0 -0
- {celestialflow-3.1.8 → celestialflow-3.1.9}/src/celestialflow/web/static/ts/task_history.ts +0 -0
- {celestialflow-3.1.8 → celestialflow-3.1.9}/src/celestialflow/web/static/ts/task_injection.ts +0 -0
- {celestialflow-3.1.8 → celestialflow-3.1.9}/src/celestialflow/web/static/ts/task_statuses.ts +0 -0
- {celestialflow-3.1.8 → celestialflow-3.1.9}/src/celestialflow/web/static/ts/task_structure.ts +0 -0
- {celestialflow-3.1.8 → celestialflow-3.1.9}/src/celestialflow/web/static/ts/task_summary.ts +0 -0
- {celestialflow-3.1.8 → celestialflow-3.1.9}/src/celestialflow/web/static/ts/web_config.ts +0 -0
- {celestialflow-3.1.8 → celestialflow-3.1.9}/src/celestialflow/web/templates/index.html +0 -0
- {celestialflow-3.1.8 → celestialflow-3.1.9}/src/celestialflow/web/util_cal.py +0 -0
- {celestialflow-3.1.8 → celestialflow-3.1.9}/src/celestialflow/web/util_config.py +0 -0
- {celestialflow-3.1.8 → celestialflow-3.1.9}/src/celestialflow/web/util_error.py +0 -0
- {celestialflow-3.1.8 → celestialflow-3.1.9}/src/celestialflow.egg-info/dependency_links.txt +0 -0
- {celestialflow-3.1.8 → celestialflow-3.1.9}/src/celestialflow.egg-info/entry_points.txt +0 -0
- {celestialflow-3.1.8 → celestialflow-3.1.9}/src/celestialflow.egg-info/requires.txt +0 -0
- {celestialflow-3.1.8 → celestialflow-3.1.9}/src/celestialflow.egg-info/top_level.txt +0 -0
- {celestialflow-3.1.8 → celestialflow-3.1.9}/tests/test_metrics.py +0 -0
- {celestialflow-3.1.8 → celestialflow-3.1.9}/tests/test_utils.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: celestialflow
|
|
3
|
-
Version: 3.1.8
|
|
3
|
+
Version: 3.1.9
|
|
4
4
|
Summary: A flexible GRAPH-based task orchestration framework.
|
|
5
5
|
Author-email: Mr-xiaotian <mingxiaomingtian@gmail.com>
|
|
6
6
|
License: MIT
|
|
@@ -62,10 +62,7 @@ TaskExecutor 实现了对任务的结果缓存,任务去重,进度条显示
|
|
|
62
62
|
|
|
63
63
|
但除去直接使用 TaskExecutor,更重要的是使用其子类**TaskStage**。TaskStage 可以互相连接,形成具有上游与下游依赖关系的任务图(**TaskGraph**)。下游 stage 会自动接收上游执行完成的结果作为输入,从而形成明确的数据流。
|
|
64
64
|
|
|
65
|
-
TaskStage
|
|
66
|
-
|
|
67
|
-
* **线性(serial)**
|
|
68
|
-
* **多线程(thread)**
|
|
65
|
+
TaskStage 的任务执行模式同样包含三种,与TaskExecutor中一致。
|
|
69
66
|
|
|
70
67
|
在图级别上,每个 Stage 支持三种上下文模式:
|
|
71
68
|
|
|
@@ -153,13 +150,10 @@ def square(x):
|
|
|
153
150
|
|
|
154
151
|
if __name__ == "__main__":
|
|
155
152
|
# 定义两个任务节点
|
|
156
|
-
stage1 = TaskStage(add, execution_mode="thread", unpack_task_args=True)
|
|
157
|
-
stage2 = TaskStage(square, execution_mode="thread")
|
|
153
|
+
stage1 = TaskStage(name="Adder", func=add, execution_mode="thread", unpack_task_args=True, stage_mode="process")
|
|
154
|
+
stage2 = TaskStage(name="Squarer", func=square, execution_mode="thread", stage_mode="process")
|
|
158
155
|
|
|
159
156
|
# 构建任务图结构
|
|
160
|
-
stage1 = TaskStage(func=add, execution_mode="thread", unpack_task_args=True, stage_mode="process", name="Adder")
|
|
161
|
-
stage2 = TaskStage(func=square, execution_mode="thread", stage_mode="process", name="Squarer")
|
|
162
|
-
|
|
163
157
|
graph = TaskGraph()
|
|
164
158
|
graph.set_stages(root_stages=[stage1], stages=[stage2])
|
|
165
159
|
graph.connect([stage1], [stage2])
|
|
@@ -253,12 +247,12 @@ flowchart TD
|
|
|
253
247
|
| 依赖包 | 说明 |
|
|
254
248
|
| ----------------- | ---- |
|
|
255
249
|
| **Python ≥ 3.10** | 运行环境,建议使用 3.10 及以上版本 |
|
|
256
|
-
| **tqdm** | 控制台进度条显示,用于任务执行可视化 |
|
|
257
250
|
| **fastapi** | Web 服务接口框架(用于任务可视化与远程控制) |
|
|
258
251
|
| **uvicorn** | FastAPI 的高性能 ASGI 服务器 |
|
|
259
252
|
| **requests** | HTTP 客户端库,用于任务状态上报与远程调用 |
|
|
260
253
|
| **networkx** | 任务图(TaskGraph)结构与依赖分析 |
|
|
261
254
|
| **jinja2** | FastAPI 模板引擎,用于 Web 可视化界面渲染 |
|
|
255
|
+
| **tqdm** | 可选组件,进度条显示,用于任务执行可视化 |
|
|
262
256
|
| **redis** | 可选组件,用于分布式任务通信(`TaskRedis*` 系列模块) |
|
|
263
257
|
| **celestialtree** | 可选组件,用于任务状态上报与远程调用(`ctree_client`) |
|
|
264
258
|
|
|
@@ -267,32 +261,29 @@ flowchart TD
|
|
|
267
261
|
<p align="center">
|
|
268
262
|
<img src="https://raw.githubusercontent.com/Mr-xiaotian/CelestialFlow/main/img/file_structure.svg" alt="FileStructure" />
|
|
269
263
|
<br/>
|
|
270
|
-
<em>celestial-flow 3.1.8</em>
|
|
264
|
+
<em>celestial-flow 3.1.9</em>
|
|
271
265
|
</p>
|
|
272
266
|
|
|
273
267
|
(该视图由我的另一个项目[CelestialVault](https://github.com/Mr-xiaotian/CelestialVault)中inst_file.FileTree.print_tree()生成。转换为图片则借助[Carbon](https://carbon.now.sh)。)
|
|
274
268
|
|
|
275
269
|
## 版本日志(Version Log)
|
|
276
|
-
- 3.1.8
|
|
277
|
-
|
|
278
|
-
|
|
279
|
-
|
|
280
|
-
|
|
281
|
-
|
|
282
|
-
|
|
283
|
-
|
|
284
|
-
|
|
285
|
-
|
|
286
|
-
|
|
287
|
-
-
|
|
288
|
-
|
|
289
|
-
|
|
290
|
-
-
|
|
291
|
-
- 修复retry任务无法被去重的问题;
|
|
270
|
+
- 3.1.9
|
|
271
|
+
- feat:
|
|
272
|
+
- [Important] 在TaskStage中允许使用 `async` execution_mode;
|
|
273
|
+
- 3.1.8版本中其实已经可以实现, 在3.1.9版本中彻底测试通过;
|
|
274
|
+
- [Important] 创建基类BaseObserver, 现在可以从外界接入自定义Observer来检测运行状况;
|
|
275
|
+
- 同时创建轻量观察者CallbackObserver;
|
|
276
|
+
- 在TaskStage中添加init字段`log_level`, 自此stage init字段集实现对executor init字段集的包含关系, 仅多出`stage_mode`字段;
|
|
277
|
+
- escapeHtml中添加更多替换项;
|
|
278
|
+
- 在dispatch_thread中添加对futures的清理, 避免无用future的堆积;
|
|
279
|
+
- 基于bench\bench_futures_memory.py, 在十万级任务下futures内存开销为177.01 MB, 在定期清理后则为0.48 MB
|
|
280
|
+
- refactor:
|
|
281
|
+
- 重构TaskProgress, 使其继承BaseObserver;
|
|
282
|
+
- 接口破坏性重构, TaskExecutor的show_progress参数已删除, 现在只有显性的add_observer TaskProgress后才会使用progress
|
|
283
|
+
- 具体使用参考demo\demo_executor.py
|
|
284
|
+
- 重构TaskEnvelope, 不再使用wrap与unwrap, 同时hash值惰性求解;
|
|
292
285
|
- chore:
|
|
293
|
-
-
|
|
294
|
-
- 重组docs目录结构, 将reference改为src;
|
|
295
|
-
- docs中新增en/与ja/, 并将原有中文文档移至zh-CN/;
|
|
286
|
+
- 删除部分无用代码以提高代码质量;
|
|
296
287
|
|
|
297
288
|
更多过往日志可看:
|
|
298
289
|
|
|
@@ -37,10 +37,7 @@ TaskExecutor 实现了对任务的结果缓存,任务去重,进度条显示
|
|
|
37
37
|
|
|
38
38
|
但除去直接使用 TaskExecutor,更重要的是使用其子类**TaskStage**。TaskStage 可以互相连接,形成具有上游与下游依赖关系的任务图(**TaskGraph**)。下游 stage 会自动接收上游执行完成的结果作为输入,从而形成明确的数据流。
|
|
39
39
|
|
|
40
|
-
TaskStage
|
|
41
|
-
|
|
42
|
-
* **线性(serial)**
|
|
43
|
-
* **多线程(thread)**
|
|
40
|
+
TaskStage 的任务执行模式同样包含三种,与TaskExecutor中一致。
|
|
44
41
|
|
|
45
42
|
在图级别上,每个 Stage 支持三种上下文模式:
|
|
46
43
|
|
|
@@ -128,13 +125,10 @@ def square(x):
|
|
|
128
125
|
|
|
129
126
|
if __name__ == "__main__":
|
|
130
127
|
# 定义两个任务节点
|
|
131
|
-
stage1 = TaskStage(add, execution_mode="thread", unpack_task_args=True)
|
|
132
|
-
stage2 = TaskStage(square, execution_mode="thread")
|
|
128
|
+
stage1 = TaskStage(name="Adder", func=add, execution_mode="thread", unpack_task_args=True, stage_mode="process")
|
|
129
|
+
stage2 = TaskStage(name="Squarer", func=square, execution_mode="thread", stage_mode="process")
|
|
133
130
|
|
|
134
131
|
# 构建任务图结构
|
|
135
|
-
stage1 = TaskStage(func=add, execution_mode="thread", unpack_task_args=True, stage_mode="process", name="Adder")
|
|
136
|
-
stage2 = TaskStage(func=square, execution_mode="thread", stage_mode="process", name="Squarer")
|
|
137
|
-
|
|
138
132
|
graph = TaskGraph()
|
|
139
133
|
graph.set_stages(root_stages=[stage1], stages=[stage2])
|
|
140
134
|
graph.connect([stage1], [stage2])
|
|
@@ -228,12 +222,12 @@ flowchart TD
|
|
|
228
222
|
| 依赖包 | 说明 |
|
|
229
223
|
| ----------------- | ---- |
|
|
230
224
|
| **Python ≥ 3.10** | 运行环境,建议使用 3.10 及以上版本 |
|
|
231
|
-
| **tqdm** | 控制台进度条显示,用于任务执行可视化 |
|
|
232
225
|
| **fastapi** | Web 服务接口框架(用于任务可视化与远程控制) |
|
|
233
226
|
| **uvicorn** | FastAPI 的高性能 ASGI 服务器 |
|
|
234
227
|
| **requests** | HTTP 客户端库,用于任务状态上报与远程调用 |
|
|
235
228
|
| **networkx** | 任务图(TaskGraph)结构与依赖分析 |
|
|
236
229
|
| **jinja2** | FastAPI 模板引擎,用于 Web 可视化界面渲染 |
|
|
230
|
+
| **tqdm** | 可选组件,进度条显示,用于任务执行可视化 |
|
|
237
231
|
| **redis** | 可选组件,用于分布式任务通信(`TaskRedis*` 系列模块) |
|
|
238
232
|
| **celestialtree** | 可选组件,用于任务状态上报与远程调用(`ctree_client`) |
|
|
239
233
|
|
|
@@ -242,32 +236,29 @@ flowchart TD
|
|
|
242
236
|
<p align="center">
|
|
243
237
|
<img src="https://raw.githubusercontent.com/Mr-xiaotian/CelestialFlow/main/img/file_structure.svg" alt="FileStructure" />
|
|
244
238
|
<br/>
|
|
245
|
-
<em>celestial-flow 3.1.8</em>
|
|
239
|
+
<em>celestial-flow 3.1.9</em>
|
|
246
240
|
</p>
|
|
247
241
|
|
|
248
242
|
(该视图由我的另一个项目[CelestialVault](https://github.com/Mr-xiaotian/CelestialVault)中inst_file.FileTree.print_tree()生成。转换为图片则借助[Carbon](https://carbon.now.sh)。)
|
|
249
243
|
|
|
250
244
|
## 版本日志(Version Log)
|
|
251
|
-
- 3.1.8
|
|
252
|
-
|
|
253
|
-
|
|
254
|
-
|
|
255
|
-
|
|
256
|
-
|
|
257
|
-
|
|
258
|
-
|
|
259
|
-
|
|
260
|
-
|
|
261
|
-
|
|
262
|
-
-
|
|
263
|
-
|
|
264
|
-
|
|
265
|
-
-
|
|
266
|
-
- 修复retry任务无法被去重的问题;
|
|
245
|
+
- 3.1.9
|
|
246
|
+
- feat:
|
|
247
|
+
- [Important] 在TaskStage中允许使用 `async` execution_mode;
|
|
248
|
+
- 3.1.8版本中其实已经可以实现, 在3.1.9版本中彻底测试通过;
|
|
249
|
+
- [Important] 创建基类BaseObserver, 现在可以从外界接入自定义Observer来检测运行状况;
|
|
250
|
+
- 同时创建轻量观察者CallbackObserver;
|
|
251
|
+
- 在TaskStage中添加init字段`log_level`, 自此stage init字段集实现对executor init字段集的包含关系, 仅多出`stage_mode`字段;
|
|
252
|
+
- escapeHtml中添加更多替换项;
|
|
253
|
+
- 在dispatch_thread中添加对futures的清理, 避免无用future的堆积;
|
|
254
|
+
- 基于bench\bench_futures_memory.py, 在十万级任务下futures内存开销为177.01 MB, 在定期清理后则为0.48 MB
|
|
255
|
+
- refactor:
|
|
256
|
+
- 重构TaskProgress, 使其继承BaseObserver;
|
|
257
|
+
- 接口破坏性重构, TaskExecutor的show_progress参数已删除, 现在只有显性的add_observer TaskProgress后才会使用progress
|
|
258
|
+
- 具体使用参考demo\demo_executor.py
|
|
259
|
+
- 重构TaskEnvelope, 不再使用wrap与unwrap, 同时hash值惰性求解;
|
|
267
260
|
- chore:
|
|
268
|
-
-
|
|
269
|
-
- 重组docs目录结构, 将reference改为src;
|
|
270
|
-
- docs中新增en/与ja/, 并将原有中文文档移至zh-CN/;
|
|
261
|
+
- 删除部分无用代码以提高代码质量;
|
|
271
262
|
|
|
272
263
|
更多过往日志可看:
|
|
273
264
|
|
|
@@ -8,6 +8,7 @@ from .graph import (
|
|
|
8
8
|
TaskLoop,
|
|
9
9
|
TaskWheel,
|
|
10
10
|
)
|
|
11
|
+
from .observability import BaseObserver, CallbackObserver, TaskProgress
|
|
11
12
|
from .persistence.util_jsonl import (
|
|
12
13
|
load_jsonl_logs,
|
|
13
14
|
load_task_by_error,
|
|
@@ -26,7 +27,7 @@ from .stage import (
|
|
|
26
27
|
)
|
|
27
28
|
from .utils.util_benchmark import benchmark_executor, benchmark_graph
|
|
28
29
|
from .utils.util_format import format_table
|
|
29
|
-
from .web
|
|
30
|
+
from .web import TaskWebServer
|
|
30
31
|
|
|
31
32
|
__all__ = [
|
|
32
33
|
"TaskGraph",
|
|
@@ -36,6 +37,9 @@ __all__ = [
|
|
|
36
37
|
"TaskComplete",
|
|
37
38
|
"TaskWheel",
|
|
38
39
|
"TaskGrid",
|
|
40
|
+
"BaseObserver",
|
|
41
|
+
"CallbackObserver",
|
|
42
|
+
"TaskProgress",
|
|
39
43
|
"TaskExecutor",
|
|
40
44
|
"TaskStage",
|
|
41
45
|
"TaskSplitter",
|
|
@@ -423,7 +423,7 @@ class TaskGraph:
|
|
|
423
423
|
CTreeEvent.TASK_INPUT,
|
|
424
424
|
payload=stage.get_summary(),
|
|
425
425
|
)
|
|
426
|
-
envelope = TaskEnvelope
|
|
426
|
+
envelope = TaskEnvelope(task, input_id, source="input")
|
|
427
427
|
in_queue.put(envelope)
|
|
428
428
|
input_ids.add(input_id)
|
|
429
429
|
|
|
@@ -1,10 +1,12 @@
|
|
|
1
1
|
# observability/__init__.py
|
|
2
|
-
from .
|
|
2
|
+
from .core_observer import BaseObserver, CallbackObserver
|
|
3
|
+
from .core_progress import TaskProgress
|
|
3
4
|
from .core_report import NullTaskReporter, TaskReporter
|
|
4
5
|
|
|
5
6
|
__all__ = [
|
|
7
|
+
"BaseObserver",
|
|
8
|
+
"CallbackObserver",
|
|
6
9
|
"TaskReporter",
|
|
7
10
|
"NullTaskReporter",
|
|
8
11
|
"TaskProgress",
|
|
9
|
-
"NullTaskProgress",
|
|
10
12
|
]
|
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
# observability/core_observer.py
|
|
2
|
+
from __future__ import annotations
|
|
3
|
+
|
|
4
|
+
|
|
5
|
+
class BaseObserver:
|
|
6
|
+
"""执行器生命周期观察者基类,子类按需覆写。"""
|
|
7
|
+
|
|
8
|
+
def on_start(self, name: str, total: int) -> None: ...
|
|
9
|
+
|
|
10
|
+
def on_task_success(self, count: int = 1) -> None: ...
|
|
11
|
+
|
|
12
|
+
def on_task_fail(self, count: int = 1) -> None: ...
|
|
13
|
+
|
|
14
|
+
def on_task_duplicate(self, count: int = 1) -> None: ...
|
|
15
|
+
|
|
16
|
+
def on_tasks_added(self, count: int) -> None: ...
|
|
17
|
+
|
|
18
|
+
def on_finish(self) -> None: ...
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
class CallbackObserver(BaseObserver):
|
|
22
|
+
"""通过回调函数创建的轻量观察者,无需定义子类。"""
|
|
23
|
+
|
|
24
|
+
def __init__(self, **callbacks):
|
|
25
|
+
for name, fn in callbacks.items():
|
|
26
|
+
setattr(self, name, fn)
|
|
@@ -0,0 +1,30 @@
|
|
|
1
|
+
# observability/core_progress.py
|
|
2
|
+
from __future__ import annotations
|
|
3
|
+
|
|
4
|
+
from tqdm import tqdm
|
|
5
|
+
|
|
6
|
+
from .core_observer import BaseObserver
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
class TaskProgress(BaseObserver):
|
|
10
|
+
"""基于 tqdm 的进度条观察者"""
|
|
11
|
+
|
|
12
|
+
def on_start(self, name: str, total: int) -> None:
|
|
13
|
+
self._bar = tqdm(total=total, desc=name)
|
|
14
|
+
|
|
15
|
+
def on_task_success(self, count: int = 1) -> None:
|
|
16
|
+
self._bar.update(count)
|
|
17
|
+
|
|
18
|
+
def on_task_fail(self, count: int = 1) -> None:
|
|
19
|
+
self._bar.update(count)
|
|
20
|
+
|
|
21
|
+
def on_task_duplicate(self, count: int = 1) -> None:
|
|
22
|
+
self._bar.update(count)
|
|
23
|
+
|
|
24
|
+
def on_tasks_added(self, count: int) -> None:
|
|
25
|
+
if count:
|
|
26
|
+
self._bar.total += count
|
|
27
|
+
self._bar.refresh()
|
|
28
|
+
|
|
29
|
+
def on_finish(self) -> None:
|
|
30
|
+
self._bar.close()
|
|
@@ -109,9 +109,11 @@ class TaskReporter:
|
|
|
109
109
|
res = self._session.get(
|
|
110
110
|
f"{self.base_url}/api/pull_interval", timeout=self._pull_timeout()
|
|
111
111
|
)
|
|
112
|
-
if res.ok:
|
|
113
|
-
interval
|
|
114
|
-
|
|
112
|
+
if not res.ok:
|
|
113
|
+
raise RuntimeError(f"Failed to pull interval: {res.status_code}")
|
|
114
|
+
|
|
115
|
+
interval = res.json().get("interval", 5)
|
|
116
|
+
self.interval = max(1.0, min(interval, 60.0))
|
|
115
117
|
except Exception as e:
|
|
116
118
|
self.log_inlet.pull_interval_failed(e)
|
|
117
119
|
|
|
@@ -121,9 +123,11 @@ class TaskReporter:
|
|
|
121
123
|
res = self._session.get(
|
|
122
124
|
f"{self.base_url}/api/pull_history_limit", timeout=self._pull_timeout()
|
|
123
125
|
)
|
|
124
|
-
if res.ok:
|
|
125
|
-
|
|
126
|
-
|
|
126
|
+
if not res.ok:
|
|
127
|
+
raise RuntimeError(f"Failed to pull history limit: {res.status_code}")
|
|
128
|
+
|
|
129
|
+
history_limit = res.json().get("historyLimit", 20)
|
|
130
|
+
self.history_limit = max(1, min(history_limit, 100))
|
|
127
131
|
except Exception as e:
|
|
128
132
|
self.log_inlet.pull_history_limit_failed(e)
|
|
129
133
|
|
|
@@ -133,26 +137,28 @@ class TaskReporter:
|
|
|
133
137
|
res = self._session.get(
|
|
134
138
|
f"{self.base_url}/api/pull_task_injection", timeout=self._pull_timeout()
|
|
135
139
|
)
|
|
136
|
-
if res.ok:
|
|
137
|
-
|
|
138
|
-
|
|
139
|
-
|
|
140
|
-
|
|
141
|
-
|
|
142
|
-
|
|
143
|
-
|
|
144
|
-
|
|
145
|
-
|
|
146
|
-
|
|
147
|
-
|
|
148
|
-
|
|
149
|
-
|
|
150
|
-
|
|
151
|
-
|
|
152
|
-
|
|
153
|
-
|
|
154
|
-
|
|
155
|
-
|
|
140
|
+
if not res.ok:
|
|
141
|
+
raise RuntimeError(f"Failed to pull task injection: {res.status_code}")
|
|
142
|
+
|
|
143
|
+
injection_tasks: list[dict] = res.json()
|
|
144
|
+
for injection in injection_tasks:
|
|
145
|
+
target_stage = injection.get("node")
|
|
146
|
+
task_datas = injection.get("task_datas")
|
|
147
|
+
if target_stage is None or task_datas is None:
|
|
148
|
+
continue
|
|
149
|
+
|
|
150
|
+
# 这里你可以按需注入到不同的节点
|
|
151
|
+
task_datas = [
|
|
152
|
+
task if task != "TERMINATION_SIGNAL" else TERMINATION_SIGNAL
|
|
153
|
+
for task in task_datas
|
|
154
|
+
]
|
|
155
|
+
try:
|
|
156
|
+
self.task_graph.put_stage_queue(
|
|
157
|
+
{target_stage: task_datas}, put_termination_signal=False
|
|
158
|
+
)
|
|
159
|
+
self.log_inlet.inject_tasks_success(target_stage, task_datas)
|
|
160
|
+
except Exception as e:
|
|
161
|
+
self.log_inlet.inject_tasks_failed(target_stage, task_datas, e)
|
|
156
162
|
except Exception as e:
|
|
157
163
|
self.log_inlet.pull_tasks_failed(e)
|
|
158
164
|
|
|
@@ -162,6 +168,7 @@ class TaskReporter:
|
|
|
162
168
|
try:
|
|
163
169
|
current_rev = self.task_graph.get_total_error_num()
|
|
164
170
|
jsonl_path = self.task_graph.get_fallback_path()
|
|
171
|
+
resp = {"ok": True, "cached": False} # 默认响应,避免未定义变量
|
|
165
172
|
|
|
166
173
|
# 无新增错误,跳过
|
|
167
174
|
if current_rev == self._last_pushed_errors_rev:
|
|
@@ -312,17 +312,17 @@ class LogInlet(BaseInlet):
|
|
|
312
312
|
|
|
313
313
|
# ==== queue ====
|
|
314
314
|
def put_item(
|
|
315
|
-
self, item_type: str, item_id: int,
|
|
315
|
+
self, item_type: str, item_id: int, in_tag: str, out_tag: str
|
|
316
316
|
) -> None:
|
|
317
317
|
"""记录队列 put 操作"""
|
|
318
|
-
edge = f"'{
|
|
318
|
+
edge = f"'{in_tag}' -> '{out_tag}'"
|
|
319
319
|
self._log("TRACE", f"Put {item_type}#{item_id} into Edge({edge}).")
|
|
320
320
|
|
|
321
321
|
def put_item_error(
|
|
322
|
-
self,
|
|
322
|
+
self, in_tag: str, out_tag: str, exception: Exception
|
|
323
323
|
) -> None:
|
|
324
324
|
"""记录队列 put 失败"""
|
|
325
|
-
edge = f"'{
|
|
325
|
+
edge = f"'{in_tag}' -> '{out_tag}'"
|
|
326
326
|
exception_text = str(exception).replace("\n", " ")
|
|
327
327
|
self._log(
|
|
328
328
|
"WARNING",
|
|
@@ -330,17 +330,17 @@ class LogInlet(BaseInlet):
|
|
|
330
330
|
)
|
|
331
331
|
|
|
332
332
|
def get_item(
|
|
333
|
-
self, item_type: str, item_id: int,
|
|
333
|
+
self, item_type: str, item_id: int, in_tag: str, out_tag: str
|
|
334
334
|
) -> None:
|
|
335
335
|
"""记录队列 get 操作"""
|
|
336
|
-
edge = f"'{
|
|
336
|
+
edge = f"'{in_tag}' -> '{out_tag}'"
|
|
337
337
|
self._log("TRACE", f"Get {item_type}#{item_id} from Edge({edge}).")
|
|
338
338
|
|
|
339
339
|
def get_item_error(
|
|
340
|
-
self,
|
|
340
|
+
self, in_tag: str, out_tag: str, exception: Exception
|
|
341
341
|
) -> None:
|
|
342
342
|
"""记录队列 get 失败"""
|
|
343
|
-
edge = f"'{
|
|
343
|
+
edge = f"'{in_tag}' -> '{out_tag}'"
|
|
344
344
|
exception_text = str(exception).replace("\n", " ")
|
|
345
345
|
self._log(
|
|
346
346
|
"WARNING",
|
|
@@ -415,11 +415,11 @@ class LogInlet(BaseInlet):
|
|
|
415
415
|
f"[Reporter] Push 'structure' failed: {type(exception).__name__}({exception}).",
|
|
416
416
|
)
|
|
417
417
|
|
|
418
|
-
def
|
|
419
|
-
"""
|
|
418
|
+
def push_analysis_failed(self, exception: Exception) -> None:
|
|
419
|
+
"""记录推送分析信息失败"""
|
|
420
420
|
self._log(
|
|
421
421
|
"WARNING",
|
|
422
|
-
f"[Reporter] Push '
|
|
422
|
+
f"[Reporter] Push 'analysis' failed: {type(exception).__name__}({exception}).",
|
|
423
423
|
)
|
|
424
424
|
|
|
425
425
|
def push_summary_failed(self, exception: Exception) -> None:
|
|
@@ -429,13 +429,6 @@ class LogInlet(BaseInlet):
|
|
|
429
429
|
f"[Reporter] Push 'summary' failed: {type(exception).__name__}({exception}).",
|
|
430
430
|
)
|
|
431
431
|
|
|
432
|
-
def push_analysis_failed(self, exception: Exception) -> None:
|
|
433
|
-
"""记录推送分析信息失败"""
|
|
434
|
-
self._log(
|
|
435
|
-
"WARNING",
|
|
436
|
-
f"[Reporter] Push 'analysis' failed: {type(exception).__name__}({exception}).",
|
|
437
|
-
)
|
|
438
|
-
|
|
439
432
|
def push_history_failed(self, exception: Exception) -> None:
|
|
440
433
|
"""记录推送历史信息失败"""
|
|
441
434
|
self._log(
|
|
@@ -73,7 +73,7 @@ class TaskDispatch:
|
|
|
73
73
|
:param task_envelope: 包含任务信息的信封
|
|
74
74
|
:return: 是否命中重复任务
|
|
75
75
|
"""
|
|
76
|
-
|
|
76
|
+
task_hash = task_envelope.get_hash()
|
|
77
77
|
|
|
78
78
|
if self.task_executor.metrics.is_duplicate(task_hash):
|
|
79
79
|
self.task_executor.deal_duplicate(task_envelope)
|
|
@@ -87,7 +87,7 @@ class TaskDispatch:
|
|
|
87
87
|
|
|
88
88
|
:param task_envelope: 包含任务信息的信封
|
|
89
89
|
"""
|
|
90
|
-
task
|
|
90
|
+
task = task_envelope.get_task()
|
|
91
91
|
max_retries = self.task_executor.max_retries
|
|
92
92
|
|
|
93
93
|
for retry_time in range(max_retries + 1):
|
|
@@ -114,7 +114,7 @@ class TaskDispatch:
|
|
|
114
114
|
|
|
115
115
|
:param task_envelope: 包含任务信息的信封
|
|
116
116
|
"""
|
|
117
|
-
task
|
|
117
|
+
task = task_envelope.get_task()
|
|
118
118
|
max_retries = self.task_executor.max_retries
|
|
119
119
|
|
|
120
120
|
for retry_time in range(max_retries + 1):
|
|
@@ -177,7 +177,10 @@ class TaskDispatch:
|
|
|
177
177
|
|
|
178
178
|
if self._pool is None:
|
|
179
179
|
raise RuntimeError("execution pool has not been initialized")
|
|
180
|
+
|
|
180
181
|
futures.append(self._pool.submit(self._worker, envelope))
|
|
182
|
+
if len(futures) >= self.max_workers * 2:
|
|
183
|
+
futures = [f for f in futures if not f.done()]
|
|
181
184
|
|
|
182
185
|
# 等待当前批次的所有任务完成
|
|
183
186
|
for future in futures:
|
|
@@ -0,0 +1,58 @@
|
|
|
1
|
+
# runtime/core_envelope.py
|
|
2
|
+
from typing import Any
|
|
3
|
+
|
|
4
|
+
from .util_hash import object_to_str_hash
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
class TaskEnvelope:
|
|
8
|
+
"""任务信封,封装原始任务及其哈希、ID、来源等元信息。"""
|
|
9
|
+
|
|
10
|
+
__slots__ = ("task", "hash", "id", "source", "prev")
|
|
11
|
+
|
|
12
|
+
def __init__(self, task: Any, id: int, source: str, prev: Any = None):
|
|
13
|
+
"""
|
|
14
|
+
:param task: 原始任务
|
|
15
|
+
:param id: 任务 ID
|
|
16
|
+
:param source: 任务来源标识
|
|
17
|
+
:param prev: 前一个任务(用于结果缓存时回溯)
|
|
18
|
+
"""
|
|
19
|
+
self.task = task
|
|
20
|
+
self.hash = None
|
|
21
|
+
self.id = id
|
|
22
|
+
|
|
23
|
+
self.source = source
|
|
24
|
+
self.prev = prev
|
|
25
|
+
|
|
26
|
+
def get_task(self) -> Any:
|
|
27
|
+
"""
|
|
28
|
+
获取原始任务
|
|
29
|
+
|
|
30
|
+
:return: 原始任务
|
|
31
|
+
"""
|
|
32
|
+
return self.task
|
|
33
|
+
|
|
34
|
+
def get_hash(self) -> str:
|
|
35
|
+
"""
|
|
36
|
+
获取任务哈希
|
|
37
|
+
|
|
38
|
+
:return: 任务哈希
|
|
39
|
+
"""
|
|
40
|
+
if self.hash is None:
|
|
41
|
+
self.hash = object_to_str_hash(self.task)
|
|
42
|
+
return self.hash
|
|
43
|
+
|
|
44
|
+
def get_id(self) -> int:
|
|
45
|
+
"""
|
|
46
|
+
获取任务 ID
|
|
47
|
+
|
|
48
|
+
:return: 任务 ID
|
|
49
|
+
"""
|
|
50
|
+
return self.id
|
|
51
|
+
|
|
52
|
+
def change_id(self, new_id: int) -> None:
|
|
53
|
+
"""
|
|
54
|
+
修改 id
|
|
55
|
+
|
|
56
|
+
:param new_id: 新的任务 id
|
|
57
|
+
"""
|
|
58
|
+
self.id = new_id
|
|
@@ -1,5 +1,4 @@
|
|
|
1
1
|
# runtime/core_metrics.py
|
|
2
|
-
import asyncio
|
|
3
2
|
from threading import Lock
|
|
4
3
|
|
|
5
4
|
from .util_factories import (
|
|
@@ -85,7 +84,7 @@ class TaskMetrics:
|
|
|
85
84
|
# ==== 去重 ====
|
|
86
85
|
def is_duplicate(self, task_hash: str) -> bool:
|
|
87
86
|
"""
|
|
88
|
-
|
|
87
|
+
检查任务是否重复, 是原子操作
|
|
89
88
|
|
|
90
89
|
:param task_hash: 任务的哈希值
|
|
91
90
|
:return: 如果启用了去重检查且任务哈希存在于已处理集合中,返回 True;否则返回 False。
|
|
@@ -147,16 +146,6 @@ class TaskMetrics:
|
|
|
147
146
|
with self.success_counter.get_lock():
|
|
148
147
|
self.success_counter.value += count
|
|
149
148
|
|
|
150
|
-
async def add_success_count_async(self, count: int = 1):
|
|
151
|
-
"""
|
|
152
|
-
异步更新成功任务计数器
|
|
153
|
-
|
|
154
|
-
在独立线程中执行 add_success_count,避免阻塞事件循环。
|
|
155
|
-
|
|
156
|
-
:param count: 增加的成功任务数量,默认值为 1。
|
|
157
|
-
"""
|
|
158
|
-
await asyncio.to_thread(self.add_success_count, count)
|
|
159
|
-
|
|
160
149
|
def add_error_count(self, count: int = 1):
|
|
161
150
|
"""
|
|
162
151
|
更新失败任务计数器
|
|
@@ -136,10 +136,15 @@ class TaskInQueue:
|
|
|
136
136
|
:return: 出队的任务或终止符号id池
|
|
137
137
|
"""
|
|
138
138
|
while True:
|
|
139
|
-
|
|
140
|
-
|
|
141
|
-
|
|
139
|
+
try:
|
|
140
|
+
item: TaskEnvelope | TerminationSignal = self.queue.get()
|
|
141
|
+
result = self._deal_get_item(item)
|
|
142
|
+
if result is None:
|
|
143
|
+
continue
|
|
142
144
|
return result
|
|
145
|
+
except Exception as e:
|
|
146
|
+
self.log_inlet.get_item_error("unknown", self.out_tag, e)
|
|
147
|
+
continue
|
|
143
148
|
|
|
144
149
|
def _deal_get_item(
|
|
145
150
|
self, item: TaskEnvelope | TerminationSignal
|
|
@@ -57,7 +57,7 @@ def make_queue_backend(mode: str) -> type:
|
|
|
57
57
|
|
|
58
58
|
def make_task_in_queue(
|
|
59
59
|
*,
|
|
60
|
-
queue:
|
|
60
|
+
queue: Any,
|
|
61
61
|
executor: "TaskExecutor",
|
|
62
62
|
) -> TaskInQueue:
|
|
63
63
|
"""
|
|
@@ -77,7 +77,7 @@ def make_task_in_queue(
|
|
|
77
77
|
|
|
78
78
|
def make_task_out_queue(
|
|
79
79
|
*,
|
|
80
|
-
queue:
|
|
80
|
+
queue: Any,
|
|
81
81
|
executor: "TaskExecutor",
|
|
82
82
|
) -> TaskOutQueue:
|
|
83
83
|
"""
|