celestialflow-3.1.2.tar.gz → celestialflow-3.1.3.tar.gz

This diff compares two publicly released versions of the package as published in a supported registry. It is provided for informational purposes only and reflects the package contents exactly as they appear in that public registry.
Files changed (50)
  1. {celestialflow-3.1.2 → celestialflow-3.1.3}/PKG-INFO +14 -7
  2. {celestialflow-3.1.2 → celestialflow-3.1.3}/README.md +12 -4
  3. {celestialflow-3.1.2 → celestialflow-3.1.3}/pyproject.toml +2 -3
  4. {celestialflow-3.1.2 → celestialflow-3.1.3}/src/celestialflow/__init__.py +8 -5
  5. {celestialflow-3.1.2 → celestialflow-3.1.3}/src/celestialflow/graph/analysis.py +2 -2
  6. {celestialflow-3.1.2 → celestialflow-3.1.3}/src/celestialflow/graph/graph.py +30 -62
  7. {celestialflow-3.1.2 → celestialflow-3.1.3}/src/celestialflow/graph/serialize.py +1 -1
  8. {celestialflow-3.1.2 → celestialflow-3.1.3}/src/celestialflow/persistence/__init__.py +3 -0
  9. celestialflow-3.1.3/src/celestialflow/persistence/base.py +61 -0
  10. {celestialflow-3.1.2 → celestialflow-3.1.3}/src/celestialflow/persistence/fail.py +32 -46
  11. {celestialflow-3.1.2 → celestialflow-3.1.3}/src/celestialflow/persistence/jsonl.py +0 -47
  12. {celestialflow-3.1.2 → celestialflow-3.1.3}/src/celestialflow/persistence/log.py +77 -87
  13. {celestialflow-3.1.2 → celestialflow-3.1.3}/src/celestialflow/runtime/__init__.py +2 -0
  14. celestialflow-3.1.3/src/celestialflow/runtime/envelope.py +28 -0
  15. {celestialflow-3.1.2 → celestialflow-3.1.3}/src/celestialflow/runtime/metrics.py +86 -35
  16. {celestialflow-3.1.2 → celestialflow-3.1.3}/src/celestialflow/runtime/queue.py +2 -1
  17. {celestialflow-3.1.2 → celestialflow-3.1.3}/src/celestialflow/runtime/types.py +1 -32
  18. {celestialflow-3.1.2 → celestialflow-3.1.3}/src/celestialflow/stage/__init__.py +2 -2
  19. {celestialflow-3.1.2 → celestialflow-3.1.3}/src/celestialflow/stage/executor.py +130 -220
  20. {celestialflow-3.1.2 → celestialflow-3.1.3}/src/celestialflow/stage/nodes.py +16 -20
  21. {celestialflow-3.1.2 → celestialflow-3.1.3}/src/celestialflow/stage/stage.py +15 -6
  22. celestialflow-3.1.3/src/celestialflow/utils/benchmark.py +103 -0
  23. celestialflow-3.1.3/src/celestialflow/utils/clone.py +118 -0
  24. {celestialflow-3.1.2 → celestialflow-3.1.3}/src/celestialflow.egg-info/PKG-INFO +14 -7
  25. {celestialflow-3.1.2 → celestialflow-3.1.3}/src/celestialflow.egg-info/SOURCES.txt +4 -2
  26. {celestialflow-3.1.2 → celestialflow-3.1.3}/src/celestialflow.egg-info/requires.txt +1 -2
  27. {celestialflow-3.1.2 → celestialflow-3.1.3}/tests/test_nodes.py +12 -6
  28. celestialflow-3.1.2/tests/test_executor.py +0 -121
  29. celestialflow-3.1.2/tests/test_graph.py +0 -219
  30. {celestialflow-3.1.2 → celestialflow-3.1.3}/setup.cfg +0 -0
  31. {celestialflow-3.1.2 → celestialflow-3.1.3}/src/celestialflow/graph/__init__.py +0 -0
  32. {celestialflow-3.1.2 → celestialflow-3.1.3}/src/celestialflow/graph/structure.py +0 -0
  33. {celestialflow-3.1.2 → celestialflow-3.1.3}/src/celestialflow/observability/__init__.py +0 -0
  34. {celestialflow-3.1.2 → celestialflow-3.1.3}/src/celestialflow/observability/report.py +0 -0
  35. {celestialflow-3.1.2 → celestialflow-3.1.3}/src/celestialflow/persistence/constant.py +0 -0
  36. {celestialflow-3.1.2 → celestialflow-3.1.3}/src/celestialflow/runtime/errors.py +0 -0
  37. {celestialflow-3.1.2 → celestialflow-3.1.3}/src/celestialflow/runtime/estimators.py +0 -0
  38. {celestialflow-3.1.2 → celestialflow-3.1.3}/src/celestialflow/runtime/factories.py +0 -0
  39. {celestialflow-3.1.2 → celestialflow-3.1.3}/src/celestialflow/runtime/hash.py +0 -0
  40. {celestialflow-3.1.2 → celestialflow-3.1.3}/src/celestialflow/runtime/progress.py +0 -0
  41. {celestialflow-3.1.2 → celestialflow-3.1.3}/src/celestialflow/runtime/tools.py +0 -0
  42. {celestialflow-3.1.2 → celestialflow-3.1.3}/src/celestialflow/utils/__init__.py +0 -0
  43. {celestialflow-3.1.2 → celestialflow-3.1.3}/src/celestialflow/utils/collections.py +0 -0
  44. {celestialflow-3.1.2 → celestialflow-3.1.3}/src/celestialflow/utils/debug.py +0 -0
  45. {celestialflow-3.1.2 → celestialflow-3.1.3}/src/celestialflow/utils/format.py +0 -0
  46. {celestialflow-3.1.2 → celestialflow-3.1.3}/src/celestialflow/web/server.py +0 -0
  47. {celestialflow-3.1.2 → celestialflow-3.1.3}/src/celestialflow.egg-info/dependency_links.txt +0 -0
  48. {celestialflow-3.1.2 → celestialflow-3.1.3}/src/celestialflow.egg-info/entry_points.txt +0 -0
  49. {celestialflow-3.1.2 → celestialflow-3.1.3}/src/celestialflow.egg-info/top_level.txt +0 -0
  50. {celestialflow-3.1.2 → celestialflow-3.1.3}/tests/test_structure.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: celestialflow
3
- Version: 3.1.2
3
+ Version: 3.1.3
4
4
  Summary: A flexible GRAPH-based task orchestration framework.
5
5
  Author-email: Mr-xiaotian <mingxiaomingtian@gmail.com>
6
6
  License: MIT
@@ -15,14 +15,13 @@ Classifier: Topic :: Software Development :: Libraries
15
15
  Requires-Python: >=3.10
16
16
  Description-Content-Type: text/markdown
17
17
  Requires-Dist: tqdm
18
- Requires-Dist: loguru
19
18
  Requires-Dist: fastapi
20
19
  Requires-Dist: uvicorn
21
20
  Requires-Dist: requests
22
21
  Requires-Dist: networkx
23
22
  Requires-Dist: redis
24
23
  Requires-Dist: jinja2
25
- Requires-Dist: celestialtree
24
+ Requires-Dist: celestialtree>=0.1.2
26
25
 
27
26
  # CelestialFlow ——一个轻量级、可并行、基于图结构的 Python 任务调度框架
28
27
 
@@ -240,9 +239,8 @@ flowchart TD
240
239
 
241
240
  | 依赖包 | 说明 |
242
241
  | ----------------- | ---- |
243
- | **Python ≥ 3.8** | 运行环境,建议使用 3.10 及以上版本 |
242
+ | **Python ≥ 3.10** | 运行环境,建议使用 3.10 及以上版本 |
244
243
  | **tqdm** | 控制台进度条显示,用于任务执行可视化 |
245
- | **loguru** | 高性能日志系统,支持多进程安全输出 |
246
244
  | **fastapi** | Web 服务接口框架(用于任务可视化与远程控制) |
247
245
  | **uvicorn** | FastAPI 的高性能 ASGI 服务器 |
248
246
  | **requests** | HTTP 客户端库,用于任务状态上报与远程调用 |
@@ -256,7 +254,7 @@ flowchart TD
256
254
  <p align="center">
257
255
  <img src="https://raw.githubusercontent.com/Mr-xiaotian/CelestialFlow/main/img/file_structure.svg" alt="FileStructure" />
258
256
  <br/>
259
- <em>celestial-flow 3.1.1</em>
257
+ <em>celestial-flow 3.1.3</em>
260
258
  </p>
261
259
 
262
260
  (该视图由我的另一个项目[CelestialVault](https://github.com/Mr-xiaotian/CelestialVault)中inst_file.FileTree.print_tree()生成。转换为图片则借助[Carbon](https://carbon.now.sh)。)
@@ -276,7 +274,7 @@ flowchart TD
276
274
  - 6/16/2025: 多轮评测后, 当前框架已支持完整有向图结构, 将TaskTree改名为TaskGraph
277
275
  - 3.0.1: 上线Pypi, 可喜可贺
278
276
  - 3.0.4: 新增一个抽象结构TaskQueue, 用于表示节点的所有"入边"与"出边"; 恢复未消费任务的保存功能
279
- - 3.0.5: 删除原有的TaskRedisTransfer节点, 并增添三种新的redis交互节点TaskRedisSink TaskRedisSource TaskRedisAck, 用于跨语言 跨进程 跨设备处理任务; 并在Web页面添加展示拓扑信息的卡片
277
+ - 3.0.5: 删除原有的TaskRedisTransfer节点, 并增添三种新的redis交互节点TaskRedisTransport TaskRedisSource TaskRedisAck, 用于跨语言 跨进程 跨设备处理任务; 并在Web页面添加展示拓扑信息的卡片
280
278
  - 3.0.6: 添加对[CelestialTree](https://github.com/Mr-xiaotian/CelestialTree)系统的支持, 现在可以追踪单个任务的流向
281
279
  - 3.0.7: 将TaskStage从TaskExecutor中单独抽出来作为一个子类; 增加新节点TaskRouter, 可以将传入的任务选择的传给不同的下游节点, 而不是进行广播
282
280
  - 3.0.8: 在ctree逻辑上将"任务重试"事件后的"任务成功/失败/重试"事件视为因果关系, 而非之前的并行关系; 重构错误搜集部分逻辑; 修复大量3.0.6与3.07版本引入的bug; 优化部分log表现
@@ -319,6 +317,15 @@ flowchart TD
319
317
  - fix
320
318
  - 修复前端renderNodeList中参数设置错误;
321
319
  - 修复其他微小bug;
320
+ - 3.1.3
321
+ - feat:
322
+ - 抽象出BaseListener与BaseSinker;
323
+ - 移除loguru, 完全由LogListener和LogSinker实现log记录;
324
+ - 将bench相关代码从TaskExecutor和TaskGraph中抽离, 不再作为方法, 而是单独bench函数;
325
+ - 重构TaskExecutor部分代码, 以尽量瘦身;
326
+ - 优化log处理代码中对时间戳的处理, 现在更加准确;
327
+ - fix:
328
+ - 修复一些影响性能的小问题;
322
329
 
323
330
  ## Star 历史趋势(Star History)
324
331
 
@@ -214,9 +214,8 @@ flowchart TD
214
214
 
215
215
  | 依赖包 | 说明 |
216
216
  | ----------------- | ---- |
217
- | **Python ≥ 3.8** | 运行环境,建议使用 3.10 及以上版本 |
217
+ | **Python ≥ 3.10** | 运行环境,建议使用 3.10 及以上版本 |
218
218
  | **tqdm** | 控制台进度条显示,用于任务执行可视化 |
219
- | **loguru** | 高性能日志系统,支持多进程安全输出 |
220
219
  | **fastapi** | Web 服务接口框架(用于任务可视化与远程控制) |
221
220
  | **uvicorn** | FastAPI 的高性能 ASGI 服务器 |
222
221
  | **requests** | HTTP 客户端库,用于任务状态上报与远程调用 |
@@ -230,7 +229,7 @@ flowchart TD
230
229
  <p align="center">
231
230
  <img src="https://raw.githubusercontent.com/Mr-xiaotian/CelestialFlow/main/img/file_structure.svg" alt="FileStructure" />
232
231
  <br/>
233
- <em>celestial-flow 3.1.1</em>
232
+ <em>celestial-flow 3.1.3</em>
234
233
  </p>
235
234
 
236
235
  (该视图由我的另一个项目[CelestialVault](https://github.com/Mr-xiaotian/CelestialVault)中inst_file.FileTree.print_tree()生成。转换为图片则借助[Carbon](https://carbon.now.sh)。)
@@ -250,7 +249,7 @@ flowchart TD
250
249
  - 6/16/2025: 多轮评测后, 当前框架已支持完整有向图结构, 将TaskTree改名为TaskGraph
251
250
  - 3.0.1: 上线Pypi, 可喜可贺
252
251
  - 3.0.4: 新增一个抽象结构TaskQueue, 用于表示节点的所有"入边"与"出边"; 恢复未消费任务的保存功能
253
- - 3.0.5: 删除原有的TaskRedisTransfer节点, 并增添三种新的redis交互节点TaskRedisSink TaskRedisSource TaskRedisAck, 用于跨语言 跨进程 跨设备处理任务; 并在Web页面添加展示拓扑信息的卡片
252
+ - 3.0.5: 删除原有的TaskRedisTransfer节点, 并增添三种新的redis交互节点TaskRedisTransport TaskRedisSource TaskRedisAck, 用于跨语言 跨进程 跨设备处理任务; 并在Web页面添加展示拓扑信息的卡片
254
253
  - 3.0.6: 添加对[CelestialTree](https://github.com/Mr-xiaotian/CelestialTree)系统的支持, 现在可以追踪单个任务的流向
255
254
  - 3.0.7: 将TaskStage从TaskExecutor中单独抽出来作为一个子类; 增加新节点TaskRouter, 可以将传入的任务选择的传给不同的下游节点, 而不是进行广播
256
255
  - 3.0.8: 在ctree逻辑上将"任务重试"事件后的"任务成功/失败/重试"事件视为因果关系, 而非之前的并行关系; 重构错误搜集部分逻辑; 修复大量3.0.6与3.07版本引入的bug; 优化部分log表现
@@ -293,6 +292,15 @@ flowchart TD
293
292
  - fix
294
293
  - 修复前端renderNodeList中参数设置错误;
295
294
  - 修复其他微小bug;
295
+ - 3.1.3
296
+ - feat:
297
+ - 抽象出BaseListener与BaseSinker;
298
+ - 移除loguru, 完全由LogListener和LogSinker实现log记录;
299
+ - 将bench相关代码从TaskExecutor和TaskGraph中抽离, 不再作为方法, 而是单独bench函数;
300
+ - 重构TaskExecutor部分代码, 以尽量瘦身;
301
+ - 优化log处理代码中对时间戳的处理, 现在更加准确;
302
+ - fix:
303
+ - 修复一些影响性能的小问题;
296
304
 
297
305
  ## Star 历史趋势(Star History)
298
306
 
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "celestialflow"
7
- version = "3.1.2"
7
+ version = "3.1.3"
8
8
  description = "A flexible GRAPH-based task orchestration framework."
9
9
  readme = "README.md"
10
10
  license = { text = "MIT" }
@@ -14,14 +14,13 @@ requires-python = ">=3.10"
14
14
 
15
15
  dependencies = [
16
16
  "tqdm",
17
- "loguru",
18
17
  "fastapi",
19
18
  "uvicorn",
20
19
  "requests",
21
20
  "networkx",
22
21
  "redis",
23
22
  "jinja2",
24
- "celestialtree",
23
+ "celestialtree>=0.1.2",
25
24
  ]
26
25
 
27
26
  classifiers = [
@@ -3,7 +3,7 @@ from .stage import (
3
3
  TaskExecutor,
4
4
  TaskStage,
5
5
  TaskSplitter,
6
- TaskRedisSink,
6
+ TaskRedisTransport,
7
7
  TaskRedisSource,
8
8
  TaskRedisAck,
9
9
  TaskRouter,
@@ -17,14 +17,15 @@ from .graph import (
17
17
  TaskWheel,
18
18
  TaskGrid,
19
19
  )
20
- from .runtime.types import TerminationSignal
21
- from .runtime.hash import make_hashable
22
- from .utils.format import format_table
23
20
  from .persistence.jsonl import (
24
21
  load_jsonl_logs,
25
22
  load_task_by_stage,
26
23
  load_task_by_error,
27
24
  )
25
+ from .runtime.types import TerminationSignal
26
+ from .runtime.hash import make_hashable
27
+ from .utils.format import format_table
28
+ from .utils.benchmark import benchmark_graph, benchmark_executor
28
29
  from .web.server import TaskWebServer
29
30
 
30
31
  __all__ = [
@@ -38,7 +39,7 @@ __all__ = [
38
39
  "TaskExecutor",
39
40
  "TaskStage",
40
41
  "TaskSplitter",
41
- "TaskRedisSink",
42
+ "TaskRedisTransport",
42
43
  "TaskRedisSource",
43
44
  "TaskRedisAck",
44
45
  "TaskRouter",
@@ -49,4 +50,6 @@ __all__ = [
49
50
  "load_task_by_error",
50
51
  "make_hashable",
51
52
  "format_table",
53
+ "benchmark_graph",
54
+ "benchmark_executor",
52
55
  ]
@@ -15,11 +15,11 @@ def format_networkx_graph(structure_graph: List[Dict[str, Any]]) -> nx.DiGraph:
15
15
  G = nx.DiGraph()
16
16
 
17
17
  def add_node_and_edges(node: Dict[str, Any]):
18
- node_id = f'{node["stage_name"]}[{node["func_name"]}]'
18
+ node_id = f'{node["name"]}[{node["func_name"]}]'
19
19
  G.add_node(node_id, **{"mode": node.get("stage_mode")})
20
20
 
21
21
  for child in node.get("next_stages", []):
22
- child_id = f'{child["stage_name"]}[{child["func_name"]}]'
22
+ child_id = f'{child["name"]}[{child["func_name"]}]'
23
23
  G.add_edge(node_id, child_id)
24
24
  # 递归添加子节点
25
25
  add_node_and_edges(child)
@@ -4,7 +4,7 @@ import warnings
4
4
  import multiprocessing
5
5
  from collections import defaultdict, deque
6
6
  from multiprocessing import Queue as MPQueue
7
- from typing import Any, Dict, List
7
+ from typing import Dict, List
8
8
 
9
9
  from celestialtree import (
10
10
  Client as CelestialTreeClient,
@@ -13,7 +13,7 @@ from celestialtree import (
13
13
  format_provenance_forest,
14
14
  )
15
15
 
16
- from ..runtime import TaskQueue
16
+ from ..runtime import TaskQueue, TaskEnvelope
17
17
  from ..runtime.estimators import (
18
18
  calc_elapsed,
19
19
  calc_remaining,
@@ -21,7 +21,6 @@ from ..runtime.estimators import (
21
21
  )
22
22
  from ..runtime.errors import UnconsumedError
23
23
  from ..runtime.types import (
24
- TaskEnvelope,
25
24
  StageStatus,
26
25
  TerminationSignal,
27
26
  STAGE_STYLE,
@@ -228,6 +227,9 @@ class TaskGraph:
228
227
  :param host: 报告器主机地址
229
228
  :param port: 报告器端口
230
229
  """
230
+ self._is_report = is_report
231
+ self._report_host = host
232
+ self._report_port = port
231
233
  if is_report:
232
234
  self.reporter = TaskReporter(
233
235
  host=host,
@@ -239,7 +241,12 @@ class TaskGraph:
239
241
  self.reporter = NullTaskReporter()
240
242
 
241
243
  def set_ctree(
242
- self, use_ctree=False, host="127.0.0.1", http_port=7777, grpc_port=7778
244
+ self,
245
+ use_ctree=False,
246
+ host="127.0.0.1",
247
+ http_port=7777,
248
+ grpc_port=7778,
249
+ transport="grpc",
243
250
  ):
244
251
  """
245
252
  设定事件树客户端
@@ -252,10 +259,11 @@ class TaskGraph:
252
259
  self._ctree_host = host
253
260
  self._CTREE_HTTP_PORT = http_port
254
261
  self._ctree_grpc_port = grpc_port
262
+ self._ctree_transport = transport
255
263
 
256
264
  if use_ctree:
257
265
  self.ctree_client = CelestialTreeClient(
258
- host=host, http_port=http_port, grpc_port=grpc_port, transport="grpc"
266
+ host=host, http_port=http_port, grpc_port=grpc_port, transport=transport
259
267
  )
260
268
  if not self.ctree_client.health():
261
269
  raise Exception("CelestialTreeClient is not available")
@@ -362,7 +370,7 @@ class TaskGraph:
362
370
  )
363
371
 
364
372
  try:
365
- start_time = time.time()
373
+ start_time = time.perf_counter()
366
374
  self.fail_listener.start()
367
375
  self.log_listener.start()
368
376
  self.log_sinker.start_graph(self.get_structure_list())
@@ -377,7 +385,7 @@ class TaskGraph:
377
385
 
378
386
  self.reporter.stop()
379
387
  self.release_resources()
380
- self.log_sinker.end_graph(time.time() - start_time)
388
+ self.log_sinker.end_graph(time.perf_counter() - start_time)
381
389
  self.fail_listener.stop()
382
390
  self.log_listener.stop()
383
391
 
@@ -393,11 +401,11 @@ class TaskGraph:
393
401
  for p in self.processes:
394
402
  p.join()
395
403
  self.log_sinker.process_exit(p.name, p.exitcode)
396
- else:
404
+ elif self.schedule_mode == "staged":
397
405
  # staged schedule_mode:一层层地顺序执行
398
406
  for layer_level, layer in self.layers_dict.items():
399
407
  self.log_sinker.start_layer(layer, layer_level)
400
- start_time = time.time()
408
+ start_time = time.perf_counter()
401
409
 
402
410
  processes = []
403
411
  for stage_tag in layer:
@@ -411,7 +419,7 @@ class TaskGraph:
411
419
  p.join()
412
420
  self.log_sinker.process_exit(p.name, p.exitcode)
413
421
 
414
- self.log_sinker.end_layer(layer, time.time() - start_time)
422
+ self.log_sinker.end_layer(layer, time.perf_counter() - start_time)
415
423
 
416
424
  def _execute_stage(self, stage: TaskStage):
417
425
  """
@@ -603,9 +611,11 @@ class TaskGraph:
603
611
  if not self.isDAG:
604
612
  totals["total_remain"] = max(running_remaining_map.values(), default=0.0)
605
613
  else:
606
- G = self.get_networkx_graph()
607
614
  expected_pending_map = calc_global_remain_equal_pred(
608
- G, running_processed_map, running_pending_map, running_elapsed_map
615
+ self.networkx_graph,
616
+ running_processed_map,
617
+ running_pending_map,
618
+ running_elapsed_map,
609
619
  )
610
620
  totals["total_remain"] = max(expected_pending_map.values(), default=0.0)
611
621
 
@@ -613,10 +623,10 @@ class TaskGraph:
613
623
  self.graph_summary = dict(totals)
614
624
 
615
625
  def get_fail_by_stage_dict(self):
616
- return load_task_by_stage(self.fail_listener.fallback_path)
626
+ return load_task_by_stage(self.fail_listener.jsonl_path)
617
627
 
618
628
  def get_fail_by_error_dict(self):
619
- return load_task_by_error(self.fail_listener.fallback_path)
629
+ return load_task_by_error(self.fail_listener.jsonl_path)
620
630
 
621
631
  def get_status_dict(self) -> Dict[str, dict]:
622
632
  """
@@ -647,6 +657,9 @@ class TaskGraph:
647
657
  return format_structure_list_from_graph(self.structure_json)
648
658
 
649
659
  def get_networkx_graph(self):
660
+ """
661
+ 获取任务图的 networkx 有向图(DiGraph)
662
+ """
650
663
  return format_networkx_graph(self.structure_json)
651
664
 
652
665
  def get_fallback_path(self) -> str:
@@ -668,55 +681,10 @@ class TaskGraph:
668
681
  """
669
682
  分析任务图,计算 DAG 属性和层级信息
670
683
  """
671
- networkx_graph = self.get_networkx_graph()
684
+ self.networkx_graph = self.get_networkx_graph()
672
685
  self.layers_dict = {}
673
686
 
674
- self.isDAG = is_directed_acyclic_graph(networkx_graph)
687
+ self.isDAG = is_directed_acyclic_graph(self.networkx_graph)
675
688
  if self.isDAG:
676
- stage_level_dict = compute_node_levels(networkx_graph)
689
+ stage_level_dict = compute_node_levels(self.networkx_graph)
677
690
  self.layers_dict = cluster_by_value_sorted(stage_level_dict)
678
-
679
- def test_methods(
680
- self,
681
- init_tasks_dict: Dict[str, List],
682
- stage_modes: list = None,
683
- execution_modes: list = None,
684
- ) -> Dict[str, Any]:
685
- """
686
- 测试 TaskGraph 在 'serial' 和 'process' 模式下的执行时间。
687
-
688
- :param init_tasks_dict: 初始化任务字典
689
- :param stage_modes: 阶段模式列表,默认为 ['serial', 'process']
690
- :param execution_modes: 执行模式列表,默认为 ['serial', 'thread']
691
- :return: 包含两种执行模式下的执行时间的字典
692
- """
693
- results = {}
694
- test_table_list = []
695
- fail_by_error_dict = {}
696
- fail_by_stage_dict = {}
697
-
698
- stage_modes = stage_modes or ["serial", "process"]
699
- execution_modes = execution_modes or ["serial", "thread"]
700
- for stage_mode in stage_modes:
701
- time_list = []
702
- for execution_mode in execution_modes:
703
- start_time = time.time()
704
- self.init_env()
705
- self.set_graph_mode(stage_mode, execution_mode)
706
- self.start_graph(init_tasks_dict)
707
- fail_by_stage_dict.update(self.get_fail_by_stage_dict())
708
- fail_by_error_dict.update(self.get_fail_by_error_dict())
709
-
710
- time_list.append(time.time() - start_time)
711
-
712
- test_table_list.append(time_list)
713
-
714
- results["Time table"] = (
715
- test_table_list,
716
- stage_modes,
717
- execution_modes,
718
- r"stage\execution",
719
- )
720
- results["Fail stage dict"] = fail_by_stage_dict
721
- results["Fail error dict"] = fail_by_error_dict
722
- return results
@@ -59,7 +59,7 @@ def format_structure_list_from_graph(root_roots: List[Dict] = None) -> List[str]
59
59
 
60
60
  def node_label(node: Dict) -> str:
61
61
  visited_note = " [Ref]" if node.get("is_ref") else ""
62
- N = node.get("actor_name", "?") # N
62
+ N = node.get("name", "?") # N
63
63
  F = node.get("func_name", "?") # F
64
64
  S = node.get("stage_mode", "?") # S
65
65
  E = node.get("execution_mode", "?") # E
@@ -1,8 +1,11 @@
1
1
  # persistence/__init__.py
2
+ from .base import BaseListener, BaseSinker
2
3
  from .fail import FailListener, FailSinker
3
4
  from .log import LogListener, LogSinker
4
5
 
5
6
  __all__ = [
7
+ "BaseListener",
8
+ "BaseSinker",
6
9
  "FailListener",
7
10
  "FailSinker",
8
11
  "LogListener",
@@ -0,0 +1,61 @@
1
+ # persistence/base.py
2
+ from __future__ import annotations
3
+
4
+ from multiprocessing import Queue as MPQueue
5
+ from queue import Empty
6
+ from threading import Thread
7
+
8
+ from ..runtime.tools import cleanup_mpqueue
9
+ from ..runtime.types import TerminationSignal, TERMINATION_SIGNAL
10
+
11
+
12
+ class BaseListener:
13
+ def __init__(self):
14
+ self.queue = MPQueue()
15
+ self._thread: Thread | None = None
16
+
17
+ def _before_start(self):
18
+ return None
19
+
20
+ def _handle_record(self, record):
21
+ raise NotImplementedError
22
+
23
+ def _after_stop(self):
24
+ return None
25
+
26
+ def start(self):
27
+ self._before_start()
28
+ if self._thread is None or not self._thread.is_alive():
29
+ self._thread = Thread(target=self._listen, daemon=True)
30
+ self._thread.start()
31
+
32
+ def _listen(self):
33
+ while True:
34
+ try:
35
+ record = self.queue.get(timeout=0.5)
36
+ if isinstance(record, TerminationSignal):
37
+ break
38
+ self._handle_record(record)
39
+ except Empty:
40
+ continue
41
+
42
+ def get_queue(self):
43
+ return self.queue
44
+
45
+ def stop(self):
46
+ if self._thread is None:
47
+ return
48
+
49
+ self.queue.put(TERMINATION_SIGNAL)
50
+ self._thread.join()
51
+ self._thread = None
52
+ cleanup_mpqueue(self.queue)
53
+ self._after_stop()
54
+
55
+
56
+ class BaseSinker:
57
+ def __init__(self, queue):
58
+ self.queue: MPQueue = queue
59
+
60
+ def _sink(self, record):
61
+ self.queue.put(record)
@@ -1,77 +1,63 @@
1
1
  # persistence/fail.py
2
- import os
2
+ import json
3
+ from pathlib import Path
3
4
  from datetime import datetime
4
- from multiprocessing import Queue as MPQueue
5
- from queue import Empty
6
- from threading import Lock, Thread
7
5
 
8
- from ..runtime.tools import cleanup_mpqueue
9
- from ..runtime.types import TerminationSignal, TERMINATION_SIGNAL
10
6
  from ..utils.format import format_repr
11
- from .jsonl import append_jsonl_log
7
+ from .base import BaseListener, BaseSinker
12
8
 
13
9
 
14
- class FailListener:
10
+ class FailListener(BaseListener):
15
11
  def __init__(self, error_source: str):
12
+ super().__init__()
13
+
16
14
  self.error_source = error_source
17
- self.fail_queue = MPQueue()
18
- self._thread = None
19
- self.fallback_path = ""
15
+ self.fail_queue = self.queue
16
+ self.jsonl_path: Path | None = None
17
+
20
18
  self.total_error_num = 0
21
- self._counter_lock = Lock()
19
+ self._file = None
22
20
 
23
- def start(self):
21
+ def _before_start(self):
22
+ # 创建 fallback 目录
24
23
  now = datetime.now()
25
24
  date_str = now.strftime("%Y-%m-%d")
26
25
  time_str = now.strftime("%H-%M-%S-%f")[:-3]
27
- self.fallback_path = (
26
+ self.jsonl_path = Path(
28
27
  f"./fallback/{date_str}/{self.error_source}({time_str}).jsonl"
29
28
  )
30
- self.total_error_num = 0
29
+ self.jsonl_path.parent.mkdir(parents=True, exist_ok=True)
31
30
 
32
- if self._thread is None or not self._thread.is_alive():
33
- self._thread = Thread(target=self._listen, daemon=True)
34
- self._thread.start()
31
+ # 打开失败记录文件
32
+ self._file = self.jsonl_path.open("a", encoding="utf-8")
35
33
 
36
- def _listen(self):
37
- while True:
38
- try:
39
- record = self.fail_queue.get(timeout=0.5)
40
- if isinstance(record, TerminationSignal):
41
- break
42
- append_jsonl_log(record, self.fallback_path)
43
- if isinstance(record, dict) and record.get("error_id") is not None:
44
- with self._counter_lock:
45
- self.total_error_num += 1
46
- except Empty:
47
- continue
34
+ # 初始化错误计数器
35
+ self.total_error_num = 0
48
36
 
49
- def get_queue(self):
50
- return self.fail_queue
37
+ def _handle_record(self, record):
38
+ jsonl_record = json.dumps(record, ensure_ascii=False)
51
39
 
52
- def get_fallback_path(self) -> str:
53
- return os.path.abspath(self.fallback_path)
40
+ self._file.write(f"{jsonl_record}\n")
41
+ self._file.flush()
54
42
 
55
- def stop(self):
56
- if self._thread is None:
57
- return
43
+ if isinstance(record, dict) and record.get("error_id") is not None:
44
+ self.total_error_num += 1
58
45
 
59
- self.fail_queue.put(TERMINATION_SIGNAL)
60
- self._thread.join()
61
- self._thread = None
62
- cleanup_mpqueue(self.fail_queue)
46
+ def _after_stop(self):
47
+ if self._file:
48
+ self._file.flush()
49
+ self._file.close()
50
+ self._file = None
63
51
 
64
52
 
65
- class FailSinker:
53
+ class FailSinker(BaseSinker):
66
54
  """
67
55
  多进程安全失败记录包装类,所有失败记录通过队列发送到监听进程写入
68
56
  """
69
57
 
70
58
  def __init__(self, fail_queue):
71
- self.fail_queue: MPQueue = fail_queue
72
-
73
- def _sink(self, record: dict):
74
- self.fail_queue.put(record)
59
+ super().__init__(fail_queue)
60
+ self.fail_queue = self.queue
75
61
 
76
62
  def start_graph(self, structure_json):
77
63
  """
@@ -1,57 +1,10 @@
1
1
  # persistence/jsonl.py
2
2
  import json
3
3
  import ast
4
- from pathlib import Path
5
- from collections.abc import Iterable
6
4
  from collections import defaultdict
7
5
  from typing import Dict, Any, List, Optional
8
6
 
9
-
10
7
  # ======== jsonl文件处理 ========
11
- def append_jsonl_log(log_data: dict, file_path: str, logger=None):
12
- """
13
- 将日志字典写入指定目录下的 JSONL 文件。
14
-
15
- :param log_data: 要写入的日志项(字典)
16
- :param start_time: 运行开始时间,用于构造路径
17
- :param base_path: 基础路径,例如 './fallback'
18
- :param prefix: 文件名前缀,例如 'realtime_errors'
19
- :param logger: 可选的日志对象用于记录失败信息
20
- """
21
- try:
22
- file_path: Path = Path(file_path)
23
- file_path.parent.mkdir(parents=True, exist_ok=True)
24
-
25
- with open(file_path, "a", encoding="utf-8") as f:
26
- f.write(json.dumps(log_data, ensure_ascii=False) + "\n")
27
- except Exception as e:
28
- if logger:
29
- logger._log("WARNING", f"[Persist] 写入日志失败: {e}")
30
-
31
-
32
- def append_jsonl_logs(log_items: Iterable[dict], file_path: str, logger=None):
33
- """
34
- 将多条日志一次性写入 JSONL 文件(batch 追加)。
35
-
36
- :param log_items: Iterable[dict],每个元素写成一行 JSON
37
- :param file_path: JSONL 文件路径
38
- :param logger: 可选日志对象
39
- """
40
- try:
41
- if not isinstance(log_items, Iterable):
42
- raise TypeError("log_items must be an iterable of dict")
43
-
44
- file_path: Path = Path(file_path)
45
- file_path.parent.mkdir(parents=True, exist_ok=True)
46
-
47
- with open(file_path, "a", encoding="utf-8") as f:
48
- for item in log_items:
49
- if not isinstance(item, dict):
50
- raise TypeError(f"each log item must be dict, got {type(item)}")
51
- f.write(json.dumps(item, ensure_ascii=False) + "\n")
52
- except Exception as e:
53
- if logger:
54
- logger._log("WARNING", f"[Persist] 批量写入日志失败: {e}")
55
8
 
56
9
 
57
10
  def load_jsonl_logs(