pyflowx 0.1.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
pyflowx/__init__.py ADDED
@@ -0,0 +1,75 @@
1
+ """PyFlowX —— 轻量、类型安全的 DAG 任务调度器。
2
+
3
+ 公共 API
4
+ --------
5
+ * :class:`TaskSpec` —— 不可变任务描述符(唯一需要配置的东西)。
6
+ * :class:`Graph` —— 由一组 spec 构建的 DAG;负责校验、分层、可视化。
7
+ * :func:`run` —— 以 ``sequential`` / ``thread`` / ``async`` 策略执行图。
8
+ * :class:`RunReport` —— 类型化、可查询的运行结果。
9
+ * :class:`Context` —— 整体上下文注入的标注标记。
10
+ * 状态后端::class:`StateBackend`、:class:`MemoryBackend`、:class:`JSONBackend`。
11
+
12
+ 快速上手
13
+ --------
14
+ import pyflowx as px
15
+
16
+ def extract() -> list[int]: return [1, 2, 3]
17
+ def double(extract: list[int]) -> list[int]: return [x * 2 for x in extract]
18
+
19
+ graph = px.Graph.from_specs([
20
+ px.TaskSpec("extract", extract),
21
+ px.TaskSpec("double", double, ("extract",)),
22
+ ])
23
+ report = px.run(graph, strategy="sequential")
24
+ print(report["double"]) # [2, 4, 6]
25
+ """
26
+
27
+ from __future__ import annotations
28
+
29
+ from .context import Context, build_call_args, describe_injection
30
+ from .errors import (
31
+ CycleError,
32
+ DuplicateTaskError,
33
+ InjectionError,
34
+ MissingDependencyError,
35
+ PyFlowXError,
36
+ StorageError,
37
+ TaskFailedError,
38
+ TaskTimeoutError,
39
+ )
40
+ from .executors import run
41
+ from .graph import Graph
42
+ from .report import RunReport
43
+ from .storage import JSONBackend, MemoryBackend, StateBackend
44
+ from .task import TaskEvent, TaskResult, TaskSpec, TaskStatus
45
+
46
+ __version__ = "0.1.1"
47
+
48
+ __all__ = [
49
+ # 核心类型
50
+ "TaskSpec",
51
+ "TaskStatus",
52
+ "TaskResult",
53
+ "TaskEvent",
54
+ "Context",
55
+ "Graph",
56
+ "RunReport",
57
+ # 执行
58
+ "run",
59
+ # 状态后端
60
+ "StateBackend",
61
+ "MemoryBackend",
62
+ "JSONBackend",
63
+ # 错误
64
+ "PyFlowXError",
65
+ "DuplicateTaskError",
66
+ "MissingDependencyError",
67
+ "CycleError",
68
+ "TaskFailedError",
69
+ "TaskTimeoutError",
70
+ "InjectionError",
71
+ "StorageError",
72
+ # 辅助(高级)
73
+ "build_call_args",
74
+ "describe_injection",
75
+ ]
pyflowx/__main__.py ADDED
@@ -0,0 +1,9 @@
1
+ from pyflowx.examples.async_aggregation import main as async_aggregation_main
2
+ from pyflowx.examples.etl_pipeline import main as etl_pipeline_main
3
+ from pyflowx.examples.parallel_run import main as parallel_run_main
4
+
5
+
6
def main() -> None:
    """Run every bundled example, one after another.

    Order: the async aggregation example, then the ETL pipeline example,
    then the sequential-vs-threaded comparison.
    """
    async_aggregation_main()
    etl_pipeline_main()
    parallel_run_main()


# Without this guard ``python -m pyflowx`` imports the examples, defines
# ``main`` and exits without running anything.
if __name__ == "__main__":
    main()
pyflowx/context.py ADDED
@@ -0,0 +1,194 @@
1
+ """上下文注入:把上游结果转换为函数参数。
2
+
3
+ 本机制让用户可以编写普通函数,其参数名*就是*依赖声明,从而消除其他
4
+ DAG 库中泛滥的样板包装器(如 ``def wrapper(): return fn(workflow.get_task_result('x'))``)。
5
+
6
+ 注入规则(按顺序求值)
7
+ ----------------------
8
+ 1. **标注为** :class:`Context` 的参数接收完整结果映射。适用于需要遍历
9
+ 所有输入的任务。
10
+ 2. **名称匹配某个依赖**的参数接收该依赖的结果。
11
+ 3. ``**kwargs`` 参数以 dict 形式接收*所有*依赖结果。
12
+ 4. ``TaskSpec.args`` / ``TaskSpec.kwargs`` 为*非依赖*参数提供静态值。
13
+
14
+ 若某参数无法解析且无默认值,则抛出 :class:`~pyflowx.errors.InjectionError`,
15
+ 并附带精确错误信息。
16
+ """
17
+
18
+ from __future__ import annotations
19
+
20
+ import inspect
21
+ from typing import Any, Dict, List, Mapping, Set, Tuple
22
+
23
+ from .errors import InjectionError
24
+ from .task import Context, TaskSpec
25
+
26
+ __all__ = ["Context", "build_call_args", "describe_injection", "_is_context_annotation"]
27
+
28
+
29
def _is_context_annotation(annotation: Any) -> bool:
    """Return whether a parameter annotation denotes ``Context``.

    Three spellings are recognised:

    * the ``Context`` object itself (identity check);
    * a *string* annotation -- what ``from __future__ import annotations``
      leaves behind at runtime -- that is ``"Context"`` or ends in
      ``".Context"`` (``"px.Context"``, ``"pyflowx.Context"``, ...);
    * any other object whose ``__name__`` (or, when that is missing or
      falsy, ``_name``) is ``"Context"`` or ``"Mapping"``.
    """
    if annotation is Context:
        return True

    if isinstance(annotation, str):
        # The text after the last dot is "Context" exactly when the string is
        # "Context" itself or a dotted path ending in ".Context".
        return annotation.rpartition(".")[2] == "Context"

    # Fall back to the object's name so re-exported aliases still match.
    label = getattr(annotation, "__name__", None)
    if not label:
        label = getattr(annotation, "_name", None)
    return label in ("Context", "Mapping")
49
+
50
+
51
def build_call_args(
    spec: TaskSpec[object],
    context: Mapping[str, Any],
) -> Tuple[Tuple[Any, ...], Dict[str, Any]]:
    """Resolve the ``(args, kwargs)`` used to call ``spec.fn``.

    Parameters
    ----------
    spec:
        Task spec supplying ``fn``, ``depends_on``, ``args`` and ``kwargs``.
    context:
        Mapping of task name -> result value. Only the entries named in
        ``spec.depends_on`` are consulted; every other entry is ignored so
        that injection stays deterministic.

    Returns
    -------
    (args, kwargs)
        Ready to be expanded as ``spec.fn(*args, **kwargs)``.

    Raises
    ------
    InjectionError
        If a required parameter cannot be satisfied, or a static kwarg has
        the same name as an available dependency result.
    """
    sig = inspect.signature(spec.fn)
    params = sig.parameters

    # The ``**kwargs`` parameter of the target function, if it has one.
    var_keyword = next(
        (p for p in params.values() if p.kind == inspect.Parameter.VAR_KEYWORD),
        None,
    )

    # Slice of the context that belongs to this task's declared dependencies.
    dep_context: Dict[str, Any] = {
        name: context[name] for name in spec.depends_on if name in context
    }

    # A static kwarg may not shadow a dependency result.
    collisions = set(spec.kwargs) & set(dep_context)
    if collisions:
        raise InjectionError(
            spec.name,
            f"static kwargs {sorted(collisions)} collide with dependency names; "
            "rename the static kwarg or the dependency.",
        )

    injected_kwargs: Dict[str, Any] = {}
    # Dependency results not yet claimed by a named parameter.
    leftover_dep_results: Dict[str, Any] = dict(dep_context)

    # Parameters that can be bound positionally, in declaration order. The
    # first ``len(spec.args)`` of them are consumed by ``spec.args`` and must
    # be skipped by the name-based rules below.
    positional_kinds = (
        inspect.Parameter.POSITIONAL_ONLY,
        inspect.Parameter.POSITIONAL_OR_KEYWORD,
    )
    positional_params: List[str] = [
        pname for pname, param in params.items() if param.kind in positional_kinds
    ]
    args_filled: Set[str] = set(positional_params[: len(spec.args)])

    for pname, param in params.items():
        # Already filled positionally by spec.args.
        if pname in args_filled:
            continue

        # Rule 1: annotated as Context -> the full dependency mapping.
        if _is_context_annotation(param.annotation):
            injected_kwargs[pname] = dep_context
            continue

        # Rule 2: the parameter name matches a dependency.
        if pname in dep_context:
            injected_kwargs[pname] = dep_context[pname]
            leftover_dep_results.pop(pname, None)
            continue

        # Rule 3 (**kwargs) is handled after the loop.

        # Rule 4: static kwargs fill the remaining named parameters.
        if pname in spec.kwargs:
            injected_kwargs[pname] = spec.kwargs[pname]
            continue

        # No source for this parameter: it needs a default, unless it is a
        # ``*args`` / ``**kwargs`` collector, which may legitimately be empty.
        if param.default is inspect.Parameter.empty and param.kind not in (
            inspect.Parameter.VAR_POSITIONAL,
            inspect.Parameter.VAR_KEYWORD,
        ):
            raise InjectionError(
                spec.name,
                f"parameter {pname!r} has no dependency, static value, or default.",
            )

    # Rule 3: ``**kwargs`` receives the extra static kwargs and every
    # dependency result no named parameter claimed. This merge used to run
    # only when a dependency result was left over, which silently dropped
    # static kwargs meant for ``**kwargs`` on tasks without spare
    # dependencies. Static kwargs that duplicate a positionally filled
    # parameter are left out so the call cannot fail with "multiple values".
    if var_keyword is not None:
        merged = {
            key: value
            for key, value in spec.kwargs.items()
            if key not in args_filled
        }
        merged.update(injected_kwargs)
        merged.update(leftover_dep_results)
        injected_kwargs = merged

    return tuple(spec.args), injected_kwargs
156
+
157
+
158
def describe_injection(spec: TaskSpec[object]) -> str:
    """Render a human-readable summary of how each parameter gets its value.

    Intended for ``dry_run`` output: the plan is shown without calling
    ``spec.fn``.
    """
    parameters = inspect.signature(spec.fn).parameters
    bindable_kinds = (
        inspect.Parameter.POSITIONAL_ONLY,
        inspect.Parameter.POSITIONAL_OR_KEYWORD,
    )
    # Parameters that can be bound positionally, in declaration order; the
    # leading ones are consumed by the static ``spec.args``.
    slots = [name for name, p in parameters.items() if p.kind in bindable_kinds]
    taken = set(slots[: len(spec.args)])

    def render(name, p):
        """Describe the value source of a single parameter."""
        if name in taken:
            return f"{name}={spec.args[slots.index(name)]!r}"
        if _is_context_annotation(p.annotation):
            return f"{name}=<Context>"
        if name in spec.depends_on:
            return f"{name}=<result:{name}>"
        if name in spec.kwargs:
            return f"{name}={spec.kwargs[name]!r}"
        if p.default is not inspect.Parameter.empty:
            return f"{name}=<default>"
        if p.kind == inspect.Parameter.VAR_KEYWORD:
            return "**kwargs=<all-deps>"
        if p.kind == inspect.Parameter.VAR_POSITIONAL:
            return "*args"
        return f"{name}=<UNRESOLVED>"

    rendered = [render(name, p) for name, p in parameters.items()]
    return f"{spec.name}({', '.join(rendered)})"
pyflowx/errors.py ADDED
@@ -0,0 +1,92 @@
1
+ """PyFlowX 错误层级。
2
+
3
+ 所有错误都是 :class:`PyFlowXError` 的具体子类,调用者可以用单个
4
+ ``except`` 子句捕获整个错误家族,同时仍可按类型区分以做细粒度处理。
5
+ """
6
+
7
+ from __future__ import annotations
8
+
9
+ from typing import Any, Iterable, Optional
10
+
11
+
12
class PyFlowXError(Exception):
    """Root of the PyFlowX exception hierarchy.

    Catching this type catches every error the package defines.
    """
14
+
15
+
16
class DuplicateTaskError(PyFlowXError):
    """Signals that a task name was registered more than once."""

    def __init__(self, name: str) -> None:
        # Keep the offending name available for programmatic handling.
        self.name = name
        message = f"Task '{name}' is already registered in the graph."
        super().__init__(message)
22
+
23
+
24
class MissingDependencyError(PyFlowXError):
    """Signals that a task depends on a name that is not in the graph."""

    def __init__(self, task: str, dependency: str) -> None:
        # Both names are kept as attributes for callers that inspect the error.
        self.task = task
        self.dependency = dependency
        message = (
            f"Task '{task}' depends on unknown task '{dependency}'. "
            "Add the dependency before (or together with) this task."
        )
        super().__init__(message)
34
+
35
+
36
class CycleError(PyFlowXError):
    """Signals that the dependency graph contains a cycle."""

    def __init__(self, cycle: Iterable[str]) -> None:
        nodes = list(cycle)
        # Repeat the first node at the end so the rendered chain visibly
        # closes the loop (an empty cycle renders as an empty chain).
        closed_path = [*nodes, *nodes[:1]]
        chain = " -> ".join(closed_path)
        super().__init__(f"The dependency graph contains a cycle: {chain}")
        self.cycle = nodes
44
+
45
+
46
class TaskFailedError(PyFlowXError):
    """Reports that a task failed after the given number of attempts.

    The original exception is exposed as :attr:`cause` and is also installed
    as :attr:`__cause__`, so it shows up chained in tracebacks even when the
    raiser does not use ``raise ... from``.

    Parameters
    ----------
    task:
        Name of the failed task.
    cause:
        The exception that made the task fail.
    attempts:
        Number of attempts reported in the message.
    layer:
        Optional layer index; included in the message when not ``None``.
    """

    def __init__(
        self,
        task: str,
        cause: BaseException,
        attempts: int,
        layer: Optional[int] = None,
    ) -> None:
        location = f" (layer {layer})" if layer is not None else ""
        super().__init__(
            f"Task '{task}' failed after {attempts} attempt(s){location}: {cause}"
        )
        self.task = task
        self.cause = cause
        self.attempts = attempts
        self.layer = layer
        # Assigning a non-exception to ``__cause__`` raises TypeError, so only
        # chain real exceptions.
        if isinstance(cause, BaseException):
            self.__cause__ = cause
68
+
69
+
70
class TaskTimeoutError(PyFlowXError):
    """Signals that a task ran longer than its configured timeout."""

    def __init__(self, task: str, timeout: float) -> None:
        self.task = task
        self.timeout = timeout
        # The timeout is rendered with millisecond precision.
        message = f"Task '{task}' timed out after {timeout:.3f}s."
        super().__init__(message)
77
+
78
+
79
class InjectionError(PyFlowXError):
    """Signals that context injection cannot satisfy a task's signature.

    Parameters
    ----------
    task:
        Name of the task whose call arguments could not be built.
    detail:
        Explanation of what could not be injected.
    """

    def __init__(self, task: str, detail: str) -> None:
        super().__init__(f"Cannot inject context for task '{task}': {detail}")
        self.task = task
        # Keep the raw detail too, so callers need not parse the message.
        self.detail = detail
85
+
86
+
87
class StorageError(PyFlowXError):
    """Signals a failure reported by a state backend.

    Parameters
    ----------
    detail:
        Explanation of what went wrong.
    cause:
        Optional underlying exception, exposed as :attr:`cause`.
    """

    def __init__(self, detail: str, cause: Optional[BaseException] = None) -> None:
        super().__init__(f"State storage error: {detail}")
        # Keep the raw detail too, so callers need not parse the message.
        self.detail = detail
        self.cause: Any = cause
File without changes
@@ -0,0 +1,58 @@
1
+ """Example 3: async aggregation with static args and Context injection.
2
+
3
+ Shows:
4
+ * async task functions executed with strategy="async".
5
+ * static positional args (TaskSpec.args) for parameterised tasks.
6
+ * Context annotation to receive the full upstream result mapping.
7
+ * on_event callback for real-time progress.
8
+ """
9
+
10
+ from __future__ import annotations
11
+
12
+ import asyncio
13
+ from typing import Any, Dict, List
14
+
15
+ import pyflowx as px
16
+
17
+
18
async def fetch_user(uid: int) -> dict:
    """Sleep 0.2 s to mimic a lookup, then return a small record for ``uid``."""
    await asyncio.sleep(0.2)
    profile = {"id": uid, "name": f"User{uid}"}
    return profile
21
+
22
+
23
async def fetch_posts(uid: int) -> List[int]:
    """Sleep 0.2 s to mimic a lookup, then return ``[uid, uid + 1]``."""
    await asyncio.sleep(0.2)
    post_ids = [uid, uid + 1]
    return post_ids
26
+
27
+
28
+ # Context annotation → receives the full mapping of upstream results.
29
def aggregate(ctx: px.Context) -> Dict[str, Any]:
    """Return a plain ``dict`` copy of the upstream results passed in as ``ctx``."""
    snapshot = dict(ctx)
    return snapshot
31
+
32
+
33
def main() -> None:
    """Build the three-task async example graph, dry-run it, then execute it.

    Task events are collected through the ``on_event`` callback and printed
    after the run, followed by the ``aggregate`` result and the report
    description.
    """
    # Both fetch tasks receive their ``uid`` through static positional args.
    specs = [
        px.TaskSpec("fetch_user", fetch_user, args=(1,)),
        px.TaskSpec("fetch_posts", fetch_posts, args=(1,)),
        px.TaskSpec("aggregate", aggregate, ("fetch_user", "fetch_posts")),
    ]
    graph = px.Graph.from_specs(specs)

    print("=== Dry run ===")
    px.run(graph, strategy="async", dry_run=True)

    print("\n=== Async execution ===")
    events: List[px.TaskEvent] = []
    report = px.run(graph, strategy="async", on_event=events.append)

    for event in events:
        print(f" event: {event.task} -> {event.status.value}")

    print(f"\naggregate = {report['aggregate']}")
    print(report.describe())
55
+
56
+
57
+ if __name__ == "__main__":
58
+ main()
@@ -0,0 +1,81 @@
1
+ """Example 1: ETL pipeline (sequential strategy).
2
+
3
+ Demonstrates the core PyFlowX workflow:
4
+ * Define tasks as plain functions.
5
+ * Declare the DAG with a list of TaskSpec.
6
+ * Parameter names == dependency names → automatic context injection,
7
+ no wrappers needed (contrast with flowweaver's get_task_result boilerplate).
8
+ * dry_run to preview, then execute and read typed results from RunReport.
9
+ """
10
+
11
+ from __future__ import annotations
12
+
13
+ from typing import List
14
+
15
+ import pyflowx as px
16
+
17
+ # --- task functions: pure, testable, no framework coupling ------------- #
18
+
19
+
20
def extract_customers() -> List[dict]:
    """Return the two hard-coded customer records used by the example."""
    alice = {"id": "C001", "name": "Alice"}
    bob = {"id": "C002", "name": "Bob"}
    return [alice, bob]
25
+
26
+
27
def extract_orders() -> List[dict]:
    """Return the two hard-coded order records used by the example."""
    first = {"id": "O001", "customer_id": "C001", "amount": 150.0}
    second = {"id": "O002", "customer_id": "C002", "amount": 200.5}
    return [first, second]
32
+
33
+
34
+ # Parameter names match dependency names → automatic injection.
35
def transform(
    extract_customers: List[dict],
    extract_orders: List[dict],
) -> List[dict]:
    """Attach the customer's name to every order that has a known customer.

    Orders whose ``customer_id`` matches no customer ``id`` are dropped. Each
    returned record is a new dict: the order's fields plus
    ``customer_name``.
    """
    # Index customers by id for constant-time lookup per order.
    by_id = {}
    for customer in extract_customers:
        by_id[customer["id"]] = customer

    enriched = []
    for order in extract_orders:
        if order["customer_id"] not in by_id:
            continue
        record = {**order}
        record["customer_name"] = by_id[order["customer_id"]]["name"]
        enriched.append(record)
    return enriched
45
+
46
+
47
def load(transform: List[dict]) -> int:
    """Print how many records were received and return that count."""
    count = len(transform)
    print(f" loaded {count} records")
    return count
50
+
51
+
52
def main() -> None:
    """Build the four-task ETL graph, preview it, then run it sequentially.

    Prints the graph description, performs a dry run, executes the graph and
    finally prints the report, the ``load`` result and the summary.
    """
    extract_specs = [
        px.TaskSpec("extract_customers", extract_customers, tags=("extract",)),
        px.TaskSpec("extract_orders", extract_orders, tags=("extract",)),
    ]
    transform_spec = px.TaskSpec(
        "transform",
        transform,
        ("extract_customers", "extract_orders"),
        tags=("transform",),
    )
    load_spec = px.TaskSpec("load", load, ("transform",), retries=1, tags=("load",))
    graph = px.Graph.from_specs([*extract_specs, transform_spec, load_spec])

    print("=== Execution plan ===")
    print(graph.describe())

    print("\n=== Dry run (no execution) ===")
    px.run(graph, strategy="sequential", dry_run=True)

    print("\n=== Sequential execution ===")
    report = px.run(graph, strategy="sequential")
    print(report.describe())
    print(f"\nload result = {report['load']}")
    print(f"summary = {report.summary()}")
78
+
79
+
80
+ if __name__ == "__main__":
81
+ main()
@@ -0,0 +1,59 @@
1
+ """Example 2: parallel execution (thread strategy).
2
+
3
+ Same DAG run with sequential vs. thread strategy to show layer-internal
4
+ parallelism. Tasks within a layer run concurrently; layers are barriers.
5
+
6
+ Layer 1: [fetch_a, fetch_b] (parallel)
7
+ Layer 2: [merge] (waits for both)
8
+ """
9
+
10
+ from __future__ import annotations
11
+
12
+ import time
13
+
14
+ import pyflowx as px
15
+
16
+
17
def fetch_a() -> str:
    """Block for half a second, then return the string ``"a"``."""
    time.sleep(0.5)
    payload = "a"
    return payload
20
+
21
+
22
def fetch_b() -> str:
    """Block for half a second, then return the string ``"b"``."""
    time.sleep(0.5)
    payload = "b"
    return payload
25
+
26
+
27
def merge(fetch_a: str, fetch_b: str) -> str:
    """Concatenate the two upstream results, ``fetch_a`` first."""
    combined = fetch_a + fetch_b
    return combined
29
+
30
+
31
def main() -> None:
    """Run the same three-task graph sequentially and threaded, with timings.

    Prints the Mermaid diagram, then the ``merge`` result and elapsed time
    for each strategy, and finally the ratio of the two elapsed times.

    Elapsed time is taken from ``time.perf_counter``: it is monotonic and
    high-resolution, so a system clock adjustment during the run cannot
    distort (or zero out) the measured durations the way ``time.time`` can.
    """
    graph = px.Graph.from_specs(
        [
            px.TaskSpec("fetch_a", fetch_a),
            px.TaskSpec("fetch_b", fetch_b),
            px.TaskSpec("merge", merge, ("fetch_a", "fetch_b")),
        ]
    )

    print("=== Mermaid diagram ===")
    print(graph.to_mermaid("LR"))

    print("\n=== Sequential (expect ~1.0s) ===")
    start = time.perf_counter()
    report_seq = px.run(graph, strategy="sequential")
    t_seq = time.perf_counter() - start
    print(f" result={report_seq['merge']} time={t_seq:.2f}s")

    print("\n=== Threaded (expect ~0.5s) ===")
    start = time.perf_counter()
    report_thr = px.run(graph, strategy="thread", max_workers=2)
    t_thr = time.perf_counter() - start
    print(f" result={report_thr['merge']} time={t_thr:.2f}s")

    print(f"\nspeedup = {t_seq / t_thr:.2f}x")
56
+
57
+
58
+ if __name__ == "__main__":
59
+ main()