pyworkflowy 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pyworkflowy/__init__.py +56 -0
- pyworkflowy/_backends.py +204 -0
- pyworkflowy/_classbased.py +108 -0
- pyworkflowy/_core.py +489 -0
- pyworkflowy/_dag.py +120 -0
- pyworkflowy/_persistence.py +186 -0
- pyworkflowy/_runner.py +772 -0
- pyworkflowy/exceptions.py +88 -0
- pyworkflowy/py.typed +0 -0
- pyworkflowy/schedule.py +426 -0
- pyworkflowy-0.1.0.dist-info/METADATA +348 -0
- pyworkflowy-0.1.0.dist-info/RECORD +14 -0
- pyworkflowy-0.1.0.dist-info/WHEEL +4 -0
- pyworkflowy-0.1.0.dist-info/licenses/LICENSE +21 -0
pyworkflowy/__init__.py
ADDED
|
@@ -0,0 +1,56 @@
|
|
|
1
|
+
"""pyWorkflowy — a full workflow engine for async/parallelized Python tasks."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from pyworkflowy._classbased import TaskBase
|
|
6
|
+
from pyworkflowy._core import (
|
|
7
|
+
Backend,
|
|
8
|
+
Backoff,
|
|
9
|
+
DepFailurePolicy,
|
|
10
|
+
Task,
|
|
11
|
+
TaskContext,
|
|
12
|
+
TaskHandle,
|
|
13
|
+
TaskResult,
|
|
14
|
+
TaskStatus,
|
|
15
|
+
current_task,
|
|
16
|
+
task,
|
|
17
|
+
)
|
|
18
|
+
from pyworkflowy._persistence import Checkpointer, JSONCheckpointer, PickleCheckpointer
|
|
19
|
+
from pyworkflowy._runner import TaskRunner, get_current_runner
|
|
20
|
+
from pyworkflowy.exceptions import (
|
|
21
|
+
CheckpointError,
|
|
22
|
+
CycleError,
|
|
23
|
+
DependencyFailedError,
|
|
24
|
+
RetryExhaustedError,
|
|
25
|
+
TaskCancelledError,
|
|
26
|
+
TaskError,
|
|
27
|
+
TaskTimeoutError,
|
|
28
|
+
)
|
|
29
|
+
|
|
30
|
+
__all__ = [
|
|
31
|
+
"Backend",
|
|
32
|
+
"Backoff",
|
|
33
|
+
"CheckpointError",
|
|
34
|
+
"Checkpointer",
|
|
35
|
+
"CycleError",
|
|
36
|
+
"DepFailurePolicy",
|
|
37
|
+
"DependencyFailedError",
|
|
38
|
+
"JSONCheckpointer",
|
|
39
|
+
"PickleCheckpointer",
|
|
40
|
+
"RetryExhaustedError",
|
|
41
|
+
"Task",
|
|
42
|
+
"TaskBase",
|
|
43
|
+
"TaskCancelledError",
|
|
44
|
+
"TaskContext",
|
|
45
|
+
"TaskError",
|
|
46
|
+
"TaskHandle",
|
|
47
|
+
"TaskResult",
|
|
48
|
+
"TaskRunner",
|
|
49
|
+
"TaskStatus",
|
|
50
|
+
"TaskTimeoutError",
|
|
51
|
+
"current_task",
|
|
52
|
+
"get_current_runner",
|
|
53
|
+
"task",
|
|
54
|
+
]
|
|
55
|
+
|
|
56
|
+
__version__ = "0.1.0"
|
pyworkflowy/_backends.py
ADDED
|
@@ -0,0 +1,204 @@
|
|
|
1
|
+
"""Execution backends: asyncio, thread pool, process pool.
|
|
2
|
+
|
|
3
|
+
Each backend implements :meth:`Backend.execute` which takes a task body and
|
|
4
|
+
returns its value, honoring the runner-level concerns the caller has already
|
|
5
|
+
arranged (cancellation flag, contextvars). Retries and timeouts are layered
|
|
6
|
+
*on top* of these backends by :mod:`pyworkflowy._runner` — backends only run a
|
|
7
|
+
single attempt.
|
|
8
|
+
"""
|
|
9
|
+
|
|
10
|
+
from __future__ import annotations
|
|
11
|
+
|
|
12
|
+
import asyncio
|
|
13
|
+
import threading
|
|
14
|
+
from collections.abc import Awaitable, Callable
|
|
15
|
+
from concurrent.futures import ProcessPoolExecutor, ThreadPoolExecutor
|
|
16
|
+
from inspect import iscoroutinefunction
|
|
17
|
+
from typing import TYPE_CHECKING, Any
|
|
18
|
+
|
|
19
|
+
from pyworkflowy.exceptions import TaskCancelledError, TaskTimeoutError
|
|
20
|
+
|
|
21
|
+
if TYPE_CHECKING:
|
|
22
|
+
from pyworkflowy._core import TaskContext
|
|
23
|
+
|
|
24
|
+
__all__ = [
|
|
25
|
+
"BackendExecutor",
|
|
26
|
+
"ProcessBackend",
|
|
27
|
+
"ThreadBackend",
|
|
28
|
+
"asyncio_execute",
|
|
29
|
+
"build_backend",
|
|
30
|
+
]
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
# ---------- thread / process backend ----------
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
class BackendExecutor:
    """Common base for the pool-backed (thread / process) backends.

    A concrete backend stores a ``concurrent.futures`` executor in
    ``_executor`` and overrides :meth:`execute` to run one attempt on it.
    The asyncio backend is *not* a BackendExecutor — it runs cooperatively
    inside the runner's loop and needs no pool.
    """

    # Short identifier of the backend; set by each subclass.
    name: str
    # Pool that attempts are submitted to; created by the subclass.
    _executor: ThreadPoolExecutor | ProcessPoolExecutor

    def shutdown(self, *, wait: bool = True) -> None:
        """Shut down the owned executor, forwarding ``wait`` unchanged."""
        pool = self._executor
        pool.shutdown(wait=wait)

    def execute(
        self,
        fn: Callable[..., Any],
        args: tuple[Any, ...],
        kwargs: dict[str, Any],
        *,
        timeout: float | None,
        cancel_event: threading.Event,
        task_name: str,
    ) -> Any:
        """Run a single attempt of ``fn(*args, **kwargs)``.

        The base implementation is a placeholder; subclasses must override it.

        Raises:
            NotImplementedError: always, on the base class.
        """
        raise NotImplementedError
|
|
62
|
+
|
|
63
|
+
|
|
64
|
+
class ThreadBackend(BackendExecutor):
    """Runs each attempt on a :class:`concurrent.futures.ThreadPoolExecutor`.

    Cancellation is cooperative — the task body must check
    ``current_task().cancel_event`` (or accept the timeout) to actually
    stop. Threads cannot be force-killed, so a runaway task will hang
    shutdown if it ignores cooperative signals.
    """

    name = "thread"

    def __init__(self, max_workers: int) -> None:
        """Create the worker pool with at most ``max_workers`` threads."""
        self._executor = ThreadPoolExecutor(
            max_workers=max_workers, thread_name_prefix="pyworkflowy"
        )

    def execute(
        self,
        fn: Callable[..., Any],
        args: tuple[Any, ...],
        kwargs: dict[str, Any],
        *,
        timeout: float | None,
        cancel_event: threading.Event,
        task_name: str,
    ) -> Any:
        """Run one attempt of ``fn(*args, **kwargs)`` on the thread pool.

        Args:
            fn: Callable to run on a worker thread.
            args: Positional arguments for ``fn``.
            kwargs: Keyword arguments for ``fn``.
            timeout: Seconds to wait for completion; ``None`` waits forever.
            cancel_event: Set when the wait times out, as the cooperative
                stop signal for the task body.
            task_name: Name used in the timeout error message.

        Returns:
            Whatever ``fn`` returns.

        Raises:
            TaskTimeoutError: the attempt did not finish within ``timeout``.
            Exception: any exception raised by ``fn`` is re-raised unchanged,
                including a ``TimeoutError`` raised by the task body itself.
        """
        future = self._executor.submit(fn, *args, **kwargs)
        try:
            # Wait with ``exception()`` instead of ``result()``: it *returns*
            # the task's own exception rather than raising it, so a
            # TimeoutError caught here can only mean the wait expired.
            # NOTE: this relies on concurrent.futures.TimeoutError being the
            # builtin TimeoutError, which holds on Python 3.11+.
            future.exception(timeout=timeout)
        except TimeoutError as exc:
            cancel_event.set()
            # No effect on a running attempt, but stops one that is still
            # queued from starting after it has been reported as timed out.
            future.cancel()
            raise TaskTimeoutError(
                f"Task {task_name!r} exceeded its timeout of {timeout}s on the thread backend"
            ) from exc
        # The future is finished: return its value or re-raise its exception.
        return future.result()
|
|
98
|
+
|
|
99
|
+
|
|
100
|
+
class ProcessBackend(BackendExecutor):
    """Runs each attempt on a :class:`concurrent.futures.ProcessPoolExecutor`.

    Task functions must be importable (top-level or class-level — not nested
    closures or lambdas), because the multiprocessing pickler serializes the
    function reference to ship it to the worker. On timeout, the future is
    cancelled and the worker is left to finish or be reaped; a *force*
    terminate is intentionally avoided to keep pool stability.
    """

    name = "process"

    def __init__(self, max_workers: int) -> None:
        """Create the worker pool with at most ``max_workers`` processes."""
        self._executor = ProcessPoolExecutor(max_workers=max_workers)

    def execute(
        self,
        fn: Callable[..., Any],
        args: tuple[Any, ...],
        kwargs: dict[str, Any],
        *,
        timeout: float | None,
        cancel_event: threading.Event,
        task_name: str,
    ) -> Any:
        """Run one attempt of ``fn(*args, **kwargs)`` on the process pool.

        Args:
            fn: Callable to run in a worker process.
            args: Positional arguments for ``fn``.
            kwargs: Keyword arguments for ``fn``.
            timeout: Seconds to wait for completion; ``None`` waits forever.
            cancel_event: Accepted for signature parity with the other
                backends; this method does not touch it.
            task_name: Name used in the timeout error message.

        Returns:
            Whatever ``fn`` returns.

        Raises:
            TaskTimeoutError: the attempt did not finish within ``timeout``.
            Exception: any exception raised by ``fn`` is re-raised unchanged,
                including a ``TimeoutError`` raised by the task body itself.
        """
        future = self._executor.submit(fn, *args, **kwargs)
        try:
            # Wait with ``exception()`` instead of ``result()``: it *returns*
            # the task's own exception rather than raising it, so a
            # TimeoutError caught here can only mean the wait expired.
            # NOTE: this relies on concurrent.futures.TimeoutError being the
            # builtin TimeoutError, which holds on Python 3.11+.
            future.exception(timeout=timeout)
        except TimeoutError as exc:
            # Only prevents a still-queued attempt from starting; a running
            # worker is deliberately left alone.
            future.cancel()
            raise TaskTimeoutError(
                f"Task {task_name!r} exceeded its timeout of {timeout}s on the process backend"
            ) from exc
        # The future is finished: return its value or re-raise its exception.
        return future.result()
|
|
133
|
+
|
|
134
|
+
|
|
135
|
+
def build_backend(name: str, max_workers: int) -> BackendExecutor:
    """Return the pool-backed executor registered under ``name``.

    Only ``"thread"`` and ``"process"`` are recognised here. The asyncio
    backend is special-cased upstream and never produces a
    :class:`BackendExecutor`.

    Raises:
        ValueError: ``name`` is neither ``"thread"`` nor ``"process"``.
    """
    # Pick the class first so construction happens in exactly one place.
    if name == "thread":
        backend_cls = ThreadBackend
    elif name == "process":
        backend_cls = ProcessBackend
    else:
        raise ValueError(f"Unknown backend {name!r}; expected 'thread' or 'process'.")
    return backend_cls(max_workers)
|
|
146
|
+
|
|
147
|
+
|
|
148
|
+
# ---------- asyncio backend ----------
|
|
149
|
+
|
|
150
|
+
|
|
151
|
+
async def asyncio_execute(
    fn: Callable[..., Any],
    args: tuple[Any, ...],
    kwargs: dict[str, Any],
    *,
    timeout: float | None,
    cancel_event: threading.Event,
    task_name: str,
    ctx: TaskContext,
    setup: Callable[[], Any],
    teardown: Callable[[Any], None],
) -> Any:
    """Run one attempt on the asyncio event loop.

    Supports both sync and async ``fn``. Sync ``fn`` runs inline on the loop
    (we deliberately don't shove it onto a thread — if the user wanted that,
    they'd pick the ``thread`` backend). Cancellation works cooperatively
    via asyncio's normal task cancellation machinery; ``cancel_event`` is
    also set here on timeout or cancellation of an async ``fn``.

    Args:
        fn: Task body; a coroutine function is awaited, anything else is
            called inline.
        args: Positional arguments for ``fn``.
        kwargs: Keyword arguments for ``fn``.
        timeout: Seconds allowed for the attempt; ``None`` disables the limit.
        cancel_event: Set when an async ``fn`` times out or is cancelled.
        task_name: Name used in error messages.
        ctx: Task context supplied by the caller; not read by this function.
        setup: Called once before the attempt; its return value is handed to
            ``teardown``.
        teardown: Always called with the ``setup`` token once the attempt
            ends, whether it succeeded or raised.

    Returns:
        Whatever ``fn`` returns (or resolves to).

    Raises:
        TaskTimeoutError: the attempt exceeded ``timeout``. For a sync ``fn``
            this is only detected after it has returned.
        TaskCancelledError: an async ``fn`` was cancelled.
    """
    token = setup()
    try:
        if iscoroutinefunction(fn):
            coro: Awaitable[Any] = fn(*args, **kwargs)
            try:
                if timeout is None:
                    # No deadline: a TimeoutError from here comes from the
                    # task body and must propagate unchanged rather than be
                    # reported as a task timeout.
                    return await coro
                try:
                    return await asyncio.wait_for(coro, timeout=timeout)
                except TimeoutError as exc:
                    cancel_event.set()
                    raise TaskTimeoutError(
                        f"Task {task_name!r} exceeded its timeout of {timeout}s on the asyncio backend"
                    ) from exc
            except asyncio.CancelledError as exc:
                cancel_event.set()
                raise TaskCancelledError(f"Task {task_name!r} was cancelled") from exc

        # Sync fn on asyncio: run inline. Timeout is checked *after*; for
        # CPU-bound sync code, prefer the thread backend.
        if timeout is None:
            return fn(*args, **kwargs)
        loop = asyncio.get_running_loop()
        start = loop.time()
        result = fn(*args, **kwargs)
        elapsed = loop.time() - start
        if elapsed > timeout:
            raise TaskTimeoutError(
                f"Task {task_name!r} took {elapsed:.3f}s, exceeding timeout of {timeout}s "
                "(sync function on asyncio backend; consider backend='thread' for "
                "true timeout cancellation)",
                elapsed=elapsed,
            )
        return result
    finally:
        teardown(token)
|
pyworkflowy/_classbased.py
ADDED
|
@@ -0,0 +1,108 @@
|
|
|
1
|
+
"""Class-based task definition.
|
|
2
|
+
|
|
3
|
+
Subclass :class:`TaskBase`, override ``run``, set class-level config attributes,
|
|
4
|
+
and instantiate to get a :class:`Task`. The :class:`Task` instance forwards
|
|
5
|
+
``.submit()`` and ``.__call__()`` to the wrapped ``run`` method — so once
|
|
6
|
+
constructed, a class-based task is indistinguishable from one built by
|
|
7
|
+
:func:`@task`.
|
|
8
|
+
|
|
9
|
+
Class-based form is purely a convenience for code that prefers config-as-class
|
|
10
|
+
over config-as-decorator-kwargs; the underlying engine sees a normal
|
|
11
|
+
:class:`Task`.
|
|
12
|
+
"""
|
|
13
|
+
|
|
14
|
+
from __future__ import annotations
|
|
15
|
+
|
|
16
|
+
from typing import Any
|
|
17
|
+
|
|
18
|
+
from pyworkflowy._core import (
|
|
19
|
+
Backend,
|
|
20
|
+
Backoff,
|
|
21
|
+
DepFailurePolicy,
|
|
22
|
+
Task,
|
|
23
|
+
_build_task,
|
|
24
|
+
)
|
|
25
|
+
|
|
26
|
+
__all__ = ["TaskBase"]
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
_SENTINEL = object()
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
class TaskBase:
    """Declarative, class-based way to define a task.

    Subclass, override :meth:`run` (sync or async), and set any of the
    class-level attributes (``name``, ``backend``, ``retries``, ``timeout``,
    ``backoff``, ``backoff_base``, ``backoff_max``, ``retry_on``,
    ``on_dep_failure``, ``max_attempts``) to configure it. Instantiating the
    subclass does not return an instance of it: ``__new__`` hands back
    whatever ``_build_task`` builds around the bound ``run`` method, so the
    result *is* the task, not a wrapper around one::

        class FetchUser(TaskBase):
            name = "fetch-user"
            backend = "thread"
            retries = 2

            def run(self, user_id: int) -> dict[str, Any]:
                ...

        fetch_user = FetchUser()
        handle = fetch_user.submit(42)
    """

    # Task name; when falsy, "<module>.<qualname>" of the subclass is used.
    name: str | None = None
    backend: Backend = "asyncio"
    retries: int = 0
    timeout: float | None = None
    backoff: Backoff = "exponential"
    backoff_base: float = 1.0
    backoff_max: float = 30.0
    retry_on: type[BaseException] | tuple[type[BaseException], ...] | None = None
    on_dep_failure: DepFailurePolicy = "fail"
    # Alternative to ``retries``: total attempts, i.e. retries + 1.
    max_attempts: int | None = None

    def run(self, *args: Any, **kwargs: Any) -> Any:
        """Task body placeholder; subclasses override it (sync or async)."""
        raise NotImplementedError(
            f"{type(self).__name__}.run is not implemented — override it in your subclass."
        )

    def __new__(cls, *args: Any, **kwargs: Any) -> Task[Any]:  # type: ignore[misc]
        """Validate the class-level config and build the task.

        Raises:
            TypeError: ``TaskBase`` itself is instantiated, or constructor
                arguments are supplied.
            ValueError: both ``retries`` and ``max_attempts`` are set, or
                ``max_attempts`` is below 1.
        """
        if cls is TaskBase:
            raise TypeError(
                "TaskBase is abstract — subclass it and override `run` instead of "
                "instantiating it directly."
            )
        if args or kwargs:
            raise TypeError(
                f"{cls.__name__} takes no constructor arguments — pass values to "
                "`.submit(*args, **kwargs)` instead. Class-level attributes "
                "configure the task itself; runtime args go to `run`."
            )

        # A bound `run` lets the resulting Task.fn behave like a plain function.
        bound_run = object.__new__(cls).run

        declared_retries = cls.retries
        attempts_cap = cls.max_attempts
        if attempts_cap is None:
            retry_budget = declared_retries
        elif declared_retries:
            raise ValueError(
                f"{cls.__name__}: set either `retries` or `max_attempts`, not both"
            )
        elif attempts_cap < 1:
            raise ValueError(
                f"{cls.__name__}.max_attempts must be >= 1, got {attempts_cap}"
            )
        else:
            # The first attempt is not a retry.
            retry_budget = attempts_cap - 1

        options = {
            "name": cls.name or f"{cls.__module__}.{cls.__qualname__}",
            "backend": cls.backend,
            "retries": retry_budget,
            "timeout": cls.timeout,
            "backoff": cls.backoff,
            "backoff_base": cls.backoff_base,
            "backoff_max": cls.backoff_max,
            "retry_on": cls.retry_on,
            "on_dep_failure": cls.on_dep_failure,
        }
        return _build_task(bound_run, **options)
|