dagrun 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- dagrun-0.1.0/.gitignore +4 -0
- dagrun-0.1.0/LICENSE +21 -0
- dagrun-0.1.0/PKG-INFO +87 -0
- dagrun-0.1.0/README.md +71 -0
- dagrun-0.1.0/pyproject.toml +41 -0
- dagrun-0.1.0/src/dagrun/__init__.py +6 -0
- dagrun-0.1.0/src/dagrun/chain.py +71 -0
- dagrun-0.1.0/src/dagrun/control.py +9 -0
- dagrun-0.1.0/src/dagrun/dag.py +396 -0
- dagrun-0.1.0/src/dagrun/dag_runner.py +59 -0
- dagrun-0.1.0/src/dagrun/events.py +96 -0
- dagrun-0.1.0/src/dagrun/ext/__init__.py +0 -0
- dagrun-0.1.0/src/dagrun/ext/blob.py +80 -0
- dagrun-0.1.0/src/dagrun/ext/progress.py +277 -0
- dagrun-0.1.0/src/dagrun/ext/sqlite.py +326 -0
- dagrun-0.1.0/src/dagrun/model.py +272 -0
- dagrun-0.1.0/src/dagrun/plan.py +67 -0
- dagrun-0.1.0/src/dagrun/py.typed +0 -0
- dagrun-0.1.0/src/dagrun/store.py +88 -0
- dagrun-0.1.0/src/dagrun/strategy.py +778 -0
- dagrun-0.1.0/tests/test_api_spec.py +131 -0
- dagrun-0.1.0/tests/test_column_groups_e2e.py +44 -0
- dagrun-0.1.0/tests/test_combine.py +45 -0
- dagrun-0.1.0/tests/test_dag_compile.py +368 -0
- dagrun-0.1.0/tests/test_defer.py +87 -0
- dagrun-0.1.0/tests/test_external_columns.py +155 -0
- dagrun-0.1.0/tests/test_fk_normalization.py +53 -0
- dagrun-0.1.0/tests/test_max_age.py +198 -0
- dagrun-0.1.0/tests/test_memory_store.py +185 -0
- dagrun-0.1.0/tests/test_model.py +181 -0
- dagrun-0.1.0/tests/test_observer_events.py +81 -0
- dagrun-0.1.0/tests/test_plan_filter.py +81 -0
- dagrun-0.1.0/tests/test_required_validation.py +122 -0
- dagrun-0.1.0/tests/test_single_thread.py +244 -0
- dagrun-0.1.0/tests/test_sqlite_store.py +268 -0
- dagrun-0.1.0/tests/test_unified_executor.py +170 -0
- dagrun-0.1.0/uv.lock +243 -0
dagrun-0.1.0/.gitignore
ADDED
dagrun-0.1.0/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 Pierre Hugo
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
dagrun-0.1.0/PKG-INFO
ADDED
|
@@ -0,0 +1,87 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: dagrun
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: Declarative DAG runner with pipelining and concurrency control.
|
|
5
|
+
Author-email: Pierre Hugo <pierrekin@househugo.com>
|
|
6
|
+
License-Expression: MIT
|
|
7
|
+
License-File: LICENSE
|
|
8
|
+
Classifier: Development Status :: 4 - Beta
|
|
9
|
+
Classifier: Intended Audience :: Developers
|
|
10
|
+
Classifier: Operating System :: OS Independent
|
|
11
|
+
Classifier: Programming Language :: Python :: 3
|
|
12
|
+
Classifier: Programming Language :: Python :: 3.14
|
|
13
|
+
Classifier: Typing :: Typed
|
|
14
|
+
Requires-Python: >=3.14
|
|
15
|
+
Description-Content-Type: text/markdown
|
|
16
|
+
|
|
17
|
+
# dagrun
|
|
18
|
+
|
|
19
|
+
A small, dependency-free DAG runner for Python where you never declare an edge.
|
|
20
|
+
Annotate your functions with the columns they read and produce, and dagrun will
|
|
21
|
+
figure out the rest.
|
|
22
|
+
|
|
23
|
+
## Example
|
|
24
|
+
|
|
25
|
+
```python
|
|
26
|
+
from collections.abc import Iterable
|
|
27
|
+
from dagrun import Dag, DagRunner
|
|
28
|
+
from dagrun.model import PK, FK, Column, Entity
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
class Site(Entity):
|
|
32
|
+
site_id = PK[str]
|
|
33
|
+
name = Column[str | None]
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
class File(Entity):
|
|
37
|
+
file_id = PK[str]
|
|
38
|
+
site = FK[Site]
|
|
39
|
+
name = Column[str | None]
|
|
40
|
+
title = Column[str | None]
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
dag = Dag()
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
@dag.fn
|
|
47
|
+
def discover_sites(sp: Sharepoint) -> Iterable[Site[Site.name]]:
|
|
48
|
+
for raw in sp.list_sites():
|
|
49
|
+
yield Site(site_id=raw["id"], name=raw["name"])
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
@dag.fn
|
|
53
|
+
def discover_files(site: Site, sp: Sharepoint) -> Iterable[File[File.name]]:
|
|
54
|
+
for raw in sp.list_files(site.site_id):
|
|
55
|
+
yield File(file_id=raw["id"], site=site, name=raw["name"])
|
|
56
|
+
|
|
57
|
+
|
|
58
|
+
@dag.fn
|
|
59
|
+
def extract_title(file: File[File.name]) -> File[File.title]:
|
|
60
|
+
return File(file_id=file.file_id, title=file.name.removesuffix(".txt"))
|
|
61
|
+
|
|
62
|
+
|
|
63
|
+
runner = DagRunner()
|
|
64
|
+
runner.provide(Sharepoint, Sharepoint)
|
|
65
|
+
runner.execute(dag)
|
|
66
|
+
```
|
|
67
|
+
|
|
68
|
+
Parameter annotations declare what a function reads (`File[File.name]`), return
|
|
69
|
+
annotations declare what it produces, and the dependency graph is built from
|
|
70
|
+
those. Reads and writes are keyed per column, so functions that enrich different
|
|
71
|
+
columns of the same entity run independently.
|
|
72
|
+
|
|
73
|
+
## What you get
|
|
74
|
+
|
|
75
|
+
- **Compile-time checks.** `compile()` fails on a missing dependency, a column
|
|
76
|
+
produced twice, or a cycle, before anything runs.
|
|
77
|
+
- **Incremental reruns.** The store records when each `(function, input)` pair
|
|
78
|
+
last ran. Pass `max_age=timedelta(hours=6)` to skip anything still fresh.
|
|
79
|
+
- **Deferred nodes.** A node can `raise Defer()` to skip the current input at
|
|
80
|
+
runtime, writing and recording nothing so the next run reconsiders it.
|
|
81
|
+
- **Pipelining.** Tasks are scheduled and gated per input, so `extract_title`
|
|
82
|
+
starts on the first file while `discover_files` is still finding the rest.
|
|
83
|
+
- **Concurrency control.** Each function declares a `cost` against named pools to
|
|
84
|
+
bound resource usage and respect rate limits.
|
|
85
|
+
- **Blob columns.** A `Blob` column stashes bytes in a dedicated backend and
|
|
86
|
+
keeps only a reference in the row.
|
|
87
|
+
|
dagrun-0.1.0/README.md
ADDED
|
@@ -0,0 +1,71 @@
|
|
|
1
|
+
# dagrun
|
|
2
|
+
|
|
3
|
+
A small, dependency-free DAG runner for Python where you never declare an edge.
|
|
4
|
+
Annotate your functions with the columns they read and produce, and dagrun will
|
|
5
|
+
figure out the rest.
|
|
6
|
+
|
|
7
|
+
## Example
|
|
8
|
+
|
|
9
|
+
```python
|
|
10
|
+
from collections.abc import Iterable
|
|
11
|
+
from dagrun import Dag, DagRunner
|
|
12
|
+
from dagrun.model import PK, FK, Column, Entity
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
class Site(Entity):
|
|
16
|
+
site_id = PK[str]
|
|
17
|
+
name = Column[str | None]
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
class File(Entity):
|
|
21
|
+
file_id = PK[str]
|
|
22
|
+
site = FK[Site]
|
|
23
|
+
name = Column[str | None]
|
|
24
|
+
title = Column[str | None]
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
dag = Dag()
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
@dag.fn
|
|
31
|
+
def discover_sites(sp: Sharepoint) -> Iterable[Site[Site.name]]:
|
|
32
|
+
for raw in sp.list_sites():
|
|
33
|
+
yield Site(site_id=raw["id"], name=raw["name"])
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
@dag.fn
|
|
37
|
+
def discover_files(site: Site, sp: Sharepoint) -> Iterable[File[File.name]]:
|
|
38
|
+
for raw in sp.list_files(site.site_id):
|
|
39
|
+
yield File(file_id=raw["id"], site=site, name=raw["name"])
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
@dag.fn
|
|
43
|
+
def extract_title(file: File[File.name]) -> File[File.title]:
|
|
44
|
+
return File(file_id=file.file_id, title=file.name.removesuffix(".txt"))
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
runner = DagRunner()
|
|
48
|
+
runner.provide(Sharepoint, Sharepoint)
|
|
49
|
+
runner.execute(dag)
|
|
50
|
+
```
|
|
51
|
+
|
|
52
|
+
Parameter annotations declare what a function reads (`File[File.name]`), return
|
|
53
|
+
annotations declare what it produces, and the dependency graph is built from
|
|
54
|
+
those. Reads and writes are keyed per column, so functions that enrich different
|
|
55
|
+
columns of the same entity run independently.
|
|
56
|
+
|
|
57
|
+
## What you get
|
|
58
|
+
|
|
59
|
+
- **Compile-time checks.** `compile()` fails on a missing dependency, a column
|
|
60
|
+
produced twice, or a cycle, before anything runs.
|
|
61
|
+
- **Incremental reruns.** The store records when each `(function, input)` pair
|
|
62
|
+
last ran. Pass `max_age=timedelta(hours=6)` to skip anything still fresh.
|
|
63
|
+
- **Deferred nodes.** A node can `raise Defer()` to skip the current input at
|
|
64
|
+
runtime, writing and recording nothing so the next run reconsiders it.
|
|
65
|
+
- **Pipelining.** Tasks are scheduled and gated per input, so `extract_title`
|
|
66
|
+
starts on the first file while `discover_files` is still finding the rest.
|
|
67
|
+
- **Concurrency control.** Each function declares a `cost` against named pools to
|
|
68
|
+
bound resource usage and respect rate limits.
|
|
69
|
+
- **Blob columns.** A `Blob` column stashes bytes in a dedicated backend and
|
|
70
|
+
keeps only a reference in the row.
|
|
71
|
+
|
dagrun-0.1.0/pyproject.toml
ADDED
|
@@ -0,0 +1,41 @@
|
|
|
1
|
+
[project]
|
|
2
|
+
name = "dagrun"
|
|
3
|
+
version = "0.1.0"
|
|
4
|
+
description = "Declarative DAG runner with pipelining and concurrency control."
|
|
5
|
+
readme = "README.md"
|
|
6
|
+
license = "MIT"
|
|
7
|
+
license-files = ["LICENSE"]
|
|
8
|
+
requires-python = ">=3.14"
|
|
9
|
+
authors = [{ name = "Pierre Hugo", email = "pierrekin@househugo.com" }]
|
|
10
|
+
classifiers = [
|
|
11
|
+
"Development Status :: 4 - Beta",
|
|
12
|
+
"Intended Audience :: Developers",
|
|
13
|
+
"Operating System :: OS Independent",
|
|
14
|
+
"Programming Language :: Python :: 3",
|
|
15
|
+
"Programming Language :: Python :: 3.14",
|
|
16
|
+
"Typing :: Typed",
|
|
17
|
+
]
|
|
18
|
+
dependencies = []
|
|
19
|
+
|
|
20
|
+
[build-system]
|
|
21
|
+
requires = ["hatchling"]
|
|
22
|
+
build-backend = "hatchling.build"
|
|
23
|
+
|
|
24
|
+
[tool.hatch.build.targets.wheel]
|
|
25
|
+
packages = ["src/dagrun"]
|
|
26
|
+
|
|
27
|
+
[dependency-groups]
|
|
28
|
+
dev = [
|
|
29
|
+
"mypy>=1.20.1",
|
|
30
|
+
"pytest>=8.0.0",
|
|
31
|
+
"ruff>=0.15.11",
|
|
32
|
+
]
|
|
33
|
+
|
|
34
|
+
[tool.mypy]
|
|
35
|
+
strict = true
|
|
36
|
+
|
|
37
|
+
[tool.ruff]
|
|
38
|
+
line-length = 100
|
|
39
|
+
|
|
40
|
+
[tool.pytest.ini_options]
|
|
41
|
+
testpaths = ["tests"]
|
dagrun-0.1.0/src/dagrun/chain.py
ADDED
|
@@ -0,0 +1,71 @@
|
|
|
1
|
+
from collections.abc import Callable, Mapping
|
|
2
|
+
from dataclasses import dataclass
|
|
3
|
+
from typing import Any, Protocol
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
def member(fn: Callable[..., Any]) -> Callable[..., Any]:
    """Tag ``fn`` by setting its ``__dagrun_member__`` attribute to ``True``.

    The callable is not wrapped: the very same object is returned, so this
    can be applied as a plain decorator.
    """
    setattr(fn, "__dagrun_member__", True)
    return fn
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
class Combine(Protocol):
    """Structural interface for objects that reduce a column's values to one.

    Any object exposing a compatible ``reduce`` method satisfies this
    protocol; inheriting from it is not required.
    """

    def reduce(self, column: str, values: list[Any]) -> Any:
        """Collapse ``values`` collected for ``column`` into a single value."""
        ...
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
@dataclass(frozen=True)
class LastWins:
    """Combine strategy that keeps the final value of the list."""

    def reduce(self, column: str, values: list[Any]) -> Any:
        """Return the last element of ``values``; ``column`` is not consulted.

        An empty ``values`` list raises ``IndexError``.
        """
        return values[-1]
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
@dataclass(frozen=True)
class FirstWins:
    """Combine strategy that keeps the initial value of the list."""

    def reduce(self, column: str, values: list[Any]) -> Any:
        """Return the first element of ``values``; ``column`` is not consulted.

        An empty ``values`` list raises ``IndexError``.
        """
        return values[0]
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
@dataclass(frozen=True)
class Error:
    """Combine strategy that refuses to pick between differing values."""

    def reduce(self, column: str, values: list[Any]) -> Any:
        """Return the first value, provided all values share one comparison key.

        Each value is mapped through ``_hash`` and the resulting keys are
        collected in a set, so values count as equal when their keys are.

        Raises:
            CombineConflict: if the values yield more than one distinct key.
            IndexError: if ``values`` is empty.
        """
        seen_keys = set(map(_hash, values))
        if len(seen_keys) <= 1:
            return values[0]
        raise CombineConflict(column, values)
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
class CombineConflict(RuntimeError):
    """Error reporting that a column received conflicting values.

    The offending column name and the full list of values are kept on the
    instance as ``column`` and ``values``.
    """

    def __init__(self, column: str, values: list[Any]) -> None:
        """Build the message from ``column`` and ``values`` and store both."""
        message = f"Conflicting values for column {column!r}: {values!r}"
        super().__init__(message)
        self.column = column
        self.values = values
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
def _hash(value: Any) -> Any:
|
|
44
|
+
try:
|
|
45
|
+
hash(value)
|
|
46
|
+
except TypeError:
|
|
47
|
+
return repr(value)
|
|
48
|
+
return value
|
|
49
|
+
|
|
50
|
+
|
|
51
|
+
@dataclass(frozen=True)
class PerColumn:
    """Combine strategy that delegates to a separate rule for each column.

    The ``"*"`` entry is the default rule for columns without their own
    entry. Rules are held as a tuple of ``(column, rule)`` pairs.
    """

    rules: tuple[tuple[str, Combine], ...]

    def __init__(self, rules: Mapping[str, Combine] | "tuple[tuple[str, Combine], ...]") -> None:
        """Store ``rules``, given as a mapping or as ``(column, rule)`` pairs."""
        pairs = rules.items() if isinstance(rules, Mapping) else rules
        # The dataclass is frozen, so normal attribute assignment is blocked;
        # object.__setattr__ bypasses that to initialise the field.
        object.__setattr__(self, "rules", tuple(pairs))

    def reduce(self, column: str, values: list[Any]) -> Any:
        """Reduce ``values`` with the rule registered for ``column``.

        Lookup order: the column's own rule, then the ``"*"`` rule. When
        neither yields a rule, the last element of ``values`` is returned.
        """
        by_column = dict(self.rules)
        if column in by_column:
            chosen = by_column[column]
        else:
            chosen = by_column.get("*")
        if chosen is None:
            return values[-1]
        return chosen.reduce(column, values)
|
|
70
|
+
|
|
71
|
+
|
dagrun-0.1.0/src/dagrun/control.py
ADDED
|
@@ -0,0 +1,9 @@
|
|
|
1
|
+
class Defer(Exception):
    """Raised by a node to postpone handling of the input it was given.

    The runner reacts by throwing away any output the node produced for that
    input and by not recording an invocation. The input is therefore not
    treated as done and is evaluated again from scratch on the next run.

    Intended for decisions driven by runtime configuration (for example a
    token budget), not by an intrinsic, persistable property of the input.
    """
|