dagrun 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (37) hide show
  1. dagrun-0.1.0/.gitignore +4 -0
  2. dagrun-0.1.0/LICENSE +21 -0
  3. dagrun-0.1.0/PKG-INFO +87 -0
  4. dagrun-0.1.0/README.md +71 -0
  5. dagrun-0.1.0/pyproject.toml +41 -0
  6. dagrun-0.1.0/src/dagrun/__init__.py +6 -0
  7. dagrun-0.1.0/src/dagrun/chain.py +71 -0
  8. dagrun-0.1.0/src/dagrun/control.py +9 -0
  9. dagrun-0.1.0/src/dagrun/dag.py +396 -0
  10. dagrun-0.1.0/src/dagrun/dag_runner.py +59 -0
  11. dagrun-0.1.0/src/dagrun/events.py +96 -0
  12. dagrun-0.1.0/src/dagrun/ext/__init__.py +0 -0
  13. dagrun-0.1.0/src/dagrun/ext/blob.py +80 -0
  14. dagrun-0.1.0/src/dagrun/ext/progress.py +277 -0
  15. dagrun-0.1.0/src/dagrun/ext/sqlite.py +326 -0
  16. dagrun-0.1.0/src/dagrun/model.py +272 -0
  17. dagrun-0.1.0/src/dagrun/plan.py +67 -0
  18. dagrun-0.1.0/src/dagrun/py.typed +0 -0
  19. dagrun-0.1.0/src/dagrun/store.py +88 -0
  20. dagrun-0.1.0/src/dagrun/strategy.py +778 -0
  21. dagrun-0.1.0/tests/test_api_spec.py +131 -0
  22. dagrun-0.1.0/tests/test_column_groups_e2e.py +44 -0
  23. dagrun-0.1.0/tests/test_combine.py +45 -0
  24. dagrun-0.1.0/tests/test_dag_compile.py +368 -0
  25. dagrun-0.1.0/tests/test_defer.py +87 -0
  26. dagrun-0.1.0/tests/test_external_columns.py +155 -0
  27. dagrun-0.1.0/tests/test_fk_normalization.py +53 -0
  28. dagrun-0.1.0/tests/test_max_age.py +198 -0
  29. dagrun-0.1.0/tests/test_memory_store.py +185 -0
  30. dagrun-0.1.0/tests/test_model.py +181 -0
  31. dagrun-0.1.0/tests/test_observer_events.py +81 -0
  32. dagrun-0.1.0/tests/test_plan_filter.py +81 -0
  33. dagrun-0.1.0/tests/test_required_validation.py +122 -0
  34. dagrun-0.1.0/tests/test_single_thread.py +244 -0
  35. dagrun-0.1.0/tests/test_sqlite_store.py +268 -0
  36. dagrun-0.1.0/tests/test_unified_executor.py +170 -0
  37. dagrun-0.1.0/uv.lock +243 -0
@@ -0,0 +1,4 @@
1
+ # Python
2
+ __pycache__
3
+ dist/
4
+
dagrun-0.1.0/LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 Pierre Hugo
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
dagrun-0.1.0/PKG-INFO ADDED
@@ -0,0 +1,87 @@
1
+ Metadata-Version: 2.4
2
+ Name: dagrun
3
+ Version: 0.1.0
4
+ Summary: Declarative DAG runner with pipelining and concurrency control.
5
+ Author-email: Pierre Hugo <pierrekin@househugo.com>
6
+ License-Expression: MIT
7
+ License-File: LICENSE
8
+ Classifier: Development Status :: 4 - Beta
9
+ Classifier: Intended Audience :: Developers
10
+ Classifier: Operating System :: OS Independent
11
+ Classifier: Programming Language :: Python :: 3
12
+ Classifier: Programming Language :: Python :: 3.14
13
+ Classifier: Typing :: Typed
14
+ Requires-Python: >=3.14
15
+ Description-Content-Type: text/markdown
16
+
17
+ # dagrun
18
+
19
+ A small, dependency-free DAG runner for Python where you never declare an edge.
20
+ Annotate your functions with the columns they read and produce, and dagrun will
21
+ figure out the rest.
22
+
23
+ ## Example
24
+
25
+ ```python
26
+ from collections.abc import Iterable
27
+ from dagrun import Dag, DagRunner
28
+ from dagrun.model import PK, FK, Column, Entity
29
+
30
+
31
+ class Site(Entity):
32
+ site_id = PK[str]
33
+ name = Column[str | None]
34
+
35
+
36
+ class File(Entity):
37
+ file_id = PK[str]
38
+ site = FK[Site]
39
+ name = Column[str | None]
40
+ title = Column[str | None]
41
+
42
+
43
+ dag = Dag()
44
+
45
+
46
+ @dag.fn
47
+ def discover_sites(sp: Sharepoint) -> Iterable[Site[Site.name]]:
48
+ for raw in sp.list_sites():
49
+ yield Site(site_id=raw["id"], name=raw["name"])
50
+
51
+
52
+ @dag.fn
53
+ def discover_files(site: Site, sp: Sharepoint) -> Iterable[File[File.name]]:
54
+ for raw in sp.list_files(site.site_id):
55
+ yield File(file_id=raw["id"], site=site, name=raw["name"])
56
+
57
+
58
+ @dag.fn
59
+ def extract_title(file: File[File.name]) -> File[File.title]:
60
+ return File(file_id=file.file_id, title=file.name.removesuffix(".txt"))
61
+
62
+
63
+ runner = DagRunner()
64
+ runner.provide(Sharepoint, Sharepoint)
65
+ runner.execute(dag)
66
+ ```
67
+
68
+ Parameter annotations declare what a function reads (`File[File.name]`), return
69
+ annotations declare what it produces, and the dependency graph is built from
70
+ those. Reads and writes are keyed per column, so functions that enrich different
71
+ columns of the same entity run independently.
72
+
73
+ ## What you get
74
+
75
+ - **Compile-time checks.** `compile()` fails on a missing dependency, a column
76
+ produced twice, or a cycle, before anything runs.
77
+ - **Incremental reruns.** The store records when each `(function, input)` pair
78
+ last ran. Pass `max_age=timedelta(hours=6)` to skip anything still fresh.
79
+ - **Deferred nodes.** A node can `raise Defer()` to skip the current input at
80
+ runtime, writing and recording nothing so the next run reconsiders it.
81
+ - **Pipelining.** Tasks are scheduled and gated per input, so `extract_title`
82
+ starts on the first file while `discover_files` is still finding the rest.
83
+ - **Concurrency control.** Each function declares a `cost` against named pools to
84
+ bound resource usage and respect rate limits.
85
+ - **Blob columns.** A `Blob` column stashes bytes in a dedicated backend and
86
+ keeps only a reference in the row.
87
+
dagrun-0.1.0/README.md ADDED
@@ -0,0 +1,71 @@
1
+ # dagrun
2
+
3
+ A small, dependency-free DAG runner for Python where you never declare an edge.
4
+ Annotate your functions with the columns they read and produce, and dagrun will
5
+ figure out the rest.
6
+
7
+ ## Example
8
+
9
+ ```python
10
+ from collections.abc import Iterable
11
+ from dagrun import Dag, DagRunner
12
+ from dagrun.model import PK, FK, Column, Entity
13
+
14
+
15
+ class Site(Entity):
16
+ site_id = PK[str]
17
+ name = Column[str | None]
18
+
19
+
20
+ class File(Entity):
21
+ file_id = PK[str]
22
+ site = FK[Site]
23
+ name = Column[str | None]
24
+ title = Column[str | None]
25
+
26
+
27
+ dag = Dag()
28
+
29
+
30
+ @dag.fn
31
+ def discover_sites(sp: Sharepoint) -> Iterable[Site[Site.name]]:
32
+ for raw in sp.list_sites():
33
+ yield Site(site_id=raw["id"], name=raw["name"])
34
+
35
+
36
+ @dag.fn
37
+ def discover_files(site: Site, sp: Sharepoint) -> Iterable[File[File.name]]:
38
+ for raw in sp.list_files(site.site_id):
39
+ yield File(file_id=raw["id"], site=site, name=raw["name"])
40
+
41
+
42
+ @dag.fn
43
+ def extract_title(file: File[File.name]) -> File[File.title]:
44
+ return File(file_id=file.file_id, title=file.name.removesuffix(".txt"))
45
+
46
+
47
+ runner = DagRunner()
48
+ runner.provide(Sharepoint, Sharepoint)
49
+ runner.execute(dag)
50
+ ```
51
+
52
+ Parameter annotations declare what a function reads (`File[File.name]`), return
53
+ annotations declare what it produces, and the dependency graph is built from
54
+ those. Reads and writes are keyed per column, so functions that enrich different
55
+ columns of the same entity run independently.
56
+
57
+ ## What you get
58
+
59
+ - **Compile-time checks.** `compile()` fails on a missing dependency, a column
60
+ produced twice, or a cycle, before anything runs.
61
+ - **Incremental reruns.** The store records when each `(function, input)` pair
62
+ last ran. Pass `max_age=timedelta(hours=6)` to skip anything still fresh.
63
+ - **Deferred nodes.** A node can `raise Defer()` to skip the current input at
64
+ runtime, writing and recording nothing so the next run reconsiders it.
65
+ - **Pipelining.** Tasks are scheduled and gated per input, so `extract_title`
66
+ starts on the first file while `discover_files` is still finding the rest.
67
+ - **Concurrency control.** Each function declares a `cost` against named pools to
68
+ bound resource usage and respect rate limits.
69
+ - **Blob columns.** A `Blob` column stashes bytes in a dedicated backend and
70
+ keeps only a reference in the row.
71
+
@@ -0,0 +1,41 @@
1
+ [project]
2
+ name = "dagrun"
3
+ version = "0.1.0"
4
+ description = "Declarative DAG runner with pipelining and concurrency control."
5
+ readme = "README.md"
6
+ license = "MIT"
7
+ license-files = ["LICENSE"]
8
+ requires-python = ">=3.14"
9
+ authors = [{ name = "Pierre Hugo", email = "pierrekin@househugo.com" }]
10
+ classifiers = [
11
+ "Development Status :: 4 - Beta",
12
+ "Intended Audience :: Developers",
13
+ "Operating System :: OS Independent",
14
+ "Programming Language :: Python :: 3",
15
+ "Programming Language :: Python :: 3.14",
16
+ "Typing :: Typed",
17
+ ]
18
+ dependencies = []
19
+
20
+ [build-system]
21
+ requires = ["hatchling"]
22
+ build-backend = "hatchling.build"
23
+
24
+ [tool.hatch.build.targets.wheel]
25
+ packages = ["src/dagrun"]
26
+
27
+ [dependency-groups]
28
+ dev = [
29
+ "mypy>=1.20.1",
30
+ "pytest>=8.0.0",
31
+ "ruff>=0.15.11",
32
+ ]
33
+
34
+ [tool.mypy]
35
+ strict = true
36
+
37
+ [tool.ruff]
38
+ line-length = 100
39
+
40
+ [tool.pytest.ini_options]
41
+ testpaths = ["tests"]
@@ -0,0 +1,6 @@
1
+ from dagrun import model
2
+ from dagrun.control import Defer
3
+ from dagrun.dag import Dag, DagValidationError
4
+ from dagrun.dag_runner import DagRunner
5
+
6
+ __all__ = ["Dag", "DagRunner", "DagValidationError", "Defer", "model"]
@@ -0,0 +1,71 @@
1
+ from collections.abc import Callable, Mapping
2
+ from dataclasses import dataclass
3
+ from typing import Any, Protocol
4
+
5
+
6
def member(fn: Callable[..., Any]) -> Callable[..., Any]:
    """Flag ``fn`` as a dagrun member and hand the same callable back.

    The flag is the attribute ``__dagrun_member__`` set to ``True`` directly
    on ``fn``; the callable is not wrapped or copied, so this can be used as
    a plain decorator.
    """
    setattr(fn, "__dagrun_member__", True)
    return fn
9
+
10
+
11
class Combine(Protocol):
    """Structural interface shared by the combine strategies.

    Any object exposing a compatible ``reduce`` method satisfies this
    protocol; no inheritance is required.
    """

    def reduce(self, column: str, values: list[Any]) -> Any:
        """Collapse ``values`` gathered for ``column`` into a single result."""
13
+
14
+
15
@dataclass(frozen=True)
class LastWins:
    """Combine strategy that keeps the last value in the list."""

    def reduce(self, column: str, values: list[Any]) -> Any:
        """Return the final element of ``values``; ``column`` is not consulted.

        An empty ``values`` list raises ``IndexError``.
        """
        final = values[-1]
        return final
19
+
20
+
21
@dataclass(frozen=True)
class FirstWins:
    """Combine strategy that keeps the first value in the list."""

    def reduce(self, column: str, values: list[Any]) -> Any:
        """Return the leading element of ``values``; ``column`` is not consulted.

        An empty ``values`` list raises ``IndexError``.
        """
        leading = values[0]
        return leading
25
+
26
+
27
@dataclass(frozen=True)
class Error:
    """Combine strategy that refuses to pick between differing values."""

    def reduce(self, column: str, values: list[Any]) -> Any:
        """Return the first value, or raise if the values disagree.

        Values are deduplicated through ``_hash`` keys, so hashable values are
        compared by equality and unhashable ones (lists, dicts, ...) by their
        ``repr``.

        Raises:
            CombineConflict: more than one distinct value is present.
            IndexError: ``values`` is empty.
        """
        distinct = {_hash(v) for v in values}
        if len(distinct) > 1:
            raise CombineConflict(column, values)
        return values[0]


class CombineConflict(RuntimeError):
    """Raised when a column receives values that cannot be reconciled.

    Attributes are kept on the instance so callers can inspect the conflict:
    ``column`` is the column name and ``values`` the full list that was
    offered.
    """

    def __init__(self, column: str, values: list[Any]) -> None:
        super().__init__(f"Conflicting values for column {column!r}: {values!r}")
        self.column = column
        self.values = values


def _hash(value: Any) -> Any:
    """Return a hashable key that identifies ``value`` for deduplication.

    Hashable values are keyed by themselves and unhashable values by their
    ``repr``. The leading boolean tag keeps the two families apart, so a real
    string such as ``"[1]"`` can never be mistaken for the list ``[1]`` whose
    ``repr`` happens to be the same text.
    """
    try:
        hash(value)
    except TypeError:
        # Unhashable (list, dict, set, tuple containing one of those, ...).
        return (False, repr(value))
    return (True, value)
49
+
50
+
51
+ @dataclass(frozen=True)
52
+ class PerColumn:
53
+ """Per-column combine override map. '*' means default for unlisted columns."""
54
+
55
+ rules: tuple[tuple[str, Combine], ...]
56
+
57
+ def __init__(self, rules: Mapping[str, Combine] | "tuple[tuple[str, Combine], ...]") -> None:
58
+ if isinstance(rules, Mapping):
59
+ items = tuple(rules.items())
60
+ else:
61
+ items = tuple(rules)
62
+ object.__setattr__(self, "rules", items)
63
+
64
+ def reduce(self, column: str, values: list[Any]) -> Any:
65
+ lookup = dict(self.rules)
66
+ rule = lookup.get(column, lookup.get("*"))
67
+ if rule is None:
68
+ return values[-1]
69
+ return rule.reduce(column, values)
70
+
71
+
@@ -0,0 +1,9 @@
1
class Defer(Exception):
    """Raised by a node to decline processing the current input for now.

    On Defer the runner throws away whatever output the node produced for
    that input and does not record an invocation. The input is therefore not
    considered done: the next run evaluates it again from scratch.

    Intended for decisions driven by runtime configuration (a token budget,
    for instance), as opposed to an intrinsic, persistable property of the
    input itself.
    """