kahn-queue 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- kahn_queue-0.1.0/PKG-INFO +75 -0
- kahn_queue-0.1.0/README.md +64 -0
- kahn_queue-0.1.0/pyproject.toml +26 -0
- kahn_queue-0.1.0/setup.cfg +4 -0
- kahn_queue-0.1.0/src/dag.py +149 -0
- kahn_queue-0.1.0/src/exception.py +6 -0
- kahn_queue-0.1.0/src/kahnQueue/__init__.py +6 -0
- kahn_queue-0.1.0/src/kahnQueue/concurrent_kahn_queue.py +75 -0
- kahn_queue-0.1.0/src/kahnQueue/default_kahn_queue.py +60 -0
- kahn_queue-0.1.0/src/kahnQueue/kahn_queue.py +18 -0
- kahn_queue-0.1.0/src/kahnQueue/node_machine.py +50 -0
- kahn_queue-0.1.0/src/kahnQueue/node_state.py +10 -0
- kahn_queue-0.1.0/src/kahn_queue.egg-info/PKG-INFO +75 -0
- kahn_queue-0.1.0/src/kahn_queue.egg-info/SOURCES.txt +25 -0
- kahn_queue-0.1.0/src/kahn_queue.egg-info/dependency_links.txt +1 -0
- kahn_queue-0.1.0/src/kahn_queue.egg-info/top_level.txt +6 -0
- kahn_queue-0.1.0/src/scheduler.py +79 -0
- kahn_queue-0.1.0/src/tracker.py +28 -0
- kahn_queue-0.1.0/src/utils/__init__.py +4 -0
- kahn_queue-0.1.0/src/utils/state_machine.py +39 -0
- kahn_queue-0.1.0/tests/test_concurrent_kahn_queue.py +263 -0
- kahn_queue-0.1.0/tests/test_dag.py +115 -0
- kahn_queue-0.1.0/tests/test_default_kahn_queue.py +123 -0
- kahn_queue-0.1.0/tests/test_kahn_scheduler.py +95 -0
- kahn_queue-0.1.0/tests/test_node_machine.py +58 -0
- kahn_queue-0.1.0/tests/test_node_progress_tracker.py +78 -0
- kahn_queue-0.1.0/tests/test_state_machine.py +58 -0
|
@@ -0,0 +1,75 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: kahn-queue
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: Kahn-style ready-queue for dependency scheduling
|
|
5
|
+
License: MIT
|
|
6
|
+
Project-URL: Homepage, https://github.com/Flashlock/kahn-queue
|
|
7
|
+
Project-URL: Repository, https://github.com/Flashlock/kahn-queue
|
|
8
|
+
Keywords: dag,scheduler,workflow,kahn
|
|
9
|
+
Requires-Python: >=3.10
|
|
10
|
+
Description-Content-Type: text/markdown
|
|
11
|
+
|
|
12
|
+
# kahn-queue (Python)
|
|
13
|
+
|
|
14
|
+
## Getting started
|
|
15
|
+
|
|
16
|
+
- **Python:** **3.10+** recommended (venv + `requirements-dev.txt`; no version pinned in-repo).
|
|
17
|
+
- **Tests:** `make test-python`, or `cd python`, create `.venv`, install `requirements-dev.txt`, then `pytest`.
|
|
18
|
+
|
|
19
|
+
## Pieces
|
|
20
|
+
|
|
21
|
+
| Piece | Role |
|
|
22
|
+
|--------|------|
|
|
23
|
+
| `Dag` / `Dag.builder()` | Immutable DAG: `add`, `connect`, `build`. |
|
|
24
|
+
| `KahnScheduler` | Drives execution: `run`, `signal_complete` / `signal_failed`; `get_result()` returns `DagResult` (frozensets of ids). |
|
|
25
|
+
| `KahnQueue` / `DefaultKahnQueue` / `ConcurrentKahnQueue` | **`DefaultKahnQueue`** for single-threaded updates; **`ConcurrentKahnQueue`** when `pop` / `prune` run from many threads. |
|
|
26
|
+
| `IllegalGraphException` | Raised for invalid graphs (e.g. self-loop or cycle at `build()`). |
|
|
27
|
+
| `NodeProgressTracker` | Optional per-node progress in `[0, 1]`; not required for scheduling. |
|
|
28
|
+
|
|
29
|
+
## Examples
|
|
30
|
+
|
|
31
|
+
### Single-threaded (Temporal-style)
|
|
32
|
+
|
|
33
|
+
```python
|
|
34
|
+
from dag import Dag
|
|
35
|
+
from scheduler import KahnScheduler
|
|
36
|
+
|
|
37
|
+
b = Dag.builder()
|
|
38
|
+
lint = b.add("lint")
|
|
39
|
+
comp = b.add("compile")
|
|
40
|
+
test = b.add("test")
|
|
41
|
+
b.connect(lint, comp).connect(comp, test)
|
|
42
|
+
dag = b.build()
|
|
43
|
+
|
|
44
|
+
def execute_node(node_id: int, sched: KahnScheduler[str]) -> None:
|
|
45
|
+
try:
|
|
46
|
+
run_step(dag[node_id])
|
|
47
|
+
sched.signal_complete(node_id)
|
|
48
|
+
except Exception:
|
|
49
|
+
sched.signal_failed(node_id)
|
|
50
|
+
|
|
51
|
+
sched = KahnScheduler(dag, execute_node)
|
|
52
|
+
sched.run()
|
|
53
|
+
result = sched.get_result()
|
|
54
|
+
```
|
|
55
|
+
|
|
56
|
+
The two-arg `KahnScheduler(dag, execute_node)` uses `DefaultKahnQueue` when `queue` is omitted.
|
|
57
|
+
|
|
58
|
+
### Concurrent (`ConcurrentKahnQueue`)
|
|
59
|
+
|
|
60
|
+
```python
|
|
61
|
+
from kahnQueue.concurrent_kahn_queue import ConcurrentKahnQueue
|
|
62
|
+
from scheduler import KahnScheduler
|
|
63
|
+
|
|
64
|
+
# … build dag, define execute_node as in the previous example …
|
|
65
|
+
|
|
66
|
+
sched = KahnScheduler(
|
|
67
|
+
dag,
|
|
68
|
+
execute_node,
|
|
69
|
+
queue=ConcurrentKahnQueue(dag),
|
|
70
|
+
)
|
|
71
|
+
sched.run()
|
|
72
|
+
result = sched.get_result()
|
|
73
|
+
```
|
|
74
|
+
|
|
75
|
+
Use a thread-safe queue when `execute_node` is invoked from many threads; keep any extra shared result structures thread-safe as well.
|
|
@@ -0,0 +1,64 @@
|
|
|
1
|
+
# kahn-queue (Python)
|
|
2
|
+
|
|
3
|
+
## Getting started
|
|
4
|
+
|
|
5
|
+
- **Python:** **3.10+** required (`requires-python = ">=3.10"` in `pyproject.toml`); develop in a venv with `requirements-dev.txt` installed. No exact interpreter version is pinned in-repo.
|
|
6
|
+
- **Tests:** `make test-python`, or `cd python`, create `.venv`, install `requirements-dev.txt`, then `pytest`.
|
|
7
|
+
|
|
8
|
+
## Pieces
|
|
9
|
+
|
|
10
|
+
| Piece | Role |
|
|
11
|
+
|--------|------|
|
|
12
|
+
| `Dag` / `Dag.builder()` | Immutable DAG: `add`, `connect`, `build`. |
|
|
13
|
+
| `KahnScheduler` | Drives execution: `run`, `signal_complete` / `signal_failed`; `get_result()` returns `DagResult` (frozensets of ids). |
|
|
14
|
+
| `KahnQueue` / `DefaultKahnQueue` / `ConcurrentKahnQueue` | **`DefaultKahnQueue`** for single-threaded updates; **`ConcurrentKahnQueue`** when `pop` / `prune` run from many threads. |
|
|
15
|
+
| `IllegalGraphException` | Raised for invalid graphs (e.g. self-loop or cycle at `build()`). |
|
|
16
|
+
| `NodeProgressTracker` | Optional per-node progress in `[0, 1]`; not required for scheduling. |
|
|
17
|
+
|
|
18
|
+
## Examples
|
|
19
|
+
|
|
20
|
+
### Single-threaded (Temporal-style)
|
|
21
|
+
|
|
22
|
+
```python
|
|
23
|
+
from dag import Dag
|
|
24
|
+
from scheduler import KahnScheduler
|
|
25
|
+
|
|
26
|
+
b = Dag.builder()
|
|
27
|
+
lint = b.add("lint")
|
|
28
|
+
comp = b.add("compile")
|
|
29
|
+
test = b.add("test")
|
|
30
|
+
b.connect(lint, comp).connect(comp, test)
|
|
31
|
+
dag = b.build()
|
|
32
|
+
|
|
33
|
+
def execute_node(node_id: int, sched: KahnScheduler[str]) -> None:
|
|
34
|
+
try:
|
|
35
|
+
run_step(dag[node_id])
|
|
36
|
+
sched.signal_complete(node_id)
|
|
37
|
+
except Exception:
|
|
38
|
+
sched.signal_failed(node_id)
|
|
39
|
+
|
|
40
|
+
sched = KahnScheduler(dag, execute_node)
|
|
41
|
+
sched.run()
|
|
42
|
+
result = sched.get_result()
|
|
43
|
+
```
|
|
44
|
+
|
|
45
|
+
The two-arg `KahnScheduler(dag, execute_node)` uses `DefaultKahnQueue` when `queue` is omitted.
|
|
46
|
+
|
|
47
|
+
### Concurrent (`ConcurrentKahnQueue`)
|
|
48
|
+
|
|
49
|
+
```python
|
|
50
|
+
from kahnQueue.concurrent_kahn_queue import ConcurrentKahnQueue
|
|
51
|
+
from scheduler import KahnScheduler
|
|
52
|
+
|
|
53
|
+
# … build dag, define execute_node as in the previous example …
|
|
54
|
+
|
|
55
|
+
sched = KahnScheduler(
|
|
56
|
+
dag,
|
|
57
|
+
execute_node,
|
|
58
|
+
queue=ConcurrentKahnQueue(dag),
|
|
59
|
+
)
|
|
60
|
+
sched.run()
|
|
61
|
+
result = sched.get_result()
|
|
62
|
+
```
|
|
63
|
+
|
|
64
|
+
Use a thread-safe queue when `execute_node` is invoked from many threads; keep any extra shared result structures thread-safe as well.
|
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
[build-system]
|
|
2
|
+
requires = ["setuptools>=61", "wheel"]
|
|
3
|
+
build-backend = "setuptools.build_meta"
|
|
4
|
+
|
|
5
|
+
[project]
|
|
6
|
+
name = "kahn-queue"
|
|
7
|
+
version = "0.1.0"
|
|
8
|
+
description = "Kahn-style ready-queue for dependency scheduling"
|
|
9
|
+
readme = "README.md"
|
|
10
|
+
requires-python = ">=3.10"
|
|
11
|
+
license = { text = "MIT" }
|
|
12
|
+
keywords = ["dag", "scheduler", "workflow", "kahn"]
|
|
13
|
+
|
|
14
|
+
[project.urls]
|
|
15
|
+
Homepage = "https://github.com/Flashlock/kahn-queue"
|
|
16
|
+
Repository = "https://github.com/Flashlock/kahn-queue"
|
|
17
|
+
|
|
18
|
+
# Layout: packages under src/ plus top-level modules (dag, scheduler, …).
|
|
19
|
+
[tool.setuptools.package-dir]
|
|
20
|
+
"" = "src"
|
|
21
|
+
|
|
22
|
+
[tool.setuptools.packages.find]
|
|
23
|
+
where = ["src"]
|
|
24
|
+
|
|
25
|
+
[tool.setuptools]
|
|
26
|
+
py-modules = ["dag", "scheduler", "exception", "tracker"]
|
|
@@ -0,0 +1,149 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from collections import deque
|
|
4
|
+
from dataclasses import dataclass
|
|
5
|
+
from typing import Deque, Generic, Iterable, Iterator, List, Set, Tuple, TypeVar
|
|
6
|
+
|
|
7
|
+
from exception import IllegalGraphException
|
|
8
|
+
|
|
9
|
+
# Payload type stored on each node of a ``Dag`` / ``DagBuilder``.
T = TypeVar("T")
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
def validate_node(node_id: int, size: int) -> None:
    """Check that ``node_id`` addresses a node in a graph holding ``size`` nodes.

    Valid ids are the integers ``0`` through ``size - 1``.

    Raises:
        IndexError: if ``node_id`` is negative or not less than ``size``
    """
    in_range = 0 <= node_id < size
    if in_range:
        return
    raise IndexError(f"Invalid node id: {node_id}")
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
@dataclass(frozen=True)
class Dag(Generic[T], Iterable[T]):
    """Read-only directed graph whose nodes are integer ids carrying typed payloads.

    Instances are normally produced through :meth:`builder` followed by
    ``build()``; every edge points from a source id to a target id.
    """

    # Payload of node ``i`` is stored at index ``i``.
    _nodes: Tuple[T, ...]
    # ``_adj[i]`` lists the successor ids of node ``i``.
    _adj: Tuple[Tuple[int, ...], ...]
    # ``_rev[i]`` lists the predecessor ids of node ``i``.
    _rev: Tuple[Tuple[int, ...], ...]

    def __len__(self) -> int:
        """Node count; valid ids run from ``0`` to ``len(self) - 1``."""
        node_count = len(self._nodes)
        return node_count

    def size(self) -> int:
        """Node count (same value as ``len(self)``)."""
        return self.__len__()

    def __getitem__(self, node_id: int) -> T:
        """Return the payload stored on ``node_id``.

        Raises:
            IndexError: if ``node_id`` is outside ``0 .. len(self) - 1``
        """
        validate_node(node_id, len(self))
        payload = self._nodes[node_id]
        return payload

    def get(self, node_id: int) -> T:
        """Return the payload stored on ``node_id`` (method form of ``dag[node_id]``)."""
        return self.__getitem__(node_id)

    def in_degree(self, node_id: int) -> int:
        """Number of edges that end at ``node_id``."""
        validate_node(node_id, len(self))
        predecessors = self._rev[node_id]
        return len(predecessors)

    def out_degree(self, node_id: int) -> int:
        """Number of edges that start at ``node_id``."""
        validate_node(node_id, len(self))
        successors = self._adj[node_id]
        return len(successors)

    def targets(self, node_id: int) -> Iterable[int]:
        """Ids of the nodes that ``node_id`` has an edge to."""
        validate_node(node_id, len(self))
        successors = self._adj[node_id]
        return successors

    def sources(self, node_id: int) -> Iterable[int]:
        """Ids of the nodes that have an edge to ``node_id``."""
        validate_node(node_id, len(self))
        predecessors = self._rev[node_id]
        return predecessors

    def __iter__(self) -> Iterator[T]:
        """Iterate payloads in ascending id order; edges are exposed by ``targets`` / ``sources``."""
        payloads = self._nodes
        return iter(payloads)

    @staticmethod
    def validate_node(node_id: int, size: int) -> None:
        """Check that ``node_id`` is a valid id for a graph of ``size`` nodes.

        Delegates to the module-level ``validate_node``.

        Raises:
            IndexError: if out of range
        """
        return validate_node(node_id, size)

    @classmethod
    def builder(cls) -> DagBuilder[T]:
        """Create an empty, mutable ``DagBuilder``."""
        fresh: DagBuilder[T] = DagBuilder()
        return fresh
|
|
87
|
+
|
|
88
|
+
|
|
89
|
+
class DagBuilder(Generic[T]):
    """Mutable accumulator of nodes and edges that produces an immutable ``Dag``."""

    def __init__(self) -> None:
        # All three lists are indexed by node id and grow together in ``add``.
        self._nodes: List[T] = []
        self._adj: List[Set[int]] = []
        self._rev: List[Set[int]] = []

    def add(self, data: T) -> int:
        """Register a node carrying ``data`` and return its id for use with ``connect``."""
        self._nodes.append(data)
        self._adj.append(set())
        self._rev.append(set())
        # Ids are assigned densely, so the new node is the last index.
        return len(self._nodes) - 1

    def connect(self, source: int, target: int) -> DagBuilder[T]:
        """Record the directed edge ``source`` → ``target``; repeating an edge has no effect.

        Returns this builder so calls can be chained.

        Raises:
            IndexError: if either id has not been returned by ``add``
            IllegalGraphException: if ``source == target``
        """
        node_count = len(self._nodes)
        for endpoint in (source, target):
            validate_node(endpoint, node_count)

        if source == target:
            raise IllegalGraphException(f"Self-loop not allowed: {source}")

        # Edge sets make re-adding an existing edge a no-op.
        self._adj[source].add(target)
        self._rev[target].add(source)
        return self

    def build(self) -> Dag[T]:
        """Freeze the current nodes and edges into a ``Dag``.

        Neighbour ids are stored in ascending order.

        Raises:
            IllegalGraphException: if the edges contain a directed cycle
        """
        self._cycle_check()
        payloads = tuple(self._nodes)
        successors = tuple(tuple(sorted(out)) for out in self._adj)
        predecessors = tuple(tuple(sorted(inc)) for inc in self._rev)
        return Dag(_nodes=payloads, _adj=successors, _rev=predecessors)

    def _cycle_check(self) -> None:
        """Run Kahn's algorithm over the edges; raise if some node is never released."""
        total = len(self._nodes)
        if total == 0:
            return

        # Remaining unprocessed predecessors per node.
        pending = list(map(len, self._rev))
        frontier: Deque[int] = deque(
            node for node, waiting in enumerate(pending) if waiting == 0
        )
        unvisited = total

        while frontier:
            current = frontier.popleft()
            unvisited -= 1
            for succ in self._adj[current]:
                pending[succ] -= 1
                if pending[succ] == 0:
                    frontier.append(succ)

        # Nodes on a cycle never reach zero pending predecessors.
        if unvisited != 0:
            raise IllegalGraphException("Graph contains a directed cycle")
|
|
@@ -0,0 +1,75 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from threading import RLock
|
|
4
|
+
from typing import List, Set
|
|
5
|
+
|
|
6
|
+
from dag import Dag, validate_node
|
|
7
|
+
from kahnQueue.node_machine import NodeMachine
|
|
8
|
+
from kahnQueue.node_state import NodeState
|
|
9
|
+
from kahnQueue.kahn_queue import KahnQueue
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
class ConcurrentKahnQueue(KahnQueue):
    """``KahnQueue`` for concurrent ``pop`` and ``prune`` calls. ``ready_ids()`` may not
    reflect a consistent snapshot if other threads update the queue at the same time; coordinate
    externally if you need strict ordering or visibility. For single-threaded use, prefer
    ``DefaultKahnQueue``.

    NOTE(review): nodes without incoming edges appear to start out READY (``NodeMachine.create``
    calls ``_try_ready``), and the only READY -> ACTIVE transition in this class is applied to
    children inside ``pop``. ``pop`` rejects ids that are not ACTIVE, so confirm where root nodes
    are moved to ACTIVE.
    """

    def __init__(self, dag: Dag[object]) -> None:
        """Builds a queue whose readiness matches ``dag``."""
        self._dag = dag
        # Each machine gets its own lock, mirroring the synchronized(nodeMachines[id]) pattern
        self._node_machines: List[NodeMachine] = [
            NodeMachine.create(i, dag.in_degree(i)) for i in range(dag.size())
        ]
        # ``_locks[i]`` is the lock taken around accesses to ``_node_machines[i]`` in pop/prune.
        self._locks = [RLock() for _ in range(dag.size())]

    def pop(self, id: int) -> Set[int]:
        """Marks ``id`` COMPLETE and returns the children that were moved to ACTIVE.

        Raises:
            IndexError: if ``id`` is not a valid node id for the dag
            ValueError: if node ``id`` is not ACTIVE
        """
        validate_node(id, self._dag.size())

        # Check-and-complete runs under this node's lock so two concurrent pops of the
        # same id cannot both pass the ACTIVE check.
        with self._locks[id]:
            machine = self._node_machines[id]
            if not machine.is_(NodeState.ACTIVE):
                raise ValueError(f"Pop failed. Node {id} is not ACTIVE (state: {machine.state})")

            machine.transition(NodeState.COMPLETE)

        promoted: Set[int] = set()
        for cid in self._dag.targets(id):
            # Decrement and promotion of a child happen under that child's own lock.
            with self._locks[cid]:
                child = self._node_machines[cid]
                # Drops one pending source; the machine moves itself to READY when allowed.
                child.decrement()

                if child.can_transition(NodeState.ACTIVE):
                    child.transition(NodeState.ACTIVE)
                    promoted.add(cid)
        return promoted

    def prune(self, id: int) -> Set[int]:
        """Marks ``id`` and everything reachable from it PRUNED; returns the ids changed by this call.

        Nodes that are already PRUNED are skipped and their targets are not re-visited.

        Raises:
            IndexError: if ``id`` is not a valid node id for the dag
        """
        # NOTE(review): behaviour for a node in a state with no PRUNED transition
        # (e.g. COMPLETE) depends on ``StateMachine.transition`` — confirm.
        validate_node(id, self._dag.size())

        affected: Set[int] = set()
        # Iterative depth-first walk over successors.
        stack: List[int] = [id]

        while stack:
            curr = stack.pop()

            with self._locks[curr]:
                machine = self._node_machines[curr]
                # Avoid redundant transitions if another thread pruned this branch
                if machine.is_(NodeState.PRUNED):
                    continue
                machine.transition(NodeState.PRUNED)
                affected.add(curr)

            # Adding targets outside the lock is safe as the DAG structure is immutable
            stack.extend(self._dag.targets(curr))

        return affected

    def ready_ids(self) -> Set[int]:
        """Returns ids whose machine is READY; states are read without taking the per-node locks."""
        return {
            m.id for m in self._node_machines
            if m.is_(NodeState.READY)
        }
|
|
@@ -0,0 +1,60 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from collections import deque
|
|
4
|
+
from typing import List, Set
|
|
5
|
+
|
|
6
|
+
from dag import Dag, validate_node
|
|
7
|
+
from kahnQueue.node_machine import NodeMachine
|
|
8
|
+
from kahnQueue.node_state import NodeState
|
|
9
|
+
from kahnQueue.kahn_queue import KahnQueue
|
|
10
|
+
|
|
11
|
+
class DefaultKahnQueue(KahnQueue):
    """Kahn-style node state tracking without any locking; intended for single-threaded use."""

    def __init__(self, dag: Dag[object]) -> None:
        """Create one ``NodeMachine`` per node of ``dag``, seeded with the node's in-degree."""
        self._dag = dag
        machines: List[NodeMachine] = []
        for node_id in range(dag.size()):
            machines.append(NodeMachine.create(node_id, dag.in_degree(node_id)))
        self._node_machines: List[NodeMachine] = machines

    def pop(self, id: int) -> Set[int]:
        """Complete node ``id`` and return the children that became ACTIVE as a result.

        Raises:
            IndexError: if ``id`` is not a valid node id for the dag
            ValueError: if node ``id`` is not ACTIVE
        """
        validate_node(id, self._dag.size())
        machine = self._node_machines[id]

        if not machine.is_(NodeState.ACTIVE):
            raise ValueError(f"Pop failed. Node {id} is in state {machine.state}, not ACTIVE")

        machine.transition(NodeState.COMPLETE)

        newly_active: Set[int] = set()
        for child_id in self._dag.targets(id):
            node = self._node_machines[child_id]
            node.decrement()
            # A child that reached READY is handed out straight away as ACTIVE.
            if not node.is_(NodeState.READY):
                continue
            node.transition(NodeState.ACTIVE)
            newly_active.add(child_id)
        return newly_active

    def prune(self, id: int) -> Set[int]:
        """Mark ``id`` and every node reachable from it PRUNED; return the ids changed by this call.

        Raises:
            IndexError: if ``id`` is not a valid node id for the dag
        """
        validate_node(id, self._dag.size())
        pruned_now: Set[int] = set()
        todo: List[int] = [id]

        while todo:
            node_id = todo.pop()
            node = self._node_machines[node_id]

            # Already-pruned nodes are skipped so shared descendants are handled once.
            if not node.is_(NodeState.PRUNED):
                node.transition(NodeState.PRUNED)
                pruned_now.add(node_id)
                todo.extend(self._dag.targets(node_id))

        return pruned_now

    def ready_ids(self) -> Set[int]:
        """Return the ids of all nodes whose machine is currently READY."""
        ready: Set[int] = set()
        for node in self._node_machines:
            if node.is_(NodeState.READY):
                ready.add(node.id)
        return ready
|
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
from typing import Protocol, runtime_checkable, Iterable
|
|
3
|
+
|
|
4
|
+
@runtime_checkable
class KahnQueue(Protocol):
    """Tracks which DAG nodes are runnable and applies completion or pruning.

    Structural interface: any object providing ``pop``, ``prune`` and ``ready_ids``
    satisfies it, and ``isinstance`` checks are enabled through ``runtime_checkable``.
    """

    def pop(self, id: int) -> Iterable[int]:
        """Marks ``id`` completed and returns ids of nodes that became runnable.

        The implementations in this package raise ``IndexError`` for an out-of-range
        ``id`` and ``ValueError`` when the node is not ACTIVE.
        """
        ...

    def prune(self, id: int) -> Iterable[int]:
        """Marks ``id`` and its descendants pruned; returns every affected node id.

        The implementations in this package raise ``IndexError`` for an out-of-range ``id``.
        """
        ...

    def ready_ids(self) -> Iterable[int]:
        """Node ids currently runnable (not yet active)."""
        ...
|
|
@@ -0,0 +1,50 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from dataclasses import dataclass
|
|
4
|
+
from typing import Final, Mapping, Set
|
|
5
|
+
|
|
6
|
+
from kahnQueue.node_state import NodeState
|
|
7
|
+
from utils.state_machine import StateMachine
|
|
8
|
+
|
|
9
|
+
# Allowed state changes for a node, keyed by current state.
# Every non-terminal state may be PRUNED; COMPLETE and PRUNED have no outgoing transitions.
NODE_TRANSITIONS: Final[Mapping[NodeState, Set[NodeState]]] = {
    # QUEUED -> READY is additionally gated by ``NodeMachine.can_transition``
    # while ``num_sources`` is still positive.
    NodeState.QUEUED: {NodeState.READY, NodeState.PRUNED},
    NodeState.READY: {NodeState.ACTIVE, NodeState.PRUNED},
    NodeState.ACTIVE: {NodeState.COMPLETE, NodeState.PRUNED},
    NodeState.COMPLETE: set(),
    NodeState.PRUNED: set(),
}
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
@dataclass
class NodeMachine(StateMachine[NodeState]):
    """State machine for one DAG node, tracking how many sources are still outstanding."""

    # Number of source nodes that have not been accounted for via ``decrement`` yet.
    num_sources: int
    # Id of the node this machine belongs to.
    id: int

    @classmethod
    def create(cls, id: int, num_sources: int) -> NodeMachine:
        """Build a machine in QUEUED and immediately try to advance it to READY."""
        machine = cls(
            _state=NodeState.QUEUED,
            _transitions=NODE_TRANSITIONS,
            num_sources=num_sources,
            id=id,
        )
        machine._try_ready()
        return machine

    def can_transition(self, to: NodeState) -> bool:
        """Apply the base-class rule, but keep a QUEUED node out of READY while sources remain."""
        waiting_on_sources = self.is_(NodeState.QUEUED) and self.num_sources > 0
        if waiting_on_sources and to == NodeState.READY:
            return False
        return super().can_transition(to)

    def decrement(self) -> None:
        """Account for one source and move to READY if that is now permitted.

        Raises:
            RuntimeError: if ``num_sources`` is already zero or less
        """
        remaining = self.num_sources
        if remaining <= 0:
            raise RuntimeError("Attempting to decrement below zero")
        self.num_sources = remaining - 1
        self._try_ready()

    def _try_ready(self) -> None:
        """Transition to READY when ``can_transition`` allows it; otherwise do nothing."""
        if not self.can_transition(NodeState.READY):
            return
        self.transition(NodeState.READY)
|
|
50
|
+
|
|
@@ -0,0 +1,75 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: kahn-queue
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: Kahn-style ready-queue for dependency scheduling
|
|
5
|
+
License: MIT
|
|
6
|
+
Project-URL: Homepage, https://github.com/Flashlock/kahn-queue
|
|
7
|
+
Project-URL: Repository, https://github.com/Flashlock/kahn-queue
|
|
8
|
+
Keywords: dag,scheduler,workflow,kahn
|
|
9
|
+
Requires-Python: >=3.10
|
|
10
|
+
Description-Content-Type: text/markdown
|
|
11
|
+
|
|
12
|
+
# kahn-queue (Python)
|
|
13
|
+
|
|
14
|
+
## Getting started
|
|
15
|
+
|
|
16
|
+
- **Python:** **3.10+** recommended (venv + `requirements-dev.txt`; no version pinned in-repo).
|
|
17
|
+
- **Tests:** `make test-python`, or `cd python`, create `.venv`, install `requirements-dev.txt`, then `pytest`.
|
|
18
|
+
|
|
19
|
+
## Pieces
|
|
20
|
+
|
|
21
|
+
| Piece | Role |
|
|
22
|
+
|--------|------|
|
|
23
|
+
| `Dag` / `Dag.builder()` | Immutable DAG: `add`, `connect`, `build`. |
|
|
24
|
+
| `KahnScheduler` | Drives execution: `run`, `signal_complete` / `signal_failed`; `get_result()` returns `DagResult` (frozensets of ids). |
|
|
25
|
+
| `KahnQueue` / `DefaultKahnQueue` / `ConcurrentKahnQueue` | **`DefaultKahnQueue`** for single-threaded updates; **`ConcurrentKahnQueue`** when `pop` / `prune` run from many threads. |
|
|
26
|
+
| `IllegalGraphException` | Raised for invalid graphs (e.g. self-loop or cycle at `build()`). |
|
|
27
|
+
| `NodeProgressTracker` | Optional per-node progress in `[0, 1]`; not required for scheduling. |
|
|
28
|
+
|
|
29
|
+
## Examples
|
|
30
|
+
|
|
31
|
+
### Single-threaded (Temporal-style)
|
|
32
|
+
|
|
33
|
+
```python
|
|
34
|
+
from dag import Dag
|
|
35
|
+
from scheduler import KahnScheduler
|
|
36
|
+
|
|
37
|
+
b = Dag.builder()
|
|
38
|
+
lint = b.add("lint")
|
|
39
|
+
comp = b.add("compile")
|
|
40
|
+
test = b.add("test")
|
|
41
|
+
b.connect(lint, comp).connect(comp, test)
|
|
42
|
+
dag = b.build()
|
|
43
|
+
|
|
44
|
+
def execute_node(node_id: int, sched: KahnScheduler[str]) -> None:
|
|
45
|
+
try:
|
|
46
|
+
run_step(dag[node_id])
|
|
47
|
+
sched.signal_complete(node_id)
|
|
48
|
+
except Exception:
|
|
49
|
+
sched.signal_failed(node_id)
|
|
50
|
+
|
|
51
|
+
sched = KahnScheduler(dag, execute_node)
|
|
52
|
+
sched.run()
|
|
53
|
+
result = sched.get_result()
|
|
54
|
+
```
|
|
55
|
+
|
|
56
|
+
The two-arg `KahnScheduler(dag, execute_node)` uses `DefaultKahnQueue` when `queue` is omitted.
|
|
57
|
+
|
|
58
|
+
### Concurrent (`ConcurrentKahnQueue`)
|
|
59
|
+
|
|
60
|
+
```python
|
|
61
|
+
from kahnQueue.concurrent_kahn_queue import ConcurrentKahnQueue
|
|
62
|
+
from scheduler import KahnScheduler
|
|
63
|
+
|
|
64
|
+
# … build dag, define execute_node as in the previous example …
|
|
65
|
+
|
|
66
|
+
sched = KahnScheduler(
|
|
67
|
+
dag,
|
|
68
|
+
execute_node,
|
|
69
|
+
queue=ConcurrentKahnQueue(dag),
|
|
70
|
+
)
|
|
71
|
+
sched.run()
|
|
72
|
+
result = sched.get_result()
|
|
73
|
+
```
|
|
74
|
+
|
|
75
|
+
Use a thread-safe queue when `execute_node` is invoked from many threads; keep any extra shared result structures thread-safe as well.
|
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
README.md
|
|
2
|
+
pyproject.toml
|
|
3
|
+
src/dag.py
|
|
4
|
+
src/exception.py
|
|
5
|
+
src/scheduler.py
|
|
6
|
+
src/tracker.py
|
|
7
|
+
src/kahnQueue/__init__.py
|
|
8
|
+
src/kahnQueue/concurrent_kahn_queue.py
|
|
9
|
+
src/kahnQueue/default_kahn_queue.py
|
|
10
|
+
src/kahnQueue/kahn_queue.py
|
|
11
|
+
src/kahnQueue/node_machine.py
|
|
12
|
+
src/kahnQueue/node_state.py
|
|
13
|
+
src/kahn_queue.egg-info/PKG-INFO
|
|
14
|
+
src/kahn_queue.egg-info/SOURCES.txt
|
|
15
|
+
src/kahn_queue.egg-info/dependency_links.txt
|
|
16
|
+
src/kahn_queue.egg-info/top_level.txt
|
|
17
|
+
src/utils/__init__.py
|
|
18
|
+
src/utils/state_machine.py
|
|
19
|
+
tests/test_concurrent_kahn_queue.py
|
|
20
|
+
tests/test_dag.py
|
|
21
|
+
tests/test_default_kahn_queue.py
|
|
22
|
+
tests/test_kahn_scheduler.py
|
|
23
|
+
tests/test_node_machine.py
|
|
24
|
+
tests/test_node_progress_tracker.py
|
|
25
|
+
tests/test_state_machine.py
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
|