pydantic-graph 1.2.1__py3-none-any.whl → 1.22.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pydantic_graph/_utils.py +39 -0
- pydantic_graph/beta/__init__.py +25 -0
- pydantic_graph/beta/decision.py +276 -0
- pydantic_graph/beta/graph.py +978 -0
- pydantic_graph/beta/graph_builder.py +1053 -0
- pydantic_graph/beta/id_types.py +76 -0
- pydantic_graph/beta/join.py +249 -0
- pydantic_graph/beta/mermaid.py +208 -0
- pydantic_graph/beta/node.py +95 -0
- pydantic_graph/beta/node_types.py +90 -0
- pydantic_graph/beta/parent_forks.py +232 -0
- pydantic_graph/beta/paths.py +421 -0
- pydantic_graph/beta/step.py +253 -0
- pydantic_graph/beta/util.py +90 -0
- pydantic_graph/exceptions.py +22 -0
- pydantic_graph/graph.py +12 -4
- pydantic_graph/nodes.py +0 -2
- pydantic_graph/persistence/in_mem.py +1 -1
- {pydantic_graph-1.2.1.dist-info → pydantic_graph-1.22.0.dist-info}/METADATA +1 -1
- pydantic_graph-1.22.0.dist-info/RECORD +28 -0
- pydantic_graph-1.2.1.dist-info/RECORD +0 -15
- {pydantic_graph-1.2.1.dist-info → pydantic_graph-1.22.0.dist-info}/WHEEL +0 -0
- {pydantic_graph-1.2.1.dist-info → pydantic_graph-1.22.0.dist-info}/licenses/LICENSE +0 -0
|
@@ -0,0 +1,90 @@
|
|
|
1
|
+
"""Type definitions for graph node categories.
|
|
2
|
+
|
|
3
|
+
This module defines type aliases and utilities for categorizing nodes in the
|
|
4
|
+
graph execution system. It provides clear distinctions between source nodes,
|
|
5
|
+
destination nodes, and middle nodes, along with type guards for validation.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
from __future__ import annotations
|
|
9
|
+
|
|
10
|
+
from typing import Any, TypeGuard
|
|
11
|
+
|
|
12
|
+
from typing_extensions import TypeAliasType, TypeVar
|
|
13
|
+
|
|
14
|
+
from pydantic_graph.beta.decision import Decision
|
|
15
|
+
from pydantic_graph.beta.join import Join
|
|
16
|
+
from pydantic_graph.beta.node import EndNode, Fork, StartNode
|
|
17
|
+
from pydantic_graph.beta.step import Step
|
|
18
|
+
|
|
19
|
+
# Type variables used to parameterize the node aliases below; variance is inferred.
StateT = TypeVar('StateT', infer_variance=True)
DepsT = TypeVar('DepsT', infer_variance=True)
InputT = TypeVar('InputT', infer_variance=True)
OutputT = TypeVar('OutputT', infer_variance=True)

MiddleNode = TypeAliasType(
    'MiddleNode',
    (
        Step[StateT, DepsT, InputT, OutputT]
        | Join[StateT, DepsT, InputT, OutputT]
        | Fork[InputT, OutputT]
    ),
    type_params=(StateT, DepsT, InputT, OutputT),
)
"""Alias covering the node kinds that may sit in the middle of an execution path.

A middle node (a `Step`, `Join` or `Fork`) both accepts input and emits output,
so it can be used for intermediate processing within the graph.
"""

SourceNode = TypeAliasType(
    'SourceNode',
    MiddleNode[StateT, DepsT, Any, OutputT] | StartNode[OutputT],
    type_params=(StateT, DepsT, OutputT),
)
"""Alias covering the node kinds that may act as a source in an execution path.

A source node emits output and may be where data flow begins: either a
`StartNode` or any `MiddleNode` (its input type is left as `Any`).
"""

DestinationNode = TypeAliasType(
    'DestinationNode',
    (
        MiddleNode[StateT, DepsT, InputT, Any]
        | Decision[StateT, DepsT, InputT]
        | EndNode[InputT]
    ),
    type_params=(StateT, DepsT, InputT),
)
"""Alias covering the node kinds that may act as a destination in an execution path.

A destination node accepts input and may be where data flow ends: an `EndNode`,
a `Decision`, or any `MiddleNode` (its output type is left as `Any`).
"""

AnySourceNode = TypeAliasType('AnySourceNode', SourceNode[Any, Any, Any])
"""`SourceNode` with every type parameter set to `Any`."""

AnyDestinationNode = TypeAliasType('AnyDestinationNode', DestinationNode[Any, Any, Any])
"""`DestinationNode` with every type parameter set to `Any`."""

AnyNode = TypeAliasType('AnyNode', AnySourceNode | AnyDestinationNode)
"""Any graph node at all, whatever its role or type parameters."""
|
|
61
|
+
|
|
62
|
+
|
|
63
|
+
def is_source(node: AnyNode) -> TypeGuard[AnySourceNode]:
    """Report whether ``node`` is usable as a source in the graph.

    A source node is one that produces output data, and may therefore be the
    point at which data flow starts along an execution path.

    Args:
        node: The node to classify.

    Returns:
        True when ``node`` is a ``StartNode``, ``Step`` or ``Join`` instance,
        False for any other node.
    """
    # NOTE(review): `Fork` is a member of `MiddleNode` (and hence of `SourceNode`)
    # but is not matched here — confirm that this is intended.
    source_types = (StartNode, Step, Join)
    return isinstance(node, source_types)
|
|
76
|
+
|
|
77
|
+
|
|
78
|
+
def is_destination(node: AnyNode) -> TypeGuard[AnyDestinationNode]:
    """Report whether ``node`` is usable as a destination in the graph.

    A destination node is one that consumes input data, and may therefore be
    the point at which data flow ends along an execution path.

    Args:
        node: The node to classify.

    Returns:
        True when ``node`` is an ``EndNode``, ``Step``, ``Join`` or ``Decision``
        instance, False for any other node.
    """
    # NOTE(review): `Fork` is a member of `MiddleNode` (and hence of `DestinationNode`)
    # but is not matched here — confirm that this is intended.
    destination_types = (EndNode, Step, Join, Decision)
    return isinstance(node, destination_types)
|
|
@@ -0,0 +1,232 @@
|
|
|
1
|
+
"""Parent fork identification and deadlock avoidance in parallel graph execution.
|
|
2
|
+
|
|
3
|
+
This module provides functionality to identify "parent forks" in a graph, which are dominating
|
|
4
|
+
fork nodes that control access to join nodes. A parent fork is a fork node that:
|
|
5
|
+
|
|
6
|
+
1. Dominates a join node (all paths to the join must pass through the fork)
|
|
7
|
+
2. Does not participate in cycles that bypass it to reach the join
|
|
8
|
+
|
|
9
|
+
Identifying parent forks is crucial for deadlock avoidance in parallel execution. When a join
|
|
10
|
+
node waits for all its incoming branches, knowing the parent fork helps determine when it's
|
|
11
|
+
safe to proceed without risking deadlock.
|
|
12
|
+
|
|
13
|
+
In most typical graphs, such dominating forks exist naturally. However, when there are multiple
|
|
14
|
+
subsequent forks, the choice of parent fork can be ambiguous and may need to be specified by
|
|
15
|
+
the graph designer.
|
|
16
|
+
"""
|
|
17
|
+
|
|
18
|
+
from __future__ import annotations
|
|
19
|
+
|
|
20
|
+
from collections.abc import Hashable
|
|
21
|
+
from dataclasses import dataclass
|
|
22
|
+
from functools import cached_property
|
|
23
|
+
from typing import Generic
|
|
24
|
+
|
|
25
|
+
from typing_extensions import TypeVar
|
|
26
|
+
|
|
27
|
+
from pydantic_graph.exceptions import GraphBuildingError
|
|
28
|
+
|
|
29
|
+
# Type of a node identifier: any hashable value, defaulting to `str` when left unparameterized.
T = TypeVar('T', bound=Hashable, infer_variance=True, default=str)
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
@dataclass
class ParentFork(Generic[T]):
    """A parent fork paired with the nodes that separate it from a join.

    The parent fork is the dominating fork governing execution flow into a join node.
    Recording the nodes located between the two is what makes it possible to decide
    when execution may safely continue beyond the join.
    """

    fork_id: T
    """ID of the fork node acting as the parent."""

    intermediate_nodes: set[T]
    """IDs of the nodes located downstream of the parent fork and upstream of the join.

    Once none of these nodes holds a graph walker that belonged to a previous fork, execution may safely
    continue downstream of the join.
    """
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
@dataclass
class ParentForkFinder(Generic[T]):
    """Analyzes graph structure to identify parent forks for join nodes.

    This class implements algorithms to find dominating forks in a directed graph,
    which is essential for coordinating parallel execution and avoiding deadlocks.
    """

    nodes: set[T]
    """All node identifiers in the graph."""

    start_ids: set[T]
    """Node identifiers that serve as entry points to the graph."""

    fork_ids: set[T]
    """Node identifiers that represent fork nodes (nodes that create parallel branches)."""

    edges: dict[T, list[T]]  # source_id to list of destination_ids
    """Graph edges represented as adjacency list mapping source nodes to destinations."""

    def find_parent_fork(
        self, join_id: T, *, parent_fork_id: T | None = None, prefer_closest: bool = False
    ) -> ParentFork[T] | None:
        """Find the parent fork for a given join node.

        Walks up the immediate-dominator chain of the join looking for dominating forks
        that can serve as its parent fork. A valid parent fork must dominate the join
        without allowing cycles through the join that bypass the fork. By default the
        _most_ ancestral valid fork is returned; with `prefer_closest` the first valid
        fork encountered (i.e., the closest one) is returned instead.

        Args:
            join_id: The identifier of the join node to analyze.
            parent_fork_id: Optional manually selected node ID to attempt to use as the parent fork node.
            prefer_closest: If no explicit fork is specified, this argument is used to determine
                whether to find the closest or farthest (i.e., most ancestral) dominating fork.

        Returns:
            A ParentFork object containing the fork ID and intermediate nodes if a valid
            parent fork exists, or None if no valid parent fork can be found (which would
            indicate potential deadlock risk).

        Raises:
            GraphBuildingError: If `parent_fork_id` is given and the join lies on a cycle
                that does not include that node.
            AssertionError: If the immediate-dominator chain revisits a node, which
                indicates an inconsistent dominator computation.

        Note:
            If every dominating fork of the join lets it participate in a cycle that avoids
            the fork, None is returned since no valid "parent fork" exists.
        """
        if parent_fork_id is not None:
            # A fork was manually specified; we still verify that the join does not sit on a cycle bypassing it.
            # NOTE(review): this path does not check that `parent_fork_id` is in `fork_ids` or that it
            # dominates the join — confirm callers guarantee that.
            upstream_nodes = self._get_upstream_nodes_if_parent(join_id, parent_fork_id)
            if upstream_nodes is None:
                raise GraphBuildingError(
                    f'There is a cycle in the graph passing through {join_id!r} that does not include {parent_fork_id!r}.'
                    f' Parent forks of a join must be a part of any cycles involving that join.'
                )
            return ParentFork[T](parent_fork_id, upstream_nodes)

        visited: set[T] = set()
        cur = join_id  # start at J and walk up the immediate dominator chain

        parent_fork: ParentFork[T] | None = None
        while True:
            cur = self._immediate_dominator(cur)
            if cur is None:  # reached the root
                break

            # The visited-tracking shouldn't be necessary, but it prevents an infinite loop if the dominator
            # chain ever loops back on itself. It is an explicit raise rather than an `assert` so that the
            # guard is still active when Python runs with assertions disabled (`-O`).
            if cur in visited:
                raise AssertionError(f'Cycle detected in dominator tree: {join_id} → {cur} → {visited}')
            visited.add(cur)

            if cur not in self.fork_ids:
                continue  # not a fork, so keep climbing

            upstream_nodes = self._get_upstream_nodes_if_parent(join_id, cur)
            if upstream_nodes is not None:  # found upstream nodes without a cycle
                parent_fork = ParentFork[T](cur, upstream_nodes)
                if prefer_closest:
                    return parent_fork
            elif parent_fork is not None:
                # We reached a fork that is an ancestor of a parent fork but is not itself a parent fork.
                # This means there is a cycle to J that is downstream of `cur`, and so any node further upstream
                # will fail to be a parent fork for the same reason. So we can stop here and just return `parent_fork`.
                return parent_fork

        # Either the most ancestral valid parent fork, or None if no dominating fork passed the cycle test
        return parent_fork

    @cached_property
    def _predecessors(self) -> dict[T, list[T]]:
        """Compute and cache the predecessor mapping for all nodes.

        Only edges whose source is in `nodes` are considered; every node in `nodes`
        gets an entry, which is empty when nothing points at it.

        Returns:
            A dictionary mapping each node to a list of its immediate predecessors.
        """
        predecessors: dict[T, list[T]] = {n: [] for n in self.nodes}
        for source_id in self.nodes:
            for destination_id in self.edges.get(source_id, []):
                predecessors[destination_id].append(source_id)
        return predecessors

    @cached_property
    def _dominators(self) -> dict[T, set[T]]:
        """Compute the dominator sets for all nodes using iterative dataflow analysis.

        A node D dominates node N if every path from a start node to N must pass through D.
        This is computed using a fixed-point iteration algorithm: every non-start node begins
        with the full node set and is repeatedly narrowed to itself plus the intersection of
        its predecessors' dominator sets until nothing changes.

        Nodes that are not start nodes and have no predecessors are never narrowed, so they
        keep the full node set as their dominator set.

        Returns:
            A dictionary mapping each node to its set of dominators.
        """
        node_ids = set(self.nodes)
        start_ids = self.start_ids
        # Loop-invariant: the nodes whose dominator sets get refined on each pass
        non_start_ids = node_ids - start_ids

        dom: dict[T, set[T]] = {n: set(node_ids) for n in node_ids}
        for s in start_ids:
            dom[s] = {s}

        changed = True
        while changed:
            changed = False
            for n in non_start_ids:
                preds = self._predecessors[n]
                if not preds:  # unreachable from any start
                    continue
                # `preds` is non-empty here, so the intersection always has at least one operand
                intersection = set[T].intersection(*(dom[p] for p in preds))
                new_dom = {n} | intersection
                if new_dom != dom[n]:
                    dom[n] = new_dom
                    changed = True
        return dom

    def _immediate_dominator(self, node_id: T) -> T | None:
        """Find the immediate dominator of a node.

        The immediate dominator is the closest dominator to a node (other than itself)
        in the dominator tree. It is selected as the strict dominator of `node_id` that
        does not itself dominate any other strict dominator of `node_id`.

        Args:
            node_id: The node to find the immediate dominator for.

        Returns:
            The immediate dominator's ID if one exists, None otherwise.
        """
        dom = self._dominators
        candidates = dom[node_id] - {node_id}
        for c in candidates:
            if all((c == d) or (c not in dom[d]) for d in candidates):
                return c
        return None

    def _get_upstream_nodes_if_parent(self, join_id: T, fork_id: T) -> set[T] | None:
        """Check if a fork is a valid parent and return upstream nodes.

        Tests whether the given fork can serve as a parent fork for the join by checking
        for cycles that bypass the fork. If valid, returns all nodes that can reach the
        join without going through the fork.

        Args:
            join_id: The join node being analyzed.
            fork_id: The potential parent fork to test.

        Returns:
            The set of node IDs upstream of the join (excluding the fork) if the fork is
            a valid parent, or None if a cycle exists that bypasses the fork (making it
            invalid as a parent fork).

        Note:
            If, in the graph with fork_id removed, a path exists that starts and ends at
            the join (i.e., join is on a cycle avoiding the fork), we return None because
            the fork would not be a valid "parent fork".
        """
        upstream: set[T] = set()
        stack = [join_id]
        # Depth-first walk backwards over predecessor edges, never stepping onto the fork
        while stack:
            v = stack.pop()
            for p in self._predecessors[v]:
                if p == fork_id:
                    continue
                if p == join_id:
                    return None  # J sits on a cycle w/out the specified node
                if p not in upstream:
                    upstream.add(p)
                    stack.append(p)
        return upstream
|