pydantic-graph 1.3.0__py3-none-any.whl → 1.12.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,90 @@
1
+ """Type definitions for graph node categories.
2
+
3
+ This module defines type aliases and utilities for categorizing nodes in the
4
+ graph execution system. It provides clear distinctions between source nodes,
5
+ destination nodes, and middle nodes, along with type guards for validation.
6
+ """
7
+
8
+ from __future__ import annotations
9
+
10
+ from typing import Any, TypeGuard
11
+
12
+ from typing_extensions import TypeAliasType, TypeVar
13
+
14
+ from pydantic_graph.beta.decision import Decision
15
+ from pydantic_graph.beta.join import Join
16
+ from pydantic_graph.beta.node import EndNode, Fork, StartNode
17
+ from pydantic_graph.beta.step import Step
18
+
19
StateT = TypeVar('StateT', infer_variance=True)
DepsT = TypeVar('DepsT', infer_variance=True)
InputT = TypeVar('InputT', infer_variance=True)
OutputT = TypeVar('OutputT', infer_variance=True)

MiddleNode = TypeAliasType(
    'MiddleNode',
    (
        Step[StateT, DepsT, InputT, OutputT]
        | Join[StateT, DepsT, InputT, OutputT]
        | Fork[InputT, OutputT]
    ),
    type_params=(StateT, DepsT, InputT, OutputT),
)
"""Nodes that may sit in the interior of an execution path: a `Step`, a `Join` or a `Fork`.

Each of these both accepts an input and yields an output, so it can be placed
between two other nodes.
"""

SourceNode = TypeAliasType(
    'SourceNode',
    MiddleNode[StateT, DepsT, Any, OutputT] | StartNode[OutputT],
    type_params=(StateT, DepsT, OutputT),
)
"""Nodes that data can flow *out of*: a `StartNode`, or any middle node.

Only the output type is tracked; the input type of a middle node is left as `Any`.
"""

DestinationNode = TypeAliasType(
    'DestinationNode',
    (
        MiddleNode[StateT, DepsT, InputT, Any]
        | Decision[StateT, DepsT, InputT]
        | EndNode[InputT]
    ),
    type_params=(StateT, DepsT, InputT),
)
"""Nodes that data can flow *into*: an `EndNode`, a `Decision`, or any middle node.

Only the input type is tracked; the output type of a middle node is left as `Any`.
"""

AnySourceNode = TypeAliasType('AnySourceNode', SourceNode[Any, Any, Any])
"""`SourceNode` with every type parameter set to `Any`."""

AnyDestinationNode = TypeAliasType('AnyDestinationNode', DestinationNode[Any, Any, Any])
"""`DestinationNode` with every type parameter set to `Any`."""

AnyNode = TypeAliasType('AnyNode', AnySourceNode | AnyDestinationNode)
"""Union of `AnySourceNode` and `AnyDestinationNode`, i.e. every kind of graph node."""
61
+
62
+
63
def is_source(node: AnyNode) -> TypeGuard[AnySourceNode]:
    """Report whether `node` can act as a source, i.e. feed data into an edge.

    Args:
        node: The node to classify.

    Returns:
        True when `node` is a `StartNode`, `Step` or `Join` instance; False for any other node.

    Note:
        `Fork` is a member of `MiddleNode` (and therefore of `AnySourceNode`), yet it is
        not matched by this check. NOTE(review): confirm that excluding forks is intentional.
    """
    source_types = (StartNode, Step, Join)
    return isinstance(node, source_types)
76
+
77
+
78
def is_destination(node: AnyNode) -> TypeGuard[AnyDestinationNode]:
    """Report whether `node` can act as a destination, i.e. receive data from an edge.

    Args:
        node: The node to classify.

    Returns:
        True when `node` is an `EndNode`, `Step`, `Join` or `Decision` instance; False for
        any other node.

    Note:
        `Fork` is a member of `MiddleNode` (and therefore of `AnyDestinationNode`), yet it is
        not matched by this check. NOTE(review): confirm that excluding forks is intentional.
    """
    destination_types = (EndNode, Step, Join, Decision)
    return isinstance(node, destination_types)
@@ -0,0 +1,232 @@
1
+ """Parent fork identification and deadlock avoidance in parallel graph execution.
2
+
3
+ This module provides functionality to identify "parent forks" in a graph, which are dominating
4
+ fork nodes that control access to join nodes. A parent fork is a fork node that:
5
+
6
+ 1. Dominates a join node (all paths to the join must pass through the fork)
7
+ 2. Does not participate in cycles that bypass it to reach the join
8
+
9
+ Identifying parent forks is crucial for deadlock avoidance in parallel execution. When a join
10
+ node waits for all its incoming branches, knowing the parent fork helps determine when it's
11
+ safe to proceed without risking deadlock.
12
+
13
+ In most typical graphs, such dominating forks exist naturally. However, when there are multiple
14
+ subsequent forks, the choice of parent fork can be ambiguous and may need to be specified by
15
+ the graph designer.
16
+ """
17
+
18
+ from __future__ import annotations
19
+
20
+ from collections.abc import Hashable
21
+ from dataclasses import dataclass
22
+ from functools import cached_property
23
+ from typing import Generic
24
+
25
+ from typing_extensions import TypeVar
26
+
27
+ from pydantic_graph.exceptions import GraphBuildingError
28
+
29
+ T = TypeVar('T', bound=Hashable, infer_variance=True, default=str)  # node-identifier type: any hashable value, `str` when unspecified
30
+
31
+
32
@dataclass
class ParentFork(Generic[T]):
    """Result record pairing a join's parent fork with the nodes between the two.

    The parent fork is the dominating fork that governs execution flow into a join.
    Knowing which nodes sit between that fork and the join is what allows a caller
    to decide when the join may safely be passed.
    """

    fork_id: T
    """ID of the fork node acting as the parent."""

    intermediate_nodes: set[T]
    """IDs of the nodes located downstream of the parent fork and upstream of the join.

    Once none of these nodes holds a graph walker that originated from a previous fork,
    proceeding downstream of the join is safe.
    """
50
+
51
+
52
@dataclass
class ParentForkFinder(Generic[T]):
    """Analyzes graph structure to identify parent forks for join nodes.

    A parent fork of a join is a fork node that dominates the join (every path from a
    start node to the join passes through it) and that is part of every cycle passing
    through the join. Finding it is what lets parallel execution coordinate joins
    without deadlocking.

    The predecessor map and the dominator sets are computed on first use and cached on
    the instance (`cached_property`), so changes made to `nodes`, `start_ids` or `edges`
    after the first query are not reflected in later results.
    """

    nodes: set[T]
    """All node identifiers in the graph."""

    start_ids: set[T]
    """Node identifiers that serve as entry points to the graph."""

    fork_ids: set[T]
    """Node identifiers that represent fork nodes (nodes that create parallel branches)."""

    edges: dict[T, list[T]]  # source_id to list of destination_ids
    """Graph edges represented as adjacency list mapping source nodes to destinations."""

    def find_parent_fork(
        self, join_id: T, *, parent_fork_id: T | None = None, prefer_closest: bool = False
    ) -> ParentFork[T] | None:
        """Find the parent fork for a given join node.

        When `parent_fork_id` is given, only that node is considered. Otherwise the chain
        of immediate dominators of the join is walked upwards and every fork on that chain
        is tested for cycles that reach the join while bypassing the fork. By default the
        _most_ ancestral fork passing that test is returned; with `prefer_closest=True`
        the first (closest) one is returned instead.

        Args:
            join_id: The identifier of the join node to analyze.
            parent_fork_id: Optional manually selected node ID to attempt to use as the parent fork node.
            prefer_closest: If no explicit fork is specified, this argument is used to determine
                whether to find the closest or farthest (i.e., most ancestral) dominating fork.

        Returns:
            A ParentFork object containing the fork ID and intermediate nodes if a valid
            parent fork exists, or None if no valid parent fork can be found (which would
            indicate potential deadlock risk).

        Raises:
            GraphBuildingError: If `parent_fork_id` is given but the join sits on a cycle
                that does not include that node.
            AssertionError: If the immediate-dominator chain revisits a node, which would
                indicate a bug in the dominator computation.

        Note:
            If every dominating fork of the join lets it participate in a cycle that avoids
            the fork, None is returned since no valid "parent fork" exists.
        """
        if parent_fork_id is not None:
            # A fork was manually specified. Only the cycle condition is checked here; whether
            # the node is actually a fork that dominates the join is NOT verified.
            upstream_nodes = self._get_upstream_nodes_if_parent(join_id, parent_fork_id)
            if upstream_nodes is None:
                raise GraphBuildingError(
                    f'There is a cycle in the graph passing through {join_id!r} that does not include {parent_fork_id!r}.'
                    f' Parent forks of a join must be a part of any cycles involving that join.'
                )
            return ParentFork[T](parent_fork_id, upstream_nodes)

        visited: set[T] = set()
        parent_fork: ParentFork[T] | None = None

        # Start at J and walk up the immediate dominator chain until the root is passed.
        cur: T | None = self._immediate_dominator(join_id)
        while cur is not None:
            # The visited-tracking shouldn't be necessary; it only guards against an infinite
            # loop if the dominator computation is buggy. Raised explicitly (rather than via
            # `assert`) so the guard is still active when Python runs with -O.
            if cur in visited:
                raise AssertionError(f'Cycle detected in dominator tree: {join_id} → {cur} → {visited}')
            visited.add(cur)

            if cur in self.fork_ids:  # non-fork dominators are skipped; keep climbing
                upstream_nodes = self._get_upstream_nodes_if_parent(join_id, cur)
                if upstream_nodes is not None:  # found upstream nodes without a cycle
                    parent_fork = ParentFork[T](cur, upstream_nodes)
                    if prefer_closest:
                        return parent_fork
                elif parent_fork is not None:
                    # We reached a fork that is an ancestor of a parent fork but is not itself a parent fork.
                    # This means there is a cycle to J that is downstream of `cur`, and so any node further
                    # upstream will fail to be a parent fork for the same reason. So we can stop here and
                    # just return `parent_fork`.
                    return parent_fork

            cur = self._immediate_dominator(cur)

        # Either the most ancestral valid fork, or None if no dominating fork passed the cycle test.
        return parent_fork

    @cached_property
    def _predecessors(self) -> dict[T, list[T]]:
        """Compute and cache the predecessor mapping for all nodes.

        Only edges whose source is in `self.nodes` are considered. A destination that is
        not in `self.nodes` raises `KeyError`.

        Returns:
            A dictionary mapping each node to a list of its immediate predecessors.
        """
        predecessors: dict[T, list[T]] = {n: [] for n in self.nodes}
        for source_id in self.nodes:
            for destination_id in self.edges.get(source_id, []):
                predecessors[destination_id].append(source_id)
        return predecessors

    @cached_property
    def _dominators(self) -> dict[T, set[T]]:
        """Compute the dominator sets for all nodes using iterative dataflow analysis.

        A node D dominates node N if every path from a start node to N must pass through D.
        Every start node is initialised to be dominated only by itself and every other node
        by all nodes; the sets are then shrunk until a fixed point is reached. Non-start
        nodes without predecessors are never updated and keep the initial "all nodes" set.

        Returns:
            A dictionary mapping each node to its set of dominators.
        """
        node_ids = set(self.nodes)
        start_ids = self.start_ids
        predecessors = self._predecessors

        dom: dict[T, set[T]] = {n: set(node_ids) for n in node_ids}
        for s in start_ids:
            dom[s] = {s}

        # Loop-invariant: the set of nodes whose dominators get refined on each pass.
        non_start_ids = node_ids - start_ids

        changed = True
        while changed:
            changed = False
            for n in non_start_ids:
                preds = predecessors[n]
                if not preds:  # unreachable from any start
                    continue
                # A node is dominated by itself plus whatever dominates all of its predecessors.
                new_dom = {n} | set[T].intersection(*(dom[p] for p in preds))
                if new_dom != dom[n]:
                    dom[n] = new_dom
                    changed = True
        return dom

    def _immediate_dominator(self, node_id: T) -> T | None:
        """Find the immediate dominator of a node.

        The immediate dominator is the closest dominator to a node (other than itself)
        in the dominator tree, i.e. the strict dominator that does not dominate any of
        the node's other strict dominators.

        Args:
            node_id: The node to find the immediate dominator for.

        Returns:
            The immediate dominator's ID if one exists, None otherwise.
        """
        dom = self._dominators
        candidates = dom[node_id] - {node_id}
        for c in candidates:
            # `c not in dom[d]` means c does not dominate d.
            if all((c == d) or (c not in dom[d]) for d in candidates):
                return c
        return None

    def _get_upstream_nodes_if_parent(self, join_id: T, fork_id: T) -> set[T] | None:
        """Check if a fork is a valid parent and return upstream nodes.

        Walks the graph backwards from the join along predecessor edges, never stepping
        onto the fork. If valid, returns all nodes that can reach the join without going
        through the fork.

        Args:
            join_id: The join node being analyzed.
            fork_id: The potential parent fork to test.

        Returns:
            The set of node IDs upstream of the join (excluding the fork) if the fork is
            a valid parent, or None if a cycle exists that bypasses the fork (making it
            invalid as a parent fork).

        Note:
            If, in the graph with fork_id removed, a path exists that starts and ends at
            the join (i.e., join is on a cycle avoiding the fork), we return None because
            the fork would not be a valid "parent fork".
        """
        upstream: set[T] = set()
        stack = [join_id]
        while stack:
            v = stack.pop()
            for p in self._predecessors[v]:
                if p == fork_id:
                    continue  # do not walk through (or past) the candidate fork
                if p == join_id:
                    return None  # J sits on a cycle w/out the specified node
                if p not in upstream:
                    upstream.add(p)
                    stack.append(p)
        return upstream