pydantic-graph 0.2.2__py3-none-any.whl → 1.24.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,76 @@
+ """Type definitions for identifiers used throughout the graph execution system.
+
+ This module defines NewType wrappers and aliases for various ID types used in graph execution,
+ providing type safety and clarity when working with different kinds of identifiers.
+ """
+
+ from __future__ import annotations
+
+ import re
+ import uuid
+ from dataclasses import dataclass
+ from typing import NewType
+
+ NodeID = NewType('NodeID', str)
+ """Unique identifier for a node in the graph."""
+
+ NodeRunID = NewType('NodeRunID', str)
+ """Unique identifier for a specific execution instance of a node."""
+
+ # The following aliases are just included for clarity; making them NewTypes is a hassle
+ JoinID = NodeID
+ """Alias for NodeID when referring to join nodes."""
+
+ ForkID = NodeID
+ """Alias for NodeID when referring to fork nodes."""
+
+ TaskID = NewType('TaskID', str)
+ """Unique identifier for a task within the graph execution."""
+
+
+ @dataclass(frozen=True)
+ class ForkStackItem:
+     """Represents a single fork point in the execution stack.
+
+     When a node creates multiple parallel execution paths (forks), each fork is tracked
+     using a ForkStackItem. This allows the system to maintain the execution hierarchy
+     and coordinate parallel branches of execution.
+     """
+
+     fork_id: ForkID
+     """The ID of the node that created this fork."""
+     node_run_id: NodeRunID
+     """The ID associated with the specific run of the node that created this fork."""
+     thread_index: int
+     """The index of the execution "thread" created during the node run that created this fork.
+
+     This is largely intended for observability/debugging; it may eventually be used to ensure idempotency."""
+
+
+ ForkStack = tuple[ForkStackItem, ...]
+ """A stack of fork items representing the full hierarchy of parallel execution branches.
+
+ The fork stack tracks the complete path through nested parallel executions,
+ allowing the system to coordinate and join parallel branches correctly.
+ """
+
+
+ def generate_placeholder_node_id(label: str) -> str:
+     """Generate a placeholder node ID, to be replaced during graph building."""
+     return f'{_NODE_ID_PLACEHOLDER_PREFIX}:{label}:{uuid.uuid4()}'
+
+
+ def replace_placeholder_id(node_id: NodeID) -> str:
+     """Extract the label from a placeholder node ID, to be used when replacing it during graph building."""
+     return re.sub(rf'{_NODE_ID_PLACEHOLDER_PREFIX}:([^:]+):.*', r'\1', node_id)
+
+
+ _NODE_ID_PLACEHOLDER_PREFIX = '__placeholder__'
+ """
+ When Node IDs are required but not specified when building a graph, we generate placeholder values
+ using this prefix followed by a random string.
+
+ During graph building, we replace these with simpler and deterministically-selected values.
+ This ensures that the node IDs are stable when rebuilding the graph, and makes the generated mermaid diagrams etc.
+ easier to read.
+ """
@@ -0,0 +1,249 @@
+ """Join operations and reducers for graph execution.
+
+ This module provides the core components for joining parallel execution paths
+ in a graph, including various reducer types that aggregate data from multiple
+ sources into a single output.
+ """
+
+ from __future__ import annotations
+
+ import inspect
+ from abc import abstractmethod
+ from collections.abc import Callable, Iterable, Mapping
+ from dataclasses import dataclass
+ from typing import Any, Generic, Literal, cast, overload
+
+ from typing_extensions import Protocol, Self, TypeAliasType, TypeVar
+
+ from pydantic_graph import BaseNode, End, GraphRunContext
+ from pydantic_graph.beta.id_types import ForkID, ForkStack, JoinID
+
+ StateT = TypeVar('StateT', infer_variance=True)
+ DepsT = TypeVar('DepsT', infer_variance=True)
+ InputT = TypeVar('InputT', infer_variance=True)
+ OutputT = TypeVar('OutputT', infer_variance=True)
+ T = TypeVar('T', infer_variance=True)
+ K = TypeVar('K', infer_variance=True)
+ V = TypeVar('V', infer_variance=True)
+
+
+ # TODO(P1): I guess we should make this class private, etc.
+ @dataclass
+ class JoinState:
+     """The state of a join during graph execution associated with a particular fork run."""
+
+     current: Any
+     downstream_fork_stack: ForkStack
+     cancelled_sibling_tasks: bool = False
+
+
+ @dataclass(init=False)
+ class ReducerContext(Generic[StateT, DepsT]):
+     """Context information passed to reducer functions during graph execution.
+
+     The reducer context provides access to the current graph state and dependencies.
+
+     Type Parameters:
+         StateT: The type of the graph state
+         DepsT: The type of the dependencies
+     """
+
+     _state: StateT
+     """The current graph state."""
+     _deps: DepsT
+     """The dependencies of the current graph run."""
+     _join_state: JoinState
+     """The JoinState for this reducer context."""
+
+     def __init__(self, *, state: StateT, deps: DepsT, join_state: JoinState):
+         self._state = state
+         self._deps = deps
+         self._join_state = join_state
+
+     @property
+     def state(self) -> StateT:
+         """The state of the graph run."""
+         return self._state
+
+     @property
+     def deps(self) -> DepsT:
+         """The deps for the graph run."""
+         return self._deps
+
+     def cancel_sibling_tasks(self):
+         """Cancel all sibling tasks created from the same fork.
+
+         You can call this if you want your join to have early-stopping behavior.
+         """
+         self._join_state.cancelled_sibling_tasks = True
+
+
+ PlainReducerFunction = TypeAliasType(
+     'PlainReducerFunction',
+     Callable[[OutputT, InputT], OutputT],
+     type_params=(InputT, OutputT),
+ )
+ ContextReducerFunction = TypeAliasType(
+     'ContextReducerFunction',
+     Callable[[ReducerContext[StateT, DepsT], OutputT, InputT], OutputT],
+     type_params=(StateT, DepsT, InputT, OutputT),
+ )
+ ReducerFunction = TypeAliasType(
+     'ReducerFunction',
+     ContextReducerFunction[StateT, DepsT, InputT, OutputT] | PlainReducerFunction[InputT, OutputT],
+     type_params=(StateT, DepsT, InputT, OutputT),
+ )
+ """
+ A function used for reducing inputs to a join node.
+ """
+
+
+ def reduce_null(current: None, inputs: Any) -> None:
+     """A reducer that discards all input data and returns None."""
+     return None
+
+
+ def reduce_list_append(current: list[T], inputs: T) -> list[T]:
+     """A reducer that appends to a list."""
+     current.append(inputs)
+     return current
+
+
+ def reduce_list_extend(current: list[T], inputs: Iterable[T]) -> list[T]:
+     """A reducer that extends a list."""
+     current.extend(inputs)
+     return current
+
+
+ def reduce_dict_update(current: dict[K, V], inputs: Mapping[K, V]) -> dict[K, V]:
+     """A reducer that updates a dict."""
+     current.update(inputs)
+     return current
+
+
+ class SupportsSum(Protocol):
+     """A protocol for a type that supports adding to itself."""
+
+     @abstractmethod
+     def __add__(self, other: Self, /) -> Self:
+         pass
+
+
+ NumericT = TypeVar('NumericT', bound=SupportsSum, infer_variance=True)
+
+
+ def reduce_sum(current: NumericT, inputs: NumericT) -> NumericT:
+     """A reducer that sums numbers."""
+     return current + inputs
+
+
+ @dataclass
+ class ReduceFirstValue(Generic[T]):
+     """A reducer that returns the first value it encounters, and cancels all other tasks."""
+
+     def __call__(self, ctx: ReducerContext[object, object], current: T, inputs: T) -> T:
+         """The reducer function."""
+         ctx.cancel_sibling_tasks()
+         return inputs
+
+
+ @dataclass(init=False)
+ class Join(Generic[StateT, DepsT, InputT, OutputT]):
+     """A join operation that synchronizes and aggregates parallel execution paths.
+
+     A join defines how to combine outputs from multiple parallel execution paths
+     using a [`ReducerFunction`][pydantic_graph.beta.join.ReducerFunction]. It specifies which fork
+     it joins (if any) and manages the initialization of reducers.
+
+     Type Parameters:
+         StateT: The type of the graph state
+         DepsT: The type of the dependencies
+         InputT: The type of input data to join
+         OutputT: The type of the final joined output
+     """
+
+     id: JoinID
+     _reducer: ReducerFunction[StateT, DepsT, InputT, OutputT]
+     _initial_factory: Callable[[], OutputT]
+     parent_fork_id: ForkID | None
+     preferred_parent_fork: Literal['closest', 'farthest']
+
+     def __init__(
+         self,
+         *,
+         id: JoinID,
+         reducer: ReducerFunction[StateT, DepsT, InputT, OutputT],
+         initial_factory: Callable[[], OutputT],
+         parent_fork_id: ForkID | None = None,
+         preferred_parent_fork: Literal['farthest', 'closest'] = 'farthest',
+     ):
+         self.id = id
+         self._reducer = reducer
+         self._initial_factory = initial_factory
+         self.parent_fork_id = parent_fork_id
+         self.preferred_parent_fork = preferred_parent_fork
+
+     @property
+     def reducer(self):
+         return self._reducer
+
+     @property
+     def initial_factory(self):
+         return self._initial_factory
+
+     def reduce(self, ctx: ReducerContext[StateT, DepsT], current: OutputT, inputs: InputT) -> OutputT:
+         n_parameters = len(inspect.signature(self.reducer).parameters)
+         if n_parameters == 2:
+             return cast(PlainReducerFunction[InputT, OutputT], self.reducer)(current, inputs)
+         else:
+             return cast(ContextReducerFunction[StateT, DepsT, InputT, OutputT], self.reducer)(ctx, current, inputs)
+
+     @overload
+     def as_node(self, inputs: None = None) -> JoinNode[StateT, DepsT]: ...
+
+     @overload
+     def as_node(self, inputs: InputT) -> JoinNode[StateT, DepsT]: ...
+
+     def as_node(self, inputs: InputT | None = None) -> JoinNode[StateT, DepsT]:
+         """Create a join node with bound inputs.
+
+         Args:
+             inputs: The input data to bind to this join, or None
+
+         Returns:
+             A [`JoinNode`][pydantic_graph.beta.join.JoinNode] with this join and the bound inputs
+         """
+         return JoinNode(self, inputs)
+
+
+ @dataclass
+ class JoinNode(BaseNode[StateT, DepsT, Any]):
+     """A base node that represents a join item with bound inputs.
+
+     JoinNode bridges between the v1 and v2 graph execution systems by wrapping
+     a [`Join`][pydantic_graph.beta.join.Join] with bound inputs in a BaseNode interface.
+     It is not meant to be run directly but rather used to indicate transitions
+     to v2-style steps.
+     """
+
+     join: Join[StateT, DepsT, Any, Any]
+     """The join to execute."""
+
+     inputs: Any
+     """The inputs bound to this join."""
+
+     async def run(self, ctx: GraphRunContext[StateT, DepsT]) -> BaseNode[StateT, DepsT, Any] | End[Any]:
+         """Attempt to run the join node.
+
+         Args:
+             ctx: The graph execution context
+
+         Raises:
+             NotImplementedError: Always raised, as JoinNode is not meant to be run directly
+         """
+         raise NotImplementedError(
+             '`JoinNode` is not meant to be run directly, it is meant to be used in `BaseNode` subclasses to indicate a transition to v2-style steps.'
+         )
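
To make the reducer machinery above concrete, here is a hedged sketch of constructing joins directly with the API from this hunk: `reduce_list_append` is a plain two-argument reducer, while `ReduceFirstValue` is a context-aware reducer that cancels sibling branches for early stopping. (How a `Join` is attached to a graph is outside this hunk.)

from pydantic_graph.beta.id_types import JoinID
from pydantic_graph.beta.join import Join, ReduceFirstValue, reduce_list_append

# Gather every parallel branch's output into a list.
collect_all = Join(id=JoinID('collect'), reducer=reduce_list_append, initial_factory=list)

# Keep the first value to arrive and cancel the remaining sibling tasks.
first_wins = Join(
    id=JoinID('first'),
    reducer=ReduceFirstValue[str | None](),
    initial_factory=lambda: None,
)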
@@ -0,0 +1,208 @@
+ from __future__ import annotations
+
+ from collections import defaultdict
+ from dataclasses import dataclass
+ from typing import Literal
+
+ from typing_extensions import assert_never
+
+ from pydantic_graph.beta.decision import Decision
+ from pydantic_graph.beta.id_types import NodeID
+ from pydantic_graph.beta.join import Join
+ from pydantic_graph.beta.node import EndNode, Fork, StartNode
+ from pydantic_graph.beta.node_types import AnyNode
+ from pydantic_graph.beta.paths import BroadcastMarker, DestinationMarker, LabelMarker, MapMarker, Path
+ from pydantic_graph.beta.step import Step
+
+ DEFAULT_HIGHLIGHT_CSS = 'fill:#fdff32'
+ """The default CSS to use for highlighting nodes."""
+
+
+ StateDiagramDirection = Literal['TB', 'LR', 'RL', 'BT']
+ """Used to specify the direction of the state diagram generated by mermaid.
+
+ - `'TB'`: Top to bottom, this is the default for mermaid charts.
+ - `'LR'`: Left to right
+ - `'RL'`: Right to left
+ - `'BT'`: Bottom to top
+ """
+
+ NodeKind = Literal['broadcast', 'map', 'join', 'start', 'end', 'step', 'decision']
+
+
+ @dataclass
+ class MermaidNode:
+     """A mermaid node."""
+
+     id: str
+     kind: NodeKind
+     label: str | None
+     note: str | None
+
+
+ @dataclass
+ class MermaidEdge:
+     """A mermaid edge."""
+
+     start_id: str
+     end_id: str
+     label: str | None
+
+
+ def build_mermaid_graph(  # noqa: C901
+     graph_nodes: dict[NodeID, AnyNode], graph_edges_by_source: dict[NodeID, list[Path]]
+ ) -> MermaidGraph:
+     """Build a mermaid graph."""
+     nodes: list[MermaidNode] = []
+     edges_by_source: dict[str, list[MermaidEdge]] = defaultdict(list)
+
+     def _collect_edges(path: Path, last_source_id: NodeID) -> None:
+         working_label: str | None = None
+         for item in path.items:
+             assert not isinstance(item, MapMarker | BroadcastMarker), 'These should be removed during Graph building'
+             if isinstance(item, LabelMarker):
+                 working_label = item.label
+             elif isinstance(item, DestinationMarker):
+                 edges_by_source[last_source_id].append(MermaidEdge(last_source_id, item.destination_id, working_label))
+
+     for node_id, node in graph_nodes.items():
+         kind: NodeKind
+         label: str | None = None
+         note: str | None = None
+         if isinstance(node, StartNode):
+             kind = 'start'
+         elif isinstance(node, EndNode):
+             kind = 'end'
+         elif isinstance(node, Step):
+             kind = 'step'
+             label = node.label
+         elif isinstance(node, Join):
+             kind = 'join'
+         elif isinstance(node, Fork):
+             kind = 'map' if node.is_map else 'broadcast'
+         elif isinstance(node, Decision):
+             kind = 'decision'
+             note = node.note
+         else:
+             assert_never(node)
+
+         source_node = MermaidNode(id=node_id, kind=kind, label=label, note=note)
+         nodes.append(source_node)
+
+     for k, v in graph_edges_by_source.items():
+         for path in v:
+             _collect_edges(path, k)
+
+     for node in graph_nodes.values():
+         if isinstance(node, Decision):
+             for branch in node.branches:
+                 _collect_edges(branch.path, node.id)
+
+     # Add edges in the same order that we added nodes
+     edges: list[MermaidEdge] = sum([edges_by_source.get(node.id, []) for node in nodes], list[MermaidEdge]())
+     return MermaidGraph(nodes, edges)
+
+
+ @dataclass
+ class MermaidGraph:
+     """A mermaid graph."""
+
+     nodes: list[MermaidNode]
+     edges: list[MermaidEdge]
+
+     title: str | None = None
+     direction: StateDiagramDirection | None = None
+
+     def render(
+         self,
+         direction: StateDiagramDirection | None = None,
+         title: str | None = None,
+         edge_labels: bool = True,
+     ):
+         lines: list[str] = []
+         if title:
+             lines = ['---', f'title: {title}', '---']
+         lines.append('stateDiagram-v2')
+         if direction is not None:
+             lines.append(f' direction {direction}')
+
+         nodes, edges = _topological_sort(self.nodes, self.edges)
+         for node in nodes:
+             # List all nodes in order they were created
+             node_lines: list[str] = []
+             if node.kind == 'start' or node.kind == 'end':
+                 pass  # Start and end nodes use special [*] syntax in edges
+             elif node.kind == 'step':
+                 line = f' {node.id}'
+                 if node.label:
+                     line += f': {node.label}'
+                 node_lines.append(line)
+             elif node.kind == 'join':
+                 node_lines = [f' state {node.id} <<join>>']
+             elif node.kind == 'broadcast' or node.kind == 'map':
+                 node_lines = [f' state {node.id} <<fork>>']
+             elif node.kind == 'decision':
+                 node_lines = [f' state {node.id} <<choice>>']
+                 if node.note:
+                     node_lines.append(f' note right of {node.id}\n {node.note}\n end note')
+             else:  # pragma: no cover
+                 assert_never(node.kind)
+             lines.extend(node_lines)
+
+         lines.append('')
+
+         for edge in edges:
+             # Use special [*] syntax for start/end nodes
+             render_start_id = '[*]' if edge.start_id == StartNode.id else edge.start_id
+             render_end_id = '[*]' if edge.end_id == EndNode.id else edge.end_id
+             edge_line = f' {render_start_id} --> {render_end_id}'
+             if edge.label and edge_labels:
+                 edge_line += f': {edge.label}'
+             lines.append(edge_line)
+
+         return '\n'.join(lines)
+
+
+ def _topological_sort(
+     nodes: list[MermaidNode], edges: list[MermaidEdge]
+ ) -> tuple[list[MermaidNode], list[MermaidEdge]]:
+     """Sort nodes and edges in a logical topological order.
+
+     Uses BFS from the start node to assign depths, then sorts:
+     - Nodes by their distance from start
+     - Edges by the distance of their source and target nodes
+     """
+     # Build adjacency list for BFS
+     adjacency: dict[str, list[str]] = defaultdict(list)
+     for edge in edges:
+         adjacency[edge.start_id].append(edge.end_id)
+
+     # BFS to assign depth to each node (distance from start)
+     depths: dict[str, int] = {}
+     queue: list[tuple[str, int]] = [(StartNode.id, 0)]
+     depths[StartNode.id] = 0
+
+     while queue:
+         node_id, depth = queue.pop(0)
+         for next_id in adjacency[node_id]:
+             if next_id not in depths:  # pragma: no branch
+                 depths[next_id] = depth + 1
+                 queue.append((next_id, depth + 1))
+
+     # Sort nodes by depth (distance from start), then by id for stability
+     # Nodes not reachable from start get infinity depth (sorted to end)
+     sorted_nodes = sorted(nodes, key=lambda n: (depths.get(n.id, float('inf')), n.id))
+
+     # Sort edges by source depth, then target depth
+     # This ensures edges closer to start come first, edges closer to end come last
+     sorted_edges = sorted(
+         edges,
+         key=lambda e: (
+             depths.get(e.start_id, float('inf')),
+             depths.get(e.end_id, float('inf')),
+             e.start_id,
+             e.end_id,
+         ),
+     )
+
+     return sorted_nodes, sorted_edges
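
For intuition, a `MermaidGraph` can also be built and rendered by hand: `render` emits `stateDiagram-v2` text, start/end nodes become the `[*]` marker, and nodes are ordered by BFS depth from `__start__`. A sketch, assuming this file is importable as `pydantic_graph.beta.mermaid` (the hunk headers do not show file names):

from pydantic_graph.beta.mermaid import MermaidEdge, MermaidGraph, MermaidNode  # assumed module path

graph = MermaidGraph(
    nodes=[
        MermaidNode(id='__start__', kind='start', label=None, note=None),
        MermaidNode(id='greet', kind='step', label='Greet the user', note=None),
        MermaidNode(id='__end__', kind='end', label=None, note=None),
    ],
    edges=[
        MermaidEdge('__start__', 'greet', None),
        MermaidEdge('greet', '__end__', 'done'),
    ],
)
print(graph.render(direction='LR'))
# Emits a stateDiagram-v2 definition with `direction LR`, a labelled line for
# `greet`, and `[*] --> greet` / `greet --> [*]: done` edges.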
@@ -0,0 +1,95 @@
+ """Core node types for graph construction and execution.
+
+ This module defines the fundamental node types used to build execution graphs,
+ including start/end nodes and fork nodes for parallel execution.
+ """
+
+ from __future__ import annotations
+
+ from dataclasses import dataclass
+ from typing import Generic
+
+ from typing_extensions import TypeVar
+
+ from pydantic_graph.beta.id_types import ForkID, JoinID, NodeID
+
+ StateT = TypeVar('StateT', infer_variance=True)
+ """Type variable for graph state."""
+
+ OutputT = TypeVar('OutputT', infer_variance=True)
+ """Type variable for node output data."""
+
+ InputT = TypeVar('InputT', infer_variance=True)
+ """Type variable for node input data."""
+
+
+ class StartNode(Generic[OutputT]):
+     """Entry point node for graph execution.
+
+     The StartNode represents the beginning of a graph execution flow.
+     """
+
+     id = NodeID('__start__')
+     """Fixed identifier for the start node."""
+
+
+ class EndNode(Generic[InputT]):
+     """Terminal node representing the completion of graph execution.
+
+     The EndNode marks the successful completion of a graph execution flow
+     and can collect the final output data.
+     """
+
+     id = NodeID('__end__')
+     """Fixed identifier for the end node."""
+
+     def _force_variance(self, inputs: InputT) -> None:  # pragma: no cover
+         """Force type variance for proper generic typing.
+
+         This method exists solely for type checking purposes and should never be called.
+
+         Args:
+             inputs: Input data of type InputT.
+
+         Raises:
+             RuntimeError: Always, as this method should never be executed.
+         """
+         raise RuntimeError('This method should never be called, it is just defined for typing purposes.')
+
+
+ @dataclass
+ class Fork(Generic[InputT, OutputT]):
+     """Fork node that creates parallel execution branches.
+
+     A Fork node splits the execution flow into multiple parallel branches,
+     enabling concurrent execution of downstream nodes. It can either map
+     a sequence across multiple branches or duplicate data to each branch.
+     """
+
+     id: ForkID
+     """Unique identifier for this fork node."""
+
+     is_map: bool
+     """Determines fork behavior.
+
+     If True, InputT must be Sequence[OutputT] and each element is sent to a separate branch.
+     If False, InputT must be OutputT and the same data is sent to all branches.
+     """
+     downstream_join_id: JoinID | None
+     """Optional identifier of a downstream join node that should be jumped to if mapping an empty iterable."""
+
+     def _force_variance(self, inputs: InputT) -> OutputT:  # pragma: no cover
+         """Force type variance for proper generic typing.
+
+         This method exists solely for type checking purposes and should never be called.
+
+         Args:
+             inputs: Input data to be forked.
+
+         Returns:
+             Output data type (never actually returned).
+
+         Raises:
+             RuntimeError: Always, as this method should never be executed.
+         """
+         raise RuntimeError('This method should never be called, it is just defined for typing purposes.')
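
The `is_map` flag is the entire difference between the two fork behaviors documented above: a map fork fans a sequence out element by element, while a broadcast fork duplicates a single value to every branch. A minimal construction sketch (the IDs are hypothetical; wiring the forks into a graph is not shown):

from pydantic_graph.beta.id_types import ForkID
from pydantic_graph.beta.node import Fork

# Map fork: expects a Sequence[int] input and sends each int to its own branch.
fan_out = Fork[list[int], int](id=ForkID('fan_out'), is_map=True, downstream_join_id=None)

# Broadcast fork: sends the same str input to every branch.
broadcast = Fork[str, str](id=ForkID('notify'), is_map=False, downstream_join_id=None)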