penguiflow 1.0.3__py3-none-any.whl → 2.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of penguiflow might be problematic; review the release details in the registry advisory before upgrading.

penguiflow/testkit.py ADDED
@@ -0,0 +1,269 @@
1
+ """Utilities for writing concise PenguiFlow tests.
2
+
3
+ The helpers in this module provide a minimal harness around ``PenguiFlow`` so
4
+ unit tests can focus on the behaviour of their nodes instead of the runtime
5
+ plumbing. Each helper intentionally works with the public runtime surface to
6
+ avoid relying on private attributes, keeping the harness forward compatible
7
+ with the v1 API.
8
+ """
9
+
10
+ from __future__ import annotations
11
+
12
+ import asyncio
13
+ import inspect
14
+ from collections import OrderedDict
15
+ from collections.abc import Awaitable, Callable, Iterable, Sequence
16
+ from dataclasses import dataclass, field
17
+ from itertools import groupby
18
+ from typing import Any
19
+ from weakref import WeakKeyDictionary
20
+
21
+ from .core import PenguiFlow
22
+ from .errors import FlowErrorCode
23
+ from .metrics import FlowEvent
24
+ from .types import Message
25
+
26
+ __all__ = ["run_one", "assert_node_sequence", "simulate_error"]
27
+
28
+
29
# Upper bound on how many trace histories are kept module-wide; the oldest
# entries are evicted first (see _register_trace_history).
_MAX_TRACE_HISTORY = 64
# LRU-ordered registry mapping trace_id -> the FlowEvents recorded for it.
_TRACE_HISTORY: OrderedDict[str, list[FlowEvent]] = OrderedDict()
# Per-flow recorder state. A WeakKeyDictionary is used so that a
# garbage-collected flow does not keep its recorder state alive.
_RECORDER_STATE: WeakKeyDictionary[PenguiFlow, _RecorderState] = (
    WeakKeyDictionary()
)
34
+
35
+
36
def _register_trace_history(trace_id: str, events: list[FlowEvent]) -> None:
    """Store ``events`` as the history bucket for ``trace_id`` (LRU-bounded).

    Empty trace ids are ignored. Re-registering a known id refreshes its
    recency before the bucket is replaced; once the registry exceeds
    ``_MAX_TRACE_HISTORY`` entries, the least recently used ones are dropped.
    """
    if not trace_id:
        return
    if trace_id in _TRACE_HISTORY:
        # Refresh recency so this trace survives eviction the longest.
        _TRACE_HISTORY.move_to_end(trace_id)
    _TRACE_HISTORY[trace_id] = events
    overflow = len(_TRACE_HISTORY) - _MAX_TRACE_HISTORY
    for _ in range(overflow):
        # Evict from the least-recently-used end.
        _TRACE_HISTORY.popitem(last=False)
44
+
45
+
46
@dataclass(slots=True)
class _RunLog:
    """Mutable record of everything observed during one recorded run."""

    # All events observed, in arrival order.
    events: list[FlowEvent] = field(default_factory=list)
    # Events grouped per trace_id (only events that carry a trace_id).
    traces: dict[str, list[FlowEvent]] = field(default_factory=dict)
    # Trace ids explicitly registered up front for this run.
    active_traces: set[str] = field(default_factory=set)
51
+
52
+
53
class _RecorderState:
    """Per-flow bookkeeping that records ``FlowEvent``s grouped by trace."""

    def __init__(self) -> None:
        self._lock = asyncio.Lock()
        self._log = _RunLog()
        self._middleware = _Recorder(self)

    @property
    def middleware(self) -> _Recorder:
        """The middleware callable that feeds events into this state."""
        return self._middleware

    def begin(self, traces: Iterable[str] | None = None) -> None:
        """Reset the log and pre-register history buckets for ``traces``."""
        tracked = set(traces) if traces else set()
        self._log = _RunLog(active_traces=tracked)
        for tid in tracked:
            events: list[FlowEvent] = []
            self._log.traces[tid] = events
            _register_trace_history(tid, events)

    async def record(self, event: FlowEvent) -> None:
        """Append ``event`` to the run log and to its trace bucket, if any."""
        async with self._lock:
            self._log.events.append(event)
            tid = event.trace_id
            if tid is None:
                return
            bucket = self._log.traces.get(tid)
            if bucket is None:
                # First event for this trace: create and publish its bucket.
                bucket = []
                self._log.traces[tid] = bucket
                _register_trace_history(tid, bucket)
            bucket.append(event)

    def node_sequence(self, trace_id: str) -> list[str]:
        """Node names that emitted ``node_start`` for ``trace_id``, in order."""
        bucket = self._log.traces.get(trace_id)
        if bucket is None:
            # Fall back to the module-level history for past runs.
            bucket = _TRACE_HISTORY.get(trace_id, [])
        return [
            event.node_name or event.node_id or "<anonymous>"
            for event in bucket
            if event.event_type == "node_start"
        ]
95
+
96
+
97
class _Recorder:
    """Middleware adapter that forwards each FlowEvent to its recorder state."""

    def __init__(self, state: _RecorderState) -> None:
        self._state = state

    async def __call__(self, event: FlowEvent) -> None:
        # Delegates straight to _RecorderState.record (lock-protected there).
        await self._state.record(event)
103
+
104
+
105
def _get_state(flow: PenguiFlow) -> _RecorderState:
    """Return the recorder state for ``flow``, creating and attaching it once.

    Raises ``AttributeError`` when the flow exposes no ``_middlewares`` hook
    list; the recorder middleware is appended at most once (identity check).
    """
    try:
        state = _RECORDER_STATE[flow]
    except KeyError:
        state = _RECORDER_STATE[flow] = _RecorderState()
    hooks = getattr(flow, "_middlewares", None)
    if hooks is None:
        raise AttributeError("PenguiFlow instance is missing middleware hooks")
    recorder = state.middleware
    attached = any(existing is recorder for existing in hooks)
    if not attached:
        hooks.append(recorder)
    return state
117
+
118
+
119
async def run_one(
    flow: PenguiFlow,
    message: Message,
    *,
    registry: Any | None = None,
    timeout_s: float | None = 1.0,
) -> Any:
    """Run ``message`` through ``flow`` and return the first Rookery payload.

    The flow is started and stopped on behalf of the caller, and the
    message's ``trace_id`` is registered so :func:`assert_node_sequence`
    can inspect the execution order afterwards.

    Parameters
    ----------
    flow:
        The flow to drive for a single round trip.
    message:
        The message injected into the flow; must be a ``Message``.
    registry:
        Optional registry forwarded to ``flow.run``.
    timeout_s:
        Maximum seconds to wait for the fetched result; ``None`` waits
        indefinitely.
    """
    if not isinstance(message, Message):
        raise TypeError("run_one expects a penguiflow.types.Message instance")

    # Track this trace so the recorded node order can be asserted later.
    _get_state(flow).begin([message.trace_id])

    flow.run(registry=registry)
    try:
        await flow.emit(message)
        pending = flow.fetch()
        if timeout_s is None:
            return await pending
        return await asyncio.wait_for(pending, timeout_s)
    finally:
        # Always stop the flow, even on timeout or failure.
        await flow.stop()
151
+
152
+
153
def assert_node_sequence(trace_id: str, expected: Sequence[str]) -> None:
    """Assert that ``expected`` matches the recorded node start order.

    Consecutive repeats of the same node name (e.g. from retries) are
    collapsed before comparison. Raises ``AssertionError`` when nothing was
    recorded for ``trace_id`` or the observed order differs.
    """
    wanted = list(expected)
    recorded = _TRACE_HISTORY.get(trace_id, [])
    if not recorded:
        raise AssertionError(
            "No recorded events for trace_id="
            f"{trace_id!r}; run a flow with run_one first."
        )

    observed: list[str] = []
    for event in recorded:
        if event.event_type != "node_start":
            continue
        name = event.node_name or event.node_id or "<anonymous>"
        # Collapse adjacent duplicates (equivalent to itertools.groupby).
        if not observed or observed[-1] != name:
            observed.append(name)

    if observed != wanted:
        raise AssertionError(
            "Node sequence mismatch:\n"
            f" expected: {wanted}\n"
            f" actual: {observed}"
        )
176
+
177
+
178
class _ErrorSimulation:
    """Async callable that raises for its first ``fail_times`` invocations.

    After the simulated failures are exhausted it either echoes the incoming
    message or delegates to ``result_factory`` (sync or async).
    """

    def __init__(
        self,
        *,
        node_name: str,
        code: str,
        fail_times: int,
        exception_factory: Callable[[str], Exception],
        result_factory: Callable[[Any], Awaitable[Any] | Any] | None,
    ) -> None:
        self._node_name = node_name
        self._code = code
        self._fail_times = fail_times
        self._exception_factory = exception_factory
        self._result_factory = result_factory
        self._attempts = 0

    @property
    def attempts(self) -> int:
        """Total number of invocations so far."""
        return self._attempts

    @property
    def failures(self) -> int:
        """How many invocations so far ended in a simulated failure."""
        return min(self._attempts, self._fail_times)

    async def __call__(self, message: Any, _ctx: Any) -> Any:
        self._attempts += 1
        if self._attempts <= self._fail_times:
            text = (
                f"[{self._code}] simulated failure in {self._node_name}"
                f" (attempt {self._attempts})"
            )
            raise self._exception_factory(text)

        factory = self._result_factory
        if factory is None:
            # Default success behaviour: echo the incoming message.
            return message
        outcome = factory(message)
        if inspect.isawaitable(outcome):
            outcome = await outcome
        return outcome
219
+
220
+
221
def simulate_error(
    node_name: str,
    code: FlowErrorCode | str,
    *,
    fail_times: int = 1,
    result: Any | None = None,
    result_factory: Callable[[Any], Awaitable[Any] | Any] | None = None,
    exception_type: type[Exception] = RuntimeError,
) -> Callable[[Any, Any], Awaitable[Any]]:
    """Return an async callable that fails ``fail_times`` times, then succeeds.

    The returned coroutine is suitable for wrapping in
    :class:`~penguiflow.node.Node` and is especially useful for retry-centric
    tests. By default the callable echoes the incoming ``message`` once the
    simulated failures are exhausted; ``result``/``result_factory`` override
    the successful return value (at most one of the two may be given).
    """
    if fail_times < 1:
        raise ValueError("fail_times must be >= 1")
    if result is not None and result_factory is not None:
        raise ValueError("Specify only one of result or result_factory")

    # Accept either the enum or a plain string error code.
    if isinstance(code, FlowErrorCode):
        resolved_code = code.value
    else:
        resolved_code = str(code)

    def _make_exception(text: str) -> Exception:
        return exception_type(text)

    success_factory = result_factory
    if success_factory is None and result is not None:
        async def _fixed_result(_: Any) -> Any:
            return result

        success_factory = _fixed_result

    simulation = _ErrorSimulation(
        node_name=node_name,
        code=resolved_code,
        fail_times=fail_times,
        exception_factory=_make_exception,
        result_factory=success_factory,
    )

    async def _runner(message: Any, ctx: Any) -> Any:
        return await simulation(message, ctx)

    # Expose the simulation for introspection in tests without leaking the
    # internal class into the public API.
    _runner.simulation = simulation  # type: ignore[attr-defined]
    return _runner
269
+
penguiflow/types.py CHANGED
@@ -21,6 +21,17 @@ class Message(BaseModel):
21
21
  trace_id: str = Field(default_factory=lambda: uuid.uuid4().hex)
22
22
  ts: float = Field(default_factory=time.time)
23
23
  deadline_s: float | None = None
24
+ meta: dict[str, Any] = Field(default_factory=dict)
25
+
26
+
27
class StreamChunk(BaseModel):
    """Represents a chunk of streamed output."""

    # Identifier of the logical stream this chunk belongs to.
    stream_id: str
    # Sequence number of the chunk within its stream.
    seq: int
    # Textual payload carried by this chunk.
    text: str
    # Marks the final chunk of the stream — presumably; confirm with producers.
    done: bool = False
    # Free-form metadata attached to the chunk.
    meta: dict[str, Any] = Field(default_factory=dict)
24
35
 
25
36
 
26
37
  class PlanStep(BaseModel):
@@ -39,7 +50,9 @@ class WM(BaseModel):
39
50
  query: str
40
51
  facts: list[Any] = Field(default_factory=list)
41
52
  hops: int = 0
42
- budget_hops: int = 8
53
+ budget_hops: int | None = 8
54
+ tokens_used: int = 0
55
+ budget_tokens: int | None = None
43
56
  confidence: float = 0.0
44
57
 
45
58
 
@@ -51,6 +64,7 @@ class FinalAnswer(BaseModel):
51
64
  __all__ = [
52
65
  "Headers",
53
66
  "Message",
67
+ "StreamChunk",
54
68
  "PlanStep",
55
69
  "Thought",
56
70
  "WM",
penguiflow/viz.py CHANGED
@@ -3,6 +3,7 @@
3
3
  from __future__ import annotations
4
4
 
5
5
  import re
6
+ from dataclasses import dataclass
6
7
  from typing import TYPE_CHECKING
7
8
 
8
9
  from .core import Endpoint
@@ -11,7 +12,21 @@ from .node import Node
11
12
  if TYPE_CHECKING: # pragma: no cover - type checking only
12
13
  from .core import PenguiFlow
13
14
 
14
- __all__ = ["flow_to_mermaid"]
15
+ __all__ = ["flow_to_mermaid", "flow_to_dot"]
16
+
17
+
18
@dataclass
class _VisualNode:
    """A renderer-agnostic graph node shared by the Mermaid and DOT emitters."""

    # Sanitized unique identifier used in the rendered graph source.
    identifier: str
    # Human-readable label displayed for the node.
    label: str
    # Style class names (e.g. "endpoint", "controller_loop") for the node.
    classes: list[str]
23
+
24
+
25
@dataclass
class _VisualEdge:
    """A renderer-agnostic directed edge between two visual nodes."""

    # Identifier of the source node.
    source: str
    # Identifier of the target node.
    target: str
    # Optional label ("loop", "ingress", "egress") or None for plain edges.
    label: str | None
15
30
 
16
31
 
17
32
def flow_to_mermaid(flow: PenguiFlow, *, direction: str = "TD") -> str:
    """Render the flow graph as a Mermaid diagram string.

    Parameters
    ----------
    flow:
        The :class:`PenguiFlow` instance to visualize.
    direction:
        Mermaid graph direction (``"TD"``, ``"LR"``, etc.). Defaults to top-down.
    """
    nodes, edges = _collect_graph(flow)

    out: list[str] = [f"graph {direction}"]
    # Styling presets for the node classes produced by _collect_graph.
    class_defs = {
        "endpoint": "fill:#e0f2fe,stroke:#0369a1,stroke-width:1px",
        "controller_loop": "fill:#fef3c7,stroke:#b45309,stroke-width:1px",
    }

    # Declare each node and remember which style classes are in use.
    seen_classes: set[str] = set()
    for node in nodes:
        escaped = _escape_label(node.label)
        out.append(f' {node.identifier}["{escaped}"]')
        seen_classes.update(node.classes)

    # Emit classDef lines only for classes actually referenced by some node.
    for name in sorted(seen_classes):
        style = class_defs.get(name)
        if style:
            out.append(f" classDef {name} {style}")

    # Attach the classes to their nodes.
    for node in nodes:
        if node.classes:
            joined = " ".join(node.classes)
            out.append(f" class {node.identifier} {joined}")

    # Finally, emit the edges (optionally labelled).
    for edge in edges:
        suffix = f"|{edge.label}|" if edge.label else ""
        out.append(f" {edge.source} -->{suffix} {edge.target}")

    return "\n".join(out)
74
+
75
+
76
def flow_to_dot(flow: PenguiFlow, *, rankdir: str = "TB") -> str:
    """Render the flow graph as a Graphviz DOT string.

    Parameters
    ----------
    flow:
        The :class:`PenguiFlow` instance to visualize.
    rankdir:
        Graph orientation (``"TB"``, ``"LR"``, etc.). Defaults to top-bottom.
    """

    nodes, edges = _collect_graph(flow)

    lines: list[str] = ["digraph PenguiFlow {", f" rankdir={rankdir}"]
    lines.append(" node [shape=box, style=rounded]")

    for node in nodes:
        # Escape embedded quotes in node labels so a label such as say "hi"
        # cannot break the generated DOT (edge labels below are already
        # escaped; node labels previously were interpolated verbatim).
        node_label = _escape_label(node.label)
        attributes: list[str] = [f'label="{node_label}"']
        if "endpoint" in node.classes:
            attributes.append('shape=oval')
            attributes.append('style="filled"')
            attributes.append('fillcolor="#e0f2fe"')
        elif "controller_loop" in node.classes:
            attributes.append('style="rounded,filled"')
            attributes.append('fillcolor="#fef3c7"')
        attr_str = ", ".join(attributes)
        lines.append(f" {node.identifier} [{attr_str}]")

    for edge in edges:
        if edge.label:
            edge_label = _escape_label(edge.label)
            lines.append(
                f" {edge.source} -> {edge.target} [label=\"{edge_label}\"]"
            )
        else:
            lines.append(f" {edge.source} -> {edge.target}")

    lines.append("}")
    return "\n".join(lines)
30
115
 
31
- for floe in flow._floes: # noqa: SLF001 - visualization accesses internals by design
32
- if floe.source is not None:
33
- nodes.add(floe.source)
34
- if floe.target is not None:
35
- nodes.add(floe.target)
36
116
 
37
- id_lookup: dict[object, str] = {}
117
def _collect_graph(flow: PenguiFlow) -> tuple[list[_VisualNode], list[_VisualEdge]]:
    """Walk ``flow``'s floes and build renderer-agnostic node and edge lists."""
    registry: dict[object, _VisualNode] = {}
    edges: list[_VisualEdge] = []
    used_ids: set[str] = set()
    self_loop_entities: set[object] = set()

    def ensure_node(entity: object) -> _VisualNode:
        # Return the visual node for *entity*, creating it on first sight.
        existing = registry.get(entity)
        if existing is not None:
            return existing
        label = _display_label(entity)
        identifier = _unique_id(label, used_ids)
        used_ids.add(identifier)
        classes: list[str] = []
        if isinstance(entity, Endpoint):
            classes.append("endpoint")
        if isinstance(entity, Node) and entity.allow_cycle:
            classes.append("controller_loop")
        created = _VisualNode(identifier=identifier, label=label, classes=classes)
        registry[entity] = created
        return created

    for floe in flow._floes:  # noqa: SLF001 - visualization inspects internals
        source = floe.source
        target = floe.target
        if source is None or target is None:
            continue
        src = ensure_node(source)
        tgt = ensure_node(target)
        if source is target:
            self_loop_entities.add(source)
            edge_label: str | None = "loop"
        elif isinstance(source, Endpoint):
            edge_label = "ingress"
        elif isinstance(target, Endpoint):
            edge_label = "egress"
        else:
            edge_label = None
        edges.append(_VisualEdge(src.identifier, tgt.identifier, edge_label))

    # Entities that feed themselves render as controller loops even when the
    # Node object did not advertise allow_cycle.
    for entity, visual in registry.items():
        if entity in self_loop_entities and "controller_loop" not in visual.classes:
            visual.classes.append("controller_loop")

    return list(registry.values()), edges
59
163
 
60
164
 
61
165
  def _display_label(entity: object) -> str:
@@ -74,3 +178,8 @@ def _unique_id(label: str, used: set[str]) -> str:
74
178
  index += 1
75
179
  candidate = f"{base}_{index}"
76
180
  return candidate
181
+
182
+
183
def _escape_label(label: str) -> str:
    """Escape embedded double quotes so ``label`` is safe inside a quoted string."""
    return label.replace('"', '\\"')
185
+