penguiflow 1.0.2__py3-none-any.whl → 2.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of penguiflow might be problematic. Click here for more details.

penguiflow/testkit.py ADDED
@@ -0,0 +1,269 @@
1
+ """Utilities for writing concise PenguiFlow tests.
2
+
3
+ The helpers in this module provide a minimal harness around ``PenguiFlow`` so
4
+ unit tests can focus on the behaviour of their nodes instead of the runtime
5
+ plumbing. Each helper intentionally works with the public runtime surface to
6
+ avoid relying on private attributes, keeping the harness forward compatible
7
+ with the v1 API.
8
+ """
9
+
10
+ from __future__ import annotations
11
+
12
+ import asyncio
13
+ import inspect
14
+ from collections import OrderedDict
15
+ from collections.abc import Awaitable, Callable, Iterable, Sequence
16
+ from dataclasses import dataclass, field
17
+ from itertools import groupby
18
+ from typing import Any
19
+ from weakref import WeakKeyDictionary
20
+
21
+ from .core import PenguiFlow
22
+ from .errors import FlowErrorCode
23
+ from .metrics import FlowEvent
24
+ from .types import Message
25
+
26
__all__ = ["run_one", "assert_node_sequence", "simulate_error"]


# Bounded LRU history of FlowEvent buckets keyed by trace_id, so
# assert_node_sequence can inspect a run after the flow has been stopped.
_MAX_TRACE_HISTORY = 64
_TRACE_HISTORY: OrderedDict[str, list[FlowEvent]] = OrderedDict()
# Recorder state per flow instance; weakly keyed so a garbage-collected
# flow does not keep its recorded events alive.
_RECORDER_STATE: WeakKeyDictionary[PenguiFlow, _RecorderState] = (
    WeakKeyDictionary()
)
34
+
35
+
36
def _register_trace_history(trace_id: str, events: list[FlowEvent]) -> None:
    """Install *events* as the history bucket for *trace_id*.

    Empty trace ids are ignored.  Re-registering an id refreshes its LRU
    position; once the history exceeds ``_MAX_TRACE_HISTORY`` entries the
    least recently used traces are evicted.
    """
    if not trace_id:
        return
    history = _TRACE_HISTORY
    if trace_id in history:
        # Touch the entry so it becomes most-recently-used before overwrite.
        history.move_to_end(trace_id)
    history[trace_id] = events
    # Trim from the oldest end until we are back under the cap.
    while len(history) > _MAX_TRACE_HISTORY:
        history.popitem(last=False)
44
+
45
+
46
@dataclass(slots=True)
class _RunLog:
    """Events captured during one harness run.

    ``events`` holds every FlowEvent in arrival order, ``traces`` groups the
    same events per trace id, and ``active_traces`` remembers the ids that
    were registered up-front via ``_RecorderState.begin``.
    """

    events: list[FlowEvent] = field(default_factory=list)
    traces: dict[str, list[FlowEvent]] = field(default_factory=dict)
    active_traces: set[str] = field(default_factory=set)
51
+
52
+
53
class _RecorderState:
    """Per-flow recording state: an event log plus the middleware feeding it."""

    def __init__(self) -> None:
        self._lock = asyncio.Lock()
        self._log = _RunLog()
        self._middleware = _Recorder(self)

    @property
    def middleware(self) -> _Recorder:
        """The callable middleware that forwards FlowEvents into this state."""
        return self._middleware

    def begin(self, traces: Iterable[str] | None = None) -> None:
        """Start a fresh log, pre-registering empty buckets for *traces*."""
        tracked = set(traces or [])
        self._log = _RunLog(active_traces=tracked)
        for tid in tracked:
            events: list[FlowEvent] = []
            self._log.traces[tid] = events
            _register_trace_history(tid, events)

    async def record(self, event: FlowEvent) -> None:
        """Append *event* to the log and to its trace bucket, if it has one."""
        async with self._lock:
            self._log.events.append(event)
            tid = event.trace_id
            if tid is None:
                return
            events = self._log.traces.get(tid)
            if events is None:
                # First event for an untracked trace: create and publish the
                # bucket so later history lookups see all of its events.
                events = []
                self._log.traces[tid] = events
                _register_trace_history(tid, events)
            events.append(event)

    def node_sequence(self, trace_id: str) -> list[str]:
        """Return node names in ``node_start`` order for *trace_id*."""
        events = self._log.traces.get(trace_id)
        if events is None:
            events = _TRACE_HISTORY.get(trace_id, [])
        return [
            event.node_name or event.node_id or "<anonymous>"
            for event in events
            if event.event_type == "node_start"
        ]
95
+
96
+
97
class _Recorder:
    """Middleware adapter that forwards every FlowEvent to its owner state."""

    def __init__(self, state: _RecorderState) -> None:
        self._state = state

    async def __call__(self, event: FlowEvent) -> None:
        # Delegate; the state serialises appends behind its own lock.
        await self._state.record(event)
103
+
104
+
105
def _get_state(flow: PenguiFlow) -> _RecorderState:
    """Return (creating if needed) the recorder state attached to *flow*.

    The recorder middleware is appended to the flow's middleware list at most
    once; membership is checked by identity so no ``__eq__`` is ever invoked.

    Raises ``AttributeError`` if the flow exposes no middleware list.
    """
    state = _RECORDER_STATE.get(flow)
    if state is None:
        state = _RecorderState()
        _RECORDER_STATE[flow] = state
    hooks = getattr(flow, "_middlewares", None)
    if hooks is None:
        raise AttributeError("PenguiFlow instance is missing middleware hooks")
    recorder = state.middleware
    for existing in hooks:
        if existing is recorder:
            break
    else:
        hooks.append(recorder)
    return state
117
+
118
+
119
async def run_one(
    flow: PenguiFlow,
    message: Message,
    *,
    registry: Any | None = None,
    timeout_s: float | None = 1.0,
) -> Any:
    """Run ``message`` through ``flow`` and return the first Rookery payload.

    The flow is started and stopped on the caller's behalf.  The message's
    ``trace_id`` is registered with the recorder so that
    :func:`assert_node_sequence` can inspect the execution order afterwards.

    Raises ``TypeError`` for non-``Message`` inputs and propagates
    ``asyncio.TimeoutError`` when ``timeout_s`` elapses before a result.
    """

    if not isinstance(message, Message):
        raise TypeError("run_one expects a penguiflow.types.Message instance")

    # Reset the recorder, scoping this run to the message's trace.
    _get_state(flow).begin([message.trace_id])

    flow.run(registry=registry)
    try:
        await flow.emit(message)
        pending = flow.fetch()
        if timeout_s is None:
            return await pending
        return await asyncio.wait_for(pending, timeout_s)
    finally:
        # Always stop the flow, even on timeout or emit failure.
        await flow.stop()
151
+
152
+
153
def assert_node_sequence(trace_id: str, expected: Sequence[str]) -> None:
    """Assert that ``expected`` matches the recorded node start order.

    Consecutive repeats of the same node (retries re-entering a node) are
    collapsed to a single entry before comparison.
    # NOTE(review): _RecorderState.node_sequence does NOT collapse repeats —
    # confirm the asymmetry is intentional.
    """

    wanted = list(expected)
    recorded = _TRACE_HISTORY.get(trace_id, [])
    if not recorded:
        raise AssertionError(
            "No recorded events for trace_id="
            f"{trace_id!r}; run a flow with run_one first."
        )

    started = [
        event.node_name or event.node_id or "<anonymous>"
        for event in recorded
        if event.event_type == "node_start"
    ]
    # Collapse runs of identical names so a retried node counts once.
    started = [name for name, _ in groupby(started)]
    if started != wanted:
        raise AssertionError(
            "Node sequence mismatch:\n"
            f" expected: {wanted}\n"
            f" actual: {started}"
        )
176
+
177
+
178
class _ErrorSimulation:
    """Stateful async callable: raises for the first N calls, then succeeds."""

    def __init__(
        self,
        *,
        node_name: str,
        code: str,
        fail_times: int,
        exception_factory: Callable[[str], Exception],
        result_factory: Callable[[Any], Awaitable[Any] | Any] | None,
    ) -> None:
        self._node_name = node_name
        self._code = code
        self._fail_times = fail_times
        self._exception_factory = exception_factory
        self._result_factory = result_factory
        self._attempts = 0

    @property
    def attempts(self) -> int:
        # Total invocations so far, failing or not.
        return self._attempts

    @property
    def failures(self) -> int:
        # Invocations that raised, capped at the configured failure budget.
        return min(self._attempts, self._fail_times)

    async def __call__(self, message: Any, _ctx: Any) -> Any:
        self._attempts += 1
        if self._attempts <= self._fail_times:
            text = (
                f"[{self._code}] simulated failure in {self._node_name}"
                f" (attempt {self._attempts})"
            )
            raise self._exception_factory(text)

        factory = self._result_factory
        if factory is None:
            # Default success behaviour: echo the incoming message untouched.
            return message

        produced = factory(message)
        if inspect.isawaitable(produced):
            return await produced
        return produced
219
+
220
+
221
def simulate_error(
    node_name: str,
    code: FlowErrorCode | str,
    *,
    fail_times: int = 1,
    result: Any | None = None,
    result_factory: Callable[[Any], Awaitable[Any] | Any] | None = None,
    exception_type: type[Exception] = RuntimeError,
) -> Callable[[Any, Any], Awaitable[Any]]:
    """Return an async callable that fails ``fail_times`` before succeeding.

    The returned coroutine function can be wrapped in
    :class:`~penguiflow.node.Node` and is aimed at retry-centric tests.  Once
    the simulated failures are exhausted it echoes the incoming message, unless
    ``result`` or ``result_factory`` overrides the successful return value.

    Raises ``ValueError`` when ``fail_times`` is below 1 or when both
    ``result`` and ``result_factory`` are supplied.
    """

    if fail_times < 1:
        raise ValueError("fail_times must be >= 1")
    if result is not None and result_factory is not None:
        raise ValueError("Specify only one of result or result_factory")

    # Normalise the code to its string form for the failure message.
    resolved = code.value if isinstance(code, FlowErrorCode) else str(code)

    def _build_exception(text: str) -> Exception:
        return exception_type(text)

    factory = result_factory
    if factory is None and result is not None:
        async def _constant(_: Any) -> Any:
            return result

        factory = _constant

    simulation = _ErrorSimulation(
        node_name=node_name,
        code=resolved,
        fail_times=fail_times,
        exception_factory=_build_exception,
        result_factory=factory,
    )

    async def _runner(message: Any, ctx: Any) -> Any:
        return await simulation(message, ctx)

    # Expose the simulation object so tests can inspect attempts/failures
    # without the internal class leaking into the public API.
    _runner.simulation = simulation  # type: ignore[attr-defined]
    return _runner
269
+
penguiflow/types.py CHANGED
@@ -21,6 +21,17 @@ class Message(BaseModel):
21
21
  trace_id: str = Field(default_factory=lambda: uuid.uuid4().hex)
22
22
  ts: float = Field(default_factory=time.time)
23
23
  deadline_s: float | None = None
24
+ meta: dict[str, Any] = Field(default_factory=dict)
25
+
26
+
27
+ class StreamChunk(BaseModel):
28
+ """Represents a chunk of streamed output."""
29
+
30
+ stream_id: str
31
+ seq: int
32
+ text: str
33
+ done: bool = False
34
+ meta: dict[str, Any] = Field(default_factory=dict)
24
35
 
25
36
 
26
37
  class PlanStep(BaseModel):
@@ -39,7 +50,9 @@ class WM(BaseModel):
39
50
  query: str
40
51
  facts: list[Any] = Field(default_factory=list)
41
52
  hops: int = 0
42
- budget_hops: int = 8
53
+ budget_hops: int | None = 8
54
+ tokens_used: int = 0
55
+ budget_tokens: int | None = None
43
56
  confidence: float = 0.0
44
57
 
45
58
 
@@ -51,6 +64,7 @@ class FinalAnswer(BaseModel):
51
64
  __all__ = [
52
65
  "Headers",
53
66
  "Message",
67
+ "StreamChunk",
54
68
  "PlanStep",
55
69
  "Thought",
56
70
  "WM",
penguiflow/viz.py CHANGED
@@ -2,4 +2,184 @@
2
2
 
3
3
  from __future__ import annotations
4
4
 
5
- __all__: list[str] = []
5
+ import re
6
+ from dataclasses import dataclass
7
+ from typing import TYPE_CHECKING
8
+
9
+ from .core import Endpoint
10
+ from .node import Node
11
+
12
+ if TYPE_CHECKING: # pragma: no cover - type checking only
13
+ from .core import PenguiFlow
14
+
15
+ __all__ = ["flow_to_mermaid", "flow_to_dot"]
16
+
17
+
18
@dataclass
class _VisualNode:
    """A vertex of the rendered graph: stable id, display label, style classes."""

    identifier: str
    label: str
    classes: list[str]
+
24
+
25
@dataclass
class _VisualEdge:
    """A directed edge between node identifiers, optionally labelled
    (``"ingress"``, ``"egress"`` or ``"loop"``)."""

    source: str
    target: str
    label: str | None
+
31
+
32
def flow_to_mermaid(flow: PenguiFlow, *, direction: str = "TD") -> str:
    """Render the flow graph as a Mermaid diagram string.

    Parameters
    ----------
    flow:
        The :class:`PenguiFlow` instance to visualize.
    direction:
        Mermaid graph direction (``"TD"``, ``"LR"``, etc.). Defaults to top-down.
    """

    nodes, edges = _collect_graph(flow)

    styles = {
        "endpoint": "fill:#e0f2fe,stroke:#0369a1,stroke-width:1px",
        "controller_loop": "fill:#fef3c7,stroke:#b45309,stroke-width:1px",
    }

    out: list[str] = [f"graph {direction}"]

    # Declare nodes, collecting every style class actually in use.
    seen_classes: set[str] = set()
    for node in nodes:
        out.append(f" {node.identifier}[\"{_escape_label(node.label)}\"]")
        seen_classes.update(node.classes)

    # Emit classDef lines only for classes that are both used and styled.
    for name in sorted(seen_classes):
        definition = styles.get(name)
        if definition:
            out.append(f" classDef {name} {definition}")

    # Attach the classes to their nodes.
    for node in nodes:
        if node.classes:
            out.append(f" class {node.identifier} {' '.join(node.classes)}")

    # Edges, with optional |label| annotations.
    for edge in edges:
        suffix = f"|{edge.label}|" if edge.label else ""
        out.append(f" {edge.source} -->{suffix} {edge.target}")

    return "\n".join(out)
74
+
75
+
76
def flow_to_dot(flow: PenguiFlow, *, rankdir: str = "TB") -> str:
    """Render the flow graph as a Graphviz DOT string.

    Parameters
    ----------
    flow:
        The :class:`PenguiFlow` instance to visualize.
    rankdir:
        Graph orientation (``"TB"``, ``"LR"``, etc.). Defaults to top-bottom.
    """

    nodes, edges = _collect_graph(flow)

    lines: list[str] = ["digraph PenguiFlow {", f" rankdir={rankdir}"]
    lines.append(" node [shape=box, style=rounded]")

    for node in nodes:
        # BUGFIX: escape the node label; edge labels below are escaped, and an
        # unescaped double quote in a node name would produce invalid DOT.
        attributes: list[str] = [f'label="{_escape_label(node.label)}"']
        if "endpoint" in node.classes:
            attributes.append('shape=oval')
            attributes.append('style="filled"')
            attributes.append('fillcolor="#e0f2fe"')
        elif "controller_loop" in node.classes:
            attributes.append('style="rounded,filled"')
            attributes.append('fillcolor="#fef3c7"')
        attr_str = ", ".join(attributes)
        lines.append(f" {node.identifier} [{attr_str}]")

    for edge in edges:
        if edge.label:
            edge_label = _escape_label(edge.label)
            lines.append(
                f" {edge.source} -> {edge.target} [label=\"{edge_label}\"]"
            )
        else:
            lines.append(f" {edge.source} -> {edge.target}")

    lines.append("}")
    return "\n".join(lines)
115
+
116
+
117
def _collect_graph(flow: PenguiFlow) -> tuple[list[_VisualNode], list[_VisualEdge]]:
    """Walk the flow's floes and build the visual node and edge lists."""

    registry: dict[object, _VisualNode] = {}
    edges: list[_VisualEdge] = []
    taken_ids: set[str] = set()
    self_loop_entities: set[object] = set()

    def ensure_node(entity: object) -> _VisualNode:
        # Return the existing visual node for *entity*, or create one with a
        # unique identifier and its style classes.
        existing = registry.get(entity)
        if existing is not None:
            return existing
        label = _display_label(entity)
        identifier = _unique_id(label, taken_ids)
        taken_ids.add(identifier)
        classes: list[str] = []
        if isinstance(entity, Endpoint):
            classes.append("endpoint")
        if isinstance(entity, Node) and entity.allow_cycle:
            classes.append("controller_loop")
        created = _VisualNode(identifier=identifier, label=label, classes=classes)
        registry[entity] = created
        return created

    for floe in flow._floes:  # noqa: SLF001 - visualization inspects internals
        source, target = floe.source, floe.target
        if source is None or target is None:
            continue
        src = ensure_node(source)
        tgt = ensure_node(target)
        if source is target:
            self_loop_entities.add(source)
            label = "loop"
        elif isinstance(source, Endpoint):
            label = "ingress"
        elif isinstance(target, Endpoint):
            label = "egress"
        else:
            label = None
        edges.append(_VisualEdge(src.identifier, tgt.identifier, label))

    # Any node with a self-edge is rendered as a controller loop.
    for entity in self_loop_entities:
        node = registry[entity]
        if "controller_loop" not in node.classes:
            node.classes.append("controller_loop")

    return list(registry.values()), edges
163
+
164
+
165
def _display_label(entity: object) -> str:
    """Return a human-readable label: node name (falling back to its id),
    endpoint name, or ``str(entity)`` for anything else."""
    if isinstance(entity, Node):
        return entity.name or entity.node_id
    if isinstance(entity, Endpoint):
        return entity.name
    return str(entity)
171
+
172
+
173
def _unique_id(label: str, used: set[str]) -> str:
    """Derive a graph-safe identifier from *label*, unique within *used*.

    Non-alphanumeric characters become underscores and an empty result falls
    back to ``"node"``.  A leading digit is prefixed with ``n_`` because bare
    Graphviz DOT identifiers (and Mermaid node ids) may not start with a
    digit.  Collisions are resolved with ``_2``, ``_3``, ... suffixes.
    """
    base = re.sub(r"[^0-9A-Za-z_]", "_", label) or "node"
    if base[0].isdigit():
        # BUGFIX: "2nd_stage" would otherwise emit an invalid bare DOT ID.
        base = f"n_{base}"
    candidate = base
    index = 1
    while candidate in used:
        index += 1
        candidate = f"{base}_{index}"
    return candidate
+ return candidate
181
+
182
+
183
def _escape_label(label: str) -> str:
    """Escape *label* for embedding in a double-quoted DOT/Mermaid string.

    Backslashes are doubled before quotes are escaped: in DOT double-quoted
    strings a lone backslash starts an escape sequence, and a trailing
    backslash would otherwise neutralise the quote escape.
    # NOTE(review): Mermaid renders backslashes mostly literally — confirm
    # doubling does not visibly change existing Mermaid labels.
    """
    return label.replace("\\", "\\\\").replace("\"", "\\\"")
185
+