penguiflow 1.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of penguiflow might be problematic. Click here for more details.
- penguiflow/__init__.py +43 -0
- penguiflow/core.py +587 -0
- penguiflow/middlewares.py +17 -0
- penguiflow/node.py +117 -0
- penguiflow/patterns.py +142 -0
- penguiflow/registry.py +49 -0
- penguiflow/types.py +58 -0
- penguiflow/viz.py +5 -0
- penguiflow-1.0.0.dist-info/METADATA +392 -0
- penguiflow-1.0.0.dist-info/RECORD +13 -0
- penguiflow-1.0.0.dist-info/WHEEL +5 -0
- penguiflow-1.0.0.dist-info/licenses/LICENSE +21 -0
- penguiflow-1.0.0.dist-info/top_level.txt +1 -0
penguiflow/node.py
ADDED
|
@@ -0,0 +1,117 @@
|
|
|
1
|
+
"""Node abstractions for PenguiFlow runtime."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import asyncio
|
|
6
|
+
import inspect
|
|
7
|
+
import uuid
|
|
8
|
+
from collections.abc import Awaitable, Callable
|
|
9
|
+
from dataclasses import dataclass, field
|
|
10
|
+
from typing import TYPE_CHECKING, Any
|
|
11
|
+
|
|
12
|
+
if TYPE_CHECKING:
|
|
13
|
+
from pydantic.type_adapter import TypeAdapter
|
|
14
|
+
|
|
15
|
+
from .core import Context
|
|
16
|
+
from .registry import ModelRegistry
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
@dataclass(slots=True)
|
|
20
|
+
class NodePolicy:
|
|
21
|
+
"""Execution policy configuration placeholder."""
|
|
22
|
+
|
|
23
|
+
validate: str = "both"
|
|
24
|
+
timeout_s: float | None = None
|
|
25
|
+
max_retries: int = 0
|
|
26
|
+
backoff_base: float = 0.5
|
|
27
|
+
backoff_mult: float = 2.0
|
|
28
|
+
max_backoff: float | None = None
|
|
29
|
+
|
|
30
|
+
def __post_init__(self) -> None:
|
|
31
|
+
if self.validate not in {"both", "in", "out", "none"}:
|
|
32
|
+
raise ValueError("validate must be one of 'both', 'in', 'out', 'none'")
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
@dataclass(slots=True)
class Node:
    """Wraps an async callable with metadata used by the runtime."""

    func: Callable[..., Awaitable[Any]]
    name: str | None = None
    policy: NodePolicy = field(default_factory=NodePolicy)
    allow_cycle: bool = False
    node_id: str = field(init=False)

    def __post_init__(self) -> None:
        # The runtime awaits node functions, so a plain ``def`` cannot work.
        if not asyncio.iscoroutinefunction(self.func):
            raise TypeError("Node function must be declared with async def")

        if self.name is None:
            self.name = self.func.__name__
        assert self.name is not None  # narrow for type-checkers
        self.node_id = uuid.uuid4().hex

        parameters = list(inspect.signature(self.func).parameters.values())
        if len(parameters) != 2:
            raise ValueError(
                f"Node '{self.name}' must accept exactly two parameters "
                f"(message, ctx); got {len(parameters)}"
            )

        # The runtime passes ctx positionally, so keyword-only won't do.
        positional_kinds = (
            inspect.Parameter.POSITIONAL_ONLY,
            inspect.Parameter.POSITIONAL_OR_KEYWORD,
        )
        if parameters[1].kind not in positional_kinds:
            raise ValueError("Context parameter must be positional")

    def _maybe_validate(
        self,
        adapter: TypeAdapter[Any] | None,
        value: Any,
        *,
        enforce: bool,
    ) -> Any:
        """Run *value* through *adapter* when validation is enabled."""
        if enforce and adapter is not None:
            return adapter.validate_python(value)
        return value

    async def invoke(
        self,
        message: Any,
        ctx: Context,
        *,
        registry: ModelRegistry | None,
    ) -> Any:
        """Invoke the underlying coroutine, applying optional validation."""

        adapter_in: TypeAdapter[Any] | None = None
        adapter_out: TypeAdapter[Any] | None = None

        mode = self.policy.validate
        if registry is not None and mode != "none":
            node_name = self.name
            assert node_name is not None
            adapter_in, adapter_out = registry.adapters(node_name)

        validated = self._maybe_validate(
            adapter_in, message, enforce=mode in {"in", "both"}
        )
        result = await self.func(validated, ctx)

        # A None result means "no output"; skip output validation entirely.
        if result is None:
            return None

        return self._maybe_validate(
            adapter_out, result, enforce=mode in {"out", "both"}
        )

    def to(self, *nodes: Node) -> tuple[Node, tuple[Node, ...]]:
        """Describe an adjacency: this node feeding each node in *nodes*."""
        return self, nodes

    def __hash__(self) -> int:
        # Identity is the random node_id, so two same-named nodes stay distinct.
        return hash(self.node_id)

    def __repr__(self) -> str:  # pragma: no cover - debug helper
        return f"Node(name={self.name!r}, node_id={self.node_id})"
|
|
115
|
+
|
|
116
|
+
|
|
117
|
+
__all__ = ["Node", "NodePolicy"]
|
penguiflow/patterns.py
ADDED
|
@@ -0,0 +1,142 @@
|
|
|
1
|
+
"""Common orchestration patterns for PenguiFlow."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import asyncio
|
|
6
|
+
from collections import defaultdict
|
|
7
|
+
from collections.abc import Awaitable, Callable, Iterable, Sequence
|
|
8
|
+
from typing import Any, TypeVar, cast
|
|
9
|
+
|
|
10
|
+
from pydantic import BaseModel
|
|
11
|
+
from pydantic.type_adapter import TypeAdapter
|
|
12
|
+
|
|
13
|
+
from .node import Node, NodePolicy
|
|
14
|
+
from .types import Message
|
|
15
|
+
|
|
16
|
+
PayloadT = TypeVar("PayloadT")
|
|
17
|
+
ResultT = TypeVar("ResultT")
|
|
18
|
+
|
|
19
|
+
__all__ = [
|
|
20
|
+
"map_concurrent",
|
|
21
|
+
"join_k",
|
|
22
|
+
"predicate_router",
|
|
23
|
+
"union_router",
|
|
24
|
+
]
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
async def map_concurrent(
    items: Iterable[PayloadT],
    worker: Callable[[PayloadT], Awaitable[ResultT]],
    *,
    max_concurrency: int = 8,
) -> list[ResultT]:
    """Apply the async *worker* to every item, at most *max_concurrency* at once.

    Results are returned in the original input order regardless of which
    worker finishes first.
    """

    payloads = list(items)
    gate = asyncio.Semaphore(max(1, max_concurrency))
    slots: list[ResultT | None] = [None for _ in payloads]

    async def bounded(slot: int, payload: PayloadT) -> None:
        # The semaphore caps how many workers run simultaneously.
        async with gate:
            slots[slot] = await worker(payload)

    await asyncio.gather(*(bounded(i, p) for i, p in enumerate(payloads)))
    # Every slot has been filled by its task; the cast only informs typing.
    return cast("list[ResultT]", slots)
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
def predicate_router(
    name: str,
    predicate: Callable[[Any], Sequence[Node | str] | Node | str | None],
) -> Node:
    """Create a node that routes messages based on predicate outputs."""

    async def router(msg: Any, ctx) -> None:
        # A None verdict means "drop the message": emit nothing downstream.
        selected = predicate(msg)
        if selected is None:
            return

        resolved = _normalize_targets(ctx, selected)
        if resolved:
            await ctx.emit(msg, to=resolved)

    # Routers bypass model validation; they forward payloads untouched.
    return Node(router, name=name, policy=NodePolicy(validate="none"))
|
|
63
|
+
|
|
64
|
+
|
|
65
|
+
def union_router(
    name: str,
    union_model: type[BaseModel],
) -> Node:
    """Route based on a discriminated union Pydantic model."""

    adapter = TypeAdapter(union_model)

    async def router(msg: BaseModel, ctx) -> None:
        # Coerce the message into the union, then dispatch on its variant.
        member = adapter.validate_python(msg)

        # Prefer an explicit discriminator field; fall back to the class name.
        target = getattr(member, "kind", member.__class__.__name__)
        resolved = _normalize_targets(ctx, target)
        if not resolved:
            raise KeyError(f"No successor matches '{target}'")
        await ctx.emit(member, to=resolved)

    # Validation already happens via the union adapter, so the node skips it.
    return Node(router, name=name, policy=NodePolicy(validate="none"))
|
|
83
|
+
|
|
84
|
+
|
|
85
|
+
def join_k(name: str, k: int) -> Node:
    """Aggregate *k* messages per trace_id and emit the grouped payloads."""

    if k <= 0:
        raise ValueError("k must be positive")

    # NOTE(review): traces that never reach k messages keep their bucket
    # alive for the node's lifetime — confirm upstream guarantees delivery.
    buckets: defaultdict[str, list[Any]] = defaultdict(list)

    async def aggregator(msg: Any, ctx) -> Any:
        trace_id = getattr(msg, "trace_id", None)
        if trace_id is None:
            raise ValueError("join_k requires messages with trace_id")

        pending = buckets[trace_id]
        pending.append(msg)
        # Hold back until the k-th message for this trace has arrived.
        if len(pending) < k:
            return None

        del buckets[trace_id]
        batch = list(pending)
        head = batch[0]
        if isinstance(head, Message):
            # Collapse into one Message whose payload is the list of payloads.
            merged = head.model_copy(
                update={"payload": [item.payload for item in batch]}
            )
            return merged
        return batch

    return Node(aggregator, name=name, policy=NodePolicy(validate="none"))
|
|
113
|
+
|
|
114
|
+
|
|
115
|
+
def _normalize_targets(context, targets) -> list[Node]:
    """Resolve Node/str routing targets against the context's successors."""
    # Accept a single Node or name as shorthand for a one-element sequence.
    if isinstance(targets, (Node, str)):
        requested: Sequence[Node | str] = [targets]
    else:
        requested = list(targets)

    # Candidate successors come from the context's outgoing-edge mapping.
    successors = list(getattr(context, "_outgoing", {}).keys())
    resolved: list[Node] = []
    for entry in requested:
        if isinstance(entry, Node):
            resolved.append(entry)
            continue

        if not isinstance(entry, str):
            raise TypeError("Targets must be Node or str")

        found = next(
            (
                candidate
                for candidate in successors
                if isinstance(candidate, Node) and candidate.name == entry
            ),
            None,
        )
        if found is None:
            raise KeyError(f"No successor named '{entry}'")
        resolved.append(found)

    return resolved
|
penguiflow/registry.py
ADDED
|
@@ -0,0 +1,49 @@
|
|
|
1
|
+
"""Model registry for PenguiFlow."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from dataclasses import dataclass
|
|
6
|
+
from typing import Any, TypeVar
|
|
7
|
+
|
|
8
|
+
from pydantic import BaseModel
|
|
9
|
+
from pydantic.type_adapter import TypeAdapter
|
|
10
|
+
|
|
11
|
+
ModelT = TypeVar("ModelT", bound=BaseModel)
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
@dataclass(slots=True)
class RegistryEntry:
    """Input/output TypeAdapter pair stored for one registered node."""

    # Adapter used to validate messages entering the node.
    in_adapter: TypeAdapter[Any]
    # Adapter used to validate the node's return value.
    out_adapter: TypeAdapter[Any]
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
class ModelRegistry:
    """Stores per-node type adapters for validation."""

    def __init__(self) -> None:
        # node name -> (input adapter, output adapter) entry
        self._entries: dict[str, RegistryEntry] = {}

    def register(
        self,
        node_name: str,
        in_model: type[BaseModel],
        out_model: type[BaseModel],
    ) -> None:
        """Register the input/output models for *node_name*.

        Raises ValueError on duplicate registration and TypeError when
        either model is not a pydantic BaseModel subclass.
        """
        if node_name in self._entries:
            raise ValueError(f"Node '{node_name}' already registered")
        if not (issubclass(in_model, BaseModel) and issubclass(out_model, BaseModel)):
            raise TypeError("Models must inherit from pydantic.BaseModel")
        entry = RegistryEntry(TypeAdapter(in_model), TypeAdapter(out_model))
        self._entries[node_name] = entry

    def adapters(self, node_name: str) -> tuple[TypeAdapter[Any], TypeAdapter[Any]]:
        """Return the (in, out) adapters for *node_name* or raise KeyError."""
        try:
            entry = self._entries[node_name]
        except KeyError as exc:
            raise KeyError(f"Node '{node_name}' not registered") from exc
        return entry.in_adapter, entry.out_adapter
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
__all__ = ["ModelRegistry"]
|
penguiflow/types.py
ADDED
|
@@ -0,0 +1,58 @@
|
|
|
1
|
+
"""Typed message and controller models for PenguiFlow."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import time
|
|
6
|
+
import uuid
|
|
7
|
+
from typing import Any, Literal
|
|
8
|
+
|
|
9
|
+
from pydantic import BaseModel, Field
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
class Headers(BaseModel):
    """Routing metadata attached to every message."""

    # Owning tenant; the only required header field.
    tenant: str
    # Optional topic label; None when unset.
    topic: str | None = None
    # Numeric priority, default 0 — ordering semantics (higher vs lower
    # wins) are not shown here; confirm against the runtime.
    priority: int = 0
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
class Message(BaseModel):
    """Envelope that flows between nodes: a payload plus tracing metadata."""

    # Arbitrary payload; concrete type is up to the producing node.
    payload: Any
    # Required routing metadata (tenant, topic, priority).
    headers: Headers
    # Random hex id correlating all messages of one logical trace.
    trace_id: str = Field(default_factory=lambda: uuid.uuid4().hex)
    # Creation timestamp from time.time() (seconds since the epoch).
    ts: float = Field(default_factory=time.time)
    # Optional deadline in seconds; None means no deadline. Whether it is
    # absolute or relative to `ts` is not shown here — confirm with callers.
    deadline_s: float | None = None
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
class PlanStep(BaseModel):
    """A single step within a controller plan."""

    # Step action, restricted to this closed set of kinds.
    kind: Literal["retrieve", "web", "sql", "summarize", "route", "stop"]
    # Free-form keyword arguments for the step; defaults to empty.
    args: dict[str, Any] = Field(default_factory=dict)
    # Upper bound on parallelism for this step; defaults to serial.
    max_concurrency: int = 1
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
class Thought(BaseModel):
    """A planner's proposed steps together with its reasoning."""

    # Ordered plan steps to execute.
    steps: list[PlanStep]
    # Human-readable justification for the chosen steps.
    rationale: str
    # True once the planner considers the task finished.
    done: bool = False
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
class WM(BaseModel):
    """Working-memory state carried across controller hops."""

    # The user query being answered.
    query: str
    # Facts gathered so far; starts empty.
    facts: list[Any] = Field(default_factory=list)
    # Hops consumed so far versus the allowed budget.
    hops: int = 0
    budget_hops: int = 8
    # Confidence score, default 0.0 — presumably in [0, 1]; confirm against
    # whatever produces/consumes it.
    confidence: float = 0.0
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
class FinalAnswer(BaseModel):
    """Terminal result of a flow: answer text plus supporting citations."""

    # The answer presented to the caller.
    text: str
    # Source citations backing the answer; may be empty.
    citations: list[str] = Field(default_factory=list)
|
|
49
|
+
|
|
50
|
+
|
|
51
|
+
__all__ = [
|
|
52
|
+
"Headers",
|
|
53
|
+
"Message",
|
|
54
|
+
"PlanStep",
|
|
55
|
+
"Thought",
|
|
56
|
+
"WM",
|
|
57
|
+
"FinalAnswer",
|
|
58
|
+
]
|