penguiflow 2.0.0__py3-none-any.whl → 2.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of penguiflow might be problematic. Click here for more details.

penguiflow/__init__.py CHANGED
@@ -3,6 +3,7 @@
3
3
  from __future__ import annotations
4
4
 
5
5
  from . import testkit
6
+ from .bus import BusEnvelope, MessageBus
6
7
  from .core import (
7
8
  DEFAULT_QUEUE_MAXSIZE,
8
9
  Context,
@@ -18,6 +19,14 @@ from .node import Node, NodePolicy
18
19
  from .patterns import join_k, map_concurrent, predicate_router, union_router
19
20
  from .policies import DictRoutingPolicy, RoutingPolicy, RoutingRequest
20
21
  from .registry import ModelRegistry
22
+ from .remote import (
23
+ RemoteCallRequest,
24
+ RemoteCallResult,
25
+ RemoteNode,
26
+ RemoteStreamEvent,
27
+ RemoteTransport,
28
+ )
29
+ from .state import RemoteBinding, StateStore, StoredEvent
21
30
  from .streaming import (
22
31
  chunk_to_ws_json,
23
32
  emit_stream_events,
@@ -40,6 +49,8 @@ __all__ = [
40
49
  "FlowEvent",
41
50
  "FlowError",
42
51
  "FlowErrorCode",
52
+ "MessageBus",
53
+ "BusEnvelope",
43
54
  "call_playbook",
44
55
  "Headers",
45
56
  "Message",
@@ -63,6 +74,14 @@ __all__ = [
63
74
  "flow_to_dot",
64
75
  "create",
65
76
  "testkit",
77
+ "StateStore",
78
+ "StoredEvent",
79
+ "RemoteBinding",
80
+ "RemoteTransport",
81
+ "RemoteCallRequest",
82
+ "RemoteCallResult",
83
+ "RemoteStreamEvent",
84
+ "RemoteNode",
66
85
  ]
67
86
 
68
- __version__ = "2.0.0"
87
+ __version__ = "2.1.0"
penguiflow/admin.py ADDED
@@ -0,0 +1,174 @@
1
+ """Developer CLI helpers for inspecting PenguiFlow trace history."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import argparse
6
+ import asyncio
7
+ import importlib
8
+ import json
9
+ import sys
10
+ from collections.abc import Callable, Sequence
11
+ from typing import Any
12
+
13
+ from .state import StateStore, StoredEvent
14
+
15
+ __all__ = ["load_state_store", "render_events", "main"]
16
+
17
+
18
class _Args(argparse.Namespace):
    # Typed view of the parsed CLI namespace; argparse fills these in
    # via add_argument/set_defaults in _build_parser.
    handler: Callable[[_Args], Any]  # coroutine selected for the chosen subcommand
    state_store: str  # "module:callable" spec resolving to a StateStore factory
    trace_id: str  # trace identifier to inspect or replay
    tail: int | None  # when set, only the last N events are shown
    delay: float  # replay-only: sleep (seconds) between printed events
24
+
25
+
26
+ def _resolve_factory(spec: str) -> Callable[[], Any]:
27
+ module_name, _, attr = spec.partition(":")
28
+ if not module_name or not attr:
29
+ raise ValueError(
30
+ "state store spec must be in the form 'package.module:callable'"
31
+ )
32
+ module = importlib.import_module(module_name)
33
+ try:
34
+ factory = getattr(module, attr)
35
+ except AttributeError as exc: # pragma: no cover - defensive guard
36
+ raise ValueError(f"{spec!r} does not resolve to a callable") from exc
37
+ if not callable(factory):
38
+ raise TypeError(f"{spec!r} resolved to {type(factory)!r}, not a callable")
39
+ return factory
40
+
41
+
42
async def load_state_store(spec: str) -> StateStore:
    """Instantiate a :class:`StateStore` from ``module:callable`` spec."""

    instance = _resolve_factory(spec)()
    # Factories may be async; await the coroutine to get the store.
    if asyncio.iscoroutine(instance):
        instance = await instance
    duck_typed = all(
        hasattr(instance, attr)
        for attr in ("save_event", "load_history", "save_remote_binding")
    )
    if not duck_typed:  # pragma: no cover
        raise TypeError(
            "StateStore factories must implement "
            "save_event/load_history/save_remote_binding"
        )
    return instance
56
+
57
+
58
+ def _trim_events(events: Sequence[StoredEvent], tail: int | None) -> list[StoredEvent]:
59
+ items = list(events)
60
+ if tail is None:
61
+ return items
62
+ if tail <= 0:
63
+ return []
64
+ return items[-tail:]
65
+
66
+
67
def render_events(
    events: Sequence[StoredEvent], *, tail: int | None = None
) -> list[str]:
    """Return JSON line representations of ``events`` (optionally tail-truncated)."""

    # Inline tail-trimming: None keeps all, non-positive keeps nothing.
    items = list(events)
    if tail is not None:
        items = items[-tail:] if tail > 0 else []

    rendered: list[str] = []
    for event in items:
        record = dict(event.payload)
        # Payload keys win; the stored-event columns only fill gaps.
        for key, value in (
            ("event", event.kind),
            ("trace_id", event.trace_id),
            ("node_name", event.node_name),
            ("node_id", event.node_id),
            ("ts", event.ts),
        ):
            record.setdefault(key, value)
        rendered.append(json.dumps(record, sort_keys=True, default=str))
    return rendered
83
+
84
+
85
async def _cmd_history(args: _Args) -> None:
    """Print each stored event for ``args.trace_id`` as a JSON line."""
    store = await load_state_store(args.state_store)
    history = await store.load_history(args.trace_id)
    for rendered in render_events(history, tail=args.tail):
        print(rendered)
90
+
91
+
92
async def _cmd_replay(args: _Args) -> None:
    """Replay stored events for a trace, optionally pausing between them."""
    store = await load_state_store(args.state_store)
    events = _trim_events(await store.load_history(args.trace_id), args.tail)
    if not events:
        print(f"# trace {args.trace_id} has no stored events")
        return
    print(f"# replay trace={args.trace_id} events={len(events)}")
    for event in events:
        print(render_events([event])[0])
        # Optional pacing to mimic runtime emission cadence.
        if args.delay > 0:
            await asyncio.sleep(args.delay)
105
+
106
+
107
def _build_parser() -> argparse.ArgumentParser:
    """Construct the ``penguiflow-admin`` argument parser.

    Shared ``--state-store``/``--tail`` options live on a parent parser so
    both subcommands accept them; each subcommand binds its handler via
    ``set_defaults``.
    """
    parser = argparse.ArgumentParser(
        prog="penguiflow-admin",
        description=(
            "Inspect PenguiFlow trace history via configured StateStore "
            "adapters."
        ),
    )

    shared = argparse.ArgumentParser(add_help=False)
    shared.add_argument(
        "--state-store",
        required=True,
        help="Import path to a factory returning a StateStore (module:callable)",
    )
    shared.add_argument(
        "--tail",
        type=int,
        default=None,
        help="Only show the last N events from the trace history.",
    )

    commands = parser.add_subparsers(dest="command", required=True)

    history = commands.add_parser(
        "history",
        parents=[shared],
        help="Print stored events for a trace as JSON lines.",
    )
    history.add_argument("trace_id", help="Trace identifier to inspect")
    history.set_defaults(handler=_cmd_history)

    replay = commands.add_parser(
        "replay",
        parents=[shared],
        help="Replay events with optional delay to mimic runtime emission.",
    )
    replay.add_argument("trace_id", help="Trace identifier to replay")
    replay.add_argument(
        "--delay",
        type=float,
        default=0.0,
        help="Sleep duration (seconds) between events when replaying.",
    )
    replay.set_defaults(handler=_cmd_replay)

    return parser
152
+
153
+
154
def main(argv: Sequence[str] | None = None) -> int:
    """Entry point for the ``penguiflow-admin`` CLI.

    Returns 0 on success, 1 when no handler was selected or the handler
    raised; errors are reported on stderr rather than as tracebacks.
    """
    parser = _build_parser()
    namespace = parser.parse_args(argv)
    handler = getattr(namespace, "handler", None)
    if handler is None:  # pragma: no cover - argparse guard
        parser.print_help()
        return 1
    try:
        asyncio.run(handler(namespace))
    except Exception as exc:  # pragma: no cover - runtime guard
        print(f"error: {exc}", file=sys.stderr)
        return 1
    return 0
170
+
171
+
172
# Allow direct execution (e.g. `python -m penguiflow.admin`).
if __name__ == "__main__":  # pragma: no cover - manual invocation
    raise SystemExit(main())
174
+
penguiflow/bus.py ADDED
@@ -0,0 +1,30 @@
1
+ """Message bus protocol for distributed PenguiFlow edges."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from collections.abc import Mapping
6
+ from dataclasses import dataclass
7
+ from typing import Any, Protocol
8
+
9
+
10
+ @dataclass(slots=True)
11
+ class BusEnvelope:
12
+ """Structured payload published to a :class:`MessageBus`."""
13
+
14
+ edge: str
15
+ source: str | None
16
+ target: str | None
17
+ trace_id: str | None
18
+ payload: Any
19
+ headers: Mapping[str, Any] | None
20
+ meta: Mapping[str, Any] | None
21
+
22
+
23
class MessageBus(Protocol):
    """Protocol for pluggable message bus adapters.

    Implementations only need an async ``publish``; PenguiFlow calls it
    once per edge hop and tolerates (logs) any failure.
    """

    async def publish(self, envelope: BusEnvelope) -> None:
        """Publish an envelope for downstream workers."""
        ...
28
+
29
+
30
+ __all__ = ["BusEnvelope", "MessageBus"]
penguiflow/core.py CHANGED
@@ -13,13 +13,15 @@ from collections import deque
13
13
  from collections.abc import Awaitable, Callable, Mapping, Sequence
14
14
  from contextlib import suppress
15
15
  from dataclasses import dataclass
16
- from typing import Any
16
+ from typing import Any, cast
17
17
 
18
+ from .bus import BusEnvelope, MessageBus
18
19
  from .errors import FlowError, FlowErrorCode
19
20
  from .metrics import FlowEvent
20
21
  from .middlewares import Middleware
21
22
  from .node import Node, NodePolicy
22
23
  from .registry import ModelRegistry
24
+ from .state import RemoteBinding, StateStore, StoredEvent
23
25
  from .types import WM, FinalAnswer, Message, StreamChunk
24
26
 
25
27
  logger = logging.getLogger("penguiflow.core")
@@ -143,8 +145,7 @@ class Context:
143
145
  if self._runtime is None:
144
146
  raise RuntimeError("Context is not attached to a running flow")
145
147
  for floe in self._resolve_targets(to, self._outgoing):
146
- self._runtime._on_message_enqueued(msg)
147
- await floe.queue.put(msg)
148
+ await self._runtime._send_to_floe(floe, msg)
148
149
 
149
150
  def emit_nowait(
150
151
  self, msg: Any, to: Node | Endpoint | Sequence[Node | Endpoint] | None = None
@@ -152,8 +153,7 @@ class Context:
152
153
  if self._runtime is None:
153
154
  raise RuntimeError("Context is not attached to a running flow")
154
155
  for floe in self._resolve_targets(to, self._outgoing):
155
- self._runtime._on_message_enqueued(msg)
156
- floe.queue.put_nowait(msg)
156
+ self._runtime._send_to_floe_nowait(floe, msg)
157
157
 
158
158
  async def emit_chunk(
159
159
  self,
@@ -301,6 +301,8 @@ class PenguiFlow:
301
301
  allow_cycles: bool = False,
302
302
  middlewares: Sequence[Middleware] | None = None,
303
303
  emit_errors_to_rookery: bool = False,
304
+ state_store: StateStore | None = None,
305
+ message_bus: MessageBus | None = None,
304
306
  ) -> None:
305
307
  self._queue_maxsize = queue_maxsize
306
308
  self._allow_cycles = allow_cycles
@@ -314,10 +316,14 @@ class PenguiFlow:
314
316
  self._middlewares: list[Middleware] = list(middlewares or [])
315
317
  self._trace_counts: dict[str, int] = {}
316
318
  self._trace_events: dict[str, asyncio.Event] = {}
317
- self._trace_invocations: dict[str, set[asyncio.Future[Any]]] = {}
319
+ self._trace_invocations: dict[str, set[asyncio.Task[Any]]] = {}
320
+ self._external_tasks: dict[str, set[asyncio.Future[Any]]] = {}
318
321
  self._trace_capacity_waiters: dict[str, list[asyncio.Event]] = {}
319
322
  self._latest_wm_hops: dict[str, int] = {}
320
323
  self._emit_errors_to_rookery = emit_errors_to_rookery
324
+ self._state_store = state_store
325
+ self._message_bus = message_bus
326
+ self._bus_tasks: set[asyncio.Task[None]] = set()
321
327
 
322
328
  self._build_graph(adjacencies)
323
329
 
@@ -487,6 +493,29 @@ class PenguiFlow:
487
493
  task.cancel()
488
494
  await asyncio.gather(*self._tasks, return_exceptions=True)
489
495
  self._tasks.clear()
496
+ if self._trace_invocations:
497
+ pending: list[asyncio.Task[Any]] = []
498
+ for invocation_tasks in self._trace_invocations.values():
499
+ for task in invocation_tasks:
500
+ if not task.done():
501
+ task.cancel()
502
+ pending.append(task)
503
+ if pending:
504
+ await asyncio.gather(*pending, return_exceptions=True)
505
+ self._trace_invocations.clear()
506
+ if self._external_tasks:
507
+ pending_ext: list[asyncio.Future[Any]] = []
508
+ for external_tasks in self._external_tasks.values():
509
+ for external_task in external_tasks:
510
+ if not external_task.done():
511
+ external_task.cancel()
512
+ pending_ext.append(external_task)
513
+ if pending_ext:
514
+ await asyncio.gather(*pending_ext, return_exceptions=True)
515
+ self._external_tasks.clear()
516
+ if self._bus_tasks:
517
+ await asyncio.gather(*self._bus_tasks, return_exceptions=True)
518
+ self._bus_tasks.clear()
490
519
  self._trace_counts.clear()
491
520
  self._trace_events.clear()
492
521
  self._trace_invocations.clear()
@@ -547,6 +576,84 @@ class PenguiFlow:
547
576
  await self._finalize_message(result)
548
577
  return result
549
578
 
579
async def load_history(self, trace_id: str) -> Sequence[StoredEvent]:
    """Return the persisted history for ``trace_id`` from the state store.

    Raises ``RuntimeError`` when the flow was built without a state store.
    """
    store = self._state_store
    if store is None:
        raise RuntimeError("PenguiFlow was created without a state_store")
    return await store.load_history(trace_id)
585
+
586
def ensure_trace_event(self, trace_id: str) -> asyncio.Event:
    """Return (and create if needed) the cancellation event for ``trace_id``."""
    event = self._trace_events.get(trace_id)
    if event is None:
        # First sighting of this trace: allocate its cancellation event.
        event = asyncio.Event()
        self._trace_events[trace_id] = event
    return event
590
+
591
def register_external_task(self, trace_id: str, task: asyncio.Future[Any]) -> None:
    """Track an externally created task for cancellation bookkeeping.

    The task is removed from the per-trace set when it finishes; the set
    itself is dropped once it empties.
    """
    if trace_id is None:
        return
    bucket = self._external_tasks.setdefault(trace_id, set())
    bucket.add(task)

    def _on_done(finished: asyncio.Future[Any]) -> None:
        # Prune the finished task and drop the bucket when empty.
        current = self._external_tasks.get(trace_id)
        if current is None:
            return
        current.discard(finished)
        if not current:
            self._external_tasks.pop(trace_id, None)

    task.add_done_callback(_on_done)
611
+
612
async def save_remote_binding(self, binding: RemoteBinding) -> None:
    """Persist a remote binding if a state store is configured.

    Best-effort: store failures are logged with the binding's identifiers
    rather than propagated, so remote calls never fail on persistence.
    """

    if self._state_store is None:
        return
    try:
        await self._state_store.save_remote_binding(binding)
    except Exception as exc:  # pragma: no cover - defensive logging
        logger.exception(
            "state_store_binding_failed",
            extra={
                "event": "state_store_binding_failed",
                "trace_id": binding.trace_id,
                "context_id": binding.context_id,
                "task_id": binding.task_id,
                "agent_url": binding.agent_url,
                "exception": repr(exc),
            },
        )
631
+
632
async def record_remote_event(
    self,
    *,
    event: str,
    node: Node,
    context: Context,
    trace_id: str | None,
    latency_ms: float | None,
    level: int = logging.INFO,
    extra: Mapping[str, Any] | None = None,
) -> None:
    """Emit a structured :class:`FlowEvent` for remote transport activity.

    Thin wrapper over ``_emit_event`` that fixes ``attempt`` to 0 (remote
    activity is not a retryable node attempt).
    """

    # Copy so the caller's mapping is never aliased downstream.
    payload = dict(extra or {})
    await self._emit_event(
        event=event,
        node=node,
        context=context,
        trace_id=trace_id,
        attempt=0,
        latency_ms=latency_ms,
        level=level,
        extra=payload,
    )
656
+
550
657
  async def _execute_with_reliability(
551
658
  self,
552
659
  node: Node,
@@ -805,16 +912,19 @@ class PenguiFlow:
805
912
  return await self._await_invocation(node, invocation, trace_id, timeout)
806
913
 
807
914
  def _register_invocation_task(
808
- self, trace_id: str, task: asyncio.Future[Any]
915
+ self, trace_id: str, task: asyncio.Task[Any]
809
916
  ) -> None:
810
- tasks = self._trace_invocations.setdefault(trace_id, set())
917
+ tasks = self._trace_invocations.get(trace_id)
918
+ if tasks is None:
919
+ tasks = set[asyncio.Task[Any]]()
920
+ self._trace_invocations[trace_id] = tasks
811
921
  tasks.add(task)
812
922
 
813
923
  def _cleanup(finished: asyncio.Future[Any]) -> None:
814
924
  remaining = self._trace_invocations.get(trace_id)
815
925
  if remaining is None:
816
926
  return
817
- remaining.discard(finished)
927
+ remaining.discard(cast(asyncio.Task[Any], finished))
818
928
  if not remaining:
819
929
  self._trace_invocations.pop(trace_id, None)
820
930
 
@@ -827,7 +937,7 @@ class PenguiFlow:
827
937
  trace_id: str,
828
938
  timeout: float | None,
829
939
  ) -> Any:
830
- invocation_task = asyncio.ensure_future(invocation)
940
+ invocation_task = cast(asyncio.Task[Any], asyncio.ensure_future(invocation))
831
941
  self._register_invocation_task(trace_id, invocation_task)
832
942
 
833
943
  cancel_event = self._trace_events.get(trace_id)
@@ -904,6 +1014,89 @@ class PenguiFlow:
904
1014
  self._trace_counts[trace_id] = self._trace_counts.get(trace_id, 0) + 1
905
1015
  self._trace_events.setdefault(trace_id, asyncio.Event())
906
1016
 
1017
+ def _node_label(self, node: Node | Endpoint | None) -> str | None:
1018
+ if node is None:
1019
+ return None
1020
+ name = getattr(node, "name", None)
1021
+ if name:
1022
+ return name
1023
+ return getattr(node, "node_id", None)
1024
+
1025
def _build_bus_envelope(
    self,
    source: Node | Endpoint | None,
    target: Node | Endpoint | None,
    message: Any,
) -> BusEnvelope:
    """Wrap ``message`` in a :class:`BusEnvelope` for the source->target hop."""
    source_name = self._node_label(source)
    target_name = self._node_label(target)
    # "*" stands in for unnamed endpoints so the edge label is always formed.
    edge = f"{source_name or '*'}->{target_name or '*'}"
    headers: Mapping[str, Any] | None = None
    meta: Mapping[str, Any] | None = None
    if isinstance(message, Message):
        # Copy headers/meta so bus consumers do not alias the live message.
        headers = message.headers.model_dump()
        meta = dict(message.meta)
    return BusEnvelope(
        edge=edge,
        source=source_name,
        target=target_name,
        trace_id=self._get_trace_id(message),
        payload=message,
        headers=headers,
        meta=meta,
    )
1048
+
1049
async def _publish_to_bus(
    self,
    source: Node | Endpoint | None,
    target: Node | Endpoint | None,
    message: Any,
) -> None:
    """Publish ``message`` to the configured bus; log (never raise) on failure.

    No-op when no message bus is configured.
    """
    if self._message_bus is None:
        return
    envelope = self._build_bus_envelope(source, target, message)
    try:
        await self._message_bus.publish(envelope)
    except Exception as exc:
        # Bus failures must not break in-process delivery; record and continue.
        logger.exception(
            "message_bus_publish_failed",
            extra={
                "event": "message_bus_publish_failed",
                "edge": envelope.edge,
                "trace_id": envelope.trace_id,
                "exception": repr(exc),
            },
        )
1070
+
1071
def _schedule_bus_publish(
    self,
    source: Node | Endpoint | None,
    target: Node | Endpoint | None,
    message: Any,
) -> None:
    """Fire-and-forget bus publish for synchronous emit paths.

    Requires a running event loop; the task is tracked in ``_bus_tasks``
    so ``stop()`` can await outstanding publishes.
    """
    if self._message_bus is None:
        return
    publish_task = asyncio.get_running_loop().create_task(
        self._publish_to_bus(source, target, message)
    )
    self._bus_tasks.add(publish_task)
    # Drop the bookkeeping reference once the publish settles.
    publish_task.add_done_callback(self._bus_tasks.discard)
1087
+
1088
async def _send_to_floe(self, floe: Floe, message: Any) -> None:
    """Account for ``message``, mirror it to the bus, then enqueue it on ``floe``."""
    self._on_message_enqueued(message)
    # Bus publish happens before the local enqueue (awaited, not scheduled).
    if self._message_bus is not None:
        await self._publish_to_bus(floe.source, floe.target, message)
    await floe.queue.put(message)
1093
+
1094
def _send_to_floe_nowait(self, floe: Floe, message: Any) -> None:
    """Synchronous variant of ``_send_to_floe`` for ``emit_nowait`` paths.

    The bus publish is scheduled as a background task (it cannot be awaited
    here); ``put_nowait`` may raise ``asyncio.QueueFull`` to the caller.
    """
    self._on_message_enqueued(message)
    if self._message_bus is not None:
        self._schedule_bus_publish(floe.source, floe.target, message)
    floe.queue.put_nowait(message)
1099
+
907
1100
  async def _finalize_message(self, message: Any) -> None:
908
1101
  trace_id = self._get_trace_id(message)
909
1102
  if trace_id is None:
@@ -1107,11 +1300,12 @@ class PenguiFlow:
1107
1300
  if floe is None and incoming:
1108
1301
  floe = next(iter(incoming.values()))
1109
1302
 
1110
- self._on_message_enqueued(message)
1111
-
1112
1303
  if floe is not None:
1113
- await floe.queue.put(message)
1304
+ await self._send_to_floe(floe, message)
1114
1305
  else:
1306
+ self._on_message_enqueued(message)
1307
+ if self._message_bus is not None:
1308
+ await self._publish_to_bus(source, ROOKERY, message)
1115
1309
  buffer = rookery_context._buffer
1116
1310
  buffer.append(message)
1117
1311
 
@@ -1167,6 +1361,21 @@ class PenguiFlow:
1167
1361
 
1168
1362
  logger.log(level, event, extra=event_obj.to_payload())
1169
1363
 
1364
+ if self._state_store is not None:
1365
+ stored_event = StoredEvent.from_flow_event(event_obj)
1366
+ try:
1367
+ await self._state_store.save_event(stored_event)
1368
+ except Exception as exc:
1369
+ logger.exception(
1370
+ "state_store_save_failed",
1371
+ extra={
1372
+ "event": "state_store_save_failed",
1373
+ "trace_id": stored_event.trace_id,
1374
+ "kind": stored_event.kind,
1375
+ "exception": repr(exc),
1376
+ },
1377
+ )
1378
+
1170
1379
  for middleware in list(self._middlewares):
1171
1380
  try:
1172
1381
  await middleware(event_obj)