penguiflow 2.0.0__tar.gz → 2.2.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of penguiflow might be problematic. Click here for more details.

Files changed (59) hide show
  1. {penguiflow-2.0.0 → penguiflow-2.2.0}/PKG-INFO +173 -4
  2. {penguiflow-2.0.0 → penguiflow-2.2.0}/README.md +165 -3
  3. {penguiflow-2.0.0 → penguiflow-2.2.0}/penguiflow/__init__.py +42 -2
  4. penguiflow-2.2.0/penguiflow/admin.py +174 -0
  5. penguiflow-2.2.0/penguiflow/bus.py +30 -0
  6. penguiflow-2.2.0/penguiflow/catalog.py +146 -0
  7. {penguiflow-2.0.0 → penguiflow-2.2.0}/penguiflow/core.py +261 -13
  8. penguiflow-2.2.0/penguiflow/debug.py +30 -0
  9. {penguiflow-2.0.0 → penguiflow-2.2.0}/penguiflow/metrics.py +9 -0
  10. penguiflow-2.2.0/penguiflow/middlewares.py +87 -0
  11. {penguiflow-2.0.0 → penguiflow-2.2.0}/penguiflow/registry.py +21 -0
  12. penguiflow-2.2.0/penguiflow/remote.py +486 -0
  13. penguiflow-2.2.0/penguiflow/state.py +64 -0
  14. {penguiflow-2.0.0 → penguiflow-2.2.0}/penguiflow/testkit.py +107 -2
  15. {penguiflow-2.0.0 → penguiflow-2.2.0}/penguiflow.egg-info/PKG-INFO +173 -4
  16. {penguiflow-2.0.0 → penguiflow-2.2.0}/penguiflow.egg-info/SOURCES.txt +16 -0
  17. penguiflow-2.2.0/penguiflow.egg-info/entry_points.txt +2 -0
  18. {penguiflow-2.0.0 → penguiflow-2.2.0}/penguiflow.egg-info/requires.txt +9 -0
  19. penguiflow-2.2.0/penguiflow.egg-info/top_level.txt +2 -0
  20. penguiflow-2.2.0/penguiflow_a2a/__init__.py +19 -0
  21. penguiflow-2.2.0/penguiflow_a2a/server.py +695 -0
  22. {penguiflow-2.0.0 → penguiflow-2.2.0}/pyproject.toml +29 -3
  23. penguiflow-2.2.0/tests/test_a2a_server.py +341 -0
  24. penguiflow-2.2.0/tests/test_catalog.py +61 -0
  25. {penguiflow-2.0.0 → penguiflow-2.2.0}/tests/test_core.py +52 -0
  26. penguiflow-2.2.0/tests/test_distribution_hooks.py +140 -0
  27. penguiflow-2.2.0/tests/test_metrics.py +133 -0
  28. {penguiflow-2.0.0 → penguiflow-2.2.0}/tests/test_middlewares.py +103 -1
  29. penguiflow-2.2.0/tests/test_planner_prompts.py +55 -0
  30. penguiflow-2.2.0/tests/test_property_based.py +172 -0
  31. penguiflow-2.2.0/tests/test_react_planner.py +845 -0
  32. penguiflow-2.2.0/tests/test_remote.py +216 -0
  33. penguiflow-2.2.0/tests/test_testkit.py +203 -0
  34. penguiflow-2.0.0/penguiflow/middlewares.py +0 -16
  35. penguiflow-2.0.0/penguiflow.egg-info/top_level.txt +0 -1
  36. penguiflow-2.0.0/tests/test_metrics.py +0 -41
  37. penguiflow-2.0.0/tests/test_testkit.py +0 -92
  38. {penguiflow-2.0.0 → penguiflow-2.2.0}/LICENSE +0 -0
  39. {penguiflow-2.0.0 → penguiflow-2.2.0}/penguiflow/errors.py +0 -0
  40. {penguiflow-2.0.0 → penguiflow-2.2.0}/penguiflow/node.py +0 -0
  41. {penguiflow-2.0.0 → penguiflow-2.2.0}/penguiflow/patterns.py +0 -0
  42. {penguiflow-2.0.0 → penguiflow-2.2.0}/penguiflow/policies.py +0 -0
  43. {penguiflow-2.0.0 → penguiflow-2.2.0}/penguiflow/streaming.py +0 -0
  44. {penguiflow-2.0.0 → penguiflow-2.2.0}/penguiflow/types.py +0 -0
  45. {penguiflow-2.0.0 → penguiflow-2.2.0}/penguiflow/viz.py +0 -0
  46. {penguiflow-2.0.0 → penguiflow-2.2.0}/penguiflow.egg-info/dependency_links.txt +0 -0
  47. {penguiflow-2.0.0 → penguiflow-2.2.0}/setup.cfg +0 -0
  48. {penguiflow-2.0.0 → penguiflow-2.2.0}/tests/test_budgets.py +0 -0
  49. {penguiflow-2.0.0 → penguiflow-2.2.0}/tests/test_cancel.py +0 -0
  50. {penguiflow-2.0.0 → penguiflow-2.2.0}/tests/test_controller.py +0 -0
  51. {penguiflow-2.0.0 → penguiflow-2.2.0}/tests/test_errors.py +0 -0
  52. {penguiflow-2.0.0 → penguiflow-2.2.0}/tests/test_metadata.py +0 -0
  53. {penguiflow-2.0.0 → penguiflow-2.2.0}/tests/test_node.py +0 -0
  54. {penguiflow-2.0.0 → penguiflow-2.2.0}/tests/test_patterns.py +0 -0
  55. {penguiflow-2.0.0 → penguiflow-2.2.0}/tests/test_registry.py +0 -0
  56. {penguiflow-2.0.0 → penguiflow-2.2.0}/tests/test_routing_policy.py +0 -0
  57. {penguiflow-2.0.0 → penguiflow-2.2.0}/tests/test_streaming.py +0 -0
  58. {penguiflow-2.0.0 → penguiflow-2.2.0}/tests/test_types.py +0 -0
  59. {penguiflow-2.0.0 → penguiflow-2.2.0}/tests/test_viz.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: penguiflow
3
- Version: 2.0.0
3
+ Version: 2.2.0
4
4
  Summary: Async agent orchestration primitives.
5
5
  Author: PenguiFlow Team
6
6
  License: MIT License
@@ -36,7 +36,14 @@ Requires-Dist: pytest>=7.4; extra == "dev"
36
36
  Requires-Dist: pytest-asyncio>=0.23; extra == "dev"
37
37
  Requires-Dist: pytest-cov>=4.0; extra == "dev"
38
38
  Requires-Dist: coverage[toml]>=7.0; extra == "dev"
39
+ Requires-Dist: hypothesis>=6.103; extra == "dev"
39
40
  Requires-Dist: ruff>=0.2; extra == "dev"
41
+ Requires-Dist: fastapi>=0.118; extra == "dev"
42
+ Requires-Dist: httpx>=0.27; extra == "dev"
43
+ Provides-Extra: a2a-server
44
+ Requires-Dist: fastapi>=0.118; extra == "a2a-server"
45
+ Provides-Extra: planner
46
+ Requires-Dist: litellm>=1.77.3; extra == "planner"
40
47
  Dynamic: license-file
41
48
 
42
49
  # PenguiFlow 🐧❄️
@@ -52,6 +59,9 @@ Dynamic: license-file
52
59
  <a href="https://github.com/penguiflow/penguiflow">
53
60
  <img src="https://img.shields.io/badge/coverage-85%25-brightgreen" alt="Coverage">
54
61
  </a>
62
+ <a href="https://nightly.link/penguiflow/penguiflow/workflows/benchmarks/main/benchmarks.json.zip">
63
+ <img src="https://img.shields.io/badge/benchmarks-latest-orange" alt="Benchmarks">
64
+ </a>
55
65
  <a href="https://pypi.org/project/penguiflow/">
56
66
  <img src="https://img.shields.io/pypi/v/penguiflow.svg" alt="PyPI version">
57
67
  </a>
@@ -77,10 +87,39 @@ It provides:
77
87
  * **Observability hooks** (`FlowEvent` callbacks for logging, MLflow, or custom metrics sinks)
78
88
  * **Policy-driven routing** (optional policies steer routers without breaking existing flows)
79
89
  * **Traceable exceptions** (`FlowError` captures node/trace metadata and optionally emits to Rookery)
90
+ * **Distribution hooks (opt-in)** — plug a `StateStore` to persist trace history and a
91
+ `MessageBus` to publish floe traffic for remote workers without changing existing flows.
92
+ * **Remote calls (opt-in)** — `RemoteNode` bridges the runtime to external agents through a
93
+ pluggable `RemoteTransport` interface (A2A-ready) while propagating streaming chunks and
94
+ cancellation.
95
+ * **A2A server adapter (opt-in)** — wrap a PenguiFlow graph in a FastAPI surface using
96
+ `penguiflow_a2a.A2AServerAdapter` so other agents can call `message/send`,
97
+ `message/stream`, and `tasks/cancel` while reusing the runtime's backpressure and
98
+ cancellation semantics.
99
+ * **Observability & ops polish** — remote calls emit structured metrics (latency, payload
100
+ sizes, cancel reasons) and the `penguiflow-admin` CLI replays trace history from any
101
+ configured `StateStore` for debugging.
80
102
 
81
103
  Built on pure `asyncio` (no threads), PenguiFlow is small, predictable, and repo-agnostic.
82
104
  Product repos only define **their models + node functions** — the core stays dependency-light.
83
105
 
106
+ ## Gold Standard Scorecard
107
+
108
+ | Area | Metric | Target | Current |
109
+ | --- | --- | --- | --- |
110
+ | Hop overhead | µs per hop | ≤ 500 | 398 |
111
+ | Streaming order | gaps/dupes | 0 | 0 |
112
+ | Cancel leakage | orphan tasks | 0 | 0 |
113
+ | Coverage | lines | ≥85% | 87% |
114
+ | Deps | count | ≤2 | 2 |
115
+ | Import time | ms | ≤220 | 203 |
116
+
117
+ ## 📑 Core Behavior Spec
118
+
119
+ * [Core Behavior Spec](docs/core_behavior_spec.md) — single-page rundown of ordering,
120
+ streaming, cancellation, deadline, and fan-in invariants with pointers to regression
121
+ tests.
122
+
84
123
  ---
85
124
 
86
125
  ## ✨ Why PenguiFlow?
@@ -168,6 +207,10 @@ print(out.payload) # PackOut(...)
168
207
  await flow.stop()
169
208
  ```
170
209
 
210
+ > **Opt-in distribution:** pass `state_store=` and/or `message_bus=` when calling
211
+ > `penguiflow.core.create(...)` to persist trace history and publish floe traffic
212
+ > without changing node logic.
213
+
171
214
  ---
172
215
 
173
216
  ## 🧭 Design Principles
@@ -222,6 +265,60 @@ sacrificing backpressure or ordering guarantees. The helper wraps the payload i
222
265
  increments per-stream sequence numbers. See `tests/test_streaming.py` and
223
266
  `examples/streaming_llm/` for an end-to-end walk-through.
224
267
 
268
+ ### Remote orchestration
269
+
270
+ Phase 2 introduces `RemoteNode` and the `RemoteTransport` protocol so flows can delegate
271
+ work to remote agents (e.g., the A2A JSON-RPC/SSE ecosystem) without changing existing
272
+ nodes. The helper records remote bindings via the `StateStore`, mirrors streaming
273
+ partials back into the graph, and propagates per-trace cancellation to remote tasks via
274
+ `RemoteTransport.cancel`. See `tests/test_remote.py` for reference in-memory transports.
275
+
276
+ ### Exposing a flow over A2A
277
+
278
+ Install the optional extra to expose PenguiFlow as an A2A-compatible FastAPI service:
279
+
280
+ ```bash
281
+ pip install "penguiflow[a2a-server]"
282
+ ```
283
+
284
+ Create the adapter and mount the routes:
285
+
286
+ ```python
287
+ from penguiflow import Message, Node, create
288
+ from penguiflow_a2a import A2AAgentCard, A2AServerAdapter, A2ASkill, create_a2a_app
289
+
290
+ async def orchestrate(message: Message, ctx):
291
+ await ctx.emit_chunk(parent=message, text="thinking...")
292
+ return {"result": "done"}
293
+
294
+ node = Node(orchestrate, name="main")
295
+ flow = create(node.to())
296
+
297
+ card = A2AAgentCard(
298
+ name="Main Agent",
299
+ description="Primary entrypoint for orchestration",
300
+ version="2.1.0",
301
+ skills=[A2ASkill(name="orchestrate", description="Handles orchestration")],
302
+ )
303
+
304
+ adapter = A2AServerAdapter(
305
+ flow,
306
+ agent_card=card,
307
+ agent_url="https://agent.example",
308
+ )
309
+ app = create_a2a_app(adapter)
310
+ ```
311
+
312
+ The generated FastAPI app implements:
313
+
314
+ * `GET /agent` for discovery (Agent Card)
315
+ * `POST /message/send` for unary execution
316
+ * `POST /message/stream` for SSE streaming
317
+ * `POST /tasks/cancel` to mirror cancellation into PenguiFlow traces
318
+
319
+ `A2AServerAdapter` reuses the runtime's `StateStore` hooks, so bindings between trace IDs
320
+ and external `taskId`/`contextId` pairs are persisted automatically.
321
+
225
322
  ### Reliability & guardrails
226
323
 
227
324
  PenguiFlow enforces reliability boundaries out of the box:
@@ -272,6 +369,70 @@ The new `penguiflow.testkit` module keeps unit tests tiny:
272
369
  The harness is covered by `tests/test_testkit.py` and demonstrated in
273
370
  `examples/testkit_demo/`.
274
371
 
372
+ ### JSON-only ReAct planner (Phase A)
373
+
374
+ Phase A introduces a lightweight planner loop that keeps PenguiFlow typed and
375
+ deterministic:
376
+
377
+ * `penguiflow.catalog.NodeSpec` + `build_catalog` turn registered nodes into
378
+ tool descriptors with JSON Schemas derived from your Pydantic models.
379
+ * `penguiflow.planner.ReactPlanner` drives a JSON-only ReAct loop over those
380
+ descriptors, validating every LLM action with Pydantic and replaying invalid
381
+ steps to request corrections.
382
+ * LiteLLM stays optional—install `penguiflow[planner]` or inject a custom
383
+ `llm_client` for deterministic/offline runs.
384
+
385
+ See `examples/react_minimal/` for a stubbed end-to-end run.
386
+
387
+ ### Trajectory summarisation & pause/resume (Phase B)
388
+
389
+ Phase B adds the tools you need for longer-running, approval-driven flows:
390
+
391
+ * **Token-aware summaries** — `Trajectory.compress()` keeps a compact state and
392
+ the planner can route summaries through a cheaper `summarizer_llm` before
393
+ asking for the next action.
394
+ * **`PlannerPause` contract** — nodes can call `await ctx.pause(...)` to return a
395
+ typed pause payload. Resume the run later with `ReactPlanner.resume(token, user_input=...)`.
396
+ * **Developer hints** — pass `planning_hints={...}` to enforce disallowed tools,
397
+ preferred ordering, or parallelism ceilings.
398
+
399
+ All three features are exercised in `examples/react_pause_resume/`, which runs
400
+ entirely offline with stubbed LLM responses.
401
+
402
+ ### Adaptive re-planning & budgets (Phase C)
403
+
404
+ Phase C closes the loop when things go sideways:
405
+
406
+ * **Structured failure feedback** — if a tool raises after exhausting its retries,
407
+ the planner records `{failure: {node, args, error_code, suggestion}}` and feeds
408
+ it back to the LLM, prompting a constrained re-plan instead of aborting.
409
+ * **Hard guardrails** — configure wall-clock deadlines and hop budgets directly
410
+ on `ReactPlanner`; attempts beyond the allotted hops surface deterministic
411
+ violations and ultimately finish with `reason="budget_exhausted"` alongside a
412
+ constraint snapshot.
413
+ * **Typed exit reasons** — runs now finish with one of
414
+ `answer_complete`, `no_path`, or `budget_exhausted`, keeping downstream code
415
+ simple and machine-checkable.
416
+
417
+ The new `examples/react_replan/` sample shows a retrieval timeout automatically
418
+ recovering via a cached index without leaving the JSON-only contract.
419
+
420
+ ### Parallel fan-out & joins (Phase D)
421
+
422
+ Phase D lets the planner propose sets of independent tool calls and join them
423
+ without leaving the typed surface area:
424
+
425
+ * **Parallel `plan` blocks** — the LLM can return `{"plan": [...]}` actions
426
+ where each branch is validated against the catalog and executed concurrently.
427
+ * **Typed joins** — provide a `{"join": {"node": ...}}` descriptor and the
428
+ planner will aggregate results, auto-populate fields like `expect`, `results`,
429
+ or `failures`, and feed branch metadata through `ctx.meta` for the join node.
430
+ * **Deterministic telemetry** — branch errors, pauses, and joins are recorded as
431
+ structured observations so follow-up actions can re-plan or finish cleanly.
432
+
433
+ See `examples/react_parallel/` for a shard fan-out that merges responses in one
434
+ round-trip.
435
+
275
436
 
276
437
  ## 🧭 Repo Structure
277
438
 
@@ -478,9 +639,15 @@ docs or diagramming pipelines.
478
639
  * **Structured `FlowEvent`s**: every node event carries `{ts, trace_id, node_name, event,
479
640
  latency_ms, q_depth_in, q_depth_out, attempt}` plus a mutable `extra` map for custom
480
641
  annotations.
642
+ * **Remote call telemetry**: `RemoteNode` executions emit extra metrics (latency, request
643
+ and response bytes, context/task identifiers, cancel reasons) so remote hops can be
644
+ traced end-to-end.
481
645
  * **Middleware hooks**: subscribe observers (e.g., MLflow) to the structured `FlowEvent`
482
646
  stream. See `examples/mlflow_metrics/` for an MLflow integration and
483
647
  `examples/reliability_middleware/` for a concrete timeout + retry walkthrough.
648
+ * **`penguiflow-admin` CLI**: inspect or replay stored trace history from any configured
649
+ `StateStore` (`penguiflow-admin history <trace>` or `penguiflow-admin replay <trace>`)
650
+ when debugging distributed runs.
484
651
 
485
652
  ---
486
653
 
@@ -488,9 +655,9 @@ docs or diagramming pipelines.
488
655
 
489
656
  - **In-process runtime**: there is no built-in distribution layer yet. Long-running CPU work should be delegated to your own pools or services.
490
657
  - **Registry-driven typing**: nodes default to validation. Provide a `ModelRegistry` when calling `flow.run(...)` or set `validate="none"` explicitly for untyped hops.
491
- - **Observability**: structured `FlowEvent` callbacks power logs/metrics; integrations with
492
- third-party stacks (OTel, Prometheus, Datadog) remain DIY. See the MLflow middleware
493
- example for a lightweight pattern.
658
+ - **Observability**: structured `FlowEvent` callbacks and the `penguiflow-admin` CLI power
659
+ local debugging; integrations with third-party stacks (OTel, Prometheus, Datadog) remain
660
+ DIY. See the MLflow middleware example for a lightweight pattern.
494
661
  - **Roadmap**: follow-up releases focus on optional distributed backends, deeper observability integrations, and additional playbook patterns. Contributions and proposals are welcome!
495
662
 
496
663
  ---
@@ -546,6 +713,8 @@ pytest -q
546
713
  * `examples/streaming_llm/`: mock LLM emitting streaming chunks to an SSE sink.
547
714
  * `examples/metadata_propagation/`: attaching and consuming `Message.meta` context.
548
715
  * `examples/visualizer/`: exports Mermaid + DOT diagrams with loop/subflow annotations.
716
+ * `examples/react_minimal/`: JSON-only ReactPlanner loop with a stubbed LLM.
717
+ * `examples/react_pause_resume/`: Phase B planner features with pause/resume and developer hints.
549
718
 
550
719
  ---
551
720
 
@@ -11,6 +11,9 @@
11
11
  <a href="https://github.com/penguiflow/penguiflow">
12
12
  <img src="https://img.shields.io/badge/coverage-85%25-brightgreen" alt="Coverage">
13
13
  </a>
14
+ <a href="https://nightly.link/penguiflow/penguiflow/workflows/benchmarks/main/benchmarks.json.zip">
15
+ <img src="https://img.shields.io/badge/benchmarks-latest-orange" alt="Benchmarks">
16
+ </a>
14
17
  <a href="https://pypi.org/project/penguiflow/">
15
18
  <img src="https://img.shields.io/pypi/v/penguiflow.svg" alt="PyPI version">
16
19
  </a>
@@ -36,10 +39,39 @@ It provides:
36
39
  * **Observability hooks** (`FlowEvent` callbacks for logging, MLflow, or custom metrics sinks)
37
40
  * **Policy-driven routing** (optional policies steer routers without breaking existing flows)
38
41
  * **Traceable exceptions** (`FlowError` captures node/trace metadata and optionally emits to Rookery)
42
+ * **Distribution hooks (opt-in)** — plug a `StateStore` to persist trace history and a
43
+ `MessageBus` to publish floe traffic for remote workers without changing existing flows.
44
+ * **Remote calls (opt-in)** — `RemoteNode` bridges the runtime to external agents through a
45
+ pluggable `RemoteTransport` interface (A2A-ready) while propagating streaming chunks and
46
+ cancellation.
47
+ * **A2A server adapter (opt-in)** — wrap a PenguiFlow graph in a FastAPI surface using
48
+ `penguiflow_a2a.A2AServerAdapter` so other agents can call `message/send`,
49
+ `message/stream`, and `tasks/cancel` while reusing the runtime's backpressure and
50
+ cancellation semantics.
51
+ * **Observability & ops polish** — remote calls emit structured metrics (latency, payload
52
+ sizes, cancel reasons) and the `penguiflow-admin` CLI replays trace history from any
53
+ configured `StateStore` for debugging.
39
54
 
40
55
  Built on pure `asyncio` (no threads), PenguiFlow is small, predictable, and repo-agnostic.
41
56
  Product repos only define **their models + node functions** — the core stays dependency-light.
42
57
 
58
+ ## Gold Standard Scorecard
59
+
60
+ | Area | Metric | Target | Current |
61
+ | --- | --- | --- | --- |
62
+ | Hop overhead | µs per hop | ≤ 500 | 398 |
63
+ | Streaming order | gaps/dupes | 0 | 0 |
64
+ | Cancel leakage | orphan tasks | 0 | 0 |
65
+ | Coverage | lines | ≥85% | 87% |
66
+ | Deps | count | ≤2 | 2 |
67
+ | Import time | ms | ≤220 | 203 |
68
+
69
+ ## 📑 Core Behavior Spec
70
+
71
+ * [Core Behavior Spec](docs/core_behavior_spec.md) — single-page rundown of ordering,
72
+ streaming, cancellation, deadline, and fan-in invariants with pointers to regression
73
+ tests.
74
+
43
75
  ---
44
76
 
45
77
  ## ✨ Why PenguiFlow?
@@ -127,6 +159,10 @@ print(out.payload) # PackOut(...)
127
159
  await flow.stop()
128
160
  ```
129
161
 
162
+ > **Opt-in distribution:** pass `state_store=` and/or `message_bus=` when calling
163
+ > `penguiflow.core.create(...)` to persist trace history and publish floe traffic
164
+ > without changing node logic.
165
+
130
166
  ---
131
167
 
132
168
  ## 🧭 Design Principles
@@ -181,6 +217,60 @@ sacrificing backpressure or ordering guarantees. The helper wraps the payload i
181
217
  increments per-stream sequence numbers. See `tests/test_streaming.py` and
182
218
  `examples/streaming_llm/` for an end-to-end walk-through.
183
219
 
220
+ ### Remote orchestration
221
+
222
+ Phase 2 introduces `RemoteNode` and the `RemoteTransport` protocol so flows can delegate
223
+ work to remote agents (e.g., the A2A JSON-RPC/SSE ecosystem) without changing existing
224
+ nodes. The helper records remote bindings via the `StateStore`, mirrors streaming
225
+ partials back into the graph, and propagates per-trace cancellation to remote tasks via
226
+ `RemoteTransport.cancel`. See `tests/test_remote.py` for reference in-memory transports.
227
+
228
+ ### Exposing a flow over A2A
229
+
230
+ Install the optional extra to expose PenguiFlow as an A2A-compatible FastAPI service:
231
+
232
+ ```bash
233
+ pip install "penguiflow[a2a-server]"
234
+ ```
235
+
236
+ Create the adapter and mount the routes:
237
+
238
+ ```python
239
+ from penguiflow import Message, Node, create
240
+ from penguiflow_a2a import A2AAgentCard, A2AServerAdapter, A2ASkill, create_a2a_app
241
+
242
+ async def orchestrate(message: Message, ctx):
243
+ await ctx.emit_chunk(parent=message, text="thinking...")
244
+ return {"result": "done"}
245
+
246
+ node = Node(orchestrate, name="main")
247
+ flow = create(node.to())
248
+
249
+ card = A2AAgentCard(
250
+ name="Main Agent",
251
+ description="Primary entrypoint for orchestration",
252
+ version="2.1.0",
253
+ skills=[A2ASkill(name="orchestrate", description="Handles orchestration")],
254
+ )
255
+
256
+ adapter = A2AServerAdapter(
257
+ flow,
258
+ agent_card=card,
259
+ agent_url="https://agent.example",
260
+ )
261
+ app = create_a2a_app(adapter)
262
+ ```
263
+
264
+ The generated FastAPI app implements:
265
+
266
+ * `GET /agent` for discovery (Agent Card)
267
+ * `POST /message/send` for unary execution
268
+ * `POST /message/stream` for SSE streaming
269
+ * `POST /tasks/cancel` to mirror cancellation into PenguiFlow traces
270
+
271
+ `A2AServerAdapter` reuses the runtime's `StateStore` hooks, so bindings between trace IDs
272
+ and external `taskId`/`contextId` pairs are persisted automatically.
273
+
184
274
  ### Reliability & guardrails
185
275
 
186
276
  PenguiFlow enforces reliability boundaries out of the box:
@@ -231,6 +321,70 @@ The new `penguiflow.testkit` module keeps unit tests tiny:
231
321
  The harness is covered by `tests/test_testkit.py` and demonstrated in
232
322
  `examples/testkit_demo/`.
233
323
 
324
+ ### JSON-only ReAct planner (Phase A)
325
+
326
+ Phase A introduces a lightweight planner loop that keeps PenguiFlow typed and
327
+ deterministic:
328
+
329
+ * `penguiflow.catalog.NodeSpec` + `build_catalog` turn registered nodes into
330
+ tool descriptors with JSON Schemas derived from your Pydantic models.
331
+ * `penguiflow.planner.ReactPlanner` drives a JSON-only ReAct loop over those
332
+ descriptors, validating every LLM action with Pydantic and replaying invalid
333
+ steps to request corrections.
334
+ * LiteLLM stays optional—install `penguiflow[planner]` or inject a custom
335
+ `llm_client` for deterministic/offline runs.
336
+
337
+ See `examples/react_minimal/` for a stubbed end-to-end run.
338
+
339
+ ### Trajectory summarisation & pause/resume (Phase B)
340
+
341
+ Phase B adds the tools you need for longer-running, approval-driven flows:
342
+
343
+ * **Token-aware summaries** — `Trajectory.compress()` keeps a compact state and
344
+ the planner can route summaries through a cheaper `summarizer_llm` before
345
+ asking for the next action.
346
+ * **`PlannerPause` contract** — nodes can call `await ctx.pause(...)` to return a
347
+ typed pause payload. Resume the run later with `ReactPlanner.resume(token, user_input=...)`.
348
+ * **Developer hints** — pass `planning_hints={...}` to enforce disallowed tools,
349
+ preferred ordering, or parallelism ceilings.
350
+
351
+ All three features are exercised in `examples/react_pause_resume/`, which runs
352
+ entirely offline with stubbed LLM responses.
353
+
354
+ ### Adaptive re-planning & budgets (Phase C)
355
+
356
+ Phase C closes the loop when things go sideways:
357
+
358
+ * **Structured failure feedback** — if a tool raises after exhausting its retries,
359
+ the planner records `{failure: {node, args, error_code, suggestion}}` and feeds
360
+ it back to the LLM, prompting a constrained re-plan instead of aborting.
361
+ * **Hard guardrails** — configure wall-clock deadlines and hop budgets directly
362
+ on `ReactPlanner`; attempts beyond the allotted hops surface deterministic
363
+ violations and ultimately finish with `reason="budget_exhausted"` alongside a
364
+ constraint snapshot.
365
+ * **Typed exit reasons** — runs now finish with one of
366
+ `answer_complete`, `no_path`, or `budget_exhausted`, keeping downstream code
367
+ simple and machine-checkable.
368
+
369
+ The new `examples/react_replan/` sample shows a retrieval timeout automatically
370
+ recovering via a cached index without leaving the JSON-only contract.
371
+
372
+ ### Parallel fan-out & joins (Phase D)
373
+
374
+ Phase D lets the planner propose sets of independent tool calls and join them
375
+ without leaving the typed surface area:
376
+
377
+ * **Parallel `plan` blocks** — the LLM can return `{"plan": [...]}` actions
378
+ where each branch is validated against the catalog and executed concurrently.
379
+ * **Typed joins** — provide a `{"join": {"node": ...}}` descriptor and the
380
+ planner will aggregate results, auto-populate fields like `expect`, `results`,
381
+ or `failures`, and feed branch metadata through `ctx.meta` for the join node.
382
+ * **Deterministic telemetry** — branch errors, pauses, and joins are recorded as
383
+ structured observations so follow-up actions can re-plan or finish cleanly.
384
+
385
+ See `examples/react_parallel/` for a shard fan-out that merges responses in one
386
+ round-trip.
387
+
234
388
 
235
389
  ## 🧭 Repo Structure
236
390
 
@@ -437,9 +591,15 @@ docs or diagramming pipelines.
437
591
  * **Structured `FlowEvent`s**: every node event carries `{ts, trace_id, node_name, event,
438
592
  latency_ms, q_depth_in, q_depth_out, attempt}` plus a mutable `extra` map for custom
439
593
  annotations.
594
+ * **Remote call telemetry**: `RemoteNode` executions emit extra metrics (latency, request
595
+ and response bytes, context/task identifiers, cancel reasons) so remote hops can be
596
+ traced end-to-end.
440
597
  * **Middleware hooks**: subscribe observers (e.g., MLflow) to the structured `FlowEvent`
441
598
  stream. See `examples/mlflow_metrics/` for an MLflow integration and
442
599
  `examples/reliability_middleware/` for a concrete timeout + retry walkthrough.
600
+ * **`penguiflow-admin` CLI**: inspect or replay stored trace history from any configured
601
+ `StateStore` (`penguiflow-admin history <trace>` or `penguiflow-admin replay <trace>`)
602
+ when debugging distributed runs.
443
603
 
444
604
  ---
445
605
 
@@ -447,9 +607,9 @@ docs or diagramming pipelines.
447
607
 
448
608
  - **In-process runtime**: there is no built-in distribution layer yet. Long-running CPU work should be delegated to your own pools or services.
449
609
  - **Registry-driven typing**: nodes default to validation. Provide a `ModelRegistry` when calling `flow.run(...)` or set `validate="none"` explicitly for untyped hops.
450
- - **Observability**: structured `FlowEvent` callbacks power logs/metrics; integrations with
451
- third-party stacks (OTel, Prometheus, Datadog) remain DIY. See the MLflow middleware
452
- example for a lightweight pattern.
610
+ - **Observability**: structured `FlowEvent` callbacks and the `penguiflow-admin` CLI power
611
+ local debugging; integrations with third-party stacks (OTel, Prometheus, Datadog) remain
612
+ DIY. See the MLflow middleware example for a lightweight pattern.
453
613
  - **Roadmap**: follow-up releases focus on optional distributed backends, deeper observability integrations, and additional playbook patterns. Contributions and proposals are welcome!
454
614
 
455
615
  ---
@@ -505,6 +665,8 @@ pytest -q
505
665
  * `examples/streaming_llm/`: mock LLM emitting streaming chunks to an SSE sink.
506
666
  * `examples/metadata_propagation/`: attaching and consuming `Message.meta` context.
507
667
  * `examples/visualizer/`: exports Mermaid + DOT diagrams with loop/subflow annotations.
668
+ * `examples/react_minimal/`: JSON-only ReactPlanner loop with a stubbed LLM.
669
+ * `examples/react_pause_resume/`: Phase B planner features with pause/resume and developer hints.
508
670
 
509
671
  ---
510
672
 
@@ -3,6 +3,8 @@
3
3
  from __future__ import annotations
4
4
 
5
5
  from . import testkit
6
+ from .bus import BusEnvelope, MessageBus
7
+ from .catalog import NodeSpec, SideEffect, build_catalog, tool
6
8
  from .core import (
7
9
  DEFAULT_QUEUE_MAXSIZE,
8
10
  Context,
@@ -11,13 +13,29 @@ from .core import (
11
13
  call_playbook,
12
14
  create,
13
15
  )
16
+ from .debug import format_flow_event
14
17
  from .errors import FlowError, FlowErrorCode
15
18
  from .metrics import FlowEvent
16
- from .middlewares import Middleware
19
+ from .middlewares import LatencyCallback, Middleware, log_flow_events
17
20
  from .node import Node, NodePolicy
18
21
  from .patterns import join_k, map_concurrent, predicate_router, union_router
22
+ from .planner import (
23
+ PlannerAction,
24
+ PlannerFinish,
25
+ ReactPlanner,
26
+ Trajectory,
27
+ TrajectoryStep,
28
+ )
19
29
  from .policies import DictRoutingPolicy, RoutingPolicy, RoutingRequest
20
30
  from .registry import ModelRegistry
31
+ from .remote import (
32
+ RemoteCallRequest,
33
+ RemoteCallResult,
34
+ RemoteNode,
35
+ RemoteStreamEvent,
36
+ RemoteTransport,
37
+ )
38
+ from .state import RemoteBinding, StateStore, StoredEvent
21
39
  from .streaming import (
22
40
  chunk_to_ws_json,
23
41
  emit_stream_events,
@@ -36,10 +54,19 @@ __all__ = [
36
54
  "Node",
37
55
  "NodePolicy",
38
56
  "ModelRegistry",
57
+ "NodeSpec",
58
+ "SideEffect",
59
+ "build_catalog",
60
+ "tool",
39
61
  "Middleware",
62
+ "log_flow_events",
63
+ "LatencyCallback",
40
64
  "FlowEvent",
65
+ "format_flow_event",
41
66
  "FlowError",
42
67
  "FlowErrorCode",
68
+ "MessageBus",
69
+ "BusEnvelope",
43
70
  "call_playbook",
44
71
  "Headers",
45
72
  "Message",
@@ -63,6 +90,19 @@ __all__ = [
63
90
  "flow_to_dot",
64
91
  "create",
65
92
  "testkit",
93
+ "StateStore",
94
+ "StoredEvent",
95
+ "RemoteBinding",
96
+ "RemoteTransport",
97
+ "RemoteCallRequest",
98
+ "RemoteCallResult",
99
+ "RemoteStreamEvent",
100
+ "RemoteNode",
101
+ "ReactPlanner",
102
+ "PlannerAction",
103
+ "PlannerFinish",
104
+ "Trajectory",
105
+ "TrajectoryStep",
66
106
  ]
67
107
 
68
- __version__ = "2.0.0"
108
+ __version__ = "2.2.0"