flw-studio 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- flw_studio-0.1.0/CHANGELOG.md +331 -0
- flw_studio-0.1.0/LICENSE +21 -0
- flw_studio-0.1.0/MANIFEST.in +8 -0
- flw_studio-0.1.0/PKG-INFO +475 -0
- flw_studio-0.1.0/README.md +441 -0
- flw_studio-0.1.0/agentic_flow/__init__.py +186 -0
- flw_studio-0.1.0/agentic_flow/cli.py +232 -0
- flw_studio-0.1.0/agentic_flow/console.py +114 -0
- flw_studio-0.1.0/agentic_flow/core/__init__.py +6 -0
- flw_studio-0.1.0/agentic_flow/core/agent.py +357 -0
- flw_studio-0.1.0/agentic_flow/core/authority.py +210 -0
- flw_studio-0.1.0/agentic_flow/core/checkpoint.py +162 -0
- flw_studio-0.1.0/agentic_flow/core/context.py +59 -0
- flw_studio-0.1.0/agentic_flow/core/cursor_format.py +208 -0
- flw_studio-0.1.0/agentic_flow/core/events.py +181 -0
- flw_studio-0.1.0/agentic_flow/core/graph.py +657 -0
- flw_studio-0.1.0/agentic_flow/core/limits.py +97 -0
- flw_studio-0.1.0/agentic_flow/core/providers/__init__.py +27 -0
- flw_studio-0.1.0/agentic_flow/core/providers/anthropic_provider.py +103 -0
- flw_studio-0.1.0/agentic_flow/core/providers/base.py +128 -0
- flw_studio-0.1.0/agentic_flow/core/providers/gemini_provider.py +157 -0
- flw_studio-0.1.0/agentic_flow/core/retry.py +92 -0
- flw_studio-0.1.0/agentic_flow/core/runtime.py +883 -0
- flw_studio-0.1.0/agentic_flow/core/schema.py +118 -0
- flw_studio-0.1.0/agentic_flow/core/secrets.py +241 -0
- flw_studio-0.1.0/agentic_flow/core/store.py +799 -0
- flw_studio-0.1.0/agentic_flow/core/templating.py +82 -0
- flw_studio-0.1.0/agentic_flow/core/tools/__init__.py +31 -0
- flw_studio-0.1.0/agentic_flow/core/tools/builtin/__init__.py +27 -0
- flw_studio-0.1.0/agentic_flow/core/tools/builtin/messaging.py +50 -0
- flw_studio-0.1.0/agentic_flow/core/tools/builtin/state.py +47 -0
- flw_studio-0.1.0/agentic_flow/core/tools/builtin/todos.py +115 -0
- flw_studio-0.1.0/agentic_flow/core/tools/core.py +282 -0
- flw_studio-0.1.0/agentic_flow/core/tools/mcp.py +176 -0
- flw_studio-0.1.0/agentic_flow/loader.py +1130 -0
- flw_studio-0.1.0/agentic_flow/log.py +108 -0
- flw_studio-0.1.0/agentic_flow/pipeline.py +145 -0
- flw_studio-0.1.0/agentic_flow/program.py +256 -0
- flw_studio-0.1.0/docs/ARCHITECTURE.md +406 -0
- flw_studio-0.1.0/docs/CONTEXT.md +277 -0
- flw_studio-0.1.0/docs/FEATURES.md +991 -0
- flw_studio-0.1.0/docs/INTERNALS.md +929 -0
- flw_studio-0.1.0/docs/STATE.md +562 -0
- flw_studio-0.1.0/docs/adr/0001-graph-cursor-resume.md +184 -0
- flw_studio-0.1.0/docs/adr/0002-channels-and-reducers.md +165 -0
- flw_studio-0.1.0/docs/adr/0003-secrets-and-config.md +185 -0
- flw_studio-0.1.0/docs/adr/0004-core-and-layer2-boundary.md +171 -0
- flw_studio-0.1.0/docs/features/0001-channels-surface.md +56 -0
- flw_studio-0.1.0/docs/features/0002-subgraph-execution.md +71 -0
- flw_studio-0.1.0/docs/features/0003-mcp-tools.md +73 -0
- flw_studio-0.1.0/docs/features/0004-node-output-validation.md +71 -0
- flw_studio-0.1.0/docs/features/0006-shared-transcript.md +75 -0
- flw_studio-0.1.0/docs/features/0007-input-ref-check.md +85 -0
- flw_studio-0.1.0/docs/features/0008-secrets-config.md +130 -0
- flw_studio-0.1.0/docs/features/0009-execution-tracing.md +123 -0
- flw_studio-0.1.0/docs/features/0010-core-layer2-split.md +126 -0
- flw_studio-0.1.0/docs/features/README.md +58 -0
- flw_studio-0.1.0/docs/known-issues.md +90 -0
- flw_studio-0.1.0/docs/research/inter-agent-data-passing.md +305 -0
- flw_studio-0.1.0/docs/research/sample-checkpoints/README.md +111 -0
- flw_studio-0.1.0/examples/README.md +116 -0
- flw_studio-0.1.0/examples/__init__.py +0 -0
- flw_studio-0.1.0/examples/_common.py +41 -0
- flw_studio-0.1.0/examples/approval/__init__.py +0 -0
- flw_studio-0.1.0/examples/approval/pipeline.yaml +55 -0
- flw_studio-0.1.0/examples/approval/run.py +70 -0
- flw_studio-0.1.0/examples/approval/tools.py +49 -0
- flw_studio-0.1.0/examples/data_pipeline/__init__.py +0 -0
- flw_studio-0.1.0/examples/data_pipeline/out/north.md +12 -0
- flw_studio-0.1.0/examples/data_pipeline/pipeline.yaml +58 -0
- flw_studio-0.1.0/examples/data_pipeline/run.py +18 -0
- flw_studio-0.1.0/examples/data_pipeline/tools.py +164 -0
- flw_studio-0.1.0/examples/delegation/README.md +60 -0
- flw_studio-0.1.0/examples/delegation/__init__.py +0 -0
- flw_studio-0.1.0/examples/delegation/pipeline.yaml +48 -0
- flw_studio-0.1.0/examples/delegation/run.py +17 -0
- flw_studio-0.1.0/examples/delegation/tools.py +37 -0
- flw_studio-0.1.0/examples/graph_branch/__init__.py +0 -0
- flw_studio-0.1.0/examples/graph_branch/out/SHIPPED.md +3 -0
- flw_studio-0.1.0/examples/graph_branch/pipeline.yaml +154 -0
- flw_studio-0.1.0/examples/graph_branch/run.py +19 -0
- flw_studio-0.1.0/examples/graph_branch/tools.py +73 -0
- flw_studio-0.1.0/examples/incident_replan/__init__.py +0 -0
- flw_studio-0.1.0/examples/incident_replan/out/PAGE.txt +2 -0
- flw_studio-0.1.0/examples/incident_replan/out/POSTMORTEM.md +13 -0
- flw_studio-0.1.0/examples/incident_replan/out/incident_log.md +459 -0
- flw_studio-0.1.0/examples/incident_replan/pipeline.yaml +137 -0
- flw_studio-0.1.0/examples/incident_replan/run.py +33 -0
- flw_studio-0.1.0/examples/incident_replan/tools.py +70 -0
- flw_studio-0.1.0/examples/quickstart/__init__.py +0 -0
- flw_studio-0.1.0/examples/quickstart/pipeline.yaml +45 -0
- flw_studio-0.1.0/examples/quickstart/run.py +15 -0
- flw_studio-0.1.0/examples/quickstart/tools.py +33 -0
- flw_studio-0.1.0/examples/refine_loop/__init__.py +0 -0
- flw_studio-0.1.0/examples/refine_loop/pipeline.yaml +100 -0
- flw_studio-0.1.0/examples/refine_loop/run.py +21 -0
- flw_studio-0.1.0/examples/refine_loop/tools.py +43 -0
- flw_studio-0.1.0/examples/router/__init__.py +0 -0
- flw_studio-0.1.0/examples/router/pipeline.yaml +112 -0
- flw_studio-0.1.0/examples/router/run.py +27 -0
- flw_studio-0.1.0/examples/secrets_demo/__init__.py +0 -0
- flw_studio-0.1.0/examples/secrets_demo/pipeline.yaml +38 -0
- flw_studio-0.1.0/examples/secrets_demo/run.py +20 -0
- flw_studio-0.1.0/examples/secrets_demo/token.secret +1 -0
- flw_studio-0.1.0/examples/secrets_demo/tools.py +35 -0
- flw_studio-0.1.0/examples/shared_state/__init__.py +0 -0
- flw_studio-0.1.0/examples/shared_state/pipeline.yaml +95 -0
- flw_studio-0.1.0/examples/shared_state/run.py +17 -0
- flw_studio-0.1.0/examples/shared_state/tools.py +47 -0
- flw_studio-0.1.0/examples/shared_transcript/__init__.py +0 -0
- flw_studio-0.1.0/examples/shared_transcript/pipeline.yaml +140 -0
- flw_studio-0.1.0/examples/shared_transcript/run.py +26 -0
- flw_studio-0.1.0/examples/structured_output/__init__.py +0 -0
- flw_studio-0.1.0/examples/structured_output/pipeline.yaml +33 -0
- flw_studio-0.1.0/examples/structured_output/run.py +11 -0
- flw_studio-0.1.0/examples/subgraph_intake/__init__.py +0 -0
- flw_studio-0.1.0/examples/subgraph_intake/pipeline.yaml +69 -0
- flw_studio-0.1.0/examples/subgraph_intake/run.py +22 -0
- flw_studio-0.1.0/examples/subgraph_intake/tools.py +78 -0
- flw_studio-0.1.0/examples/team_todos/__init__.py +0 -0
- flw_studio-0.1.0/examples/team_todos/pipeline.yaml +113 -0
- flw_studio-0.1.0/examples/team_todos/run.py +19 -0
- flw_studio-0.1.0/examples/team_todos/run_stream.py +54 -0
- flw_studio-0.1.0/examples/team_todos/tools.py +27 -0
- flw_studio-0.1.0/examples/tool_steps/__init__.py +0 -0
- flw_studio-0.1.0/examples/tool_steps/out/fly-higher-today.txt +1 -0
- flw_studio-0.1.0/examples/tool_steps/out/north.md +12 -0
- flw_studio-0.1.0/examples/tool_steps/pipeline.yaml +40 -0
- flw_studio-0.1.0/examples/tool_steps/run.py +13 -0
- flw_studio-0.1.0/examples/tool_steps/tools.py +38 -0
- flw_studio-0.1.0/flw_studio.egg-info/PKG-INFO +475 -0
- flw_studio-0.1.0/flw_studio.egg-info/SOURCES.txt +157 -0
- flw_studio-0.1.0/flw_studio.egg-info/dependency_links.txt +1 -0
- flw_studio-0.1.0/flw_studio.egg-info/entry_points.txt +2 -0
- flw_studio-0.1.0/flw_studio.egg-info/requires.txt +9 -0
- flw_studio-0.1.0/flw_studio.egg-info/top_level.txt +1 -0
- flw_studio-0.1.0/pyproject.toml +51 -0
- flw_studio-0.1.0/setup.cfg +4 -0
- flw_studio-0.1.0/tests/test_channels.py +122 -0
- flw_studio-0.1.0/tests/test_cli.py +157 -0
- flw_studio-0.1.0/tests/test_coordination.py +366 -0
- flw_studio-0.1.0/tests/test_events.py +111 -0
- flw_studio-0.1.0/tests/test_example_replan.py +186 -0
- flw_studio-0.1.0/tests/test_graph.py +286 -0
- flw_studio-0.1.0/tests/test_input_refs.py +325 -0
- flw_studio-0.1.0/tests/test_layering.py +200 -0
- flw_studio-0.1.0/tests/test_loader.py +754 -0
- flw_studio-0.1.0/tests/test_lowering.py +827 -0
- flw_studio-0.1.0/tests/test_mcp.py +398 -0
- flw_studio-0.1.0/tests/test_node_schema.py +179 -0
- flw_studio-0.1.0/tests/test_observability.py +308 -0
- flw_studio-0.1.0/tests/test_replan.py +599 -0
- flw_studio-0.1.0/tests/test_route.py +333 -0
- flw_studio-0.1.0/tests/test_runtime.py +736 -0
- flw_studio-0.1.0/tests/test_secrets.py +375 -0
- flw_studio-0.1.0/tests/test_store.py +629 -0
- flw_studio-0.1.0/tests/test_subgraph.py +422 -0
- flw_studio-0.1.0/tests/test_transcript.py +345 -0
- flw_studio-0.1.0/tests/test_units.py +848 -0
|
@@ -0,0 +1,331 @@
|
|
|
1
|
+
# Changelog
|
|
2
|
+
|
|
3
|
+
All notable changes to **agentic-flow** are recorded here.
|
|
4
|
+
|
|
5
|
+
The format follows [Keep a Changelog](https://keepachangelog.com/en/1.1.0/), and
|
|
6
|
+
the project aims to follow [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
|
|
7
|
+
|
|
8
|
+
## [Unreleased]
|
|
9
|
+
|
|
10
|
+
## [0.1.0] - 2026-06-21
|
|
11
|
+
|
|
12
|
+
Initial release: declare agents (system prompt + tools) in YAML and run them as an
|
|
13
|
+
orchestrated **graph** over one shared `Store`, on Gemini (default) or Anthropic.
|
|
14
|
+
|
|
15
|
+
### Added
|
|
16
|
+
|
|
17
|
+
**The flow graph**
|
|
18
|
+
- **Graph IR** (`agentic_flow/graph.py`): a `Node` (one body per kind —
|
|
19
|
+
`agent`/`tool`/`human`) plus three `Edge` kinds — **static** (`{from, to}`),
|
|
20
|
+
**conditional** (`{from, to, when[, else]}`; a back-edge is a loop), and
|
|
21
|
+
**model-driven** (`{from, route: {by, to}}`). `Graph` bundles a `nodes` dict + an
|
|
22
|
+
`edges` **tuple** (declaration order is load-bearing for determinism) + an `entry`
|
|
23
|
+
+ an `Authority`. First-violation `validate()` with stable `GraphError` codes
|
|
24
|
+
(reachability, dead-end, reserved id, route-target-is-a-node, undeclared refs,
|
|
25
|
+
zero-visits, …), `cycles()`, and `render_graph` (the `--dry-run` view).
|
|
26
|
+
`END = "__end__"` is the terminal sentinel.
|
|
27
|
+
- **Two authoring surfaces, one engine** (`agentic_flow/loader.py`):
|
|
28
|
+
`build_program(config, registry)` → a validated `Graph` + the agent map. A
|
|
29
|
+
`graph:` block is the explicit node/edge surface; a `pipeline:` block is linear
|
|
30
|
+
**lowering sugar** — `when:` → a `cond:` control node + conditional edges, `loop:`
|
|
31
|
+
→ the body inlined + a `loopcheck:` control node with a `max_visits`-bounded
|
|
32
|
+
back-edge (do-while semantics, including a graceful cap-exit so a non-converging
|
|
33
|
+
loop exits at the cap and the run *continues* rather than failing), `human:` → a pause node. Synthetic control
|
|
34
|
+
nodes are excluded from `max_steps`. One `LoaderError` family wraps name/shape +
|
|
35
|
+
topology errors.
|
|
36
|
+
- **Executable `subgraph:` nodes — inlined at load** (`loader.py`; closes
|
|
37
|
+
known-issue #2): a `graph:` node may declare a reusable nested graph as a
|
|
38
|
+
`subgraph:` body (its own `entry`/`nodes`/`edges`). `_inline_subgraphs` splices it
|
|
39
|
+
into one flat graph at load — like `pipeline:` lowering — with prefix-qualified ids
|
|
40
|
+
`<callsite>/<inner>`, so **no `SubgraphBody` reaches the runtime**. The seam rewires:
|
|
41
|
+
an edge *into* the node retargets to the inner entry; an inner edge to the inner
|
|
42
|
+
`__end__` folds to a minted `<callsite>/__exit__` anchor that carries the node's
|
|
43
|
+
original out-edges (any kind), re-converging on the parent continuation. The flat
|
|
44
|
+
frontier checkpoints at every inner node, so **a crash resumes inside the subgraph**;
|
|
45
|
+
nested subgraphs compose prefixes (`outer/inner/x`) and the same block at two call
|
|
46
|
+
sites gets two prefixes (no id clash). Subgraphs share the one run `Store` (a
|
|
47
|
+
deliberate difference from test_flow); a `subgraph:` node that also sets `output:` is
|
|
48
|
+
a `LoaderError`. Validates with `allow_synthetic=True`; a hand-authored `/` id stays
|
|
49
|
+
`RESERVED_ID`; `render_graph`/`--dry-run` show the qualified ids.
|
|
50
|
+
- **Node `output_schema` validation** (`agentic_flow/schema.py`, `graph.py`, `loader.py`,
|
|
51
|
+
`runtime.py`; feature 0004): a `graph:` node may declare an `output_schema` — a
|
|
52
|
+
validation contract on whatever it produces. The runtime checks the node's output
|
|
53
|
+
against it **just before the reducer merge**; a mismatch is a terminal
|
|
54
|
+
`NodeOutputError` (`status:"failed"`) naming the node and the violation, so a bad
|
|
55
|
+
shape never enters shared state. The validator (`schema.validate_value(schema, value)
|
|
56
|
+
-> list[str]`) is a small, **dependency-free** JSON-Schema subset — `type`
|
|
57
|
+
(object/array/string/number/integer/boolean/null), object `properties` + `required`,
|
|
58
|
+
array `items`, `enum` — with path + expected/got messages (no `jsonschema` dep). It is
|
|
59
|
+
**validation-only**: an agent node still needs its own `output_schema` to emit a
|
|
60
|
+
`dict` (an agent returning text where an object is required fails with a hint). A node
|
|
61
|
+
without `output_schema` runs unchanged; a malformed schema is a `LoaderError` at load.
|
|
62
|
+
|
|
63
|
+
**The data plane**
|
|
64
|
+
- **Channels + reducers** (`agentic_flow/store.py`,
|
|
65
|
+
**[ADR 0002](docs/adr/0002-channels-and-reducers.md)**): each state key is a
|
|
66
|
+
`Channel` — a value, a monotonic `version`, and a `Reducer` (`last_write` /
|
|
67
|
+
`append` / `merge` / `numeric_add` / `set_union`). `Store` is the `MutableMapping`
|
|
68
|
+
over those channels (templating/tools ride the same seam) and composes the
|
|
69
|
+
`MessageLog` + `TodoBoard`. Reducers **fail loud** (`ReducerTypeError` *before*
|
|
70
|
+
mutating); selection is author-only. The snapshot persists the reducer name and
|
|
71
|
+
re-coerces on restore; `from_dict` rejects an unknown version
|
|
72
|
+
(`UnsupportedSnapshotVersion`). An all-`last_write` store is a plain flat
|
|
73
|
+
last-writer-wins state — the common case (a `pipeline:` declares no reducer).
|
|
74
|
+
- **`channels:` YAML surface + `add_messages` reducer** (`store.py`, `loader.py`,
|
|
75
|
+
`program.py`; closes known-issue #3): a `graph:` author declares a channel's
|
|
76
|
+
reducer in a top-level `channels:` block (`name: { reducer: X }` or shorthand
|
|
77
|
+
`name: X`), so two nodes can write the same `output:` and the reducer accumulates
|
|
78
|
+
instead of clobbering. The loader checks each reducer against `REDUCERS` (unknown
|
|
79
|
+
→ a clear `LoaderError`) and flags a node output to an undeclared channel as
|
|
80
|
+
`CHANNEL_UNDECLARED` before the run; the `Orchestrator` applies the declared
|
|
81
|
+
reducers to the run's `Store` (via `Store.declare`) before execution. New
|
|
82
|
+
`add_messages` reducer: accumulate a message transcript by `id` — append, replace
|
|
83
|
+
on id match, assign an id when absent, and delete via a JSON-safe
|
|
84
|
+
`RemoveMessage(id)` sentinel. `pipeline:` stays all-`last_write`.
|
|
85
|
+
- **Shared transcript on agent nodes — `emit:`** (`graph.py`, `loader.py`,
|
|
86
|
+
`runtime.py`; feature 0006, delivering what feature 0001 listed as a Non-Goal): a `graph:` agent node may
|
|
87
|
+
declare `emit: <channel>`. After the agent answers, the runtime appends **one** turn
|
|
88
|
+
`{id, sender, content}` to that channel — at **commit 1**, right after the `output:`
|
|
89
|
+
write and **before** the `completed` bump + boundary save, so a completed node never
|
|
90
|
+
re-emits on resume. `content` is the answer as-returned (a `str`, or a `dict` for an
|
|
91
|
+
`output_schema` agent); the channel's `add_messages` reducer mints the `id`. `emit` is
|
|
92
|
+
independent of `output:` (a node may set either, both, or neither) and does not affect
|
|
93
|
+
`output_schema` validation. The loader gates `emit:` to a channel declared `add_messages`
|
|
94
|
+
in the `channels:` block (an undeclared channel, or a non-`add_messages` reducer, is a
|
|
95
|
+
`LoaderError` naming the node + channel). A later node **reads** the transcript through
|
|
96
|
+
ordinary `{channel}` templating (no formatter, no `messages: shared` auto-channel) —
|
|
97
|
+
collaboration over a shared blackboard, alongside the exactly-once push log. `graph:`-only
|
|
98
|
+
(a `pipeline:` step has no `emit`).
|
|
99
|
+
- **Load-time input template-reference check** (`graph.py`, `templating.py`; feature
|
|
100
|
+
0007): the **symmetric** counterpart to the `output:` check. In `channels:` mode the
|
|
101
|
+
declared block is the *complete* channel namespace, so every `{key}` a node (or a
|
|
102
|
+
conditional edge) **reads** must name a declared channel — a typo'd `{notez}` is now a
|
|
103
|
+
load-time `LoaderError` (new stable `GraphError` code `INPUT_REF_UNDECLARED`, naming
|
|
104
|
+
node/edge + template + missing ref) instead of a mid-run `KeyError` from
|
|
105
|
+
`templating.render`. The templates scanned are an agent node's `input:`, a tool node's
|
|
106
|
+
**string** `args:` values, and a conditional edge's `when:` condition. A new pure
|
|
107
|
+
`templating.refs(template) -> set[str]` extracts the **root** channel ref of each `{...}`
|
|
108
|
+
field (`{review[score]}` → `review`); `Graph.validate` reuses it (its one package import
|
|
109
|
+
is this leaf). It is a **sound membership** check (it permits reading a channel a *later*
|
|
110
|
+
node writes on a back-edge), **not** a runtime `input_schema` and **not** an ordering
|
|
111
|
+
analysis — so feature 0004's deferred input side is addressed as **reachability**, not shape. The
|
|
112
|
+
author rule: **declare every channel you read** (a run-seed key or a `state_write` runtime
|
|
113
|
+
key a template reads must also be declared, e.g. `seed: last_write`; no implicit
|
|
114
|
+
exemption). Absent `channels:`, there is no input-ref check (the flat-`last_write` default
|
|
115
|
+
is preserved); the check runs **after** `subgraph:` inlining (inner refs are the bare
|
|
116
|
+
channel names).
|
|
117
|
+
|
|
118
|
+
**Secrets — referenced, late-resolved, never checkpointed**
|
|
119
|
+
- **The secrets seam** (`agentic_flow/secrets.py` + `loader.py` / `store.py` /
|
|
120
|
+
`context.py` / `tools/core.py` / `program.py` / `providers/` / `tools/mcp.py`;
|
|
121
|
+
feature 0008, **[ADR 0003](docs/adr/0003-secrets-and-config.md)**): a credential is
|
|
122
|
+
**named** in YAML, never embedded. A top-level `secrets:` block maps a local name to a
|
|
123
|
+
pluggable backend (`{provider: env, var: X}` / `{provider: file, path: P}` / shorthand
|
|
124
|
+
`name: VAR`; `provider:` defaults to `env`). The new `secrets.py` leaf is stdlib-only —
|
|
125
|
+
`Secret` (masked, `.reveal()`-only, **not** JSON-serializable so a stray one fails
|
|
126
|
+
loud), the pluggable `SecretProvider` registry (`register_secret_provider` /
|
|
127
|
+
`get_secret_provider`; dependency-free `env` / `file` built-ins), the lazy-caching
|
|
128
|
+
`SecretResolver` (a backend `fetch` runs at most once per name), and the pure ref
|
|
129
|
+
helpers (`is_secret_ref` / `interpolate_env`). The run's **one** resolver is built once
|
|
130
|
+
in `build_program` over an injectable `environ` and **rides the `Store` off-channel**
|
|
131
|
+
(the `known_agents` pattern) — `snapshot()` never serializes it, so a credential
|
|
132
|
+
*structurally cannot* enter a checkpoint. It is `.reveal()`-ed only at point-of-use:
|
|
133
|
+
a provider's `api_key: {secret: name}` (revealed at lazy `.client` construction;
|
|
134
|
+
keyless = unchanged env path), an MCP server's `env:` / `headers:` values
|
|
135
|
+
(`{secret:}` or `${VAR}` / `${VAR:-default}`, resolved at connect — `mcp.py` stays pure
|
|
136
|
+
of secrets), or a tool body's `ctx.secrets.get(name).reveal()` (plus a framework-injected
|
|
137
|
+
`secrets` param, kept out of the model schema). `${VAR}` interpolation is a **separate**
|
|
138
|
+
pass scoped to credential fields only — never `{key}` state templating. A typo'd
|
|
139
|
+
`{secret: nope}` or unknown `provider:` is a `LoaderError` **before** the run; a missing
|
|
140
|
+
var/file or an unset `${VAR}` with no default is a `SecretError`. A `vault`/`aws`/cloud
|
|
141
|
+
backend is a registry entry away (lazy-imports its SDK inside `fetch`) — seam-ready,
|
|
142
|
+
shipped behind a `verify-real-deps` gate (see [known-issues.md](docs/known-issues.md)).
|
|
143
|
+
A caller-**injected** provider (`build_program(providers=…)`, the test/programmatic seam)
|
|
144
|
+
**wins** over keyed construction: a `{secret:}` `api_key` ref is still validated, but the
|
|
145
|
+
injected instance is used as-is (no fetch) — so an offline build with a `{secret:}` key
|
|
146
|
+
needs no real credential. The `api_key: {secret:}` agent surface is exercised offline by
|
|
147
|
+
`tests/test_secrets.py`; `examples/secrets_demo` shows the tool-side `ctx.secrets` read.
|
|
148
|
+
|
|
149
|
+
**The scheduler & durability**
|
|
150
|
+
- **The flat-frontier scheduler** (`agentic_flow/runtime.py`,
|
|
151
|
+
**[ADR 0001](docs/adr/0001-graph-cursor-resume.md)**): `run` / `resume` walk a
|
|
152
|
+
frontier of ready nodes. The per-node commit order is load-bearing — write → bump
|
|
153
|
+
`completed` → remove from frontier → resolve out-edges → **save** — and a
|
|
154
|
+
checkpoint is written at **every node boundary**, so a crash resumes at the last
|
|
155
|
+
completed node, inside loops/branches too (**at-least-once** — a crash before
|
|
156
|
+
`save` re-runs one node). `pick_ready` re-evaluates join readiness; terminal
|
|
157
|
+
failures write a `status:"failed"` record before propagating; a tool node runs
|
|
158
|
+
under the tool's retry policy.
|
|
159
|
+
- **The graph cursor** (`agentic_flow/cursor_format.py`): `CheckpointRecord` —
|
|
160
|
+
`{status, error, channels, versions, completed, frontier, edge_log, pending,
|
|
161
|
+
budget, replans, snapshot}`. The `frontier + completed` *is* the serialized
|
|
162
|
+
program counter; resume restores it verbatim. `is_completed` is structural;
|
|
163
|
+
`from_dict` guards the version and rejects a truncated record with a typed error.
|
|
164
|
+
- **Durable checkpointing** (`agentic_flow/checkpoint.py`, stdlib-only): a
|
|
165
|
+
`Checkpointer` ABC — `save` / `load` / `delete` over an **opaque JSON record**
|
|
166
|
+
keyed by `run_id` — with `InMemoryCheckpointer` and `JsonFileCheckpointer` built
|
|
167
|
+
in, plus a naming registry (`register_checkpointer` / `get_checkpointer`, `memory`
|
|
168
|
+
/ `json`).
|
|
169
|
+
- **Human-in-the-loop**: a `human:` node renders its prompt, tracks a todo
|
|
170
|
+
(`in_progress` → `blocked` → `done`), checkpoints a `pending` marker, and raises
|
|
171
|
+
`NodePaused`. `resume(human_input=…)` injects the answer and continues;
|
|
172
|
+
`HumanInputRequired` signals a pending pause with no answer.
|
|
173
|
+
|
|
174
|
+
**Model-driven flow — the authority spectrum**
|
|
175
|
+
- The human authors the graph; the author declares how much authority the model has,
|
|
176
|
+
and the model only ever composes from the author's declared vocabulary.
|
|
177
|
+
- **Route (L1)** — a `route:` edge invokes a router (`output_schema` `{next,
|
|
178
|
+
reason}`) agent that picks the next node from the declared `to:` set. The pick is
|
|
179
|
+
validated, recorded in `edge_log`, and **replayed on resume**; an out-of-set pick
|
|
180
|
+
is fail-loud (`RouteOutOfSet`, terminal).
|
|
181
|
+
- **Replan (L2)** (`agentic_flow/authority.py`) — an `authority.replan:` policy
|
|
182
|
+
(`{by, when, allow, powers, max_replans}`) fires at a node boundary only when the
|
|
183
|
+
guard trips (**zero planner cost** otherwise). The planner proposes a batch of
|
|
184
|
+
edits to the not-yet-run subgraph; `validate_replan` checks the **folded batch**
|
|
185
|
+
atomically (five gates — vocabulary, powers, past-immutable, referential,
|
|
186
|
+
reachability) and keeps the un-amended graph on rejection. The accepted delta
|
|
187
|
+
persists in the cursor and resume replays it by a pure fold — **durable replan**.
|
|
188
|
+
|
|
189
|
+
**Agents & tools**
|
|
190
|
+
- **`Agent`** — a system prompt + tools + a hand-written tool-use loop, bounded by
|
|
191
|
+
`MAX_STEPS`. Surfaces every tool call/result through `on_event`. **Structured
|
|
192
|
+
output** via `output_schema` (`additionalProperties: false` injected; malformed
|
|
193
|
+
JSON raises `OutputParseError` and is retried). **Agents as tools** — delegate via
|
|
194
|
+
an `agents:` list; sub-agents share the same `Store`; cycles bounded by
|
|
195
|
+
`MAX_STEPS`. **Adaptive thinking** on by default (Anthropic).
|
|
196
|
+
- **`Tool` + `@tool`** — derive a model-callable tool's JSON Schema from a function's
|
|
197
|
+
type hints and Google-style docstring; resolve stringized hints; hide injected
|
|
198
|
+
`state`/`ctx` params. `Tool.invoke(args, ctx)` is the one home for calling a tool.
|
|
199
|
+
`ToolRegistry` has named groups; a YAML agent's `tools:` list takes individual
|
|
200
|
+
names or a group name that expands.
|
|
201
|
+
- **MCP servers as a tool group** (`agentic_flow/tools/mcp.py`): a top-level `mcp:`
|
|
202
|
+
block maps a server name to a `{command: […]}` (stdio) or `{url: …}` (HTTP/SSE)
|
|
203
|
+
spec; the framework connects each server through an **injected client factory**
|
|
204
|
+
(`build_program(..., mcp_client_factory=connect_mcp)`, threaded through
|
|
205
|
+
`Orchestrator.from_yaml`/`from_config`), lists its tools, and registers each under
|
|
206
|
+
a group named for the server — so `tools: [<server>]` grants them via the existing
|
|
207
|
+
`resolve()` path. The server-**declared** JSON schema is the `Tool.parameters`
|
|
208
|
+
verbatim (contract data across a process boundary, not Python-derived); a
|
|
209
|
+
server-side error becomes a tool-error string, not a crash. A name collision →
|
|
210
|
+
`LoaderError` naming the server; a connect/list failure or missing SDK → `McpError`
|
|
211
|
+
naming the server. The MCP SDK is an **optional** `mcp` extra — `connect_mcp`
|
|
212
|
+
lazy-imports it only when called, so `import agentic_flow` and the whole offline
|
|
213
|
+
suite run with no SDK installed (tests inject a fake factory; new `tests/test_mcp.py`).
|
|
214
|
+
- **Coordination built-ins**: `state` (`state_write`/`state_read`/`state_keys`),
|
|
215
|
+
`messaging` (`send_message`/`read_messages`), and the `todos` board. **Exactly-once
|
|
216
|
+
push delivery** of messages and assigned todos into a recipient's prompt.
|
|
217
|
+
- **`Context`** (`agentic_flow/context.py`) — the shared store + caller name handed
|
|
218
|
+
to an injected tool.
|
|
219
|
+
- **`RetryPolicy` + `run_with_retry`** — configurable attempts with backoff.
|
|
220
|
+
- **Capacity & timeout limits** (`agentic_flow/limits.py`): a `limits:` block at two
|
|
221
|
+
scopes — **agent** (`max_steps`, `token_budget`, `timeout`, `call_timeout`) and
|
|
222
|
+
**run** (top-level `max_steps`, `timeout`). Breaches raise dedicated, non-retried
|
|
223
|
+
exceptions.
|
|
224
|
+
|
|
225
|
+
**Providers**
|
|
226
|
+
- Pluggable `Provider` seam (`start` / `complete` / `add_assistant` /
|
|
227
|
+
`add_tool_results`) with normalized `Completion` / `ToolCall` / `ToolResult`.
|
|
228
|
+
**Gemini** (`google-genai`, default `gemini-3.5-flash`) and **Anthropic**
|
|
229
|
+
(`anthropic`, default `claude-opus-4-8`; thinking blocks preserved across turns).
|
|
230
|
+
Lazy SDK imports keep the pure logic testable without a key; mixed-provider runs
|
|
231
|
+
share one `Store`.
|
|
232
|
+
|
|
233
|
+
**Facade, CLI & observability**
|
|
234
|
+
- **`Orchestrator` facade** (`agentic_flow/program.py`): the ergonomic
|
|
235
|
+
`from_yaml` / `from_config` / `run` / `resume` surface over `build_program` +
|
|
236
|
+
`runtime`. `run`/`resume` raise `NodePaused` / `HumanInputRequired` /
|
|
237
|
+
`RuntimeFailure`; run-scope `max_steps`/`timeout` come from a `limits:` block.
|
|
238
|
+
- **`agentic-flow` CLI** (`agentic_flow/cli.py`): run a pipeline, `--tools MODULE`,
|
|
239
|
+
`--set KEY=VALUE`, `--dry-run` (renders the graph — nodes, edges grouped by source
|
|
240
|
+
with kind annotations and `(back-edge)` flags, an authority footer),
|
|
241
|
+
`--checkpoint SPEC` / `--run-id` / `--resume` / `--human-input*`. **Exit codes:**
|
|
242
|
+
`0` completed, `1` error, `2` paused (printing a copy-pasteable resume command). A
|
|
243
|
+
`human:` run without `--checkpoint` is rejected up front.
|
|
244
|
+
- **`on_event` is the single observability seam.** A `console_tracer()` renderer
|
|
245
|
+
(`agentic_flow/console.py`), an `event_logger()` → `logging` adapter
|
|
246
|
+
(`agentic_flow/log.py`), and an `EventBus` (`agentic_flow/events.py`, a pub/sub
|
|
247
|
+
fan-out with per-subscriber filtering, `Subscription` handles, error isolation, a
|
|
248
|
+
thread-safe `stream()`, and `run_in_background()`). Taxonomy: `node_start` /
|
|
249
|
+
`node_end` / `checkpoint` / `human` / `route` / `replan` / `replan_rejected`, plus
|
|
250
|
+
agent-level `tool_call` / `tool_result` / `retry` / `messages_delivered`.
|
|
251
|
+
- **Durable execution trace** — a layer-2 extension over the `on_event` seam
|
|
252
|
+
(feature 0009). Two **additive** core events — `node_input` (a node's rendered
|
|
253
|
+
prompt / args) and `node_error` (a terminal failure's detail) — make full per-node
|
|
254
|
+
I/O observable without changing existing behavior. The consumer lives **outside**
|
|
255
|
+
the package, in a reference tree `extensions/observability/` (like `examples/`: on
|
|
256
|
+
`sys.path`, **not in the core wheel**) that imports core one-way: `TraceSink(run_id,
|
|
257
|
+
dir)` persists every event to append-only `<dir>/<run_id>.jsonl` (swallow-on-error,
|
|
258
|
+
so it never breaks the run), `summarize(run_id, dir)` folds it into one `NodeTrace`
|
|
259
|
+
per node execution (input / output / status / `duration_ms`, timed in layer 2), and
|
|
260
|
+
`python -m extensions.observability <run_id> --dir D` renders the per-node table
|
|
261
|
+
(unknown run id → stderr + non-zero). **No** vendor export (LangSmith / OTel) and
|
|
262
|
+
**no** history / time-travel — local JSONL only.
|
|
263
|
+
- **Visual pipeline studio** (`ui/`): a no-build static web app that renders a
|
|
264
|
+
pipeline YAML as a flowchart and round-trips it to/from YAML (authoring only).
|
|
265
|
+
|
|
266
|
+
**Package structure**
|
|
267
|
+
- **Core/layer-2 split** (`agentic_flow/core/`; feature 0010,
|
|
268
|
+
**[ADR 0004](docs/adr/0004-core-and-layer2-boundary.md)**): restructured the
|
|
269
|
+
package — the engine moved to `agentic_flow.core` (layers 0+1: `graph` / `store` /
|
|
270
|
+
`agent` / `runtime` / `authority` / `cursor_format` / `checkpoint` / `tools` /
|
|
271
|
+
`providers` + the pure leaves `schema` / `templating` / `secrets` / `events` /
|
|
272
|
+
`limits` / `retry`); layer-2 consumers (`loader` / `pipeline` / `console` / `log` /
|
|
273
|
+
`program` / `cli`) stay at the top level and consume the engine through the six
|
|
274
|
+
seams. One-way layering (`core/` never imports upward) is enforced by
|
|
275
|
+
`tests/test_layering.py`. **Public `from agentic_flow import …` API unchanged**
|
|
276
|
+
(the facade re-exports from the new paths); deep submodule paths moved
|
|
277
|
+
(`agentic_flow.runtime` → `agentic_flow.core.runtime`). The `builtins.py` shim was
|
|
278
|
+
removed. A pure relocation — no behavior change.
|
|
279
|
+
|
|
280
|
+
**Docs, examples & tests**
|
|
281
|
+
- Project documentation under [`docs/`](docs/): an architecture map
|
|
282
|
+
([ARCHITECTURE.md](docs/ARCHITECTURE.md)), a feature catalog
|
|
283
|
+
([FEATURES.md](docs/FEATURES.md)), an internals walkthrough
|
|
284
|
+
([INTERNALS.md](docs/INTERNALS.md)), two ADRs
|
|
285
|
+
([0001](docs/adr/0001-graph-cursor-resume.md) graph-cursor resume,
|
|
286
|
+
[0002](docs/adr/0002-channels-and-reducers.md) channels & reducers), and
|
|
287
|
+
[known-issues.md](docs/known-issues.md). This changelog.
|
|
288
|
+
- Worked examples — a 15-step ladder under [`examples/`](examples/) (see
|
|
289
|
+
[`examples/README.md`](examples/README.md)), each a single concept meant to be read in
|
|
290
|
+
order: `quickstart` (one agent/tool/step, plus `limits:`), `structured_output`,
|
|
291
|
+
`tool_steps`, `shared_state` (fan-out → fan-in over one `Store`, plus a `retry` aside),
|
|
292
|
+
`refine_loop` (a loop + conditional), `approval` (offline human gate, pause→resume),
|
|
293
|
+
`graph_branch` (the native `graph:` surface — hand-authored nodes/edges whose review
|
|
294
|
+
verdict branches to a `publish`/`flag` terminal), `router` (a model-driven `route:` edge
|
|
295
|
+
— an agent picks the next node), `subgraph_intake` (offline — a reusable `subgraph:`
|
|
296
|
+
inlined at load, `parse → validate → normalize`), `delegation` (agent-as-tool — a lead
|
|
297
|
+
agent calls a sub-agent listed under its `agents:`), `shared_transcript` (a **shared
|
|
298
|
+
transcript** — panelists `emit:` into an `add_messages` `channels:` entry, then a
|
|
299
|
+
moderator's verdict is node-`output_schema`-validated), `team_todos` (the todo board +
|
|
300
|
+
agent-to-agent messaging, plus `run_stream.py` streaming events via an `EventBus`),
|
|
301
|
+
`secrets_demo` (offline — a tool reads a file-backed `{secret:}` via `ctx.secrets`,
|
|
302
|
+
writing only a masked result; the raw value never enters the `Store`/checkpoint),
|
|
303
|
+
`incident_replan` (replan / authority L2 — a static base graph that rewrites itself when
|
|
304
|
+
an assessor marks an incident critical), and `data_pipeline` (offline ETL, no API key).
|
|
305
|
+
- Twenty-one offline test suites run in Docker (`docker compose run --rm tests`, no API
|
|
306
|
+
key): `test_units`, `test_coordination`, `test_events`, `test_graph`, `test_store`,
|
|
307
|
+
`test_runtime`, `test_route`, `test_replan`, `test_loader`, `test_lowering`,
|
|
308
|
+
`test_cli`, `test_example_replan` (drives the `incident_replan` example offline),
|
|
309
|
+
`test_channels` (declared reducers), `test_subgraph` (subgraph inlining + durable
|
|
310
|
+
resume inside a subgraph), `test_mcp` (an `mcp:` server → a tool group, via a fake
|
|
311
|
+
client), `test_node_schema` (node `output_schema` validation), `test_transcript`
|
|
312
|
+
(agent-node `emit:` → a shared transcript), `test_input_refs` (the load-time
|
|
313
|
+
template-ref check), `test_secrets` (the secrets seam — `Secret` masking, the
|
|
314
|
+
`env`/`file` backends, and the no-secret-in-the-checkpoint guarantee),
|
|
315
|
+
`test_observability` (the layer-2 trace seam — `TraceSink` → JSONL, `summarize`, the
|
|
316
|
+
`python -m extensions.observability` renderer), and `test_layering` (the
|
|
317
|
+
core-never-imports-up boundary scan + engine self-sufficiency check). The lowering suite
|
|
318
|
+
round-trips every `examples/*/pipeline.yaml`.
|
|
319
|
+
|
|
320
|
+
### Known limitations
|
|
321
|
+
- On Gemini, `output_schema` and tools cannot be combined on a single agent (the
|
|
322
|
+
provider raises a clear error); split into two agents or use Anthropic.
|
|
323
|
+
- Agent retry re-runs already-called tools — prefer tool retry for side effects.
|
|
324
|
+
- A non-`last_write` channel-declaration YAML surface is not yet wired; a `graph:`
|
|
325
|
+
author writes to `last_write` channels (the reducer machinery is fully shipped).
|
|
326
|
+
- `import agentic_flow` eagerly imports the vendor SDKs, so an SDK-less import is not
|
|
327
|
+
supported (see [known-issues.md](docs/known-issues.md)).
|
|
328
|
+
- Requires Python ≥ 3.10.
|
|
329
|
+
|
|
330
|
+
[Unreleased]: https://github.com/ai-agent-lead/agentic-flow/compare/v0.1.0...HEAD
|
|
331
|
+
[0.1.0]: https://github.com/ai-agent-lead/agentic-flow/releases/tag/v0.1.0
|
flw_studio-0.1.0/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 AI Agent Lead
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
|
@@ -0,0 +1,8 @@
|
|
|
1
|
+
# Source distribution (sdist) contents. The wheel ships only `agentic_flow*`
|
|
2
|
+
# (see [tool.setuptools.packages.find]); the docs/examples below travel with the
|
|
3
|
+
# source tarball so a clone-free `pip download` still has the full project.
|
|
4
|
+
include LICENSE CHANGELOG.md README.md
|
|
5
|
+
recursive-include docs *.md
|
|
6
|
+
recursive-include examples *.py *.yaml *.yml *.md *.secret *.txt
|
|
7
|
+
recursive-include tests *.py
|
|
8
|
+
global-exclude __pycache__/* *.py[cod] .DS_Store
|