@kontourai/flow-agents 2.0.1 → 2.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.github/actions/trust-verify/action.yml +4 -2
- package/.github/workflows/ci.yml +16 -4
- package/.github/workflows/docs-pages.yml +1 -1
- package/.github/workflows/kit-gates-demo.yml +2 -2
- package/.github/workflows/publish-npm.yml +2 -2
- package/.github/workflows/runtime-compat.yml +2 -2
- package/.github/workflows/trust-reconcile.yml +1 -1
- package/CHANGELOG.md +28 -0
- package/README.md +3 -3
- package/build/src/cli/workflow-sidecar.js +8 -2
- package/context/scripts/telemetry/lib/config.sh +15 -0
- package/context/scripts/telemetry/telemetry.conf +4 -0
- package/context/scripts/telemetry/telemetry.sh +23 -1
- package/docs/design/flowrun-eventsourcing-design.md +216 -0
- package/docs/design/workflowrun-observability-design.md +431 -0
- package/evals/ci/antigaming-suite.sh +1 -0
- package/evals/ci/run-baseline.sh +2 -0
- package/evals/integration/test_command_log_concurrency.sh +114 -0
- package/evals/integration/test_gate_lockdown.sh +21 -6
- package/evals/integration/test_usage_cost.sh +119 -0
- package/evals/integration/test_verify_cli.sh +23 -0
- package/integrations/strands/flow_agents_strands/hooks.py +126 -1
- package/integrations/strands/flow_agents_strands/telemetry.py +172 -0
- package/integrations/strands/tests/test_usage.py +129 -0
- package/integrations/strands-ts/src/hooks.ts +135 -1
- package/integrations/strands-ts/src/telemetry.ts +170 -0
- package/integrations/strands-ts/test/test-usage.ts +85 -0
- package/package.json +2 -2
- package/scripts/ci/trust-reconcile.js +7 -23
- package/scripts/hooks/evidence-capture.js +85 -50
- package/scripts/hooks/stop-goal-fit.js +18 -45
- package/scripts/lib/command-log-chain.js +73 -0
- package/scripts/repair-command-log.js +8 -15
- package/scripts/telemetry/lib/config.sh +15 -0
- package/scripts/telemetry/lib/pricing.sh +42 -0
- package/scripts/telemetry/lib/usage.sh +108 -0
- package/scripts/telemetry/pricing.golden.json +15 -0
- package/scripts/telemetry/pricing.json +31 -0
- package/scripts/telemetry/telemetry.conf +4 -0
- package/scripts/telemetry/telemetry.sh +23 -1
- package/src/cli/workflow-sidecar.ts +8 -2
|
@@ -113,7 +113,9 @@ runs:
|
|
|
113
113
|
BUNDLE_ARG=""
|
|
114
114
|
fi
|
|
115
115
|
|
|
116
|
-
|
|
116
|
+
# action_path is .github/actions/trust-verify/ — climb THREE levels to the
|
|
117
|
+
# repo root where scripts/ lives (trust-verify -> actions -> .github -> root).
|
|
118
|
+
node "${{ github.action_path }}/../../../scripts/ci/trust-reconcile.js" \
|
|
117
119
|
--commands "$VERIFY_COMMAND" \
|
|
118
120
|
--repo-root "${{ github.workspace }}" \
|
|
119
121
|
$BUNDLE_ARG || {
|
|
@@ -130,7 +132,7 @@ runs:
|
|
|
130
132
|
- name: Mint attestation
|
|
131
133
|
if: inputs.sign == 'true' && steps.trust-verify.outcome == 'success'
|
|
132
134
|
shell: bash
|
|
133
|
-
run: node "${{ github.action_path }}/../../scripts/ci/mint-attestation.js"
|
|
135
|
+
run: node "${{ github.action_path }}/../../../scripts/ci/mint-attestation.js"
|
|
134
136
|
|
|
135
137
|
- name: Upload attestation
|
|
136
138
|
if: inputs.sign == 'true' && steps.trust-verify.outcome == 'success'
|
package/.github/workflows/ci.yml
CHANGED
|
@@ -14,6 +14,14 @@ concurrency:
|
|
|
14
14
|
cancel-in-progress: true
|
|
15
15
|
|
|
16
16
|
jobs:
|
|
17
|
+
# Suite-wide secret-scan gate, defined once in kontourai/.github (Hachure: one
|
|
18
|
+
# normative source). Scans git-tracked history; gitignored runtime/.env excluded.
|
|
19
|
+
secret-scan:
|
|
20
|
+
name: Secret Scan
|
|
21
|
+
uses: kontourai/.github/.github/workflows/secret-scan.yml@main
|
|
22
|
+
permissions:
|
|
23
|
+
contents: read
|
|
24
|
+
|
|
17
25
|
source-and-static:
|
|
18
26
|
name: Source and Static
|
|
19
27
|
runs-on: ubuntu-latest
|
|
@@ -25,7 +33,7 @@ jobs:
|
|
|
25
33
|
|
|
26
34
|
steps:
|
|
27
35
|
- name: Checkout
|
|
28
|
-
uses: actions/checkout@
|
|
36
|
+
uses: actions/checkout@9c091bb21b7c1c1d1991bb908d89e4e9dddfe3e0 # v7.0.0
|
|
29
37
|
|
|
30
38
|
- name: Set up Node.js
|
|
31
39
|
uses: actions/setup-node@48b55a011bda9f5d6aeb4c2d9c7362e8dae4041e # v6.4.0
|
|
@@ -91,7 +99,7 @@ jobs:
|
|
|
91
99
|
|
|
92
100
|
steps:
|
|
93
101
|
- name: Checkout
|
|
94
|
-
uses: actions/checkout@
|
|
102
|
+
uses: actions/checkout@9c091bb21b7c1c1d1991bb908d89e4e9dddfe3e0 # v7.0.0
|
|
95
103
|
|
|
96
104
|
- name: Set up Node.js
|
|
97
105
|
uses: actions/setup-node@48b55a011bda9f5d6aeb4c2d9c7362e8dae4041e # v6.4.0
|
|
@@ -152,7 +160,7 @@ jobs:
|
|
|
152
160
|
|
|
153
161
|
steps:
|
|
154
162
|
- name: Checkout
|
|
155
|
-
uses: actions/checkout@
|
|
163
|
+
uses: actions/checkout@9c091bb21b7c1c1d1991bb908d89e4e9dddfe3e0 # v7.0.0
|
|
156
164
|
|
|
157
165
|
- name: Set up Node.js
|
|
158
166
|
uses: actions/setup-node@48b55a011bda9f5d6aeb4c2d9c7362e8dae4041e # v6.4.0
|
|
@@ -242,6 +250,10 @@ jobs:
|
|
|
242
250
|
continue-on-error: true
|
|
243
251
|
run: bash evals/ci/run-baseline.sh --check telemetry-doctor-integration
|
|
244
252
|
|
|
253
|
+
- name: Usage and cost integration
|
|
254
|
+
continue-on-error: true
|
|
255
|
+
run: bash evals/ci/run-baseline.sh --check usage-and-cost-integration
|
|
256
|
+
|
|
245
257
|
- name: Utterance check integration
|
|
246
258
|
continue-on-error: true
|
|
247
259
|
run: bash evals/ci/run-baseline.sh --check utterance-check-integration
|
|
@@ -280,7 +292,7 @@ jobs:
|
|
|
280
292
|
|
|
281
293
|
steps:
|
|
282
294
|
- name: Checkout
|
|
283
|
-
uses: actions/checkout@
|
|
295
|
+
uses: actions/checkout@9c091bb21b7c1c1d1991bb908d89e4e9dddfe3e0 # v7.0.0
|
|
284
296
|
|
|
285
297
|
- name: Set up Node.js
|
|
286
298
|
uses: actions/setup-node@48b55a011bda9f5d6aeb4c2d9c7362e8dae4041e # v6.4.0
|
|
@@ -22,7 +22,7 @@ jobs:
|
|
|
22
22
|
runs-on: ubuntu-latest
|
|
23
23
|
steps:
|
|
24
24
|
- name: Checkout
|
|
25
|
-
uses: actions/checkout@
|
|
25
|
+
uses: actions/checkout@9c091bb21b7c1c1d1991bb908d89e4e9dddfe3e0 # v7.0.0
|
|
26
26
|
|
|
27
27
|
- name: Configure Pages
|
|
28
28
|
uses: actions/configure-pages@45bfe0192ca1faeb007ade9deae92b16b8254a0d # v6.0.0
|
|
@@ -37,7 +37,7 @@ jobs:
|
|
|
37
37
|
|
|
38
38
|
steps:
|
|
39
39
|
- name: Checkout
|
|
40
|
-
uses: actions/checkout@
|
|
40
|
+
uses: actions/checkout@9c091bb21b7c1c1d1991bb908d89e4e9dddfe3e0 # v7.0.0
|
|
41
41
|
|
|
42
42
|
- name: Set up Node.js
|
|
43
43
|
uses: actions/setup-node@48b55a011bda9f5d6aeb4c2d9c7362e8dae4041e # v6.4.0
|
|
@@ -103,7 +103,7 @@ jobs:
|
|
|
103
103
|
|
|
104
104
|
steps:
|
|
105
105
|
- name: Checkout
|
|
106
|
-
uses: actions/checkout@
|
|
106
|
+
uses: actions/checkout@9c091bb21b7c1c1d1991bb908d89e4e9dddfe3e0 # v7.0.0
|
|
107
107
|
|
|
108
108
|
- name: Set up Node.js
|
|
109
109
|
uses: actions/setup-node@48b55a011bda9f5d6aeb4c2d9c7362e8dae4041e # v6.4.0
|
|
@@ -22,7 +22,7 @@ jobs:
|
|
|
22
22
|
runs-on: ubuntu-latest
|
|
23
23
|
steps:
|
|
24
24
|
- name: Check out repository
|
|
25
|
-
uses: actions/checkout@
|
|
25
|
+
uses: actions/checkout@9c091bb21b7c1c1d1991bb908d89e4e9dddfe3e0 # v7.0.0
|
|
26
26
|
with:
|
|
27
27
|
fetch-depth: 0
|
|
28
28
|
|
|
@@ -58,7 +58,7 @@ jobs:
|
|
|
58
58
|
id-token: write
|
|
59
59
|
steps:
|
|
60
60
|
- name: Check out repository
|
|
61
|
-
uses: actions/checkout@
|
|
61
|
+
uses: actions/checkout@9c091bb21b7c1c1d1991bb908d89e4e9dddfe3e0 # v7.0.0
|
|
62
62
|
with:
|
|
63
63
|
fetch-depth: 0
|
|
64
64
|
|
|
@@ -37,7 +37,7 @@ jobs:
|
|
|
37
37
|
version: pi --version
|
|
38
38
|
steps:
|
|
39
39
|
- name: Checkout
|
|
40
|
-
uses: actions/checkout@
|
|
40
|
+
uses: actions/checkout@9c091bb21b7c1c1d1991bb908d89e4e9dddfe3e0 # v7.0.0
|
|
41
41
|
|
|
42
42
|
- name: Set up Node.js
|
|
43
43
|
uses: actions/setup-node@48b55a011bda9f5d6aeb4c2d9c7362e8dae4041e # v6.4.0
|
|
@@ -67,7 +67,7 @@ jobs:
|
|
|
67
67
|
timeout-minutes: 20
|
|
68
68
|
steps:
|
|
69
69
|
- name: Checkout
|
|
70
|
-
uses: actions/checkout@
|
|
70
|
+
uses: actions/checkout@9c091bb21b7c1c1d1991bb908d89e4e9dddfe3e0 # v7.0.0
|
|
71
71
|
|
|
72
72
|
- name: Set up Node.js
|
|
73
73
|
uses: actions/setup-node@48b55a011bda9f5d6aeb4c2d9c7362e8dae4041e # v6.4.0
|
|
@@ -62,7 +62,7 @@ jobs:
|
|
|
62
62
|
|
|
63
63
|
steps:
|
|
64
64
|
- name: Checkout
|
|
65
|
-
uses: actions/checkout@
|
|
65
|
+
uses: actions/checkout@9c091bb21b7c1c1d1991bb908d89e4e9dddfe3e0 # v7.0.0
|
|
66
66
|
|
|
67
67
|
- name: Set up Node.js
|
|
68
68
|
uses: actions/setup-node@48b55a011bda9f5d6aeb4c2d9c7362e8dae4041e # v6.4.0
|
package/CHANGELOG.md
CHANGED
|
@@ -1,5 +1,33 @@
|
|
|
1
1
|
# Changelog
|
|
2
2
|
|
|
3
|
+
## [2.1.1](https://github.com/kontourai/flow-agents/compare/v2.1.0...v2.1.1) (2026-06-29)
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
### Refactoring
|
|
7
|
+
|
|
8
|
+
* **flow-agents:** one shared module for command-log chain helpers (ops[#20](https://github.com/kontourai/flow-agents/issues/20)) ([#249](https://github.com/kontourai/flow-agents/issues/249)) ([67af85f](https://github.com/kontourai/flow-agents/commit/67af85f5010dace3f33b36b86245e0c7aad95f77))
|
|
9
|
+
|
|
10
|
+
## [2.1.0](https://github.com/kontourai/flow-agents/compare/v2.0.1...v2.1.0) (2026-06-29)
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
### Features
|
|
14
|
+
|
|
15
|
+
* **telemetry:** derive live pricing source from the console ([#242](https://github.com/kontourai/flow-agents/issues/242)) ([ddce44e](https://github.com/kontourai/flow-agents/commit/ddce44e813e9a3515953324f4878bf51c33252ba))
|
|
16
|
+
* **telemetry:** real token+cost capture with single-source versioned pricing ([#241](https://github.com/kontourai/flow-agents/issues/241)) ([b0bd4c3](https://github.com/kontourai/flow-agents/commit/b0bd4c347897ec77f60d84cae702e7f42b2871d7))
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
### Fixes
|
|
20
|
+
|
|
21
|
+
* **evidence-capture:** serialize command-log appends to prevent chain forks ([#232](https://github.com/kontourai/flow-agents/issues/232)) ([bb167e9](https://github.com/kontourai/flow-agents/commit/bb167e93e7f6cc19baa88da613e96fe88a681c10))
|
|
22
|
+
* **flow-agents:** stop corrupting sidecar JSONL event lines ([#244](https://github.com/kontourai/flow-agents/issues/244)) ([fb65d10](https://github.com/kontourai/flow-agents/commit/fb65d1017e5cb659ce2b48da7a548f0c1f360426))
|
|
23
|
+
* **trust-verify action:** correct cross-repo script path (../../ → ../../../) ([#240](https://github.com/kontourai/flow-agents/issues/240)) ([a75a6d2](https://github.com/kontourai/flow-agents/commit/a75a6d28baf68b4be527a2e8cdff8f007af88bd5))
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
### Documentation
|
|
27
|
+
|
|
28
|
+
* **design:** preserve WorkflowRun observability + FlowRun event-sourcing design notes ([#239](https://github.com/kontourai/flow-agents/issues/239)) ([c2dc116](https://github.com/kontourai/flow-agents/commit/c2dc11698cf63704f14087001c4494079195d197))
|
|
29
|
+
* **flow-agents:** advertise the real eval coverage, clearly scoped (ops[#23](https://github.com/kontourai/flow-agents/issues/23)) ([#248](https://github.com/kontourai/flow-agents/issues/248)) ([d208207](https://github.com/kontourai/flow-agents/commit/d20820749408d5fa63f2bf1470252000712de5d8))
|
|
30
|
+
|
|
3
31
|
## [2.0.1](https://github.com/kontourai/flow-agents/compare/v2.0.0...v2.0.1) (2026-06-27)
|
|
4
32
|
|
|
5
33
|
|
package/README.md
CHANGED
|
@@ -29,7 +29,7 @@ Flow Agents addresses this with a process-discipline layer that sits between the
|
|
|
29
29
|
- **Four canonical policies** — workflow steering (phase reminders at each turn), quality gate (per-file checks after edits), stop-goal-fit (evidence check before the agent stops), and config protection (veto writes to linter/formatter configs). Each policy class has a canonical script under `scripts/hooks/` and compiles to the host's native hook format.
|
|
30
30
|
- **Evidence over confidence** — important work ends with tests, browser checks, CI results, review findings, governance reports, or an explicit `NOT_VERIFIED` gap. Optional [Veritas](docs/veritas-integration.md) integration attaches repo-governance evidence without making it mandatory.
|
|
31
31
|
- **Verifiable, un-gameable "done"** — the agent can't mark work complete that isn't: the gate re-derives the verdict from independent evidence, an external CI anchor re-runs the verification fresh and fails the merge on any divergence, and CI mints a Sigstore-signed record of what shipped. See [Verifiable Trust — why "done" actually means done](docs/verifiable-trust.md).
|
|
32
|
-
- **Evals that keep the bundle honest** —
|
|
32
|
+
- **Evals that keep the bundle honest** — 60 integration scenarios (1,829 assertions) and 7 static suites (110 assertions) validate the skills, contracts, fixtures, and hook influence as the bundle evolves.
|
|
33
33
|
|
|
34
34
|
## Flow Agents as a process-discipline layer
|
|
35
35
|
|
|
@@ -52,8 +52,8 @@ L2 means all four policy classes with blocking; L1 means steering and stop-goal-
|
|
|
52
52
|
|
|
53
53
|
| Runtime | Ships | Tested |
|
|
54
54
|
| --- | --- | --- |
|
|
55
|
-
| Claude Code | install + hooks + bundle |
|
|
56
|
-
| Codex | install + hooks + bundle |
|
|
55
|
+
| Claude Code | install + hooks + bundle | 60 integration scenarios + 7 static suites (1,939 assertions) — reference implementation |
|
|
56
|
+
| Codex | install + hooks + bundle | 60 integration scenarios + 7 static suites (1,939 assertions) — reference implementation |
|
|
57
57
|
| Kiro | install + hooks + bundle | included in bundle assertions |
|
|
58
58
|
|
|
59
59
|
**Partial support — L1 (steering + stop-goal-fit warning)**
|
|
@@ -15,11 +15,17 @@ export const verdicts = new Set(["pass", "partial", "fail", "not_verified"]);
|
|
|
15
15
|
function now() { return new Date().toISOString().replace(/\.\d{3}Z$/, "Z"); }
|
|
16
16
|
function read(file) { return fs.readFileSync(file, "utf8"); }
|
|
17
17
|
export function writeJson(file, payload) { fs.mkdirSync(path.dirname(file), { recursive: true }); fs.writeFileSync(file, `${JSON.stringify(payload, null, 2)}\n`); }
|
|
18
|
-
|
|
18
|
+
// Single-line but readable "key": "value" form. Built by collapsing the
|
|
19
|
+
// structural whitespace from an indented stringify — corruption-proof, unlike a
|
|
20
|
+
// regex that would also rewrite ":"/"," sequences inside string values.
|
|
21
|
+
function spacedLine(payload, replacer) {
|
|
22
|
+
return JSON.stringify(payload, replacer, 1).replace(/\n\s*/g, " ");
|
|
23
|
+
}
|
|
24
|
+
function printJson(payload) { console.log(spacedLine(payload)); }
|
|
19
25
|
export function loadJson(file, fallback = {}) { return fs.existsSync(file) ? JSON.parse(read(file)) : { ...fallback }; }
|
|
20
26
|
export function appendJsonl(file, payload) {
|
|
21
27
|
fs.mkdirSync(path.dirname(file), { recursive: true });
|
|
22
|
-
const line =
|
|
28
|
+
const line = spacedLine(payload, Object.keys(payload).sort());
|
|
23
29
|
fs.appendFileSync(file, `${line}\n`);
|
|
24
30
|
}
|
|
25
31
|
function die(message) { throw new Error(message); }
|
|
@@ -38,6 +38,11 @@ CONSOLE_TELEMETRY_URL="${CONSOLE_TELEMETRY_URL:-${CONSOLE_URL:-}}"
|
|
|
38
38
|
CONSOLE_TELEMETRY_ENDPOINT_URL="${CONSOLE_TELEMETRY_ENDPOINT_URL:-}"
|
|
39
39
|
CONSOLE_TELEMETRY_TOKEN="${CONSOLE_TELEMETRY_TOKEN:-${CONSOLE_AUTH_TOKEN:-}}"
|
|
40
40
|
CONSOLE_TENANT_ID="${CONSOLE_TENANT_ID:-}"
|
|
41
|
+
# Pricing registry source (consumed by lib/pricing.sh). Explicit file/URL win;
|
|
42
|
+
# otherwise the URL is derived from the console below so all runtimes read one
|
|
43
|
+
# live pricing source. Falls back to the bundled pricing.json offline.
|
|
44
|
+
TELEMETRY_PRICING_FILE="${TELEMETRY_PRICING_FILE:-${FLOW_AGENTS_PRICING_FILE:-}}"
|
|
45
|
+
TELEMETRY_PRICING_URL="${TELEMETRY_PRICING_URL:-${FLOW_AGENTS_PRICING_URL:-}}"
|
|
41
46
|
|
|
42
47
|
# Load config file if it exists
|
|
43
48
|
if [[ -f "$TELEMETRY_CONFIG_FILE" ]]; then
|
|
@@ -78,6 +83,9 @@ if [[ -f "$TELEMETRY_CONFIG_FILE" ]]; then
|
|
|
78
83
|
console_telemetry_token) CONSOLE_TELEMETRY_TOKEN="$value" ;;
|
|
79
84
|
console_tenant_id) CONSOLE_TENANT_ID="$value" ;;
|
|
80
85
|
console_telemetry_redact) CONSOLE_TELEMETRY_REDACT="$value" ;;
|
|
86
|
+
console_pricing_url) TELEMETRY_PRICING_URL="$value" ;;
|
|
87
|
+
pricing_url) TELEMETRY_PRICING_URL="$value" ;;
|
|
88
|
+
pricing_file) TELEMETRY_PRICING_FILE="$value" ;;
|
|
81
89
|
esac
|
|
82
90
|
fi
|
|
83
91
|
done < "$TELEMETRY_CONFIG_FILE"
|
|
@@ -85,5 +93,12 @@ fi
|
|
|
85
93
|
|
|
86
94
|
CONSOLE_TELEMETRY_REDACT="${CONSOLE_TELEMETRY_REDACT:-${TELEMETRY_CHANNEL_ANALYTICS_REDACT}}"
|
|
87
95
|
|
|
96
|
+
# Derive the live pricing source from the console when not set explicitly, the
|
|
97
|
+
# same way the transport derives /api/telemetry/records. One live source for
|
|
98
|
+
# bash/Python/TS runtimes; lib/pricing.sh caches it and falls back to bundled.
|
|
99
|
+
if [[ -z "${TELEMETRY_PRICING_URL:-}" && -n "${CONSOLE_TELEMETRY_URL:-}" ]]; then
|
|
100
|
+
TELEMETRY_PRICING_URL="${CONSOLE_TELEMETRY_URL%/}/api/telemetry/pricing"
|
|
101
|
+
fi
|
|
102
|
+
|
|
88
103
|
# Ensure directories exist
|
|
89
104
|
mkdir -p "$TELEMETRY_DATA_DIR" "$TELEMETRY_SESSION_DIR" 2>/dev/null
|
|
@@ -8,6 +8,10 @@ channel.analytics.redact=tool.input,tool.output,turn.prompt_text,delegation.targ
|
|
|
8
8
|
# The transport derives /api/telemetry/records from console_telemetry_url.
|
|
9
9
|
# console_telemetry_token=
|
|
10
10
|
# console_tenant_id=
|
|
11
|
+
# Live pricing registry source. If unset, derived from console_telemetry_url as
|
|
12
|
+
# <console>/api/telemetry/pricing so bash/Python/TS runtimes read one live
|
|
13
|
+
# source; lib/pricing.sh caches it and falls back to bundled pricing.json.
|
|
14
|
+
# console_pricing_url=https://console.kontourai.io/api/telemetry/pricing
|
|
11
15
|
enrich_system=true
|
|
12
16
|
enrich_workspace=true
|
|
13
17
|
enrich_auth=true
|
|
@@ -309,13 +309,35 @@ add_stop_data_and_emit_usage() {
|
|
|
309
309
|
tool_count=$(usage_count_tool_calls "$session_id" "$full_log")
|
|
310
310
|
delegation_count=$(usage_count_delegations "$session_id" "$full_log")
|
|
311
311
|
|
|
312
|
+
# Ground-truth token + cost usage from the runtime transcript, when the
|
|
313
|
+
# runtime exposes one (Claude Code, Codex, etc. set hook.transcript_path).
|
|
314
|
+
# Tokens are source-of-truth; estimated_cost_usd is derived from pricing.json
|
|
315
|
+
# (recomputed authoritatively console-side, so pricing updates are retroactive).
|
|
316
|
+
local transcript_path transcript_usage
|
|
317
|
+
transcript_path=$(echo "$event" | jq -r '.hook.transcript_path // ""')
|
|
318
|
+
transcript_usage=$(usage_parse_transcript "$transcript_path")
|
|
319
|
+
[[ -z "$transcript_usage" ]] && transcript_usage='null'
|
|
320
|
+
|
|
312
321
|
local usage_event
|
|
313
322
|
usage_event=$(echo "$event" | jq -c \
|
|
314
323
|
--arg m "$model" \
|
|
315
324
|
--argjson tc "$tool_count" \
|
|
316
325
|
--argjson dc "$delegation_count" \
|
|
326
|
+
--argjson tu "$transcript_usage" \
|
|
317
327
|
'.event_type = "session.usage" | .event_id = (.event_id + "-usage") | . + {
|
|
318
|
-
usage: {
|
|
328
|
+
usage: ({
|
|
329
|
+
model: $m,
|
|
330
|
+
duration_s: .session.duration_s,
|
|
331
|
+
tool_invocations: $tc,
|
|
332
|
+
delegations: $dc,
|
|
333
|
+
input_tokens: ($tu.input_tokens // null),
|
|
334
|
+
output_tokens: ($tu.output_tokens // null),
|
|
335
|
+
cache_creation_input_tokens: ($tu.cache_creation_input_tokens // null),
|
|
336
|
+
cache_read_input_tokens: ($tu.cache_read_input_tokens // null),
|
|
337
|
+
estimated_cost_usd: ($tu.estimated_cost_usd // null),
|
|
338
|
+
pricing_version: ($tu.pricing_version // null),
|
|
339
|
+
by_model: ($tu.by_model // null)
|
|
340
|
+
})
|
|
319
341
|
}')
|
|
320
342
|
transport_emit "$usage_event"
|
|
321
343
|
fi
|
|
@@ -0,0 +1,216 @@
|
|
|
1
|
+
# DESIGN DOC: Evolving Flow's `FlowRun` to be EVENT-SOURCED
|
|
2
|
+
|
|
3
|
+
**Status:** DRAFT for owner / Flow-maintainer review. Design only — no Flow source modified.
|
|
4
|
+
**Date:** 2026-06-27
|
|
5
|
+
**Repo:** `/Users/brian/dev/github/kontourai/flow`
|
|
6
|
+
**Branch studied:** `feat/emit-trust-bundles` @ 269ae97 (in-flight; one commit ahead of `origin/main`).
|
|
7
|
+
**Relates to:** Flow ADR 0001 ("Flow owns Flow Runs / transitions"), the in-flight trust-bundle emission work, and the companion consumer design `scratchpad/workflowrun-design.md` (Flow Agents side).
|
|
8
|
+
|
|
9
|
+
---
|
|
10
|
+
|
|
11
|
+
## 0. TL;DR — the three load-bearing findings
|
|
12
|
+
|
|
13
|
+
1. **`FlowRun` is NOT partway event-sourced.** The prompt's premise that `flow-run-store.ts:~85` already holds a run `events[]` array is a **false lead**: that `events: [...]` at `src/runtime/flow-run-store.ts:85-96` is the **demo acceptance-bundle fixture** — a Hachure *TrustBundle*'s `events` field (claims/evidence/policies/events) written into `scaffoldDemoRun`. It has nothing to do with `FlowRunState`. The actual persisted run state (`schemas/flow-run.schema.json:7`, `src/contracts/flow-types.ts:83-97`) has **no `events[]` at all**. It is a **mutable record** with three accumulated arrays — `gate_outcomes[]`, `transitions[]`, `exceptions[]` — plus a **stored** `status` and `current_step` that are set imperatively. So this work is **"introduce an event log,"** not "promote `events[]` to source of truth."
|
|
14
|
+
|
|
15
|
+
2. **`status`/`current_step` are stored, not folded.** `applyEvaluation` (`src/gates/flow-gates.ts:237-320`) mutates `state.status`, `state.current_step`, pushes to `state.transitions`, and merges `state.gate_outcomes` **in place**, then `saveRun` overwrites `state.json` (`flow-run-store.ts:209-214`). The projection (`projectFlowRun`, `src/console/console-projection.ts:577-607`) **reads the stored `state.status`/`state.transitions` directly** (`:585, :591`) — it re-shapes mutable state, it does not fold. **`transitions[]` is the closest thing to an event log that exists today**, but it is a *derived byproduct* of mutation, not the source of truth.
|
|
16
|
+
|
|
17
|
+
3. **The in-flight branch matters two ways.** `feat/emit-trust-bundles` (a) **adds** `writeTrustBundles` into `saveRun` (`flow-run-store.ts:193-214`) — emitting derived Hachure trust bundles per-gate + run-level under `<run>/trust/`; this is an **emission/projection** pattern the event log should align with, not collide with — and (b) **removes** `reDeriveBundleReports` + the per-evidence **`inquiry_records`** append-only audit series (deleted at `flow-run-store.ts` and `flow-types.ts` in the diff). Those `inquiry_records` were *"append-only series of point-in-time inquiry records … event high-water mark"* — i.e. the **one genuinely event-sourced-shaped structure in the codebase was just deleted on this branch.** The design must not resurrect it; it must reintroduce that discipline at the **run** level instead of the per-evidence level.
|
|
18
|
+
|
|
19
|
+
**Recommendation:** the thinnest valuable slice is to **add an append-only `events[]` log to `FlowRunState`, written by the existing mutators as a side-channel, with `transitions[]` redefined as a fold over it** — and prove fold-equality against today's `transitions[]`/`status`. Defer hash-chaining and "delete the mutable writes" to later phases gated on owner decisions.
|
|
20
|
+
|
|
21
|
+
---
|
|
22
|
+
|
|
23
|
+
## 1. Current-state map (grounded in file:line)
|
|
24
|
+
|
|
25
|
+
### 1.1 What persists today — `FlowRunState`
|
|
26
|
+
|
|
27
|
+
Schema `schemas/flow-run.schema.json` requires (`:7`): `schema_version` (const `"0.1"`, `:12`), `run_id`, `definition_id`, `status`, `current_step`, `gate_outcomes`, `transitions`, `exceptions`. TS mirror at `src/contracts/flow-types.ts:83-97`. Initial value at `src/definition/flow-definition.ts:350-368`: `status:"active"`, `current_step:firstStep.id`, empty `gate_outcomes/transitions/exceptions`. **No `events[]` field anywhere.**
|
|
28
|
+
|
|
29
|
+
| Array / field | What it holds today | Where written | Source of truth? |
|
|
30
|
+
|---|---|---|---|
|
|
31
|
+
| `status` (string enum, schema `:36`) | Current lifecycle status (`active`/`blocked`/`needs_decision`/`completed`/`failed`/`accepted_by_exception`) | set imperatively in `applyEvaluation` (`flow-gates.ts:253, 255, 289, 313`), `acceptException` (`flow-run-store.ts:349`) | **STORED** (not derived) |
|
|
32
|
+
| `current_step` (schema `:38`) | Current step id; open gates derived from it | `applyEvaluation:252, 290`; `initialState:361` | **STORED** |
|
|
33
|
+
| `gate_outcomes[]` (schema `:56-60`, `:73-110`) | Latest decision **per gate** — `mergeGateOutcome` *replaces* any prior outcome for the same `gate_id` (`flow-gates.ts:232-235`). One row per gate, **last-write-wins, not append-only.** | `applyEvaluation:239` | derived-ish but **collapsed** (history lost) |
|
|
34
|
+
| `transitions[]` (schema `:61-65`, `:111-135`) | **Append-only-ish** history of step movements: each pass/block/route-back pushes a `{from_step,to_step,status,reason,at,gate_id,…}` row (`flow-gates.ts:244, 257, 279, 291`). Carries `type:"route_back"`, `attempt`, `route_reason`, `limit_exceeded`, `classifier`, `diagnostics`, `analytics`. | `applyEvaluation` only | **this is the de-facto event log**, but it is a *byproduct* of mutation and only covers transitions (not evidence/exception/seal events) |
|
|
35
|
+
| `exceptions[]` (schema `:66-70`, `:137-148`) | Accepted exceptions `{id,gate_id,reason,authority,accepted_at}` — append-only | `acceptException` (`flow-run-store.ts:341-348`) | append-only, but separate stream |
|
|
36
|
+
| evidence | Evidence entries in `evidence-manifest.json` (separate file) | `attachEvidence` (`flow-run-store.ts:252-312`); `run.manifest.evidence.push` (`:309`) | append-only, but a separate stream in a separate file |
|
|
37
|
+
|
|
38
|
+
### 1.2 How status/projection are derived
|
|
39
|
+
|
|
40
|
+
- `evaluateRun` (`flow-run-store.ts:314-336`): loads run, evaluates open gates via `evaluateGate` (pure, `flow-gates.ts:133-230`), validates the transition (`validateEvaluationTransition`), then calls **`applyEvaluation`** which **mutates** state, then `saveRun`.
|
|
41
|
+
- `applyEvaluation` (`flow-gates.ts:237-320`) is the single chokepoint that turns a gate outcome into: a merged `gate_outcomes` row, a pushed `transitions` row, and **assignment of `status`/`current_step`/`next_action`/`updated_at`**. This is exactly the imperative state machine an event-fold would replace.
|
|
42
|
+
- `projectFlowRun` (`console-projection.ts:577-607`) reads **stored** `state.status` (`:591` via `projectRunIdentity`), `state.current_step` (`:594`), `state.transitions` (`:585`), `state.gate_outcomes` (via `projectGate`), `state.exceptions` (`:586`). **The projection is a re-shaping of stored mutable fields — `status` is NOT a fold.**
|
|
43
|
+
- `reportJson`/`renderSummary`/`renderResume` (`src/reports/flow-reports.ts:16, 124, 164`) likewise read `state.status`/`state.current_step` directly (`:23, :25, :127, :169`).
|
|
44
|
+
|
|
45
|
+
**Verdict:** Flow today is a classic **mutable state-machine record**. `transitions[]` + `exceptions[]` + the evidence manifest are *append-ish side records*; `status`/`current_step`/`gate_outcomes` are *destructively overwritten*. There is **no single ordered event log** and **no fold** producing `status`. This is meaningfully *less* event-sourced than the prompt assumed, and the one append-only audit structure (`inquiry_records`) was **removed on this branch** (§1.3).
|
|
46
|
+
|
|
47
|
+
### 1.3 What `feat/emit-trust-bundles` changed (the in-flight diff — `git diff origin/main..HEAD`)
|
|
48
|
+
|
|
49
|
+
The single commit `269ae97 feat: emit per-gate and run-level trust bundles (recursive trust)` does two things relevant here, plus a large console-UI/reports deletion (out of scope):
|
|
50
|
+
|
|
51
|
+
**(a) ADDS emission into the save path.** `writeTrustBundles` (`flow-run-store.ts:193-207`) now runs inside `saveRun` (`:213`): it builds a run-level Hachure trust bundle via `buildFlowTrustBundle({state})` and one per `gate_outcome` via `buildGateTrustBundle` (`src/gates/flow-trust-emit.ts:173-294`), writing them under `<run>/trust/run.json` and `<run>/trust/<gate>.json` (`src/runtime/flow-files.ts:14-36`). **These bundles are pure folds over `state.gate_outcomes`** (`flow-trust-emit.ts:201, 208-214`) — i.e. Flow already adopted the pattern "derive an inspectable artifact from run state on every save." An event log is the **same pattern, one level deeper** (derive the bundle from the *log* instead of from the collapsed `gate_outcomes`).
|
|
52
|
+
|
|
53
|
+
**(b) REMOVES the per-evidence append-only audit series.** The diff deletes `reDeriveBundleReports` (was in `flow-run-store.ts`) and the `inquiry_records?` field on `FlowEvidenceEntry` (`flow-types.ts:63-73` — the comment described it as *"Append-only series of point-in-time inquiry records (Surface DerivationCheckpoints), one per re-derivation … status-by-claim + statusFunctionVersion + asOf + event high-water mark"*). It also drops `freshness_transitions` from `evaluateRun`'s return (`flow-run-store.ts:335`) and the `checkpointFromReport, diffFreshness` imports.
|
|
54
|
+
|
|
55
|
+
**Why this matters for sequencing:** the branch is moving Flow toward **"state in → derived trust artifacts out"** and **away from** a per-evidence append log. The event-sourcing design must (i) **build on the emission pattern** (the event log becomes the *input* to `buildFlowTrustBundle`, replacing the collapsed `state.gate_outcomes`), and (ii) **reintroduce append-only audit discipline at the RUN level** — the thing `inquiry_records` was reaching for, but cleanly, as the run's source of truth rather than a per-evidence sidecar. Do **not** re-add `inquiry_records`; that would re-create the very fork this branch just removed.
|
|
56
|
+
|
|
57
|
+
### 1.4 Flow's documented run-model stance (ADR 0001)
|
|
58
|
+
|
|
59
|
+
`docs/adr/0001-flow-as-process-transparency-layer.md:21-27`: **Flow owns** Flow Runs, steps, gates, Transitions, gate evidence, exceptions, continuation. `:71` rejects modeling process state in Surface because "Surface models trust state, not process-specific semantics such as steps, gates, transitions, and continuation." **There is no ADR on event-sourcing the run.** So introducing an event log is a *new architectural decision* that should land as its own Flow ADR — it is squarely inside Flow's owned surface per ADR 0001, which is the right home (consistent with the consumer doc's §8 conclusion that event-sourcing belongs in `FlowRun`, not bespoke in Flow Agents).
|
|
60
|
+
|
|
61
|
+
---
|
|
62
|
+
|
|
63
|
+
## 2. Target model — event log as source of truth, `transitions[]`/`status` as folds
|
|
64
|
+
|
|
65
|
+
### 2.1 Principle
|
|
66
|
+
|
|
67
|
+
Make `FlowRunState.events[]`, an **append-only ordered log**, the run's source of truth. Everything else becomes a **deterministic fold**:
|
|
68
|
+
|
|
69
|
+
```
|
|
70
|
+
status = foldStatus(events) // replaces stored state.status
|
|
71
|
+
current_step = foldCurrentStep(events)
|
|
72
|
+
transitions[] = foldTransitions(events) // EXACTLY today's transitions rows, derived
|
|
73
|
+
gate_outcomes[]= foldGateOutcomes(events) // last-write-wins per gate, derived
|
|
74
|
+
exceptions[] = foldExceptions(events)
|
|
75
|
+
trust bundles = buildFlowTrustBundle(foldGateOutcomes(events)) // unchanged emission, fed by the fold
|
|
76
|
+
```
|
|
77
|
+
|
|
78
|
+
This is an **evolution of `applyEvaluation`**, not a rewrite: `applyEvaluation`'s existing branches (`flow-gates.ts:241-314`) become the **reducer cases** of `foldStatus`/`foldTransitions`. The mapping is nearly mechanical because the transition rows it pushes already carry the full causal payload.
|
|
79
|
+
|
|
80
|
+
### 2.2 Event taxonomy (reconciled with what exists)
|
|
81
|
+
|
|
82
|
+
Every event shares an envelope. **Reuse the `transitions[]` row shape** so the fold to `transitions[]` is near-identity:
|
|
83
|
+
|
|
84
|
+
```jsonc
|
|
85
|
+
{
|
|
86
|
+
"seq": 7, // monotonic per run
|
|
87
|
+
"type": "Transitioned",
|
|
88
|
+
"at": "2026-06-27T12:00:00Z", // already on every transition (schema :134)
|
|
89
|
+
"actor": "flow", // emitter; cf. trust-emit actor:"flow" (flow-trust-emit.ts:158)
|
|
90
|
+
"source": "evaluateRun", // the operation that emitted it
|
|
91
|
+
"payload": { /* type-specific */ }
|
|
92
|
+
// "_chain": {...} // OPTIONAL, Phase 3 — see §3
|
|
93
|
+
}
|
|
94
|
+
```
|
|
95
|
+
|
|
96
|
+
| Event type | Maps to today | Emitted at | Folds into |
|
|
97
|
+
|---|---|---|---|
|
|
98
|
+
| `RunStarted` | `initialState` (`flow-definition.ts:350-368`) | `startRun` (`flow-run-store.ts:157-172`) | seeds `status:"active"`, `current_step`, identity |
|
|
99
|
+
| `GateEvaluated` | a `GateOutcome` (`flow-gates.ts:133-230`) | `evaluateRun` per gate (`flow-run-store.ts:319-333`) | `gate_outcomes[]` (last-write-wins per `gate_id`) |
|
|
100
|
+
| `EvidenceAttached` | manifest push (`flow-run-store.ts:309`) | `attachEvidence` | evidence projection (manifest stays its own file initially) |
|
|
101
|
+
| `Transitioned` (pass) | `transitions.push{status:"allowed"}` (`flow-gates.ts:244-251`) | `applyEvaluation` pass branch | `transitions[]`, advances `current_step`, `status` active/completed |
|
|
102
|
+
| `Blocked` | `transitions.push{status:"blocked"}` non-route (`flow-gates.ts:279-287`) | block branch | `transitions[]`, `status:"blocked"` |
|
|
103
|
+
| `RoutedBack` | `transitions.push{type:"route_back"}` (`flow-gates.ts:288-311`) | route-back branch | `transitions[]`, `current_step:=route_back_to`, `status:"active"`, attempt counting |
|
|
104
|
+
| `ExceptionAccepted` | `exceptions.push` (`flow-run-store.ts:341-348`) | `acceptException` | `exceptions[]`, `status:"accepted_by_exception"` |
|
|
105
|
+
| `RunSealed` *(new)* | — (no equivalent today) | a future `sealRun`/delivery | terminal marker + head pointer for checkpoints (§4(d)) |
|
|
106
|
+
|
|
107
|
+
**Reconciliation note:** `GateEvaluated` and `Transitioned`/`Blocked`/`RoutedBack` are *distinct* events even though `applyEvaluation` does both in one call — the gate **decision** and its **effect on the run** are separate facts (the consumer doc wants the gate decision replayable independently). The fold re-derives `gate_outcomes[]` from `GateEvaluated` and `transitions[]` from the `Transitioned/Blocked/RoutedBack` family. This is the cleanest split and keeps `evaluateGate` (pure, already side-effect-free) unchanged.
|
|
108
|
+
|
|
109
|
+
**`attempt`/`limit_exceeded` route-back counting** currently reads `state.transitions` (`flow-definition.ts:415` `priorMatches`). After the change it reads `foldTransitions(events)` — identical data, so route-back cascade behavior (and the route-back tests) are preserved by construction.
|
|
110
|
+
|
|
111
|
+
### 2.3 Schema evolution
|
|
112
|
+
|
|
113
|
+
Add `events[]` to `schemas/flow-run.schema.json` (currently `additionalProperties:false`, `:8`, so this is a required, deliberate edit). Two viable shapes — **owner/maintainer decision**:
|
|
114
|
+
|
|
115
|
+
- **(A) Additive, bump `schema_version` 0.1→0.2:** `events[]` becomes **required**; `transitions[]`/`gate_outcomes[]`/`status` remain in the persisted file but are documented as **derived caches** (regenerated on save from the fold). Old `0.1` runs lack `events[]` → see migration §5.
|
|
116
|
+
- **(B) Pure log, defer:** persist **only** `events[]` (+ identity), drop the derived arrays from disk entirely, regenerate on read. Cleaner end state, bigger blast radius (every reader of `state.transitions`/`state.status` must go through the fold). This is the **zero-legacy end state** but should not be the first slice.
|
|
117
|
+
|
|
118
|
+
---
|
|
119
|
+
|
|
120
|
+
## 3. Tamper-evidence — should the log be hash-chained?
|
|
121
|
+
|
|
122
|
+
The consumer (ADR 0017, per `scratchpad/workflowrun-design.md` §5) leans on the flow-agents `command-log.jsonl` hash chain (`hash = sha256(prevHash + canonicalJson(record))`) as its tamper-evidence spine. If Flow Agents will *trust this log as a tamper-evident record*, the Flow event log should support an **optional** `_chain` per event with the **same construction** so the two integrity stories compose.
|
|
123
|
+
|
|
124
|
+
**Recommendation: design the envelope to carry an optional `_chain`, but do NOT couple Flow to flow-agents specifics.**
|
|
125
|
+
- Flow already imports `createHash` from `node:crypto` (`flow-run-store.ts:1`, used in `sha256File:216-219`), so the primitive is in-repo — no new dependency.
|
|
126
|
+
- Keep chaining **opt-in / Phase 3**: a plain monotonic `seq` (cheap, always on) gives ordering and replay; the `_chain` adds tamper-evidence when a consumer needs it. This avoids forcing Flow to adopt flow-agents' security posture before there's a Flow-side reason to.
|
|
127
|
+
- **Decouple by interface, not import:** Flow defines its own `hashEvent(prevHash, event)` over a canonical JSON of the Flow envelope. Flow Agents, if it wants one chain spanning both, reconciles at read time — it does **not** require Flow to chain into flow-agents' genesis. This honors "Flow owns Flow Runs" (ADR 0001) without importing a flow-agents-specific contract.
|
|
128
|
+
- **Open question for maintainers (§7 Q3):** does Flow *want* tamper-evidence as a first-class run property, or is that a flow-agents concern that should stay in the consumer? The `inquiry_records` removal (§1.3) suggests Flow is currently *trimming* audit-series complexity, so pushing a mandatory chain upstream now would cut against the branch's direction. Hence: **ship chaining as opt-in, let the consumer drive whether it becomes mandatory.**
|
|
129
|
+
|
|
130
|
+
**Hard rule (if adopted):** the log is append-only; "edits" are compensating events, never rewrites; any persisted derived state that disagrees with a fresh re-fold of `events[]` is a tamper signal. Add a Flow test asserting "a hand-edited event breaks the chain / changes the fold."
|
|
131
|
+
|
|
132
|
+
---
|
|
133
|
+
|
|
134
|
+
## 4. Consumer contract for Flow Agents (Flow stays the owner)
|
|
135
|
+
|
|
136
|
+
Flow Agents consumes Flow's run primitive (ADR 0001 `:41`, "Flow Agents will be the first consumer of Flow"). Proposed additions to Flow's public API (`src/index.ts` already exports `startRun`/`loadRun`/`saveRun`/`evaluateRun`/`projectFlowRun`/`projectFlowRunFromFiles` `:161-162`):
|
|
137
|
+
|
|
138
|
+
**(a) Append events** — `appendRunEvent(runId, event, {cwd})`: validates type+payload, assigns `seq` (and `_chain` if enabled), persists, returns the stored event incl. head hash. The existing mutators (`evaluateRun`, `acceptException`, `attachEvidence`) become **internal callers** of this — Flow Agents normally appends *indirectly* by calling those, and only uses `appendRunEvent` directly for flow-agents-specific event types if Flow allows an extensibility escape hatch (an open question for maintainers; not yet listed in §7.2).
|
|
139
|
+
|
|
140
|
+
**(b) Get projected state** — `projectFlowRun` / `projectFlowRunFromFiles` (already exist, `console-projection.ts:577, 609`) become **fold-backed**: same output shape (`FlowConsoleProjection`), but `status`/`transitions` come from the fold, not stored fields. **Consumer-transparent** — no Flow Agents change required to get the new guarantee.
|
|
141
|
+
|
|
142
|
+
**(c) Replay / trace** — `replayRun(runId, {atSeq|atTime})` → projected state as-of a point; `traceRun(runId)` → the ordered event timeline. These are the new capabilities the consumer doc's §3 ("rebuild the session") needs; they fall out for free once `status` is a fold. Render reuses `renderResume`/`renderSummary` (`flow-reports.ts:124, 164`).
|
|
143
|
+
|
|
144
|
+
**(d) Run head hash for a checkpoint pointer** — `getRunHead(runId)` → `{seq, headHash}`. This is the **"compiled vs raw notes" pointer** the consumer doc §3.3 wants: Flow Agents stores `event_log_ref + head_hash` in its sealed checkpoint, travels light, and can prove the raw log is unmodified later. Emit a `RunSealed` event (§2.2) capturing the head at seal time.
|
|
145
|
+
|
|
146
|
+
All four keep Flow as **owner of the model and the fold**; Flow Agents only **appends (indirectly) and reads**. No flow-agents trust/security types leak into Flow.
|
|
147
|
+
|
|
148
|
+
---
|
|
149
|
+
|
|
150
|
+
## 5. Migration plan — phased, honoring "no legacy / no fallbacks"
|
|
151
|
+
|
|
152
|
+
Standing owner rule: long-term **no legacy or fallbacks**; dual-keep is acceptable **only as execution-transition scaffolding** with a deletion deadline; end state = **`events[]` is the sole authority, `transitions[]`/`status` derived, no mutable field with independent authority.**
|
|
153
|
+
|
|
154
|
+
### Phase 0 — Coordinate with `feat/emit-trust-bundles` (prerequisite)
|
|
155
|
+
Land **on top of** 269ae97 (do not branch from `origin/main`). Shared file is `flow-run-store.ts` (both touch `saveRun`/`evaluateRun`). Sequence: let the trust-emit branch merge first (or rebase onto it), then this work treats `writeTrustBundles` as the **first consumer of the fold** (feed `buildFlowTrustBundle` from `foldGateOutcomes(events)` instead of `state.gate_outcomes`). **Do not re-add `inquiry_records`** — the run event log supersedes it.
|
|
156
|
+
|
|
157
|
+
### Phase 1 — Emit the event log (additive, reversible, no behavior change)
|
|
158
|
+
**Ships:** `appendRunEvent` + a typed `events[]` written **alongside** the existing mutations. `applyEvaluation` (`flow-gates.ts:237-320`) additionally appends `GateEvaluated`/`Transitioned`/`Blocked`/`RoutedBack`; `acceptException` appends `ExceptionAccepted`; `startRun` appends `RunStarted`. **Stored `status`/`transitions`/`gate_outcomes` remain the source of truth.** Schema gets `events[]` (option A, §2.3).
|
|
159
|
+
**Exit criteria:** `foldTransitions(events)` reproduces `state.transitions` **byte-identically** (modulo `updated_at`/`at` timestamps), and `foldStatus(events) === state.status`, for every run in `.flow/runs/` and the route-back/exception test suites. This **fold==stored equality is the correctness oracle** for all later phases. Add `replayRun`/`traceRun` (read-only).
|
|
160
|
+
**Reversible:** delete the `events[]` writes and the functions added in this phase (`appendRunEvent`, the folds, `replayRun`/`traceRun`).
|
|
161
|
+
|
|
162
|
+
### Phase 2 — Flip source of truth to the fold (dual-write scaffolding, bounded)
|
|
163
|
+
**Ships:** `saveRun` writes `transitions`/`gate_outcomes`/`status` **as a cache generated from the fold** (not from in-place mutation). `applyEvaluation` is refactored to *only* append events; the derived arrays are regenerated by folding. Readers unchanged (still read the cached fields). `writeTrustBundles` reads the fold. A `--rebuild` path regenerates every derived field purely from `events[]` and must match.
|
|
164
|
+
**Exit criteria:** an integrity check `fold(events) == persisted-derived-fields` runs in CI; `--rebuild` is byte-stable. **Name the deletion deadline here** (Phase 3) so the dual-write doesn't become permanent legacy.
|
|
165
|
+
**Reversible:** flip `saveRun` back to direct mutation.
|
|
166
|
+
|
|
167
|
+
### Phase 3 — Remove mutable authority; (optional) chain (zero-legacy end state)
|
|
168
|
+
**Ships:** delete the imperative `state.status = …` / `state.transitions.push(…)` assignments. `projectFlowRun`, `reportJson`, `renderResume`, route-back counting (`flow-definition.ts:415`) all read **via the fold**. Choose schema option B (persist only `events[]` + identity, derive the rest on read) **or** keep the derived fields strictly as a **generated read-only cache with no independent authority**. Optionally enable `_chain` (§3) + add the tamper-detection test. `getRunHead`/`RunSealed` shipped for the checkpoint pointer.
|
|
169
|
+
**Exit criteria:** grep shows no code path assigns `status`/`transitions` except by appending an event; the fold is the only authority; `schema_version` bumped; consumer (Flow Agents) reads projections/replay only.
|
|
170
|
+
**This is the legacy-free end state.**
|
|
171
|
+
|
|
172
|
+
**Reversibility summary:** Phase 1 fully reversible; Phase 2 reversible (flip the writer); Phase 3 is the commitment point (deletes the mutable path) — gated on the Phase-1 oracle being green across all runs/tests and the §7 open questions resolved.
|
|
173
|
+
|
|
174
|
+
---
|
|
175
|
+
|
|
176
|
+
## 6. Coordination with `feat/emit-trust-bundles` (explicit)
|
|
177
|
+
|
|
178
|
+
- **Branch base:** build on 269ae97, not `origin/main`. The branch deletes a lot of console-UI/reports/test files (`git diff --stat`: ~6k deletions) — do **not** reintroduce them; scope this work to `flow-run-store.ts`, `flow-gates.ts`, `flow-types.ts`, `schemas/flow-run.schema.json`, and new event/fold modules.
|
|
179
|
+
- **Shared files & conflict surface:** `flow-run-store.ts` (`saveRun:209-214`, `evaluateRun:314-336`) and `flow-gates.ts` (`applyEvaluation`). Both branches edit `saveRun`. Resolution: the event-log work **inserts** an append step in `applyEvaluation`/`saveRun` and **redirects** `writeTrustBundles`' input from `state.gate_outcomes` to the fold — additive, low conflict if rebased after trust-emit lands.
|
|
180
|
+
- **Direction alignment:** trust-emit = "derive inspectable artifacts from run state on save." Event-sourcing = "make the log the run state, derive everything (including those artifacts) from it." They **compose**: the trust bundle becomes a *second-order fold*. The one thing to **not** do is revive `inquiry_records` (deleted on this branch) — the run event log is its successor at the right altitude.
|
|
181
|
+
|
|
182
|
+
---
|
|
183
|
+
|
|
184
|
+
## 7. Thinnest first slice + open questions
|
|
185
|
+
|
|
186
|
+
### 7.1 Recommended thinnest first slice (Phase 1, scoped)
|
|
187
|
+
**Add `events[]` to `FlowRunState` + emit `RunStarted`/`GateEvaluated`/`Transitioned`/`Blocked`/`RoutedBack`/`ExceptionAccepted` from the existing mutators, plus `foldTransitions`/`foldStatus` and a read-only `traceRun`/`replayRun --at <seq>`. Keep stored `status`/`transitions` as the source of truth. Prove `fold(events) == stored` across all `.flow/runs/` and the existing route-back/exception tests.**
|
|
188
|
+
|
|
189
|
+
Why this is the right first cut: it is **safe** (no behavior change — folds run alongside, asserted equal), **valuable** (delivers the consumer's "recreate the session"/trace need immediately and the head-hash pointer can follow), **reviewable** (one schema field + append calls in the two existing mutators + two pure fold functions + a read-only command; ~no change to `evaluateGate`, gates, or the trust-emit path), and it **produces the correctness oracle** (fold==stored) that de-risks Phases 2-3. Smallest demo: `traceRun` on `./.flow/runs/run.1781102325268/` showing its real timeline derived from `events[]`, with `foldTransitions` matching the persisted `transitions[]`.
|
|
190
|
+
|
|
191
|
+
### 7.2 Top open questions (owner / Flow maintainers must decide)
|
|
192
|
+
1. **Schema strategy (§2.3): additive-with-derived-caches (A) vs pure-log (B)?** A is the safe first slice; B is the zero-legacy end state. Which, and is bumping `schema_version` 0.1→0.2 acceptable now (the const is `"0.1"`, `schema:12`)? *I'm unsure how many external consumers pin the `0.1` const — maintainers must confirm blast radius.*
|
|
193
|
+
2. **`GateEvaluated` vs `Transitioned` as separate events, or one combined event?** I recommend separate (gate decision is independently replayable, matches the consumer's gate-debugger goal), but it adds an event type and a fold case `applyEvaluation` doesn't distinguish today (`flow-gates.ts:237-320` does both in one call). Maintainer call.
|
|
194
|
+
3. **Does Flow want hash-chaining as a first-class run property, or keep tamper-evidence in the consumer?** The branch just *removed* the `inquiry_records` audit series (§1.3), suggesting Flow is trimming audit complexity — so I lean **opt-in `_chain`, consumer-driven** (§3), but this is genuinely the owner's architectural call about how much of ADR 0017's tamper-evidence belongs upstream in Flow vs in Flow Agents.
|
|
195
|
+
|
|
196
|
+
### 7.3 Honest uncertainties / risks
|
|
197
|
+
- **The prompt's `events[]@~85` premise is wrong** (it's the demo *acceptance-bundle* fixture, `flow-run-store.ts:85-96`); I want to flag this explicitly because the whole "promote vs introduce" framing hinges on it — this is **"introduce a log."** If I've misread and there's a *different* `events[]` the maintainers had in mind, that changes §2.
|
|
198
|
+
- **`gate_outcomes[]` is last-write-wins** (`mergeGateOutcome`, `flow-gates.ts:232-235`) — it already *loses* history. Folding from `GateEvaluated` events is strictly *more* information; the only risk is a fold that doesn't reproduce the exact collapsed array order. The oracle (§7.1) catches this.
|
|
199
|
+
- **Replay determinism** depends on pinning the fold/evaluator version: if `evaluateGate`/Surface status derivation changes, replaying old events may differ. Record an evaluator/`statusFunctionVersion` on events (the deleted `inquiry_records` tracked exactly this, per a comment since removed from `flow-types.ts`, so the need is real). Worth carrying even in Phase 1.
|
|
200
|
+
- **Two sources of truth during Phase 2** are exactly the dual-write the owner dislikes; this is only acceptable as bounded scaffolding with the Phase-3 deletion deadline named.
|
|
201
|
+
|
|
202
|
+
---
|
|
203
|
+
|
|
204
|
+
## Appendix — key file:line references
|
|
205
|
+
- Demo fixture `events[]` (the false lead): `src/runtime/flow-run-store.ts:85-96`
|
|
206
|
+
- Persisted run shape: `schemas/flow-run.schema.json:7` (required), `:36` (status enum), `:56-70` (gate_outcomes/transitions/exceptions), `:111-135` (transition row)
|
|
207
|
+
- `FlowRunState` TS: `src/contracts/flow-types.ts:83-97`; `GateOutcome`: `:105-113`
|
|
208
|
+
- Initial state: `src/definition/flow-definition.ts:350-368`; route-back attempt counting reads transitions: `:415`
|
|
209
|
+
- The imperative state machine (becomes the reducer): `applyEvaluation` `src/gates/flow-gates.ts:237-320`; `mergeGateOutcome` (last-write-wins) `:232-235`; pure `evaluateGate` `:133-230`
|
|
210
|
+
- Save/evaluate path: `saveRun` `src/runtime/flow-run-store.ts:209-214`; `evaluateRun` `:314-336`; `acceptException` `:338-353`
|
|
211
|
+
- Projection reads stored status (not a fold): `projectFlowRun` `src/console/console-projection.ts:577-607` (`:585` transitions, `:591` identity/status, `:594` current_step); reports `src/reports/flow-reports.ts:16, 23, 25, 124, 164, 169`
|
|
212
|
+
- In-flight trust emission (build ON this): `writeTrustBundles` `src/runtime/flow-run-store.ts:193-207` (called in saveRun `:213`); builders `src/gates/flow-trust-emit.ts:173-294` (run-level folds `state.gate_outcomes` `:201, 208-214`); layout `src/runtime/flow-files.ts:14-36`
|
|
213
|
+
- In-flight REMOVED append-only audit series (do NOT revive): `reDeriveBundleReports` + `inquiry_records` deleted in `git diff origin/main..HEAD -- src/runtime/flow-run-store.ts src/contracts/flow-types.ts`
|
|
214
|
+
- Public API surface: `src/index.ts:161-162` (`projectFlowRun`, `projectFlowRunFromFiles`)
|
|
215
|
+
- Ownership stance: `docs/adr/0001-flow-as-process-transparency-layer.md:21-27` (Flow owns Runs/Transitions), `:41` (Flow Agents is first consumer), `:71` (process semantics not in Surface)
|
|
216
|
+
- Hash primitive already in-repo: `node:crypto createHash` `src/runtime/flow-run-store.ts:1, 216-219`
|