@kontourai/flow-agents 2.0.1 → 2.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (41) hide show
  1. package/.github/actions/trust-verify/action.yml +4 -2
  2. package/.github/workflows/ci.yml +16 -4
  3. package/.github/workflows/docs-pages.yml +1 -1
  4. package/.github/workflows/kit-gates-demo.yml +2 -2
  5. package/.github/workflows/publish-npm.yml +2 -2
  6. package/.github/workflows/runtime-compat.yml +2 -2
  7. package/.github/workflows/trust-reconcile.yml +1 -1
  8. package/CHANGELOG.md +28 -0
  9. package/README.md +3 -3
  10. package/build/src/cli/workflow-sidecar.js +8 -2
  11. package/context/scripts/telemetry/lib/config.sh +15 -0
  12. package/context/scripts/telemetry/telemetry.conf +4 -0
  13. package/context/scripts/telemetry/telemetry.sh +23 -1
  14. package/docs/design/flowrun-eventsourcing-design.md +216 -0
  15. package/docs/design/workflowrun-observability-design.md +431 -0
  16. package/evals/ci/antigaming-suite.sh +1 -0
  17. package/evals/ci/run-baseline.sh +2 -0
  18. package/evals/integration/test_command_log_concurrency.sh +114 -0
  19. package/evals/integration/test_gate_lockdown.sh +21 -6
  20. package/evals/integration/test_usage_cost.sh +119 -0
  21. package/evals/integration/test_verify_cli.sh +23 -0
  22. package/integrations/strands/flow_agents_strands/hooks.py +126 -1
  23. package/integrations/strands/flow_agents_strands/telemetry.py +172 -0
  24. package/integrations/strands/tests/test_usage.py +129 -0
  25. package/integrations/strands-ts/src/hooks.ts +135 -1
  26. package/integrations/strands-ts/src/telemetry.ts +170 -0
  27. package/integrations/strands-ts/test/test-usage.ts +85 -0
  28. package/package.json +2 -2
  29. package/scripts/ci/trust-reconcile.js +7 -23
  30. package/scripts/hooks/evidence-capture.js +85 -50
  31. package/scripts/hooks/stop-goal-fit.js +18 -45
  32. package/scripts/lib/command-log-chain.js +73 -0
  33. package/scripts/repair-command-log.js +8 -15
  34. package/scripts/telemetry/lib/config.sh +15 -0
  35. package/scripts/telemetry/lib/pricing.sh +42 -0
  36. package/scripts/telemetry/lib/usage.sh +108 -0
  37. package/scripts/telemetry/pricing.golden.json +15 -0
  38. package/scripts/telemetry/pricing.json +31 -0
  39. package/scripts/telemetry/telemetry.conf +4 -0
  40. package/scripts/telemetry/telemetry.sh +23 -1
  41. package/src/cli/workflow-sidecar.ts +8 -2
@@ -113,7 +113,9 @@ runs:
113
113
  BUNDLE_ARG=""
114
114
  fi
115
115
 
116
- node "${{ github.action_path }}/../../scripts/ci/trust-reconcile.js" \
116
+ # action_path is .github/actions/trust-verify/ — climb THREE levels to the
117
+ # repo root where scripts/ lives (trust-verify -> actions -> .github -> root).
118
+ node "${{ github.action_path }}/../../../scripts/ci/trust-reconcile.js" \
117
119
  --commands "$VERIFY_COMMAND" \
118
120
  --repo-root "${{ github.workspace }}" \
119
121
  $BUNDLE_ARG || {
@@ -130,7 +132,7 @@ runs:
130
132
  - name: Mint attestation
131
133
  if: inputs.sign == 'true' && steps.trust-verify.outcome == 'success'
132
134
  shell: bash
133
- run: node "${{ github.action_path }}/../../scripts/ci/mint-attestation.js"
135
+ run: node "${{ github.action_path }}/../../../scripts/ci/mint-attestation.js"
134
136
 
135
137
  - name: Upload attestation
136
138
  if: inputs.sign == 'true' && steps.trust-verify.outcome == 'success'
@@ -14,6 +14,14 @@ concurrency:
14
14
  cancel-in-progress: true
15
15
 
16
16
  jobs:
17
+ # Suite-wide secret-scan gate, defined once in kontourai/.github (Hachure: one
18
+ # normative source). Scans git-tracked history; gitignored runtime/.env excluded.
19
+ secret-scan:
20
+ name: Secret Scan
21
+ uses: kontourai/.github/.github/workflows/secret-scan.yml@main
22
+ permissions:
23
+ contents: read
24
+
17
25
  source-and-static:
18
26
  name: Source and Static
19
27
  runs-on: ubuntu-latest
@@ -25,7 +33,7 @@ jobs:
25
33
 
26
34
  steps:
27
35
  - name: Checkout
28
- uses: actions/checkout@df4cb1c069e1874edd31b4311f1884172cec0e10 # v6.0.3
36
+ uses: actions/checkout@9c091bb21b7c1c1d1991bb908d89e4e9dddfe3e0 # v7.0.0
29
37
 
30
38
  - name: Set up Node.js
31
39
  uses: actions/setup-node@48b55a011bda9f5d6aeb4c2d9c7362e8dae4041e # v6.4.0
@@ -91,7 +99,7 @@ jobs:
91
99
 
92
100
  steps:
93
101
  - name: Checkout
94
- uses: actions/checkout@df4cb1c069e1874edd31b4311f1884172cec0e10 # v6.0.3
102
+ uses: actions/checkout@9c091bb21b7c1c1d1991bb908d89e4e9dddfe3e0 # v7.0.0
95
103
 
96
104
  - name: Set up Node.js
97
105
  uses: actions/setup-node@48b55a011bda9f5d6aeb4c2d9c7362e8dae4041e # v6.4.0
@@ -152,7 +160,7 @@ jobs:
152
160
 
153
161
  steps:
154
162
  - name: Checkout
155
- uses: actions/checkout@df4cb1c069e1874edd31b4311f1884172cec0e10 # v6.0.3
163
+ uses: actions/checkout@9c091bb21b7c1c1d1991bb908d89e4e9dddfe3e0 # v7.0.0
156
164
 
157
165
  - name: Set up Node.js
158
166
  uses: actions/setup-node@48b55a011bda9f5d6aeb4c2d9c7362e8dae4041e # v6.4.0
@@ -242,6 +250,10 @@ jobs:
242
250
  continue-on-error: true
243
251
  run: bash evals/ci/run-baseline.sh --check telemetry-doctor-integration
244
252
 
253
+ - name: Usage and cost integration
254
+ continue-on-error: true
255
+ run: bash evals/ci/run-baseline.sh --check usage-and-cost-integration
256
+
245
257
  - name: Utterance check integration
246
258
  continue-on-error: true
247
259
  run: bash evals/ci/run-baseline.sh --check utterance-check-integration
@@ -280,7 +292,7 @@ jobs:
280
292
 
281
293
  steps:
282
294
  - name: Checkout
283
- uses: actions/checkout@df4cb1c069e1874edd31b4311f1884172cec0e10 # v6.0.3
295
+ uses: actions/checkout@9c091bb21b7c1c1d1991bb908d89e4e9dddfe3e0 # v7.0.0
284
296
 
285
297
  - name: Set up Node.js
286
298
  uses: actions/setup-node@48b55a011bda9f5d6aeb4c2d9c7362e8dae4041e # v6.4.0
@@ -22,7 +22,7 @@ jobs:
22
22
  runs-on: ubuntu-latest
23
23
  steps:
24
24
  - name: Checkout
25
- uses: actions/checkout@df4cb1c069e1874edd31b4311f1884172cec0e10 # v6.0.3
25
+ uses: actions/checkout@9c091bb21b7c1c1d1991bb908d89e4e9dddfe3e0 # v7.0.0
26
26
 
27
27
  - name: Configure Pages
28
28
  uses: actions/configure-pages@45bfe0192ca1faeb007ade9deae92b16b8254a0d # v6.0.0
@@ -37,7 +37,7 @@ jobs:
37
37
 
38
38
  steps:
39
39
  - name: Checkout
40
- uses: actions/checkout@df4cb1c069e1874edd31b4311f1884172cec0e10 # v6.0.3
40
+ uses: actions/checkout@9c091bb21b7c1c1d1991bb908d89e4e9dddfe3e0 # v7.0.0
41
41
 
42
42
  - name: Set up Node.js
43
43
  uses: actions/setup-node@48b55a011bda9f5d6aeb4c2d9c7362e8dae4041e # v6.4.0
@@ -103,7 +103,7 @@ jobs:
103
103
 
104
104
  steps:
105
105
  - name: Checkout
106
- uses: actions/checkout@df4cb1c069e1874edd31b4311f1884172cec0e10 # v6.0.3
106
+ uses: actions/checkout@9c091bb21b7c1c1d1991bb908d89e4e9dddfe3e0 # v7.0.0
107
107
 
108
108
  - name: Set up Node.js
109
109
  uses: actions/setup-node@48b55a011bda9f5d6aeb4c2d9c7362e8dae4041e # v6.4.0
@@ -22,7 +22,7 @@ jobs:
22
22
  runs-on: ubuntu-latest
23
23
  steps:
24
24
  - name: Check out repository
25
- uses: actions/checkout@df4cb1c069e1874edd31b4311f1884172cec0e10 # v6.0.3
25
+ uses: actions/checkout@9c091bb21b7c1c1d1991bb908d89e4e9dddfe3e0 # v7.0.0
26
26
  with:
27
27
  fetch-depth: 0
28
28
 
@@ -58,7 +58,7 @@ jobs:
58
58
  id-token: write
59
59
  steps:
60
60
  - name: Check out repository
61
- uses: actions/checkout@df4cb1c069e1874edd31b4311f1884172cec0e10 # v6.0.3
61
+ uses: actions/checkout@9c091bb21b7c1c1d1991bb908d89e4e9dddfe3e0 # v7.0.0
62
62
  with:
63
63
  fetch-depth: 0
64
64
 
@@ -37,7 +37,7 @@ jobs:
37
37
  version: pi --version
38
38
  steps:
39
39
  - name: Checkout
40
- uses: actions/checkout@df4cb1c069e1874edd31b4311f1884172cec0e10 # v6.0.3
40
+ uses: actions/checkout@9c091bb21b7c1c1d1991bb908d89e4e9dddfe3e0 # v7.0.0
41
41
 
42
42
  - name: Set up Node.js
43
43
  uses: actions/setup-node@48b55a011bda9f5d6aeb4c2d9c7362e8dae4041e # v6.4.0
@@ -67,7 +67,7 @@ jobs:
67
67
  timeout-minutes: 20
68
68
  steps:
69
69
  - name: Checkout
70
- uses: actions/checkout@df4cb1c069e1874edd31b4311f1884172cec0e10 # v6.0.3
70
+ uses: actions/checkout@9c091bb21b7c1c1d1991bb908d89e4e9dddfe3e0 # v7.0.0
71
71
 
72
72
  - name: Set up Node.js
73
73
  uses: actions/setup-node@48b55a011bda9f5d6aeb4c2d9c7362e8dae4041e # v6.4.0
@@ -62,7 +62,7 @@ jobs:
62
62
 
63
63
  steps:
64
64
  - name: Checkout
65
- uses: actions/checkout@df4cb1c069e1874edd31b4311f1884172cec0e10 # v6.0.3
65
+ uses: actions/checkout@9c091bb21b7c1c1d1991bb908d89e4e9dddfe3e0 # v7.0.0
66
66
 
67
67
  - name: Set up Node.js
68
68
  uses: actions/setup-node@48b55a011bda9f5d6aeb4c2d9c7362e8dae4041e # v6.4.0
package/CHANGELOG.md CHANGED
@@ -1,5 +1,33 @@
1
1
  # Changelog
2
2
 
3
+ ## [2.1.1](https://github.com/kontourai/flow-agents/compare/v2.1.0...v2.1.1) (2026-06-29)
4
+
5
+
6
+ ### Refactoring
7
+
8
+ * **flow-agents:** one shared module for command-log chain helpers (ops[#20](https://github.com/kontourai/flow-agents/issues/20)) ([#249](https://github.com/kontourai/flow-agents/issues/249)) ([67af85f](https://github.com/kontourai/flow-agents/commit/67af85f5010dace3f33b36b86245e0c7aad95f77))
9
+
10
+ ## [2.1.0](https://github.com/kontourai/flow-agents/compare/v2.0.1...v2.1.0) (2026-06-29)
11
+
12
+
13
+ ### Features
14
+
15
+ * **telemetry:** derive live pricing source from the console ([#242](https://github.com/kontourai/flow-agents/issues/242)) ([ddce44e](https://github.com/kontourai/flow-agents/commit/ddce44e813e9a3515953324f4878bf51c33252ba))
16
+ * **telemetry:** real token+cost capture with single-source versioned pricing ([#241](https://github.com/kontourai/flow-agents/issues/241)) ([b0bd4c3](https://github.com/kontourai/flow-agents/commit/b0bd4c347897ec77f60d84cae702e7f42b2871d7))
17
+
18
+
19
+ ### Fixes
20
+
21
+ * **evidence-capture:** serialize command-log appends to prevent chain forks ([#232](https://github.com/kontourai/flow-agents/issues/232)) ([bb167e9](https://github.com/kontourai/flow-agents/commit/bb167e93e7f6cc19baa88da613e96fe88a681c10))
22
+ * **flow-agents:** stop corrupting sidecar JSONL event lines ([#244](https://github.com/kontourai/flow-agents/issues/244)) ([fb65d10](https://github.com/kontourai/flow-agents/commit/fb65d1017e5cb659ce2b48da7a548f0c1f360426))
23
+ * **trust-verify action:** correct cross-repo script path (../../ → ../../../) ([#240](https://github.com/kontourai/flow-agents/issues/240)) ([a75a6d2](https://github.com/kontourai/flow-agents/commit/a75a6d28baf68b4be527a2e8cdff8f007af88bd5))
24
+
25
+
26
+ ### Documentation
27
+
28
+ * **design:** preserve WorkflowRun observability + FlowRun event-sourcing design notes ([#239](https://github.com/kontourai/flow-agents/issues/239)) ([c2dc116](https://github.com/kontourai/flow-agents/commit/c2dc11698cf63704f14087001c4494079195d197))
29
+ * **flow-agents:** advertise the real eval coverage, clearly scoped (ops[#23](https://github.com/kontourai/flow-agents/issues/23)) ([#248](https://github.com/kontourai/flow-agents/issues/248)) ([d208207](https://github.com/kontourai/flow-agents/commit/d20820749408d5fa63f2bf1470252000712de5d8))
30
+
3
31
  ## [2.0.1](https://github.com/kontourai/flow-agents/compare/v2.0.0...v2.0.1) (2026-06-27)
4
32
 
5
33
 
package/README.md CHANGED
@@ -29,7 +29,7 @@ Flow Agents addresses this with a process-discipline layer that sits between the
29
29
  - **Four canonical policies** — workflow steering (phase reminders at each turn), quality gate (per-file checks after edits), stop-goal-fit (evidence check before the agent stops), and config protection (veto writes to linter/formatter configs). Each policy class has a canonical script under `scripts/hooks/` and compiles to the host's native hook format.
30
30
  - **Evidence over confidence** — important work ends with tests, browser checks, CI results, review findings, governance reports, or an explicit `NOT_VERIFIED` gap. Optional [Veritas](docs/veritas-integration.md) integration attaches repo-governance evidence without making it mandatory.
31
31
  - **Verifiable, un-gameable "done"** — the agent can't mark work complete that isn't: the gate re-derives the verdict from independent evidence, an external CI anchor re-runs the verification fresh and fails the merge on any divergence, and CI mints a Sigstore-signed record of what shipped. See [Verifiable Trust — why "done" actually means done](docs/verifiable-trust.md).
32
- - **Evals that keep the bundle honest** — 77 integration and 36 static bundle assertions validate the skills, contracts, fixtures, and hook influence as the bundle evolves.
32
+ - **Evals that keep the bundle honest** — 60 integration scenarios (1,829 assertions) and 7 static suites (110 assertions) validate the skills, contracts, fixtures, and hook influence as the bundle evolves.
33
33
 
34
34
  ## Flow Agents as a process-discipline layer
35
35
 
@@ -52,8 +52,8 @@ L2 means all four policy classes with blocking; L1 means steering and stop-goal-
52
52
 
53
53
  | Runtime | Ships | Tested |
54
54
  | --- | --- | --- |
55
- | Claude Code | install + hooks + bundle | 77 integration + 36 static assertions — reference implementation |
56
- | Codex | install + hooks + bundle | 77 integration + 36 static assertions — reference implementation |
55
+ | Claude Code | install + hooks + bundle | 60 integration scenarios + 7 static suites (1,939 assertions) — reference implementation |
56
+ | Codex | install + hooks + bundle | 60 integration scenarios + 7 static suites (1,939 assertions) — reference implementation |
57
57
  | Kiro | install + hooks + bundle | included in bundle assertions |
58
58
 
59
59
  **Partial support — L1 (steering + stop-goal-fit warning)**
@@ -15,11 +15,17 @@ export const verdicts = new Set(["pass", "partial", "fail", "not_verified"]);
15
15
  function now() { return new Date().toISOString().replace(/\.\d{3}Z$/, "Z"); }
16
16
  function read(file) { return fs.readFileSync(file, "utf8"); }
17
17
  export function writeJson(file, payload) { fs.mkdirSync(path.dirname(file), { recursive: true }); fs.writeFileSync(file, `${JSON.stringify(payload, null, 2)}\n`); }
18
- function printJson(payload) { console.log(JSON.stringify(payload).replace(/":/g, '": ').replace(/,"/g, ', "')); }
18
+ // Single-line but readable "key": "value" form. Built by collapsing the
19
+ // structural whitespace from an indented stringify — corruption-proof, unlike a
20
+ // regex that would also rewrite ":"/"," sequences inside string values.
21
+ function spacedLine(payload, replacer) {
22
+ return JSON.stringify(payload, replacer, 1).replace(/\n\s*/g, " ");
23
+ }
24
+ function printJson(payload) { console.log(spacedLine(payload)); }
19
25
  export function loadJson(file, fallback = {}) { return fs.existsSync(file) ? JSON.parse(read(file)) : { ...fallback }; }
20
26
  export function appendJsonl(file, payload) {
21
27
  fs.mkdirSync(path.dirname(file), { recursive: true });
22
- const line = JSON.stringify(payload, Object.keys(payload).sort()).replace(/":/g, '": ').replace(/,"/g, ', "');
28
+ const line = spacedLine(payload, Object.keys(payload).sort());
23
29
  fs.appendFileSync(file, `${line}\n`);
24
30
  }
25
31
  function die(message) { throw new Error(message); }
@@ -38,6 +38,11 @@ CONSOLE_TELEMETRY_URL="${CONSOLE_TELEMETRY_URL:-${CONSOLE_URL:-}}"
38
38
  CONSOLE_TELEMETRY_ENDPOINT_URL="${CONSOLE_TELEMETRY_ENDPOINT_URL:-}"
39
39
  CONSOLE_TELEMETRY_TOKEN="${CONSOLE_TELEMETRY_TOKEN:-${CONSOLE_AUTH_TOKEN:-}}"
40
40
  CONSOLE_TENANT_ID="${CONSOLE_TENANT_ID:-}"
41
+ # Pricing registry source (consumed by lib/pricing.sh). Explicit file/URL win;
42
+ # otherwise the URL is derived from the console below so all runtimes read one
43
+ # live pricing source. Falls back to the bundled pricing.json offline.
44
+ TELEMETRY_PRICING_FILE="${TELEMETRY_PRICING_FILE:-${FLOW_AGENTS_PRICING_FILE:-}}"
45
+ TELEMETRY_PRICING_URL="${TELEMETRY_PRICING_URL:-${FLOW_AGENTS_PRICING_URL:-}}"
41
46
 
42
47
  # Load config file if it exists
43
48
  if [[ -f "$TELEMETRY_CONFIG_FILE" ]]; then
@@ -78,6 +83,9 @@ if [[ -f "$TELEMETRY_CONFIG_FILE" ]]; then
78
83
  console_telemetry_token) CONSOLE_TELEMETRY_TOKEN="$value" ;;
79
84
  console_tenant_id) CONSOLE_TENANT_ID="$value" ;;
80
85
  console_telemetry_redact) CONSOLE_TELEMETRY_REDACT="$value" ;;
86
+ console_pricing_url) TELEMETRY_PRICING_URL="$value" ;;
87
+ pricing_url) TELEMETRY_PRICING_URL="$value" ;;
88
+ pricing_file) TELEMETRY_PRICING_FILE="$value" ;;
81
89
  esac
82
90
  fi
83
91
  done < "$TELEMETRY_CONFIG_FILE"
@@ -85,5 +93,12 @@ fi
85
93
 
86
94
  CONSOLE_TELEMETRY_REDACT="${CONSOLE_TELEMETRY_REDACT:-${TELEMETRY_CHANNEL_ANALYTICS_REDACT}}"
87
95
 
96
+ # Derive the live pricing source from the console when not set explicitly, the
97
+ # same way the transport derives /api/telemetry/records. One live source for
98
+ # bash/Python/TS runtimes; lib/pricing.sh caches it and falls back to bundled.
99
+ if [[ -z "${TELEMETRY_PRICING_URL:-}" && -n "${CONSOLE_TELEMETRY_URL:-}" ]]; then
100
+ TELEMETRY_PRICING_URL="${CONSOLE_TELEMETRY_URL%/}/api/telemetry/pricing"
101
+ fi
102
+
88
103
  # Ensure directories exist
89
104
  mkdir -p "$TELEMETRY_DATA_DIR" "$TELEMETRY_SESSION_DIR" 2>/dev/null
@@ -8,6 +8,10 @@ channel.analytics.redact=tool.input,tool.output,turn.prompt_text,delegation.targ
8
8
  # The transport derives /api/telemetry/records from console_telemetry_url.
9
9
  # console_telemetry_token=
10
10
  # console_tenant_id=
11
+ # Live pricing registry source. If unset, derived from console_telemetry_url as
12
+ # <console>/api/telemetry/pricing so bash/Python/TS runtimes read one live
13
+ # source; lib/pricing.sh caches it and falls back to bundled pricing.json.
14
+ # console_pricing_url=https://console.kontourai.io/api/telemetry/pricing
11
15
  enrich_system=true
12
16
  enrich_workspace=true
13
17
  enrich_auth=true
@@ -309,13 +309,35 @@ add_stop_data_and_emit_usage() {
309
309
  tool_count=$(usage_count_tool_calls "$session_id" "$full_log")
310
310
  delegation_count=$(usage_count_delegations "$session_id" "$full_log")
311
311
 
312
+ # Ground-truth token + cost usage from the runtime transcript, when the
313
+ # runtime exposes one (Claude Code, Codex, etc. set hook.transcript_path).
314
+ # Tokens are source-of-truth; estimated_cost_usd is derived from pricing.json
315
+ # (recomputed authoritatively console-side, so pricing updates are retroactive).
316
+ local transcript_path transcript_usage
317
+ transcript_path=$(echo "$event" | jq -r '.hook.transcript_path // ""')
318
+ transcript_usage=$(usage_parse_transcript "$transcript_path")
319
+ [[ -z "$transcript_usage" ]] && transcript_usage='null'
320
+
312
321
  local usage_event
313
322
  usage_event=$(echo "$event" | jq -c \
314
323
  --arg m "$model" \
315
324
  --argjson tc "$tool_count" \
316
325
  --argjson dc "$delegation_count" \
326
+ --argjson tu "$transcript_usage" \
317
327
  '.event_type = "session.usage" | .event_id = (.event_id + "-usage") | . + {
318
- usage: {model: $m, duration_s: .session.duration_s, tool_invocations: $tc, delegations: $dc, input_tokens: null, output_tokens: null, estimated_cost_usd: null}
328
+ usage: ({
329
+ model: $m,
330
+ duration_s: .session.duration_s,
331
+ tool_invocations: $tc,
332
+ delegations: $dc,
333
+ input_tokens: ($tu.input_tokens // null),
334
+ output_tokens: ($tu.output_tokens // null),
335
+ cache_creation_input_tokens: ($tu.cache_creation_input_tokens // null),
336
+ cache_read_input_tokens: ($tu.cache_read_input_tokens // null),
337
+ estimated_cost_usd: ($tu.estimated_cost_usd // null),
338
+ pricing_version: ($tu.pricing_version // null),
339
+ by_model: ($tu.by_model // null)
340
+ })
319
341
  }')
320
342
  transport_emit "$usage_event"
321
343
  fi
@@ -0,0 +1,216 @@
1
+ # DESIGN DOC: Evolving Flow's `FlowRun` to be EVENT-SOURCED
2
+
3
+ **Status:** DRAFT for owner / Flow-maintainer review. Design only — no Flow source modified.
4
+ **Date:** 2026-06-27
5
+ **Repo:** `/Users/brian/dev/github/kontourai/flow`
6
+ **Branch studied:** `feat/emit-trust-bundles` @ 269ae97 (in-flight; one commit ahead of `origin/main`).
7
+ **Relates to:** Flow ADR 0001 ("Flow owns Flow Runs / transitions"), the in-flight trust-bundle emission work, and the companion consumer design `scratchpad/workflowrun-design.md` (Flow Agents side).
8
+
9
+ ---
10
+
11
+ ## 0. TL;DR — the three load-bearing findings
12
+
13
+ 1. **`FlowRun` is NOT partway event-sourced.** The prompt's premise that `flow-run-store.ts:~85` already holds a run `events[]` array is a **false lead**: that `events: [...]` at `src/runtime/flow-run-store.ts:85-96` is the **demo acceptance-bundle fixture** — a Hachure *TrustBundle*'s `events` field (claims/evidence/policies/events) written into `scaffoldDemoRun`. It has nothing to do with `FlowRunState`. The actual persisted run state (`schemas/flow-run.schema.json:7`, `src/contracts/flow-types.ts:83-97`) has **no `events[]` at all**. It is a **mutable record** with three accumulated arrays — `gate_outcomes[]`, `transitions[]`, `exceptions[]` — plus a **stored** `status` and `current_step` that are set imperatively. So this work is **"introduce an event log,"** not "promote `events[]` to source of truth."
14
+
15
+ 2. **`status`/`current_step` are stored, not folded.** `applyEvaluation` (`src/gates/flow-gates.ts:237-320`) mutates `state.status`, `state.current_step`, pushes to `state.transitions`, and merges `state.gate_outcomes` **in place**, then `saveRun` overwrites `state.json` (`flow-run-store.ts:209-214`). The projection (`projectFlowRun`, `src/console/console-projection.ts:577-607`) **reads the stored `state.status`/`state.transitions` directly** (`:585, :591`) — it re-shapes mutable state, it does not fold. **`transitions[]` is the closest thing to an event log that exists today**, but it is a *derived byproduct* of mutation, not the source of truth.
16
+
17
+ 3. **The in-flight branch matters two ways.** `feat/emit-trust-bundles` (a) **adds** `writeTrustBundles` into `saveRun` (`flow-run-store.ts:193-214`) — emitting derived Hachure trust bundles per-gate + run-level under `<run>/trust/`; this is an **emission/projection** pattern the event log should align with, not collide with — and (b) **removes** `reDeriveBundleReports` + the per-evidence **`inquiry_records`** append-only audit series (deleted at `flow-run-store.ts` and `flow-types.ts` in the diff). Those `inquiry_records` were *"append-only series of point-in-time inquiry records … event high-water mark"* — i.e. the **one genuinely event-sourced-shaped structure in the codebase was just deleted on this branch.** The design must not resurrect it; it must reintroduce that discipline at the **run** level instead of the per-evidence level.
18
+
19
+ **Recommendation:** the thinnest valuable slice is to **add an append-only `events[]` log to `FlowRunState`, written by the existing mutators as a side-channel, with `transitions[]` redefined as a fold over it** — and prove fold-equality against today's `transitions[]`/`status`. Defer hash-chaining and "delete the mutable writes" to later phases gated on owner decisions.
20
+
21
+ ---
22
+
23
+ ## 1. Current-state map (grounded in file:line)
24
+
25
+ ### 1.1 What persists today — `FlowRunState`
26
+
27
+ Schema `schemas/flow-run.schema.json` requires (`:7`): `schema_version` (const `"0.1"`, `:12`), `run_id`, `definition_id`, `status`, `current_step`, `gate_outcomes`, `transitions`, `exceptions`. TS mirror at `src/contracts/flow-types.ts:83-97`. Initial value at `src/definition/flow-definition.ts:350-368`: `status:"active"`, `current_step:firstStep.id`, empty `gate_outcomes/transitions/exceptions`. **No `events[]` field anywhere.**
28
+
29
+ | Array / field | What it holds today | Where written | Source of truth? |
30
+ |---|---|---|---|
31
+ | `status` (string enum, schema `:36`) | Current lifecycle status (`active`/`blocked`/`needs_decision`/`completed`/`failed`/`accepted_by_exception`) | set imperatively in `applyEvaluation` (`flow-gates.ts:253, 255, 289, 313`), `acceptException` (`flow-run-store.ts:349`) | **STORED** (not derived) |
32
+ | `current_step` (schema `:38`) | Current step id; open gates derived from it | `applyEvaluation:252, 290`; `initialState:361` | **STORED** |
33
+ | `gate_outcomes[]` (schema `:56-60`, `:73-110`) | Latest decision **per gate** — `mergeGateOutcome` *replaces* any prior outcome for the same `gate_id` (`flow-gates.ts:232-235`). One row per gate, **last-write-wins, not append-only.** | `applyEvaluation:239` | derived-ish but **collapsed** (history lost) |
34
+ | `transitions[]` (schema `:61-65`, `:111-135`) | **Append-only-ish** history of step movements: each pass/block/route-back pushes a `{from_step,to_step,status,reason,at,gate_id,…}` row (`flow-gates.ts:244, 257, 279, 291`). Carries `type:"route_back"`, `attempt`, `route_reason`, `limit_exceeded`, `classifier`, `diagnostics`, `analytics`. | `applyEvaluation` only | **this is the de-facto event log**, but it is a *byproduct* of mutation and only covers transitions (not evidence/exception/seal events) |
35
+ | `exceptions[]` (schema `:66-70`, `:137-148`) | Accepted exceptions `{id,gate_id,reason,authority,accepted_at}` — append-only | `acceptException` (`flow-run-store.ts:341-348`) | append-only, but separate stream |
36
+ | evidence | Evidence entries in `evidence-manifest.json` (separate file) | `attachEvidence` (`flow-run-store.ts:252-312`), which appends via `run.manifest.evidence.push` (`:309`) | append-only, but a separate stream (separate file) |
37
+
38
+ ### 1.2 How status/projection are derived
39
+
40
+ - `evaluateRun` (`flow-run-store.ts:314-336`): loads run, evaluates open gates via `evaluateGate` (pure, `flow-gates.ts:133-230`), validates the transition (`validateEvaluationTransition`), then calls **`applyEvaluation`** which **mutates** state, then `saveRun`.
41
+ - `applyEvaluation` (`flow-gates.ts:237-320`) is the single chokepoint that turns a gate outcome into: a merged `gate_outcomes` row, a pushed `transitions` row, and **assignment of `status`/`current_step`/`next_action`/`updated_at`**. This is exactly the imperative state machine an event-fold would replace.
42
+ - `projectFlowRun` (`console-projection.ts:577-607`) reads **stored** `state.status` (`:591` via `projectRunIdentity`), `state.current_step` (`:594`), `state.transitions` (`:585`), `state.gate_outcomes` (via `projectGate`), `state.exceptions` (`:586`). **The projection is a re-shaping of stored mutable fields — `status` is NOT a fold.**
43
+ - `reportJson`/`renderSummary`/`renderResume` (`src/reports/flow-reports.ts:16, 124, 164`) likewise read `state.status`/`state.current_step` directly (`:23, :25, :127, :169`).
44
+
45
+ **Verdict:** Flow today is a classic **mutable state-machine record**. `transitions[]` + `exceptions[]` + the evidence manifest are *append-ish side records*; `status`/`current_step`/`gate_outcomes` are *destructively overwritten*. There is **no single ordered event log** and **no fold** producing `status`. This is meaningfully *less* event-sourced than the prompt assumed, and the one append-only audit structure (`inquiry_records`) was **removed on this branch** (§1.3).
46
+
47
+ ### 1.3 What `feat/emit-trust-bundles` changed (the in-flight diff — `git diff origin/main..HEAD`)
48
+
49
+ The single commit `269ae97 feat: emit per-gate and run-level trust bundles (recursive trust)` does two things relevant here, plus a large console-UI/reports deletion (out of scope):
50
+
51
+ **(a) ADDS emission into the save path.** `writeTrustBundles` (`flow-run-store.ts:193-207`) now runs inside `saveRun` (`:213`): it builds a run-level Hachure trust bundle via `buildFlowTrustBundle({state})` and one per `gate_outcome` via `buildGateTrustBundle` (`src/gates/flow-trust-emit.ts:173-294`), writing them under `<run>/trust/run.json` and `<run>/trust/<gate>.json` (`src/runtime/flow-files.ts:14-36`). **These bundles are pure folds over `state.gate_outcomes`** (`flow-trust-emit.ts:201, 208-214`) — i.e. Flow already adopted the pattern "derive an inspectable artifact from run state on every save." An event log is the **same pattern, one level deeper** (derive the bundle from the *log* instead of from the collapsed `gate_outcomes`).
52
+
53
+ **(b) REMOVES the per-evidence append-only audit series.** The diff deletes `reDeriveBundleReports` (was in `flow-run-store.ts`) and the `inquiry_records?` field on `FlowEvidenceEntry` (`flow-types.ts:63-73` — the comment described it as *"Append-only series of point-in-time inquiry records (Surface DerivationCheckpoints), one per re-derivation … status-by-claim + statusFunctionVersion + asOf + event high-water mark"*). It also drops `freshness_transitions` from `evaluateRun`'s return (`flow-run-store.ts:335`) and the `checkpointFromReport, diffFreshness` imports.
54
+
55
+ **Why this matters for sequencing:** the branch is moving Flow toward **"state in → derived trust artifacts out"** and **away from** a per-evidence append log. The event-sourcing design must (i) **build on the emission pattern** (the event log becomes the *input* to `buildFlowTrustBundle`, replacing the collapsed `state.gate_outcomes`), and (ii) **reintroduce append-only audit discipline at the RUN level** — the thing `inquiry_records` was reaching for, but cleanly, as the run's source of truth rather than a per-evidence sidecar. Do **not** re-add `inquiry_records`; that would re-create the very fork this branch just removed.
56
+
57
+ ### 1.4 Flow's documented run-model stance (ADR 0001)
58
+
59
+ `docs/adr/0001-flow-as-process-transparency-layer.md:21-27`: **Flow owns** Flow Runs, steps, gates, Transitions, gate evidence, exceptions, continuation. `:71` rejects modeling process state in Surface because "Surface models trust state, not process-specific semantics such as steps, gates, transitions, and continuation." **There is no ADR on event-sourcing the run.** So introducing an event log is a *new architectural decision* that should land as its own Flow ADR — it is squarely inside Flow's owned surface per 0001, which is the right home (consistent with the consumer doc's §8 conclusion that event-sourcing belongs in `FlowRun`, not bespoke in Flow Agents).
60
+
61
+ ---
62
+
63
+ ## 2. Target model — event log as source of truth, `transitions[]`/`status` as folds
64
+
65
+ ### 2.1 Principle
66
+
67
+ Make `FlowRunState.events[]` — an **append-only ordered log** — the run's source of truth. Everything else becomes a **deterministic fold**:
68
+
69
+ ```
70
+ status = foldStatus(events) // replaces stored state.status
71
+ current_step = foldCurrentStep(events)
72
+ transitions[] = foldTransitions(events) // EXACTLY today's transitions rows, derived
73
+ gate_outcomes[]= foldGateOutcomes(events) // last-write-wins per gate, derived
74
+ exceptions[] = foldExceptions(events)
75
+ trust bundles = buildFlowTrustBundle(foldGateOutcomes(events)) // unchanged emission, fed by the fold
76
+ ```
77
+
78
+ This is an **evolution of `applyEvaluation`**, not a rewrite: `applyEvaluation`'s existing branches (`flow-gates.ts:241-314`) become the **reducer cases** of `foldStatus`/`foldTransitions`. The mapping is nearly mechanical because the transition rows it pushes already carry the full causal payload.
79
+
80
+ ### 2.2 Event taxonomy (reconciled with what exists)
81
+
82
+ Every event shares an envelope. **Reuse the `transitions[]` row shape** so the fold to `transitions[]` is near-identity:
83
+
84
+ ```jsonc
85
+ {
86
+ "seq": 7, // monotonic per run
87
+ "type": "Transitioned",
88
+ "at": "2026-06-27T12:00:00Z", // already on every transition (schema :134)
89
+ "actor": "flow", // emitter; cf. trust-emit actor:"flow" (flow-trust-emit.ts:158)
90
+ "source": "evaluateRun", // the operation that emitted it
91
+ "payload": { /* type-specific */ }
92
+ // "_chain": {...} // OPTIONAL, Phase 3 — see §3
93
+ }
94
+ ```
95
+
96
+ | Event type | Maps to today | Emitted at | Folds into |
97
+ |---|---|---|---|
98
+ | `RunStarted` | `initialState` (`flow-definition.ts:350-368`) | `startRun` (`flow-run-store.ts:157-172`) | seeds `status:"active"`, `current_step`, identity |
99
+ | `GateEvaluated` | a `GateOutcome` (`flow-gates.ts:133-230`) | `evaluateRun` per gate (`flow-run-store.ts:319-333`) | `gate_outcomes[]` (last-write-wins per `gate_id`) |
100
+ | `EvidenceAttached` | manifest push (`flow-run-store.ts:309`) | `attachEvidence` | evidence projection (manifest stays its own file initially) |
101
+ | `Transitioned` (pass) | `transitions.push{status:"allowed"}` (`flow-gates.ts:244-251`) | `applyEvaluation` pass branch | `transitions[]`, advances `current_step`, `status` active/completed |
102
+ | `Blocked` | `transitions.push{status:"blocked"}` non-route (`flow-gates.ts:279-287`) | block branch | `transitions[]`, `status:"blocked"` |
103
+ | `RoutedBack` | `transitions.push{type:"route_back"}` (`flow-gates.ts:288-311`) | route-back branch | `transitions[]`, `current_step:=route_back_to`, `status:"active"`, attempt counting |
104
+ | `ExceptionAccepted` | `exceptions.push` (`flow-run-store.ts:341-348`) | `acceptException` | `exceptions[]`, `status:"accepted_by_exception"` |
105
+ | `RunSealed` *(new)* | — (no equivalent today) | a future `sealRun`/delivery | terminal marker + head pointer for checkpoints (§4d) |
106
+
107
+ **Reconciliation note:** `GateEvaluated` and `Transitioned`/`Blocked`/`RoutedBack` are *distinct* events even though `applyEvaluation` does both in one call — the gate **decision** and its **effect on the run** are separate facts (the consumer doc wants the gate decision replayable independently). The fold re-derives `gate_outcomes[]` from `GateEvaluated` and `transitions[]` from the `Transitioned/Blocked/RoutedBack` family. This is the cleanest split and keeps `evaluateGate` (pure, already side-effect-free) unchanged.
108
+
109
+ **`attempt`/`limit_exceeded` route-back counting** currently reads `state.transitions` (`flow-definition.ts:415` `priorMatches`). After the change it reads `foldTransitions(events)` — identical data, so route-back cascade behavior (and with it the route-back tests) is preserved by construction.
110
+
111
+ ### 2.3 Schema evolution
112
+
113
+ Add `events[]` to `schemas/flow-run.schema.json` (currently `additionalProperties:false`, `:8`, so this is a required, deliberate edit). Two viable shapes — **owner/maintainer decision**:
114
+
115
+ - **(A) Additive, bump `schema_version` 0.1→0.2:** `events[]` becomes **required**; `transitions[]`/`gate_outcomes[]`/`status` remain in the persisted file but are documented as **derived caches** (regenerated on save from the fold). Old `0.1` runs lack `events[]` → see migration §5.
116
+ - **(B) Pure log, defer:** persist **only** `events[]` (+ identity), drop the derived arrays from disk entirely, regenerate on read. Cleaner end state, bigger blast radius (every reader of `state.transitions`/`state.status` must go through the fold). This is the **zero-legacy end state** but should not be the first slice.
117
+
118
+ ---
119
+
120
+ ## 3. Tamper-evidence — should the log be hash-chained?
121
+
122
+ The consumer (ADR 0017, per `scratchpad/workflowrun-design.md` §5) leans on the flow-agents `command-log.jsonl` hash chain (`hash = sha256(prevHash + canonicalJson(record))`) as its tamper-evidence spine. If Flow Agents will *trust this log as a tamper-evident record*, the Flow event log should support an **optional** `_chain` per event with the **same construction** so the two integrity stories compose.
123
+
124
+ **Recommendation: design the envelope to carry an optional `_chain`, but do NOT couple Flow to flow-agents specifics.**
125
+ - Flow already imports `createHash` from `node:crypto` (`flow-run-store.ts:1`, used in `sha256File:216-219`), so the primitive is in-repo — no new dependency.
126
+ - Keep chaining **opt-in / Phase 3**: a plain monotonic `seq` (cheap, always on) gives ordering and replay; the `_chain` adds tamper-evidence when a consumer needs it. This avoids forcing Flow to adopt flow-agents' security posture before there's a Flow-side reason to.
127
+ - **Decouple by interface, not import:** Flow defines its own `hashEvent(prevHash, event)` over a canonical JSON of the Flow envelope. Flow Agents, if it wants one chain spanning both, reconciles at read time — it does **not** require Flow to chain into flow-agents' genesis. This honors "Flow owns Flow Runs" (ADR 0001) without importing a flow-agents-specific contract.
128
+ - **Open question for maintainers (§7 Q3):** does Flow *want* tamper-evidence as a first-class run property, or is that a flow-agents concern that should stay in the consumer? The `inquiry_records` removal (§1.3) suggests Flow is currently *trimming* audit-series complexity, so pushing a mandatory chain upstream now would cut against the branch's direction. Hence: **ship chaining as opt-in, let the consumer drive whether it becomes mandatory.**
129
+
130
+ **Hard rule (if adopted):** append-only; "edits" are compensating events, never rewrites; any fold that disagrees with a re-fold is a tamper signal. Add a Flow test asserting "a hand-edited event breaks the chain / changes the fold."
131
+
132
+ ---
133
+
134
+ ## 4. Consumer contract for Flow Agents (Flow stays the owner)
135
+
136
+ Flow Agents consumes Flow's run primitive (ADR 0001 `:41`, "Flow Agents will be the first consumer of Flow"). Proposed additions to Flow's public API (`src/index.ts` already exports `startRun`/`loadRun`/`saveRun`/`evaluateRun`/`projectFlowRun`/`projectFlowRunFromFiles` `:161-162`):
137
+
138
+ **(a) Append events** — `appendRunEvent(runId, event, {cwd})`: validates type+payload, assigns `seq` (and `_chain` if enabled), persists, returns the stored event including the head hash. The existing mutators (`evaluateRun`, `acceptException`, `attachEvidence`) become **internal callers** of this — Flow Agents normally appends *indirectly* by calling those, and only uses `appendRunEvent` directly for flow-agents-specific event types if Flow allows an extensibility escape hatch (an open question for maintainers; not yet enumerated in §7.2, which lists only Q1–Q3).
139
+
140
+ **(b) Get projected state** — `projectFlowRun` / `projectFlowRunFromFiles` (already exist, `console-projection.ts:577, 609`) become **fold-backed**: same output shape (`FlowConsoleProjection`), but `status`/`transitions` come from the fold, not stored fields. **Consumer-transparent** — no Flow Agents change required to get the new guarantee.
141
+
142
+ **(c) Replay / trace** — `replayRun(runId, {atSeq|atTime})` → projected state as-of a point; `traceRun(runId)` → the ordered event timeline. These are the new capabilities the consumer doc's §3 ("rebuild the session") needs; they fall out for free once `status` is a fold. Render reuses `renderResume`/`renderSummary` (`flow-reports.ts:124, 164`).
143
+
144
+ **(d) Run head hash for a checkpoint pointer** — `getRunHead(runId)` → `{seq, headHash}`. This is the **"compiled vs raw notes" pointer** the consumer doc §3.3 wants: Flow Agents stores `event_log_ref + head_hash` in its sealed checkpoint, travels light, and can prove the raw log is unmodified later. Emit a `RunSealed` event (§2.2) capturing the head at seal time.
145
+
146
+ All four keep Flow as **owner of the model and the fold**; Flow Agents only **appends (indirectly) and reads**. No flow-agents trust/security types leak into Flow.
147
+
148
+ ---
149
+
150
+ ## 5. Migration plan — phased, honoring "no legacy / no fallbacks"
151
+
152
+ Standing owner rule: long-term **no legacy or fallbacks**; dual-keep is acceptable **only as execution-transition scaffolding** with a deletion deadline; end state = **`events[]` is the sole authority, `transitions[]`/`status` derived, no mutable field with independent authority.**
153
+
154
+ ### Phase 0 — Coordinate with `feat/emit-trust-bundles` (prerequisite)
155
+ Land **on top of** 269ae97 (do not branch from `origin/main`). Shared file is `flow-run-store.ts` (both touch `saveRun`/`evaluateRun`). Sequence: let the trust-emit branch merge first (or rebase onto it), then this work treats `writeTrustBundles` as the **first consumer of the fold** (feed `buildFlowTrustBundle` from `foldGateOutcomes(events)` instead of `state.gate_outcomes`). **Do not re-add `inquiry_records`** — the run event log supersedes it.
156
+
157
+ ### Phase 1 — Emit the event log (additive, reversible, no behavior change)
158
+ **Ships:** `appendRunEvent` + a typed `events[]` written **alongside** the existing mutations. `applyEvaluation` (`flow-gates.ts:237-320`) additionally appends `GateEvaluated`/`Transitioned`/`Blocked`/`RoutedBack`; `acceptException` appends `ExceptionAccepted`; `startRun` appends `RunStarted`. **Stored `status`/`transitions`/`gate_outcomes` remain the source of truth.** Schema gets `events[]` (option A, §2.3).
159
+ **Exit criteria:** `foldTransitions(events)` reproduces `state.transitions` **byte-identically** (modulo `updated_at`/`at` timestamps), and `foldStatus(events) === state.status`, for every run in `.flow/runs/` and the route-back/exception test suites. This **fold==stored equality is the correctness oracle** for all later phases. Add `replayRun`/`traceRun` (read-only).
160
+ **Reversible:** delete `events[]` writes + the two new functions.
161
+
162
+ ### Phase 2 — Flip source of truth to the fold (dual-write scaffolding, bounded)
163
+ **Ships:** `saveRun` writes `transitions`/`gate_outcomes`/`status` **as a cache generated from the fold** (not from in-place mutation). `applyEvaluation` is refactored to *only* append events; the derived arrays are regenerated by folding. Readers unchanged (still read the cached fields). `writeTrustBundles` reads the fold. A `--rebuild` path regenerates every derived field purely from `events[]` and must match.
164
+ **Exit criteria:** an integrity check `fold(events) == persisted-derived-fields` runs in CI; `--rebuild` is byte-stable. **Name the deletion deadline here** (Phase 3) so the dual-write doesn't become permanent legacy.
165
+ **Reversible:** flip `saveRun` back to direct mutation.
166
+
167
+ ### Phase 3 — Remove mutable authority; (optional) chain (zero-legacy end state)
168
+ **Ships:** delete the imperative `state.status = …` / `state.transitions.push(…)` assignments. `projectFlowRun`, `reportJson`, `renderResume`, route-back counting (`flow-definition.ts:415`) all read **via the fold**. Choose schema option B (persist only `events[]` + identity, derive the rest on read) **or** keep the derived fields strictly as a **generated read-only cache with no independent authority**. Optionally enable `_chain` (§3) + add the tamper-detection test. `getRunHead`/`RunSealed` shipped for the checkpoint pointer.
169
+ **Exit criteria:** grep shows no code path assigns `status`/`transitions` except by appending an event; the fold is the only authority; `schema_version` bumped; consumer (Flow Agents) reads projections/replay only.
170
+ **This is the legacy-free end state.**
171
+
172
+ **Reversibility summary:** Phase 1 fully reversible; Phase 2 reversible (flip the writer); Phase 3 is the commitment point (deletes the mutable path) — gated on the Phase-1 oracle being green across all runs/tests and the §7 open questions resolved.
173
+
174
+ ---
175
+
176
+ ## 6. Coordination with `feat/emit-trust-bundles` (explicit)
177
+
178
+ - **Branch base:** build on 269ae97, not `origin/main`. The branch deletes a lot of console-UI/reports/test files (`git diff --stat`: ~6k deletions) — do **not** reintroduce them; scope this work to `flow-run-store.ts`, `flow-gates.ts`, `flow-types.ts`, `schemas/flow-run.schema.json`, and new event/fold modules.
179
+ - **Shared files & conflict surface:** `flow-run-store.ts` (`saveRun:209-214`, `evaluateRun:314-336`) and `flow-gates.ts` (`applyEvaluation`). Both branches edit `saveRun`. Resolution: the event-log work **inserts** an append step in `applyEvaluation`/`saveRun` and **redirects** `writeTrustBundles`' input from `state.gate_outcomes` to the fold — additive, low conflict if rebased after trust-emit lands.
180
+ - **Direction alignment:** trust-emit = "derive inspectable artifacts from run state on save." Event-sourcing = "make the log the run state, derive everything (including those artifacts) from it." They **compose**: the trust bundle becomes a *second-order fold*. The one thing to **not** do is revive `inquiry_records` (deleted on this branch) — the run event log is its successor at the right altitude.
181
+
182
+ ---
183
+
184
+ ## 7. Thinnest first slice + open questions
185
+
186
+ ### 7.1 Recommended thinnest first slice (Phase 1, scoped)
187
+ **Add `events[]` to `FlowRunState` + emit `RunStarted`/`GateEvaluated`/`Transitioned`/`Blocked`/`RoutedBack`/`ExceptionAccepted` from the existing mutators, plus `foldTransitions`/`foldStatus` and a read-only `traceRun`/`replayRun --at <seq>`. Keep stored `status`/`transitions` as the source of truth. Prove `fold(events) == stored` across all `.flow/runs/` and the existing route-back/exception tests.**
188
+
189
+ Why this is the right first cut: it is **safe** (no behavior change — folds run alongside, asserted equal), **valuable** (delivers the consumer's "recreate the session"/trace need immediately and the head-hash pointer can follow), **reviewable** (one schema field + append calls in the existing mutators (`startRun`, `applyEvaluation`, `acceptException`) + two pure fold functions + a read-only command; essentially no change to `evaluateGate`, gates, or the trust-emit path), and it **produces the correctness oracle** (fold==stored) that de-risks Phases 2-3. Smallest demo: `traceRun` on `./.flow/runs/run.1781102325268/` showing its real timeline derived from `events[]`, with `foldTransitions` matching the persisted `transitions[]`.
190
+
191
+ ### 7.2 Top open questions (owner / Flow maintainers must decide)
192
+ 1. **Schema strategy (§2.3): additive-with-derived-caches (A) vs pure-log (B)?** A is the safe first slice; B is the zero-legacy end state. Which, and is bumping `schema_version` 0.1→0.2 acceptable now (the const is `"0.1"`, `schema:12`)? *I'm unsure how many external consumers pin the `0.1` const — maintainers must confirm blast radius.*
193
+ 2. **`GateEvaluated` vs `Transitioned` as separate events, or one combined event?** I recommend separate (gate decision is independently replayable, matches the consumer's gate-debugger goal), but it adds an event type and a fold case `applyEvaluation` doesn't distinguish today (`flow-gates.ts:237-320` does both in one call). Maintainer call.
194
+ 3. **Does Flow want hash-chaining as a first-class run property, or keep tamper-evidence in the consumer?** The branch just *removed* the `inquiry_records` audit series (§1.3), suggesting Flow is trimming audit complexity — so I lean **opt-in `_chain`, consumer-driven** (§3), but this is genuinely the owner's architectural call about how much of ADR 0017's tamper-evidence belongs upstream in Flow vs in Flow Agents.
195
+
196
+ ### 7.3 Honest uncertainties / risks
197
+ - **The prompt's `events[]@~85` premise is wrong** (it's the demo *acceptance-bundle* fixture, `flow-run-store.ts:85-96`); I want to flag this explicitly because the whole "promote vs introduce" framing hinges on it — this is **"introduce a log."** If I've misread and there's a *different* `events[]` the maintainers had in mind, that changes §2.
198
+ - **`gate_outcomes[]` is last-write-wins** (`mergeGateOutcome`, `flow-gates.ts:232-235`) — it already *loses* history. Folding from `GateEvaluated` events is strictly *more* information; the only risk is a fold that doesn't reproduce the exact collapsed array order. The oracle (§7.1) catches this.
199
+ - **Replay determinism** depends on pinning the fold/evaluator version: if `evaluateGate`/Surface status derivation changes, replaying old events may yield a different result. Record an evaluator/`statusFunctionVersion` on events (the deleted `inquiry_records` tracked exactly this, per the comment removed from `flow-types.ts`, so the need is real). Worth carrying even in Phase 1.
200
+ - **Two source-of-truth files during Phase 2** is exactly the dual-write the owner dislikes; only acceptable as bounded scaffolding with the Phase-3 deletion deadline named.
201
+
202
+ ---
203
+
204
+ ## Appendix — key file:line references
205
+ - Demo fixture `events[]` (the false lead): `src/runtime/flow-run-store.ts:85-96`
206
+ - Persisted run shape: `schemas/flow-run.schema.json:7` (required), `:36` (status enum), `:56-70` (gate_outcomes/transitions/exceptions), `:111-135` (transition row)
207
+ - `FlowRunState` TS: `src/contracts/flow-types.ts:83-97`; `GateOutcome`: `:105-113`
208
+ - Initial state: `src/definition/flow-definition.ts:350-368`; route-back attempt counting reads transitions: `:415`
209
+ - The imperative state machine (becomes the reducer): `applyEvaluation` `src/gates/flow-gates.ts:237-320`; `mergeGateOutcome` (last-write-wins) `:232-235`; pure `evaluateGate` `:133-230`
210
+ - Save/evaluate path: `saveRun` `src/runtime/flow-run-store.ts:209-214`; `evaluateRun` `:314-336`; `acceptException` `:338-353`
211
+ - Projection reads stored status (not a fold): `projectFlowRun` `src/console/console-projection.ts:577-607` (`:585` transitions, `:591` identity/status, `:594` current_step); reports `src/reports/flow-reports.ts:16, 23, 25, 124, 164, 169`
212
+ - In-flight trust emission (build ON this): `writeTrustBundles` `src/runtime/flow-run-store.ts:193-207` (called in saveRun `:213`); builders `src/gates/flow-trust-emit.ts:173-294` (run-level folds `state.gate_outcomes` `:201, 208-214`); layout `src/runtime/flow-files.ts:14-36`
213
+ - In-flight REMOVED append-only audit series (do NOT revive): `reDeriveBundleReports` + `inquiry_records` deleted in `git diff origin/main..HEAD -- src/runtime/flow-run-store.ts src/contracts/flow-types.ts`
214
+ - Public API surface: `src/index.ts:161-162` (`projectFlowRun`, `projectFlowRunFromFiles`)
215
+ - Ownership stance: `docs/adr/0001-flow-as-process-transparency-layer.md:21-27` (Flow owns Runs/Transitions), `:41` (Flow Agents is first consumer), `:71` (process semantics not in Surface)
216
+ - Hash primitive already in-repo: `node:crypto createHash` `src/runtime/flow-run-store.ts:1, 216-219`