@kontourai/flow-agents 1.4.0 → 2.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (180) hide show
  1. package/.github/CODEOWNERS +29 -0
  2. package/.github/actions/trust-verify/action.yml +145 -0
  3. package/.github/workflows/ci.yml +11 -4
  4. package/.github/workflows/kit-gates-demo.yml +2 -2
  5. package/.github/workflows/publish-npm.yml +10 -2
  6. package/.github/workflows/release-please.yml +1 -1
  7. package/.github/workflows/trust-reconcile.yml +113 -0
  8. package/AGENTS.md +13 -0
  9. package/CHANGELOG.md +95 -0
  10. package/CONTRIBUTING.md +4 -4
  11. package/README.md +1 -0
  12. package/agents/tool-planner.json +1 -1
  13. package/build/src/cli/init.js +242 -20
  14. package/build/src/cli/validate-workflow-artifacts.js +19 -2
  15. package/build/src/cli/verify.d.ts +1 -0
  16. package/build/src/cli/verify.js +90 -0
  17. package/build/src/cli/workflow-sidecar.d.ts +300 -8
  18. package/build/src/cli/workflow-sidecar.js +1934 -83
  19. package/build/src/cli.js +2 -3
  20. package/build/src/lib/flow-resolver.d.ts +82 -0
  21. package/build/src/lib/flow-resolver.js +237 -0
  22. package/build/src/tools/build-universal-bundles.js +34 -22
  23. package/build/src/tools/generate-context-map.js +3 -16
  24. package/build/src/tools/validate-source-tree.d.ts +1 -1
  25. package/build/src/tools/validate-source-tree.js +42 -162
  26. package/context/contracts/artifact-contract.md +10 -0
  27. package/context/contracts/delivery-contract.md +1 -0
  28. package/context/contracts/review-contract.md +1 -0
  29. package/context/contracts/verification-contract.md +2 -0
  30. package/context/gate-awareness.md +39 -0
  31. package/context/scripts/hooks/stop-goal-fit.js +632 -70
  32. package/docs/adr/0001-flow-agents-consumes-flow.md +1 -1
  33. package/docs/adr/0002-flow-kits-as-extension-unit.md +1 -1
  34. package/docs/adr/0004-gates-expect-surface-claims.md +2 -0
  35. package/docs/adr/0005-kubernetes-inspired-resource-contracts.md +2 -0
  36. package/docs/adr/0007-skill-audit.md +1 -1
  37. package/docs/adr/0009-canonical-hook-core-kit-boundary.md +95 -0
  38. package/docs/adr/0010-workflow-trust-state-as-hachure-bundle.md +139 -0
  39. package/docs/adr/0011-mcp-posture.md +100 -0
  40. package/docs/adr/0012-agent-coordination-as-liveness-claims.md +119 -0
  41. package/docs/adr/0013-context-lifecycle.md +151 -0
  42. package/docs/adr/0014-core-vs-domain-kit-boundary.md +143 -0
  43. package/docs/adr/0015-flow-flow-agents-boundary-reconciliation.md +120 -0
  44. package/docs/adr/0016-three-hard-boundary-model.md +71 -0
  45. package/docs/adr/0017-anti-gaming-trust-security-model.md +155 -0
  46. package/docs/agent-system-guidebook.md +5 -12
  47. package/docs/context-map.md +4 -10
  48. package/docs/index.md +3 -2
  49. package/docs/integrations/framework-adapter.md +19 -6
  50. package/docs/integrations/index.md +2 -2
  51. package/docs/north-star.md +4 -4
  52. package/docs/operating-layers.md +3 -3
  53. package/docs/plans/adr-0010-phase2-gate-recompute.md +55 -0
  54. package/docs/repository-structure.md +2 -2
  55. package/docs/skills-map.md +1 -0
  56. package/docs/spec/runtime-hook-surface.md +62 -9
  57. package/docs/standards-register.md +3 -3
  58. package/docs/survey-utterance-check.md +1 -1
  59. package/docs/trust-anchor-adoption.md +197 -0
  60. package/docs/verifiable-trust.md +95 -0
  61. package/docs/veritas-integration.md +2 -2
  62. package/docs/workflow-usage-guide.md +69 -0
  63. package/evals/acceptance/DEMO-false-completion.md +144 -0
  64. package/evals/acceptance/demo-cast.sh +92 -0
  65. package/evals/acceptance/demo-false-completion.sh +72 -0
  66. package/evals/acceptance/demo-real-evidence.sh +104 -0
  67. package/evals/acceptance/demo.tape +29 -0
  68. package/evals/acceptance/prove-capture-teeth-declared.sh +335 -0
  69. package/evals/acceptance/prove-capture-teeth.sh +114 -0
  70. package/evals/acceptance/prove-teeth.sh +105 -0
  71. package/evals/ci/antigaming-suite.sh +54 -0
  72. package/evals/ci/run-baseline.sh +2 -0
  73. package/evals/fixtures/flow-kit-repository/invalid-missing-extension-asset/flows/review.flow.json +26 -0
  74. package/evals/fixtures/flow-kit-repository/invalid-missing-extension-asset/kit.json +20 -0
  75. package/evals/fixtures/flow-kit-repository/valid-unknown-extension/flows/review.flow.json +26 -0
  76. package/evals/fixtures/flow-kit-repository/valid-unknown-extension/kit.json +18 -0
  77. package/evals/integration/test_builder_step_producers.sh +379 -0
  78. package/evals/integration/test_bundle_install.sh +35 -71
  79. package/evals/integration/test_bundle_lifecycle.sh +39 -2
  80. package/evals/integration/test_captured_fail_reconciliation.sh +820 -0
  81. package/evals/integration/test_checkpoint_signing.sh +489 -0
  82. package/evals/integration/test_claim_lookup.sh +352 -0
  83. package/evals/integration/test_command_log_integrity.sh +275 -0
  84. package/evals/integration/test_context_map.sh +0 -2
  85. package/evals/integration/test_dual_emit_flow_step.sh +278 -0
  86. package/evals/integration/test_enforcer_expects_driven.sh +281 -0
  87. package/evals/integration/test_evidence_capture_hook.sh +185 -0
  88. package/evals/integration/test_flow_kit_repository.sh +2 -0
  89. package/evals/integration/test_flowdef_session_activation.sh +273 -0
  90. package/evals/integration/test_flowdef_session_history_preservation.sh +250 -0
  91. package/evals/integration/test_gate_bypass_chain.sh +448 -0
  92. package/evals/integration/test_gate_lockdown.sh +1137 -0
  93. package/evals/integration/test_gate_review_inquiry_records.sh +399 -0
  94. package/evals/integration/test_goal_fit_escape_hatch.sh +73 -0
  95. package/evals/integration/test_goal_fit_hook.sh +69 -4
  96. package/evals/integration/test_goal_fit_rederive.sh +263 -0
  97. package/evals/integration/test_install_merge.sh +1176 -0
  98. package/evals/integration/test_mint_attestation.sh +373 -0
  99. package/evals/integration/test_phase_map_and_gate_claim.sh +365 -0
  100. package/evals/integration/test_publish_delivery.sh +269 -0
  101. package/evals/integration/test_reconcile_soundness.sh +528 -0
  102. package/evals/integration/test_resolvefirststep_security.sh +208 -0
  103. package/evals/integration/test_session_resume_roundtrip.sh +286 -0
  104. package/evals/integration/test_trust_checkpoint.sh +325 -0
  105. package/evals/integration/test_trust_reconcile.sh +293 -0
  106. package/evals/integration/test_verify_cli.sh +208 -0
  107. package/evals/integration/test_workflow_sidecar_writer.sh +549 -34
  108. package/evals/lib/node.sh +0 -6
  109. package/evals/run.sh +45 -0
  110. package/evals/static/test_workflow_skills.sh +6 -13
  111. package/install.sh +0 -7
  112. package/integrations/strands-ts/README.md +25 -15
  113. package/integrations/veritas/flow-agents.adapter.json +1 -2
  114. package/kits/builder/flows/build.flow.json +59 -12
  115. package/kits/builder/kit.json +85 -15
  116. package/kits/builder/skills/continue-work/SKILL.md +116 -0
  117. package/kits/builder/skills/deliver/SKILL.md +36 -6
  118. package/kits/builder/skills/design-probe/SKILL.md +28 -0
  119. package/kits/builder/skills/execute-plan/SKILL.md +9 -1
  120. package/kits/builder/skills/gate-review/SKILL.md +234 -0
  121. package/kits/builder/skills/learning-review/SKILL.md +30 -0
  122. package/kits/builder/skills/pickup-probe/SKILL.md +29 -0
  123. package/kits/builder/skills/plan-work/SKILL.md +13 -1
  124. package/kits/builder/skills/pull-work/SKILL.md +19 -0
  125. package/kits/knowledge/adapters/default-store/index.js +38 -0
  126. package/kits/knowledge/adapters/flow-runner/index.js +1620 -0
  127. package/kits/knowledge/adapters/obsidian-store/index.js +36 -6
  128. package/kits/knowledge/docs/store-contract.md +314 -0
  129. package/kits/knowledge/evals/audit-freshness/suite.test.js +368 -0
  130. package/kits/knowledge/evals/canonicalize-category/suite.test.js +383 -0
  131. package/kits/knowledge/evals/contract-suite/suite.test.js +111 -0
  132. package/kits/knowledge/evals/detect-contradictions/suite.test.js +324 -0
  133. package/kits/knowledge/evals/entities/suite.test.js +40 -0
  134. package/kits/knowledge/evals/glossary-sync/suite.test.js +416 -0
  135. package/kits/knowledge/evals/hygiene-review/suite.test.js +396 -0
  136. package/kits/knowledge/evals/retirement/suite.test.js +145 -0
  137. package/kits/knowledge/flows/audit-freshness.flow.json +44 -0
  138. package/kits/knowledge/flows/canonicalize-category.flow.json +44 -0
  139. package/kits/knowledge/flows/detect-contradictions.flow.json +44 -0
  140. package/kits/knowledge/flows/glossary-sync.flow.json +61 -0
  141. package/kits/knowledge/flows/hygiene-review.flow.json +43 -0
  142. package/kits/knowledge/kit.json +51 -1
  143. package/package.json +4 -4
  144. package/packaging/conformance/README.md +10 -2
  145. package/packaging/conformance/fixtures/evidence-capture--allow-records-command.json +29 -0
  146. package/packaging/conformance/fixtures/stop-goal-fit--block-bundle-disputed-claim.json +29 -0
  147. package/packaging/conformance/fixtures/stop-goal-fit--block-capture-contradicts-claimed-pass.json +30 -0
  148. package/packaging/conformance/fixtures/stop-goal-fit--block-mode.json +23 -0
  149. package/packaging/conformance/fixtures/stop-goal-fit--off-mode.json +24 -0
  150. package/packaging/conformance/fixtures/stop-goal-fit--warn-active-delivery.json +5 -2
  151. package/packaging/conformance/fixtures/stop-goal-fit--warn-no-bundle.json +23 -0
  152. package/packaging/conformance/fixtures/workflow-steering--reground-active-prompt.json +30 -0
  153. package/packaging/conformance/fixtures/workflow-steering--reground-session-start.json +30 -0
  154. package/packaging/conformance/run-conformance.js +1 -1
  155. package/scripts/README.md +2 -1
  156. package/scripts/build-universal-bundles.js +0 -1
  157. package/scripts/ci/mint-attestation.js +221 -0
  158. package/scripts/ci/trust-reconcile.js +545 -0
  159. package/scripts/hooks/config-protection.js +423 -1
  160. package/scripts/hooks/evidence-capture.js +348 -0
  161. package/scripts/hooks/lib/liveness-read.js +113 -0
  162. package/scripts/hooks/run-hook.js +6 -1
  163. package/scripts/hooks/stop-goal-fit.js +1471 -79
  164. package/scripts/hooks/workflow-steering.js +135 -5
  165. package/scripts/install-codex-home.sh +39 -0
  166. package/scripts/install-merge.js +330 -0
  167. package/src/cli/init.ts +218 -20
  168. package/src/cli/validate-workflow-artifacts.ts +18 -2
  169. package/src/cli/verify.ts +100 -0
  170. package/src/cli/workflow-sidecar.ts +2064 -77
  171. package/src/cli.ts +2 -3
  172. package/src/lib/flow-resolver.ts +284 -0
  173. package/src/tools/build-universal-bundles.ts +34 -21
  174. package/src/tools/generate-context-map.ts +3 -17
  175. package/src/tools/validate-source-tree.ts +44 -104
  176. package/build/src/tools/filter-installed-packs.d.ts +0 -2
  177. package/build/src/tools/filter-installed-packs.js +0 -135
  178. package/packaging/packs.json +0 -49
  179. package/scripts/filter-installed-packs.js +0 -2
  180. package/src/tools/filter-installed-packs.ts +0 -132
@@ -0,0 +1,348 @@
1
+ #!/usr/bin/env node
2
+ /**
3
+ * Evidence Capture Hook (capture-first determinism)
4
+ *
5
+ * A postToolUse hook that DETERMINISTICALLY records every command/shell tool
6
+ * execution to an append-only log, so evidence about what actually ran is
7
+ * machine-recorded at the source — not transcribed later by the model. The Stop
8
+ * gate (stop-goal-fit.js) then cross-references the model's evidence.json claims
9
+ * against this captured truth: a check claiming a command passed while the log
10
+ * shows it FAILED is a caught false-completion.
11
+ *
12
+ * Records to `.flow-agents/<slug>/command-log.jsonl`, one JSON object per line:
13
+ * {
14
+ * "command": "<the command string the agent ran>",
15
+ * "observedResult": "pass" | "fail", // deterministically inferred
16
+ * "exitCode": <integer> | null, // null when only pass/fail is inferable
17
+ * "capturedAt": "<ISO-8601 timestamp>",
18
+ * "source": "postToolUse-capture",
19
+ * "_chain": { "seq": <n>, "prevHash": "<hex>", "hash": "<hex>" }
20
+ * }
21
+ *
22
+ * Hash-chain integrity (Increment B2, tamper-EVIDENCE):
23
+ * command-log.jsonl is the independent capture truth source. Each record carries
24
+ * a `_chain` block: hash = sha256(prevHash + canonicalJson(record)) where
25
+ * canonicalJson is the record WITHOUT the `_chain` field, keys sorted.
26
+ * The genesis prevHash is a fixed constant. This makes alteration, removal, or
27
+ * reordering of entries DETECTABLE by the Stop gate's verifyCommandLogChain().
28
+ *
29
+ * HONEST FRAMING: this is tamper-EVIDENCE, NOT tamper-PROOF. An agent that
30
+ * recomputes the whole chain from scratch can still forge a valid chain. The
31
+ * real tamper-proof boundary is the signed checkpoint (B1). The chain raises the
32
+ * local bar and catches casual/accidental tampering and corruption.
33
+ *
34
+ * Fail-open: any chain computation error falls back to writing the plain record
35
+ * without `_chain`. A chain failure must NEVER block capture or corrupt the log.
36
+ *
37
+ * Exit-code nuance: the host payload exposes `tool_response`/`tool_output`/`error`
38
+ * (per docs/spec/runtime-hook-surface.md §1, postToolUse). A clean integer exit
39
+ * code is host-dependent. We extract the real exit code where present; otherwise
40
+ * we derive `observedResult` from `error`/stderr-style failure indication and
41
+ * record `exitCode: null`. We never record the model's words about the outcome.
42
+ *
43
+ * Non-blocking — always exits 0. Idempotent/append-only. Fail-open on any error:
44
+ * a capture failure must never block the agent or corrupt the log.
45
+ */
46
+
47
+ 'use strict';
48
+
49
+ const fs = require('fs');
50
+ const path = require('path');
51
+ const crypto = require('crypto');
52
+
53
+ const MAX_STDIN = 1024 * 1024; // cap (in string length) on stdin buffered by the CLI entry point
54
+ const MAX_COMMAND_LEN = 4096; // cap (in string length) applied to the command before it is recorded
55
+ const MAX_OUTPUT_SCAN = 64 * 1024; // NOTE(review): not referenced in the visible part of this file — confirm it is still needed
56
+
57
+ // Tools whose tool_input.command is a shell/command execution. Identified by the
58
+ // presence of tool_input.command plus a command/shell-ish tool_name. We keep the
59
+ // name set permissive (a keyword delimited by non-letters anywhere in the name) so unknown-but-command-shaped tools on
60
+ // other runtimes still get captured when they carry a command string.
61
+ const COMMAND_TOOL_NAME = /(^|[^a-z])(bash|shell|sh|exec|run|command|terminal|cmd|process|executebash|executecommand)([^a-z]|$)/i;
62
+
63
+ // ─── Hash-chain integrity (tamper-EVIDENCE) ───────────────────────────────────
64
+ //
65
+ // Genesis prevHash: a fixed arbitrary sentinel used when the log is empty or
66
+ // the last entry has no _chain field (legacy record). This is NOT the SHA256 of
67
+ // any specific input string — it is a fixed constant chosen for the original
68
+ // implementation. (A previous comment incorrectly claimed it was
69
+ // sha256("flow-agents:command-log:genesis"); that is wrong.)
70
+ //
71
+ // Writer (this file, CHAIN_GENESIS) and verifier (stop-goal-fit.js,
72
+ // CHAIN_GENESIS_VERIFY) MUST use the same value. Do not change one without
73
+ // changing the other — existing chained logs depend on this constant.
74
+ //
75
+ // HONEST FRAMING: this makes alteration DETECTABLE, not impossible. An agent
76
+ // that rewrites all hashes can still forge the chain. The real tamper-proof
77
+ // boundary is the signed checkpoint (B1). We do not oversell this boundary.
78
+ const CHAIN_GENESIS = 'a3f9e2b7d5c84f1e6a0d2c3b9f7e1a4d8c6b5f2e9a0d3c7b1f4e8a2d6c0b9f3';
79
+
80
/**
 * Serialise a record into the canonical string that feeds the hash chain.
 *
 * Only the record's own enumerable top-level keys are used. `_chain` is left
 * out so a link's hash never depends on itself, and the remaining keys are
 * emitted in sorted order so insertion order cannot change the output. Nested
 * values are serialised as-is (their keys are not re-sorted).
 *
 * @param {object} record Log record, with or without a `_chain` block.
 * @returns {string} JSON text of the record minus `_chain`, top-level keys sorted.
 */
function canonicalJsonForChain(record) {
  const ordered = {};
  const names = Object.keys(record).sort();
  names.forEach((name) => {
    // Defensive: callers hash the record before `_chain` is attached.
    if (name === '_chain') return;
    ordered[name] = record[name];
  });
  return JSON.stringify(ordered);
}
93
+
94
/**
 * Derive the hash for one chain link.
 *
 * The digest input is the previous link's hash immediately followed by the
 * canonical JSON of the record (see canonicalJsonForChain), hashed with
 * SHA-256 over its UTF-8 bytes.
 *
 * @param {string} prevHash Hash of the preceding link (or the genesis value).
 * @param {object} record Record being appended (its `_chain`, if any, is ignored).
 * @returns {string} Hex-encoded SHA-256 digest.
 */
function computeChainHash(prevHash, record) {
  const hasher = crypto.createHash('sha256');
  hasher.update(prevHash + canonicalJsonForChain(record), 'utf8');
  return hasher.digest('hex');
}
102
+
103
/**
 * Find the chain position a new record should link to.
 *
 * The log is read into memory in full, then its non-blank lines are walked
 * newest-first. The first line that parses as JSON and carries a `_chain`
 * block with a string `hash` and a numeric `seq` supplies the result, so older
 * lines are never parsed. Unparseable lines and legacy records without a
 * usable `_chain` are skipped.
 *
 * @param {string} logFile Path to command-log.jsonl.
 * @returns {{ seq: number, hash: string }} State of the last chained entry, or
 *   `{ seq: -1, hash: CHAIN_GENESIS }` when the file cannot be read or holds
 *   no chained entry.
 */
function readLastChainState(logFile) {
  const genesis = () => ({ seq: -1, hash: CHAIN_GENESIS });

  let text;
  try {
    text = fs.readFileSync(logFile, 'utf8');
  } catch {
    return genesis();
  }

  const newestFirst = text.split('\n').filter((line) => line.trim()).reverse();
  for (const line of newestFirst) {
    let parsed;
    try {
      parsed = JSON.parse(line);
    } catch {
      continue; // corrupt line — keep looking further back
    }
    const link = parsed && parsed._chain;
    if (link && typeof link.hash === 'string' && typeof link.seq === 'number') {
      return { seq: link.seq, hash: link.hash };
    }
  }
  return genesis();
}
124
+ // ─────────────────────────────────────────────────────────────────────────────
125
+
126
/**
 * Parse JSON text leniently.
 *
 * A falsy input (empty string, undefined, null) is treated as `'{}'`; text
 * that fails to parse yields an empty object instead of throwing.
 *
 * @param {string} raw JSON text.
 * @returns {*} The parsed value, or `{}` when parsing fails.
 */
function parseJson(raw) {
  const text = raw || '{}';
  try {
    return JSON.parse(text);
  } catch {
    return {};
  }
}
129
+
130
/**
 * Locate the repository root by walking up from a starting directory.
 *
 * A directory counts as the root when it contains `.git` or `AGENTS.md`. The
 * walk stops at the filesystem root or after 40 levels; if no marker is found
 * the resolved starting directory itself is returned.
 *
 * @param {string} [startDir] Directory to start from; defaults to process.cwd().
 * @returns {string} Absolute path of the detected root (or of the start directory).
 */
function findRepoRoot(startDir) {
  const origin = path.resolve(startDir || process.cwd());
  const fsRoot = path.parse(origin).root;
  const markers = ['.git', 'AGENTS.md'];

  let candidate = origin;
  let hops = 0;
  while (candidate && hops < 40) {
    if (markers.some((marker) => fs.existsSync(path.join(candidate, marker)))) return candidate;
    if (candidate === fsRoot) break;
    candidate = path.dirname(candidate);
    hops += 1;
  }
  return origin;
}
140
+
141
/**
 * Read and parse a JSON file.
 *
 * @param {string} file Path of the file to read (decoded as UTF-8).
 * @returns {*} The parsed value, or `null` when the file cannot be read or
 *   does not contain valid JSON.
 */
function readJsonFile(file) {
  let parsed = null;
  try {
    parsed = JSON.parse(fs.readFileSync(file, 'utf8'));
  } catch {
    parsed = null;
  }
  return parsed;
}
144
+
145
/**
 * Find the directory holding the most recently modified `state.json` beneath
 * `flowAgentsDir`.
 *
 * The tree is walked with an explicit stack. Directories named `archive` are
 * not descended into, and unreadable directories or files are skipped. On an
 * mtime tie the first file encountered wins. Per the original author's note,
 * this mirrors how workflow-steering.js and stop-goal-fit.js locate the active
 * artifact directory (not verifiable from this file).
 *
 * @param {string} flowAgentsDir Root directory to search (normally `.flow-agents`).
 * @returns {string|null} Directory containing the newest state.json, or null
 *   when none is found.
 */
function latestStateDir(flowAgentsDir) {
  let newest = null; // { dir, mtimeMs } of the freshest state.json seen so far
  const pending = [flowAgentsDir];

  while (pending.length > 0) {
    const current = pending.pop();

    let children;
    try {
      children = fs.readdirSync(current, { withFileTypes: true });
    } catch {
      continue; // missing or unreadable directory
    }

    for (const child of children) {
      const childPath = path.join(current, child.name);

      if (child.isDirectory()) {
        if (child.name !== 'archive') pending.push(childPath);
        continue;
      }
      if (!child.isFile() || child.name !== 'state.json') continue;

      let stamp;
      try {
        stamp = fs.statSync(childPath).mtimeMs;
      } catch {
        continue; // file vanished or is unreadable
      }
      if (newest === null || stamp > newest.mtimeMs) {
        newest = { dir: current, mtimeMs: stamp };
      }
    }
  }
  return newest === null ? null : newest.dir;
}
168
+
169
/**
 * Resolve the active artifact directory under `<root>/.flow-agents`.
 *
 * Resolution order:
 *   1. `.flow-agents/current.json` — its `artifact_dir`, else `active_slug`.
 *      The value is sanitised (runs of two or more dots removed, leading
 *      slashes/backslashes stripped) and accepted only when the joined path
 *      stays inside `.flow-agents` and exists on disk.
 *   2. Otherwise the directory of the newest-mtime `state.json`
 *      (see latestStateDir).
 *
 * The original note says the other hooks resolve the directory the same way;
 * that cannot be confirmed from this file.
 *
 * @param {string} root Repository root.
 * @returns {string|null} Absolute artifact directory, or null when none is found.
 */
function resolveArtifactDir(root) {
  const flowAgentsDir = path.join(root, '.flow-agents');
  const pointer = readJsonFile(path.join(flowAgentsDir, 'current.json'));
  const slug = pointer ? (pointer.artifact_dir || pointer.active_slug) : undefined;

  if (typeof slug === 'string' && slug.trim()) {
    // Path-traversal guard: drop ".." runs and any leading separators.
    const cleaned = slug.replace(/\.\.+/g, '').replace(/^[/\\]+/, '');
    const candidate = path.join(flowAgentsDir, cleaned);
    const isInside = candidate.startsWith(flowAgentsDir + path.sep);
    if (isInside && fs.existsSync(candidate)) return candidate;
  }
  return latestStateDir(flowAgentsDir);
}
188
+
189
/**
 * Decide whether a tool invocation should be captured as a command execution.
 *
 * A non-blank string `command` is mandatory. When the runtime supplies no tool
 * name the command alone is enough; otherwise the name must match
 * COMMAND_TOOL_NAME.
 *
 * @param {*} toolName Tool name from the hook payload (may be absent).
 * @param {*} command Value of `tool_input.command`.
 * @returns {boolean} True when the invocation should be logged.
 */
function isCommandTool(toolName, command) {
  const hasCommand = typeof command === 'string' && command.trim() !== '';
  if (!hasCommand) return false;
  // Some runtimes omit tool_name; a present command string is then sufficient.
  return toolName ? COMMAND_TOOL_NAME.test(String(toolName)) : true;
}
197
+
198
/**
 * Stringify a value and cut it to at most `max` characters.
 *
 * @param {*} text Value to stringify; null/undefined become the empty string.
 * @param {number} max Maximum length of the result.
 * @returns {string} The (possibly shortened) string.
 */
function clamp(text, max) {
  const str = text == null ? '' : String(text);
  if (str.length > max) return str.slice(0, max);
  return str;
}
202
+
203
/**
 * Normalise a host-provided value into an integer exit code.
 *
 * Integer numbers pass through unchanged. Strings are accepted when, after
 * trimming, they consist of an optional minus sign followed by digits only.
 * Everything else (NaN, floats, booleans, objects, null, ...) yields null.
 *
 * @param {*} value Candidate exit code.
 * @returns {number|null} The integer exit code, or null when not usable.
 */
function cleanExitCode(value) {
  switch (typeof value) {
    case 'number':
      return Number.isInteger(value) ? value : null;
    case 'string': {
      const digits = value.trim();
      return /^-?\d+$/.test(digits) ? parseInt(digits, 10) : null;
    }
    default:
      return null;
  }
}
210
+
211
/**
 * Derive `{ exitCode, observedResult }` from structured host fields only.
 *
 * Resolution order:
 *   1. The first usable integer exit code (see cleanExitCode). Candidates are
 *      checked in this order: `tool_response`, then `tool_output` (keys
 *      exitCode, exit_code, exitcode, status, code, returnCode, return_code),
 *      then the top-level exitCode, exit_code, status, code. The result is
 *      'pass' only when that code is 0.
 *   2. With no exit code, 'fail' when isFailureIndicated reports a failure
 *      signal; `exitCode` is null.
 *   3. Otherwise 'pass' with `exitCode` null.
 *
 * @param {object} input Object carrying tool_response, tool_output, error and
 *   the optional top-level exit-code fields.
 * @returns {{ exitCode: number|null, observedResult: 'pass'|'fail' }}
 */
function observeResult(input) {
  const { tool_response: response, tool_output: output, error } = input;

  const nestedKeys = ['exitCode', 'exit_code', 'exitcode', 'status', 'code', 'returnCode', 'return_code'];
  const topLevelKeys = ['exitCode', 'exit_code', 'status', 'code'];

  // Gather every place the host might surface an exit code, in priority order.
  const candidates = [];
  for (const src of [response, output]) {
    if (src && typeof src === 'object') {
      for (const key of nestedKeys) candidates.push(src[key]);
    }
  }
  for (const key of topLevelKeys) candidates.push(input[key]);

  const found = candidates
    .map((candidate) => cleanExitCode(candidate))
    .find((code) => code !== null);

  if (found !== undefined) {
    return { exitCode: found, observedResult: found === 0 ? 'pass' : 'fail' };
  }

  // No clean exit code: fall back to explicit failure indicators only.
  const failed = isFailureIndicated(error, response, output);
  return { exitCode: null, observedResult: failed ? 'fail' : 'pass' };
}
252
+
253
/**
 * Report whether the host surfaced a deterministic failure signal.
 *
 * Signals, in order:
 *   - top-level `error` that is a non-blank string or an object with keys;
 *   - on `response` or `output` (objects only): `success === false`,
 *     `failed`/`is_error`/`isError === true`, or a non-blank string `error`;
 *   - a non-blank string `stderr` on `response`/`output`, but only when the
 *     top-level `error` is null/undefined and there is no non-blank string
 *     `stdout` alongside it.
 *
 * stdout text is never searched for words such as "error" or "fail".
 *
 * @param {*} error Top-level `error` field of the payload.
 * @param {*} response `tool_response` field of the payload.
 * @param {*} output `tool_output` field of the payload.
 * @returns {boolean} True when a failure is indicated.
 */
function isFailureIndicated(error, response, output) {
  const isNonBlank = (value) => typeof value === 'string' && value.trim() !== '';

  if (isNonBlank(error)) return true;
  if (error && typeof error === 'object' && Object.keys(error).length > 0) return true;

  const noTopLevelError = error == null;
  return [response, output].some((src) => {
    if (!src || typeof src !== 'object') return false;
    if (src.success === false) return true;
    if (src.failed === true || src.is_error === true || src.isError === true) return true;
    if (isNonBlank(src.error)) return true;
    // stderr alone is a weak signal (passing tools write there too), so it
    // only counts when nothing on stdout suggests a normal result.
    return noTopLevelError && isNonBlank(src.stderr) && !isNonBlank(src.stdout);
  });
}
276
+
277
/**
 * Hook entry point: record one command execution to the active workflow's
 * `command-log.jsonl`, then hand the payload back untouched.
 *
 * Nothing is written when the payload is not a command-tool invocation or when
 * no active artifact directory can be resolved. Every failure is swallowed on
 * purpose (fail-open) so that capture never blocks the agent.
 *
 * @param {string} rawInput Raw JSON payload received by the hook.
 * @returns {string} `rawInput`, unchanged, in every case.
 */
function run(rawInput) {
  try {
    const payload = parseJson(rawInput);
    const toolInput = payload.tool_input;
    const command = toolInput && toolInput.command;
    if (!isCommandTool(payload.tool_name, command)) return rawInput;

    const repoRoot = findRepoRoot(payload.cwd || process.cwd());
    const artifactDir = resolveArtifactDir(repoRoot);
    if (!artifactDir) return rawInput; // no active workflow to anchor the log to

    // Hand observeResult only the structured host fields it inspects.
    const hostFields = {};
    for (const key of ['tool_response', 'tool_output', 'error', 'exitCode', 'exit_code', 'status', 'code']) {
      hostFields[key] = payload[key];
    }
    const observed = observeResult(hostFields);

    const record = {
      command: clamp(command, MAX_COMMAND_LEN).replace(/\s+/g, ' ').trim(),
      observedResult: observed.observedResult,
      exitCode: observed.exitCode,
      capturedAt: new Date().toISOString(),
      source: 'postToolUse-capture',
    };

    const logFile = path.join(artifactDir, 'command-log.jsonl');
    fs.mkdirSync(artifactDir, { recursive: true });

    // Attach the hash-chain link when it can be computed; on any chain error
    // the plain record is written instead so capture is never lost.
    let line = record;
    try {
      const last = readLastChainState(logFile);
      const hash = computeChainHash(last.hash, record);
      line = { ...record, _chain: { seq: last.seq + 1, prevHash: last.hash, hash } };
    } catch {
      /* chain computation failed — fall back to the plain record */
    }

    fs.appendFileSync(logFile, `${JSON.stringify(line)}\n`);
  } catch {
    /* fail-open: capture never blocks the agent or corrupts the log */
  }
  return rawInput;
}
325
+
326
// CLI entry point: buffer stdin (capped at MAX_STDIN), run the capture, echo
// the payload to stdout, and always exit 0.
if (require.main === module) {
  let data = '';
  process.stdin.setEncoding('utf8');
  process.stdin.on('data', chunk => {
    // Stop accumulating once the cap is reached; excess input is dropped.
    if (data.length < MAX_STDIN) data += chunk.substring(0, MAX_STDIN - data.length);
  });
  process.stdin.on('end', () => {
    // process.exit() does not wait for pending asynchronous stdout writes, so
    // exiting right after write() can truncate a large echoed payload. Exit
    // from the write callback instead; a stdout error (e.g. a closed pipe)
    // still results in exit code 0 to keep the hook non-blocking.
    const finish = () => process.exit(0);
    process.stdout.on('error', finish);
    process.stdout.write(String(run(data)), finish);
  });
}
337
+
338
+ module.exports = {
339
+ run,
340
+ resolveArtifactDir,
341
+ observeResult,
342
+ isCommandTool,
343
+ findRepoRoot,
344
+ // Chain helpers exported for testing and gate verification.
345
+ canonicalJsonForChain,
346
+ computeChainHash,
347
+ CHAIN_GENESIS,
348
+ };
@@ -0,0 +1,113 @@
1
+ 'use strict';
2
+ /**
3
+ * liveness-read.js — shared pure-CJS liveness freshness helper
4
+ *
5
+ * Zero external dependencies. Consumed by:
6
+ * - scripts/hooks/workflow-steering.js (CJS, direct require)
7
+ * - build/src/cli/workflow-sidecar.js (ESM compiled, via createRequire)
8
+ *
9
+ * Exports:
10
+ * readLivenessEvents(streamPath) → AnyObj[] (tolerates malformed lines)
11
+ * freshHolders(events, slug, selfActor, nowMs) → holder[]
12
+ *
13
+ * freshHolders returns, for each actor (other than selfActor) with a
14
+ * within-TTL claim/heartbeat on subjectId === slug, an object:
15
+ * { actor: string, lastAt: string, ttlSeconds: number, fresh: boolean }
16
+ * Only actors where fresh === true are returned (i.e., elapsed < ttlSeconds*1000
17
+ * and no subsequent release event).
18
+ *
19
+ * Freshness rule mirrors the ADR 0012 grouping logic in workflow-sidecar.ts:
20
+ * - Group events by subjectId::actor.
21
+ * - Track the latest ttlSeconds from claim events (default 1800 s).
22
+ * - Track the latest event.at per group.
23
+ * - If the last event is a release → not fresh (regardless of elapsed).
24
+ * - Otherwise → fresh if (nowMs - Date.parse(lastAt)) < ttlSeconds * 1000.
25
+ */
26
+
27
+ const fs = require('fs');
28
+
29
/**
 * Load a liveness JSONL stream into an array of parsed events.
 *
 * Each non-blank line is trimmed and parsed as JSON. Lines that fail to parse
 * are dropped silently, and a file that cannot be read yields an empty array.
 *
 * @param {string} streamPath Path to the events.jsonl stream.
 * @returns {object[]} Parsed events in file order.
 */
function readLivenessEvents(streamPath) {
  let text;
  try {
    text = fs.readFileSync(streamPath, 'utf8');
  } catch {
    return [];
  }

  return text.split('\n').reduce((events, line) => {
    const candidate = line.trim();
    if (candidate) {
      try {
        events.push(JSON.parse(candidate));
      } catch {
        /* malformed line — ignore */
      }
    }
    return events;
  }, []);
}
55
+
56
/**
 * Compute the fresh liveness holders of a work item, excluding the caller.
 *
 * Events are grouped per actor for `subjectId === slug`. For each group:
 *   - `lastAt` is the chronologically latest event timestamp. Timestamps are
 *     compared as parsed instants (Date.parse), so ISO strings with different
 *     precision or UTC offsets order correctly; if either side cannot be
 *     parsed the raw values are compared instead.
 *   - `ttlSeconds` is taken from the most recent (in stream order) claim event
 *     carrying a positive numeric `ttlSeconds`; the default is 1800.
 *   - a `release` marks the group released; a later (in stream order) `claim`
 *     or `heartbeat` re-activates it.
 * A holder is returned when its group is not released and
 * `nowMs - Date.parse(lastAt) < ttlSeconds * 1000`. An unparseable `lastAt`
 * therefore never counts as fresh.
 *
 * @param {object[]} events    Parsed liveness events (non-arrays are treated as empty)
 * @param {string}   slug      Work-item subjectId to filter on
 * @param {string}   selfActor Actor to exclude (current agent's identity)
 * @param {number}   nowMs     Current epoch ms (Date.now())
 * @returns {{ actor: string, lastAt: string, ttlSeconds: number, fresh: boolean }[]}
 */
function freshHolders(events, slug, selfActor, nowMs) {
  /** @type {Map<string, { actor: string, ttlSeconds: number, lastAt: string, lastAtMs: number, released: boolean }>} */
  const groups = new Map();

  for (const e of Array.isArray(events) ? events : []) {
    if (!e || typeof e !== 'object') continue;
    if (e.subjectId !== slug) continue;
    if (!e.actor || !e.at) continue;

    const actor = String(e.actor);
    if (actor === selfActor) continue;

    const at = String(e.at);
    const atMs = Date.parse(at);

    let g = groups.get(actor);
    if (!g) {
      g = { actor, ttlSeconds: 1800, lastAt: at, lastAtMs: atMs, released: false };
      groups.set(actor, g);
    } else {
      // Order by instant, not by string: "…00Z" sorts after "…00.500Z"
      // lexicographically, and offset timestamps do not sort textually at all.
      const comparable = !Number.isNaN(atMs) && !Number.isNaN(g.lastAtMs);
      const isLater = comparable ? atMs > g.lastAtMs : e.at > g.lastAt;
      if (isLater) {
        g.lastAt = at;
        g.lastAtMs = atMs;
      }
    }

    // Track TTL from claim events
    if (e.type === 'claim' && typeof e.ttlSeconds === 'number' && e.ttlSeconds > 0) {
      g.ttlSeconds = e.ttlSeconds;
    }

    // Release state follows stream order: the last claim/heartbeat/release wins.
    if (e.type === 'release') {
      g.released = true;
    } else if (e.type === 'claim' || e.type === 'heartbeat') {
      g.released = false;
    }
  }

  const result = [];
  for (const g of groups.values()) {
    if (g.released) continue;
    // NaN (unparseable lastAt) makes the comparison false → not fresh.
    const elapsed = nowMs - g.lastAtMs;
    if (elapsed < g.ttlSeconds * 1000) {
      result.push({ actor: g.actor, lastAt: g.lastAt, ttlSeconds: g.ttlSeconds, fresh: true });
    }
  }
  return result;
}
112
+
113
+ module.exports = { readLivenessEvents, freshHolders };
@@ -100,7 +100,12 @@ async function main() {
100
100
 
101
101
  if (hookModule && typeof hookModule.run === 'function') {
102
102
  try {
103
- const output = hookModule.run(raw, { truncated, maxStdin: MAX_STDIN });
103
+ const outputOrPromise = hookModule.run(raw, { truncated, maxStdin: MAX_STDIN });
104
+ // Support async run() (returns a Promise): await before emitting result.
105
+ // Synchronous run() returns a plain value, which is used as-is (only thenables are awaited).
106
+ const output = (outputOrPromise && typeof outputOrPromise.then === 'function')
107
+ ? await outputOrPromise
108
+ : outputOrPromise;
104
109
  process.exit(emitHookResult(raw, output));
105
110
  } catch (e) {
106
111
  process.stderr.write(`[Hook] run() error for ${hookId}: ${e.message}\n`);