@kontourai/flow-agents 1.4.0 → 2.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.github/CODEOWNERS +29 -0
- package/.github/actions/trust-verify/action.yml +145 -0
- package/.github/workflows/ci.yml +11 -4
- package/.github/workflows/kit-gates-demo.yml +2 -2
- package/.github/workflows/publish-npm.yml +10 -2
- package/.github/workflows/release-please.yml +1 -1
- package/.github/workflows/runtime-compat.yml +1 -1
- package/.github/workflows/trust-reconcile.yml +113 -0
- package/AGENTS.md +13 -0
- package/CHANGELOG.md +103 -0
- package/CONTRIBUTING.md +4 -4
- package/README.md +1 -0
- package/agents/tool-planner.json +1 -1
- package/build/src/cli/init.js +242 -20
- package/build/src/cli/validate-workflow-artifacts.js +19 -2
- package/build/src/cli/verify.d.ts +1 -0
- package/build/src/cli/verify.js +90 -0
- package/build/src/cli/workflow-sidecar.d.ts +316 -8
- package/build/src/cli/workflow-sidecar.js +1996 -91
- package/build/src/cli.js +2 -3
- package/build/src/lib/flow-resolver.d.ts +111 -0
- package/build/src/lib/flow-resolver.js +308 -0
- package/build/src/tools/build-universal-bundles.js +34 -22
- package/build/src/tools/generate-context-map.js +3 -16
- package/build/src/tools/validate-source-tree.d.ts +1 -1
- package/build/src/tools/validate-source-tree.js +42 -162
- package/context/contracts/artifact-contract.md +10 -0
- package/context/contracts/delivery-contract.md +1 -0
- package/context/contracts/review-contract.md +1 -0
- package/context/contracts/verification-contract.md +2 -0
- package/context/gate-awareness.md +39 -0
- package/context/scripts/hooks/stop-goal-fit.js +632 -70
- package/docs/adr/0001-flow-agents-consumes-flow.md +1 -1
- package/docs/adr/0002-flow-kits-as-extension-unit.md +1 -1
- package/docs/adr/0004-gates-expect-surface-claims.md +2 -0
- package/docs/adr/0005-kubernetes-inspired-resource-contracts.md +2 -0
- package/docs/adr/0007-skill-audit.md +1 -1
- package/docs/adr/0009-canonical-hook-core-kit-boundary.md +95 -0
- package/docs/adr/0010-workflow-trust-state-as-hachure-bundle.md +139 -0
- package/docs/adr/0011-mcp-posture.md +100 -0
- package/docs/adr/0012-agent-coordination-as-liveness-claims.md +119 -0
- package/docs/adr/0013-context-lifecycle.md +151 -0
- package/docs/adr/0014-core-vs-domain-kit-boundary.md +143 -0
- package/docs/adr/0015-flow-flow-agents-boundary-reconciliation.md +120 -0
- package/docs/adr/0016-three-hard-boundary-model.md +71 -0
- package/docs/adr/0017-anti-gaming-trust-security-model.md +155 -0
- package/docs/agent-system-guidebook.md +5 -12
- package/docs/context-map.md +4 -10
- package/docs/index.md +3 -2
- package/docs/integrations/framework-adapter.md +19 -6
- package/docs/integrations/index.md +2 -2
- package/docs/north-star.md +4 -4
- package/docs/operating-layers.md +3 -3
- package/docs/plans/adr-0010-phase2-gate-recompute.md +55 -0
- package/docs/repository-structure.md +2 -2
- package/docs/skills-map.md +1 -0
- package/docs/spec/runtime-hook-surface.md +62 -9
- package/docs/standards-register.md +3 -3
- package/docs/survey-utterance-check.md +1 -1
- package/docs/trust-anchor-adoption.md +197 -0
- package/docs/verifiable-trust.md +95 -0
- package/docs/veritas-integration.md +2 -2
- package/docs/workflow-usage-guide.md +69 -0
- package/evals/acceptance/DEMO-false-completion.md +144 -0
- package/evals/acceptance/demo-cast.sh +92 -0
- package/evals/acceptance/demo-false-completion.sh +72 -0
- package/evals/acceptance/demo-real-evidence.sh +104 -0
- package/evals/acceptance/demo.tape +29 -0
- package/evals/acceptance/prove-capture-teeth-declared.sh +335 -0
- package/evals/acceptance/prove-capture-teeth.sh +114 -0
- package/evals/acceptance/prove-teeth.sh +105 -0
- package/evals/ci/antigaming-suite.sh +55 -0
- package/evals/ci/run-baseline.sh +2 -0
- package/evals/fixtures/flow-kit-repository/invalid-missing-extension-asset/flows/review.flow.json +26 -0
- package/evals/fixtures/flow-kit-repository/invalid-missing-extension-asset/kit.json +20 -0
- package/evals/fixtures/flow-kit-repository/valid-unknown-extension/flows/review.flow.json +26 -0
- package/evals/fixtures/flow-kit-repository/valid-unknown-extension/kit.json +18 -0
- package/evals/integration/test_builder_step_producers.sh +379 -0
- package/evals/integration/test_bundle_install.sh +35 -71
- package/evals/integration/test_bundle_lifecycle.sh +39 -2
- package/evals/integration/test_captured_fail_reconciliation.sh +820 -0
- package/evals/integration/test_checkpoint_signing.sh +489 -0
- package/evals/integration/test_claim_lookup.sh +352 -0
- package/evals/integration/test_command_log_fork_classification.sh +134 -0
- package/evals/integration/test_command_log_integrity.sh +275 -0
- package/evals/integration/test_context_map.sh +0 -2
- package/evals/integration/test_dual_emit_flow_step.sh +278 -0
- package/evals/integration/test_enforcer_expects_driven.sh +281 -0
- package/evals/integration/test_evidence_capture_hook.sh +185 -0
- package/evals/integration/test_flow_kit_repository.sh +2 -0
- package/evals/integration/test_flowdef_session_activation.sh +273 -0
- package/evals/integration/test_flowdef_session_history_preservation.sh +250 -0
- package/evals/integration/test_gate_bypass_chain.sh +448 -0
- package/evals/integration/test_gate_lockdown.sh +1137 -0
- package/evals/integration/test_gate_review_inquiry_records.sh +399 -0
- package/evals/integration/test_goal_fit_escape_hatch.sh +73 -0
- package/evals/integration/test_goal_fit_hook.sh +69 -4
- package/evals/integration/test_goal_fit_rederive.sh +263 -0
- package/evals/integration/test_install_merge.sh +1176 -0
- package/evals/integration/test_kit_identity_trust.sh +393 -0
- package/evals/integration/test_mint_attestation.sh +373 -0
- package/evals/integration/test_phase_map_and_gate_claim.sh +365 -0
- package/evals/integration/test_publish_delivery.sh +269 -0
- package/evals/integration/test_reconcile_soundness.sh +528 -0
- package/evals/integration/test_resolvefirststep_security.sh +208 -0
- package/evals/integration/test_session_resume_roundtrip.sh +286 -0
- package/evals/integration/test_trust_checkpoint.sh +325 -0
- package/evals/integration/test_trust_reconcile.sh +293 -0
- package/evals/integration/test_verify_cli.sh +208 -0
- package/evals/integration/test_workflow_sidecar_writer.sh +549 -34
- package/evals/lib/node.sh +0 -6
- package/evals/run.sh +47 -0
- package/evals/static/test_workflow_skills.sh +6 -13
- package/install.sh +0 -7
- package/integrations/strands-ts/README.md +25 -15
- package/integrations/veritas/flow-agents.adapter.json +1 -2
- package/kits/builder/flows/build.flow.json +59 -12
- package/kits/builder/kit.json +85 -15
- package/kits/builder/skills/continue-work/SKILL.md +116 -0
- package/kits/builder/skills/deliver/SKILL.md +36 -6
- package/kits/builder/skills/design-probe/SKILL.md +28 -0
- package/kits/builder/skills/execute-plan/SKILL.md +9 -1
- package/kits/builder/skills/gate-review/SKILL.md +234 -0
- package/kits/builder/skills/learning-review/SKILL.md +30 -0
- package/kits/builder/skills/pickup-probe/SKILL.md +29 -0
- package/kits/builder/skills/plan-work/SKILL.md +13 -1
- package/kits/builder/skills/pull-work/SKILL.md +19 -0
- package/kits/knowledge/adapters/default-store/index.js +38 -0
- package/kits/knowledge/adapters/flow-runner/index.js +1620 -0
- package/kits/knowledge/adapters/obsidian-store/index.js +36 -6
- package/kits/knowledge/docs/store-contract.md +314 -0
- package/kits/knowledge/evals/audit-freshness/suite.test.js +368 -0
- package/kits/knowledge/evals/canonicalize-category/suite.test.js +383 -0
- package/kits/knowledge/evals/contract-suite/suite.test.js +111 -0
- package/kits/knowledge/evals/detect-contradictions/suite.test.js +324 -0
- package/kits/knowledge/evals/entities/suite.test.js +40 -0
- package/kits/knowledge/evals/glossary-sync/suite.test.js +416 -0
- package/kits/knowledge/evals/hygiene-review/suite.test.js +396 -0
- package/kits/knowledge/evals/retirement/suite.test.js +145 -0
- package/kits/knowledge/flows/audit-freshness.flow.json +44 -0
- package/kits/knowledge/flows/canonicalize-category.flow.json +44 -0
- package/kits/knowledge/flows/detect-contradictions.flow.json +44 -0
- package/kits/knowledge/flows/glossary-sync.flow.json +61 -0
- package/kits/knowledge/flows/hygiene-review.flow.json +43 -0
- package/kits/knowledge/kit.json +51 -1
- package/package.json +6 -6
- package/packaging/conformance/README.md +10 -2
- package/packaging/conformance/fixtures/evidence-capture--allow-records-command.json +29 -0
- package/packaging/conformance/fixtures/stop-goal-fit--block-bundle-disputed-claim.json +29 -0
- package/packaging/conformance/fixtures/stop-goal-fit--block-capture-contradicts-claimed-pass.json +30 -0
- package/packaging/conformance/fixtures/stop-goal-fit--block-mode.json +23 -0
- package/packaging/conformance/fixtures/stop-goal-fit--off-mode.json +24 -0
- package/packaging/conformance/fixtures/stop-goal-fit--warn-active-delivery.json +5 -2
- package/packaging/conformance/fixtures/stop-goal-fit--warn-no-bundle.json +23 -0
- package/packaging/conformance/fixtures/workflow-steering--reground-active-prompt.json +30 -0
- package/packaging/conformance/fixtures/workflow-steering--reground-session-start.json +30 -0
- package/packaging/conformance/run-conformance.js +1 -1
- package/scripts/README.md +2 -1
- package/scripts/build-universal-bundles.js +0 -1
- package/scripts/ci/mint-attestation.js +221 -0
- package/scripts/ci/trust-reconcile.js +545 -0
- package/scripts/hooks/config-protection.js +423 -1
- package/scripts/hooks/evidence-capture.js +348 -0
- package/scripts/hooks/lib/liveness-read.js +113 -0
- package/scripts/hooks/run-hook.js +6 -1
- package/scripts/hooks/stop-goal-fit.js +1524 -79
- package/scripts/hooks/workflow-steering.js +135 -5
- package/scripts/install-codex-home.sh +39 -0
- package/scripts/install-merge.js +330 -0
- package/scripts/repair-command-log.js +115 -0
- package/src/cli/init.ts +218 -20
- package/src/cli/validate-workflow-artifacts.ts +18 -2
- package/src/cli/verify.ts +100 -0
- package/src/cli/workflow-sidecar.ts +2127 -84
- package/src/cli.ts +2 -3
- package/src/lib/flow-resolver.ts +369 -0
- package/src/tools/build-universal-bundles.ts +34 -21
- package/src/tools/generate-context-map.ts +3 -17
- package/src/tools/validate-source-tree.ts +44 -104
- package/build/src/tools/filter-installed-packs.d.ts +0 -2
- package/build/src/tools/filter-installed-packs.js +0 -135
- package/packaging/packs.json +0 -49
- package/scripts/filter-installed-packs.js +0 -2
- package/src/tools/filter-installed-packs.ts +0 -132
|
@@ -0,0 +1,348 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
/**
|
|
3
|
+
* Evidence Capture Hook (capture-first determinism)
|
|
4
|
+
*
|
|
5
|
+
* A postToolUse hook that DETERMINISTICALLY records every command/shell tool
|
|
6
|
+
* execution to an append-only log, so evidence about what actually ran is
|
|
7
|
+
* machine-recorded at the source — not transcribed later by the model. The Stop
|
|
8
|
+
* gate (stop-goal-fit.js) then cross-references the model's evidence.json claims
|
|
9
|
+
* against this captured truth: a check claiming a command passed while the log
|
|
10
|
+
* shows it FAILED is a caught false-completion.
|
|
11
|
+
*
|
|
12
|
+
* Records to `.flow-agents/<slug>/command-log.jsonl`, one JSON object per line:
|
|
13
|
+
* {
|
|
14
|
+
* "command": "<the command string the agent ran>",
|
|
15
|
+
* "observedResult": "pass" | "fail", // deterministically inferred
|
|
16
|
+
* "exitCode": <integer> | null, // null when only pass/fail is inferable
|
|
17
|
+
* "capturedAt": "<ISO-8601 timestamp>",
|
|
18
|
+
* "source": "postToolUse-capture",
|
|
19
|
+
* "_chain": { "seq": <n>, "prevHash": "<hex>", "hash": "<hex>" }
|
|
20
|
+
* }
|
|
21
|
+
*
|
|
22
|
+
* Hash-chain integrity (Increment B2, tamper-EVIDENCE):
|
|
23
|
+
* command-log.jsonl is the independent capture truth source. Each record carries
|
|
24
|
+
* a `_chain` block: hash = sha256(prevHash + canonicalJson(record)) where
|
|
25
|
+
* canonicalJson is the record WITHOUT the `_chain` field, keys sorted.
|
|
26
|
+
* The genesis prevHash is a fixed constant. This makes alteration, removal, or
|
|
27
|
+
* reordering of entries DETECTABLE by the Stop gate's verifyCommandLogChain().
|
|
28
|
+
*
|
|
29
|
+
* HONEST FRAMING: this is tamper-EVIDENCE, NOT tamper-PROOF. An agent that
|
|
30
|
+
* recomputes the whole chain from scratch can still forge a valid chain. The
|
|
31
|
+
* real tamper-proof boundary is the signed checkpoint (B1). The chain raises the
|
|
32
|
+
* local bar and catches casual/accidental tampering and corruption.
|
|
33
|
+
*
|
|
34
|
+
* Fail-open: any chain computation error falls back to writing the plain record
|
|
35
|
+
* without `_chain`. A chain failure must NEVER block capture or corrupt the log.
|
|
36
|
+
*
|
|
37
|
+
* Exit-code nuance: the host payload exposes `tool_response`/`tool_output`/`error`
|
|
38
|
+
* (per docs/spec/runtime-hook-surface.md §1, postToolUse). A clean integer exit
|
|
39
|
+
* code is host-dependent. We extract the real exit code where present; otherwise
|
|
40
|
+
* we derive `observedResult` from `error`/stderr-style failure indication and
|
|
41
|
+
* record `exitCode: null`. We never record the model's words about the outcome.
|
|
42
|
+
*
|
|
43
|
+
* Non-blocking — always exits 0. Idempotent/append-only. Fail-open on any error:
|
|
44
|
+
* a capture failure must never block the agent or corrupt the log.
|
|
45
|
+
*/
|
|
46
|
+
|
|
47
|
+
'use strict';
|
|
48
|
+
|
|
49
|
+
const fs = require('fs');
|
|
50
|
+
const path = require('path');
|
|
51
|
+
const crypto = require('crypto');
|
|
52
|
+
|
|
53
|
+
// Cap on how much stdin (measured by string length) the CLI entry point buffers.
const MAX_STDIN = 1024 * 1024;
|
|
54
|
+
// Longest command string kept in a log record; run() truncates anything longer.
const MAX_COMMAND_LEN = 4096;
|
|
55
|
+
// NOTE(review): not referenced anywhere in the visible part of this file —
// confirm whether it is still needed.
const MAX_OUTPUT_SCAN = 64 * 1024;
|
|
56
|
+
|
|
57
|
+
// Tools whose tool_input.command is a shell/command execution. Identified by the
|
|
58
|
+
// presence of tool_input.command plus a command/shell-ish tool_name. We keep the
|
|
59
|
+
// name set permissive (substring match) so unknown-but-command-shaped tools on
|
|
60
|
+
// other runtimes still get captured when they carry a command string.
|
|
61
|
+
// Case-insensitive. A keyword only counts when it is not embedded in a longer
// run of letters: "Bash" and "run_shell_command" match, "Flash" does not.
const COMMAND_TOOL_NAME = /(^|[^a-z])(bash|shell|sh|exec|run|command|terminal|cmd|process|executebash|executecommand)([^a-z]|$)/i;
|
|
62
|
+
|
|
63
|
+
// ─── Hash-chain integrity (tamper-EVIDENCE) ───────────────────────────────────
|
|
64
|
+
//
|
|
65
|
+
// Genesis prevHash: a fixed arbitrary sentinel used when the log is empty or
|
|
66
|
+
// the last entry has no _chain field (legacy record). This is NOT the SHA256 of
|
|
67
|
+
// any specific input string — it is a fixed constant chosen for the original
|
|
68
|
+
// implementation. (A previous comment incorrectly claimed it was
|
|
69
|
+
// sha256("flow-agents:command-log:genesis"); that is wrong.)
|
|
70
|
+
//
|
|
71
|
+
// Writer (this file, CHAIN_GENESIS) and verifier (stop-goal-fit.js,
|
|
72
|
+
// CHAIN_GENESIS_VERIFY) MUST use the same value. Do not change one without
|
|
73
|
+
// changing the other — existing chained logs depend on this constant.
|
|
74
|
+
//
|
|
75
|
+
// HONEST FRAMING: this makes alteration DETECTABLE, not impossible. An agent
|
|
76
|
+
// that rewrites all hashes can still forge the chain. The real tamper-proof
|
|
77
|
+
// boundary is the signed checkpoint (B1). We do not oversell this boundary.
|
|
78
|
+
// The literal below is 63 hex characters long, so it is not a well-formed
// SHA-256 digest (64 characters). Treat it as an opaque sentinel string.
const CHAIN_GENESIS = 'a3f9e2b7d5c84f1e6a0d2c3b9f7e1a4d8c6b5f2e9a0d3c7b1f4e8a2d6c0b9f3';
|
|
79
|
+
|
|
80
|
+
/**
|
|
81
|
+
* Stable canonical JSON for the chain input: the record WITHOUT the `_chain`
|
|
82
|
+
* field, keys sorted alphabetically. This ensures the hash is independent of
|
|
83
|
+
* key insertion order and that `_chain` itself does not contribute to its own
|
|
84
|
+
* hash (circular dependency).
|
|
85
|
+
*/
|
|
86
|
+
function canonicalJsonForChain(record) {
  // Rebuild the record with its top-level keys in sorted order, leaving out
  // `_chain` so a link never feeds into its own hash. Nested values are
  // serialized as-is; only the top-level key order is normalized.
  const canonical = Object.keys(record)
    .filter((key) => key !== '_chain')
    .sort()
    .reduce((acc, key) => {
      acc[key] = record[key];
      return acc;
    }, {});
  return JSON.stringify(canonical);
}
|
|
93
|
+
|
|
94
|
+
/**
|
|
95
|
+
* Compute the sha256 hex hash for this chain link.
|
|
96
|
+
* hash = sha256(prevHash + canonicalJson(record))
|
|
97
|
+
*/
|
|
98
|
+
function computeChainHash(prevHash, record) {
  // The link digest covers the previous link's hash followed by the canonical
  // serialization of this record, hex-encoded.
  const hasher = crypto.createHash('sha256');
  hasher.update(prevHash + canonicalJsonForChain(record), 'utf8');
  return hasher.digest('hex');
}
|
|
102
|
+
|
|
103
|
+
/**
|
|
104
|
+
* Read the last entry from command-log.jsonl that has a `_chain` block.
|
|
105
|
+
* Returns { seq, hash } of that entry, or { seq: -1, hash: CHAIN_GENESIS }
|
|
106
|
+
* when the log is absent, empty, or all existing entries are legacy (no _chain).
|
|
107
|
+
*
|
|
108
|
+
* We scan from the end so we can stop as soon as we find a chained entry
|
|
109
|
+
* (the whole file is still read into memory; only the JSON parsing stops early).
|
|
110
|
+
*/
|
|
111
|
+
function readLastChainState(logFile) {
  // An unreadable or missing log starts a fresh chain from the genesis value.
  let contents;
  try {
    contents = fs.readFileSync(logFile, 'utf8');
  } catch {
    return { seq: -1, hash: CHAIN_GENESIS };
  }

  // Walk the non-blank lines newest-first; the first one carrying a usable
  // `_chain` block defines where the next link attaches. Unparseable lines
  // and records without `_chain` are skipped.
  const newestFirst = contents
    .split('\n')
    .filter((line) => line.trim())
    .reverse();

  for (const line of newestFirst) {
    let parsed;
    try {
      parsed = JSON.parse(line);
    } catch {
      continue;
    }
    const chain = parsed && parsed._chain;
    if (chain && typeof chain.hash === 'string' && typeof chain.seq === 'number') {
      return { seq: chain.seq, hash: chain.hash };
    }
  }

  return { seq: -1, hash: CHAIN_GENESIS };
}
|
|
124
|
+
// ─────────────────────────────────────────────────────────────────────────────
|
|
125
|
+
|
|
126
|
+
function parseJson(raw) {
  // Empty/absent input is treated as an empty object; so is unparseable text.
  const text = raw || '{}';
  try {
    return JSON.parse(text);
  } catch {
    return {};
  }
}
|
|
129
|
+
|
|
130
|
+
function findRepoRoot(startDir) {
  // Climb from the starting directory toward the filesystem root and return
  // the first directory containing `.git` or `AGENTS.md`. When none is found
  // (or after 40 levels) the resolved starting directory itself is returned.
  const origin = path.resolve(startDir || process.cwd());
  const fsRoot = path.parse(origin).root;
  const hasMarker = (candidate) =>
    fs.existsSync(path.join(candidate, '.git')) || fs.existsSync(path.join(candidate, 'AGENTS.md'));

  let current = origin;
  let hops = 0;
  while (current && hops < 40) {
    if (hasMarker(current)) return current;
    if (current === fsRoot) break;
    current = path.dirname(current);
    hops += 1;
  }
  return origin;
}
|
|
140
|
+
|
|
141
|
+
function readJsonFile(file) {
  // Returns the parsed JSON content, or null when the file cannot be read or
  // does not contain valid JSON.
  let text;
  try {
    text = fs.readFileSync(file, 'utf8');
  } catch {
    return null;
  }
  try {
    return JSON.parse(text);
  } catch {
    return null;
  }
}
|
|
144
|
+
|
|
145
|
+
// Newest-mtime state.json under .flow-agents/<slug>/, mirroring how
|
|
146
|
+
// workflow-steering.js and stop-goal-fit.js locate the active artifact dir.
|
|
147
|
+
function latestStateDir(flowAgentsDir) {
  // Depth-first walk (explicit stack) over everything below flowAgentsDir,
  // skipping any directory named `archive`. The directory holding the
  // state.json with the greatest mtime wins; null when none is found.
  let newest = null;
  const pending = [flowAgentsDir];

  while (pending.length > 0) {
    const here = pending.pop();
    let children;
    try {
      children = fs.readdirSync(here, { withFileTypes: true });
    } catch {
      continue; // unreadable or missing directory — ignore it
    }

    for (const child of children) {
      const childPath = path.join(here, child.name);

      if (child.isDirectory()) {
        if (child.name !== 'archive') pending.push(childPath);
        continue;
      }
      if (!child.isFile() || child.name !== 'state.json') continue;

      let modified;
      try {
        modified = fs.statSync(childPath).mtimeMs;
      } catch {
        continue;
      }
      // Strictly-greater comparison: on an mtime tie the first one seen stays.
      if (newest === null || modified > newest.mtimeMs) {
        newest = { dir: here, mtimeMs: modified };
      }
    }
  }

  return newest === null ? null : newest.dir;
}
|
|
168
|
+
|
|
169
|
+
/**
|
|
170
|
+
* Resolve the active artifact directory the same way the other hooks do:
|
|
171
|
+
* prefer .flow-agents/current.json (active_slug / artifact_dir), then fall back
|
|
172
|
+
* to the newest-mtime state.json directory.
|
|
173
|
+
*/
|
|
174
|
+
function resolveArtifactDir(root) {
  const flowAgentsDir = path.join(root, '.flow-agents');
  const pointer = readJsonFile(path.join(flowAgentsDir, 'current.json'));
  const slug = pointer ? (pointer.artifact_dir || pointer.active_slug) : undefined;

  if (typeof slug === 'string' && slug.trim()) {
    // Path-traversal guard: drop runs of two or more dots and any leading
    // slashes, then require the joined path to stay strictly inside
    // .flow-agents/ before trusting it.
    const sanitized = slug.replace(/\.\.+/g, '').replace(/^[/\\]+/, '');
    const candidate = path.join(flowAgentsDir, sanitized);
    const staysInside = candidate.startsWith(flowAgentsDir + path.sep);
    if (staysInside && fs.existsSync(candidate)) return candidate;
  }

  // No usable pointer — fall back to the most recently modified state.json.
  return latestStateDir(flowAgentsDir);
}
|
|
188
|
+
|
|
189
|
+
function isCommandTool(toolName, command) {
  // Without a non-blank command string there is nothing to capture.
  const hasCommand = typeof command === 'string' && command.trim() !== '';
  if (!hasCommand) return false;

  // Some runtimes omit tool_name; a present command string is then enough.
  // Otherwise the name has to look like a shell/command tool.
  return toolName ? COMMAND_TOOL_NAME.test(String(toolName)) : true;
}
|
|
197
|
+
|
|
198
|
+
function clamp(text, max) {
  // null/undefined become the empty string; everything else is stringified
  // and cut to at most `max` characters.
  const str = text == null ? '' : String(text);
  if (str.length > max) return str.slice(0, max);
  return str;
}
|
|
202
|
+
|
|
203
|
+
// Coerce a value to a clean integer exit code, or null. Accepts numbers and
|
|
204
|
+
// integer-looking strings; rejects NaN/floats/anything else.
|
|
205
|
+
function cleanExitCode(value) {
  // Numbers qualify only when they are integers (rules out NaN and floats).
  if (typeof value === 'number') {
    return Number.isInteger(value) ? value : null;
  }
  // Strings qualify only when, once trimmed, they are an optional minus sign
  // followed by digits.
  if (typeof value === 'string') {
    const digits = value.trim();
    return /^-?\d+$/.test(digits) ? Number.parseInt(digits, 10) : null;
  }
  return null;
}
|
|
210
|
+
|
|
211
|
+
/**
|
|
212
|
+
* Deterministically observe { exitCode, observedResult } from the host tool
|
|
213
|
+
* result. NEVER consults the model's narration — only structured host fields.
|
|
214
|
+
*
|
|
215
|
+
* Priority:
|
|
216
|
+
* 1. A clean integer exit code anywhere the host surfaces it → exitCode set;
|
|
217
|
+
* observedResult = pass iff exitCode === 0.
|
|
218
|
+
* 2. Else, a non-empty `error` field or stderr-style failure indication →
|
|
219
|
+
* observedResult = fail, exitCode = null.
|
|
220
|
+
* 3. Else → observedResult = pass, exitCode = null.
|
|
221
|
+
*/
|
|
222
|
+
function observeResult(input) {
  const { tool_response: response, tool_output: output, error } = input;

  // Field names probed for an exit code, in priority order: first on the
  // structured response, then on the structured output, finally on the
  // payload itself.
  const nestedKeys = ['exitCode', 'exit_code', 'exitcode', 'status', 'code', 'returnCode', 'return_code'];
  const topLevelKeys = ['exitCode', 'exit_code', 'status', 'code'];

  const candidates = [];
  [response, output]
    .filter((src) => src && typeof src === 'object')
    .forEach((src) => {
      nestedKeys.forEach((key) => candidates.push(src[key]));
    });
  topLevelKeys.forEach((key) => candidates.push(input[key]));

  // The first candidate that coerces to a clean integer decides the outcome.
  const firstClean = candidates.map(cleanExitCode).find((code) => code !== null);
  if (firstClean !== undefined) {
    return { exitCode: firstClean, observedResult: firstClean === 0 ? 'pass' : 'fail' };
  }

  // No exit code available: rely on the host's failure indicators alone.
  const failed = isFailureIndicated(error, response, output);
  return { exitCode: null, observedResult: failed ? 'fail' : 'pass' };
}
|
|
252
|
+
|
|
253
|
+
// True when the host surfaces a deterministic failure signal: a non-empty
|
|
254
|
+
// `error`, a falsey `success`/truthy `failed`/`is_error` flag, or a non-empty
|
|
255
|
+
// stderr field. Plain stdout text is NOT scanned for the words "error"/"fail"
|
|
256
|
+
// because that would be guessing, not observing.
|
|
257
|
+
function isFailureIndicated(error, response, output) {
  const nonBlank = (value) => typeof value === 'string' && value.trim() !== '';

  // Top-level `error`: a non-blank string or an object with at least one key.
  if (nonBlank(error)) return true;
  if (error && typeof error === 'object' && Object.keys(error).length > 0) return true;

  // Structured response/output: explicit failure flags or an error string.
  return [response, output]
    .filter((src) => src && typeof src === 'object')
    .some((src) => {
      if (src.success === false) return true;
      if (src.failed === true || src.is_error === true || src.isError === true) return true;
      if (nonBlank(src.error)) return true;
      // stderr on its own is a weak signal (passing tools write there too),
      // so it only counts when top-level `error` is null/undefined and there
      // is no stdout text suggesting a normal result.
      return error == null && nonBlank(src.stderr) && !nonBlank(src.stdout);
    });
}
|
|
276
|
+
|
|
277
|
+
function run(rawInput) {
  // Hook entry point. Always hands back rawInput unchanged; appending a
  // record to the command log is a best-effort side effect.
  try {
    const payload = parseJson(rawInput);
    const command = payload.tool_input && payload.tool_input.command;
    if (!isCommandTool(payload.tool_name, command)) return rawInput;

    const repoRoot = findRepoRoot(payload.cwd || process.cwd());
    const artifactDir = resolveArtifactDir(repoRoot);
    // No active workflow directory — there is nowhere to anchor the log.
    if (!artifactDir) return rawInput;

    // Only the host-provided result fields are forwarded to the observer.
    const observed = observeResult({
      tool_response: payload.tool_response,
      tool_output: payload.tool_output,
      error: payload.error,
      exitCode: payload.exitCode,
      exit_code: payload.exit_code,
      status: payload.status,
      code: payload.code,
    });

    const record = {
      // Truncate first, then collapse whitespace runs to single spaces.
      command: clamp(command, MAX_COMMAND_LEN).replace(/\s+/g, ' ').trim(),
      observedResult: observed.observedResult,
      exitCode: observed.exitCode,
      capturedAt: new Date().toISOString(),
      source: 'postToolUse-capture',
    };

    const logFile = path.join(artifactDir, 'command-log.jsonl');
    fs.mkdirSync(artifactDir, { recursive: true });

    // Attach the hash-chain link when it can be computed. Any failure here
    // falls back to the plain record so capture itself is never blocked.
    let entry = record;
    try {
      const last = readLastChainState(logFile);
      const link = {
        seq: last.seq + 1,
        prevHash: last.hash,
        hash: computeChainHash(last.hash, record),
      };
      // `_chain` goes last in the written line (cosmetic; it is excluded from
      // the hash input).
      entry = { ...record, _chain: link };
    } catch {
      /* chain computation failed — write plain record, do not block */
    }

    fs.appendFileSync(logFile, JSON.stringify(entry) + '\n');
  } catch {
    /* fail-open: capture never blocks or corrupts */
  }
  return rawInput;
}
|
|
325
|
+
|
|
326
|
+
// CLI entry: buffer stdin (up to MAX_STDIN), run the hook, echo its return
// value on stdout, and exit 0.
if (require.main === module) {
  let buffered = '';
  process.stdin.setEncoding('utf8');

  process.stdin.on('data', (chunk) => {
    // Keep only as much of each chunk as still fits under the cap.
    const room = MAX_STDIN - buffered.length;
    if (room > 0) buffered += chunk.substring(0, room);
  });

  process.stdin.on('end', () => {
    const passthrough = String(run(buffered));
    process.stdout.write(passthrough);
    process.exit(0);
  });
}
|
|
337
|
+
|
|
338
|
+
module.exports = {
|
|
339
|
+
run,
|
|
340
|
+
resolveArtifactDir,
|
|
341
|
+
observeResult,
|
|
342
|
+
isCommandTool,
|
|
343
|
+
findRepoRoot,
|
|
344
|
+
// Chain helpers exported for testing and gate verification.
|
|
345
|
+
canonicalJsonForChain,
|
|
346
|
+
computeChainHash,
|
|
347
|
+
CHAIN_GENESIS,
|
|
348
|
+
};
|
|
@@ -0,0 +1,113 @@
|
|
|
1
|
+
'use strict';
|
|
2
|
+
/**
|
|
3
|
+
* liveness-read.js — shared pure-CJS liveness freshness helper
|
|
4
|
+
*
|
|
5
|
+
* Zero external dependencies. Consumed by:
|
|
6
|
+
* - scripts/hooks/workflow-steering.js (CJS, direct require)
|
|
7
|
+
* - build/src/cli/workflow-sidecar.js (ESM compiled, via createRequire)
|
|
8
|
+
*
|
|
9
|
+
* Exports:
|
|
10
|
+
* readLivenessEvents(streamPath) → AnyObj[] (tolerates malformed lines)
|
|
11
|
+
* freshHolders(events, slug, selfActor, nowMs) → holder[]
|
|
12
|
+
*
|
|
13
|
+
* freshHolders returns, for each actor (other than selfActor) with a
|
|
14
|
+
* within-TTL claim/heartbeat on subjectId === slug, an object:
|
|
15
|
+
* { actor: string, lastAt: string, ttlSeconds: number, fresh: boolean }
|
|
16
|
+
* Only actors where fresh === true are returned (i.e., elapsed < ttlSeconds*1000
|
|
17
|
+
* and no subsequent release event).
|
|
18
|
+
*
|
|
19
|
+
* Freshness rule mirrors the ADR 0012 grouping logic in workflow-sidecar.ts:
|
|
20
|
+
* - Group events by subjectId::actor.
|
|
21
|
+
* - Track the latest ttlSeconds from claim events (default 1800 s).
|
|
22
|
+
* - Track the latest event.at per group.
|
|
23
|
+
* - If the last event is a release → not fresh (regardless of elapsed).
|
|
24
|
+
* - Otherwise → fresh if (nowMs - Date.parse(lastAt)) < ttlSeconds * 1000.
|
|
25
|
+
*/
|
|
26
|
+
|
|
27
|
+
const fs = require('fs');
|
|
28
|
+
|
|
29
|
+
/**
|
|
30
|
+
* Read a liveness JSONL stream from the given path.
|
|
31
|
+
* Tolerates missing file (returns []) and malformed lines (silently skips).
|
|
32
|
+
*
|
|
33
|
+
* @param {string} streamPath Absolute path to events.jsonl
|
|
34
|
+
* @returns {object[]}
|
|
35
|
+
*/
|
|
36
|
+
function readLivenessEvents(streamPath) {
  // A missing or unreadable stream is treated as "no events".
  let contents;
  try {
    contents = fs.readFileSync(streamPath, 'utf8');
  } catch {
    return [];
  }

  // One JSON document per non-blank line; lines that fail to parse are dropped.
  const events = [];
  contents
    .split('\n')
    .map((line) => line.trim())
    .filter((line) => line !== '')
    .forEach((line) => {
      try {
        events.push(JSON.parse(line));
      } catch {
        /* skip malformed line */
      }
    });
  return events;
}
|
|
55
|
+
|
|
56
|
+
/**
|
|
57
|
+
* Compute fresh liveness holders for a given slug.
|
|
58
|
+
*
|
|
59
|
+
* @param {object[]} events Array of parsed liveness event objects
|
|
60
|
+
* @param {string} slug Work-item subjectId to filter on
|
|
61
|
+
* @param {string} selfActor Actor to exclude (current agent's identity)
|
|
62
|
+
* @param {number} nowMs Current epoch ms (Date.now())
|
|
63
|
+
* @returns {{ actor: string, lastAt: string, ttlSeconds: number, fresh: boolean }[]}
|
|
64
|
+
*/
|
|
65
|
+
/**
 * Compute the fresh liveness holders (other than `selfActor`) for `slug`.
 *
 * Events are grouped per actor. For each group we track the chronologically
 * latest timestamp, the most recent positive `ttlSeconds` seen on a claim
 * (default 1800), and whether the last claim/heartbeat/release in stream
 * order was a release. A group is fresh when it is not released and
 * `nowMs - latest < ttlSeconds * 1000`.
 *
 * Timestamps are ordered by their parsed epoch value, not by raw string
 * comparison, so ISO-8601 strings with differing precision or UTC offset
 * ("…00Z" vs "…00.500Z", "+02:00" vs "Z") are ordered correctly. A group
 * whose latest timestamp cannot be parsed is never reported as fresh.
 *
 * @param {object[]} events    Parsed liveness events; a non-array is treated as empty
 * @param {string}   slug      Work-item subjectId to filter on
 * @param {string}   selfActor Actor to exclude (current agent's identity)
 * @param {number}   nowMs     Current epoch ms (Date.now())
 * @returns {{ actor: string, lastAt: string, ttlSeconds: number, fresh: boolean }[]}
 */
function freshHolders(events, slug, selfActor, nowMs) {
  const DEFAULT_TTL_SECONDS = 1800;

  // True when the candidate timestamp should replace the group's current one.
  const isLater = (at, atMs, group) => {
    const candidateOk = !Number.isNaN(atMs);
    const currentOk = !Number.isNaN(group.lastMs);
    if (candidateOk && currentOk) return atMs > group.lastMs;
    if (candidateOk !== currentOk) return candidateOk; // prefer a parsable timestamp
    return at > group.lastAt; // neither parses: keep a deterministic string order
  };

  /** @type {Map<string, { actor: string, ttlSeconds: number, lastAt: string, lastMs: number, released: boolean }>} */
  const groups = new Map();

  for (const e of Array.isArray(events) ? events : []) {
    if (!e || typeof e !== 'object') continue;
    if (e.subjectId !== slug) continue;
    if (!e.actor || !e.at) continue;

    const actor = String(e.actor);
    if (actor === selfActor) continue;

    const at = String(e.at);
    const atMs = Date.parse(at);

    let g = groups.get(actor);
    if (!g) {
      g = { actor, ttlSeconds: DEFAULT_TTL_SECONDS, lastAt: at, lastMs: atMs, released: false };
      groups.set(actor, g);
    } else if (isLater(at, atMs, g)) {
      g.lastAt = at;
      g.lastMs = atMs;
    }

    // The TTL comes from claim events only; later claims override earlier ones.
    if (e.type === 'claim' && typeof e.ttlSeconds === 'number' && e.ttlSeconds > 0) {
      g.ttlSeconds = e.ttlSeconds;
    }

    // Release state follows stream order: a release marks the group released,
    // and a subsequent claim or heartbeat re-activates it.
    if (e.type === 'release') {
      g.released = true;
    } else if (e.type === 'claim' || e.type === 'heartbeat') {
      g.released = false;
    }
  }

  const result = [];
  for (const g of groups.values()) {
    if (g.released) continue;
    // NaN (unparsable timestamp) makes the comparison false, i.e. not fresh.
    const elapsed = nowMs - g.lastMs;
    if (elapsed < g.ttlSeconds * 1000) {
      result.push({ actor: g.actor, lastAt: g.lastAt, ttlSeconds: g.ttlSeconds, fresh: true });
    }
  }
  return result;
}
|
|
112
|
+
|
|
113
|
+
// Public API: the JSONL stream reader and the freshness computation.
module.exports = { readLivenessEvents, freshHolders };
|
|
@@ -100,7 +100,12 @@ async function main() {
|
|
|
100
100
|
|
|
101
101
|
if (hookModule && typeof hookModule.run === 'function') {
|
|
102
102
|
try {
|
|
103
|
-
const
|
|
103
|
+
const outputOrPromise = hookModule.run(raw, { truncated, maxStdin: MAX_STDIN });
|
|
104
|
+
// Support async run() (returns a Promise): await before emitting result.
|
|
105
|
+
// Synchronous run() returns a plain value, which is used as-is without awaiting.
|
|
106
|
+
const output = (outputOrPromise && typeof outputOrPromise.then === 'function')
|
|
107
|
+
? await outputOrPromise
|
|
108
|
+
: outputOrPromise;
|
|
104
109
|
process.exit(emitHookResult(raw, output));
|
|
105
110
|
} catch (e) {
|
|
106
111
|
process.stderr.write(`[Hook] run() error for ${hookId}: ${e.message}\n`);
|