@kontourai/flow-agents 2.0.0 → 2.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.github/workflows/runtime-compat.yml +1 -1
- package/CHANGELOG.md +8 -0
- package/build/src/cli/workflow-sidecar.d.ts +16 -0
- package/build/src/cli/workflow-sidecar.js +64 -10
- package/build/src/lib/flow-resolver.d.ts +29 -0
- package/build/src/lib/flow-resolver.js +71 -0
- package/evals/ci/antigaming-suite.sh +1 -0
- package/evals/integration/test_command_log_fork_classification.sh +134 -0
- package/evals/integration/test_kit_identity_trust.sh +393 -0
- package/evals/run.sh +2 -0
- package/package.json +4 -4
- package/scripts/hooks/stop-goal-fit.js +76 -23
- package/scripts/repair-command-log.js +115 -0
- package/src/cli/workflow-sidecar.ts +65 -9
- package/src/lib/flow-resolver.ts +85 -0
package/CHANGELOG.md
CHANGED
|
@@ -1,5 +1,13 @@
|
|
|
1
1
|
# Changelog
|
|
2
2
|
|
|
3
|
+
## [2.0.1](https://github.com/kontourai/flow-agents/compare/v2.0.0...v2.0.1) (2026-06-27)
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
### Fixes
|
|
7
|
+
|
|
8
|
+
* carry KIT IDENTITY through the trust chain — stop flattening non-builder kits to "builder" ([#235](https://github.com/kontourai/flow-agents/issues/235)) ([02d2782](https://github.com/kontourai/flow-agents/commit/02d2782ca8d9158a018d0fc6c35adc6a34c827d5))
|
|
9
|
+
* **gate:** classify concurrent-fork vs tamper; never hard-block a benign race ([#233](https://github.com/kontourai/flow-agents/issues/233)) ([e24743b](https://github.com/kontourai/flow-agents/commit/e24743b7dbff05df64e198e420e47841ce534df3))
|
|
10
|
+
|
|
3
11
|
## [2.0.0](https://github.com/kontourai/flow-agents/compare/v1.4.0...v2.0.0) (2026-06-27)
|
|
4
12
|
|
|
5
13
|
|
|
@@ -167,6 +167,22 @@ export declare function sidecarBase(slug: string): AnyObj;
|
|
|
167
167
|
export declare function validateEvidenceRef(ref: AnyObj, label: string): AnyObj;
|
|
168
168
|
export declare function normalizeEvidenceRefs(raw: unknown, label: string): AnyObj[];
|
|
169
169
|
export declare function normalizeCheck(raw: AnyObj): AnyObj;
|
|
170
|
+
/**
|
|
171
|
+
* Derive kit identity from a parsed trust.bundle by structurally reading the
|
|
172
|
+
* DECLARED primary claim (kit-typed) rather than hardcoding "builder".
|
|
173
|
+
*
|
|
174
|
+
* Resolution order (no fallbacks to "builder"):
|
|
175
|
+
* 1. First non-workflow.* claim in bundle.claims[] → claimType drives kitId + subject.
|
|
176
|
+
* 2. No kit-typed claim: try current.json active_flow_id adjacent to the bundle file
|
|
177
|
+
* (bundle lives at <session-dir>/trust.bundle → flowAgentsDir = grandparent).
|
|
178
|
+
* 3. Genuinely unknown: mark as "unknown" — never hardcode a kit identity.
|
|
179
|
+
*/
|
|
180
|
+
export declare function kitIdentityFromBundle(raw: AnyObj, bundleFile: string): {
|
|
181
|
+
claimType: string;
|
|
182
|
+
kitId: string;
|
|
183
|
+
subject: string;
|
|
184
|
+
gateId: string;
|
|
185
|
+
};
|
|
170
186
|
export declare function writeState(dir: string, slug: string, status: string, phase: string, timestamp: string, summary: string, next?: string): void;
|
|
171
187
|
export declare function normalizeFinding(raw: AnyObj): AnyObj;
|
|
172
188
|
/**
|
|
@@ -6,7 +6,7 @@ import { createHash } from "node:crypto";
|
|
|
6
6
|
import { createRequire } from "node:module";
|
|
7
7
|
import { fileURLToPath } from "node:url";
|
|
8
8
|
// ADR 0016 Abstraction A: shared FlowDefinition resolver (P-a)
|
|
9
|
-
import { resolveActiveFlowStep, resolveFlowFilePath, resolvePhaseMap } from "../lib/flow-resolver.js";
|
|
9
|
+
import { resolveActiveFlowStep, resolveFlowFilePath, resolvePhaseMap, resolveRouteBackPolicy } from "../lib/flow-resolver.js";
|
|
10
10
|
export const statuses = new Set(["new", "planning", "planned", "in_progress", "blocked", "verifying", "verified", "needs_decision", "not_verified", "failed", "delivered", "accepted", "archived"]);
|
|
11
11
|
export const phases = ["idea", "backlog", "pickup", "planning", "execution", "verification", "goal_fit", "evidence", "release", "learning", "done"];
|
|
12
12
|
export const checkKinds = new Set(["build", "types", "lint", "test", "security", "diff", "browser", "runtime", "policy", "external"]);
|
|
@@ -1071,15 +1071,63 @@ function deriveSurfaceStatus(ref) {
|
|
|
1071
1071
|
return "fail";
|
|
1072
1072
|
return "pass";
|
|
1073
1073
|
}
|
|
1074
|
+
/**
 * Derive kit identity from a parsed trust.bundle by structurally reading the
 * DECLARED primary claim (kit-typed) rather than hardcoding "builder".
 *
 * Resolution order (no fallbacks to "builder"):
 *  1. First claim in bundle.claims[] whose claimType is a non-empty string that
 *     does not start with "workflow." and whose first dot-separated segment is
 *     neither empty nor "unknown" → that segment is the kitId, and the claimType
 *     is reused verbatim as the gateId.
 *  2. No such claim: read current.json from the grandparent directory of the
 *     bundle file (bundle lives at <session-dir>/trust.bundle) and take the
 *     segment before the first "." of a dotted active_flow_id as the kitId.
 *  3. Genuinely unknown: every field is marked "unknown".
 *
 * The function never throws: a null / non-object `raw` (JSON.parse may return
 * null or a primitive) is treated as a bundle with no claims, and any failure
 * to read or parse current.json falls through to the "unknown" result.
 *
 * @param raw        Parsed trust.bundle content (may be null or a primitive).
 * @param bundleFile Path of the bundle file; only used to locate current.json.
 * @returns `{ claimType, kitId, subject, gateId }` where subject is `${kitId}-kit`.
 */
export function kitIdentityFromBundle(raw, bundleFile) {
    // 1. Structurally read the bundle's declared kit-typed claim.
    //    `raw?.claims` keeps a bundle that parsed to null from throwing here.
    const claims = Array.isArray(raw?.claims) ? raw.claims : [];
    for (const claim of claims) {
        const ct = typeof claim?.claimType === "string" ? claim.claimType : "";
        if (!ct || ct.startsWith("workflow."))
            continue;
        // split() always yields at least one element; it is "" when ct starts with ".".
        const kitId = ct.split(".")[0];
        if (kitId && kitId !== "unknown") {
            return { claimType: ct, kitId, subject: `${kitId}-kit`, gateId: ct };
        }
    }
    // 2. No kit-typed claim in bundle — try to derive kit from current.json active_flow_id.
    //    The bundle lives at <session-dir>/trust.bundle, so:
    //      sessionDir    = path.dirname(bundleFile)
    //      flowAgentsDir = path.dirname(sessionDir)
    try {
        const sessionDir = path.dirname(bundleFile);
        const flowAgentsDir = path.dirname(sessionDir);
        const currentFile = path.join(flowAgentsDir, "current.json");
        const current = JSON.parse(fs.readFileSync(currentFile, "utf8"));
        const flowId = typeof current["active_flow_id"] === "string" ? current["active_flow_id"] : null;
        if (flowId && flowId.includes(".")) {
            const kitId = flowId.split(".")[0];
            if (kitId) {
                const derivedClaimType = `${kitId}.trust.bundle`;
                return { claimType: derivedClaimType, kitId, subject: `${kitId}-kit`, gateId: derivedClaimType };
            }
        }
    }
    catch {
        // Best-effort lookup: a missing/unreadable/malformed current.json (or a
        // non-object JSON value) simply means the kit cannot be derived this way.
    }
    // 3. Genuinely unknown — never fallback to "builder".
    return { claimType: "unknown.trust.bundle", kitId: "unknown", subject: "unknown-kit", gateId: "unknown.trust.bundle" };
}
|
|
1074
1120
|
function surfaceCheckFromArtifact(file, index) {
|
|
1075
1121
|
const raw = JSON.parse(read(file));
|
|
1076
1122
|
const lower = JSON.stringify(raw).toLowerCase();
|
|
1123
|
+
// Structurally read kit identity from the bundle — never hardcode "builder".
|
|
1124
|
+
const { claimType: bundleClaimType, subject: bundleSubject, gateId: bundleGateId } = kitIdentityFromBundle(raw, file);
|
|
1077
1125
|
let ref;
|
|
1078
1126
|
if (lower.includes("provider") && lower.includes("absent")) {
|
|
1079
|
-
ref = { artifact_kind: "trust.bundle", artifact_ref: file, gate_id: "provider.unavailable", claim_type:
|
|
1127
|
+
ref = { artifact_kind: "trust.bundle", artifact_ref: file, gate_id: "provider.unavailable", claim_type: bundleClaimType, claim_status: "unknown", subject: bundleSubject, freshness: { status: "unknown", summary: "No trust provider is configured" }, authority: { producer: "unknown", summary: "No trust provider is configured" }, integrity: { status: "unknown", summary: "Unknown" }, status: "not_verified", summary: "No trust provider is configured" };
|
|
1080
1128
|
}
|
|
1081
1129
|
else if (lower.includes("artifact") && lower.includes("absent")) {
|
|
1082
|
-
ref = { artifact_kind: "trust.bundle", artifact_ref: file, gate_id: "artifact.unavailable", claim_type:
|
|
1130
|
+
ref = { artifact_kind: "trust.bundle", artifact_ref: file, gate_id: "artifact.unavailable", claim_type: bundleClaimType, claim_status: "unknown", subject: bundleSubject, freshness: { status: "unknown", summary: "Artifact not readable" }, authority: { producer: "unknown", summary: "Artifact not readable" }, integrity: { status: "unknown", summary: "Artifact not readable" }, status: "not_verified", summary: "artifact not readable" };
|
|
1083
1131
|
}
|
|
1084
1132
|
else {
|
|
1085
1133
|
const claimStatus = lower.includes("rejected") ? "rejected" : "accepted";
|
|
@@ -1087,7 +1135,7 @@ function surfaceCheckFromArtifact(file, index) {
|
|
|
1087
1135
|
const producer = lower.includes("missing-authority") ? "unknown" : "surface-local";
|
|
1088
1136
|
const integrity = lower.includes("mismatch") ? "mismatch" : "matched";
|
|
1089
1137
|
// Use trust.bundle as the canonical Hachure-aligned artifact_kind for all trust-backed evidence refs
|
|
1090
|
-
ref = { artifact_kind: "trust.bundle", artifact_ref: file, gate_id:
|
|
1138
|
+
ref = { artifact_kind: "trust.bundle", artifact_ref: file, gate_id: bundleGateId, claim_type: bundleClaimType, claim_status: claimStatus, subject: bundleSubject, freshness: { status: freshness, summary: freshness === "fresh" ? "fresh" : "not currently verifiable" }, authority: { producer, summary: producer === "unknown" ? "missing authority" : "Local Surface trust producer." }, integrity: { status: integrity, summary: integrity === "matched" ? "matched" : "integrity mismatch" } };
|
|
1091
1139
|
ref.status = deriveSurfaceStatus(ref);
|
|
1092
1140
|
ref.summary = ref.status === "pass" ? "accepted" : ref.status === "not_verified" ? "not currently verifiable" : (claimStatus === "rejected" ? "rejected" : producer === "unknown" ? "missing authority" : "integrity mismatch");
|
|
1093
1141
|
}
|
|
@@ -1342,16 +1390,22 @@ async function advanceState(p) {
|
|
|
1342
1390
|
if ((status === "archived" || status === "accepted") && prev.phase !== "learning")
|
|
1343
1391
|
diagnostic(dir, "terminal_jump_rejected", "Terminal workflow states require release and learning gates.");
|
|
1344
1392
|
const flow = opt(p, "flow-definition");
|
|
1345
|
-
|
|
1393
|
+
// Route-back guard: FlowDefinition-driven (not hardcoded to builder.build).
|
|
1394
|
+
// Fires when the active flow's gate for prev.phase declares a route_back_policy
|
|
1395
|
+
// AND the target phase maps to a step listed in on_route_back values.
|
|
1396
|
+
// builder.build verify-gate already carries this declaration — behavior preserved.
|
|
1397
|
+
const repoRoot = flow ? findRepoRootFromDir(dir) : "";
|
|
1398
|
+
const routeBack = flow ? resolveRouteBackPolicy(flow, prev.phase, phase, repoRoot) : null;
|
|
1399
|
+
if (routeBack) {
|
|
1346
1400
|
const reason = opt(p, "route-back-reason");
|
|
1347
1401
|
if (!reason)
|
|
1348
|
-
diagnostic(dir, "route_back_reason_required",
|
|
1402
|
+
diagnostic(dir, "route_back_reason_required", `Route-back from ${prev.phase} to ${phase} requires a --route-back-reason (e.g. implementation_defect).`);
|
|
1349
1403
|
const file = path.join(dir, "transition-attempts.json");
|
|
1350
1404
|
const attempts = loadJson(file);
|
|
1351
|
-
const key =
|
|
1405
|
+
const key = `${prev.phase}->${phase}:${reason}`;
|
|
1352
1406
|
const count = attempts[key]?.count ?? 0;
|
|
1353
|
-
if (count >=
|
|
1354
|
-
diagnostic(dir, "route_back_attempts_exceeded",
|
|
1407
|
+
if (count >= routeBack.maxAttempts)
|
|
1408
|
+
diagnostic(dir, "route_back_attempts_exceeded", `Route-back attempt limit (${routeBack.maxAttempts}) exceeded for ${prev.phase}→${phase}.`);
|
|
1355
1409
|
attempts[key] = { count: count + 1, reason, updated_at: opt(p, "timestamp", now()) };
|
|
1356
1410
|
writeJson(file, attempts);
|
|
1357
1411
|
}
|
|
@@ -1365,7 +1419,7 @@ async function advanceState(p) {
|
|
|
1365
1419
|
// --step-id individually. The repoRoot is derived by walking up from dir to find kits/.
|
|
1366
1420
|
if (flow) {
|
|
1367
1421
|
const root = path.resolve(opt(p, "artifact-root", path.dirname(dir)));
|
|
1368
|
-
|
|
1422
|
+
// repoRoot already computed above when flow is present
|
|
1369
1423
|
const phaseMap = resolvePhaseMap(flow, repoRoot);
|
|
1370
1424
|
const stepId = phaseMap?.[phase] ?? undefined;
|
|
1371
1425
|
if (stepId) {
|
|
@@ -80,3 +80,32 @@ export declare function resolvePhaseMap(flowId: string, repoRoot: string): Recor
|
|
|
80
80
|
* @returns ActiveFlowStep or null when fields are absent or resolution fails.
|
|
81
81
|
*/
|
|
82
82
|
export declare function resolveActiveFlowStep(flowAgentsDir: string): ActiveFlowStep | null;
|
|
83
|
+
/** The resolved route-back policy for a phase transition. */
|
|
84
|
+
export type RouteBackPolicy = {
|
|
85
|
+
/** Maximum allowed route-back attempts for this transition key. */
|
|
86
|
+
maxAttempts: number;
|
|
87
|
+
/** Action when attempts are exceeded (e.g. "block"). */
|
|
88
|
+
onExceeded: string;
|
|
89
|
+
/** The step id whose gate declared this policy (e.g. "verify"). */
|
|
90
|
+
fromStepId: string;
|
|
91
|
+
};
|
|
92
|
+
/**
|
|
93
|
+
* Resolve the route-back policy for a phase transition, if the active FlowDefinition
|
|
94
|
+
* declares one on the source phase's gate.
|
|
95
|
+
*
|
|
96
|
+
* A route-back is a transition where the source phase's gate declares both
|
|
97
|
+
* `route_back_policy` and `on_route_back`, and the target phase maps to a step
|
|
98
|
+
* listed as a route-back target in `on_route_back` values.
|
|
99
|
+
*
|
|
100
|
+
* This is the FlowDefinition-driven replacement for the hardcoded
|
|
101
|
+
* `flow === "builder.build" && prev.phase === "verification" && phase === "execution"`
|
|
102
|
+
* guard in advance-state. Any flow that declares `route_back_policy` on a gate
|
|
103
|
+
* automatically gets route-back enforcement without code changes.
|
|
104
|
+
*
|
|
105
|
+
* @param flowId e.g. "builder.build" — kitId is the prefix before the first ".".
|
|
106
|
+
* @param fromPhase Lifecycle phase leaving (e.g. "verification").
|
|
107
|
+
* @param toPhase Lifecycle phase entering (e.g. "execution").
|
|
108
|
+
* @param repoRoot Absolute path to the repository root (kits/ lives here).
|
|
109
|
+
* @returns RouteBackPolicy when the transition is a declared route-back, null otherwise.
|
|
110
|
+
*/
|
|
111
|
+
export declare function resolveRouteBackPolicy(flowId: string, fromPhase: string, toPhase: string, repoRoot: string): RouteBackPolicy | null;
|
|
@@ -235,3 +235,74 @@ export function resolveActiveFlowStep(flowAgentsDir) {
|
|
|
235
235
|
const repoRoot = findRepoRoot(path.dirname(flowAgentsDir));
|
|
236
236
|
return resolveFlowStep(flowId, stepId, repoRoot);
|
|
237
237
|
}
|
|
238
|
+
/**
 * Resolve the route-back policy for a phase transition, if the active FlowDefinition
 * declares one on a gate of the source phase's step.
 *
 * A route-back is a transition where a gate whose `step` is the step mapped from
 * `fromPhase` declares both `route_back_policy` and `on_route_back`, and the step
 * mapped from `toPhase` is listed among the `on_route_back` values.
 *
 * This is the FlowDefinition-driven replacement for the hardcoded
 * `flow === "builder.build" && prev.phase === "verification" && phase === "execution"`
 * guard in advance-state. Any flow that declares `route_back_policy` on a gate
 * automatically gets route-back enforcement without code changes.
 *
 * Every gate attached to the source step is examined: a gate that lacks a policy
 * or does not target `toStep` is skipped rather than ending the search, so a step
 * carrying several gates is still enforced when any one of them declares the
 * route-back.
 *
 * Resolution is fail-open: an unresolvable flow file, unreadable or malformed JSON,
 * a missing `phase_map` / `gates`, or phases absent from the map all yield null.
 *
 * @param flowId     e.g. "builder.build" — kitId is the prefix before the first ".".
 * @param fromPhase  Lifecycle phase leaving (e.g. "verification").
 * @param toPhase    Lifecycle phase entering (e.g. "execution").
 * @param repoRoot   Absolute path to the repository root (kits/ lives here).
 * @returns `{ maxAttempts, onExceeded, fromStepId }` when the transition is a declared
 *          route-back, null otherwise. `maxAttempts` defaults to 3 when
 *          `max_attempts` is not a number; `onExceeded` defaults to "block" when
 *          `on_exceeded` is not a string.
 */
export function resolveRouteBackPolicy(flowId, fromPhase, toPhase, repoRoot) {
    if (!flowId || !fromPhase || !toPhase)
        return null;
    // flowId must look like "<kitId>.<flowName>" with both parts non-empty.
    const dotIdx = flowId.indexOf(".");
    if (dotIdx < 1)
        return null;
    const kitId = flowId.slice(0, dotIdx);
    const flowName = flowId.slice(dotIdx + 1);
    if (!kitId || !flowName)
        return null;
    const flowFilePath = resolveFlowFilePath(kitId, flowName, flowId, repoRoot);
    if (!flowFilePath)
        return null;
    let flowDef;
    try {
        flowDef = JSON.parse(fs.readFileSync(flowFilePath, "utf8"));
    }
    catch {
        return null; // ENOENT, permission error, or parse error — fail-open
    }
    if (!flowDef || typeof flowDef !== "object")
        return null;
    const phaseMap = flowDef.phase_map;
    if (!phaseMap || typeof phaseMap !== "object" || Array.isArray(phaseMap))
        return null;
    const fromStep = phaseMap[fromPhase];
    const toStep = phaseMap[toPhase];
    if (!fromStep || !toStep)
        return null; // phases not in this flow
    const gates = flowDef.gates;
    if (!gates || typeof gates !== "object")
        return null;
    for (const gate of Object.values(gates)) {
        if (!gate || gate.step !== fromStep)
            continue;
        const policy = gate.route_back_policy;
        const targets = gate.on_route_back;
        // This gate declares no route-back — keep looking, another gate on the
        // same step may declare one.
        if (!policy || !targets || typeof targets !== "object")
            continue;
        // Only a gate that lists toStep as a route-back target applies.
        if (!Object.values(targets).includes(toStep))
            continue;
        return {
            maxAttempts: typeof policy.max_attempts === "number" ? policy.max_attempts : 3,
            onExceeded: typeof policy.on_exceeded === "string" ? policy.on_exceeded : "block",
            fromStepId: fromStep,
        };
    }
    return null;
}
|
|
@@ -19,6 +19,7 @@ TESTS=(
|
|
|
19
19
|
"evals/integration/test_reconcile_soundness.sh"
|
|
20
20
|
"evals/integration/test_captured_fail_reconciliation.sh"
|
|
21
21
|
"evals/integration/test_command_log_integrity.sh"
|
|
22
|
+
"evals/integration/test_command_log_fork_classification.sh"
|
|
22
23
|
"evals/integration/test_resolvefirststep_security.sh"
|
|
23
24
|
"evals/integration/test_enforcer_expects_driven.sh"
|
|
24
25
|
"evals/integration/test_goal_fit_rederive.sh"
|
|
@@ -0,0 +1,134 @@
|
|
|
1
|
+
#!/usr/bin/env bash
|
|
2
|
+
# test_command_log_fork_classification.sh
|
|
3
|
+
#
|
|
4
|
+
# The verifier must tell a BENIGN concurrent fork apart from real TAMPER, and
|
|
5
|
+
# the repair tool must refuse to touch tamper. This is what prevents an honest
|
|
6
|
+
# parallel-write race from becoming a hard block an agent is tempted to launder.
|
|
7
|
+
#
|
|
8
|
+
# forked = two PostToolUse captures share a parent; all hashes self-consistent
|
|
9
|
+
# and reachable. NON-blocking advisory; records stay trusted.
|
|
10
|
+
# broken = content edit (self-hash mismatch) / reorder / deletion / a
|
|
11
|
+
# non-capture sibling on a shared parent. Hard block (unchanged).
|
|
12
|
+
#
|
|
13
|
+
# Also proves: repair re-linearizes forked→ok, and REFUSES broken (no laundering).
|
|
14
|
+
set -uo pipefail
|
|
15
|
+
|
|
16
|
+
ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")/../.." && pwd)"
|
|
17
|
+
export GATE="$ROOT/scripts/hooks/stop-goal-fit.js"
|
|
18
|
+
REPAIR="$ROOT/scripts/repair-command-log.js"
|
|
19
|
+
|
|
20
|
+
TMP="$(mktemp -d)"; trap 'rm -rf "$TMP"' EXIT
|
|
21
|
+
errors=0
|
|
22
|
+
_pass() { echo " ✓ $1"; }
|
|
23
|
+
_fail() { echo " ✗ $1"; errors=$((errors + 1)); }
|
|
24
|
+
|
|
25
|
+
SD=".flow-agents/s"
|
|
26
|
+
|
|
27
|
+
# Build a command-log from a spec: JSON array of {cmd,exit,src,parent} where
|
|
28
|
+
# parent is the 0-based index of the entry whose hash is this entry's prevHash
|
|
29
|
+
# (-1 = genesis). Lets us construct linear chains AND forks deterministically.
|
|
30
|
+
build() { # $1=dir $2=spec-json
|
|
31
|
+
mkdir -p "$1/$SD"
|
|
32
|
+
DIR="$1" node -e '
|
|
33
|
+
const fs=require("fs"),crypto=require("crypto"),path=require("path");
|
|
34
|
+
const g=require(process.env.GATE), GEN=g.CHAIN_GENESIS_VERIFY;
|
|
35
|
+
const canon=r=>{const k=Object.keys(r).filter(x=>x!=="_chain").sort();const o={};for(const x of k)o[x]=r[x];return JSON.stringify(o);};
|
|
36
|
+
const H=(p,r)=>crypto.createHash("sha256").update(p+canon(r)).digest("hex");
|
|
37
|
+
const spec=JSON.parse(process.argv[1]); const hashes=[],lines=[];
|
|
38
|
+
spec.forEach((s,i)=>{
|
|
39
|
+
const rec={command:s.cmd,observedResult:s.exit===0?"pass":"fail",exitCode:s.exit,
|
|
40
|
+
capturedAt:new Date(Date.UTC(2026,0,1,0,0,i)).toISOString(),source:s.src||"postToolUse-capture"};
|
|
41
|
+
const prev=s.parent===-1?GEN:hashes[s.parent]; const h=H(prev,rec);
|
|
42
|
+
hashes.push(h); lines.push(JSON.stringify({...rec,_chain:{seq:i,prevHash:prev,hash:h}}));
|
|
43
|
+
});
|
|
44
|
+
fs.writeFileSync(path.join(process.env.DIR,".flow-agents/s/command-log.jsonl"),lines.join("\n")+"\n");
|
|
45
|
+
' "$2"
|
|
46
|
+
}
|
|
47
|
+
status() { DIR="$1" node -e 'const g=require(process.env.GATE);console.log(g.verifyCommandLogChain(process.env.DIR+"/.flow-agents/s").status)' ; }
|
|
48
|
+
|
|
49
|
+
# ── 1. linear → ok ────────────────────────────────────────────────────────────
|
|
50
|
+
D="$TMP/linear"; build "$D" '[{"cmd":"a","exit":0,"parent":-1},{"cmd":"b","exit":0,"parent":0}]'
|
|
51
|
+
[ "$(status "$D")" = "ok" ] && _pass "linear chain → ok" || _fail "linear → $(status "$D"), want ok"
|
|
52
|
+
|
|
53
|
+
# ── 2. concurrent fork (two captures share a parent) → forked ─────────────────
|
|
54
|
+
D="$TMP/fork"; build "$D" '[{"cmd":"a","exit":0,"parent":-1},{"cmd":"b","exit":0,"parent":0},{"cmd":"c","exit":0,"parent":0}]'
|
|
55
|
+
[ "$(status "$D")" = "forked" ] && _pass "concurrent fork → forked (not broken)" || _fail "fork → $(status "$D"), want forked"
|
|
56
|
+
|
|
57
|
+
# ── 3. content edit (flip exitCode, keep hash) → broken ───────────────────────
|
|
58
|
+
D="$TMP/flip"; build "$D" '[{"cmd":"npm test","exit":0,"parent":-1},{"cmd":"npm run lint","exit":1,"parent":0}]'
|
|
59
|
+
python3 - "$D/$SD/command-log.jsonl" <<'PY'
|
|
60
|
+
import json,sys
|
|
61
|
+
L=open(sys.argv[1]).read().strip().split("\n"); e=json.loads(L[1]); e["exitCode"]=0; e["observedResult"]="pass"
|
|
62
|
+
L[1]=json.dumps(e); open(sys.argv[1],"w").write("\n".join(L)+"\n")
|
|
63
|
+
PY
|
|
64
|
+
[ "$(status "$D")" = "broken" ] && _pass "content edit → broken (tamper, not fork)" || _fail "flip → $(status "$D"), want broken"
|
|
65
|
+
|
|
66
|
+
# ── 4. reorder → broken ───────────────────────────────────────────────────────
|
|
67
|
+
D="$TMP/reorder"; build "$D" '[{"cmd":"a","exit":0,"parent":-1},{"cmd":"b","exit":0,"parent":0}]'
|
|
68
|
+
python3 - "$D/$SD/command-log.jsonl" <<'PY'
|
|
69
|
+
import sys
|
|
70
|
+
L=open(sys.argv[1]).read().strip().split("\n"); L[0],L[1]=L[1],L[0]; open(sys.argv[1],"w").write("\n".join(L)+"\n")
|
|
71
|
+
PY
|
|
72
|
+
[ "$(status "$D")" = "broken" ] && _pass "reorder → broken" || _fail "reorder → $(status "$D"), want broken"
|
|
73
|
+
|
|
74
|
+
# ── 5. deleted predecessor → broken ───────────────────────────────────────────
|
|
75
|
+
D="$TMP/delete"; build "$D" '[{"cmd":"a","exit":0,"parent":-1},{"cmd":"b","exit":0,"parent":0}]'
|
|
76
|
+
python3 - "$D/$SD/command-log.jsonl" <<'PY'
|
|
77
|
+
import sys
|
|
78
|
+
L=open(sys.argv[1]).read().strip().split("\n"); open(sys.argv[1],"w").write(L[1]+"\n")
|
|
79
|
+
PY
|
|
80
|
+
[ "$(status "$D")" = "broken" ] && _pass "deleted predecessor → broken" || _fail "delete → $(status "$D"), want broken"
|
|
81
|
+
|
|
82
|
+
# ── 6. non-capture sibling on a shared parent → broken (not a benign fork) ─────
|
|
83
|
+
D="$TMP/badfork"; build "$D" '[{"cmd":"a","exit":0,"parent":-1},{"cmd":"b","exit":0,"parent":0},{"cmd":"c","exit":0,"parent":0,"src":"manual-inject"}]'
|
|
84
|
+
[ "$(status "$D")" = "broken" ] && _pass "non-capture sibling fork → broken (conservative)" || _fail "badfork → $(status "$D"), want broken"
|
|
85
|
+
|
|
86
|
+
# ── 7. repair re-linearizes forked → ok; refuses broken ───────────────────────
# Part A: a benign fork (two captures sharing parent 0) must be repaired to "ok".
D="$TMP/fork2"; build "$D" '[{"cmd":"a","exit":0,"parent":-1},{"cmd":"b","exit":0,"parent":0},{"cmd":"c","exit":0,"parent":0}]'
node "$REPAIR" "$D/$SD" --reason "test" >/dev/null 2>&1
[ "$(status "$D")" = "ok" ] && _pass "repair: forked → ok" || _fail "repair forked → $(status "$D"), want ok"

# Part B: a tampered log (exitCode flipped after hashing) must be refused and
# left byte-for-byte untouched.
D="$TMP/flip2"; build "$D" '[{"cmd":"x","exit":0,"parent":-1},{"cmd":"y","exit":1,"parent":0}]'
python3 - "$D/$SD/command-log.jsonl" <<'PY'
import json,sys
L=open(sys.argv[1]).read().strip().split("\n"); e=json.loads(L[1]); e["exitCode"]=0
L[1]=json.dumps(e); open(sys.argv[1],"w").write("\n".join(L)+"\n")
PY
before=$(cat "$D/$SD/command-log.jsonl")
# Invoke repair with the same arguments as the successful case above, so a
# non-zero exit can only be attributed to the log's state and not to a
# difference in the command line. The exit code is captured with `|| rc=$?`
# instead of toggling `set +e`/`set -e`: this script starts with
# `set -uo pipefail` (no errexit), and a trailing `set -e` here would turn
# errexit on for everything that follows.
rc=0
node "$REPAIR" "$D/$SD" --reason "test" >/dev/null 2>&1 || rc=$?
after=$(cat "$D/$SD/command-log.jsonl")
if [ "$rc" -ne 0 ] && [ "$before" = "$after" ]; then _pass "repair: REFUSES broken (exit!=0, log unchanged — no laundering)"; else _fail "repair touched/accepted a broken log (rc=$rc)"; fi
|
|
101
|
+
|
|
102
|
+
# ── 8. the Stop gate does NOT hard-block a forked log ─────────────────────────
|
|
103
|
+
D="$TMP/gate"; mkdir -p "$D/$SD"
|
|
104
|
+
printf '# Repo\n' > "$D/AGENTS.md"
|
|
105
|
+
printf '%s' '{"schema_version":"1.0","task_slug":"s","status":"delivered","phase":"done","updated_at":"2026-06-23T00:00:00Z","next_action":{"status":"done","summary":"done"}}' > "$D/$SD/state.json"
|
|
106
|
+
cat > "$D/$SD/s--deliver.md" <<'MD'
|
|
107
|
+
# s
|
|
108
|
+
|
|
109
|
+
branch: main
|
|
110
|
+
status: delivered
|
|
111
|
+
type: deliver
|
|
112
|
+
|
|
113
|
+
## Definition Of Done
|
|
114
|
+
- [x] tests pass
|
|
115
|
+
|
|
116
|
+
## Goal Fit Gate
|
|
117
|
+
- [x] acceptance verified
|
|
118
|
+
|
|
119
|
+
### Verdict: PASS
|
|
120
|
+
MD
|
|
121
|
+
# forked log whose captures are all PASS, so there is no contradiction to flag
|
|
122
|
+
build "$D" '[{"cmd":"npm test","exit":0,"parent":-1},{"cmd":"npm run build","exit":0,"parent":0},{"cmd":"npm run build","exit":0,"parent":0}]'
|
|
123
|
+
printf '%s' '{"schema_version":"1.0","task_slug":"s","verdict":"pass","checks":[{"id":"t","kind":"command","status":"pass","command":"npm test","summary":"ok"}]}' > "$D/$SD/evidence.json"
|
|
124
|
+
set +e
|
|
125
|
+
out=$(FLOW_AGENTS_GOAL_FIT_MODE=block FLOW_AGENTS_GOAL_FIT_BACKSTOP=skip node "$GATE" 2>&1 <<< "{\"hook_event_name\":\"Stop\",\"cwd\":\"$D\"}")
|
|
126
|
+
rc=$?
|
|
127
|
+
set -e
|
|
128
|
+
if [ "$rc" -eq 0 ]; then _pass "gate does NOT hard-block forked log (exit 0)"; else _fail "gate blocked forked log (exit $rc): $out"; fi
|
|
129
|
+
echo "$out" | grep -q "concurrent-capture fork" && _pass "gate emits the concurrent-fork advisory" || _fail "missing fork advisory: $out"
|
|
130
|
+
echo "$out" | grep -q "command-log integrity check FAILED" && _fail "gate wrongly emitted tamper warning for a fork" || _pass "no false tamper warning for a fork"
|
|
131
|
+
|
|
132
|
+
echo ""
|
|
133
|
+
if [ "$errors" -eq 0 ]; then echo "fork classification tests passed."; exit 0; fi
|
|
134
|
+
echo "fork classification tests FAILED: $errors issue(s)."; exit 1
|