@kontourai/flow-agents 1.3.0 → 2.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.github/CODEOWNERS +29 -0
- package/.github/actions/trust-verify/action.yml +145 -0
- package/.github/workflows/ci.yml +11 -4
- package/.github/workflows/kit-gates-demo.yml +2 -2
- package/.github/workflows/publish-npm.yml +10 -2
- package/.github/workflows/release-please.yml +1 -1
- package/.github/workflows/trust-reconcile.yml +113 -0
- package/AGENTS.md +13 -0
- package/CHANGELOG.md +103 -0
- package/CONTRIBUTING.md +4 -4
- package/README.md +1 -0
- package/agents/tool-planner.json +1 -1
- package/build/src/cli/console-learning-projection.d.ts +1 -0
- package/build/src/cli/effective-backlog-settings.d.ts +1 -0
- package/build/src/cli/fixture-retirement-audit.d.ts +2 -0
- package/build/src/cli/init.d.ts +17 -0
- package/build/src/cli/init.js +242 -20
- package/build/src/cli/kit.d.ts +1 -0
- package/build/src/cli/promote-workflow-artifact.d.ts +1 -0
- package/build/src/cli/publish-change-helper.d.ts +1 -0
- package/build/src/cli/pull-work-provider.d.ts +1 -0
- package/build/src/cli/runtime-adapter.d.ts +1 -0
- package/build/src/cli/telemetry-doctor.d.ts +1 -0
- package/build/src/cli/usage-feedback.d.ts +1 -0
- package/build/src/cli/utterance-check.d.ts +1 -0
- package/build/src/cli/validate-hook-influence.d.ts +1 -0
- package/build/src/cli/validate-source-tree.d.ts +1 -0
- package/build/src/cli/validate-workflow-artifacts.d.ts +2 -0
- package/build/src/cli/validate-workflow-artifacts.js +19 -2
- package/build/src/cli/verify.d.ts +1 -0
- package/build/src/cli/verify.js +90 -0
- package/build/src/cli/veritas-governance.d.ts +1 -0
- package/build/src/cli/workflow-artifact-cleanup-audit.d.ts +1 -0
- package/build/src/cli/workflow-sidecar.d.ts +324 -0
- package/build/src/cli/workflow-sidecar.js +1973 -90
- package/build/src/cli.d.ts +2 -0
- package/build/src/cli.js +2 -3
- package/build/src/flow-kit/validate.d.ts +81 -0
- package/build/src/index.d.ts +5 -0
- package/build/src/index.js +36 -0
- package/build/src/lib/args.d.ts +8 -0
- package/build/src/lib/flow-resolver.d.ts +82 -0
- package/build/src/lib/flow-resolver.js +237 -0
- package/build/src/lib/fs.d.ts +7 -0
- package/build/src/lib/workflow-learning-projection.d.ts +132 -0
- package/build/src/runtime-adapters.d.ts +18 -0
- package/build/src/tools/build-universal-bundles.d.ts +2 -0
- package/build/src/tools/build-universal-bundles.js +34 -22
- package/build/src/tools/common.d.ts +9 -0
- package/build/src/tools/generate-context-map.d.ts +2 -0
- package/build/src/tools/generate-context-map.js +3 -16
- package/build/src/tools/validate-package.d.ts +2 -0
- package/build/src/tools/validate-source-tree.d.ts +2 -0
- package/build/src/tools/validate-source-tree.js +42 -162
- package/context/contracts/artifact-contract.md +10 -0
- package/context/contracts/delivery-contract.md +1 -0
- package/context/contracts/review-contract.md +1 -0
- package/context/contracts/verification-contract.md +2 -0
- package/context/gate-awareness.md +39 -0
- package/context/scripts/hooks/stop-goal-fit.js +632 -70
- package/docs/adr/0001-flow-agents-consumes-flow.md +1 -1
- package/docs/adr/0002-flow-kits-as-extension-unit.md +1 -1
- package/docs/adr/0004-gates-expect-surface-claims.md +2 -0
- package/docs/adr/0005-kubernetes-inspired-resource-contracts.md +2 -0
- package/docs/adr/0007-skill-audit.md +1 -1
- package/docs/adr/0009-canonical-hook-core-kit-boundary.md +95 -0
- package/docs/adr/0010-workflow-trust-state-as-hachure-bundle.md +139 -0
- package/docs/adr/0011-mcp-posture.md +100 -0
- package/docs/adr/0012-agent-coordination-as-liveness-claims.md +119 -0
- package/docs/adr/0013-context-lifecycle.md +151 -0
- package/docs/adr/0014-core-vs-domain-kit-boundary.md +143 -0
- package/docs/adr/0015-flow-flow-agents-boundary-reconciliation.md +120 -0
- package/docs/adr/0016-three-hard-boundary-model.md +71 -0
- package/docs/adr/0017-anti-gaming-trust-security-model.md +155 -0
- package/docs/agent-system-guidebook.md +5 -12
- package/docs/context-map.md +4 -10
- package/docs/developer-architecture.md +14 -0
- package/docs/index.md +3 -2
- package/docs/integrations/framework-adapter.md +19 -6
- package/docs/integrations/index.md +2 -2
- package/docs/north-star.md +4 -4
- package/docs/operating-layers.md +3 -3
- package/docs/plans/adr-0010-phase2-gate-recompute.md +55 -0
- package/docs/repository-structure.md +2 -2
- package/docs/skills-map.md +1 -0
- package/docs/spec/runtime-hook-surface.md +78 -10
- package/docs/standards-register.md +3 -3
- package/docs/survey-utterance-check.md +1 -1
- package/docs/trust-anchor-adoption.md +197 -0
- package/docs/verifiable-trust.md +95 -0
- package/docs/veritas-integration.md +2 -2
- package/docs/workflow-usage-guide.md +69 -0
- package/evals/acceptance/DEMO-false-completion.md +144 -0
- package/evals/acceptance/demo-cast.sh +92 -0
- package/evals/acceptance/demo-false-completion.sh +72 -0
- package/evals/acceptance/demo-real-evidence.sh +104 -0
- package/evals/acceptance/demo.tape +29 -0
- package/evals/acceptance/prove-capture-teeth-declared.sh +335 -0
- package/evals/acceptance/prove-capture-teeth.sh +114 -0
- package/evals/acceptance/prove-teeth.sh +105 -0
- package/evals/ci/antigaming-suite.sh +54 -0
- package/evals/ci/run-baseline.sh +2 -0
- package/evals/fixtures/flow-kit-repository/invalid-missing-extension-asset/flows/review.flow.json +26 -0
- package/evals/fixtures/flow-kit-repository/invalid-missing-extension-asset/kit.json +20 -0
- package/evals/fixtures/flow-kit-repository/valid-unknown-extension/flows/review.flow.json +26 -0
- package/evals/fixtures/flow-kit-repository/valid-unknown-extension/kit.json +18 -0
- package/evals/integration/test_builder_step_producers.sh +379 -0
- package/evals/integration/test_bundle_install.sh +35 -71
- package/evals/integration/test_bundle_lifecycle.sh +39 -2
- package/evals/integration/test_captured_fail_reconciliation.sh +820 -0
- package/evals/integration/test_checkpoint_signing.sh +489 -0
- package/evals/integration/test_claim_lookup.sh +352 -0
- package/evals/integration/test_command_log_integrity.sh +275 -0
- package/evals/integration/test_context_map.sh +0 -2
- package/evals/integration/test_dual_emit_flow_step.sh +278 -0
- package/evals/integration/test_enforcer_expects_driven.sh +281 -0
- package/evals/integration/test_evidence_capture_hook.sh +185 -0
- package/evals/integration/test_flow_kit_repository.sh +2 -0
- package/evals/integration/test_flowdef_session_activation.sh +273 -0
- package/evals/integration/test_flowdef_session_history_preservation.sh +250 -0
- package/evals/integration/test_gate_bypass_chain.sh +448 -0
- package/evals/integration/test_gate_lockdown.sh +1137 -0
- package/evals/integration/test_gate_review_inquiry_records.sh +399 -0
- package/evals/integration/test_goal_fit_escape_hatch.sh +73 -0
- package/evals/integration/test_goal_fit_hook.sh +69 -4
- package/evals/integration/test_goal_fit_rederive.sh +263 -0
- package/evals/integration/test_hook_category_behaviors.sh +14 -0
- package/evals/integration/test_install_merge.sh +1176 -0
- package/evals/integration/test_mint_attestation.sh +373 -0
- package/evals/integration/test_phase_map_and_gate_claim.sh +365 -0
- package/evals/integration/test_publish_delivery.sh +269 -0
- package/evals/integration/test_reconcile_soundness.sh +528 -0
- package/evals/integration/test_resolvefirststep_security.sh +208 -0
- package/evals/integration/test_session_resume_roundtrip.sh +286 -0
- package/evals/integration/test_trust_checkpoint.sh +325 -0
- package/evals/integration/test_trust_reconcile.sh +293 -0
- package/evals/integration/test_verify_cli.sh +208 -0
- package/evals/integration/test_workflow_sidecar_writer.sh +549 -34
- package/evals/lib/node.sh +0 -6
- package/evals/run.sh +47 -0
- package/evals/static/test_library_exports.sh +85 -0
- package/evals/static/test_universal_bundles.sh +15 -0
- package/evals/static/test_workflow_skills.sh +6 -13
- package/install.sh +0 -7
- package/integrations/strands-ts/README.md +25 -15
- package/integrations/veritas/flow-agents.adapter.json +1 -2
- package/kits/builder/flows/build.flow.json +59 -12
- package/kits/builder/kit.json +85 -15
- package/kits/builder/skills/continue-work/SKILL.md +116 -0
- package/kits/builder/skills/deliver/SKILL.md +36 -6
- package/kits/builder/skills/design-probe/SKILL.md +28 -0
- package/kits/builder/skills/execute-plan/SKILL.md +9 -1
- package/kits/builder/skills/gate-review/SKILL.md +234 -0
- package/kits/builder/skills/learning-review/SKILL.md +30 -0
- package/kits/builder/skills/pickup-probe/SKILL.md +29 -0
- package/kits/builder/skills/plan-work/SKILL.md +13 -1
- package/kits/builder/skills/pull-work/SKILL.md +19 -0
- package/kits/knowledge/adapters/default-store/index.js +38 -0
- package/kits/knowledge/adapters/flow-runner/index.js +1620 -0
- package/kits/knowledge/adapters/obsidian-store/index.js +36 -6
- package/kits/knowledge/docs/store-contract.md +314 -0
- package/kits/knowledge/evals/audit-freshness/suite.test.js +368 -0
- package/kits/knowledge/evals/canonicalize-category/suite.test.js +383 -0
- package/kits/knowledge/evals/contract-suite/suite.test.js +111 -0
- package/kits/knowledge/evals/detect-contradictions/suite.test.js +324 -0
- package/kits/knowledge/evals/entities/suite.test.js +40 -0
- package/kits/knowledge/evals/glossary-sync/suite.test.js +416 -0
- package/kits/knowledge/evals/hygiene-review/suite.test.js +396 -0
- package/kits/knowledge/evals/retirement/suite.test.js +145 -0
- package/kits/knowledge/flows/audit-freshness.flow.json +44 -0
- package/kits/knowledge/flows/canonicalize-category.flow.json +44 -0
- package/kits/knowledge/flows/detect-contradictions.flow.json +44 -0
- package/kits/knowledge/flows/glossary-sync.flow.json +61 -0
- package/kits/knowledge/flows/hygiene-review.flow.json +43 -0
- package/kits/knowledge/kit.json +51 -1
- package/package.json +13 -4
- package/packaging/conformance/README.md +10 -2
- package/packaging/conformance/fixtures/evidence-capture--allow-records-command.json +29 -0
- package/packaging/conformance/fixtures/stop-goal-fit--block-bundle-disputed-claim.json +29 -0
- package/packaging/conformance/fixtures/stop-goal-fit--block-capture-contradicts-claimed-pass.json +30 -0
- package/packaging/conformance/fixtures/stop-goal-fit--block-mode.json +23 -0
- package/packaging/conformance/fixtures/stop-goal-fit--off-mode.json +24 -0
- package/packaging/conformance/fixtures/stop-goal-fit--warn-active-delivery.json +5 -2
- package/packaging/conformance/fixtures/stop-goal-fit--warn-no-bundle.json +23 -0
- package/packaging/conformance/fixtures/workflow-steering--reground-active-prompt.json +30 -0
- package/packaging/conformance/fixtures/workflow-steering--reground-session-start.json +30 -0
- package/packaging/conformance/run-conformance.js +1 -1
- package/scripts/README.md +2 -1
- package/scripts/build-universal-bundles.js +0 -1
- package/scripts/ci/mint-attestation.js +221 -0
- package/scripts/ci/trust-reconcile.js +545 -0
- package/scripts/hooks/config-protection.js +423 -1
- package/scripts/hooks/evidence-capture.js +348 -0
- package/scripts/hooks/lib/liveness-read.js +113 -0
- package/scripts/hooks/run-hook.js +6 -1
- package/scripts/hooks/stop-goal-fit.js +1471 -79
- package/scripts/hooks/workflow-steering.js +135 -5
- package/scripts/install-codex-home.sh +39 -0
- package/scripts/install-merge.js +330 -0
- package/src/cli/init.ts +218 -20
- package/src/cli/validate-workflow-artifacts.ts +18 -2
- package/src/cli/verify.ts +100 -0
- package/src/cli/workflow-sidecar.ts +2093 -84
- package/src/cli.ts +2 -3
- package/src/index.ts +53 -0
- package/src/lib/flow-resolver.ts +284 -0
- package/src/tools/build-universal-bundles.ts +34 -21
- package/src/tools/generate-context-map.ts +3 -17
- package/src/tools/validate-source-tree.ts +44 -104
- package/tsconfig.json +1 -0
- package/build/src/tools/filter-installed-packs.js +0 -135
- package/packaging/packs.json +0 -49
- package/scripts/filter-installed-packs.js +0 -2
- package/src/tools/filter-installed-packs.ts +0 -132
|
@@ -0,0 +1,208 @@
|
|
|
1
|
+
#!/usr/bin/env bash
|
|
2
|
+
# test_resolvefirststep_security.sh — Security regression for resolveFirstStep path traversal.
|
|
3
|
+
#
|
|
4
|
+
# Fix: resolveFirstStep in workflow-sidecar.ts previously constructed the flow-definition
|
|
5
|
+
# path WITHOUT validation, allowing a crafted --flow-id like "a.../../secret" to escape
|
|
6
|
+
# the kits/ directory via path.join traversal. The fix imports and reuses resolveFlowFilePath
|
|
7
|
+
# from flow-resolver.ts (which already enforces SLUG_RE + path-containment), ensuring DRY
|
|
8
|
+
# defense-in-depth with a single implementation.
|
|
9
|
+
#
|
|
10
|
+
# Tests:
|
|
11
|
+
# 1. PRE-FIX proof (via resolveFlowFilePath unit): traversal inputs return null.
|
|
12
|
+
# 2. POST-FIX behavioral: ensure-session --flow-id with traversal IDs
|
|
13
|
+
# produces no active_step_id (resolveFirstStep returns null → no step set).
|
|
14
|
+
# 3. No out-of-tree definitions: a FLOW_AGENTS_FLOW_DEFS_DIR override pointing into .flow-agents is rejected.
|
|
15
|
+
# 4. Legit ensure-session --flow-id builder.build still works (first step resolved).
|
|
16
|
+
#
|
|
17
|
+
# Deterministic, no model spend, self-cleaning.
|
|
18
|
+
# Usage: bash evals/integration/test_resolvefirststep_security.sh
|
|
19
|
+
|
|
20
|
+
set -uo pipefail
|
|
21
|
+
|
|
22
|
+
ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")/../.." && pwd)"
|
|
23
|
+
source "$ROOT/evals/lib/node.sh"
|
|
24
|
+
|
|
25
|
+
TMP="$(mktemp -d)"
|
|
26
|
+
errors=0
|
|
27
|
+
|
|
28
|
+
# Report a passing check on stdout.
# Arguments: $1 - human-readable description of the check
_pass() {
  printf ' PASS: %s\n' "$1"
}
|
|
29
|
+
# Report a failing check on stdout and bump the global failure counter.
# Globals:   errors (read and incremented)
# Arguments: $1 - human-readable description of the check
_fail() {
  printf ' FAIL: %s\n' "$1"
  errors=$((errors + 1))
}
|
|
30
|
+
|
|
31
|
+
# Remove the scratch directory named by $TMP (registered as the EXIT trap).
# Globals: TMP (read)
# The `--` ends option parsing so a TMP value that begins with '-' is
# treated as a path rather than as rm options.
cleanup() {
  rm -rf -- "$TMP"
}
|
|
32
|
+
trap cleanup EXIT
|
|
33
|
+
|
|
34
|
+
WRITER="workflow-sidecar"
|
|
35
|
+
FLOW_RESOLVER_JS="$ROOT/build/src/li""b/flow-resolver.js"
|
|
36
|
+
|
|
37
|
+
echo ""
|
|
38
|
+
echo "================================================================="
|
|
39
|
+
echo " resolveFirstStep Path Traversal Security Regression"
|
|
40
|
+
echo "================================================================="
|
|
41
|
+
|
|
42
|
+
# ─── Unit: resolveFlowFilePath rejects traversal slugs ────────────────────────
|
|
43
|
+
echo ""
|
|
44
|
+
echo "=== 1. resolveFlowFilePath unit: traversal inputs → null (SLUG_RE defense) ==="
|
|
45
|
+
|
|
46
|
+
node --input-type=module << JSEOF 2>&1
|
|
47
|
+
import { resolveFlowFilePath } from '${FLOW_RESOLVER_JS}';
|
|
48
|
+
|
|
49
|
+
const cases = [
|
|
50
|
+
// [kitId, flowName, flowId, repoRoot, expected]
|
|
51
|
+
["a", "../../secret", "a.../../secret", "/repo", null, "flowName with ../ escape"],
|
|
52
|
+
["a", "../../../etc", "a../../../etc", "/repo", null, "flowName multi-level escape"],
|
|
53
|
+
["../evil", "build", "../evil.build", "/repo", null, "kitId with ../ escape"],
|
|
54
|
+
["a", "b/c", "a.b/c", "/repo", null, "flowName with path separator"],
|
|
55
|
+
["a", "ok", "a.ok", "/repo", "string", "legit (a.ok) → non-null path"],
|
|
56
|
+
["builder", "build", "builder.build", "/repo", "string", "legit (builder.build) → non-null path"],
|
|
57
|
+
];
|
|
58
|
+
|
|
59
|
+
let failures = 0;
|
|
60
|
+
for (const [kitId, flowName, flowId, repoRoot, expected, label] of cases) {
|
|
61
|
+
const result = resolveFlowFilePath(kitId, flowName, flowId, repoRoot);
|
|
62
|
+
const ok = expected === null ? result === null : (result !== null && typeof result === 'string');
|
|
63
|
+
if (!ok) {
|
|
64
|
+
console.error(' FAIL: ' + label + ' got ' + JSON.stringify(result) + ' expected ' + expected);
|
|
65
|
+
failures++;
|
|
66
|
+
} else {
|
|
67
|
+
console.log(' PASS: ' + label);
|
|
68
|
+
}
|
|
69
|
+
}
|
|
70
|
+
if (failures > 0) process.exit(1);
|
|
71
|
+
JSEOF
|
|
72
|
+
|
|
73
|
+
if [ $? -eq 0 ]; then
|
|
74
|
+
_pass "resolveFlowFilePath: all traversal inputs → null; legit inputs → valid path"
|
|
75
|
+
else
|
|
76
|
+
_fail "resolveFlowFilePath: some cases did not match expected"
|
|
77
|
+
fi
|
|
78
|
+
|
|
79
|
+
# ─── Behavioral: ensure-session with traversal --flow-id → null active_step_id ─
|
|
80
|
+
echo ""
|
|
81
|
+
echo "=== 2. ensure-session --flow-id traversal → no active_step_id (null return) ==="
|
|
82
|
+
|
|
83
|
+
# Create a fake "secret" file OUTSIDE kits/ to prove it is not read
|
|
84
|
+
SECRET_DIR="$TMP/secret-outside"
|
|
85
|
+
mkdir -p "$SECRET_DIR"
|
|
86
|
+
printf 'SECRET_CONTENTS' > "$SECRET_DIR/secret.flow.json"
|
|
87
|
+
|
|
88
|
+
AROOT="$TMP/traversal-aroot"
|
|
89
|
+
mkdir -p "$AROOT"
|
|
90
|
+
|
|
91
|
+
for traversal_id in "a.../../secret" "a.../../../etc" "builder../../../escape"; do
|
|
92
|
+
slug="trav-$(echo "$traversal_id" | tr -d './')"
|
|
93
|
+
set +e
|
|
94
|
+
flow_agents_node "$WRITER" ensure-session \
|
|
95
|
+
--artifact-root "$AROOT" \
|
|
96
|
+
--task-slug "$slug" \
|
|
97
|
+
--title "Traversal traversal test" \
|
|
98
|
+
--summary "Traversal flow-id should not escape kits/." \
|
|
99
|
+
--flow-id "$traversal_id" \
|
|
100
|
+
--timestamp "2026-06-27T00:00:00Z" >"$TMP/traversal-$slug.out" 2>&1
|
|
101
|
+
ens_exit=$?
|
|
102
|
+
set -e
|
|
103
|
+
|
|
104
|
+
# The session may succeed or fail (behavior doesn't matter); what matters is:
|
|
105
|
+
# 1. No active_step_id is set (resolveFirstStep returned null)
|
|
106
|
+
# 2. The secret file outside kits/ was not used (not asserted directly; an empty active_step_id is the proxy)
|
|
107
|
+
if [ -f "$AROOT/current.json" ]; then
|
|
108
|
+
active_step=$(node -e "
|
|
109
|
+
const fs = require('fs');
|
|
110
|
+
const c = JSON.parse(fs.readFileSync('$AROOT/current.json', 'utf8'));
|
|
111
|
+
console.log(c.active_step_id || '');
|
|
112
|
+
" 2>/dev/null || echo "")
|
|
113
|
+
if [ -z "$active_step" ]; then
|
|
114
|
+
_pass "ensure-session --flow-id '$traversal_id' → active_step_id is empty (resolveFirstStep returned null)"
|
|
115
|
+
else
|
|
116
|
+
_fail "ensure-session --flow-id '$traversal_id' → unexpected active_step_id='$active_step' (traversal may have succeeded)"
|
|
117
|
+
fi
|
|
118
|
+
else
|
|
119
|
+
# If session creation failed entirely, that's also acceptable (fail-closed)
|
|
120
|
+
_pass "ensure-session --flow-id '$traversal_id' → session not created (fail-closed)"
|
|
121
|
+
fi
|
|
122
|
+
done
|
|
123
|
+
|
|
124
|
+
# ─── No out-of-tree reads: FLOW_AGENTS_FLOW_DEFS_DIR override into .flow-agents is rejected ─
|
|
125
|
+
echo ""
|
|
126
|
+
echo "=== 3. FLOW_AGENTS_FLOW_DEFS_DIR pointing into .flow-agents → rejected ==="
|
|
127
|
+
|
|
128
|
+
AGENT_DEFS_DIR="$TMP/agent-defs-aroot/.flow-agents/defs"
|
|
129
|
+
mkdir -p "$AGENT_DEFS_DIR"
|
|
130
|
+
# Write a fake flow.json in the agent-writable area
|
|
131
|
+
printf '{"id":"evil.inject","steps":[{"id":"evil-step"}]}' > "$AGENT_DEFS_DIR/evil.inject.flow.json"
|
|
132
|
+
|
|
133
|
+
OVERRIDE_AROOT="$TMP/override-aroot"
|
|
134
|
+
mkdir -p "$OVERRIDE_AROOT"
|
|
135
|
+
|
|
136
|
+
set +e
|
|
137
|
+
FLOW_AGENTS_FLOW_DEFS_DIR="$AGENT_DEFS_DIR" \
|
|
138
|
+
flow_agents_node "$WRITER" ensure-session \
|
|
139
|
+
--artifact-root "$OVERRIDE_AROOT" \
|
|
140
|
+
--task-slug "evil-inject" \
|
|
141
|
+
--title "Override test" \
|
|
142
|
+
--summary "FLOW_AGENTS_FLOW_DEFS_DIR pointing into .flow-agents should be rejected." \
|
|
143
|
+
--flow-id "evil.inject" \
|
|
144
|
+
--timestamp "2026-06-27T00:00:00Z" >"$TMP/override.out" 2>&1
|
|
145
|
+
set -e
|
|
146
|
+
|
|
147
|
+
if [ -f "$OVERRIDE_AROOT/current.json" ]; then
|
|
148
|
+
override_step=$(node -e "
|
|
149
|
+
const fs = require('fs');
|
|
150
|
+
const c = JSON.parse(fs.readFileSync('$OVERRIDE_AROOT/current.json', 'utf8'));
|
|
151
|
+
console.log(c.active_step_id || '');
|
|
152
|
+
" 2>/dev/null || echo "")
|
|
153
|
+
if [ -z "$override_step" ]; then
|
|
154
|
+
_pass "FLOW_AGENTS_FLOW_DEFS_DIR into .flow-agents → active_step_id empty (override rejected, fell back to kits/)"
|
|
155
|
+
else
|
|
156
|
+
_fail "FLOW_AGENTS_FLOW_DEFS_DIR into .flow-agents → active_step_id='$override_step' (agent-writable override was NOT rejected)"
|
|
157
|
+
fi
|
|
158
|
+
else
|
|
159
|
+
_pass "FLOW_AGENTS_FLOW_DEFS_DIR into .flow-agents → session not created (fail-closed)"
|
|
160
|
+
fi
|
|
161
|
+
|
|
162
|
+
# ─── Legit case: builder.build still resolves the first step ─────────────────
|
|
163
|
+
echo ""
|
|
164
|
+
echo "=== 4. Legit --flow-id builder.build → active_step_id set (first step resolved) ==="
|
|
165
|
+
|
|
166
|
+
LEGIT_AROOT="$TMP/legit-aroot"
|
|
167
|
+
mkdir -p "$LEGIT_AROOT"
|
|
168
|
+
|
|
169
|
+
set +e
|
|
170
|
+
flow_agents_node "$WRITER" ensure-session \
|
|
171
|
+
--artifact-root "$LEGIT_AROOT" \
|
|
172
|
+
--task-slug "legit-builder" \
|
|
173
|
+
--title "Legit builder test" \
|
|
174
|
+
--summary "builder.build should activate with a first step." \
|
|
175
|
+
--flow-id "builder.build" \
|
|
176
|
+
--timestamp "2026-06-27T00:00:00Z" >"$TMP/legit.out" 2>&1
|
|
177
|
+
legit_exit=$?
|
|
178
|
+
set -e
|
|
179
|
+
|
|
180
|
+
legit_step=$(node -e "
|
|
181
|
+
const fs = require('fs');
|
|
182
|
+
const c = JSON.parse(fs.readFileSync('$LEGIT_AROOT/current.json', 'utf8'));
|
|
183
|
+
console.log(c.active_step_id || '');
|
|
184
|
+
" 2>/dev/null || echo "")
|
|
185
|
+
|
|
186
|
+
if [ -n "$legit_step" ]; then
|
|
187
|
+
_pass "ensure-session --flow-id builder.build → active_step_id='$legit_step' (first step resolved)"
|
|
188
|
+
else
|
|
189
|
+
_fail "ensure-session --flow-id builder.build → active_step_id is empty (resolution failed)"
|
|
190
|
+
fi
|
|
191
|
+
|
|
192
|
+
# ─── Summary ─────────────────────────────────────────────────────────────────
|
|
193
|
+
echo ""
|
|
194
|
+
echo "================================================================="
|
|
195
|
+
if [ "$errors" -eq 0 ]; then
|
|
196
|
+
echo "PASS resolveFirstStep security eval: all checks passed."
|
|
197
|
+
echo ""
|
|
198
|
+
echo "Security fix summary:"
|
|
199
|
+
echo " PRE-FIX: resolveFirstStep built path directly from flowId without SLUG_RE validation."
|
|
200
|
+
echo " A crafted --flow-id like 'a.../../secret' escaped kits/ via path.join."
|
|
201
|
+
echo " POST-FIX: resolveFlowFilePath (from flow-resolver.ts) is reused — single implementation."
|
|
202
|
+
echo " SLUG_RE rejects any flowName containing '../' or '/' → null returned."
|
|
203
|
+
echo " Path-containment belt-and-suspenders confirms resolved path is inside root."
|
|
204
|
+
echo " FLOW_AGENTS_FLOW_DEFS_DIR override pointing into .flow-agents is rejected."
|
|
205
|
+
exit 0
|
|
206
|
+
fi
|
|
207
|
+
echo "FAIL resolveFirstStep security eval: $errors check(s) failed."
|
|
208
|
+
exit 1
|
|
@@ -0,0 +1,286 @@
|
|
|
1
|
+
#!/usr/bin/env bash
|
|
2
|
+
# test_session_resume_roundtrip.sh — resumable-sessions (issue #153) round-trip eval
|
|
3
|
+
#
|
|
4
|
+
# Seeds a temporary repo fixture with an active session, runs the workflow-steering
|
|
5
|
+
# hook with a SessionStart event, and asserts:
|
|
6
|
+
# AC1: RESUME block is present with status/phase/next_action/plan/handoff/trust fields
|
|
7
|
+
# AC2: Liveness warning present when a fresh other-actor event is seeded
|
|
8
|
+
# AC3: state.json / handoff.json / trust.bundle checksums are unchanged (non-destructive)
|
|
9
|
+
#
|
|
10
|
+
# Negative cases:
|
|
11
|
+
# - UserPromptSubmit → no RESUME block
|
|
12
|
+
# - Empty liveness stream → no LIVENESS WARNING
|
|
13
|
+
set -uo pipefail
|
|
14
|
+
|
|
15
|
+
ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")/../.." && pwd)"
|
|
16
|
+
|
|
17
|
+
TMPDIR_EVAL="$(mktemp -d)"
|
|
18
|
+
errors=0
|
|
19
|
+
|
|
20
|
+
# Recursively delete the temporary eval directory named by $TMPDIR_EVAL.
cleanup() {
|
|
21
|
+
rm -rf "$TMPDIR_EVAL"
|
|
22
|
+
}
|
|
23
|
+
trap cleanup EXIT
|
|
24
|
+
|
|
25
|
+
# Report a passing check on stdout.
# Arguments: $1 - human-readable description of the check
_pass() {
  printf ' ✓ %s\n' "$1"
}
|
|
26
|
+
# Report a failing check on stdout and bump the global failure counter.
# Globals:   errors (read and incremented)
# Arguments: $1 - human-readable description of the check
_fail() {
  printf ' ✗ %s\n' "$1"
  errors=$((errors + 1))
}
|
|
27
|
+
|
|
28
|
+
# ─── Portable sha256 helper ────────────────────────────────────────────────────
|
|
29
|
+
# Print the first field (the hex digest) of the SHA-256 tool output for file $1.
# Uses sha256sum when it is on PATH, otherwise falls back to `shasum -a 256`.
# Arguments: $1 - path of the file to hash
sha256_file() {
|
|
30
|
+
if command -v sha256sum >/dev/null 2>&1; then
|
|
31
|
+
sha256sum "$1" | awk '{print $1}'
|
|
32
|
+
else
|
|
33
|
+
shasum -a 256 "$1" | awk '{print $1}'
|
|
34
|
+
fi
|
|
35
|
+
}
|
|
36
|
+
|
|
37
|
+
# ─── Seed fixture ─────────────────────────────────────────────────────────────
|
|
38
|
+
REPO="$TMPDIR_EVAL/repo"
|
|
39
|
+
SLUG="test-slug-153"
|
|
40
|
+
TASK_DIR="$REPO/.flow-agents/$SLUG"
|
|
41
|
+
mkdir -p "$TASK_DIR"
|
|
42
|
+
mkdir -p "$REPO/.flow-agents/liveness"
|
|
43
|
+
mkdir -p "$REPO/docs"
|
|
44
|
+
|
|
45
|
+
printf '# Test Repo\n' > "$REPO/AGENTS.md"
|
|
46
|
+
printf '# Context Map\n' > "$REPO/docs/context-map.md"
|
|
47
|
+
|
|
48
|
+
# state.json — active session in_progress/execution
|
|
49
|
+
cat > "$TASK_DIR/state.json" << 'JSON'
|
|
50
|
+
{
|
|
51
|
+
"schema_version": "1.0",
|
|
52
|
+
"task_slug": "test-slug-153",
|
|
53
|
+
"status": "in_progress",
|
|
54
|
+
"phase": "execution",
|
|
55
|
+
"updated_at": "2026-06-25T00:00:00Z",
|
|
56
|
+
"next_action": {
|
|
57
|
+
"status": "active",
|
|
58
|
+
"summary": "Continue implementing the RESUME block in workflow-steering.js",
|
|
59
|
+
"target_phase": "verification"
|
|
60
|
+
},
|
|
61
|
+
"artifact_paths": ["test-slug-153--plan-work.md"]
|
|
62
|
+
}
|
|
63
|
+
JSON
|
|
64
|
+
|
|
65
|
+
# handoff.json
|
|
66
|
+
cat > "$TASK_DIR/handoff.json" << 'JSON'
|
|
67
|
+
{
|
|
68
|
+
"schema_version": "1.0",
|
|
69
|
+
"task_slug": "test-slug-153",
|
|
70
|
+
"next_steps": ["Run eval and check RESUME output"],
|
|
71
|
+
"blockers": []
|
|
72
|
+
}
|
|
73
|
+
JSON
|
|
74
|
+
|
|
75
|
+
# stub plan file
|
|
76
|
+
printf '# Plan: test-slug-153\n' > "$TASK_DIR/test-slug-153--plan-work.md"
|
|
77
|
+
|
|
78
|
+
# trust.bundle — one verified, one disputed
|
|
79
|
+
cat > "$TASK_DIR/trust.bundle" << 'JSON'
|
|
80
|
+
{
|
|
81
|
+
"schema_version": "1.0",
|
|
82
|
+
"task_slug": "test-slug-153",
|
|
83
|
+
"claims": [
|
|
84
|
+
{
|
|
85
|
+
"id": "verified-claim-001",
|
|
86
|
+
"status": "verified",
|
|
87
|
+
"claimType": "implementation",
|
|
88
|
+
"value": "feature implemented"
|
|
89
|
+
},
|
|
90
|
+
{
|
|
91
|
+
"id": "disputed-claim-id",
|
|
92
|
+
"status": "disputed",
|
|
93
|
+
"claimType": "test-coverage",
|
|
94
|
+
"value": "tests pass"
|
|
95
|
+
}
|
|
96
|
+
]
|
|
97
|
+
}
|
|
98
|
+
JSON
|
|
99
|
+
|
|
100
|
+
# install.json — initial version
|
|
101
|
+
cat > "$REPO/.flow-agents/install.json" << 'JSON'
|
|
102
|
+
{
|
|
103
|
+
"version": "v0.0.1",
|
|
104
|
+
"installedAt": "2026-06-25T00:00:00Z",
|
|
105
|
+
"runtime": "claude-code"
|
|
106
|
+
}
|
|
107
|
+
JSON
|
|
108
|
+
|
|
109
|
+
# Liveness stream: fresh other-agent event (5 min ago, within 1800 s TTL)
|
|
110
|
+
# and a self (local) event — self should NOT trigger a warning
|
|
111
|
+
FIVE_MIN_AGO="$(node -e "process.stdout.write(new Date(Date.now()-300000).toISOString().replace(/\\.\\d{3}Z$/,'Z'))")"
|
|
112
|
+
printf '{"type":"claim","subjectId":"test-slug-153","actor":"other-agent","at":"%s","ttlSeconds":1800}\n' "$FIVE_MIN_AGO" > "$REPO/.flow-agents/liveness/events.jsonl"
|
|
113
|
+
printf '{"type":"heartbeat","subjectId":"test-slug-153","actor":"local","at":"%s"}\n' "$FIVE_MIN_AGO" >> "$REPO/.flow-agents/liveness/events.jsonl"
|
|
114
|
+
|
|
115
|
+
# ─── Snapshot checksums before hook run ───────────────────────────────────────
|
|
116
|
+
CKSUM_STATE_BEFORE="$(sha256_file "$TASK_DIR/state.json")"
|
|
117
|
+
CKSUM_HANDOFF_BEFORE="$(sha256_file "$TASK_DIR/handoff.json")"
|
|
118
|
+
CKSUM_TRUST_BEFORE="$(sha256_file "$TASK_DIR/trust.bundle")"
|
|
119
|
+
|
|
120
|
+
# ─── Hot-upgrade simulation: bump install.json version ───────────────────────
|
|
121
|
+
node -e "
|
|
122
|
+
const fs = require('fs');
|
|
123
|
+
const f = '$REPO/.flow-agents/install.json';
|
|
124
|
+
const obj = JSON.parse(fs.readFileSync(f,'utf8'));
|
|
125
|
+
obj.version = 'v0.0.2';
|
|
126
|
+
fs.writeFileSync(f, JSON.stringify(obj, null, 2) + '\n');
|
|
127
|
+
"
|
|
128
|
+
|
|
129
|
+
# ─── Run hook with SessionStart ───────────────────────────────────────────────
|
|
130
|
+
if echo "{\"hook_event_name\":\"SessionStart\",\"cwd\":\"$REPO\"}" | \
|
|
131
|
+
FLOW_AGENTS_ACTOR="local" node "$ROOT/scripts/hooks/workflow-steering.js" > "$TMPDIR_EVAL/resume.out" 2>&1; then
|
|
132
|
+
_pass "hook exits 0 for SessionStart"
|
|
133
|
+
else
|
|
134
|
+
_fail "hook should exit 0 for SessionStart (exit $?)"
|
|
135
|
+
fi
|
|
136
|
+
|
|
137
|
+
# ─── AC1: RESUME block presence and fields ────────────────────────────────────
|
|
138
|
+
if grep -q "RESUME:" "$TMPDIR_EVAL/resume.out"; then
|
|
139
|
+
_pass "RESUME block present in SessionStart output"
|
|
140
|
+
else
|
|
141
|
+
_fail "RESUME block missing from SessionStart output: $(cat "$TMPDIR_EVAL/resume.out")"
|
|
142
|
+
fi
|
|
143
|
+
|
|
144
|
+
if grep -q "in_progress" "$TMPDIR_EVAL/resume.out"; then
|
|
145
|
+
_pass "status 'in_progress' echoed in RESUME block"
|
|
146
|
+
else
|
|
147
|
+
_fail "status missing from RESUME block"
|
|
148
|
+
fi
|
|
149
|
+
|
|
150
|
+
if grep -q "execution" "$TMPDIR_EVAL/resume.out"; then
|
|
151
|
+
_pass "phase 'execution' echoed in RESUME block"
|
|
152
|
+
else
|
|
153
|
+
_fail "phase missing from RESUME block"
|
|
154
|
+
fi
|
|
155
|
+
|
|
156
|
+
if grep -q "Continue implementing the RESUME block" "$TMPDIR_EVAL/resume.out"; then
|
|
157
|
+
_pass "full next_action summary present in RESUME block"
|
|
158
|
+
else
|
|
159
|
+
_fail "next_action summary missing from RESUME block: $(grep 'Next action' "$TMPDIR_EVAL/resume.out" || echo 'no Next action line')"
|
|
160
|
+
fi
|
|
161
|
+
|
|
162
|
+
if grep -q "test-slug-153--plan-work.md" "$TMPDIR_EVAL/resume.out"; then
|
|
163
|
+
_pass "plan artifact path present in RESUME block"
|
|
164
|
+
else
|
|
165
|
+
_fail "plan artifact path missing from RESUME block"
|
|
166
|
+
fi
|
|
167
|
+
|
|
168
|
+
if grep -q "Run eval and check RESUME output" "$TMPDIR_EVAL/resume.out"; then
|
|
169
|
+
_pass "handoff next_step present in RESUME block"
|
|
170
|
+
else
|
|
171
|
+
_fail "handoff next_step missing from RESUME block"
|
|
172
|
+
fi
|
|
173
|
+
|
|
174
|
+
if grep -q "disputed" "$TMPDIR_EVAL/resume.out"; then
|
|
175
|
+
_pass "trust takeaway mentions disputed status"
|
|
176
|
+
else
|
|
177
|
+
_fail "trust takeaway missing disputed status"
|
|
178
|
+
fi
|
|
179
|
+
|
|
180
|
+
if grep -q "disputed-claim-id" "$TMPDIR_EVAL/resume.out"; then
|
|
181
|
+
_pass "disputed claim id present in RESUME block"
|
|
182
|
+
else
|
|
183
|
+
_fail "disputed claim id missing from RESUME block"
|
|
184
|
+
fi
|
|
185
|
+
|
|
186
|
+
if grep -q "workflow:sidecar -- claim" "$TMPDIR_EVAL/resume.out"; then
|
|
187
|
+
_pass "disputed claim remedy command present in RESUME block"
|
|
188
|
+
else
|
|
189
|
+
_fail "disputed claim remedy command missing from RESUME block"
|
|
190
|
+
fi
|
|
191
|
+
|
|
192
|
+
if grep -q "pull-work" "$TMPDIR_EVAL/resume.out"; then
|
|
193
|
+
_pass "pull-work route hint present in RESUME block"
|
|
194
|
+
else
|
|
195
|
+
_fail "pull-work route hint missing from RESUME block"
|
|
196
|
+
fi
|
|
197
|
+
|
|
198
|
+
# ─── AC2: Liveness warning present ────────────────────────────────────────────
|
|
199
|
+
if grep -q "LIVENESS WARNING" "$TMPDIR_EVAL/resume.out"; then
|
|
200
|
+
_pass "LIVENESS WARNING present in RESUME block"
|
|
201
|
+
else
|
|
202
|
+
_fail "LIVENESS WARNING missing from RESUME block: $(cat "$TMPDIR_EVAL/resume.out")"
|
|
203
|
+
fi
|
|
204
|
+
|
|
205
|
+
if grep -q "other-agent" "$TMPDIR_EVAL/resume.out"; then
|
|
206
|
+
_pass "other-agent actor named in liveness warning"
|
|
207
|
+
else
|
|
208
|
+
_fail "other-agent actor missing from liveness warning"
|
|
209
|
+
fi
|
|
210
|
+
|
|
211
|
+
# Self-actor (local) should NOT appear as a liveness warning
|
|
212
|
+
if ! grep -q "LIVENESS WARNING.*local\|local.*LIVENESS WARNING" "$TMPDIR_EVAL/resume.out"; then
|
|
213
|
+
_pass "self-actor (local) correctly excluded from liveness warning"
|
|
214
|
+
else
|
|
215
|
+
_fail "self-actor should not be warned in liveness advisory"
|
|
216
|
+
fi
|
|
217
|
+
|
|
218
|
+
# ─── AC3: Checksums unchanged (non-destructive) ───────────────────────────────
|
|
219
|
+
CKSUM_STATE_AFTER="$(sha256_file "$TASK_DIR/state.json")"
|
|
220
|
+
CKSUM_HANDOFF_AFTER="$(sha256_file "$TASK_DIR/handoff.json")"
|
|
221
|
+
CKSUM_TRUST_AFTER="$(sha256_file "$TASK_DIR/trust.bundle")"
|
|
222
|
+
|
|
223
|
+
if [[ "$CKSUM_STATE_BEFORE" == "$CKSUM_STATE_AFTER" ]]; then
|
|
224
|
+
_pass "state.json checksum unchanged (non-destructive)"
|
|
225
|
+
else
|
|
226
|
+
_fail "state.json was modified by the hook (checksums differ)"
|
|
227
|
+
fi
|
|
228
|
+
|
|
229
|
+
if [[ "$CKSUM_HANDOFF_BEFORE" == "$CKSUM_HANDOFF_AFTER" ]]; then
|
|
230
|
+
_pass "handoff.json checksum unchanged (non-destructive)"
|
|
231
|
+
else
|
|
232
|
+
_fail "handoff.json was modified by the hook (checksums differ)"
|
|
233
|
+
fi
|
|
234
|
+
|
|
235
|
+
if [[ "$CKSUM_TRUST_BEFORE" == "$CKSUM_TRUST_AFTER" ]]; then
|
|
236
|
+
_pass "trust.bundle checksum unchanged (non-destructive)"
|
|
237
|
+
else
|
|
238
|
+
_fail "trust.bundle was modified by the hook (checksums differ)"
|
|
239
|
+
fi
|
|
240
|
+
|
|
241
|
+
# ─── Negative: UserPromptSubmit should produce NO RESUME block ────────────────
|
|
242
|
+
echo "{\"hook_event_name\":\"UserPromptSubmit\",\"cwd\":\"$REPO\",\"prompt\":\"continue\"}" | \
|
|
243
|
+
FLOW_AGENTS_ACTOR="local" node "$ROOT/scripts/hooks/workflow-steering.js" > "$TMPDIR_EVAL/prompt.out" 2>&1
|
|
244
|
+
|
|
245
|
+
if ! grep -q "RESUME:" "$TMPDIR_EVAL/prompt.out"; then
|
|
246
|
+
_pass "RESUME block absent for UserPromptSubmit (negative case)"
|
|
247
|
+
else
|
|
248
|
+
_fail "RESUME block must not appear for UserPromptSubmit"
|
|
249
|
+
fi
|
|
250
|
+
|
|
251
|
+
# ─── Negative: Empty liveness stream → no LIVENESS WARNING ───────────────────
|
|
252
|
+
REPO2="$TMPDIR_EVAL/repo2"
|
|
253
|
+
TASK_DIR2="$REPO2/.flow-agents/$SLUG"
|
|
254
|
+
mkdir -p "$TASK_DIR2"
|
|
255
|
+
mkdir -p "$REPO2/docs"
|
|
256
|
+
printf '# Test Repo 2\n' > "$REPO2/AGENTS.md"
|
|
257
|
+
printf '# Context Map\n' > "$REPO2/docs/context-map.md"
|
|
258
|
+
cp "$TASK_DIR/state.json" "$TASK_DIR2/state.json"
|
|
259
|
+
cp "$TASK_DIR/handoff.json" "$TASK_DIR2/handoff.json"
|
|
260
|
+
cp "$TASK_DIR/trust.bundle" "$TASK_DIR2/trust.bundle"
|
|
261
|
+
printf 'test-slug-153--plan-work.md stub\n' > "$TASK_DIR2/test-slug-153--plan-work.md"
|
|
262
|
+
# No liveness directory → empty stream
|
|
263
|
+
|
|
264
|
+
echo "{\"hook_event_name\":\"SessionStart\",\"cwd\":\"$REPO2\"}" | \
|
|
265
|
+
FLOW_AGENTS_ACTOR="local" node "$ROOT/scripts/hooks/workflow-steering.js" > "$TMPDIR_EVAL/nolive.out" 2>&1
|
|
266
|
+
|
|
267
|
+
if grep -q "RESUME:" "$TMPDIR_EVAL/nolive.out"; then
|
|
268
|
+
_pass "RESUME block present when no liveness stream (absence case)"
|
|
269
|
+
else
|
|
270
|
+
_fail "RESUME block should still be present with empty liveness stream"
|
|
271
|
+
fi
|
|
272
|
+
|
|
273
|
+
if ! grep -q "LIVENESS WARNING" "$TMPDIR_EVAL/nolive.out"; then
|
|
274
|
+
_pass "no LIVENESS WARNING when liveness stream is empty (absence case)"
|
|
275
|
+
else
|
|
276
|
+
_fail "LIVENESS WARNING must not appear when no fresh other-actor events exist"
|
|
277
|
+
fi
|
|
278
|
+
|
|
279
|
+
# ─── Summary ──────────────────────────────────────────────────────────────────
|
|
280
|
+
if [[ "$errors" -eq 0 ]]; then
|
|
281
|
+
echo "Session resume roundtrip eval passed."
|
|
282
|
+
exit 0
|
|
283
|
+
fi
|
|
284
|
+
|
|
285
|
+
echo "Session resume roundtrip eval failed: $errors issue(s)."
|
|
286
|
+
exit 1
|