@kontourai/flow-agents 1.4.0 → 2.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.github/CODEOWNERS +29 -0
- package/.github/actions/trust-verify/action.yml +145 -0
- package/.github/workflows/ci.yml +11 -4
- package/.github/workflows/kit-gates-demo.yml +2 -2
- package/.github/workflows/publish-npm.yml +10 -2
- package/.github/workflows/release-please.yml +1 -1
- package/.github/workflows/trust-reconcile.yml +113 -0
- package/AGENTS.md +13 -0
- package/CHANGELOG.md +95 -0
- package/CONTRIBUTING.md +4 -4
- package/README.md +1 -0
- package/agents/tool-planner.json +1 -1
- package/build/src/cli/init.js +242 -20
- package/build/src/cli/validate-workflow-artifacts.js +19 -2
- package/build/src/cli/verify.d.ts +1 -0
- package/build/src/cli/verify.js +90 -0
- package/build/src/cli/workflow-sidecar.d.ts +300 -8
- package/build/src/cli/workflow-sidecar.js +1934 -83
- package/build/src/cli.js +2 -3
- package/build/src/lib/flow-resolver.d.ts +82 -0
- package/build/src/lib/flow-resolver.js +237 -0
- package/build/src/tools/build-universal-bundles.js +34 -22
- package/build/src/tools/generate-context-map.js +3 -16
- package/build/src/tools/validate-source-tree.d.ts +1 -1
- package/build/src/tools/validate-source-tree.js +42 -162
- package/context/contracts/artifact-contract.md +10 -0
- package/context/contracts/delivery-contract.md +1 -0
- package/context/contracts/review-contract.md +1 -0
- package/context/contracts/verification-contract.md +2 -0
- package/context/gate-awareness.md +39 -0
- package/context/scripts/hooks/stop-goal-fit.js +632 -70
- package/docs/adr/0001-flow-agents-consumes-flow.md +1 -1
- package/docs/adr/0002-flow-kits-as-extension-unit.md +1 -1
- package/docs/adr/0004-gates-expect-surface-claims.md +2 -0
- package/docs/adr/0005-kubernetes-inspired-resource-contracts.md +2 -0
- package/docs/adr/0007-skill-audit.md +1 -1
- package/docs/adr/0009-canonical-hook-core-kit-boundary.md +95 -0
- package/docs/adr/0010-workflow-trust-state-as-hachure-bundle.md +139 -0
- package/docs/adr/0011-mcp-posture.md +100 -0
- package/docs/adr/0012-agent-coordination-as-liveness-claims.md +119 -0
- package/docs/adr/0013-context-lifecycle.md +151 -0
- package/docs/adr/0014-core-vs-domain-kit-boundary.md +143 -0
- package/docs/adr/0015-flow-flow-agents-boundary-reconciliation.md +120 -0
- package/docs/adr/0016-three-hard-boundary-model.md +71 -0
- package/docs/adr/0017-anti-gaming-trust-security-model.md +155 -0
- package/docs/agent-system-guidebook.md +5 -12
- package/docs/context-map.md +4 -10
- package/docs/index.md +3 -2
- package/docs/integrations/framework-adapter.md +19 -6
- package/docs/integrations/index.md +2 -2
- package/docs/north-star.md +4 -4
- package/docs/operating-layers.md +3 -3
- package/docs/plans/adr-0010-phase2-gate-recompute.md +55 -0
- package/docs/repository-structure.md +2 -2
- package/docs/skills-map.md +1 -0
- package/docs/spec/runtime-hook-surface.md +62 -9
- package/docs/standards-register.md +3 -3
- package/docs/survey-utterance-check.md +1 -1
- package/docs/trust-anchor-adoption.md +197 -0
- package/docs/verifiable-trust.md +95 -0
- package/docs/veritas-integration.md +2 -2
- package/docs/workflow-usage-guide.md +69 -0
- package/evals/acceptance/DEMO-false-completion.md +144 -0
- package/evals/acceptance/demo-cast.sh +92 -0
- package/evals/acceptance/demo-false-completion.sh +72 -0
- package/evals/acceptance/demo-real-evidence.sh +104 -0
- package/evals/acceptance/demo.tape +29 -0
- package/evals/acceptance/prove-capture-teeth-declared.sh +335 -0
- package/evals/acceptance/prove-capture-teeth.sh +114 -0
- package/evals/acceptance/prove-teeth.sh +105 -0
- package/evals/ci/antigaming-suite.sh +54 -0
- package/evals/ci/run-baseline.sh +2 -0
- package/evals/fixtures/flow-kit-repository/invalid-missing-extension-asset/flows/review.flow.json +26 -0
- package/evals/fixtures/flow-kit-repository/invalid-missing-extension-asset/kit.json +20 -0
- package/evals/fixtures/flow-kit-repository/valid-unknown-extension/flows/review.flow.json +26 -0
- package/evals/fixtures/flow-kit-repository/valid-unknown-extension/kit.json +18 -0
- package/evals/integration/test_builder_step_producers.sh +379 -0
- package/evals/integration/test_bundle_install.sh +35 -71
- package/evals/integration/test_bundle_lifecycle.sh +39 -2
- package/evals/integration/test_captured_fail_reconciliation.sh +820 -0
- package/evals/integration/test_checkpoint_signing.sh +489 -0
- package/evals/integration/test_claim_lookup.sh +352 -0
- package/evals/integration/test_command_log_integrity.sh +275 -0
- package/evals/integration/test_context_map.sh +0 -2
- package/evals/integration/test_dual_emit_flow_step.sh +278 -0
- package/evals/integration/test_enforcer_expects_driven.sh +281 -0
- package/evals/integration/test_evidence_capture_hook.sh +185 -0
- package/evals/integration/test_flow_kit_repository.sh +2 -0
- package/evals/integration/test_flowdef_session_activation.sh +273 -0
- package/evals/integration/test_flowdef_session_history_preservation.sh +250 -0
- package/evals/integration/test_gate_bypass_chain.sh +448 -0
- package/evals/integration/test_gate_lockdown.sh +1137 -0
- package/evals/integration/test_gate_review_inquiry_records.sh +399 -0
- package/evals/integration/test_goal_fit_escape_hatch.sh +73 -0
- package/evals/integration/test_goal_fit_hook.sh +69 -4
- package/evals/integration/test_goal_fit_rederive.sh +263 -0
- package/evals/integration/test_install_merge.sh +1176 -0
- package/evals/integration/test_mint_attestation.sh +373 -0
- package/evals/integration/test_phase_map_and_gate_claim.sh +365 -0
- package/evals/integration/test_publish_delivery.sh +269 -0
- package/evals/integration/test_reconcile_soundness.sh +528 -0
- package/evals/integration/test_resolvefirststep_security.sh +208 -0
- package/evals/integration/test_session_resume_roundtrip.sh +286 -0
- package/evals/integration/test_trust_checkpoint.sh +325 -0
- package/evals/integration/test_trust_reconcile.sh +293 -0
- package/evals/integration/test_verify_cli.sh +208 -0
- package/evals/integration/test_workflow_sidecar_writer.sh +549 -34
- package/evals/lib/node.sh +0 -6
- package/evals/run.sh +45 -0
- package/evals/static/test_workflow_skills.sh +6 -13
- package/install.sh +0 -7
- package/integrations/strands-ts/README.md +25 -15
- package/integrations/veritas/flow-agents.adapter.json +1 -2
- package/kits/builder/flows/build.flow.json +59 -12
- package/kits/builder/kit.json +85 -15
- package/kits/builder/skills/continue-work/SKILL.md +116 -0
- package/kits/builder/skills/deliver/SKILL.md +36 -6
- package/kits/builder/skills/design-probe/SKILL.md +28 -0
- package/kits/builder/skills/execute-plan/SKILL.md +9 -1
- package/kits/builder/skills/gate-review/SKILL.md +234 -0
- package/kits/builder/skills/learning-review/SKILL.md +30 -0
- package/kits/builder/skills/pickup-probe/SKILL.md +29 -0
- package/kits/builder/skills/plan-work/SKILL.md +13 -1
- package/kits/builder/skills/pull-work/SKILL.md +19 -0
- package/kits/knowledge/adapters/default-store/index.js +38 -0
- package/kits/knowledge/adapters/flow-runner/index.js +1620 -0
- package/kits/knowledge/adapters/obsidian-store/index.js +36 -6
- package/kits/knowledge/docs/store-contract.md +314 -0
- package/kits/knowledge/evals/audit-freshness/suite.test.js +368 -0
- package/kits/knowledge/evals/canonicalize-category/suite.test.js +383 -0
- package/kits/knowledge/evals/contract-suite/suite.test.js +111 -0
- package/kits/knowledge/evals/detect-contradictions/suite.test.js +324 -0
- package/kits/knowledge/evals/entities/suite.test.js +40 -0
- package/kits/knowledge/evals/glossary-sync/suite.test.js +416 -0
- package/kits/knowledge/evals/hygiene-review/suite.test.js +396 -0
- package/kits/knowledge/evals/retirement/suite.test.js +145 -0
- package/kits/knowledge/flows/audit-freshness.flow.json +44 -0
- package/kits/knowledge/flows/canonicalize-category.flow.json +44 -0
- package/kits/knowledge/flows/detect-contradictions.flow.json +44 -0
- package/kits/knowledge/flows/glossary-sync.flow.json +61 -0
- package/kits/knowledge/flows/hygiene-review.flow.json +43 -0
- package/kits/knowledge/kit.json +51 -1
- package/package.json +4 -4
- package/packaging/conformance/README.md +10 -2
- package/packaging/conformance/fixtures/evidence-capture--allow-records-command.json +29 -0
- package/packaging/conformance/fixtures/stop-goal-fit--block-bundle-disputed-claim.json +29 -0
- package/packaging/conformance/fixtures/stop-goal-fit--block-capture-contradicts-claimed-pass.json +30 -0
- package/packaging/conformance/fixtures/stop-goal-fit--block-mode.json +23 -0
- package/packaging/conformance/fixtures/stop-goal-fit--off-mode.json +24 -0
- package/packaging/conformance/fixtures/stop-goal-fit--warn-active-delivery.json +5 -2
- package/packaging/conformance/fixtures/stop-goal-fit--warn-no-bundle.json +23 -0
- package/packaging/conformance/fixtures/workflow-steering--reground-active-prompt.json +30 -0
- package/packaging/conformance/fixtures/workflow-steering--reground-session-start.json +30 -0
- package/packaging/conformance/run-conformance.js +1 -1
- package/scripts/README.md +2 -1
- package/scripts/build-universal-bundles.js +0 -1
- package/scripts/ci/mint-attestation.js +221 -0
- package/scripts/ci/trust-reconcile.js +545 -0
- package/scripts/hooks/config-protection.js +423 -1
- package/scripts/hooks/evidence-capture.js +348 -0
- package/scripts/hooks/lib/liveness-read.js +113 -0
- package/scripts/hooks/run-hook.js +6 -1
- package/scripts/hooks/stop-goal-fit.js +1471 -79
- package/scripts/hooks/workflow-steering.js +135 -5
- package/scripts/install-codex-home.sh +39 -0
- package/scripts/install-merge.js +330 -0
- package/src/cli/init.ts +218 -20
- package/src/cli/validate-workflow-artifacts.ts +18 -2
- package/src/cli/verify.ts +100 -0
- package/src/cli/workflow-sidecar.ts +2064 -77
- package/src/cli.ts +2 -3
- package/src/lib/flow-resolver.ts +284 -0
- package/src/tools/build-universal-bundles.ts +34 -21
- package/src/tools/generate-context-map.ts +3 -17
- package/src/tools/validate-source-tree.ts +44 -104
- package/build/src/tools/filter-installed-packs.d.ts +0 -2
- package/build/src/tools/filter-installed-packs.js +0 -135
- package/packaging/packs.json +0 -49
- package/scripts/filter-installed-packs.js +0 -2
- package/src/tools/filter-installed-packs.ts +0 -132
package/evals/lib/node.sh
CHANGED
|
@@ -21,12 +21,6 @@ flow_agents_node() {
|
|
|
21
21
|
node "$FLOW_AGENTS_EVAL_ROOT/build/src/cli.js" context-map "$@"
|
|
22
22
|
return
|
|
23
23
|
;;
|
|
24
|
-
*/scripts/filter-installed-packs.js|scripts/filter-installed-packs.js)
|
|
25
|
-
shift
|
|
26
|
-
flow_agents_build_ts || return
|
|
27
|
-
node "$FLOW_AGENTS_EVAL_ROOT/build/src/cli.js" filter-installed-packs "$@"
|
|
28
|
-
return
|
|
29
|
-
;;
|
|
30
24
|
workflow-sidecar)
|
|
31
25
|
shift
|
|
32
26
|
flow_agents_build_ts || return
|
package/evals/run.sh
CHANGED
|
@@ -165,6 +165,12 @@ run_integration() {
|
|
|
165
165
|
echo ""
|
|
166
166
|
bash "$EVAL_DIR/integration/test_goal_fit_hook.sh" || result=1
|
|
167
167
|
echo ""
|
|
168
|
+
bash "$EVAL_DIR/integration/test_goal_fit_escape_hatch.sh" || result=1
|
|
169
|
+
echo ""
|
|
170
|
+
bash "$EVAL_DIR/integration/test_goal_fit_rederive.sh" || result=1
|
|
171
|
+
echo ""
|
|
172
|
+
bash "$EVAL_DIR/integration/test_evidence_capture_hook.sh" || result=1
|
|
173
|
+
echo ""
|
|
168
174
|
bash "$EVAL_DIR/integration/test_hook_category_behaviors.sh" || result=1
|
|
169
175
|
echo ""
|
|
170
176
|
bash "$EVAL_DIR/integration/test_workflow_artifacts.sh" || result=1
|
|
@@ -181,6 +187,8 @@ run_integration() {
|
|
|
181
187
|
echo ""
|
|
182
188
|
bash "$EVAL_DIR/integration/test_workflow_steering_hook.sh" || result=1
|
|
183
189
|
echo ""
|
|
190
|
+
bash "$EVAL_DIR/integration/test_session_resume_roundtrip.sh" || result=1
|
|
191
|
+
echo ""
|
|
184
192
|
bash "$EVAL_DIR/integration/test_hook_influence_cases.sh" || result=1
|
|
185
193
|
echo ""
|
|
186
194
|
bash "$EVAL_DIR/integration/test_flow_agents_statusline.sh" || result=1
|
|
@@ -198,6 +206,43 @@ run_integration() {
|
|
|
198
206
|
bash "$EVAL_DIR/integration/test_bundle_lifecycle.sh" || result=1
|
|
199
207
|
echo ""
|
|
200
208
|
bash "$EVAL_DIR/integration/test_kit_conformance_levels.sh" || result=1
|
|
209
|
+
echo ""
|
|
210
|
+
bash "$EVAL_DIR/integration/test_dual_emit_flow_step.sh" || result=1
|
|
211
|
+
echo ""
|
|
212
|
+
bash "$EVAL_DIR/integration/test_enforcer_expects_driven.sh" || result=1
|
|
213
|
+
echo ""
|
|
214
|
+
bash "$EVAL_DIR/integration/test_phase_map_and_gate_claim.sh" || result=1
|
|
215
|
+
echo ""
|
|
216
|
+
bash "$EVAL_DIR/integration/test_builder_step_producers.sh" || result=1
|
|
217
|
+
echo ""
|
|
218
|
+
bash "$EVAL_DIR/integration/test_flowdef_session_history_preservation.sh" || result=1
|
|
219
|
+
echo ""
|
|
220
|
+
bash "$EVAL_DIR/integration/test_flowdef_session_activation.sh" || result=1
|
|
221
|
+
echo ""
|
|
222
|
+
bash "$EVAL_DIR/integration/test_trust_checkpoint.sh" || result=1
|
|
223
|
+
echo ""
|
|
224
|
+
bash "$EVAL_DIR/integration/test_checkpoint_signing.sh" || result=1
|
|
225
|
+
echo ""
|
|
226
|
+
bash "$EVAL_DIR/integration/test_gate_bypass_chain.sh" || result=1
|
|
227
|
+
echo ""
|
|
228
|
+
bash "$EVAL_DIR/integration/test_command_log_integrity.sh" || result=1
|
|
229
|
+
echo ""
|
|
230
|
+
bash "$EVAL_DIR/integration/test_gate_lockdown.sh" || result=1
|
|
231
|
+
echo ""
|
|
232
|
+
bash "$EVAL_DIR/integration/test_resolvefirststep_security.sh" || result=1
|
|
233
|
+
bash "$EVAL_DIR/integration/test_captured_fail_reconciliation.sh" || result=1
|
|
234
|
+
echo ""
|
|
235
|
+
bash "$EVAL_DIR/integration/test_trust_reconcile.sh" || result=1
|
|
236
|
+
echo ""
|
|
237
|
+
bash "$EVAL_DIR/integration/test_reconcile_soundness.sh" || result=1
|
|
238
|
+
echo ""
|
|
239
|
+
bash "$EVAL_DIR/integration/test_publish_delivery.sh" || result=1
|
|
240
|
+
echo ""
|
|
241
|
+
bash "$EVAL_DIR/integration/test_mint_attestation.sh" || result=1
|
|
242
|
+
echo ""
|
|
243
|
+
bash "$EVAL_DIR/integration/test_verify_cli.sh" || result=1
|
|
244
|
+
echo ""
|
|
245
|
+
bash "$EVAL_DIR/acceptance/prove-capture-teeth-declared.sh" || result=1
|
|
201
246
|
return $result
|
|
202
247
|
}
|
|
203
248
|
|
|
@@ -65,6 +65,7 @@ PLAN_WORK="$ROOT/kits/builder/skills/plan-work/SKILL.md"
|
|
|
65
65
|
EXECUTE_PLAN="$ROOT/kits/builder/skills/execute-plan/SKILL.md"
|
|
66
66
|
REVIEW_WORK="$ROOT/kits/builder/skills/review-work/SKILL.md"
|
|
67
67
|
VERIFY_WORK="$ROOT/kits/builder/skills/verify-work/SKILL.md"
|
|
68
|
+
GATE_REVIEW="$ROOT/kits/builder/skills/gate-review/SKILL.md"
|
|
68
69
|
MAP="$ROOT/docs/skills-map.md"
|
|
69
70
|
ROOT_CONTEXT="$ROOT/CONTEXT.md"
|
|
70
71
|
CONTEXT_MAP="$ROOT/docs/context-map.md"
|
|
@@ -103,7 +104,6 @@ EFFECTIVE_BACKLOG_SETTINGS="$ROOT/src/cli/effective-backlog-settings.ts"
|
|
|
103
104
|
PULL_WORK_PROVIDER="$ROOT/src/cli/pull-work-provider.ts"
|
|
104
105
|
PULL_WORK_PROVIDER_INTEGRATION="$ROOT/evals/integration/test_pull_work_provider.sh"
|
|
105
106
|
PACKAGE_MANIFEST="$ROOT/packaging/manifest.json"
|
|
106
|
-
PACKS_MANIFEST="$ROOT/packaging/packs.json"
|
|
107
107
|
TOOL_PLANNER="$ROOT/agents/tool-planner.json"
|
|
108
108
|
TOOL_WORKER="$ROOT/agents/tool-worker.json"
|
|
109
109
|
TOOL_CODE_REVIEWER="$ROOT/agents/tool-code-reviewer.json"
|
|
@@ -114,7 +114,6 @@ DEV_PROMPTFOO="$ROOT/evals/cases/dev/promptfooconfig.yaml"
|
|
|
114
114
|
GOAL_FIT_HOOK="$ROOT/scripts/hooks/stop-goal-fit.js"
|
|
115
115
|
WORKFLOW_STEERING_HOOK="$ROOT/scripts/hooks/workflow-steering.js"
|
|
116
116
|
CONTEXT_MAP_GENERATOR="$ROOT/src/tools/generate-context-map.ts"
|
|
117
|
-
PACK_FILTER="$ROOT/src/tools/filter-installed-packs.ts"
|
|
118
117
|
PROMOTE_DOC="$ROOT/src/cli/promote-workflow-artifact.ts"
|
|
119
118
|
ARTIFACT_VALIDATOR="$ROOT/src/cli/validate-workflow-artifacts.ts"
|
|
120
119
|
SIDECAR_WRITER="$ROOT/src/cli/workflow-sidecar.ts"
|
|
@@ -167,6 +166,10 @@ require_file "$PLAN_WORK" "plan-work skill"
|
|
|
167
166
|
require_file "$EXECUTE_PLAN" "execute-plan skill"
|
|
168
167
|
require_file "$REVIEW_WORK" "review-work skill"
|
|
169
168
|
require_file "$VERIFY_WORK" "verify-work skill"
|
|
169
|
+
require_file "$GATE_REVIEW" "gate-review skill"
|
|
170
|
+
require_text "$GATE_REVIEW" 'advisory' "gate-review skill marks proposals as advisory"
|
|
171
|
+
reject_text "$GATE_REVIEW" 'auto_applied\|auto-apply' "gate-review skill does not auto-apply fixes"
|
|
172
|
+
require_text "$GATE_REVIEW" 'trust\.bundle' "gate-review skill references trust.bundle input"
|
|
170
173
|
require_file "$MAP" "skills map"
|
|
171
174
|
require_file "$ROOT_CONTEXT" "Flow Agents context glossary"
|
|
172
175
|
require_file "$CONTEXT_MAP" "context map"
|
|
@@ -190,7 +193,6 @@ require_file "$VERIFICATION_CONTRACT" "verification contract"
|
|
|
190
193
|
require_file "$REVIEW_CONTRACT" "review contract"
|
|
191
194
|
require_file "$DELIVERY_CONTRACT" "delivery contract"
|
|
192
195
|
require_file "$PACKAGE_MANIFEST" "packaging manifest"
|
|
193
|
-
require_file "$PACKS_MANIFEST" "pack manifest"
|
|
194
196
|
require_file "$TOOL_PLANNER" "tool-planner agent"
|
|
195
197
|
require_file "$TOOL_WORKER" "tool-worker agent"
|
|
196
198
|
require_file "$TOOL_CODE_REVIEWER" "tool-code-reviewer agent"
|
|
@@ -201,7 +203,6 @@ require_file "$DEV_PROMPTFOO" "dev behavioral eval config"
|
|
|
201
203
|
require_file "$GOAL_FIT_HOOK" "goal-fit stop hook"
|
|
202
204
|
require_file "$WORKFLOW_STEERING_HOOK" "workflow steering hook"
|
|
203
205
|
require_file "$CONTEXT_MAP_GENERATOR" "context map generator"
|
|
204
|
-
require_file "$PACK_FILTER" "pack filter helper"
|
|
205
206
|
require_file "$EFFECTIVE_BACKLOG_SETTINGS" "effective backlog settings helper"
|
|
206
207
|
require_file "$PULL_WORK_PROVIDER" "pull-work provider normalizer"
|
|
207
208
|
require_file "$PULL_WORK_PROVIDER_INTEGRATION" "pull-work provider integration test"
|
|
@@ -466,8 +467,6 @@ require_text "$WORKFLOW_STEERING_HOOK" 'docs/context-map.md' "workflow steering
|
|
|
466
467
|
require_text "$WORKFLOW_STEERING_HOOK" 'next_action' "workflow steering hook uses next action"
|
|
467
468
|
require_text "$WORKFLOW_STEERING_HOOK" 'stateNeedsAmbientSteering' "workflow steering hook supports ambient state guidance"
|
|
468
469
|
require_text "$CONTEXT_MAP_GENERATOR" 'check' "context map generator supports drift check"
|
|
469
|
-
require_text "$PACK_FILTER" 'selected_packs' "pack filter records selected packs"
|
|
470
|
-
require_text "$PACK_FILTER" 'known.*keep' "pack filter prunes only known Flow Agents entries"
|
|
471
470
|
require_text "$PROMOTE_DOC" 'docs/delivery' "promotion helper writes long-lived delivery docs"
|
|
472
471
|
require_text "$PROMOTE_DOC" 'archived_artifact' "promotion helper links archived artifact"
|
|
473
472
|
require_text "$SIDECAR_WRITER" 'init-plan' "sidecar writer initializes planning sidecars"
|
|
@@ -1098,13 +1097,7 @@ require_text "$MAP" 'commit/branch/PR/CI links' "map captures PR and CI links be
|
|
|
1098
1097
|
require_text "$CONTEXT_MAP" 'Repository Shape' "context map includes repo shape"
|
|
1099
1098
|
require_text "$CONTEXT_MAP" 'Core Commands' "context map includes commands"
|
|
1100
1099
|
require_text "$CONTEXT_MAP" 'Workflow Sidecars' "context map includes sidecars"
|
|
1101
|
-
require_text "$
|
|
1102
|
-
require_text "$PACKS_MANIFEST" '"name": "core"' "pack manifest defines core pack"
|
|
1103
|
-
require_text "$PACKS_MANIFEST" '"default": true' "pack manifest defines default pack"
|
|
1104
|
-
require_text "$PACKS_MANIFEST" '"name": "development"' "pack manifest defines development pack"
|
|
1105
|
-
require_text "$PACKS_MANIFEST" '"eval-rebuild"' "pack manifest includes eval-rebuild"
|
|
1106
|
-
require_text "$ROOT/scripts/build-universal-bundles.js" 'FLOW_AGENTS_PACKS' "bundle installer supports pack filtering"
|
|
1107
|
-
require_text "$ROOT/evals/integration/test_bundle_install.sh" 'core-pack install keeps core agents' "bundle install test covers pack filtering"
|
|
1100
|
+
require_text "$ROOT/evals/integration/test_bundle_install.sh" 'full install ships the complete agent base' "bundle install test covers full standalone base"
|
|
1108
1101
|
require_text "$CONTEXT_MAP" 'Context Loading Rules' "context map includes loading rules"
|
|
1109
1102
|
require_text "$PAGES_INDEX" 'context-map.html' "docs index links context map"
|
|
1110
1103
|
require_text "$PAGES_INDEX" 'veritas-integration.html' "docs index links Veritas boundary"
|
package/install.sh
CHANGED
|
@@ -51,13 +51,6 @@ SRC="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
|
|
|
51
51
|
|
|
52
52
|
mkdir -p "$DEST"
|
|
53
53
|
rsync -a "$SRC"/ "$DEST"/
|
|
54
|
-
if [[ -n "${FLOW_AGENTS_PACKS:-}" ]]; then
|
|
55
|
-
FILTER_SCRIPT="$DEST/scripts/filter-installed-packs.mjs"
|
|
56
|
-
if [[ ! -f "$FILTER_SCRIPT" ]]; then
|
|
57
|
-
FILTER_SCRIPT="$DEST/scripts/filter-installed-packs.js"
|
|
58
|
-
fi
|
|
59
|
-
node "$FILTER_SCRIPT" "$DEST" --packs "$FLOW_AGENTS_PACKS"
|
|
60
|
-
fi
|
|
61
54
|
if [[ ${#CONSOLE_CONFIG_ARGS[@]} -gt 0 || -n "${FLOW_AGENTS_TELEMETRY_SINK:-}" || -n "${FLOW_AGENTS_TELEMETRY_SINKS:-}" || -n "${FLOW_AGENTS_CONSOLE_URL:-}" || -n "${CONSOLE_TELEMETRY_URL:-}" || -n "${CONSOLE_URL:-}" || -n "${FLOW_AGENTS_CONSOLE_TOKEN_FILE:-}" || -n "${CONSOLE_TELEMETRY_TOKEN_FILE:-}" ]]; then
|
|
62
55
|
bash "$DEST/scripts/telemetry/install-console-config.sh" "$DEST/scripts/telemetry/telemetry.conf" "${CONSOLE_CONFIG_ARGS[@]}"
|
|
63
56
|
fi
|
|
@@ -2,7 +2,7 @@
|
|
|
2
2
|
|
|
3
3
|
**Native-import TypeScript adapter for AWS Strands Agents.**
|
|
4
4
|
|
|
5
|
-
This is the first native-import consumer of the Flow Agents policy engine contract. It wires Flow Agents telemetry
|
|
5
|
+
This is the first native-import consumer of the Flow Agents policy engine contract. It wires Flow Agents telemetry and native config-protection directly into Strands Agents TypeScript SDK hook callbacks — with no subprocess overhead for the critical hot path (config-protection on `BeforeToolCallEvent`). Workflow steering, quality-gate, and stop-goal-fit checks are exercised by the conformance shim only, not by the production `FlowAgentsHooks` callbacks.
|
|
6
6
|
|
|
7
7
|
---
|
|
8
8
|
|
|
@@ -15,13 +15,24 @@ This is the first native-import consumer of the Flow Agents policy engine contra
|
|
|
15
15
|
| Hot path latency | ~0 ms (direct function call) | ~50–100 ms per call (process spawn) |
|
|
16
16
|
| Strands SDK optional? | Yes — duck-typed, SDK not required to build/test | Yes |
|
|
17
17
|
| Config-protection | Native `run()` call | Subprocess, with Python fallback |
|
|
18
|
-
| Other policies (steering, quality-gate, stop-goal-fit) |
|
|
19
|
-
| Conformance
|
|
18
|
+
| Other policies (steering, quality-gate, stop-goal-fit) | Subprocess checks in the conformance shim | Via subprocess |
|
|
19
|
+
| Conformance target | L2-targeted policy coverage via conformance shim | L0 (+ config-protection) |
|
|
20
20
|
|
|
21
21
|
The key innovation: `config-protection.js` exports `module.exports = { run }`. This adapter calls that function directly from the Node.js process, bypassing the subprocess round-trip for every `BeforeToolCallEvent` write call.
|
|
22
22
|
|
|
23
23
|
---
|
|
24
24
|
|
|
25
|
+
## Capability states
|
|
26
|
+
|
|
27
|
+
| Capability | State | Public behavior |
|
|
28
|
+
| --- | --- | --- |
|
|
29
|
+
| Telemetry callbacks | shipped | `FlowAgentsHooks` emits canonical JSONL events from Strands TS lifecycle callbacks. |
|
|
30
|
+
| Config-protection hot path | shipped | `BeforeToolCallEvent` write-like tools call the native `config-protection.js` `run()` export and can block via `event.cancel`. |
|
|
31
|
+
| Workflow steering L2 behavior | structural-only | The shim can exercise the canonical policy for L2-targeted fixtures; production callbacks emit telemetry only and do not inject per-turn steering. |
|
|
32
|
+
| Quality-gate L2 behavior | structural-only | The shim invokes `quality-gate.js` for conformance checks; production callbacks do not run quality gates after tool calls. |
|
|
33
|
+
| Stop-goal-fit L2 behavior | structural-only | The shim invokes `stop-goal-fit.js` for conformance checks; production callbacks emit stop telemetry only. |
|
|
34
|
+
| Analytics channel, Console/HTTP sink, subagent events, permission requests, token usage | unavailable | These gaps are not wired in this adapter. |
|
|
35
|
+
|
|
25
36
|
## Quickstart
|
|
26
37
|
|
|
27
38
|
```typescript
|
|
@@ -136,19 +147,18 @@ If blocked, `event.cancel` is set to the block reason. Strands cancels the tool
|
|
|
136
147
|
|
|
137
148
|
## Conformance
|
|
138
149
|
|
|
139
|
-
Tested against the Flow Agents conformance kit (`packaging/conformance/`)
|
|
150
|
+
Tested against the Flow Agents conformance kit (`packaging/conformance/`) through `bin/conformance-shim.mjs`:
|
|
140
151
|
|
|
141
152
|
```yaml
|
|
142
|
-
|
|
153
|
+
conformance_target: L2 via conformance shim
|
|
143
154
|
engine_contract_version: "1.0"
|
|
144
155
|
runner_version: "run-conformance.js"
|
|
145
|
-
test_date: 2026-06-11
|
|
146
|
-
verdict: PASS
|
|
147
|
-
fixture_count: 12
|
|
148
|
-
fixtures_passed: 12
|
|
149
|
-
gaps: []
|
|
150
156
|
```
|
|
151
157
|
|
|
158
|
+
This is a conformance-shim target, not a production callback capability. The shipped native adapter behavior is telemetry callbacks plus native config-protection blocking; the shim supplies workflow steering, quality-gate, and stop-goal-fit subprocess coverage so the canonical L2 fixtures can be exercised without claiming those callbacks are production Strands TS behavior. Treat the runner output as the current status for that target.
|
|
159
|
+
|
|
160
|
+
Current status: the L2 target is not passing. The runner currently reports 18/20 fixtures passing with highest achieved level L0; `stop-goal-fit--warn-active-delivery.json` and `workflow-steering--reground-session-start.json` remain failing.
|
|
161
|
+
|
|
152
162
|
Run the conformance test from the repo root:
|
|
153
163
|
|
|
154
164
|
```bash
|
|
@@ -176,7 +186,7 @@ node --test integrations/strands-ts/dist/test/test-telemetry.js \
|
|
|
176
186
|
|
|
177
187
|
1. **No per-turn workflow steering injection**: Strands' `BeforeInvocationEvent` does not expose a mutable system prompt. Unlike the harness adapters which inject workflow state at each `UserPromptSubmit`, this adapter emits the telemetry event only. Productization requires upstream SDK support or a custom model wrapper.
|
|
178
188
|
|
|
179
|
-
2. **Quality-gate and stop-goal-fit via subprocess in conformance shim only**: The production `FlowAgentsHooks` callbacks don't wire `quality-gate.js` or `stop-goal-fit.js` (they have no clear Strands analogue for direct callback injection). The `bin/conformance-shim.mjs` shim wires them via subprocess
|
|
189
|
+
2. **Quality-gate and stop-goal-fit via subprocess in conformance shim only**: The production `FlowAgentsHooks` callbacks don't wire `quality-gate.js` or `stop-goal-fit.js` (they have no clear Strands analogue for direct callback injection). The `bin/conformance-shim.mjs` shim wires them via subprocess to expose current target coverage and gaps.
|
|
180
190
|
|
|
181
191
|
3. **session.usage event omitted**: The `AfterInvocationEvent` does not expose token usage in the Strands TS SDK hook payload.
|
|
182
192
|
|
|
@@ -190,10 +200,10 @@ node --test integrations/strands-ts/dist/test/test-telemetry.js \
|
|
|
190
200
|
|
|
191
201
|
---
|
|
192
202
|
|
|
193
|
-
## Conformance
|
|
203
|
+
## Conformance status
|
|
194
204
|
|
|
195
205
|
```
|
|
196
|
-
|
|
206
|
+
conformance_target: L2 via conformance-shim.mjs
|
|
197
207
|
host: AWS Strands Agents TypeScript SDK
|
|
198
208
|
event_coverage:
|
|
199
209
|
agentSpawn: emitSessionStart() — full fidelity
|
|
@@ -220,5 +230,5 @@ canonical event types (`session.start`, `turn.user`, `tool.invoke`,
|
|
|
220
230
|
`tool.result`, `session.end`) on 2026-06-11. The TypeScript SDK currently
|
|
221
231
|
ships only a Bedrock model provider, so this adapter's live-agent run requires
|
|
222
232
|
AWS credentials; its correctness is covered by the real-engine tests and the
|
|
223
|
-
|
|
224
|
-
TS SDK is a candidate follow-up if keyless live runs are wanted here too.
|
|
233
|
+
conformance-shim validation path above. An Ollama `Model` implementation for
|
|
234
|
+
the TS SDK is a candidate follow-up if keyless live runs are wanted here too.
|
|
@@ -1,6 +1,16 @@
|
|
|
1
1
|
{
|
|
2
2
|
"id": "builder.build",
|
|
3
3
|
"version": "1.0",
|
|
4
|
+
"phase_map": {
|
|
5
|
+
"pickup": "pull-work",
|
|
6
|
+
"planning": "plan",
|
|
7
|
+
"execution": "execute",
|
|
8
|
+
"verification": "verify",
|
|
9
|
+
"goal_fit": "merge-ready",
|
|
10
|
+
"evidence": "merge-ready",
|
|
11
|
+
"release": "pr-open",
|
|
12
|
+
"learning": "learn"
|
|
13
|
+
},
|
|
4
14
|
"steps": [
|
|
5
15
|
{ "id": "pull-work", "next": "design-probe" },
|
|
6
16
|
{ "id": "design-probe", "next": "plan" },
|
|
@@ -25,7 +35,10 @@
|
|
|
25
35
|
"bundle_claim": {
|
|
26
36
|
"claimType": "builder.pull-work.selected",
|
|
27
37
|
"subjectType": "work-item",
|
|
28
|
-
"accepted_statuses": [
|
|
38
|
+
"accepted_statuses": [
|
|
39
|
+
"trusted",
|
|
40
|
+
"accepted"
|
|
41
|
+
]
|
|
29
42
|
}
|
|
30
43
|
}
|
|
31
44
|
]
|
|
@@ -41,7 +54,10 @@
|
|
|
41
54
|
"bundle_claim": {
|
|
42
55
|
"claimType": "builder.design-probe.pickup-readiness",
|
|
43
56
|
"subjectType": "work-item",
|
|
44
|
-
"accepted_statuses": [
|
|
57
|
+
"accepted_statuses": [
|
|
58
|
+
"trusted",
|
|
59
|
+
"accepted"
|
|
60
|
+
]
|
|
45
61
|
}
|
|
46
62
|
},
|
|
47
63
|
{
|
|
@@ -52,7 +68,10 @@
|
|
|
52
68
|
"bundle_claim": {
|
|
53
69
|
"claimType": "builder.design-probe.decisions",
|
|
54
70
|
"subjectType": "decision",
|
|
55
|
-
"accepted_statuses": [
|
|
71
|
+
"accepted_statuses": [
|
|
72
|
+
"trusted",
|
|
73
|
+
"accepted"
|
|
74
|
+
]
|
|
56
75
|
}
|
|
57
76
|
}
|
|
58
77
|
]
|
|
@@ -68,7 +87,10 @@
|
|
|
68
87
|
"bundle_claim": {
|
|
69
88
|
"claimType": "builder.plan.implementation",
|
|
70
89
|
"subjectType": "artifact",
|
|
71
|
-
"accepted_statuses": [
|
|
90
|
+
"accepted_statuses": [
|
|
91
|
+
"trusted",
|
|
92
|
+
"accepted"
|
|
93
|
+
]
|
|
72
94
|
}
|
|
73
95
|
}
|
|
74
96
|
]
|
|
@@ -84,7 +106,10 @@
|
|
|
84
106
|
"bundle_claim": {
|
|
85
107
|
"claimType": "builder.execute.scope",
|
|
86
108
|
"subjectType": "change",
|
|
87
|
-
"accepted_statuses": [
|
|
109
|
+
"accepted_statuses": [
|
|
110
|
+
"trusted",
|
|
111
|
+
"accepted"
|
|
112
|
+
]
|
|
88
113
|
}
|
|
89
114
|
}
|
|
90
115
|
]
|
|
@@ -111,7 +136,10 @@
|
|
|
111
136
|
"bundle_claim": {
|
|
112
137
|
"claimType": "builder.verify.tests",
|
|
113
138
|
"subjectType": "flow-step",
|
|
114
|
-
"accepted_statuses": [
|
|
139
|
+
"accepted_statuses": [
|
|
140
|
+
"trusted",
|
|
141
|
+
"accepted"
|
|
142
|
+
]
|
|
115
143
|
}
|
|
116
144
|
},
|
|
117
145
|
{
|
|
@@ -123,7 +151,11 @@
|
|
|
123
151
|
"bundle_claim": {
|
|
124
152
|
"claimType": "builder.verify.policy-compliance",
|
|
125
153
|
"subjectType": "artifact",
|
|
126
|
-
"accepted_statuses": [
|
|
154
|
+
"accepted_statuses": [
|
|
155
|
+
"trusted",
|
|
156
|
+
"accepted",
|
|
157
|
+
"advisory"
|
|
158
|
+
]
|
|
127
159
|
}
|
|
128
160
|
}
|
|
129
161
|
]
|
|
@@ -150,7 +182,10 @@
|
|
|
150
182
|
"bundle_claim": {
|
|
151
183
|
"claimType": "builder.merge-ready.readiness",
|
|
152
184
|
"subjectType": "change",
|
|
153
|
-
"accepted_statuses": [
|
|
185
|
+
"accepted_statuses": [
|
|
186
|
+
"trusted",
|
|
187
|
+
"accepted"
|
|
188
|
+
]
|
|
154
189
|
}
|
|
155
190
|
}
|
|
156
191
|
]
|
|
@@ -166,7 +201,10 @@
|
|
|
166
201
|
"bundle_claim": {
|
|
167
202
|
"claimType": "builder.pr-open.pull-request",
|
|
168
203
|
"subjectType": "pull-request",
|
|
169
|
-
"accepted_statuses": [
|
|
204
|
+
"accepted_statuses": [
|
|
205
|
+
"trusted",
|
|
206
|
+
"accepted"
|
|
207
|
+
]
|
|
170
208
|
}
|
|
171
209
|
}
|
|
172
210
|
]
|
|
@@ -182,7 +220,10 @@
|
|
|
182
220
|
"bundle_claim": {
|
|
183
221
|
"claimType": "builder.merge-ready-ci.readiness",
|
|
184
222
|
"subjectType": "pull-request",
|
|
185
|
-
"accepted_statuses": [
|
|
223
|
+
"accepted_statuses": [
|
|
224
|
+
"trusted",
|
|
225
|
+
"accepted"
|
|
226
|
+
]
|
|
186
227
|
}
|
|
187
228
|
}
|
|
188
229
|
]
|
|
@@ -198,7 +239,10 @@
|
|
|
198
239
|
"bundle_claim": {
|
|
199
240
|
"claimType": "builder.learn.decisions",
|
|
200
241
|
"subjectType": "decision",
|
|
201
|
-
"accepted_statuses": [
|
|
242
|
+
"accepted_statuses": [
|
|
243
|
+
"trusted",
|
|
244
|
+
"accepted"
|
|
245
|
+
]
|
|
202
246
|
}
|
|
203
247
|
},
|
|
204
248
|
{
|
|
@@ -209,7 +253,10 @@
|
|
|
209
253
|
"bundle_claim": {
|
|
210
254
|
"claimType": "builder.learn.evidence",
|
|
211
255
|
"subjectType": "release",
|
|
212
|
-
"accepted_statuses": [
|
|
256
|
+
"accepted_statuses": [
|
|
257
|
+
"trusted",
|
|
258
|
+
"accepted"
|
|
259
|
+
]
|
|
213
260
|
}
|
|
214
261
|
}
|
|
215
262
|
]
|
package/kits/builder/kit.json
CHANGED
|
@@ -17,20 +17,90 @@
|
|
|
17
17
|
}
|
|
18
18
|
],
|
|
19
19
|
"skills": [
|
|
20
|
-
{
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
{
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
{
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
20
|
+
{
|
|
21
|
+
"id": "builder.builder-shape",
|
|
22
|
+
"path": "skills/builder-shape/SKILL.md",
|
|
23
|
+
"description": "Invoke Builder Kit shape from a raw idea or the current conversation context."
|
|
24
|
+
},
|
|
25
|
+
{
|
|
26
|
+
"id": "builder.continue-work",
|
|
27
|
+
"path": "skills/continue-work/SKILL.md",
|
|
28
|
+
"description": "Advance a multi-slice work item to its next increment via a fresh-context handoff, routing the next slice through pull-work + pickup-probe."
|
|
29
|
+
},
|
|
30
|
+
{
|
|
31
|
+
"id": "builder.deliver",
|
|
32
|
+
"path": "skills/deliver/SKILL.md",
|
|
33
|
+
"description": "Delivery workflow — selected work to delivered code."
|
|
34
|
+
},
|
|
35
|
+
{
|
|
36
|
+
"id": "builder.design-probe",
|
|
37
|
+
"path": "skills/design-probe/SKILL.md",
|
|
38
|
+
"description": "One-question-at-a-time design probing interview."
|
|
39
|
+
},
|
|
40
|
+
{
|
|
41
|
+
"id": "builder.evidence-gate",
|
|
42
|
+
"path": "skills/evidence-gate/SKILL.md",
|
|
43
|
+
"description": "Evaluate whether completed work is trustworthy enough for human review, merge, or release."
|
|
44
|
+
},
|
|
45
|
+
{
|
|
46
|
+
"id": "builder.gate-review",
|
|
47
|
+
"path": "skills/gate-review/SKILL.md",
|
|
48
|
+
"description": "Enumerate gate fires and suspected misses from the session trust.bundle; classify as correct/false_block/missed_block; route findings to learning-review; propose advisory fixes."
|
|
49
|
+
},
|
|
50
|
+
{
|
|
51
|
+
"id": "builder.execute-plan",
|
|
52
|
+
"path": "skills/execute-plan/SKILL.md",
|
|
53
|
+
"description": "Parallel execution primitive — plan artifact path to implemented code."
|
|
54
|
+
},
|
|
55
|
+
{
|
|
56
|
+
"id": "builder.fix-bug",
|
|
57
|
+
"path": "skills/fix-bug/SKILL.md",
|
|
58
|
+
"description": "Bug fix orchestrator — diagnose, plan, execute, review, verify, loop."
|
|
59
|
+
},
|
|
60
|
+
{
|
|
61
|
+
"id": "builder.idea-to-backlog",
|
|
62
|
+
"path": "skills/idea-to-backlog/SKILL.md",
|
|
63
|
+
"description": "Turn raw ideas into shaped, prioritized, executable GitHub issue backlog."
|
|
64
|
+
},
|
|
65
|
+
{
|
|
66
|
+
"id": "builder.learning-review",
|
|
67
|
+
"path": "skills/learning-review/SKILL.md",
|
|
68
|
+
"description": "Capture post-merge learnings and feed them back into backlog, skills, tests, or knowledge."
|
|
69
|
+
},
|
|
70
|
+
{
|
|
71
|
+
"id": "builder.pickup-probe",
|
|
72
|
+
"path": "skills/pickup-probe/SKILL.md",
|
|
73
|
+
"description": "Builder Kit work-item/docs/provider-grounded Probe specialization before plan-work."
|
|
74
|
+
},
|
|
75
|
+
{
|
|
76
|
+
"id": "builder.plan-work",
|
|
77
|
+
"path": "skills/plan-work/SKILL.md",
|
|
78
|
+
"description": "Code planning primitive — goal + directory to structured execution plan."
|
|
79
|
+
},
|
|
80
|
+
{
|
|
81
|
+
"id": "builder.pull-work",
|
|
82
|
+
"path": "skills/pull-work/SKILL.md",
|
|
83
|
+
"description": "Select ready GitHub issues from the executable backlog for implementation."
|
|
84
|
+
},
|
|
85
|
+
{
|
|
86
|
+
"id": "builder.release-readiness",
|
|
87
|
+
"path": "skills/release-readiness/SKILL.md",
|
|
88
|
+
"description": "Decide whether evidence-backed work is ready to merge, release, deploy, or hold."
|
|
89
|
+
},
|
|
90
|
+
{
|
|
91
|
+
"id": "builder.review-work",
|
|
92
|
+
"path": "skills/review-work/SKILL.md",
|
|
93
|
+
"description": "Review primitive — code, security, dependency, architecture critique before verification."
|
|
94
|
+
},
|
|
95
|
+
{
|
|
96
|
+
"id": "builder.tdd-workflow",
|
|
97
|
+
"path": "skills/tdd-workflow/SKILL.md",
|
|
98
|
+
"description": "Test-driven development — RED, GREEN, REFACTOR with git checkpoints."
|
|
99
|
+
},
|
|
100
|
+
{
|
|
101
|
+
"id": "builder.verify-work",
|
|
102
|
+
"path": "skills/verify-work/SKILL.md",
|
|
103
|
+
"description": "Verification primitive — session file path to structured evidence verdict."
|
|
104
|
+
}
|
|
35
105
|
]
|
|
36
106
|
}
|