@kontourai/flow-agents 0.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.githooks/pre-push +11 -0
- package/.github/workflows/ci.yml +210 -0
- package/.github/workflows/docs-pages.yml +52 -0
- package/.github/workflows/publish-npm.yml +104 -0
- package/AGENTS.md +26 -0
- package/CHANGELOG.md +66 -0
- package/CODE_OF_CONDUCT.md +25 -0
- package/CONTEXT.md +300 -0
- package/CONTRIBUTING.md +44 -0
- package/LICENSE +201 -0
- package/README.md +129 -0
- package/SECURITY.md +33 -0
- package/agent-cards/dev.json +19 -0
- package/agents/dev.json +127 -0
- package/agents/tool-code-reviewer.json +61 -0
- package/agents/tool-dependencies-updater.json +118 -0
- package/agents/tool-explore-config.json +92 -0
- package/agents/tool-explore-deps.json +92 -0
- package/agents/tool-explore-entry.json +92 -0
- package/agents/tool-explore-patterns.json +92 -0
- package/agents/tool-explore-structure.json +92 -0
- package/agents/tool-explore-tests.json +92 -0
- package/agents/tool-planner.json +57 -0
- package/agents/tool-playwright.json +145 -0
- package/agents/tool-security-reviewer.json +56 -0
- package/agents/tool-verifier.json +61 -0
- package/agents/tool-worker.json +58 -0
- package/build/src/cli/console-learning-projection.js +123 -0
- package/build/src/cli/docs-preview.js +39 -0
- package/build/src/cli/effective-backlog-settings.js +102 -0
- package/build/src/cli/export-bookmarks.js +38 -0
- package/build/src/cli/fixture-retirement-audit.js +140 -0
- package/build/src/cli/flow-kit.js +138 -0
- package/build/src/cli/import-bookmarks.js +50 -0
- package/build/src/cli/init.js +239 -0
- package/build/src/cli/instinct-cli.js +93 -0
- package/build/src/cli/promote-workflow-artifact.js +63 -0
- package/build/src/cli/publish-change-helper.js +154 -0
- package/build/src/cli/pull-work-provider.js +469 -0
- package/build/src/cli/runtime-adapter.js +23 -0
- package/build/src/cli/telemetry-doctor.js +221 -0
- package/build/src/cli/usage-feedback.js +443 -0
- package/build/src/cli/validate-hook-influence.js +152 -0
- package/build/src/cli/validate-source-tree.js +31 -0
- package/build/src/cli/validate-workflow-artifacts.js +486 -0
- package/build/src/cli/veritas-governance.js +262 -0
- package/build/src/cli/workflow-artifact-cleanup-audit.js +272 -0
- package/build/src/cli/workflow-sidecar.js +816 -0
- package/build/src/cli.js +89 -0
- package/build/src/flow-kit/validate.js +75 -0
- package/build/src/lib/args.js +45 -0
- package/build/src/lib/fs.js +62 -0
- package/build/src/lib/workflow-learning-projection.js +334 -0
- package/build/src/runtime-adapters.js +146 -0
- package/build/src/tools/build-universal-bundles.js +397 -0
- package/build/src/tools/common.js +56 -0
- package/build/src/tools/filter-installed-packs.js +132 -0
- package/build/src/tools/generate-context-map.js +198 -0
- package/build/src/tools/validate-package.js +64 -0
- package/build/src/tools/validate-source-tree.js +622 -0
- package/console.telemetry.json +176 -0
- package/context/base-rules.md +17 -0
- package/context/code-review-standards.md +62 -0
- package/context/coding-standards.md +42 -0
- package/context/common/orchestrators.md +12 -0
- package/context/common/subagents.md +28 -0
- package/context/contracts/artifact-contract.md +182 -0
- package/context/contracts/builder-kit-workflow-state-contract.md +319 -0
- package/context/contracts/delivery-contract.md +69 -0
- package/context/contracts/execution-contract.md +53 -0
- package/context/contracts/governance-adapter-contract.md +67 -0
- package/context/contracts/planning-contract.md +85 -0
- package/context/contracts/review-contract.md +104 -0
- package/context/contracts/sandbox-policy.md +52 -0
- package/context/contracts/verification-contract.md +134 -0
- package/context/contracts/work-item-contract.md +215 -0
- package/context/deferred/demo-mode.md +33 -0
- package/context/deferred/languages/go.md +31 -0
- package/context/deferred/languages/python.md +31 -0
- package/context/deferred/languages/typescript.md +34 -0
- package/context/deferred/parallelization.md +35 -0
- package/context/deferred/worktree-isolation.md +24 -0
- package/context/development-workflow.md +50 -0
- package/context/scripts/context-budget/budget-scan.sh +166 -0
- package/context/scripts/detect-tools.sh +3 -0
- package/context/scripts/discover-agents.sh +28 -0
- package/context/scripts/git-status.sh +49 -0
- package/context/scripts/hooks/config-protection.js +79 -0
- package/context/scripts/hooks/desktop-notify.sh +39 -0
- package/context/scripts/hooks/governance-audit.sh +135 -0
- package/context/scripts/hooks/lib/audit-transport.sh +40 -0
- package/context/scripts/hooks/lib/hook-flags.js +49 -0
- package/context/scripts/hooks/lib/patterns.sh +57 -0
- package/context/scripts/hooks/lib/resolve-formatter.js +80 -0
- package/context/scripts/hooks/post-edit-accumulator.js +66 -0
- package/context/scripts/hooks/pre-commit-quality.js +194 -0
- package/context/scripts/hooks/quality-gate.js +93 -0
- package/context/scripts/hooks/report-only-guard.js +21 -0
- package/context/scripts/hooks/run-hook.js +136 -0
- package/context/scripts/hooks/stop-format-typecheck.js +141 -0
- package/context/scripts/hooks/stop-goal-fit.js +337 -0
- package/context/scripts/hooks/workflow-steering.js +250 -0
- package/context/scripts/telemetry/console-presets.sh +14 -0
- package/context/scripts/telemetry/install-console-config.sh +214 -0
- package/context/scripts/telemetry/lib/config.sh +85 -0
- package/context/scripts/telemetry/lib/enrich.sh +115 -0
- package/context/scripts/telemetry/lib/redact.sh +22 -0
- package/context/scripts/telemetry/lib/session.sh +63 -0
- package/context/scripts/telemetry/lib/transport.sh +183 -0
- package/context/scripts/telemetry/lib/usage.sh +29 -0
- package/context/scripts/telemetry/sync-agents.sh +173 -0
- package/context/scripts/telemetry/telemetry.conf +23 -0
- package/context/scripts/telemetry/telemetry.sh +387 -0
- package/context/scripts/validate-package.sh +89 -0
- package/context/settings/backlog-provider-settings.json +54 -0
- package/context/templates/core/identity.md +26 -0
- package/context/templates/core/user.md +15 -0
- package/docs/_config.yml +15 -0
- package/docs/_layouts/default.html +87 -0
- package/docs/adr/0001-flow-agents-consumes-flow.md +77 -0
- package/docs/adr/0002-flow-kits-as-extension-unit.md +13 -0
- package/docs/adr/0003-flow-agents-coordinates-kits-and-adapters.md +13 -0
- package/docs/adr/0004-gates-expect-surface-claims.md +15 -0
- package/docs/adr/0005-kubernetes-inspired-resource-contracts.md +48 -0
- package/docs/adr/0006-typescript-first-source-policy.md +98 -0
- package/docs/agent-system-guidebook.md +391 -0
- package/docs/agent-usage-feedback-loop.md +351 -0
- package/docs/assets/favicon.svg +13 -0
- package/docs/assets/og-image.png +0 -0
- package/docs/assets/site.css +774 -0
- package/docs/assets/site.js +139 -0
- package/docs/configurable-workflow-routing.md +174 -0
- package/docs/context-map.md +145 -0
- package/docs/developer-architecture.md +145 -0
- package/docs/developer-hook-setup.md +61 -0
- package/docs/fixture-ownership.md +44 -0
- package/docs/flow-kit-repository-contract.md +180 -0
- package/docs/index.md +129 -0
- package/docs/kontour-resource-contract.md +358 -0
- package/docs/migrations.md +64 -0
- package/docs/north-star.md +322 -0
- package/docs/operating-layers.md +110 -0
- package/docs/repository-structure.md +132 -0
- package/docs/sandbox-policy.md +56 -0
- package/docs/skills-map.md +203 -0
- package/docs/standards-register.md +96 -0
- package/docs/veritas-integration.md +165 -0
- package/docs/work-item-adapters.md +72 -0
- package/docs/workflow-artifact-lifecycle.md +141 -0
- package/docs/workflow-eval-strategy.md +295 -0
- package/docs/workflow-shared-contracts.md +51 -0
- package/docs/workflow-usage-guide.md +443 -0
- package/evals/ARCHITECTURE.md +143 -0
- package/evals/CONVENTIONS.md +58 -0
- package/evals/README.md +128 -0
- package/evals/acceptance/run.sh +29 -0
- package/evals/acceptance/test_claude_harness.sh +242 -0
- package/evals/acceptance/test_codex_harness.sh +108 -0
- package/evals/acceptance/test_kiro_harness.sh +128 -0
- package/evals/cases/dev/404.html +97 -0
- package/evals/cases/dev/code-review.yaml +44 -0
- package/evals/cases/dev/dashboard.html +300 -0
- package/evals/cases/dev/deliver.yaml +66 -0
- package/evals/cases/dev/dependency-update.yaml +16 -0
- package/evals/cases/dev/explore.yaml +20 -0
- package/evals/cases/dev/index.html +370 -0
- package/evals/cases/dev/package-lock.json +28 -0
- package/evals/cases/dev/package.json +16 -0
- package/evals/cases/dev/plan-work.yaml +20 -0
- package/evals/cases/dev/promptfooconfig.yaml +666 -0
- package/evals/cases/dev/search-first.yaml +20 -0
- package/evals/cases/dev/tdd-workflow.yaml +48 -0
- package/evals/cases/dev/verify-work.yaml +44 -0
- package/evals/cases/dev/workflow.yaml +34 -0
- package/evals/ci/run-baseline.sh +283 -0
- package/evals/fixtures/backlog-provider-settings/global-default.json +44 -0
- package/evals/fixtures/backlog-provider-settings/project-override.json +53 -0
- package/evals/fixtures/builder-kit-workflow-state/baseline-freshness-resolution-hint.json +139 -0
- package/evals/fixtures/builder-kit-workflow-state/direct-primitive-stop.json +59 -0
- package/evals/fixtures/builder-kit-workflow-state/empty-board-route-shape.json +55 -0
- package/evals/fixtures/builder-kit-workflow-state/happy-path.json +71 -0
- package/evals/fixtures/builder-kit-workflow-state/mid-work-resume.json +80 -0
- package/evals/fixtures/builder-kit-workflow-state/missing-prestep-recovery.json +65 -0
- package/evals/fixtures/builder-kit-workflow-state/product-build-chaining.json +60 -0
- package/evals/fixtures/builder-kit-workflow-state/stale-continuation-requires-new-probe.json +57 -0
- package/evals/fixtures/console-learning-projection/artifacts/console-learning-correction/learning.json +50 -0
- package/evals/fixtures/console-learning-projection/artifacts/console-learning-open-route/learning.json +41 -0
- package/evals/fixtures/flow-kit-repository/invalid-absolute-path/kit.json +8 -0
- package/evals/fixtures/flow-kit-repository/invalid-asset-section/flows/review.flow.json +6 -0
- package/evals/fixtures/flow-kit-repository/invalid-asset-section/kit.json +11 -0
- package/evals/fixtures/flow-kit-repository/invalid-duplicate-flow/flows/review.flow.json +6 -0
- package/evals/fixtures/flow-kit-repository/invalid-duplicate-flow/kit.json +9 -0
- package/evals/fixtures/flow-kit-repository/invalid-id/flows/review.flow.json +6 -0
- package/evals/fixtures/flow-kit-repository/invalid-id/kit.json +8 -0
- package/evals/fixtures/flow-kit-repository/invalid-malformed-json/kit.json +8 -0
- package/evals/fixtures/flow-kit-repository/invalid-missing-flow/kit.json +8 -0
- package/evals/fixtures/flow-kit-repository/invalid-missing-id/flows/review.flow.json +6 -0
- package/evals/fixtures/flow-kit-repository/invalid-missing-id/kit.json +7 -0
- package/evals/fixtures/flow-kit-repository/invalid-missing-schema-version/flows/review.flow.json +6 -0
- package/evals/fixtures/flow-kit-repository/invalid-missing-schema-version/kit.json +7 -0
- package/evals/fixtures/flow-kit-repository/invalid-name/flows/review.flow.json +6 -0
- package/evals/fixtures/flow-kit-repository/invalid-name/kit.json +8 -0
- package/evals/fixtures/flow-kit-repository/invalid-schema-version/flows/review.flow.json +6 -0
- package/evals/fixtures/flow-kit-repository/invalid-schema-version/kit.json +8 -0
- package/evals/fixtures/flow-kit-repository/invalid-traversal/kit.json +8 -0
- package/evals/fixtures/flow-kit-repository/mixed-runtime-kit/adapters/example.json +3 -0
- package/evals/fixtures/flow-kit-repository/mixed-runtime-kit/assets/example.txt +1 -0
- package/evals/fixtures/flow-kit-repository/mixed-runtime-kit/docs/README.md +3 -0
- package/evals/fixtures/flow-kit-repository/mixed-runtime-kit/flows/runtime.flow.json +26 -0
- package/evals/fixtures/flow-kit-repository/mixed-runtime-kit/kit-evals/example.json +3 -0
- package/evals/fixtures/flow-kit-repository/mixed-runtime-kit/kit-skills/mixed/SKILL.md +3 -0
- package/evals/fixtures/flow-kit-repository/mixed-runtime-kit/kit.json +44 -0
- package/evals/fixtures/flow-kit-repository/valid-local-kit/docs/README.md +3 -0
- package/evals/fixtures/flow-kit-repository/valid-local-kit/flows/review.flow.json +26 -0
- package/evals/fixtures/flow-kit-repository/valid-local-kit/kit.json +20 -0
- package/evals/fixtures/hook-influence/cases.json +336 -0
- package/evals/fixtures/pull-work-provider/github-issues.json +170 -0
- package/evals/fixtures/pull-work-wip-shepherding/global-wip-informs.json +43 -0
- package/evals/fixtures/pull-work-wip-shepherding/personal-wip-blocks.json +42 -0
- package/evals/fixtures/surface-trust/accepted-claim-trust-report.json +31 -0
- package/evals/fixtures/surface-trust/artifact-absent.json +19 -0
- package/evals/fixtures/surface-trust/integrity-mismatch-trust-report.json +32 -0
- package/evals/fixtures/surface-trust/missing-authority-trust-report.json +27 -0
- package/evals/fixtures/surface-trust/provider-absent.json +19 -0
- package/evals/fixtures/surface-trust/rejected-claim-trust-report.json +30 -0
- package/evals/fixtures/surface-trust/stale-claim-trust-snapshot.json +31 -0
- package/evals/fixtures/usage-feedback/sample-full.jsonl +11 -0
- package/evals/fixtures/usage-feedback/sample-outcomes.jsonl +1 -0
- package/evals/fixtures/veritas-governance-adapter/fake-veritas-pass.sh +18 -0
- package/evals/fixtures/veritas-governance-adapter/fake-veritas-secret-fail.sh +10 -0
- package/evals/fixtures/veritas-governance-adapter/fake-veritas-unconfigured.sh +4 -0
- package/evals/integration/test_bundle_install.sh +541 -0
- package/evals/integration/test_console_learning_projection.sh +192 -0
- package/evals/integration/test_context_map.sh +65 -0
- package/evals/integration/test_effective_backlog_settings.sh +58 -0
- package/evals/integration/test_fixture_retirement_audit.sh +58 -0
- package/evals/integration/test_flow_agents_statusline.sh +93 -0
- package/evals/integration/test_flow_kit_repository.sh +90 -0
- package/evals/integration/test_goal_fit_hook.sh +482 -0
- package/evals/integration/test_hook_category_behaviors.sh +190 -0
- package/evals/integration/test_hook_influence_cases.sh +69 -0
- package/evals/integration/test_local_flow_kit_install.sh +145 -0
- package/evals/integration/test_publish_change_helper.sh +176 -0
- package/evals/integration/test_pull_work_provider.sh +140 -0
- package/evals/integration/test_runtime_adapter_activation.sh +106 -0
- package/evals/integration/test_telemetry.sh +485 -0
- package/evals/integration/test_telemetry_doctor.sh +193 -0
- package/evals/integration/test_usage_feedback_dashboard.sh +169 -0
- package/evals/integration/test_usage_feedback_global.sh +117 -0
- package/evals/integration/test_usage_feedback_import.sh +227 -0
- package/evals/integration/test_usage_feedback_outcomes.sh +165 -0
- package/evals/integration/test_usage_feedback_report.sh +263 -0
- package/evals/integration/test_veritas_governance_adapter.sh +235 -0
- package/evals/integration/test_workflow_artifact_cleanup_audit.sh +287 -0
- package/evals/integration/test_workflow_artifacts.sh +1247 -0
- package/evals/integration/test_workflow_sidecar_writer.sh +2112 -0
- package/evals/integration/test_workflow_steering_hook.sh +337 -0
- package/evals/lib/assertions/delegated-to.js +40 -0
- package/evals/lib/assertions/max-tool-calls.js +15 -0
- package/evals/lib/assertions/no-write-tools.js +27 -0
- package/evals/lib/assertions/pass-at-k.js +39 -0
- package/evals/lib/assertions/telemetry-utils.js +105 -0
- package/evals/lib/assertions/tool-called.js +39 -0
- package/evals/lib/assertions/verify-after-fix.js +61 -0
- package/evals/lib/claude-judge.sh +40 -0
- package/evals/lib/claude-provider.sh +74 -0
- package/evals/lib/codex-judge.sh +39 -0
- package/evals/lib/codex-provider.sh +81 -0
- package/evals/lib/eval-dev.sh +5 -0
- package/evals/lib/eval-judge.sh +22 -0
- package/evals/lib/eval-provider.sh +26 -0
- package/evals/lib/eval-report.sh +73 -0
- package/evals/lib/kiro-dev.sh +4 -0
- package/evals/lib/kiro-judge.sh +17 -0
- package/evals/lib/kiro-provider.sh +62 -0
- package/evals/lib/node.sh +111 -0
- package/evals/promptfooconfig.yaml +70 -0
- package/evals/run.sh +309 -0
- package/evals/static/test_evidence_refs.sh +141 -0
- package/evals/static/test_package.sh +407 -0
- package/evals/static/test_repo_hooks.sh +68 -0
- package/evals/static/test_universal_bundles.sh +274 -0
- package/evals/static/test_workflow_skills.sh +1207 -0
- package/install.sh +64 -0
- package/integrations/veritas/flow-agents.adapter.json +138 -0
- package/integrations/veritas/flow-agents.authority-settings.json +26 -0
- package/integrations/veritas/flow-agents.repo-standards.json +82 -0
- package/kits/builder/flows/build.flow.json +218 -0
- package/kits/builder/flows/shape.flow.json +127 -0
- package/kits/builder/kit.json +19 -0
- package/kits/catalog.json +11 -0
- package/package.json +130 -0
- package/packaging/README.md +60 -0
- package/packaging/manifest.json +173 -0
- package/packaging/packs.json +69 -0
- package/powers/dependency-checker/POWER.md +20 -0
- package/powers/dependency-checker/mcp.json +20 -0
- package/powers/playwright/POWER.md +25 -0
- package/powers/playwright/mcp.json +12 -0
- package/prompts/code-audit.md +123 -0
- package/prompts/kcommit.md +88 -0
- package/schemas/backlog-provider-settings.schema.json +138 -0
- package/schemas/workflow-acceptance.schema.json +216 -0
- package/schemas/workflow-critique.schema.json +113 -0
- package/schemas/workflow-evidence.schema.json +357 -0
- package/schemas/workflow-handoff.schema.json +52 -0
- package/schemas/workflow-learning.schema.json +223 -0
- package/schemas/workflow-release.schema.json +172 -0
- package/schemas/workflow-state.schema.json +80 -0
- package/scripts/README.md +111 -0
- package/scripts/build-universal-bundles.js +3 -0
- package/scripts/check-content-boundary.cjs +99 -0
- package/scripts/context-budget/budget-scan.sh +166 -0
- package/scripts/detect-tools.sh +3 -0
- package/scripts/discover-agents.sh +28 -0
- package/scripts/effective-backlog-settings.js +2 -0
- package/scripts/filter-installed-packs.js +2 -0
- package/scripts/flow-kit.js +2 -0
- package/scripts/generate-context-map.js +2 -0
- package/scripts/git-status.sh +49 -0
- package/scripts/hooks/claude-hook-adapter.js +174 -0
- package/scripts/hooks/claude-telemetry-hook.js +115 -0
- package/scripts/hooks/codex-hook-adapter.js +176 -0
- package/scripts/hooks/codex-telemetry-hook.js +95 -0
- package/scripts/hooks/config-protection.js +79 -0
- package/scripts/hooks/desktop-notify.sh +39 -0
- package/scripts/hooks/governance-audit.sh +135 -0
- package/scripts/hooks/lib/audit-transport.sh +40 -0
- package/scripts/hooks/lib/hook-flags.js +49 -0
- package/scripts/hooks/lib/patterns.sh +57 -0
- package/scripts/hooks/lib/resolve-formatter.js +80 -0
- package/scripts/hooks/post-edit-accumulator.js +66 -0
- package/scripts/hooks/pre-commit-quality.js +194 -0
- package/scripts/hooks/quality-gate.js +93 -0
- package/scripts/hooks/report-only-guard.js +21 -0
- package/scripts/hooks/run-hook.js +136 -0
- package/scripts/hooks/stop-format-typecheck.js +141 -0
- package/scripts/hooks/stop-goal-fit.js +337 -0
- package/scripts/hooks/workflow-steering.js +250 -0
- package/scripts/install-codex-home.sh +106 -0
- package/scripts/package.json +3 -0
- package/scripts/promote-workflow-artifact.js +2 -0
- package/scripts/publish-change-helper.js +2 -0
- package/scripts/pull-work-provider.js +2 -0
- package/scripts/setup-repo-hooks.sh +8 -0
- package/scripts/statusline/flow-agents-statusline.js +157 -0
- package/scripts/telemetry/console-presets.sh +14 -0
- package/scripts/telemetry/install-console-config.sh +214 -0
- package/scripts/telemetry/lib/config.sh +85 -0
- package/scripts/telemetry/lib/enrich.sh +115 -0
- package/scripts/telemetry/lib/redact.sh +22 -0
- package/scripts/telemetry/lib/session.sh +63 -0
- package/scripts/telemetry/lib/transport.sh +183 -0
- package/scripts/telemetry/lib/usage.sh +29 -0
- package/scripts/telemetry/sync-agents.sh +173 -0
- package/scripts/telemetry/telemetry.conf +23 -0
- package/scripts/telemetry/telemetry.sh +387 -0
- package/scripts/usage-feedback.js +2 -0
- package/scripts/validate-hook-influence-cases.js +2 -0
- package/scripts/validate-package.sh +89 -0
- package/scripts/validate-source-tree.js +9 -0
- package/skills/agentic-engineering/SKILL.md +62 -0
- package/skills/browser-test/SKILL.md +51 -0
- package/skills/builder-shape/SKILL.md +76 -0
- package/skills/context-budget/SKILL.md +40 -0
- package/skills/deliver/SKILL.md +241 -0
- package/skills/dependency-update/SKILL.md +68 -0
- package/skills/design-probe/SKILL.md +107 -0
- package/skills/eval-rebuild/SKILL.md +39 -0
- package/skills/evidence-gate/SKILL.md +186 -0
- package/skills/execute-plan/SKILL.md +110 -0
- package/skills/explore/SKILL.md +137 -0
- package/skills/feedback-loop/SKILL.md +87 -0
- package/skills/fix-bug/SKILL.md +133 -0
- package/skills/frontend-design/SKILL.md +80 -0
- package/skills/github-cli/SKILL.md +63 -0
- package/skills/idea-to-backlog/SKILL.md +267 -0
- package/skills/knowledge-capture/SKILL.md +55 -0
- package/skills/learning-review/SKILL.md +115 -0
- package/skills/pickup-probe/SKILL.md +114 -0
- package/skills/plan-work/SKILL.md +176 -0
- package/skills/pull-work/SKILL.md +309 -0
- package/skills/release-readiness/SKILL.md +121 -0
- package/skills/review-work/SKILL.md +161 -0
- package/skills/search-first/SKILL.md +66 -0
- package/skills/tdd-workflow/SKILL.md +140 -0
- package/skills/verify-work/SKILL.md +109 -0
- package/src/cli/console-learning-projection.ts +140 -0
- package/src/cli/effective-backlog-settings.ts +99 -0
- package/src/cli/fixture-retirement-audit.ts +154 -0
- package/src/cli/flow-kit.ts +139 -0
- package/src/cli/init.ts +248 -0
- package/src/cli/promote-workflow-artifact.ts +64 -0
- package/src/cli/publish-change-helper.ts +143 -0
- package/src/cli/pull-work-provider.ts +481 -0
- package/src/cli/runtime-adapter.ts +24 -0
- package/src/cli/telemetry-doctor.ts +243 -0
- package/src/cli/usage-feedback.ts +418 -0
- package/src/cli/validate-hook-influence.ts +119 -0
- package/src/cli/validate-source-tree.ts +30 -0
- package/src/cli/validate-workflow-artifacts.ts +411 -0
- package/src/cli/veritas-governance.ts +322 -0
- package/src/cli/workflow-artifact-cleanup-audit.ts +281 -0
- package/src/cli/workflow-sidecar.ts +676 -0
- package/src/cli.ts +95 -0
- package/src/flow-kit/validate.ts +74 -0
- package/src/lib/args.ts +43 -0
- package/src/lib/fs.ts +62 -0
- package/src/lib/workflow-learning-projection.ts +491 -0
- package/src/runtime-adapters.ts +154 -0
- package/src/tools/build-universal-bundles.ts +366 -0
- package/src/tools/common.ts +61 -0
- package/src/tools/filter-installed-packs.ts +129 -0
- package/src/tools/generate-context-map.ts +199 -0
- package/src/tools/validate-package.ts +57 -0
- package/src/tools/validate-source-tree.ts +488 -0
- package/tsconfig.json +19 -0
- package/veritas.claims.json +6 -0
|
@@ -0,0 +1,1247 @@
|
|
|
1
|
+
#!/usr/bin/env bash
|
|
2
|
+
# test_workflow_artifacts.sh - shared-contract artifact quality and E2E smoke tests
|
|
3
|
+
set -uo pipefail
|
|
4
|
+
|
|
5
|
+
ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")/../.." && pwd)"
|
|
6
|
+
source "$ROOT/evals/lib/node.sh"
|
|
7
|
+
|
|
8
|
+
# Scratch directory for every fixture this test writes. The script does not
# run under `set -e`, so a failed mktemp must abort explicitly; otherwise
# TMPDIR_EVAL would be empty and the derived paths would start at "/".
TMPDIR_EVAL="$(mktemp -d)" || { echo "mktemp -d failed; cannot create scratch directory" >&2; exit 1; }
|
|
9
|
+
errors=0
|
|
10
|
+
|
|
11
|
+
# Recursively delete the scratch directory named by $TMPDIR_EVAL.
# Registered as the EXIT trap handler right after this definition.
cleanup() {
|
|
12
|
+
rm -rf "$TMPDIR_EVAL"
|
|
13
|
+
}
|
|
14
|
+
trap cleanup EXIT
|
|
15
|
+
|
|
16
|
+
# Report a passing check: writes " ✓ <label>" to stdout.
# Arguments: $1 - label describing the check that passed
_pass() {
  printf ' ✓ %s\n' "$1"
}
|
|
17
|
+
# Report a failing check: writes " ✗ <label>" to stdout and increments the
# global `errors` counter by one.
# Globals:   errors (read and written)
# Arguments: $1 - label describing the check that failed
_fail() {
  printf ' ✗ %s\n' "$1"
  errors=$((errors + 1))
}
|
|
18
|
+
|
|
19
|
+
VALIDATOR="validate-workflow-artifacts"
|
|
20
|
+
REPO="$TMPDIR_EVAL/repo"
|
|
21
|
+
ARTIFACT_DIR="$REPO/.flow-agents/workflow-contract-e2e"
|
|
22
|
+
mkdir -p "$ARTIFACT_DIR"
|
|
23
|
+
|
|
24
|
+
cat > "$REPO/AGENTS.md" <<'MARKDOWN'
|
|
25
|
+
# Test Repo
|
|
26
|
+
MARKDOWN
|
|
27
|
+
|
|
28
|
+
cat > "$ARTIFACT_DIR/workflow-contract-e2e--deliver-plan.md" <<'MARKDOWN'
|
|
29
|
+
---
|
|
30
|
+
role: plan
|
|
31
|
+
parent: workflow-contract-e2e--deliver
|
|
32
|
+
created: 2026-05-06T00:00:00Z
|
|
33
|
+
---
|
|
34
|
+
|
|
35
|
+
## Plan
|
|
36
|
+
|
|
37
|
+
Add deterministic artifact validation and wire it into integration evals.
|
|
38
|
+
|
|
39
|
+
## Definition Of Done
|
|
40
|
+
|
|
41
|
+
- **User outcome:** Maintainers can run one local command and know whether delivery artifacts still satisfy the shared contracts.
|
|
42
|
+
- **Scope:** Validator, integration smoke test, eval runner wiring, and docs.
|
|
43
|
+
- **Acceptance criteria:**
|
|
44
|
+
- [ ] Valid artifact chains pass - Evidence: validator returns exit 0.
|
|
45
|
+
- [ ] Missing Goal Fit fails - Evidence: validator returns non-zero and names Goal Fit.
|
|
46
|
+
- [ ] Green-build-only artifacts fail - Evidence: validator reports acceptance evidence is missing.
|
|
47
|
+
- [ ] Hidden NOT_VERIFIED fails - Evidence: validator reports explicit acceptance is required.
|
|
48
|
+
- **Usefulness checks:**
|
|
49
|
+
- [ ] User-facing workflow is documented or discoverable
|
|
50
|
+
- [ ] Local and global/project scope are separated when relevant
|
|
51
|
+
- [ ] Dashboard/UI changes have visual evidence when relevant
|
|
52
|
+
- [ ] Unknown, NOT_VERIFIED, and TODO gaps are resolved or explicitly accepted
|
|
53
|
+
- **Stop-short risks:** Static checks could pass while generated artifacts hide missing evidence.
|
|
54
|
+
- **Durable docs target:** docs/workflow-eval-strategy.md
|
|
55
|
+
- **Sandbox mode:** local-edit
|
|
56
|
+
|
|
57
|
+
### Wave 1 (parallel)
|
|
58
|
+
|
|
59
|
+
#### Task: Artifact validator
|
|
60
|
+
- **Files:** src/cli/validate-workflow-artifacts.ts
|
|
61
|
+
- **Changes:** Validate plan, delivery, and review artifact contracts.
|
|
62
|
+
- **Acceptance:** Good fixtures pass and bad fixtures fail with actionable messages.
|
|
63
|
+
- **Context:** Shared contracts in context/contracts/.
|
|
64
|
+
MARKDOWN
|
|
65
|
+
|
|
66
|
+
cat > "$ARTIFACT_DIR/workflow-contract-e2e--deliver-review.md" <<'MARKDOWN'
|
|
67
|
+
---
|
|
68
|
+
role: review
|
|
69
|
+
parent: workflow-contract-e2e--deliver
|
|
70
|
+
created: 2026-05-06T00:00:00Z
|
|
71
|
+
verdict: PASS
|
|
72
|
+
---
|
|
73
|
+
|
|
74
|
+
## Verification Report
|
|
75
|
+
|
|
76
|
+
Build: [PASS] flow_agents_node validate-workflow-artifacts fixture, exit 0
|
|
77
|
+
Types: [SKIP] no type checker configured for shell fixtures
|
|
78
|
+
Lint: [SKIP] no linter configured for shell fixtures
|
|
79
|
+
Tests: [PASS] bash evals/integration/test_workflow_artifacts.sh, exit 0
|
|
80
|
+
Security: [SKIP] no production code path touched
|
|
81
|
+
Diff: [PASS] validator and integration fixtures reviewed
|
|
82
|
+
|
|
83
|
+
### Acceptance Criteria
|
|
84
|
+
- [PASS] Valid artifact chains pass - Evidence: validator returned exit 0.
|
|
85
|
+
- [PASS] Missing Goal Fit fails - Evidence: validator returned non-zero and named Goal Fit.
|
|
86
|
+
- [PASS] Green-build-only artifacts fail - Evidence: validator reported missing acceptance evidence.
|
|
87
|
+
- [PASS] Hidden NOT_VERIFIED fails - Evidence: validator required explicit acceptance or routing.
|
|
88
|
+
|
|
89
|
+
### Goal Fit
|
|
90
|
+
- [PASS] User outcome - Evidence: one local integration command covers the artifact chain.
|
|
91
|
+
- [PASS] User-facing workflow - Evidence: docs name the command.
|
|
92
|
+
- [PASS] Durable docs target - Evidence: docs/workflow-eval-strategy.md.
|
|
93
|
+
- [PASS] Stop-short risks - Evidence: negative fixtures cover green-only and hidden NOT_VERIFIED.
|
|
94
|
+
|
|
95
|
+
### Verdict: PASS
|
|
96
|
+
Shared workflow artifacts satisfy the contract.
|
|
97
|
+
MARKDOWN
|
|
98
|
+
|
|
99
|
+
cat > "$ARTIFACT_DIR/workflow-contract-e2e--deliver.md" <<'MARKDOWN'
|
|
100
|
+
# Build workflow contract E2E tests
|
|
101
|
+
|
|
102
|
+
branch: main
|
|
103
|
+
worktree: main
|
|
104
|
+
created: 2026-05-06T00:00:00Z
|
|
105
|
+
status: delivered
|
|
106
|
+
type: deliver
|
|
107
|
+
iteration: 1
|
|
108
|
+
|
|
109
|
+
## Plan
|
|
110
|
+
|
|
111
|
+
See workflow-contract-e2e--deliver-plan.md.
|
|
112
|
+
|
|
113
|
+
## Definition Of Done
|
|
114
|
+
|
|
115
|
+
- **User outcome:** Maintainers can run one local command and know whether delivery artifacts still satisfy the shared contracts.
|
|
116
|
+
- **Scope:** Validator, integration smoke test, eval runner wiring, and docs.
|
|
117
|
+
- **Acceptance criteria:**
|
|
118
|
+
- [x] Valid artifact chains pass - Evidence: validator returns exit 0.
|
|
119
|
+
- [x] Missing Goal Fit fails - Evidence: validator returns non-zero and names Goal Fit.
|
|
120
|
+
- [x] Green-build-only artifacts fail - Evidence: validator reports acceptance evidence is missing.
|
|
121
|
+
- [x] Hidden NOT_VERIFIED fails - Evidence: validator reports explicit acceptance is required.
|
|
122
|
+
- **Usefulness checks:**
|
|
123
|
+
- [x] User-facing workflow is documented or discoverable
|
|
124
|
+
- [x] Local and global/project scope are separated when relevant
|
|
125
|
+
- [x] Dashboard/UI changes have visual evidence when relevant
|
|
126
|
+
- [x] Unknown, NOT_VERIFIED, and TODO gaps are resolved or explicitly accepted
|
|
127
|
+
- **Stop-short risks:** Static checks could pass while generated artifacts hide missing evidence.
|
|
128
|
+
- **Durable docs target:** docs/workflow-eval-strategy.md
|
|
129
|
+
- **Sandbox mode:** local-edit
|
|
130
|
+
|
|
131
|
+
## Execution Progress
|
|
132
|
+
|
|
133
|
+
### Wave 1 (completed)
|
|
134
|
+
- [x] Artifact validator - done
|
|
135
|
+
- [x] Integration fixtures - done
|
|
136
|
+
|
|
137
|
+
## Verification Report
|
|
138
|
+
|
|
139
|
+
Build: [PASS] flow_agents_node validate-workflow-artifacts fixture, exit 0
|
|
140
|
+
Types: [SKIP] no type checker configured for shell fixtures
|
|
141
|
+
Lint: [SKIP] no linter configured for shell fixtures
|
|
142
|
+
Tests: [PASS] bash evals/integration/test_workflow_artifacts.sh, exit 0
|
|
143
|
+
Security: [SKIP] no production code path touched
|
|
144
|
+
Diff: [PASS] validator and integration fixtures reviewed
|
|
145
|
+
|
|
146
|
+
### Acceptance Criteria
|
|
147
|
+
- [PASS] Valid artifact chains pass - Evidence: validator returned exit 0.
|
|
148
|
+
- [PASS] Missing Goal Fit fails - Evidence: validator returned non-zero and named Goal Fit.
|
|
149
|
+
- [PASS] Green-build-only artifacts fail - Evidence: validator reported missing acceptance evidence.
|
|
150
|
+
- [PASS] Hidden NOT_VERIFIED fails - Evidence: validator required explicit acceptance or routing.
|
|
151
|
+
|
|
152
|
+
### Goal Fit
|
|
153
|
+
- [PASS] User outcome - Evidence: one local integration command covers the artifact chain.
|
|
154
|
+
- [PASS] User-facing workflow - Evidence: docs name the command.
|
|
155
|
+
- [PASS] Durable docs target - Evidence: docs/workflow-eval-strategy.md.
|
|
156
|
+
- [PASS] Stop-short risks - Evidence: negative fixtures cover green-only and hidden NOT_VERIFIED.
|
|
157
|
+
|
|
158
|
+
### Verdict: PASS
|
|
159
|
+
Shared workflow artifacts satisfy the contract.
|
|
160
|
+
|
|
161
|
+
## Goal Fit Gate
|
|
162
|
+
|
|
163
|
+
- [x] Original user goal restated
|
|
164
|
+
- [x] Every acceptance criterion has evidence
|
|
165
|
+
- [x] User-facing workflow was exercised or documented
|
|
166
|
+
- [x] Local/project and global scope are handled when relevant
|
|
167
|
+
- [x] Unknown, NOT_VERIFIED, and TODO gaps are resolved or explicitly accepted
|
|
168
|
+
- [x] Dashboard/UI changes have visual evidence when relevant
|
|
169
|
+
- [x] Durable docs target is updated, scheduled for final acceptance, or marked not needed with reason
|
|
170
|
+
|
|
171
|
+
## Final Acceptance
|
|
172
|
+
|
|
173
|
+
- [x] CI/relevant checks passed
|
|
174
|
+
- [x] Merge/release decision recorded
|
|
175
|
+
- [x] Working artifacts archived or linked
|
|
176
|
+
- [x] Long-lived docs updated with why and how the feature was built
|
|
177
|
+
- [x] Follow-up issues or learning-review items created for deferred work
|
|
178
|
+
MARKDOWN
|
|
179
|
+
|
|
180
|
+
cat > "$ARTIFACT_DIR/state.json" <<'JSON'
|
|
181
|
+
{
|
|
182
|
+
"schema_version": "1.0",
|
|
183
|
+
"task_slug": "workflow-contract-e2e",
|
|
184
|
+
"status": "delivered",
|
|
185
|
+
"phase": "done",
|
|
186
|
+
"created_at": "2026-05-06T00:00:00Z",
|
|
187
|
+
"updated_at": "2026-05-06T00:00:00Z",
|
|
188
|
+
"artifact_paths": [
|
|
189
|
+
"workflow-contract-e2e--deliver.md",
|
|
190
|
+
"workflow-contract-e2e--deliver-plan.md",
|
|
191
|
+
"workflow-contract-e2e--deliver-review.md"
|
|
192
|
+
],
|
|
193
|
+
"next_action": {
|
|
194
|
+
"status": "done",
|
|
195
|
+
"summary": "Workflow artifact contract fixtures pass validation."
|
|
196
|
+
}
|
|
197
|
+
}
|
|
198
|
+
JSON
|
|
199
|
+
|
|
200
|
+
cat > "$ARTIFACT_DIR/acceptance.json" <<'JSON'
|
|
201
|
+
{
|
|
202
|
+
"schema_version": "1.0",
|
|
203
|
+
"task_slug": "workflow-contract-e2e",
|
|
204
|
+
"source_request": "Build workflow contract E2E tests.",
|
|
205
|
+
"criteria": [
|
|
206
|
+
{
|
|
207
|
+
"id": "valid-chain-passes",
|
|
208
|
+
"description": "Valid artifact chains pass.",
|
|
209
|
+
"status": "pass",
|
|
210
|
+
"evidence_refs": [
|
|
211
|
+
{
|
|
212
|
+
"kind": "artifact",
|
|
213
|
+
"file": "workflow-contract-e2e--deliver-review.md",
|
|
214
|
+
"summary": "Verification artifact for the valid chain."
|
|
215
|
+
}
|
|
216
|
+
]
|
|
217
|
+
},
|
|
218
|
+
{
|
|
219
|
+
"id": "missing-goal-fit-fails",
|
|
220
|
+
"description": "Missing Goal Fit fails.",
|
|
221
|
+
"status": "pass",
|
|
222
|
+
"evidence_refs": [
|
|
223
|
+
{
|
|
224
|
+
"kind": "source",
|
|
225
|
+
"file": "evals/integration/test_workflow_artifacts.sh",
|
|
226
|
+
"line_start": 1,
|
|
227
|
+
"line_end": 1,
|
|
228
|
+
"excerpt": "test_workflow_artifacts.sh - shared-contract artifact quality and E2E smoke tests"
|
|
229
|
+
}
|
|
230
|
+
]
|
|
231
|
+
},
|
|
232
|
+
{
|
|
233
|
+
"id": "green-build-only-fails",
|
|
234
|
+
"description": "Green-build-only artifacts fail.",
|
|
235
|
+
"status": "pass",
|
|
236
|
+
"evidence_refs": [
|
|
237
|
+
{
|
|
238
|
+
"kind": "source",
|
|
239
|
+
"file": "evals/integration/test_workflow_artifacts.sh",
|
|
240
|
+
"line_start": 1,
|
|
241
|
+
"line_end": 1,
|
|
242
|
+
"excerpt": "test_workflow_artifacts.sh - shared-contract artifact quality and E2E smoke tests"
|
|
243
|
+
}
|
|
244
|
+
]
|
|
245
|
+
},
|
|
246
|
+
{
|
|
247
|
+
"id": "hidden-not-verified-fails",
|
|
248
|
+
"description": "Hidden NOT_VERIFIED fails.",
|
|
249
|
+
"status": "pass",
|
|
250
|
+
"evidence_refs": [
|
|
251
|
+
{
|
|
252
|
+
"kind": "source",
|
|
253
|
+
"file": "evals/integration/test_workflow_artifacts.sh",
|
|
254
|
+
"line_start": 1,
|
|
255
|
+
"line_end": 1,
|
|
256
|
+
"excerpt": "test_workflow_artifacts.sh - shared-contract artifact quality and E2E smoke tests"
|
|
257
|
+
}
|
|
258
|
+
]
|
|
259
|
+
}
|
|
260
|
+
],
|
|
261
|
+
"goal_fit": {
|
|
262
|
+
"status": "pass",
|
|
263
|
+
"summary": "Maintainers can run one local command and validate workflow artifacts."
|
|
264
|
+
}
|
|
265
|
+
}
|
|
266
|
+
JSON
|
|
267
|
+
|
|
268
|
+
cat > "$ARTIFACT_DIR/evidence.json" <<'JSON'
|
|
269
|
+
{
|
|
270
|
+
"schema_version": "1.0",
|
|
271
|
+
"task_slug": "workflow-contract-e2e",
|
|
272
|
+
"verdict": "pass",
|
|
273
|
+
"checks": [
|
|
274
|
+
{
|
|
275
|
+
"id": "workflow-artifact-validator",
|
|
276
|
+
"kind": "test",
|
|
277
|
+
"status": "pass",
|
|
278
|
+
"command": "flow_agents_node validate-workflow-artifacts fixture",
|
|
279
|
+
"summary": "Valid Markdown artifacts and sidecars pass.",
|
|
280
|
+
"artifact_refs": [
|
|
281
|
+
{
|
|
282
|
+
"kind": "artifact",
|
|
283
|
+
"file": "workflow-contract-e2e--deliver.md",
|
|
284
|
+
"summary": "Delivery artifact validated by the workflow artifact validator."
|
|
285
|
+
}
|
|
286
|
+
],
|
|
287
|
+
"standard_refs": [
|
|
288
|
+
{
|
|
289
|
+
"standard": "junit",
|
|
290
|
+
"ref": "reports/workflow-artifact-validator.xml",
|
|
291
|
+
"role": "mapping",
|
|
292
|
+
"summary": "JUnit-style test evidence can be linked without flattening it."
|
|
293
|
+
}
|
|
294
|
+
]
|
|
295
|
+
}
|
|
296
|
+
],
|
|
297
|
+
"external_evidence": [
|
|
298
|
+
{
|
|
299
|
+
"system": "veritas",
|
|
300
|
+
"ref": {
|
|
301
|
+
"kind": "external",
|
|
302
|
+
"url": "veritas://proof-lanes/workflow-contract-e2e",
|
|
303
|
+
"summary": "Optional Veritas proof-lane reference."
|
|
304
|
+
},
|
|
305
|
+
"summary": "Optional Veritas proof-lane reference.",
|
|
306
|
+
"standard": "veritas"
|
|
307
|
+
}
|
|
308
|
+
],
|
|
309
|
+
"not_verified_gaps": []
|
|
310
|
+
}
|
|
311
|
+
JSON
|
|
312
|
+
|
|
313
|
+
cat > "$ARTIFACT_DIR/handoff.json" <<'JSON'
|
|
314
|
+
{
|
|
315
|
+
"schema_version": "1.0",
|
|
316
|
+
"task_slug": "workflow-contract-e2e",
|
|
317
|
+
"summary": "Workflow artifact validation is complete.",
|
|
318
|
+
"current_state_ref": "state.json",
|
|
319
|
+
"next_steps": [
|
|
320
|
+
"Keep sidecar schemas aligned with the Markdown workflow contracts."
|
|
321
|
+
],
|
|
322
|
+
"blockers": [],
|
|
323
|
+
"warnings": []
|
|
324
|
+
}
|
|
325
|
+
JSON
|
|
326
|
+
|
|
327
|
+
cat > "$ARTIFACT_DIR/critique.json" <<'JSON'
|
|
328
|
+
{
|
|
329
|
+
"schema_version": "1.0",
|
|
330
|
+
"task_slug": "workflow-contract-e2e",
|
|
331
|
+
"status": "pass",
|
|
332
|
+
"required": true,
|
|
333
|
+
"updated_at": "2026-05-06T00:00:00Z",
|
|
334
|
+
"critiques": [
|
|
335
|
+
{
|
|
336
|
+
"id": "workflow-contract-review",
|
|
337
|
+
"reviewer": "tool-code-reviewer",
|
|
338
|
+
"reviewed_at": "2026-05-06T00:00:00Z",
|
|
339
|
+
"verdict": "pass",
|
|
340
|
+
"summary": "No blocking findings in the workflow artifact fixture.",
|
|
341
|
+
"artifact_refs": ["workflow-contract-e2e--deliver.md"],
|
|
342
|
+
"findings": []
|
|
343
|
+
}
|
|
344
|
+
]
|
|
345
|
+
}
|
|
346
|
+
JSON
|
|
347
|
+
|
|
348
|
+
cat > "$ARTIFACT_DIR/learning.json" <<'JSON'
|
|
349
|
+
{
|
|
350
|
+
"schema_version": "1.0",
|
|
351
|
+
"task_slug": "workflow-contract-e2e",
|
|
352
|
+
"status": "learned",
|
|
353
|
+
"updated_at": "2026-05-06T00:00:00Z",
|
|
354
|
+
"records": [
|
|
355
|
+
{
|
|
356
|
+
"id": "workflow-contract-fixture",
|
|
357
|
+
"recorded_at": "2026-05-06T00:00:00Z",
|
|
358
|
+
"source_refs": ["workflow-contract-e2e--deliver.md", "evidence.json"],
|
|
359
|
+
"outcome": "success",
|
|
360
|
+
"facts": ["The workflow artifact validator accepted the complete fixture chain."],
|
|
361
|
+
"interpretation": "A passing learning record can route completed workflow evidence into durable system improvements.",
|
|
362
|
+
"routing": [
|
|
363
|
+
{
|
|
364
|
+
"target": "eval",
|
|
365
|
+
"action": "Keep workflow artifact fixture coverage in integration tests.",
|
|
366
|
+
"status": "completed",
|
|
367
|
+
"ref": "evals/integration/test_workflow_artifacts.sh"
|
|
368
|
+
}
|
|
369
|
+
],
|
|
370
|
+
"correction": {
|
|
371
|
+
"needed": false,
|
|
372
|
+
"evidence": "The complete fixture chain matched intended workflow artifact behavior."
|
|
373
|
+
}
|
|
374
|
+
}
|
|
375
|
+
]
|
|
376
|
+
}
|
|
377
|
+
JSON
|
|
378
|
+
|
|
379
|
+
cat > "$ARTIFACT_DIR/release.json" <<'JSON'
|
|
380
|
+
{
|
|
381
|
+
"schema_version": "1.0",
|
|
382
|
+
"task_slug": "workflow-contract-e2e",
|
|
383
|
+
"decision": "merge",
|
|
384
|
+
"updated_at": "2026-05-06T00:00:00Z",
|
|
385
|
+
"scope": "Workflow artifact validator fixtures and sidecar schemas.",
|
|
386
|
+
"evidence_ref": "evidence.json",
|
|
387
|
+
"gates": [
|
|
388
|
+
{
|
|
389
|
+
"name": "merge",
|
|
390
|
+
"status": "pass",
|
|
391
|
+
"summary": "Local static and integration checks passed.",
|
|
392
|
+
"evidence_refs": ["evidence.json"]
|
|
393
|
+
},
|
|
394
|
+
{
|
|
395
|
+
"name": "docs",
|
|
396
|
+
"status": "pass",
|
|
397
|
+
"summary": "Workflow docs are updated.",
|
|
398
|
+
"evidence_refs": ["docs/workflow-eval-strategy.md"]
|
|
399
|
+
}
|
|
400
|
+
],
|
|
401
|
+
"rollback_plan": {
|
|
402
|
+
"status": "not_required",
|
|
403
|
+
"summary": "No deployment occurs for this fixture.",
|
|
404
|
+
"owner": "maintainer"
|
|
405
|
+
},
|
|
406
|
+
"observability_plan": {
|
|
407
|
+
"status": "not_required",
|
|
408
|
+
"summary": "No runtime surface changes."
|
|
409
|
+
},
|
|
410
|
+
"post_deploy_checks": [],
|
|
411
|
+
"docs": {
|
|
412
|
+
"status": "updated",
|
|
413
|
+
"summary": "Fixture coverage documents release readiness sidecar validation.",
|
|
414
|
+
"refs": ["evals/integration/test_workflow_artifacts.sh"]
|
|
415
|
+
}
|
|
416
|
+
}
|
|
417
|
+
JSON
|
|
418
|
+
|
|
419
|
+
# Positive case: the complete artifact chain plus its sidecars must validate.
# stdout and stderr are captured in separate files so both can be echoed
# back in the failure message.
if ! flow_agents_node "$VALIDATOR" --require-sidecars --require-critique "$ARTIFACT_DIR" \
  >"$TMPDIR_EVAL/valid.out" 2>"$TMPDIR_EVAL/valid.err"; then
  _fail "valid artifact chain failed: $(cat "$TMPDIR_EVAL/valid.out" "$TMPDIR_EVAL/valid.err")"
else
  _pass "valid plan/review/delivery artifact chain and sidecars pass"
fi
|
|
424
|
+
|
|
425
|
+
# Scratch directory under the eval temp dir for the negative fixtures.
BAD="${TMPDIR_EVAL}/bad"
mkdir -p "${BAD}"
|
|
427
|
+
|
|
428
|
+
cat > "$BAD/missing-goal-fit--deliver.md" <<'MARKDOWN'
|
|
429
|
+
# Missing Goal Fit
|
|
430
|
+
|
|
431
|
+
status: delivered
|
|
432
|
+
type: deliver
|
|
433
|
+
|
|
434
|
+
## Plan
|
|
435
|
+
Plan exists.
|
|
436
|
+
|
|
437
|
+
## Definition Of Done
|
|
438
|
+
- **User outcome:** User can inspect the result.
|
|
439
|
+
- **Acceptance criteria:**
|
|
440
|
+
- [x] It works - Evidence: test output
|
|
441
|
+
- **Stop-short risks:** Goal Fit could be missing.
|
|
442
|
+
- **Durable docs target:** docs/test.md
|
|
443
|
+
|
|
444
|
+
## Verification Report
|
|
445
|
+
Build: [PASS] test
|
|
446
|
+
|
|
447
|
+
### Acceptance Criteria
|
|
448
|
+
- [PASS] It works - Evidence: test output.
|
|
449
|
+
|
|
450
|
+
### Verdict: PASS
|
|
451
|
+
|
|
452
|
+
## Final Acceptance
|
|
453
|
+
- [x] CI/relevant checks passed
|
|
454
|
+
MARKDOWN
|
|
455
|
+
|
|
456
|
+
# Negative case: the deliver fixture has no Goal Fit Gate section.
# The validator must exit non-zero AND its output must mention "Goal Fit".
if ! flow_agents_node "$VALIDATOR" "$BAD/missing-goal-fit--deliver.md" \
  >"$TMPDIR_EVAL/missing.out" 2>&1; then
  if rg -q 'Goal Fit' "$TMPDIR_EVAL/missing.out"; then
    _pass "missing Goal Fit artifact fails with actionable message"
  else
    _fail "missing Goal Fit failure did not mention Goal Fit"
  fi
else
  _fail "missing Goal Fit artifact should fail"
fi
|
|
463
|
+
|
|
464
|
+
cat > "$BAD/missing-sandbox--deliver-plan.md" <<'MARKDOWN'
|
|
465
|
+
---
|
|
466
|
+
role: plan
|
|
467
|
+
created: 2026-05-06T00:00:00Z
|
|
468
|
+
---
|
|
469
|
+
|
|
470
|
+
## Plan
|
|
471
|
+
Plan exists.
|
|
472
|
+
|
|
473
|
+
## Definition Of Done
|
|
474
|
+
- **User outcome:** User can inspect the result.
|
|
475
|
+
- **Acceptance criteria:**
|
|
476
|
+
- [ ] It works - Evidence: test output
|
|
477
|
+
- **Stop-short risks:** Execution boundary could be ambiguous.
|
|
478
|
+
- **Durable docs target:** docs/test.md
|
|
479
|
+
|
|
480
|
+
### Wave 1 (parallel)
|
|
481
|
+
|
|
482
|
+
#### Task: Test
|
|
483
|
+
- **Files:** test.txt
|
|
484
|
+
- **Changes:** Test fixture.
|
|
485
|
+
- **Acceptance:** Validator reports missing sandbox mode.
|
|
486
|
+
MARKDOWN
|
|
487
|
+
|
|
488
|
+
# Negative case: the plan fixture declares no "Sandbox mode" entry.
# A non-zero exit alone is not enough; the output must name "Sandbox mode".
if ! flow_agents_node "$VALIDATOR" "$BAD/missing-sandbox--deliver-plan.md" \
  >"$TMPDIR_EVAL/missing-sandbox.out" 2>&1; then
  if rg -q 'Sandbox mode' "$TMPDIR_EVAL/missing-sandbox.out"; then
    _pass "missing Sandbox mode artifact fails with actionable message"
  else
    _fail "missing Sandbox mode failure did not mention Sandbox mode"
  fi
else
  _fail "missing Sandbox mode artifact should fail"
fi
|
|
495
|
+
|
|
496
|
+
cat > "$BAD/invalid-sandbox--deliver-plan.md" <<'MARKDOWN'
|
|
497
|
+
---
|
|
498
|
+
role: plan
|
|
499
|
+
created: 2026-05-06T00:00:00Z
|
|
500
|
+
---
|
|
501
|
+
|
|
502
|
+
## Plan
|
|
503
|
+
Plan exists.
|
|
504
|
+
|
|
505
|
+
## Definition Of Done
|
|
506
|
+
- **User outcome:** User can inspect the result.
|
|
507
|
+
- **Acceptance criteria:**
|
|
508
|
+
- [ ] It works - Evidence: test output
|
|
509
|
+
- **Stop-short risks:** Execution boundary could be ambiguous.
|
|
510
|
+
- **Durable docs target:** docs/test.md
|
|
511
|
+
- **Sandbox mode:** global-admin
|
|
512
|
+
|
|
513
|
+
### Wave 1 (parallel)
|
|
514
|
+
|
|
515
|
+
#### Task: Test
|
|
516
|
+
- **Files:** test.txt
|
|
517
|
+
- **Changes:** Test fixture.
|
|
518
|
+
- **Acceptance:** Validator reports invalid sandbox mode.
|
|
519
|
+
MARKDOWN
|
|
520
|
+
|
|
521
|
+
# Negative case: the plan fixture sets Sandbox mode to "global-admin".
# The validator must reject it and say "invalid Sandbox mode".
if ! flow_agents_node "$VALIDATOR" "$BAD/invalid-sandbox--deliver-plan.md" \
  >"$TMPDIR_EVAL/invalid-sandbox.out" 2>&1; then
  if rg -q 'invalid Sandbox mode' "$TMPDIR_EVAL/invalid-sandbox.out"; then
    _pass "invalid Sandbox mode artifact fails with actionable message"
  else
    _fail "invalid Sandbox mode failure did not mention invalid Sandbox mode"
  fi
else
  _fail "invalid Sandbox mode artifact should fail"
fi
|
|
528
|
+
|
|
529
|
+
cat > "$BAD/green-only--deliver.md" <<'MARKDOWN'
|
|
530
|
+
# Green Build Only
|
|
531
|
+
|
|
532
|
+
status: delivered
|
|
533
|
+
type: deliver
|
|
534
|
+
|
|
535
|
+
## Plan
|
|
536
|
+
Plan exists.
|
|
537
|
+
|
|
538
|
+
## Definition Of Done
|
|
539
|
+
- **User outcome:** User can act on the dashboard.
|
|
540
|
+
- **Acceptance criteria:**
|
|
541
|
+
- [x] Build passes - Evidence: build output
|
|
542
|
+
- **Stop-short risks:** Build could pass while the dashboard is not useful.
|
|
543
|
+
- **Durable docs target:** docs/test.md
|
|
544
|
+
|
|
545
|
+
## Verification Report
|
|
546
|
+
Build: PASS
|
|
547
|
+
Verdict: PASS
|
|
548
|
+
|
|
549
|
+
## Goal Fit Gate
|
|
550
|
+
- [x] Original user goal restated
|
|
551
|
+
|
|
552
|
+
## Final Acceptance
|
|
553
|
+
- [x] CI/relevant checks passed
|
|
554
|
+
MARKDOWN
|
|
555
|
+
|
|
556
|
+
# Negative case: the deliver fixture's verification report holds only
# "Build: PASS" / "Verdict: PASS" with no acceptance evidence.
# The validator must fail it with the "green build is not enough" message.
if ! flow_agents_node "$VALIDATOR" "$BAD/green-only--deliver.md" \
  >"$TMPDIR_EVAL/green.out" 2>&1; then
  if rg -q 'green build is not enough' "$TMPDIR_EVAL/green.out"; then
    _pass "green-build-only artifact fails usefulness gate"
  else
    _fail "green-build-only failure did not mention usefulness gate"
  fi
else
  _fail "green-build-only artifact should fail"
fi
|
|
563
|
+
|
|
564
|
+
cat > "$BAD/hidden-not-verified--deliver.md" <<'MARKDOWN'
|
|
565
|
+
# Hidden NOT_VERIFIED
|
|
566
|
+
|
|
567
|
+
status: delivered
|
|
568
|
+
type: deliver
|
|
569
|
+
|
|
570
|
+
## Plan
|
|
571
|
+
Plan exists.
|
|
572
|
+
|
|
573
|
+
## Definition Of Done
|
|
574
|
+
- **User outcome:** User can rely on verification.
|
|
575
|
+
- **Acceptance criteria:**
|
|
576
|
+
- [x] Browser checked - Evidence: attempted screenshot
|
|
577
|
+
- **Stop-short risks:** Browser check might be unavailable.
|
|
578
|
+
- **Durable docs target:** docs/test.md
|
|
579
|
+
|
|
580
|
+
## Verification Report
|
|
581
|
+
Build: [PASS] test
|
|
582
|
+
|
|
583
|
+
### Acceptance Criteria
|
|
584
|
+
- [NOT_VERIFIED] Browser checked - browser was unavailable.
|
|
585
|
+
|
|
586
|
+
### Verdict: PASS
|
|
587
|
+
|
|
588
|
+
## Goal Fit Gate
|
|
589
|
+
- [x] Original user goal restated
|
|
590
|
+
- [x] Every acceptance criterion has evidence
|
|
591
|
+
|
|
592
|
+
## Final Acceptance
|
|
593
|
+
- [x] CI/relevant checks passed
|
|
594
|
+
MARKDOWN
|
|
595
|
+
|
|
596
|
+
# Negative case: the fixture lists a [NOT_VERIFIED] acceptance criterion
# while still declaring "Verdict: PASS". The validator must fail and its
# output must mention NOT_VERIFIED.
if ! flow_agents_node "$VALIDATOR" "$BAD/hidden-not-verified--deliver.md" \
  >"$TMPDIR_EVAL/notverified.out" 2>&1; then
  if rg -q 'NOT_VERIFIED' "$TMPDIR_EVAL/notverified.out"; then
    _pass "hidden NOT_VERIFIED artifact requires explicit decision"
  else
    _fail "hidden NOT_VERIFIED failure did not mention NOT_VERIFIED"
  fi
else
  _fail "hidden NOT_VERIFIED artifact should fail"
fi
|
|
603
|
+
|
|
604
|
+
mkdir -p "$BAD/bad-sidecar"
|
|
605
|
+
cat > "$BAD/bad-sidecar/evidence.json" <<'JSON'
|
|
606
|
+
{
|
|
607
|
+
"schema_version": "1.0",
|
|
608
|
+
"task_slug": "bad-sidecar",
|
|
609
|
+
"verdict": "maybe",
|
|
610
|
+
"checks": []
|
|
611
|
+
}
|
|
612
|
+
JSON
|
|
613
|
+
|
|
614
|
+
# Negative case: evidence.json carries the verdict "maybe".
# The validator must fail and list the allowed verdict values.
if ! flow_agents_node "$VALIDATOR" "$BAD/bad-sidecar" \
  >"$TMPDIR_EVAL/bad-sidecar.out" 2>&1; then
  if rg -q 'verdict must be one of: pass, partial, fail, not_verified' "$TMPDIR_EVAL/bad-sidecar.out"; then
    _pass "bad sidecar fails with actionable message"
  else
    _fail "bad sidecar failure did not mention verdict"
  fi
else
  _fail "bad sidecar should fail"
fi
|
|
621
|
+
|
|
622
|
+
mkdir -p "$BAD/contradictory-evidence"
|
|
623
|
+
cat > "$BAD/contradictory-evidence/evidence.json" <<'JSON'
|
|
624
|
+
{
|
|
625
|
+
"schema_version": "1.0",
|
|
626
|
+
"task_slug": "contradictory-evidence",
|
|
627
|
+
"verdict": "pass",
|
|
628
|
+
"checks": [
|
|
629
|
+
{
|
|
630
|
+
"id": "failing-check",
|
|
631
|
+
"kind": "test",
|
|
632
|
+
"status": "fail",
|
|
633
|
+
"summary": "A failing check cannot produce a pass verdict."
|
|
634
|
+
}
|
|
635
|
+
]
|
|
636
|
+
}
|
|
637
|
+
JSON
|
|
638
|
+
|
|
639
|
+
# Negative case: evidence.json claims verdict "pass" while its only check
# has status "fail". The validator must flag the contradiction.
if ! flow_agents_node "$VALIDATOR" "$BAD/contradictory-evidence" \
  >"$TMPDIR_EVAL/contradictory-evidence.out" 2>&1; then
  if rg -q 'pass verdict requires all non-skipped checks to pass' "$TMPDIR_EVAL/contradictory-evidence.out"; then
    _pass "contradictory evidence sidecar fails with actionable message"
  else
    _fail "contradictory evidence failure did not mention pass verdict"
  fi
else
  _fail "contradictory evidence sidecar should fail"
fi
|
|
646
|
+
|
|
647
|
+
mkdir -p "$BAD/empty-evidence"
|
|
648
|
+
cat > "$BAD/empty-evidence/evidence.json" <<'JSON'
|
|
649
|
+
{
|
|
650
|
+
"schema_version": "1.0",
|
|
651
|
+
"task_slug": "empty-evidence",
|
|
652
|
+
"verdict": "pass",
|
|
653
|
+
"checks": []
|
|
654
|
+
}
|
|
655
|
+
JSON
|
|
656
|
+
|
|
657
|
+
# Negative case: evidence.json claims verdict "pass" with an empty checks
# array. The validator must fail and require at least one check.
if ! flow_agents_node "$VALIDATOR" "$BAD/empty-evidence" \
  >"$TMPDIR_EVAL/empty-evidence.out" 2>&1; then
  if rg -q 'checks must contain at least 1 item' "$TMPDIR_EVAL/empty-evidence.out"; then
    _pass "pass evidence with no checks fails with actionable message"
  else
    _fail "empty evidence failure did not mention checks"
  fi
else
  _fail "pass evidence with no checks should fail"
fi
|
|
664
|
+
|
|
665
|
+
mkdir -p "$BAD/bad-standard-ref"
|
|
666
|
+
cat > "$BAD/bad-standard-ref/evidence.json" <<'JSON'
|
|
667
|
+
{
|
|
668
|
+
"schema_version": "1.0",
|
|
669
|
+
"task_slug": "bad-standard-ref",
|
|
670
|
+
"verdict": "pass",
|
|
671
|
+
"checks": [
|
|
672
|
+
{
|
|
673
|
+
"id": "unknown-standard",
|
|
674
|
+
"kind": "policy",
|
|
675
|
+
"status": "pass",
|
|
676
|
+
"summary": "Unknown standards should not pass validation.",
|
|
677
|
+
"standard_refs": [
|
|
678
|
+
{
|
|
679
|
+
"standard": "spreadsheet",
|
|
680
|
+
"ref": "proof.xlsx"
|
|
681
|
+
}
|
|
682
|
+
]
|
|
683
|
+
}
|
|
684
|
+
]
|
|
685
|
+
}
|
|
686
|
+
JSON
|
|
687
|
+
|
|
688
|
+
# Negative case: a standard_refs entry names the standard "spreadsheet".
# The validator must fail and report the allowed standards.
if ! flow_agents_node "$VALIDATOR" "$BAD/bad-standard-ref" \
  >"$TMPDIR_EVAL/bad-standard-ref.out" 2>&1; then
  if rg -q 'standard must be one of' "$TMPDIR_EVAL/bad-standard-ref.out"; then
    _pass "evidence sidecar rejects unknown standard refs"
  else
    _fail "bad standard ref failure did not mention standard"
  fi
else
  _fail "unknown evidence standard should fail"
fi
|
|
695
|
+
|
|
696
|
+
mkdir -p "$BAD/legacy-string-ref"
|
|
697
|
+
cat > "$BAD/legacy-string-ref/acceptance.json" <<'JSON'
|
|
698
|
+
{
|
|
699
|
+
"schema_version": "1.0",
|
|
700
|
+
"task_slug": "legacy-string-ref",
|
|
701
|
+
"source_request": "Legacy refs are rejected.",
|
|
702
|
+
"criteria": [
|
|
703
|
+
{
|
|
704
|
+
"id": "legacy-string-ref",
|
|
705
|
+
"description": "Legacy string evidence refs fail validation.",
|
|
706
|
+
"status": "pass",
|
|
707
|
+
"evidence_refs": ["evidence.json"]
|
|
708
|
+
}
|
|
709
|
+
],
|
|
710
|
+
"goal_fit": {
|
|
711
|
+
"status": "pass",
|
|
712
|
+
"summary": "Legacy refs are rejected."
|
|
713
|
+
}
|
|
714
|
+
}
|
|
715
|
+
JSON
|
|
716
|
+
|
|
717
|
+
# Negative case: acceptance.json supplies evidence_refs as a plain string
# ("evidence.json") rather than an object. The validator must reject it.
if ! flow_agents_node "$VALIDATOR" "$BAD/legacy-string-ref" \
  >"$TMPDIR_EVAL/legacy-string-ref.out" 2>&1; then
  if rg -q 'evidence_refs\[0\] must be object' "$TMPDIR_EVAL/legacy-string-ref.out"; then
    _pass "custom validator rejects legacy string evidence refs"
  else
    _fail "legacy string ref failure did not mention object refs"
  fi
else
  _fail "legacy string evidence refs should fail"
fi
|
|
724
|
+
|
|
725
|
+
mkdir -p "$BAD/source-missing-required"
|
|
726
|
+
cat > "$BAD/source-missing-required/evidence.json" <<'JSON'
|
|
727
|
+
{
|
|
728
|
+
"schema_version": "1.0",
|
|
729
|
+
"task_slug": "source-missing-required",
|
|
730
|
+
"verdict": "pass",
|
|
731
|
+
"checks": [
|
|
732
|
+
{
|
|
733
|
+
"id": "source-missing-required",
|
|
734
|
+
"kind": "test",
|
|
735
|
+
"status": "pass",
|
|
736
|
+
"summary": "Source refs must include line and excerpt fields.",
|
|
737
|
+
"artifact_refs": [
|
|
738
|
+
{
|
|
739
|
+
"kind": "source",
|
|
740
|
+
"file": "src/index.ts"
|
|
741
|
+
}
|
|
742
|
+
]
|
|
743
|
+
}
|
|
744
|
+
]
|
|
745
|
+
}
|
|
746
|
+
JSON
|
|
747
|
+
|
|
748
|
+
# Negative case: a "source" artifact ref carries only a file path, with no
# line_start, line_end or excerpt. Any one of the three "is required"
# messages counts as an actionable failure.
if ! flow_agents_node "$VALIDATOR" "$BAD/source-missing-required" \
  >"$TMPDIR_EVAL/source-missing-required.out" 2>&1; then
  if rg -q 'line_start is required|line_end is required|excerpt is required' "$TMPDIR_EVAL/source-missing-required.out"; then
    _pass "custom validator rejects source refs missing required fields"
  else
    _fail "source missing required failure did not mention source fields"
  fi
else
  _fail "source ref missing required fields should fail"
fi
|
|
755
|
+
|
|
756
|
+
mkdir -p "$BAD/empty-non-source-ref"
|
|
757
|
+
cat > "$BAD/empty-non-source-ref/evidence.json" <<'JSON'
|
|
758
|
+
{
|
|
759
|
+
"schema_version": "1.0",
|
|
760
|
+
"task_slug": "empty-non-source-ref",
|
|
761
|
+
"verdict": "pass",
|
|
762
|
+
"checks": [
|
|
763
|
+
{
|
|
764
|
+
"id": "empty-non-source-ref",
|
|
765
|
+
"kind": "test",
|
|
766
|
+
"status": "pass",
|
|
767
|
+
"summary": "Empty non-source refs must fail.",
|
|
768
|
+
"artifact_refs": [
|
|
769
|
+
{
|
|
770
|
+
"kind": "artifact"
|
|
771
|
+
},
|
|
772
|
+
{
|
|
773
|
+
"kind": "command"
|
|
774
|
+
}
|
|
775
|
+
]
|
|
776
|
+
}
|
|
777
|
+
],
|
|
778
|
+
"external_evidence": [
|
|
779
|
+
{
|
|
780
|
+
"system": "provider",
|
|
781
|
+
"ref": {
|
|
782
|
+
"kind": "provider"
|
|
783
|
+
},
|
|
784
|
+
"summary": "Provider refs need URLs.",
|
|
785
|
+
"standard": "custom"
|
|
786
|
+
}
|
|
787
|
+
]
|
|
788
|
+
}
|
|
789
|
+
JSON
|
|
790
|
+
|
|
791
|
+
# Negative case: the artifact/command refs carry only a "kind", and the
# provider ref has no url. Either the schema-mismatch message or the
# "url is required" message is accepted.
if ! flow_agents_node "$VALIDATOR" "$BAD/empty-non-source-ref" \
  >"$TMPDIR_EVAL/empty-non-source-ref.out" 2>&1; then
  if rg -q 'must match at least one allowed schema|url is required' "$TMPDIR_EVAL/empty-non-source-ref.out"; then
    _pass "custom validator rejects empty non-source evidence refs"
  else
    _fail "empty non-source ref failure did not mention required ref detail"
  fi
else
  _fail "empty non-source refs should fail"
fi
|
|
798
|
+
|
|
799
|
+
mkdir -p "$BAD/open-critique"
|
|
800
|
+
cat > "$BAD/open-critique/critique.json" <<'JSON'
|
|
801
|
+
{
|
|
802
|
+
"schema_version": "1.0",
|
|
803
|
+
"task_slug": "open-critique",
|
|
804
|
+
"status": "pass",
|
|
805
|
+
"required": true,
|
|
806
|
+
"updated_at": "2026-05-06T00:00:00Z",
|
|
807
|
+
"critiques": [
|
|
808
|
+
{
|
|
809
|
+
"id": "blocking-review",
|
|
810
|
+
"reviewer": "tool-code-reviewer",
|
|
811
|
+
"reviewed_at": "2026-05-06T00:00:00Z",
|
|
812
|
+
"verdict": "fail",
|
|
813
|
+
"summary": "A medium severity finding is still open.",
|
|
814
|
+
"findings": [
|
|
815
|
+
{
|
|
816
|
+
"id": "medium-open",
|
|
817
|
+
"severity": "medium",
|
|
818
|
+
"status": "open",
|
|
819
|
+
"description": "Open findings must be resolved before critique can pass."
|
|
820
|
+
}
|
|
821
|
+
]
|
|
822
|
+
}
|
|
823
|
+
]
|
|
824
|
+
}
|
|
825
|
+
JSON
|
|
826
|
+
|
|
827
|
+
# Negative case: critique.json has top-level status "pass" while a
# medium-severity finding is still "open". The validator must block it.
if ! flow_agents_node "$VALIDATOR" "$BAD/open-critique" \
  >"$TMPDIR_EVAL/open-critique.out" 2>&1; then
  if rg -q 'critique pass cannot have open findings' "$TMPDIR_EVAL/open-critique.out"; then
    _pass "critique sidecar blocks open findings"
  else
    _fail "open critique failure did not mention open findings"
  fi
else
  _fail "critique pass with open finding should fail"
fi
|
|
834
|
+
|
|
835
|
+
mkdir -p "$BAD/bad-learning"
|
|
836
|
+
cat > "$BAD/bad-learning/learning.json" <<'JSON'
|
|
837
|
+
{
|
|
838
|
+
"schema_version": "1.0",
|
|
839
|
+
"task_slug": "bad-learning",
|
|
840
|
+
"status": "learned",
|
|
841
|
+
"updated_at": "2026-05-06T00:00:00Z",
|
|
842
|
+
"records": [
|
|
843
|
+
{
|
|
844
|
+
"id": "missing-source",
|
|
845
|
+
"recorded_at": "2026-05-06T00:00:00Z",
|
|
846
|
+
"source_refs": [],
|
|
847
|
+
"outcome": "success",
|
|
848
|
+
"facts": ["A learning record without evidence should fail."],
|
|
849
|
+
"interpretation": "Learning must be traceable.",
|
|
850
|
+
"routing": [
|
|
851
|
+
{
|
|
852
|
+
"target": "eval",
|
|
853
|
+
"action": "Reject untraceable learning records.",
|
|
854
|
+
"status": "open"
|
|
855
|
+
}
|
|
856
|
+
]
|
|
857
|
+
}
|
|
858
|
+
]
|
|
859
|
+
}
|
|
860
|
+
JSON
|
|
861
|
+
|
|
862
|
+
# Negative case: the learning record has an empty source_refs array.
# The validator must fail and require at least one source ref.
if ! flow_agents_node "$VALIDATOR" "$BAD/bad-learning" \
  >"$TMPDIR_EVAL/bad-learning.out" 2>&1; then
  if rg -q 'source_refs must contain at least 1 item' "$TMPDIR_EVAL/bad-learning.out"; then
    _pass "learning sidecar requires traceable source refs"
  else
    _fail "bad learning failure did not mention source refs"
  fi
else
  _fail "learning record without source refs should fail"
fi
|
|
869
|
+
|
|
870
|
+
mkdir -p "$BAD/empty-learning"
|
|
871
|
+
cat > "$BAD/empty-learning/learning.json" <<'JSON'
|
|
872
|
+
{
|
|
873
|
+
"schema_version": "1.0",
|
|
874
|
+
"task_slug": "empty-learning",
|
|
875
|
+
"status": "learned",
|
|
876
|
+
"updated_at": "2026-05-06T00:00:00Z",
|
|
877
|
+
"records": []
|
|
878
|
+
}
|
|
879
|
+
JSON
|
|
880
|
+
|
|
881
|
+
# Negative case: learning.json has status "learned" with an empty records
# array. The validator must fail and require at least one record.
if ! flow_agents_node "$VALIDATOR" "$BAD/empty-learning" \
  >"$TMPDIR_EVAL/empty-learning.out" 2>&1; then
  if rg -q 'records must contain at least 1 item' "$TMPDIR_EVAL/empty-learning.out"; then
    _pass "learning sidecar requires at least one record"
  else
    _fail "empty learning failure did not mention records"
  fi
else
  _fail "learned status with no records should fail"
fi
|
|
888
|
+
|
|
889
|
+
mkdir -p "$BAD/learned-missing-correction"
|
|
890
|
+
cat > "$BAD/learned-missing-correction/learning.json" <<'JSON'
|
|
891
|
+
{
|
|
892
|
+
"schema_version": "1.0",
|
|
893
|
+
"task_slug": "learned-missing-correction",
|
|
894
|
+
"status": "learned",
|
|
895
|
+
"updated_at": "2026-05-06T00:00:00Z",
|
|
896
|
+
"records": [
|
|
897
|
+
{
|
|
898
|
+
"id": "missing-correction",
|
|
899
|
+
"recorded_at": "2026-05-06T00:00:00Z",
|
|
900
|
+
"source_refs": ["evidence.json"],
|
|
901
|
+
"outcome": "success",
|
|
902
|
+
"facts": ["Terminal learned records must include a correction decision."],
|
|
903
|
+
"interpretation": "Learned closeout cannot omit correction.needed.",
|
|
904
|
+
"routing": [
|
|
905
|
+
{
|
|
906
|
+
"target": "none",
|
|
907
|
+
"action": "No follow-up.",
|
|
908
|
+
"status": "completed"
|
|
909
|
+
}
|
|
910
|
+
]
|
|
911
|
+
}
|
|
912
|
+
]
|
|
913
|
+
}
|
|
914
|
+
JSON
|
|
915
|
+
|
|
916
|
+
# Negative case: a "learned" record has no correction object.
# The validator must fail with a message matching correction…needed…required.
if ! flow_agents_node "$VALIDATOR" "$BAD/learned-missing-correction" \
  >"$TMPDIR_EVAL/learned-missing-correction.out" 2>&1; then
  if rg -q 'correction.*needed.*required' "$TMPDIR_EVAL/learned-missing-correction.out"; then
    _pass "learning sidecar requires correction decision for learned status"
  else
    _fail "missing correction failure did not mention correction.needed"
  fi
else
  _fail "learned status without correction should fail"
fi
|
|
923
|
+
|
|
924
|
+
mkdir -p "$BAD/open-learning-routing"
|
|
925
|
+
cat > "$BAD/open-learning-routing/learning.json" <<'JSON'
|
|
926
|
+
{
|
|
927
|
+
"schema_version": "1.0",
|
|
928
|
+
"task_slug": "open-learning-routing",
|
|
929
|
+
"status": "learned",
|
|
930
|
+
"updated_at": "2026-05-06T00:00:00Z",
|
|
931
|
+
"records": [
|
|
932
|
+
{
|
|
933
|
+
"id": "open-routing",
|
|
934
|
+
"recorded_at": "2026-05-06T00:00:00Z",
|
|
935
|
+
"source_refs": ["evidence.json"],
|
|
936
|
+
"outcome": "mixed",
|
|
937
|
+
"facts": ["A follow-up remains open."],
|
|
938
|
+
"interpretation": "Open learning routing should keep the top-level status from being learned.",
|
|
939
|
+
"routing": [
|
|
940
|
+
{
|
|
941
|
+
"target": "backlog",
|
|
942
|
+
"action": "Create a follow-up issue.",
|
|
943
|
+
"status": "open"
|
|
944
|
+
}
|
|
945
|
+
]
|
|
946
|
+
}
|
|
947
|
+
]
|
|
948
|
+
}
|
|
949
|
+
JSON
|
|
950
|
+
|
|
951
|
+
# Negative case: top-level status is "learned" while a routing entry is
# still "open". The validator must refuse that combination.
if ! flow_agents_node "$VALIDATOR" "$BAD/open-learning-routing" \
  >"$TMPDIR_EVAL/open-learning-routing.out" 2>&1; then
  if rg -q 'learning status learned cannot have open routing' "$TMPDIR_EVAL/open-learning-routing.out"; then
    _pass "learning sidecar keeps open routing out of learned status"
  else
    _fail "open learning routing failure did not mention status"
  fi
else
  _fail "learned status with open routing should fail"
fi
|
|
958
|
+
|
|
959
|
+
mkdir -p "$BAD/bad-release-gate"
|
|
960
|
+
cat > "$BAD/bad-release-gate/release.json" <<'JSON'
|
|
961
|
+
{
|
|
962
|
+
"schema_version": "1.0",
|
|
963
|
+
"task_slug": "bad-release-gate",
|
|
964
|
+
"decision": "merge",
|
|
965
|
+
"updated_at": "2026-05-06T00:00:00Z",
|
|
966
|
+
"scope": "Bad release fixture.",
|
|
967
|
+
"evidence_ref": "evidence.json",
|
|
968
|
+
"gates": [
|
|
969
|
+
{
|
|
970
|
+
"name": "merge",
|
|
971
|
+
"status": "not_verified",
|
|
972
|
+
"summary": "CI was not verified."
|
|
973
|
+
}
|
|
974
|
+
],
|
|
975
|
+
"rollback_plan": {
|
|
976
|
+
"status": "not_required",
|
|
977
|
+
"summary": "No deploy.",
|
|
978
|
+
"owner": "maintainer"
|
|
979
|
+
},
|
|
980
|
+
"observability_plan": {
|
|
981
|
+
"status": "not_required",
|
|
982
|
+
"summary": "No runtime surface."
|
|
983
|
+
},
|
|
984
|
+
"post_deploy_checks": [],
|
|
985
|
+
"docs": {
|
|
986
|
+
"status": "updated",
|
|
987
|
+
"summary": "Docs are irrelevant for this negative fixture."
|
|
988
|
+
}
|
|
989
|
+
}
|
|
990
|
+
JSON
|
|
991
|
+
|
|
992
|
+
# Negative case: release.json records decision "merge" while its merge gate
# has status "not_verified". The validator must block the positive decision.
if ! flow_agents_node "$VALIDATOR" "$BAD/bad-release-gate" \
  >"$TMPDIR_EVAL/bad-release-gate.out" 2>&1; then
  if rg -q 'positive release decision requires all required gates to pass' "$TMPDIR_EVAL/bad-release-gate.out"; then
    _pass "release sidecar blocks positive decisions with non-pass gates"
  else
    _fail "bad release gate failure did not mention gate pass"
  fi
else
  _fail "positive release decision with non-pass gate should fail"
fi
|
|
999
|
+
|
|
1000
|
+
# Negative fixture: a "deploy" decision with a passing deploy gate but with
# rollback and observability plans marked "missing" and no post-deploy checks.
# The validator must exit non-zero and report both the rollback_plan and the
# post_deploy_checks requirements.
mkdir -p "$BAD/bad-deploy-release"
cat <<'JSON' > "$BAD/bad-deploy-release/release.json"
{
"schema_version": "1.0",
"task_slug": "bad-deploy-release",
"decision": "deploy",
"updated_at": "2026-05-06T00:00:00Z",
"scope": "Bad deploy fixture.",
"evidence_ref": "evidence.json",
"gates": [
{
"name": "deploy",
"status": "pass",
"summary": "Deploy gate claims pass."
}
],
"rollback_plan": {
"status": "missing",
"summary": "Rollback is missing.",
"owner": "maintainer"
},
"observability_plan": {
"status": "missing",
"summary": "Observability is missing."
},
"post_deploy_checks": [],
"docs": {
"status": "updated",
"summary": "Docs are irrelevant for this negative fixture."
}
}
JSON

if ! flow_agents_node "$VALIDATOR" "$BAD/bad-deploy-release" >"$TMPDIR_EVAL/bad-deploy-release.out" 2>&1; then
  # Both diagnostics have to be present for the scenario to count as a pass.
  if rg -q 'deploy decision requires rollback_plan status ready' "$TMPDIR_EVAL/bad-deploy-release.out" \
    && rg -q 'deploy decision requires post_deploy_checks' "$TMPDIR_EVAL/bad-deploy-release.out"; then
    _pass "release sidecar requires deploy rollback and post-deploy checks"
  else
    _fail "bad deploy release failure did not mention operational plans"
  fi
else
  # The validator accepted a fixture that must be rejected.
  _fail "deploy decision without operational plans should fail"
fi
|
|
1040
|
+
|
|
1041
|
+
# Negative fixture: a "deploy" decision whose gate list only contains a passing
# "merge" gate; every operational plan is ready. The validator must exit
# non-zero and state that the deploy gate has to pass.
mkdir -p "$BAD/bad-deploy-missing-gate"
cat <<'JSON' > "$BAD/bad-deploy-missing-gate/release.json"
{
"schema_version": "1.0",
"task_slug": "bad-deploy-missing-gate",
"decision": "deploy",
"updated_at": "2026-05-06T00:00:00Z",
"scope": "Bad deploy missing gate fixture.",
"evidence_ref": "evidence.json",
"gates": [
{
"name": "merge",
"status": "pass",
"summary": "Merge gate passed, but deploy gate is missing."
}
],
"rollback_plan": {
"status": "ready",
"summary": "Rollback is ready.",
"owner": "maintainer"
},
"observability_plan": {
"status": "ready",
"summary": "Observability is ready."
},
"post_deploy_checks": [
{
"id": "smoke",
"status": "planned",
"summary": "Smoke test is planned."
}
],
"docs": {
"status": "updated",
"summary": "Docs are irrelevant for this negative fixture."
}
}
JSON

if ! flow_agents_node "$VALIDATOR" "$BAD/bad-deploy-missing-gate" >"$TMPDIR_EVAL/bad-deploy-missing-gate.out" 2>&1; then
  # Rejected as expected; check that the message points at the deploy gate.
  if rg -q 'positive release decision requires deploy gate to pass' "$TMPDIR_EVAL/bad-deploy-missing-gate.out"; then
    _pass "release sidecar requires matching gate for positive decisions"
  else
    _fail "bad deploy missing gate failure did not mention matching gate"
  fi
else
  # The validator accepted a fixture that must be rejected.
  _fail "deploy decision without deploy gate should fail"
fi
|
|
1087
|
+
|
|
1088
|
+
# Negative fixture: a "deploy" decision with a passing deploy gate and ready
# plans, but whose single post-deploy check has status "fail". The validator
# must exit non-zero and say post_deploy_checks must be planned or pass.
mkdir -p "$BAD/bad-deploy-check"
cat <<'JSON' > "$BAD/bad-deploy-check/release.json"
{
"schema_version": "1.0",
"task_slug": "bad-deploy-check",
"decision": "deploy",
"updated_at": "2026-05-06T00:00:00Z",
"scope": "Bad deploy check fixture.",
"evidence_ref": "evidence.json",
"gates": [
{
"name": "deploy",
"status": "pass",
"summary": "Deploy gate passed."
}
],
"rollback_plan": {
"status": "ready",
"summary": "Rollback is ready.",
"owner": "maintainer"
},
"observability_plan": {
"status": "ready",
"summary": "Observability is ready."
},
"post_deploy_checks": [
{
"id": "smoke",
"status": "fail",
"summary": "Smoke test failed."
}
],
"docs": {
"status": "updated",
"summary": "Docs are irrelevant for this negative fixture."
}
}
JSON

if ! flow_agents_node "$VALIDATOR" "$BAD/bad-deploy-check" >"$TMPDIR_EVAL/bad-deploy-check.out" 2>&1; then
  # Rejected as expected; check that the message names the check status rule.
  if rg -q 'deploy decision requires post_deploy_checks to be planned or pass' "$TMPDIR_EVAL/bad-deploy-check.out"; then
    _pass "release sidecar rejects failed deploy checks"
  else
    _fail "bad deploy check failure did not mention post-deploy status"
  fi
else
  # The validator accepted a fixture that must be rejected.
  _fail "deploy decision with failed post-deploy check should fail"
fi
|
|
1134
|
+
|
|
1135
|
+
# Negative fixture: a directory holding only a copied deliver Markdown file and
# no JSON sidecars. Run with --require-sidecars, the validator must exit
# non-zero and report that a required sidecar is missing.
mkdir -p "$BAD/missing-sidecars"
cp "$ARTIFACT_DIR/workflow-contract-e2e--deliver.md" "$BAD/missing-sidecars/missing-sidecars--deliver.md"

if ! flow_agents_node "$VALIDATOR" --require-sidecars "$BAD/missing-sidecars" >"$TMPDIR_EVAL/missing-sidecars.out" 2>&1; then
  # Rejected as expected; check that the message is actionable.
  if rg -q 'required sidecar is missing' "$TMPDIR_EVAL/missing-sidecars.out"; then
    _pass "missing required sidecars fail with actionable message"
  else
    _fail "missing sidecar failure did not mention required sidecar"
  fi
else
  # The validator accepted a fixture that must be rejected.
  _fail "missing required sidecars should fail"
fi
|
|
1145
|
+
|
|
1146
|
+
# Negative fixture: state.json and acceptance.json carry different task_slug
# values ("left" vs "right"). The validator must exit non-zero and report a
# sidecar task_slug mismatch.
mkdir -p "$BAD/mismatched-sidecars"
cat <<'JSON' > "$BAD/mismatched-sidecars/state.json"
{
"schema_version": "1.0",
"task_slug": "left",
"status": "planned",
"phase": "planning",
"updated_at": "2026-05-06T00:00:00Z",
"next_action": {
"status": "continue",
"summary": "Continue."
}
}
JSON
cat <<'JSON' > "$BAD/mismatched-sidecars/acceptance.json"
{
"schema_version": "1.0",
"task_slug": "right",
"criteria": [
{
"id": "criterion",
"description": "Criterion.",
"status": "pending"
}
],
"goal_fit": {
"status": "pending",
"summary": "Pending."
}
}
JSON

if ! flow_agents_node "$VALIDATOR" "$BAD/mismatched-sidecars" >"$TMPDIR_EVAL/mismatched-sidecars.out" 2>&1; then
  # Rejected as expected; check that the message names the slug mismatch.
  if rg -q 'sidecar task_slug mismatch' "$TMPDIR_EVAL/mismatched-sidecars.out"; then
    _pass "mismatched sidecar task slugs fail with actionable message"
  else
    _fail "mismatched sidecar failure did not mention task_slug"
  fi
else
  # The validator accepted a fixture that must be rejected.
  _fail "mismatched sidecar task slugs should fail"
fi
|
|
1185
|
+
|
|
1186
|
+
# Negative fixture: state.json whose updated_at value has no timezone suffix
# ("2026-05-06T00:00:00"). The validator must exit non-zero and report that
# updated_at must be date-time.
mkdir -p "$BAD/bad-date"
cat <<'JSON' > "$BAD/bad-date/state.json"
{
"schema_version": "1.0",
"task_slug": "bad-date",
"status": "planned",
"phase": "planning",
"updated_at": "2026-05-06T00:00:00",
"next_action": {
"status": "continue",
"summary": "Continue."
}
}
JSON

if ! flow_agents_node "$VALIDATOR" "$BAD/bad-date" >"$TMPDIR_EVAL/bad-date.out" 2>&1; then
  # Rejected as expected; check that the message names the date-time rule.
  if rg -q 'updated_at must be date-time' "$TMPDIR_EVAL/bad-date.out"; then
    _pass "date-time without timezone fails with actionable message"
  else
    _fail "bad date failure did not mention date-time"
  fi
else
  # The validator accepted a fixture that must be rejected.
  _fail "date-time without timezone should fail"
fi
|
|
1208
|
+
|
|
1209
|
+
# Negative fixture: the deliver Markdown and the state/evidence/handoff/critique
# sidecars are copied from the artifact directory, but acceptance.json is
# replaced with one listing five criteria. Run with --require-sidecars, the
# validator must exit non-zero and report the criteria count mismatch
# (the expected diagnostic says the Markdown defines 4).
mkdir -p "$BAD/extra-criteria"
cp "$ARTIFACT_DIR/workflow-contract-e2e--deliver.md" "$BAD/extra-criteria/extra-criteria--deliver.md"
cp "$ARTIFACT_DIR/state.json" "$BAD/extra-criteria/state.json"
cp "$ARTIFACT_DIR/evidence.json" "$BAD/extra-criteria/evidence.json"
cp "$ARTIFACT_DIR/handoff.json" "$BAD/extra-criteria/handoff.json"
cp "$ARTIFACT_DIR/critique.json" "$BAD/extra-criteria/critique.json"
cat > "$BAD/extra-criteria/acceptance.json" <<'JSON'
{
"schema_version": "1.0",
"task_slug": "workflow-contract-e2e",
"criteria": [
{"id": "a", "description": "A.", "status": "pass"},
{"id": "b", "description": "B.", "status": "pass"},
{"id": "c", "description": "C.", "status": "pass"},
{"id": "d", "description": "D.", "status": "pass"},
{"id": "e", "description": "E.", "status": "pass"}
],
"goal_fit": {
"status": "pass",
"summary": "Pass."
}
}
JSON

if flow_agents_node "$VALIDATOR" --require-sidecars "$BAD/extra-criteria" >"$TMPDIR_EVAL/extra-criteria.out" 2>&1; then
  _fail "extra acceptance criteria should fail"
# -F: match the message literally; as a regex the "." in "acceptance.json"
# would match any character and make the assertion looser than intended.
elif rg -qF 'acceptance.json has 5 criteria but Markdown defines 4' "$TMPDIR_EVAL/extra-criteria.out"; then
  _pass "extra acceptance criteria fail with actionable message"
else
  _fail "extra criteria failure did not mention criteria mismatch"
fi
|
|
1240
|
+
|
|
1241
|
+
# Final verdict: exit 1 with the issue count when the errors counter is not
# zero, otherwise report success and exit 0.
if ! [[ "$errors" -eq 0 ]]; then
  echo "Workflow artifact integration failed: $errors issue(s)."
  exit 1
fi

echo "Workflow artifact integration passed."
exit 0
|