@kontourai/flow-agents 0.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.githooks/pre-push +11 -0
- package/.github/workflows/ci.yml +210 -0
- package/.github/workflows/docs-pages.yml +52 -0
- package/.github/workflows/publish-npm.yml +104 -0
- package/AGENTS.md +26 -0
- package/CHANGELOG.md +66 -0
- package/CODE_OF_CONDUCT.md +25 -0
- package/CONTEXT.md +300 -0
- package/CONTRIBUTING.md +44 -0
- package/LICENSE +201 -0
- package/README.md +129 -0
- package/SECURITY.md +33 -0
- package/agent-cards/dev.json +19 -0
- package/agents/dev.json +127 -0
- package/agents/tool-code-reviewer.json +61 -0
- package/agents/tool-dependencies-updater.json +118 -0
- package/agents/tool-explore-config.json +92 -0
- package/agents/tool-explore-deps.json +92 -0
- package/agents/tool-explore-entry.json +92 -0
- package/agents/tool-explore-patterns.json +92 -0
- package/agents/tool-explore-structure.json +92 -0
- package/agents/tool-explore-tests.json +92 -0
- package/agents/tool-planner.json +57 -0
- package/agents/tool-playwright.json +145 -0
- package/agents/tool-security-reviewer.json +56 -0
- package/agents/tool-verifier.json +61 -0
- package/agents/tool-worker.json +58 -0
- package/build/src/cli/console-learning-projection.js +123 -0
- package/build/src/cli/docs-preview.js +39 -0
- package/build/src/cli/effective-backlog-settings.js +102 -0
- package/build/src/cli/export-bookmarks.js +38 -0
- package/build/src/cli/fixture-retirement-audit.js +140 -0
- package/build/src/cli/flow-kit.js +138 -0
- package/build/src/cli/import-bookmarks.js +50 -0
- package/build/src/cli/init.js +239 -0
- package/build/src/cli/instinct-cli.js +93 -0
- package/build/src/cli/promote-workflow-artifact.js +63 -0
- package/build/src/cli/publish-change-helper.js +154 -0
- package/build/src/cli/pull-work-provider.js +469 -0
- package/build/src/cli/runtime-adapter.js +23 -0
- package/build/src/cli/telemetry-doctor.js +221 -0
- package/build/src/cli/usage-feedback.js +443 -0
- package/build/src/cli/validate-hook-influence.js +152 -0
- package/build/src/cli/validate-source-tree.js +31 -0
- package/build/src/cli/validate-workflow-artifacts.js +486 -0
- package/build/src/cli/veritas-governance.js +262 -0
- package/build/src/cli/workflow-artifact-cleanup-audit.js +272 -0
- package/build/src/cli/workflow-sidecar.js +816 -0
- package/build/src/cli.js +89 -0
- package/build/src/flow-kit/validate.js +75 -0
- package/build/src/lib/args.js +45 -0
- package/build/src/lib/fs.js +62 -0
- package/build/src/lib/workflow-learning-projection.js +334 -0
- package/build/src/runtime-adapters.js +146 -0
- package/build/src/tools/build-universal-bundles.js +397 -0
- package/build/src/tools/common.js +56 -0
- package/build/src/tools/filter-installed-packs.js +132 -0
- package/build/src/tools/generate-context-map.js +198 -0
- package/build/src/tools/validate-package.js +64 -0
- package/build/src/tools/validate-source-tree.js +622 -0
- package/console.telemetry.json +176 -0
- package/context/base-rules.md +17 -0
- package/context/code-review-standards.md +62 -0
- package/context/coding-standards.md +42 -0
- package/context/common/orchestrators.md +12 -0
- package/context/common/subagents.md +28 -0
- package/context/contracts/artifact-contract.md +182 -0
- package/context/contracts/builder-kit-workflow-state-contract.md +319 -0
- package/context/contracts/delivery-contract.md +69 -0
- package/context/contracts/execution-contract.md +53 -0
- package/context/contracts/governance-adapter-contract.md +67 -0
- package/context/contracts/planning-contract.md +85 -0
- package/context/contracts/review-contract.md +104 -0
- package/context/contracts/sandbox-policy.md +52 -0
- package/context/contracts/verification-contract.md +134 -0
- package/context/contracts/work-item-contract.md +215 -0
- package/context/deferred/demo-mode.md +33 -0
- package/context/deferred/languages/go.md +31 -0
- package/context/deferred/languages/python.md +31 -0
- package/context/deferred/languages/typescript.md +34 -0
- package/context/deferred/parallelization.md +35 -0
- package/context/deferred/worktree-isolation.md +24 -0
- package/context/development-workflow.md +50 -0
- package/context/scripts/context-budget/budget-scan.sh +166 -0
- package/context/scripts/detect-tools.sh +3 -0
- package/context/scripts/discover-agents.sh +28 -0
- package/context/scripts/git-status.sh +49 -0
- package/context/scripts/hooks/config-protection.js +79 -0
- package/context/scripts/hooks/desktop-notify.sh +39 -0
- package/context/scripts/hooks/governance-audit.sh +135 -0
- package/context/scripts/hooks/lib/audit-transport.sh +40 -0
- package/context/scripts/hooks/lib/hook-flags.js +49 -0
- package/context/scripts/hooks/lib/patterns.sh +57 -0
- package/context/scripts/hooks/lib/resolve-formatter.js +80 -0
- package/context/scripts/hooks/post-edit-accumulator.js +66 -0
- package/context/scripts/hooks/pre-commit-quality.js +194 -0
- package/context/scripts/hooks/quality-gate.js +93 -0
- package/context/scripts/hooks/report-only-guard.js +21 -0
- package/context/scripts/hooks/run-hook.js +136 -0
- package/context/scripts/hooks/stop-format-typecheck.js +141 -0
- package/context/scripts/hooks/stop-goal-fit.js +337 -0
- package/context/scripts/hooks/workflow-steering.js +250 -0
- package/context/scripts/telemetry/console-presets.sh +14 -0
- package/context/scripts/telemetry/install-console-config.sh +214 -0
- package/context/scripts/telemetry/lib/config.sh +85 -0
- package/context/scripts/telemetry/lib/enrich.sh +115 -0
- package/context/scripts/telemetry/lib/redact.sh +22 -0
- package/context/scripts/telemetry/lib/session.sh +63 -0
- package/context/scripts/telemetry/lib/transport.sh +183 -0
- package/context/scripts/telemetry/lib/usage.sh +29 -0
- package/context/scripts/telemetry/sync-agents.sh +173 -0
- package/context/scripts/telemetry/telemetry.conf +23 -0
- package/context/scripts/telemetry/telemetry.sh +387 -0
- package/context/scripts/validate-package.sh +89 -0
- package/context/settings/backlog-provider-settings.json +54 -0
- package/context/templates/core/identity.md +26 -0
- package/context/templates/core/user.md +15 -0
- package/docs/_config.yml +15 -0
- package/docs/_layouts/default.html +87 -0
- package/docs/adr/0001-flow-agents-consumes-flow.md +77 -0
- package/docs/adr/0002-flow-kits-as-extension-unit.md +13 -0
- package/docs/adr/0003-flow-agents-coordinates-kits-and-adapters.md +13 -0
- package/docs/adr/0004-gates-expect-surface-claims.md +15 -0
- package/docs/adr/0005-kubernetes-inspired-resource-contracts.md +48 -0
- package/docs/adr/0006-typescript-first-source-policy.md +98 -0
- package/docs/agent-system-guidebook.md +391 -0
- package/docs/agent-usage-feedback-loop.md +351 -0
- package/docs/assets/favicon.svg +13 -0
- package/docs/assets/og-image.png +0 -0
- package/docs/assets/site.css +774 -0
- package/docs/assets/site.js +139 -0
- package/docs/configurable-workflow-routing.md +174 -0
- package/docs/context-map.md +145 -0
- package/docs/developer-architecture.md +145 -0
- package/docs/developer-hook-setup.md +61 -0
- package/docs/fixture-ownership.md +44 -0
- package/docs/flow-kit-repository-contract.md +180 -0
- package/docs/index.md +129 -0
- package/docs/kontour-resource-contract.md +358 -0
- package/docs/migrations.md +64 -0
- package/docs/north-star.md +322 -0
- package/docs/operating-layers.md +110 -0
- package/docs/repository-structure.md +132 -0
- package/docs/sandbox-policy.md +56 -0
- package/docs/skills-map.md +203 -0
- package/docs/standards-register.md +96 -0
- package/docs/veritas-integration.md +165 -0
- package/docs/work-item-adapters.md +72 -0
- package/docs/workflow-artifact-lifecycle.md +141 -0
- package/docs/workflow-eval-strategy.md +295 -0
- package/docs/workflow-shared-contracts.md +51 -0
- package/docs/workflow-usage-guide.md +443 -0
- package/evals/ARCHITECTURE.md +143 -0
- package/evals/CONVENTIONS.md +58 -0
- package/evals/README.md +128 -0
- package/evals/acceptance/run.sh +29 -0
- package/evals/acceptance/test_claude_harness.sh +242 -0
- package/evals/acceptance/test_codex_harness.sh +108 -0
- package/evals/acceptance/test_kiro_harness.sh +128 -0
- package/evals/cases/dev/404.html +97 -0
- package/evals/cases/dev/code-review.yaml +44 -0
- package/evals/cases/dev/dashboard.html +300 -0
- package/evals/cases/dev/deliver.yaml +66 -0
- package/evals/cases/dev/dependency-update.yaml +16 -0
- package/evals/cases/dev/explore.yaml +20 -0
- package/evals/cases/dev/index.html +370 -0
- package/evals/cases/dev/package-lock.json +28 -0
- package/evals/cases/dev/package.json +16 -0
- package/evals/cases/dev/plan-work.yaml +20 -0
- package/evals/cases/dev/promptfooconfig.yaml +666 -0
- package/evals/cases/dev/search-first.yaml +20 -0
- package/evals/cases/dev/tdd-workflow.yaml +48 -0
- package/evals/cases/dev/verify-work.yaml +44 -0
- package/evals/cases/dev/workflow.yaml +34 -0
- package/evals/ci/run-baseline.sh +283 -0
- package/evals/fixtures/backlog-provider-settings/global-default.json +44 -0
- package/evals/fixtures/backlog-provider-settings/project-override.json +53 -0
- package/evals/fixtures/builder-kit-workflow-state/baseline-freshness-resolution-hint.json +139 -0
- package/evals/fixtures/builder-kit-workflow-state/direct-primitive-stop.json +59 -0
- package/evals/fixtures/builder-kit-workflow-state/empty-board-route-shape.json +55 -0
- package/evals/fixtures/builder-kit-workflow-state/happy-path.json +71 -0
- package/evals/fixtures/builder-kit-workflow-state/mid-work-resume.json +80 -0
- package/evals/fixtures/builder-kit-workflow-state/missing-prestep-recovery.json +65 -0
- package/evals/fixtures/builder-kit-workflow-state/product-build-chaining.json +60 -0
- package/evals/fixtures/builder-kit-workflow-state/stale-continuation-requires-new-probe.json +57 -0
- package/evals/fixtures/console-learning-projection/artifacts/console-learning-correction/learning.json +50 -0
- package/evals/fixtures/console-learning-projection/artifacts/console-learning-open-route/learning.json +41 -0
- package/evals/fixtures/flow-kit-repository/invalid-absolute-path/kit.json +8 -0
- package/evals/fixtures/flow-kit-repository/invalid-asset-section/flows/review.flow.json +6 -0
- package/evals/fixtures/flow-kit-repository/invalid-asset-section/kit.json +11 -0
- package/evals/fixtures/flow-kit-repository/invalid-duplicate-flow/flows/review.flow.json +6 -0
- package/evals/fixtures/flow-kit-repository/invalid-duplicate-flow/kit.json +9 -0
- package/evals/fixtures/flow-kit-repository/invalid-id/flows/review.flow.json +6 -0
- package/evals/fixtures/flow-kit-repository/invalid-id/kit.json +8 -0
- package/evals/fixtures/flow-kit-repository/invalid-malformed-json/kit.json +8 -0
- package/evals/fixtures/flow-kit-repository/invalid-missing-flow/kit.json +8 -0
- package/evals/fixtures/flow-kit-repository/invalid-missing-id/flows/review.flow.json +6 -0
- package/evals/fixtures/flow-kit-repository/invalid-missing-id/kit.json +7 -0
- package/evals/fixtures/flow-kit-repository/invalid-missing-schema-version/flows/review.flow.json +6 -0
- package/evals/fixtures/flow-kit-repository/invalid-missing-schema-version/kit.json +7 -0
- package/evals/fixtures/flow-kit-repository/invalid-name/flows/review.flow.json +6 -0
- package/evals/fixtures/flow-kit-repository/invalid-name/kit.json +8 -0
- package/evals/fixtures/flow-kit-repository/invalid-schema-version/flows/review.flow.json +6 -0
- package/evals/fixtures/flow-kit-repository/invalid-schema-version/kit.json +8 -0
- package/evals/fixtures/flow-kit-repository/invalid-traversal/kit.json +8 -0
- package/evals/fixtures/flow-kit-repository/mixed-runtime-kit/adapters/example.json +3 -0
- package/evals/fixtures/flow-kit-repository/mixed-runtime-kit/assets/example.txt +1 -0
- package/evals/fixtures/flow-kit-repository/mixed-runtime-kit/docs/README.md +3 -0
- package/evals/fixtures/flow-kit-repository/mixed-runtime-kit/flows/runtime.flow.json +26 -0
- package/evals/fixtures/flow-kit-repository/mixed-runtime-kit/kit-evals/example.json +3 -0
- package/evals/fixtures/flow-kit-repository/mixed-runtime-kit/kit-skills/mixed/SKILL.md +3 -0
- package/evals/fixtures/flow-kit-repository/mixed-runtime-kit/kit.json +44 -0
- package/evals/fixtures/flow-kit-repository/valid-local-kit/docs/README.md +3 -0
- package/evals/fixtures/flow-kit-repository/valid-local-kit/flows/review.flow.json +26 -0
- package/evals/fixtures/flow-kit-repository/valid-local-kit/kit.json +20 -0
- package/evals/fixtures/hook-influence/cases.json +336 -0
- package/evals/fixtures/pull-work-provider/github-issues.json +170 -0
- package/evals/fixtures/pull-work-wip-shepherding/global-wip-informs.json +43 -0
- package/evals/fixtures/pull-work-wip-shepherding/personal-wip-blocks.json +42 -0
- package/evals/fixtures/surface-trust/accepted-claim-trust-report.json +31 -0
- package/evals/fixtures/surface-trust/artifact-absent.json +19 -0
- package/evals/fixtures/surface-trust/integrity-mismatch-trust-report.json +32 -0
- package/evals/fixtures/surface-trust/missing-authority-trust-report.json +27 -0
- package/evals/fixtures/surface-trust/provider-absent.json +19 -0
- package/evals/fixtures/surface-trust/rejected-claim-trust-report.json +30 -0
- package/evals/fixtures/surface-trust/stale-claim-trust-snapshot.json +31 -0
- package/evals/fixtures/usage-feedback/sample-full.jsonl +11 -0
- package/evals/fixtures/usage-feedback/sample-outcomes.jsonl +1 -0
- package/evals/fixtures/veritas-governance-adapter/fake-veritas-pass.sh +18 -0
- package/evals/fixtures/veritas-governance-adapter/fake-veritas-secret-fail.sh +10 -0
- package/evals/fixtures/veritas-governance-adapter/fake-veritas-unconfigured.sh +4 -0
- package/evals/integration/test_bundle_install.sh +541 -0
- package/evals/integration/test_console_learning_projection.sh +192 -0
- package/evals/integration/test_context_map.sh +65 -0
- package/evals/integration/test_effective_backlog_settings.sh +58 -0
- package/evals/integration/test_fixture_retirement_audit.sh +58 -0
- package/evals/integration/test_flow_agents_statusline.sh +93 -0
- package/evals/integration/test_flow_kit_repository.sh +90 -0
- package/evals/integration/test_goal_fit_hook.sh +482 -0
- package/evals/integration/test_hook_category_behaviors.sh +190 -0
- package/evals/integration/test_hook_influence_cases.sh +69 -0
- package/evals/integration/test_local_flow_kit_install.sh +145 -0
- package/evals/integration/test_publish_change_helper.sh +176 -0
- package/evals/integration/test_pull_work_provider.sh +140 -0
- package/evals/integration/test_runtime_adapter_activation.sh +106 -0
- package/evals/integration/test_telemetry.sh +485 -0
- package/evals/integration/test_telemetry_doctor.sh +193 -0
- package/evals/integration/test_usage_feedback_dashboard.sh +169 -0
- package/evals/integration/test_usage_feedback_global.sh +117 -0
- package/evals/integration/test_usage_feedback_import.sh +227 -0
- package/evals/integration/test_usage_feedback_outcomes.sh +165 -0
- package/evals/integration/test_usage_feedback_report.sh +263 -0
- package/evals/integration/test_veritas_governance_adapter.sh +235 -0
- package/evals/integration/test_workflow_artifact_cleanup_audit.sh +287 -0
- package/evals/integration/test_workflow_artifacts.sh +1247 -0
- package/evals/integration/test_workflow_sidecar_writer.sh +2112 -0
- package/evals/integration/test_workflow_steering_hook.sh +337 -0
- package/evals/lib/assertions/delegated-to.js +40 -0
- package/evals/lib/assertions/max-tool-calls.js +15 -0
- package/evals/lib/assertions/no-write-tools.js +27 -0
- package/evals/lib/assertions/pass-at-k.js +39 -0
- package/evals/lib/assertions/telemetry-utils.js +105 -0
- package/evals/lib/assertions/tool-called.js +39 -0
- package/evals/lib/assertions/verify-after-fix.js +61 -0
- package/evals/lib/claude-judge.sh +40 -0
- package/evals/lib/claude-provider.sh +74 -0
- package/evals/lib/codex-judge.sh +39 -0
- package/evals/lib/codex-provider.sh +81 -0
- package/evals/lib/eval-dev.sh +5 -0
- package/evals/lib/eval-judge.sh +22 -0
- package/evals/lib/eval-provider.sh +26 -0
- package/evals/lib/eval-report.sh +73 -0
- package/evals/lib/kiro-dev.sh +4 -0
- package/evals/lib/kiro-judge.sh +17 -0
- package/evals/lib/kiro-provider.sh +62 -0
- package/evals/lib/node.sh +111 -0
- package/evals/promptfooconfig.yaml +70 -0
- package/evals/run.sh +309 -0
- package/evals/static/test_evidence_refs.sh +141 -0
- package/evals/static/test_package.sh +407 -0
- package/evals/static/test_repo_hooks.sh +68 -0
- package/evals/static/test_universal_bundles.sh +274 -0
- package/evals/static/test_workflow_skills.sh +1207 -0
- package/install.sh +64 -0
- package/integrations/veritas/flow-agents.adapter.json +138 -0
- package/integrations/veritas/flow-agents.authority-settings.json +26 -0
- package/integrations/veritas/flow-agents.repo-standards.json +82 -0
- package/kits/builder/flows/build.flow.json +218 -0
- package/kits/builder/flows/shape.flow.json +127 -0
- package/kits/builder/kit.json +19 -0
- package/kits/catalog.json +11 -0
- package/package.json +130 -0
- package/packaging/README.md +60 -0
- package/packaging/manifest.json +173 -0
- package/packaging/packs.json +69 -0
- package/powers/dependency-checker/POWER.md +20 -0
- package/powers/dependency-checker/mcp.json +20 -0
- package/powers/playwright/POWER.md +25 -0
- package/powers/playwright/mcp.json +12 -0
- package/prompts/code-audit.md +123 -0
- package/prompts/kcommit.md +88 -0
- package/schemas/backlog-provider-settings.schema.json +138 -0
- package/schemas/workflow-acceptance.schema.json +216 -0
- package/schemas/workflow-critique.schema.json +113 -0
- package/schemas/workflow-evidence.schema.json +357 -0
- package/schemas/workflow-handoff.schema.json +52 -0
- package/schemas/workflow-learning.schema.json +223 -0
- package/schemas/workflow-release.schema.json +172 -0
- package/schemas/workflow-state.schema.json +80 -0
- package/scripts/README.md +111 -0
- package/scripts/build-universal-bundles.js +3 -0
- package/scripts/check-content-boundary.cjs +99 -0
- package/scripts/context-budget/budget-scan.sh +166 -0
- package/scripts/detect-tools.sh +3 -0
- package/scripts/discover-agents.sh +28 -0
- package/scripts/effective-backlog-settings.js +2 -0
- package/scripts/filter-installed-packs.js +2 -0
- package/scripts/flow-kit.js +2 -0
- package/scripts/generate-context-map.js +2 -0
- package/scripts/git-status.sh +49 -0
- package/scripts/hooks/claude-hook-adapter.js +174 -0
- package/scripts/hooks/claude-telemetry-hook.js +115 -0
- package/scripts/hooks/codex-hook-adapter.js +176 -0
- package/scripts/hooks/codex-telemetry-hook.js +95 -0
- package/scripts/hooks/config-protection.js +79 -0
- package/scripts/hooks/desktop-notify.sh +39 -0
- package/scripts/hooks/governance-audit.sh +135 -0
- package/scripts/hooks/lib/audit-transport.sh +40 -0
- package/scripts/hooks/lib/hook-flags.js +49 -0
- package/scripts/hooks/lib/patterns.sh +57 -0
- package/scripts/hooks/lib/resolve-formatter.js +80 -0
- package/scripts/hooks/post-edit-accumulator.js +66 -0
- package/scripts/hooks/pre-commit-quality.js +194 -0
- package/scripts/hooks/quality-gate.js +93 -0
- package/scripts/hooks/report-only-guard.js +21 -0
- package/scripts/hooks/run-hook.js +136 -0
- package/scripts/hooks/stop-format-typecheck.js +141 -0
- package/scripts/hooks/stop-goal-fit.js +337 -0
- package/scripts/hooks/workflow-steering.js +250 -0
- package/scripts/install-codex-home.sh +106 -0
- package/scripts/package.json +3 -0
- package/scripts/promote-workflow-artifact.js +2 -0
- package/scripts/publish-change-helper.js +2 -0
- package/scripts/pull-work-provider.js +2 -0
- package/scripts/setup-repo-hooks.sh +8 -0
- package/scripts/statusline/flow-agents-statusline.js +157 -0
- package/scripts/telemetry/console-presets.sh +14 -0
- package/scripts/telemetry/install-console-config.sh +214 -0
- package/scripts/telemetry/lib/config.sh +85 -0
- package/scripts/telemetry/lib/enrich.sh +115 -0
- package/scripts/telemetry/lib/redact.sh +22 -0
- package/scripts/telemetry/lib/session.sh +63 -0
- package/scripts/telemetry/lib/transport.sh +183 -0
- package/scripts/telemetry/lib/usage.sh +29 -0
- package/scripts/telemetry/sync-agents.sh +173 -0
- package/scripts/telemetry/telemetry.conf +23 -0
- package/scripts/telemetry/telemetry.sh +387 -0
- package/scripts/usage-feedback.js +2 -0
- package/scripts/validate-hook-influence-cases.js +2 -0
- package/scripts/validate-package.sh +89 -0
- package/scripts/validate-source-tree.js +9 -0
- package/skills/agentic-engineering/SKILL.md +62 -0
- package/skills/browser-test/SKILL.md +51 -0
- package/skills/builder-shape/SKILL.md +76 -0
- package/skills/context-budget/SKILL.md +40 -0
- package/skills/deliver/SKILL.md +241 -0
- package/skills/dependency-update/SKILL.md +68 -0
- package/skills/design-probe/SKILL.md +107 -0
- package/skills/eval-rebuild/SKILL.md +39 -0
- package/skills/evidence-gate/SKILL.md +186 -0
- package/skills/execute-plan/SKILL.md +110 -0
- package/skills/explore/SKILL.md +137 -0
- package/skills/feedback-loop/SKILL.md +87 -0
- package/skills/fix-bug/SKILL.md +133 -0
- package/skills/frontend-design/SKILL.md +80 -0
- package/skills/github-cli/SKILL.md +63 -0
- package/skills/idea-to-backlog/SKILL.md +267 -0
- package/skills/knowledge-capture/SKILL.md +55 -0
- package/skills/learning-review/SKILL.md +115 -0
- package/skills/pickup-probe/SKILL.md +114 -0
- package/skills/plan-work/SKILL.md +176 -0
- package/skills/pull-work/SKILL.md +309 -0
- package/skills/release-readiness/SKILL.md +121 -0
- package/skills/review-work/SKILL.md +161 -0
- package/skills/search-first/SKILL.md +66 -0
- package/skills/tdd-workflow/SKILL.md +140 -0
- package/skills/verify-work/SKILL.md +109 -0
- package/src/cli/console-learning-projection.ts +140 -0
- package/src/cli/effective-backlog-settings.ts +99 -0
- package/src/cli/fixture-retirement-audit.ts +154 -0
- package/src/cli/flow-kit.ts +139 -0
- package/src/cli/init.ts +248 -0
- package/src/cli/promote-workflow-artifact.ts +64 -0
- package/src/cli/publish-change-helper.ts +143 -0
- package/src/cli/pull-work-provider.ts +481 -0
- package/src/cli/runtime-adapter.ts +24 -0
- package/src/cli/telemetry-doctor.ts +243 -0
- package/src/cli/usage-feedback.ts +418 -0
- package/src/cli/validate-hook-influence.ts +119 -0
- package/src/cli/validate-source-tree.ts +30 -0
- package/src/cli/validate-workflow-artifacts.ts +411 -0
- package/src/cli/veritas-governance.ts +322 -0
- package/src/cli/workflow-artifact-cleanup-audit.ts +281 -0
- package/src/cli/workflow-sidecar.ts +676 -0
- package/src/cli.ts +95 -0
- package/src/flow-kit/validate.ts +74 -0
- package/src/lib/args.ts +43 -0
- package/src/lib/fs.ts +62 -0
- package/src/lib/workflow-learning-projection.ts +491 -0
- package/src/runtime-adapters.ts +154 -0
- package/src/tools/build-universal-bundles.ts +366 -0
- package/src/tools/common.ts +61 -0
- package/src/tools/filter-installed-packs.ts +129 -0
- package/src/tools/generate-context-map.ts +199 -0
- package/src/tools/validate-package.ts +57 -0
- package/src/tools/validate-source-tree.ts +488 -0
- package/tsconfig.json +19 -0
- package/veritas.claims.json +6 -0
|
@@ -0,0 +1,2112 @@
|
|
|
1
|
+
#!/usr/bin/env bash
|
|
2
|
+
# test_workflow_sidecar_writer.sh - workflow sidecar writer integration tests
|
|
3
|
+
set -uo pipefail
|
|
4
|
+
|
|
5
|
+
ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")/../.." && pwd)"
|
|
6
|
+
source "$ROOT/evals/lib/node.sh"
|
|
7
|
+
|
|
8
|
+
TMPDIR_EVAL="$(mktemp -d)" || { echo "mktemp -d failed" >&2; exit 1; }  # abort early: an empty root would make later "$TMPDIR_EVAL/repo/..." paths resolve under /
|
|
9
|
+
errors=0
|
|
10
|
+
|
|
11
|
+
# Remove the scratch directory held in TMPDIR_EVAL.
# Globals: TMPDIR_EVAL (read)
# The guard keeps an empty or unset TMPDIR_EVAL from ever reaching rm, and
# `--` stops a value that begins with "-" from being parsed as an option.
cleanup() {
  if [[ -n "${TMPDIR_EVAL:-}" ]]; then
    rm -rf -- "$TMPDIR_EVAL"
  fi
}
|
|
14
|
+
trap cleanup EXIT
|
|
15
|
+
|
|
16
|
+
_pass() { printf ' ✓ %s\n' "$1"; }  # print one passing-check line for message $1
|
|
17
|
+
_fail() { printf ' ✗ %s\n' "$1"; errors=$((errors + 1)); }  # print one failing-check line for message $1 and bump the global errors counter
|
|
18
|
+
#######################################
# Run a command in the background and stop it if it outlives a time budget.
# Arguments:
#   $1   - time budget in whole seconds
#   $2.. - the command to run, followed by its arguments
# Returns:
#   the command's own exit status when it finishes within the budget;
#   124 when the budget ran out and the command had to be stopped.
#######################################
run_bounded() {
  local seconds="$1"
  shift
  "$@" &
  local pid=$!
  local deadline=$((SECONDS + seconds))
  local grace
  # Poll instead of a blocking wait so the deadline can be enforced.
  while kill -0 "$pid" 2>/dev/null; do
    if [[ "$SECONDS" -ge "$deadline" ]]; then
      # Ask the child to stop with the default signal (SIGTERM) first.
      kill "$pid" 2>/dev/null || true
      # A child that traps or ignores SIGTERM would make the wait below block
      # forever, so give it roughly one second and then force it with SIGKILL.
      grace=20
      while ((grace > 0)) && kill -0 "$pid" 2>/dev/null; do
        sleep 0.05
        grace=$((grace - 1))
      done
      kill -KILL "$pid" 2>/dev/null || true
      # Reap the child; its status is irrelevant once the budget is exceeded.
      wait "$pid" 2>/dev/null || true
      return 124
    fi
    sleep 0.05
  done
  # The child ended on its own: hand its exit status back to the caller.
  wait "$pid"
}
|
|
34
|
+
|
|
35
|
+
WRITER="workflow-sidecar"
|
|
36
|
+
VALIDATOR="validate-workflow-artifacts"
|
|
37
|
+
ARTIFACT_DIR="$TMPDIR_EVAL/repo/.flow-agents/auto-sidecars"
|
|
38
|
+
mkdir -p "$ARTIFACT_DIR"
|
|
39
|
+
|
|
40
|
+
SESSION_ROOT="$TMPDIR_EVAL/repo/.flow-agents"
|
|
41
|
+
if flow_agents_node "$WRITER" ensure-session \
|
|
42
|
+
--artifact-root "$SESSION_ROOT" \
|
|
43
|
+
--task-slug ensured-session \
|
|
44
|
+
--source-request "Create a current workflow session automatically." \
|
|
45
|
+
--title "Ensured Session" \
|
|
46
|
+
--summary "Automatically create a durable session artifact and initial sidecars." \
|
|
47
|
+
--criterion "Session artifact exists" \
|
|
48
|
+
--criterion "Initial sidecars validate" \
|
|
49
|
+
--next-action "Continue execution with durable state." \
|
|
50
|
+
--timestamp "2026-05-09T00:00:00Z" >"$TMPDIR_EVAL/ensure.out" 2>"$TMPDIR_EVAL/ensure.err"; then
|
|
51
|
+
_pass "sidecar writer ensures current session artifact"
|
|
52
|
+
else
|
|
53
|
+
_fail "sidecar writer ensure-session failed: $(cat "$TMPDIR_EVAL/ensure.out" "$TMPDIR_EVAL/ensure.err")"
|
|
54
|
+
fi
|
|
55
|
+
|
|
56
|
+
ENSURED_DIR="$SESSION_ROOT/ensured-session"
|
|
57
|
+
if [[ -f "$ENSURED_DIR/ensured-session--deliver.md" ]] \
|
|
58
|
+
&& [[ -f "$ENSURED_DIR/state.json" ]] \
|
|
59
|
+
&& [[ -f "$ENSURED_DIR/acceptance.json" ]] \
|
|
60
|
+
&& [[ -f "$ENSURED_DIR/handoff.json" ]] \
|
|
61
|
+
&& [[ -f "$SESSION_ROOT/current.json" ]]; then
|
|
62
|
+
_pass "sidecar writer creates session markdown and initial sidecars"
|
|
63
|
+
else
|
|
64
|
+
_fail "sidecar writer did not create expected session files"
|
|
65
|
+
fi
|
|
66
|
+
|
|
67
|
+
if node - "$ENSURED_DIR/state.json" "$ENSURED_DIR/acceptance.json" "$ENSURED_DIR/handoff.json" <<'NODE'
|
|
68
|
+
const fs = require("node:fs");
|
|
69
|
+
for (const file of process.argv.slice(2)) {
|
|
70
|
+
const repo = JSON.parse(fs.readFileSync(file, "utf8")).repo;
|
|
71
|
+
if (repo !== "kontourai/flow-agents") throw new Error(`${file} repo was ${JSON.stringify(repo)}`);
|
|
72
|
+
if (repo.includes("/") && repo.startsWith("/")) throw new Error(`${file} repo is an absolute path`);
|
|
73
|
+
}
|
|
74
|
+
NODE
|
|
75
|
+
then
|
|
76
|
+
_pass "sidecar writer records stable repository identity without local paths"
|
|
77
|
+
else
|
|
78
|
+
_fail "sidecar writer did not record stable repository identity"
|
|
79
|
+
fi
|
|
80
|
+
|
|
81
|
+
# Scratch git repository whose remote URL and FLOW_AGENTS_REPO value both look
# like local filesystem paths.
UNSAFE_REPO_ROOT="$TMPDIR_EVAL/unsafe-repo"
mkdir -p "$UNSAFE_REPO_ROOT"

# Everything runs in a subshell so the `cd` does not leak into the rest of the
# script. The case passes only when state.json records the repo as exactly
# "unsafe-repo".
if (
  cd "$UNSAFE_REPO_ROOT" \
    && git init -q \
    && git remote add origin "file:///Users/alice/customer-secret.git" \
    && FLOW_AGENTS_REPO="/Users/alice/customer-secret" flow_agents_node "$WRITER" ensure-session \
      --artifact-root ".flow-agents" \
      --task-slug unsafe-repo \
      --title "Unsafe repo" \
      --summary "Unsafe repo fallback." \
      --timestamp "2026-05-09T00:00:00Z" >/dev/null 2>"$TMPDIR_EVAL/unsafe-repo.err" \
    && node - ".flow-agents/unsafe-repo/state.json" <<'NODE'
const fs = require("node:fs");
const repo = JSON.parse(fs.readFileSync(process.argv[2], "utf8")).repo;
if (repo !== "unsafe-repo") throw new Error(`unsafe repo fallback was ${JSON.stringify(repo)}`);
if (repo.includes("alice") || repo.includes("/") || repo.startsWith("/")) throw new Error(`unsafe repo leaked local path material: ${repo}`);
NODE
); then
  _pass "sidecar writer rejects path-like repository identity inputs"
else
  # The stderr capture may be missing if the subshell failed before the writer ran.
  _fail "sidecar writer leaked or rejected path-like repository identity inputs: $(cat "$TMPDIR_EVAL/unsafe-repo.err" 2>/dev/null)"
fi
|
|
103
|
+
|
|
104
|
+
# `current` must report the active slug and a path that resolves to the
# ensured-session directory. The reported path is resolved from inside
# $TMPDIR_EVAL/repo, so a relative answer is interpreted against that directory.
if flow_agents_node "$WRITER" current \
    --artifact-root "$SESSION_ROOT" \
    --format slug \
    >"$TMPDIR_EVAL/current-slug.out" 2>"$TMPDIR_EVAL/current-slug.err" \
  && [[ "$(cat "$TMPDIR_EVAL/current-slug.out")" == "ensured-session" ]] \
  && flow_agents_node "$WRITER" current \
    --artifact-root "$SESSION_ROOT" \
    --format path \
    >"$TMPDIR_EVAL/current-path.out" 2>"$TMPDIR_EVAL/current-path.err" \
  && [[ "$(cd "$TMPDIR_EVAL/repo" && realpath "$(cat "$TMPDIR_EVAL/current-path.out")")" == "$(realpath "$ENSURED_DIR")" ]]; then
  _pass "sidecar writer resolves current workflow identity"
else
  _fail "sidecar writer did not resolve current workflow identity: $(cat "$TMPDIR_EVAL/current-slug.out" "$TMPDIR_EVAL/current-slug.err" "$TMPDIR_EVAL/current-path.out" "$TMPDIR_EVAL/current-path.err")"
fi
|
|
112
|
+
|
|
113
|
+
# Event log expected for agent "tool-worker-1" inside the ensured session.
# NOTE(review): the two adjacent quoted strings concatenate to ".../agents/...";
# the reason the literal is split is not visible here, so the split is kept.
AGENT_EVENT_PATH="$ENSURED_DIR/ag""ents/tool-worker-1/events.jsonl"

# Recording an event without --artifact-dir must create the per-agent log and
# mention the agent in the root current.json.
if flow_agents_node "$WRITER" record-agent-event \
    --artifact-root "$SESSION_ROOT" \
    --agent-id tool-worker-1 \
    --kind evidence \
    --status active \
    --summary "Worker started a bounded implementation pass." \
    --ref wave-1 \
    --timestamp "2026-05-09T00:00:30Z" \
    >"$TMPDIR_EVAL/agent-event.out" 2>"$TMPDIR_EVAL/agent-event.err" \
  && [[ -f "$AGENT_EVENT_PATH" ]] \
  && rg -q '"agent_id": "tool-worker-1"' "$AGENT_EVENT_PATH" \
  && rg -q '"agent_id": "tool-worker-1"' "$SESSION_ROOT/current.json"; then
  _pass "sidecar writer records delegation-safe agent events"
else
  _fail "sidecar writer did not record delegation-safe agent event: $(cat "$TMPDIR_EVAL/agent-event.out" "$TMPDIR_EVAL/agent-event.err")"
fi
|
|
129
|
+
|
|
130
|
+
# Snapshot current.json so any mutation by the rejected call can be detected.
cp "$SESSION_ROOT/current.json" "$TMPDIR_EVAL/current-before-traversal-agent.json"
# File that must NOT exist after the traversal attempt.
TRAVERSAL_AGENT_OUTSIDE="$TMPDIR_EVAL/repo/.flow-agents/evil-agent-outside.jsonl"

# An agent id containing "../" must be refused: non-zero exit, a diagnostic
# naming --agent-id, no outside file, unchanged current.json, no leftover lock.
# NOTE(review): run_bounded is defined outside this view; presumably it caps
# the run time (the meaning of "5" is not shown here).
if run_bounded 5 flow_agents_node "$WRITER" record-agent-event \
    --artifact-root "$SESSION_ROOT" \
    --agent-id ../evil-agent-outside \
    --kind evidence \
    --status active \
    --summary "This traversal agent id should fail before mutation." \
    >"$TMPDIR_EVAL/traversal-agent-event.out" 2>&1; then
  _fail "sidecar writer should reject traversal agent ids"
elif rg -q -- '--agent-id must not contain' "$TMPDIR_EVAL/traversal-agent-event.out" \
  && [[ ! -e "$TRAVERSAL_AGENT_OUTSIDE" ]] \
  && cmp -s "$SESSION_ROOT/current.json" "$TMPDIR_EVAL/current-before-traversal-agent.json" \
  && [[ ! -e "$SESSION_ROOT/.workflow-sidecar.lockdir" ]]; then
  _pass "sidecar writer rejects traversal agent ids without mutation or lock residue"
else
  _fail "sidecar writer traversal agent rejection lacked diagnostics or left residue: $(cat "$TMPDIR_EVAL/traversal-agent-event.out")"
fi
|
|
147
|
+
|
|
148
|
+
# Snapshot current.json so it can be compared after the rejected call.
cp "$SESSION_ROOT/current.json" "$TMPDIR_EVAL/current-before-bad-agent.json"

# "ensured-sessoin" is a deliberately misspelled directory: the writer must
# fail rather than create a workflow there, and current.json must stay as is.
if flow_agents_node "$WRITER" record-agent-event \
    --artifact-root "$SESSION_ROOT" \
    --artifact-dir "$SESSION_ROOT/ensured-sessoin" \
    --agent-id typo-worker \
    --kind evidence \
    --status active \
    --summary "This typo should not create a workflow." \
    >"$TMPDIR_EVAL/bad-agent-event.out" 2>&1; then
  _fail "sidecar writer should reject missing explicit artifact dirs"
elif cmp -s "$SESSION_ROOT/current.json" "$TMPDIR_EVAL/current-before-bad-agent.json"; then
  _pass "sidecar writer rejects bad explicit artifact dirs without changing current"
else
  _fail "sidecar writer changed current after bad explicit artifact dir"
fi
|
|
162
|
+
|
|
163
|
+
# Creating a new session must make it the active slug and leave current.json
# with an empty active_agents array (the inline Node check exits 1 otherwise).
if flow_agents_node "$WRITER" ensure-session \
    --artifact-root "$SESSION_ROOT" \
    --task-slug fresh-session \
    --source-request "Create a fresh session after worker activity." \
    --title "Fresh Session" \
    --summary "A new active workflow should not inherit agents from the prior slug." \
    --criterion "Fresh session is active" \
    --timestamp "2026-05-09T00:00:45Z" \
    >"$TMPDIR_EVAL/ensure-fresh.out" 2>"$TMPDIR_EVAL/ensure-fresh.err" \
  && rg -q '"active_slug": "fresh-session"' "$SESSION_ROOT/current.json" \
  && node -e 'const fs=require("fs"); const current=JSON.parse(fs.readFileSync(process.argv[1],"utf8")); if (JSON.stringify(current.active_agents)!=="[]") process.exit(1);' "$SESSION_ROOT/current.json"
then
  _pass "sidecar writer resets active agents for a new current workflow"
else
  _fail "sidecar writer carried stale active agents into a new workflow: $(cat "$TMPDIR_EVAL/ensure-fresh.out" "$TMPDIR_EVAL/ensure-fresh.err")"
fi
|
|
178
|
+
|
|
179
|
+
# A task slug containing "../" must be refused with a diagnostic naming
# --task-slug, and no "outside" directory may appear next to the session root.
if flow_agents_node "$WRITER" ensure-session \
    --artifact-root "$SESSION_ROOT" \
    --task-slug ../outside \
    --source-request "Traversal should be rejected." \
    --title "Traversal Fixture" \
    --summary "This must not create artifacts outside the root." \
    --timestamp "2026-05-09T00:00:50Z" \
    >"$TMPDIR_EVAL/ensure-traversal.out" 2>&1; then
  _fail "sidecar writer should reject traversal task slugs"
elif rg -q -- '--task-slug must not contain' "$TMPDIR_EVAL/ensure-traversal.out" \
  && [[ ! -d "$TMPDIR_EVAL/repo/.flow-agents/outside" ]]; then
  _pass "sidecar writer rejects traversal task slugs without creating outside dirs"
else
  _fail "sidecar writer traversal rejection was not fail-closed: $(cat "$TMPDIR_EVAL/ensure-traversal.out")"
fi
|
|
193
|
+
|
|
194
|
+
# Event log expected for "late-worker" inside the older ensured session.
LATE_AGENT_EVENT_PATH="$ENSURED_DIR/ag""ents/late-worker/events.jsonl"

# An event recorded with an explicit --artifact-dir for the old workflow must
# land in that workflow's log, while current.json keeps "fresh-session" active
# and does not mention the late agent.
if flow_agents_node "$WRITER" record-agent-event \
    --artifact-root "$SESSION_ROOT" \
    --artifact-dir "$ENSURED_DIR" \
    --agent-id late-worker \
    --kind completed \
    --status done \
    --summary "A late worker finished the old workflow after a newer session became active." \
    --timestamp "2026-05-09T00:01:00Z" \
    >"$TMPDIR_EVAL/late-agent-event.out" 2>"$TMPDIR_EVAL/late-agent-event.err" \
  && [[ -f "$LATE_AGENT_EVENT_PATH" ]] \
  && rg -q '"agent_id": "late-worker"' "$LATE_AGENT_EVENT_PATH" \
  && rg -q '"active_slug": "fresh-session"' "$SESSION_ROOT/current.json" \
  && ! rg -q '"agent_id": "late-worker"' "$SESSION_ROOT/current.json"; then
  _pass "sidecar writer keeps late explicit agent events from stealing current workflow"
else
  _fail "sidecar writer let a late explicit agent event change current workflow: $(cat "$TMPDIR_EVAL/late-agent-event.out" "$TMPDIR_EVAL/late-agent-event.err")"
fi
|
|
211
|
+
|
|
212
|
+
# Copy the ensured session to a directory outside the session root and keep a
# snapshot of current.json for comparison.
COPIED_ROOT="$TMPDIR_EVAL/copied-workflows"
COPIED_DIR="$COPIED_ROOT/ensured-session"
mkdir -p "$COPIED_ROOT"
cp -R "$ENSURED_DIR" "$COPIED_DIR"
cp "$SESSION_ROOT/current.json" "$TMPDIR_EVAL/current-before-copied-agent.json"
COPIED_AGENT_EVENT_PATH="$COPIED_DIR/ag""ents/copied-worker/events.jsonl"

# With only --artifact-dir given, the event must be written inside the copy,
# the original root's current.json must be untouched, and no lock directory
# may remain in the copied root, the copied dir or the session root.
if run_bounded 5 flow_agents_node "$WRITER" record-agent-event \
    --artifact-dir "$COPIED_DIR" \
    --agent-id copied-worker \
    --kind evidence \
    --status done \
    --summary "A copied workflow outside the default root records without hanging." \
    --timestamp "2026-05-09T00:01:05Z" \
    >"$TMPDIR_EVAL/copied-agent-event.out" 2>"$TMPDIR_EVAL/copied-agent-event.err" \
  && [[ -f "$COPIED_AGENT_EVENT_PATH" ]] \
  && rg -q '"agent_id": "copied-worker"' "$COPIED_AGENT_EVENT_PATH" \
  && cmp -s "$SESSION_ROOT/current.json" "$TMPDIR_EVAL/current-before-copied-agent.json" \
  && [[ ! -e "$COPIED_ROOT/.workflow-sidecar.lockdir" \
    && ! -e "$COPIED_DIR/.workflow-sidecar.lockdir" \
    && ! -e "$SESSION_ROOT/.workflow-sidecar.lockdir" ]]; then
  _pass "sidecar writer records bounded explicit events in copied workflow dirs"
else
  _fail "sidecar writer copied explicit event failed or left residue: $(cat "$TMPDIR_EVAL/copied-agent-event.out" "$TMPDIR_EVAL/copied-agent-event.err")"
fi
|
|
235
|
+
|
|
236
|
+
# Snapshot current.json, then name the log that must NOT be created.
cp "$SESSION_ROOT/current.json" "$TMPDIR_EVAL/current-before-mismatch-agent.json"
MISMATCH_AGENT_EVENT_PATH="$COPIED_DIR/ag""ents/mismatch-worker/events.jsonl"

# Passing an --artifact-dir that is not under --artifact-root must be refused
# with the expected diagnostic, without writing the event, changing
# current.json or leaving a lock directory anywhere.
if run_bounded 5 flow_agents_node "$WRITER" record-agent-event \
    --artifact-root "$SESSION_ROOT" \
    --artifact-dir "$COPIED_DIR" \
    --agent-id mismatch-worker \
    --kind evidence \
    --status active \
    --summary "This root mismatch should fail before mutation." \
    >"$TMPDIR_EVAL/mismatch-agent-event.out" 2>&1; then
  _fail "sidecar writer should reject explicit artifact-dir/root mismatches"
elif rg -q 'artifact directory must be under artifact root' "$TMPDIR_EVAL/mismatch-agent-event.out" \
  && [[ ! -e "$MISMATCH_AGENT_EVENT_PATH" ]] \
  && cmp -s "$SESSION_ROOT/current.json" "$TMPDIR_EVAL/current-before-mismatch-agent.json" \
  && [[ ! -e "$COPIED_ROOT/.workflow-sidecar.lockdir" \
    && ! -e "$COPIED_DIR/.workflow-sidecar.lockdir" \
    && ! -e "$SESSION_ROOT/.workflow-sidecar.lockdir" ]]; then
  _pass "sidecar writer rejects artifact-dir/root mismatches without mutation or lock residue"
else
  _fail "sidecar writer mismatch rejection lacked diagnostics or left residue: $(cat "$TMPDIR_EVAL/mismatch-agent-event.out")"
fi
|
|
256
|
+
|
|
257
|
+
# A real directory outside the session root, and a symlink to it placed
# inside the session root.
SYMLINK_TARGET="$TMPDIR_EVAL/symlink-target-workflow"
SYMLINK_DIR="$SESSION_ROOT/symlink-session"
mkdir -p "$SYMLINK_TARGET"

if ln -s "$SYMLINK_TARGET" "$SYMLINK_DIR" 2>"$TMPDIR_EVAL/symlink-create.err"; then
  cp "$SESSION_ROOT/current.json" "$TMPDIR_EVAL/current-before-symlink-agent.json"

  # A symlinked --artifact-dir must be refused before anything is written
  # through the link, with current.json unchanged and no lock directories left.
  if run_bounded 5 flow_agents_node "$WRITER" record-agent-event \
      --artifact-root "$SESSION_ROOT" \
      --artifact-dir "$SYMLINK_DIR" \
      --agent-id symlink-worker \
      --kind evidence \
      --status active \
      --summary "A symlink artifact dir should fail before mutation." \
      >"$TMPDIR_EVAL/symlink-agent-event.out" 2>&1; then
    _fail "sidecar writer should reject symlink artifact dirs"
  elif rg -q 'artifact directory must not be a symlink' "$TMPDIR_EVAL/symlink-agent-event.out" \
    && [[ ! -e "$SYMLINK_TARGET/ag""ents/symlink-worker/events.jsonl" ]] \
    && cmp -s "$SESSION_ROOT/current.json" "$TMPDIR_EVAL/current-before-symlink-agent.json" \
    && [[ ! -e "$SESSION_ROOT/.workflow-sidecar.lockdir" \
      && ! -e "$SYMLINK_TARGET/.workflow-sidecar.lockdir" ]]; then
    _pass "sidecar writer rejects symlink artifact dirs without mutation or lock residue"
  else
    _fail "sidecar writer symlink artifact-dir rejection lacked diagnostics or left residue: $(cat "$TMPDIR_EVAL/symlink-agent-event.out")"
  fi
else
  # The symlink could not be created, so the case is reported as a pass with
  # the `ln` error attached instead of being exercised.
  _pass "sidecar writer symlink artifact-dir coverage skipped because symlink creation is unavailable: $(cat "$TMPDIR_EVAL/symlink-create.err")"
fi
|
|
282
|
+
|
|
283
|
+
# Race case: an explicit record-agent-event runs in the background while a
# second ensure-session switches the current workflow. Both must succeed, the
# switch to race-session-b must survive, and the event must be in session A.
#
# The probe that waits for the root lock directory is now part of the verdict.
# Previously its exit status was discarded, so the case could pass even when
# the background writer was never seen holding the lock, i.e. when no race was
# exercised. Every status is captured with `cmd || status=$?` so the sequence
# runs to the `wait` whether or not errexit is enabled in this script.
if flow_agents_node "$WRITER" ensure-session \
    --artifact-root "$SESSION_ROOT" \
    --task-slug race-session-a \
    --source-request "Create a race fixture session." \
    --title "Race Session A" \
    --summary "Explicit agent events should serialize with current session switches." \
    --criterion "Race session A exists" \
    --timestamp "2026-05-09T00:01:10Z" \
    >"$TMPDIR_EVAL/ensure-race-a.out" 2>"$TMPDIR_EVAL/ensure-race-a.err"; then
  RACE_A_DIR="$SESSION_ROOT/race-session-a"

  # NOTE(review): FLOW_AGENTS_WORKFLOW_SIDECAR_LOCK_DELAY presumably makes the
  # writer hold its lock longer so the overlap is observable; confirm against
  # the writer implementation.
  FLOW_AGENTS_WORKFLOW_SIDECAR_LOCK_DELAY=1.2 flow_agents_node "$WRITER" record-agent-event \
    --artifact-root "$SESSION_ROOT" \
    --artifact-dir "$RACE_A_DIR" \
    --agent-id race-worker \
    --kind evidence \
    --status active \
    --summary "This explicit event races with a session switch." \
    --timestamp "2026-05-09T00:01:12Z" \
    >"$TMPDIR_EVAL/race-agent-event.out" 2>"$TMPDIR_EVAL/race-agent-event.err" &
  race_pid=$!

  # Poll (every 20 ms, up to 5 s) until the lock directory exists.
  race_status_lock=0
  node -e 'const fs=require("fs"); const lock=process.argv[1]; const deadline=Date.now()+5000; (function wait(){ if (fs.existsSync(lock)) process.exit(0); if (Date.now()>deadline) { console.error("record-agent-event did not acquire root lock before timeout"); process.exit(1); } setTimeout(wait,20); })();' "$SESSION_ROOT/.workflow-sidecar.lockdir" \
    2>"$TMPDIR_EVAL/race-lock-wait.err" || race_status_lock=$?

  # Switch the current workflow while the background event is in flight.
  race_status_b=0
  flow_agents_node "$WRITER" ensure-session \
    --artifact-root "$SESSION_ROOT" \
    --task-slug race-session-b \
    --source-request "Switch current session during explicit event." \
    --title "Race Session B" \
    --summary "The current workflow switch should not be lost." \
    --criterion "Race session B remains current" \
    --timestamp "2026-05-09T00:01:15Z" \
    >"$TMPDIR_EVAL/ensure-race-b.out" 2>"$TMPDIR_EVAL/ensure-race-b.err" || race_status_b=$?

  # Always reap the background writer and keep its exit status.
  race_status_event=0
  wait "$race_pid" || race_status_event=$?

  race_event_path="$RACE_A_DIR/agen""ts/race-worker/even""ts.jsonl"
  if [[ "$race_status_lock" -eq 0 && "$race_status_event" -eq 0 && "$race_status_b" -eq 0 ]] \
    && rg -q '"active_slug": "race-session-b"' "$SESSION_ROOT/current.json" \
    && [[ -f "$race_event_path" ]] \
    && rg -q '"agent_id": "race-worker"' "$race_event_path"
  then
    _pass "sidecar writer serializes explicit agent events with current workflow switches"
  else
    _fail "sidecar writer did not serialize explicit agent events with current workflow switches: $(cat "$TMPDIR_EVAL/race-lock-wait.err" "$TMPDIR_EVAL/race-agent-event.out" "$TMPDIR_EVAL/race-agent-event.err" "$TMPDIR_EVAL/ensure-race-b.out" "$TMPDIR_EVAL/ensure-race-b.err")"
  fi
else
  _fail "sidecar writer could not create race fixture: $(cat "$TMPDIR_EVAL/ensure-race-a.out" "$TMPDIR_EVAL/ensure-race-a.err")"
fi
|
|
326
|
+
|
|
327
|
+
# The ensured session directory must pass the validator with sidecars required.
if flow_agents_node "$VALIDATOR" --require-sidecars "$ENSURED_DIR" \
    >"$TMPDIR_EVAL/ensure-valid.out" 2>"$TMPDIR_EVAL/ensure-valid.err"; then
  _pass "ensured session artifacts validate"
else
  _fail "ensured session artifacts failed validation: $(cat "$TMPDIR_EVAL/ensure-valid.out" "$TMPDIR_EVAL/ensure-valid.err")"
fi
|
|
332
|
+
|
|
333
|
+
EXISTING_ONLY_DIR="$SESSION_ROOT/existing-session"
|
|
334
|
+
mkdir -p "$EXISTING_ONLY_DIR"
|
|
335
|
+
cat > "$EXISTING_ONLY_DIR/existing-session--deliver.md" <<'MARKDOWN'
|
|
336
|
+
# Existing Session
|
|
337
|
+
|
|
338
|
+
branch: main
|
|
339
|
+
worktree: main
|
|
340
|
+
created: 2026-05-09T00:00:00Z
|
|
341
|
+
status: planning
|
|
342
|
+
type: deliver
|
|
343
|
+
iteration: 1
|
|
344
|
+
|
|
345
|
+
## Plan
|
|
346
|
+
|
|
347
|
+
Existing artifact should keep its own criteria when sidecars are filled in later.
|
|
348
|
+
|
|
349
|
+
## Definition Of Done
|
|
350
|
+
|
|
351
|
+
- **User outcome:** Existing session remains the source of truth.
|
|
352
|
+
- **Scope:** Existing Markdown plus missing sidecars.
|
|
353
|
+
- **Acceptance criteria:**
|
|
354
|
+
- [ ] Existing artifact criterion - Evidence: existing Markdown.
|
|
355
|
+
- **Usefulness checks:**
|
|
356
|
+
- [ ] User-facing workflow is documented or discoverable
|
|
357
|
+
- **Stop-short risks:** Sidecars could drift from existing Markdown.
|
|
358
|
+
- **Durable docs target:** not needed
|
|
359
|
+
- **Sandbox mode:** local-edit
|
|
360
|
+
|
|
361
|
+
## Execution Progress
|
|
362
|
+
|
|
363
|
+
- [ ] Session initialized.
|
|
364
|
+
|
|
365
|
+
## Verification Report
|
|
366
|
+
|
|
367
|
+
Build: [FAIL] Verification has not run yet.
|
|
368
|
+
|
|
369
|
+
### Acceptance Criteria
|
|
370
|
+
- [FAIL] Verification has not run yet - Evidence: pending workflow execution and checks.
|
|
371
|
+
|
|
372
|
+
### Verdict: FAIL
|
|
373
|
+
|
|
374
|
+
## Goal Fit Gate
|
|
375
|
+
|
|
376
|
+
- [ ] Original user goal restated
|
|
377
|
+
|
|
378
|
+
## Final Acceptance
|
|
379
|
+
|
|
380
|
+
- [ ] CI/relevant checks passed or local equivalent recorded
|
|
381
|
+
MARKDOWN
|
|
382
|
+
|
|
383
|
+
# ensure-session on a directory that already holds session Markdown must fill
# in acceptance.json from that Markdown: the Markdown criterion has to appear
# and the --criterion given on the command line must not.
if flow_agents_node "$WRITER" ensure-session \
    --artifact-root "$SESSION_ROOT" \
    --task-slug existing-session \
    --source-request "Select existing session." \
    --summary "Fill missing sidecars for an existing artifact." \
    --criterion "Different CLI criterion" \
    --timestamp "2026-05-09T00:02:00Z" \
    >"$TMPDIR_EVAL/ensure-existing.out" 2>"$TMPDIR_EVAL/ensure-existing.err" \
  && rg -q '"description": "Existing artifact criterion"' "$EXISTING_ONLY_DIR/acceptance.json" \
  && ! rg -q 'Different CLI criterion' "$EXISTING_ONLY_DIR/acceptance.json"; then
  _pass "sidecar writer derives missing sidecars from existing session Markdown"
else
  _fail "sidecar writer drifted sidecars from existing session Markdown: $(cat "$TMPDIR_EVAL/ensure-existing.out" "$TMPDIR_EVAL/ensure-existing.err")"
fi
|
|
396
|
+
|
|
397
|
+
# Append a marker line to the existing session Markdown; a second
# ensure-session for the same slug must leave that marker in place.
printf 'DO NOT OVERWRITE\n' >> "$ENSURED_DIR/ensured-session--deliver.md"
if flow_agents_node "$WRITER" ensure-session \
    --artifact-root "$SESSION_ROOT" \
    --task-slug ensured-session \
    --source-request "Create a current workflow session automatically." \
    --summary "This second call should select the existing session." \
    --criterion "Should not replace the artifact" \
    --timestamp "2026-05-09T00:01:00Z" \
    >"$TMPDIR_EVAL/ensure-again.out" 2>"$TMPDIR_EVAL/ensure-again.err" \
  && rg -q 'DO NOT OVERWRITE' "$ENSURED_DIR/ensured-session--deliver.md"; then
  _pass "sidecar writer selects existing session without overwrite"
else
  _fail "sidecar writer overwrote existing ensured session: $(cat "$TMPDIR_EVAL/ensure-again.out" "$TMPDIR_EVAL/ensure-again.err")"
fi
|
|
410
|
+
|
|
411
|
+
cat > "$ARTIFACT_DIR/auto-sidecars--deliver.md" <<'MARKDOWN'
|
|
412
|
+
# Generate sidecars automatically
|
|
413
|
+
|
|
414
|
+
status: delivered
|
|
415
|
+
type: deliver
|
|
416
|
+
|
|
417
|
+
## Plan
|
|
418
|
+
|
|
419
|
+
Use a writer utility to create machine-readable workflow sidecars.
|
|
420
|
+
|
|
421
|
+
## Definition Of Done
|
|
422
|
+
|
|
423
|
+
- **User outcome:** Workflow agents can create sidecars without hand-writing JSON.
|
|
424
|
+
- **Scope:** Sidecar writer utility and integration tests.
|
|
425
|
+
- **Acceptance criteria:**
|
|
426
|
+
- [x] Planning sidecars are initialized - Evidence: writer creates state, acceptance, and handoff JSON.
|
|
427
|
+
- [x] Evidence sidecar is recorded - Evidence: writer records evidence JSON and updates acceptance state.
|
|
428
|
+
- [x] Critique sidecar is recorded - Evidence: writer records critique JSON and strict validation passes.
|
|
429
|
+
- [x] Release and learning sidecars are recorded - Evidence: writer records release and learning JSON and updates workflow state.
|
|
430
|
+
- **Usefulness checks:**
|
|
431
|
+
- [x] User-facing workflow is documented or discoverable
|
|
432
|
+
- [x] Unknown, NOT_VERIFIED, and TODO gaps are resolved or explicitly accepted
|
|
433
|
+
- **Stop-short risks:** Writer output could be syntactically valid but inconsistent with Markdown.
|
|
434
|
+
- **Durable docs target:** docs/workflow-usage-guide.md
|
|
435
|
+
- **Sandbox mode:** local-edit
|
|
436
|
+
|
|
437
|
+
## Verification Report
|
|
438
|
+
|
|
439
|
+
Build: [PASS] sidecar writer fixture
|
|
440
|
+
|
|
441
|
+
### Acceptance Criteria
|
|
442
|
+
- [PASS] Planning sidecars are initialized - Evidence: state, acceptance, and handoff JSON exist.
|
|
443
|
+
- [PASS] Evidence sidecar is recorded - Evidence: evidence JSON exists.
|
|
444
|
+
- [PASS] Critique sidecar is recorded - Evidence: critique JSON exists.
|
|
445
|
+
|
|
446
|
+
### Verdict: PASS
|
|
447
|
+
|
|
448
|
+
## Goal Fit Gate
|
|
449
|
+
|
|
450
|
+
- [x] Original user goal restated
|
|
451
|
+
- [x] Every acceptance criterion has evidence
|
|
452
|
+
|
|
453
|
+
## Final Acceptance
|
|
454
|
+
|
|
455
|
+
- [x] CI/relevant checks passed
|
|
456
|
+
MARKDOWN
|
|
457
|
+
|
|
458
|
+
# init-plan must succeed on the auto-sidecars Markdown fixture.
if flow_agents_node "$WRITER" init-plan "$ARTIFACT_DIR/auto-sidecars--deliver.md" \
    --source-request "Generate workflow sidecars automatically." \
    --summary "Planning sidecars were initialized from Markdown." \
    --next-action "Record evidence after checks run." \
    --timestamp "2026-05-09T00:00:00Z" \
    >"$TMPDIR_EVAL/init.out" 2>"$TMPDIR_EVAL/init.err"; then
  _pass "sidecar writer initializes planning sidecars"
else
  _fail "sidecar writer init failed: $(cat "$TMPDIR_EVAL/init.out" "$TMPDIR_EVAL/init.err")"
fi
|
|
467
|
+
|
|
468
|
+
# acceptance.json must contain the criterion id expected for the first
# "Definition Of Done" acceptance criterion in the fixture.
if rg -q '"id": "planning-sidecars-are-initialized"' "$ARTIFACT_DIR/acceptance.json"; then
  _pass "sidecar writer extracts Definition Of Done criteria"
else
  _fail "sidecar writer did not extract expected acceptance criterion"
fi
|
|
473
|
+
|
|
474
|
+
# record-evidence must accept a passing verdict with one structured check.
if flow_agents_node "$WRITER" record-evidence "$ARTIFACT_DIR" \
    --verdict pass \
    --check-json '{"id":"writer-fixture","kind":"test","status":"pass","summary":"Writer fixture passed.","command":"test_workflow_sidecar_writer.sh"}' \
    --timestamp "2026-05-09T00:01:00Z" \
    >"$TMPDIR_EVAL/evidence.out" 2>"$TMPDIR_EVAL/evidence.err"; then
  _pass "sidecar writer records evidence"
else
  _fail "sidecar writer evidence failed: $(cat "$TMPDIR_EVAL/evidence.out" "$TMPDIR_EVAL/evidence.err")"
fi
|
|
482
|
+
|
|
483
|
+
# After the evidence call, state.json must contain a "verified" status and
# acceptance.json must contain a "pass" status.
if rg -q '"status": "verified"' "$ARTIFACT_DIR/state.json" \
  && rg -q '"status": "pass"' "$ARTIFACT_DIR/acceptance.json"; then
  _pass "sidecar writer updates state and acceptance from evidence"
else
  _fail "sidecar writer did not update state and acceptance"
fi
|
|
488
|
+
|
|
489
|
+
# Fixture for the invalid artifact_refs cases: a copy of the auto-sidecars
# Markdown, planned with init-plan in its own directory.
#
# The setup steps are chained and their failure is reported explicitly, the
# same way the race fixture in this script reports a failed setup. Without
# this, a broken fixture only showed up later as a misleading "rejection was
# not fail-closed" failure in the cases that consume it.
INVALID_REF_DIR="$TMPDIR_EVAL/repo/.flow-agents/invalid-evidence-ref"
if ! {
  mkdir -p "$INVALID_REF_DIR" \
    && cp "$ARTIFACT_DIR/auto-sidecars--deliver.md" "$INVALID_REF_DIR/invalid-evidence-ref--deliver.md" \
    && flow_agents_node "$WRITER" init-plan "$INVALID_REF_DIR/invalid-evidence-ref--deliver.md" \
      --source-request "Reject invalid evidence refs." \
      --summary "Invalid evidence refs fixture." \
      --next-action "Try invalid evidence refs." \
      --timestamp "2026-05-09T00:01:01Z" \
      >"$TMPDIR_EVAL/invalid-ref-init.out" 2>"$TMPDIR_EVAL/invalid-ref-init.err"
}; then
  # The capture files may be missing if mkdir or cp failed first.
  _fail "sidecar writer could not create invalid evidence ref fixture: $(cat "$TMPDIR_EVAL/invalid-ref-init.out" "$TMPDIR_EVAL/invalid-ref-init.err" 2>/dev/null)"
fi
|
|
497
|
+
|
|
498
|
+
# A check whose artifact_refs entry is a bare string must be rejected with the
# expected diagnostic, before evidence.json is created and with state.json
# still "planned".
if flow_agents_node "$WRITER" record-evidence "$INVALID_REF_DIR" \
    --verdict pass \
    --check-json '{"id":"legacy-ref-check","kind":"test","status":"pass","summary":"Should fail.","artifact_refs":["legacy-string-ref"]}' \
    --timestamp "2026-05-09T00:01:02Z" \
    >"$TMPDIR_EVAL/legacy-ref.out" 2>"$TMPDIR_EVAL/legacy-ref.err"; then
  _fail "sidecar writer should reject legacy string artifact_refs"
elif rg -q 'legacy string refs are not supported' "$TMPDIR_EVAL/legacy-ref.out" "$TMPDIR_EVAL/legacy-ref.err" \
  && [[ ! -f "$INVALID_REF_DIR/evidence.json" ]] \
  && rg -q '"status": "planned"' "$INVALID_REF_DIR/state.json"; then
  _pass "sidecar writer rejects legacy string artifact_refs before mutation"
else
  _fail "legacy string artifact_refs rejection was not fail-closed: $(cat "$TMPDIR_EVAL/legacy-ref.out" "$TMPDIR_EVAL/legacy-ref.err")"
fi
|
|
510
|
+
|
|
511
|
+
# A structured artifact ref carrying only "kind" must be rejected with the
# expected diagnostic, before evidence.json is created and with state.json
# still "planned".
if flow_agents_node "$WRITER" record-evidence "$INVALID_REF_DIR" \
    --verdict pass \
    --check-json '{"id":"incomplete-ref-check","kind":"test","status":"pass","summary":"Should fail.","artifact_refs":[{"kind":"artifact"}]}' \
    --timestamp "2026-05-09T00:01:03Z" \
    >"$TMPDIR_EVAL/incomplete-ref.out" 2>"$TMPDIR_EVAL/incomplete-ref.err"; then
  _fail "sidecar writer should reject incomplete structured artifact_refs"
elif rg -q 'artifact refs require file or url' "$TMPDIR_EVAL/incomplete-ref.out" "$TMPDIR_EVAL/incomplete-ref.err" \
  && [[ ! -f "$INVALID_REF_DIR/evidence.json" ]] \
  && rg -q '"status": "planned"' "$INVALID_REF_DIR/state.json"; then
  _pass "sidecar writer rejects incomplete structured artifact_refs before mutation"
else
  _fail "incomplete structured artifact_refs rejection was not fail-closed: $(cat "$TMPDIR_EVAL/incomplete-ref.out" "$TMPDIR_EVAL/incomplete-ref.err")"
fi
|
|
523
|
+
|
|
524
|
+
# Fixture for the invalid acceptance-ref case: plan a copy of the
# auto-sidecars Markdown, then rewrite acceptance.json so its first criterion
# carries a bare-string evidence ref.
#
# The setup steps are chained and their failure is reported explicitly, the
# same way the race fixture in this script reports a failed setup. Without
# this, a broken fixture only showed up later as a misleading failure in the
# case that consumes it.
INVALID_ACCEPTANCE_REF_DIR="$TMPDIR_EVAL/repo/.flow-agents/invalid-acceptance-ref"
if ! {
  mkdir -p "$INVALID_ACCEPTANCE_REF_DIR" \
    && cp "$ARTIFACT_DIR/auto-sidecars--deliver.md" "$INVALID_ACCEPTANCE_REF_DIR/invalid-acceptance-ref--deliver.md" \
    && flow_agents_node "$WRITER" init-plan "$INVALID_ACCEPTANCE_REF_DIR/invalid-acceptance-ref--deliver.md" \
      --source-request "Reject invalid existing acceptance refs." \
      --summary "Invalid acceptance refs fixture." \
      --next-action "Try invalid acceptance refs." \
      --timestamp "2026-05-09T00:01:04Z" \
      >"$TMPDIR_EVAL/invalid-acceptance-ref-init.out" 2>"$TMPDIR_EVAL/invalid-acceptance-ref-init.err" \
    && node -e 'const fs=require("fs"); const file=process.argv[1]; const data=JSON.parse(fs.readFileSync(file,"utf8")); data.criteria[0].evidence_refs=["legacy-acceptance-ref.md"]; fs.writeFileSync(file, JSON.stringify(data, null, 2) + "\n");' "$INVALID_ACCEPTANCE_REF_DIR/acceptance.json"
}; then
  # The capture files may be missing if mkdir or cp failed first.
  _fail "sidecar writer could not create invalid acceptance ref fixture: $(cat "$TMPDIR_EVAL/invalid-acceptance-ref-init.out" "$TMPDIR_EVAL/invalid-acceptance-ref-init.err" 2>/dev/null)"
fi
|
|
533
|
+
|
|
534
|
+
# Even with a valid check, record-evidence must refuse to proceed when the
# existing acceptance.json holds a bare-string evidence ref: expected
# diagnostic, no evidence.json, state.json still "planned".
if flow_agents_node "$WRITER" record-evidence "$INVALID_ACCEPTANCE_REF_DIR" \
    --verdict pass \
    --check-json '{"id":"valid-check","kind":"test","status":"pass","summary":"Valid check."}' \
    --timestamp "2026-05-09T00:01:05Z" \
    >"$TMPDIR_EVAL/invalid-acceptance-ref.out" 2>"$TMPDIR_EVAL/invalid-acceptance-ref.err"; then
  _fail "sidecar writer should reject existing legacy acceptance evidence_refs"
elif rg -q 'acceptance\.criteria\[0\]\.evidence_refs entries must be structured evidence reference objects' "$TMPDIR_EVAL/invalid-acceptance-ref.out" "$TMPDIR_EVAL/invalid-acceptance-ref.err" \
  && [[ ! -f "$INVALID_ACCEPTANCE_REF_DIR/evidence.json" ]] \
  && rg -q '"status": "planned"' "$INVALID_ACCEPTANCE_REF_DIR/state.json"; then
  _pass "sidecar writer rejects existing invalid acceptance refs before mutation"
else
  _fail "existing invalid acceptance ref rejection was not fail-closed: $(cat "$TMPDIR_EVAL/invalid-acceptance-ref.out" "$TMPDIR_EVAL/invalid-acceptance-ref.err")"
fi
|
|
546
|
+
|
|
547
|
+
# A passing policy check that carries one complete Surface trust reference.
SURFACE_CHECK='{"id":"surface-trust-fixture","kind":"policy","status":"pass","summary":"Surface trust evidence passed.","surface_trust_refs":[{"artifact_kind":"TrustReport","artifact_ref":"trust/report.json","gate_id":"builder.surface.claim","claim_type":"surface.claim","claim_status":"accepted","subject":"builder-kit","freshness":{"status":"fresh","summary":"Issued during this workflow."},"authority":{"producer":"surface-local","summary":"Local Surface trust producer."},"integrity":{"status":"matched","summary":"Artifact digest matched expected subject and gate.","digest":"sha256:abc123"},"status":"pass","summary":"Accepted Surface claim."}]}'

# The reference must be written to evidence.json, and the string "veritas"
# must not appear anywhere in that file.
if flow_agents_node "$WRITER" record-evidence "$ARTIFACT_DIR" \
    --verdict pass \
    --check-json "$SURFACE_CHECK" \
    --timestamp "2026-05-09T00:01:05Z" \
    >"$TMPDIR_EVAL/surface-evidence.out" 2>"$TMPDIR_EVAL/surface-evidence.err" \
  && rg -q '"surface_trust_refs"' "$ARTIFACT_DIR/evidence.json" \
  && rg -q '"artifact_kind": "TrustReport"' "$ARTIFACT_DIR/evidence.json" \
  && ! rg -q 'veritas' "$ARTIFACT_DIR/evidence.json"; then
  _pass "sidecar writer records provider-neutral Surface trust refs"
else
  _fail "sidecar writer did not record Surface trust refs: $(cat "$TMPDIR_EVAL/surface-evidence.out" "$TMPDIR_EVAL/surface-evidence.err")"
fi
|
|
559
|
+
|
|
560
|
+
# A Surface trust ref whose "authority" object carries an extra
# "veritas_policy" key must be rejected with an "unsupported field" message.
if flow_agents_node "$WRITER" record-evidence "$ARTIFACT_DIR" \
    --verdict pass \
    --check-json '{"id":"surface-trust-native-field","kind":"policy","status":"pass","summary":"Should fail.","surface_trust_refs":[{"artifact_kind":"Trust Snapshot","artifact_ref":"trust/snapshot.json","gate_id":"builder.surface.claim","claim_type":"surface.claim","claim_status":"accepted","subject":"builder-kit","freshness":{"status":"fresh","summary":"Fresh."},"authority":{"producer":"surface-local","summary":"Producer exists.","veritas_policy":"native-field"},"integrity":{"status":"matched","summary":"Matched."},"status":"pass"}]}' \
    >"$TMPDIR_EVAL/surface-invalid.out" 2>&1; then
  _fail "sidecar writer should reject provider-specific Surface trust fields"
elif rg -q 'unsupported field' "$TMPDIR_EVAL/surface-invalid.out"; then
  _pass "sidecar writer rejects provider-specific Surface trust fields"
else
  _fail "provider-specific Surface trust failure was not actionable: $(cat "$TMPDIR_EVAL/surface-invalid.out")"
fi
|
|
569
|
+
|
|
570
|
+
#######################################
# Assert that record-evidence refuses a Surface trust ref whose "pass" status
# contradicts its own facts.
# Globals:   WRITER, ARTIFACT_DIR, TMPDIR_EVAL (read)
# Arguments: $1 - short case name, used in the check id, the capture file
#                 name and the pass/fail messages
#            $2 - JSON object text for the single surface_trust_refs entry
# Outputs:   combined writer output is captured to
#            $TMPDIR_EVAL/surface-contradictory-<name>.out; the result is
#            reported through _pass / _fail
#######################################
check_contradictory_surface_ref() {
  local name="$1"
  local ref="$2"
  local capture="$TMPDIR_EVAL/surface-contradictory-$name.out"
  local check_json
  # Same JSON text the inline interpolation would produce.
  printf -v check_json \
    '{"id":"surface-trust-%s","kind":"policy","status":"pass","summary":"Should fail.","surface_trust_refs":[%s]}' \
    "$name" "$ref"

  if flow_agents_node "$WRITER" record-evidence "$ARTIFACT_DIR" \
      --verdict pass \
      --check-json "$check_json" >"$capture" 2>&1; then
    # The writer accepted the contradictory ref.
    _fail "sidecar writer should reject contradictory Surface trust ref: $name"
  elif rg -q 'contradicts Surface trust facts' "$capture"; then
    _pass "sidecar writer rejects contradictory Surface trust ref: $name"
  else
    # Rejected, but without the expected diagnostic.
    _fail "contradictory Surface trust ref failure was not actionable for $name: $(cat "$capture")"
  fi
}
|
|
583
|
+
|
|
584
|
+
# Each case sets ref "status":"pass" while exactly one fact disagrees with it:
# a rejected claim, stale freshness, an unknown producer, or an integrity mismatch.
check_contradictory_surface_ref "rejected-pass" '{"artifact_kind":"TrustReport","artifact_ref":"trust/report.json","gate_id":"builder.surface.claim","claim_type":"surface.claim","claim_status":"rejected","subject":"builder-kit","freshness":{"status":"fresh","summary":"Fresh."},"authority":{"producer":"surface-local","summary":"Producer exists."},"integrity":{"status":"matched","summary":"Matched."},"status":"pass"}'
check_contradictory_surface_ref "stale-pass" '{"artifact_kind":"TrustReport","artifact_ref":"trust/report.json","gate_id":"builder.surface.claim","claim_type":"surface.claim","claim_status":"accepted","subject":"builder-kit","freshness":{"status":"stale","summary":"Stale."},"authority":{"producer":"surface-local","summary":"Producer exists."},"integrity":{"status":"matched","summary":"Matched."},"status":"pass"}'
check_contradictory_surface_ref "missing-authority-pass" '{"artifact_kind":"TrustReport","artifact_ref":"trust/report.json","gate_id":"builder.surface.claim","claim_type":"surface.claim","claim_status":"accepted","subject":"builder-kit","freshness":{"status":"fresh","summary":"Fresh."},"authority":{"producer":"unknown","summary":"Producer missing."},"integrity":{"status":"matched","summary":"Matched."},"status":"pass"}'
check_contradictory_surface_ref "integrity-mismatch-pass" '{"artifact_kind":"TrustReport","artifact_ref":"trust/report.json","gate_id":"builder.surface.claim","claim_type":"surface.claim","claim_status":"accepted","subject":"builder-kit","freshness":{"status":"fresh","summary":"Fresh."},"authority":{"producer":"surface-local","summary":"Producer exists."},"integrity":{"status":"mismatch","summary":"Mismatch."},"status":"pass"}'
|
|
588
|
+
|
|
589
|
+
# Directory holding the Surface trust JSON fixtures used by check_surface_fixture.
SURFACE_FIXTURE_DIR="$ROOT/evals/fixtures/surface-trust"

#######################################
# Record evidence from one Surface trust fixture into a fresh artifact
# directory and validate the resulting evidence.json with an inline node script.
# Globals:   WRITER, TMPDIR_EVAL, SURFACE_FIXTURE_DIR (read)
# Arguments: $1 - case name (artifact dir suffix, task slug, capture file names)
#            $2 - fixture file name under SURFACE_FIXTURE_DIR
#            $3 - verdict passed to record-evidence
#            $4 - status expected on the single "surface-trust-*" check
#            $5 - text expected somewhere in that check's JSON
# Outputs:   reports through _pass/_fail; writer and validator output are
#            captured in $TMPDIR_EVAL/surface-<name>.out and .err
#######################################
check_surface_fixture() {
  local name="$1"
  local fixture="$2"
  local verdict="$3"
  local expected_status="$4"
  local expected_text="$5"
  local dir="$TMPDIR_EVAL/repo/.flow-agents/surface-$name"
  local out="$TMPDIR_EVAL/surface-$name.out"
  local err="$TMPDIR_EVAL/surface-$name.err"
  mkdir -p "$dir"
  # The validator's output is appended to the same capture files so that a
  # validation error shows up in the _fail message below.
  if flow_agents_node "$WRITER" record-evidence "$dir" \
    --task-slug "surface-$name" \
    --verdict "$verdict" \
    --check-json '{"id":"ordinary-builder-evidence","kind":"test","status":"pass","summary":"Ordinary Builder Kit evidence still records."}' \
    --surface-trust-json "$SURFACE_FIXTURE_DIR/$fixture" \
    --timestamp "2026-05-09T00:02:00Z" >"$out" 2>"$err" \
    && node -e 'const fs=require("fs"); const [file, expectedStatus, expectedText]=process.argv.slice(1); const data=JSON.parse(fs.readFileSync(file,"utf8")); const trustChecks=data.checks.filter((check)=>check.id.startsWith("surface-trust-")); if (trustChecks.length!==1) throw new Error(`expected one surface trust check, found ${trustChecks.length}`); const check=trustChecks[0]; if (check.status!==expectedStatus) throw new Error(`expected ${expectedStatus}, got ${check.status}`); const ref=check.surface_trust_refs[0]; const blob=JSON.stringify(check); if (!blob.includes(expectedText)) throw new Error(`missing expected text ${expectedText}: ${blob}`); if (blob.toLowerCase().includes("veritas")) throw new Error("surface trust output leaked a Veritas-specific field"); if (ref.gate_id==="unknown" || ref.claim_type==="unknown") throw new Error("surface trust ref did not map gate and claim metadata");' "$dir/evidence.json" "$expected_status" "$expected_text" >>"$out" 2>>"$err"
  then
    _pass "surface trust fixture maps $name to $expected_status evidence"
  else
    _fail "surface trust fixture $name failed: $(cat "$out" "$err")"
  fi
}
|
|
611
|
+
|
|
612
|
+
# Arguments per case: name, fixture file, verdict, expected check status,
# text expected in the recorded surface trust check.
check_surface_fixture "accepted" "accepted-claim-trust-report.json" "pass" "pass" "accepted"
check_surface_fixture "rejected" "rejected-claim-trust-report.json" "fail" "fail" "rejected"
check_surface_fixture "stale" "stale-claim-trust-snapshot.json" "not_verified" "not_verified" "not currently verifiable"
check_surface_fixture "missing-authority" "missing-authority-trust-report.json" "fail" "fail" "missing authority"
check_surface_fixture "integrity-mismatch" "integrity-mismatch-trust-report.json" "fail" "fail" "integrity"
check_surface_fixture "provider-absent" "provider-absent.json" "not_verified" "not_verified" "No trust provider is configured"
check_surface_fixture "artifact-absent" "artifact-absent.json" "not_verified" "not_verified" "not readable"
|
|
619
|
+
|
|
620
|
+
# record-evidence is invoked with --surface-trust-json only (no --check-json);
# it must succeed and evidence.json must contain a "surface_trust_refs" key.
PURE_SURFACE_DIR="$TMPDIR_EVAL/repo/.flow-agents/surface-trust-only"
mkdir -p "$PURE_SURFACE_DIR"
if flow_agents_node "$WRITER" record-evidence "$PURE_SURFACE_DIR" \
  --task-slug "surface-trust-only" \
  --verdict pass \
  --surface-trust-json "$SURFACE_FIXTURE_DIR/accepted-claim-trust-report.json" \
  --timestamp "2026-05-09T00:02:30Z" >"$TMPDIR_EVAL/surface-only.out" 2>"$TMPDIR_EVAL/surface-only.err" \
  && rg -q '"surface_trust_refs"' "$PURE_SURFACE_DIR/evidence.json"; then
  _pass "sidecar writer records Surface trust evidence without unrelated check-json"
else
  _fail "sidecar writer should accept Surface trust evidence without check-json: $(cat "$TMPDIR_EVAL/surface-only.out" "$TMPDIR_EVAL/surface-only.err")"
fi
|
|
632
|
+
|
|
633
|
+
# Move the shared artifact into the execution phase; the writer must succeed.
if flow_agents_node "$WRITER" advance-state "$ARTIFACT_DIR" \
  --status in_progress \
  --phase execution \
  --summary "Execution started from the planned sidecars." \
  --next-action "Run focused validation and record evidence." \
  --target-phase verification \
  --artifact-ref auto-sidecars--deliver.md \
  --timestamp "2026-05-09T00:01:30Z" >"$TMPDIR_EVAL/advance.out" 2>"$TMPDIR_EVAL/advance.err"; then
  _pass "sidecar writer advances workflow state"
else
  _fail "sidecar writer advance-state failed: $(cat "$TMPDIR_EVAL/advance.out" "$TMPDIR_EVAL/advance.err")"
fi

# The transition must be visible in both sidecars: the phase in state.json and
# the next-action text in handoff.json.
if rg -q '"phase": "execution"' "$ARTIFACT_DIR/state.json" && rg -q 'Run focused validation' "$ARTIFACT_DIR/handoff.json"; then
  _pass "sidecar writer updates handoff during phase transitions"
else
  _fail "sidecar writer did not update state and handoff for phase transition"
fi
|
|
651
|
+
|
|
652
|
+
# An unknown --status must be rejected with a message listing the allowed values.
if flow_agents_node "$WRITER" advance-state "$ARTIFACT_DIR" \
  --status dancing \
  --phase execution \
  --summary "Invalid status fixture." \
  --next-action "Should fail." >"$TMPDIR_EVAL/advance-invalid.out" 2>&1; then
  _fail "sidecar writer should reject invalid workflow states"
elif rg -q 'status must be one of' "$TMPDIR_EVAL/advance-invalid.out"; then
  _pass "sidecar writer rejects invalid workflow states"
else
  # Include the captured output, as the other "not actionable" checks do.
  _fail "invalid state failure was not actionable: $(cat "$TMPDIR_EVAL/advance-invalid.out")"
fi

# An unknown --target-phase must be rejected the same way.
if flow_agents_node "$WRITER" advance-state "$ARTIFACT_DIR" \
  --status in_progress \
  --phase execution \
  --summary "Invalid target phase fixture." \
  --next-action "Should fail." \
  --target-phase banana >"$TMPDIR_EVAL/advance-invalid-target.out" 2>&1; then
  _fail "sidecar writer should reject invalid target phases"
elif rg -q 'target phase must be one of' "$TMPDIR_EVAL/advance-invalid-target.out"; then
  _pass "sidecar writer rejects invalid target phases"
else
  _fail "invalid target phase failure was not actionable: $(cat "$TMPDIR_EVAL/advance-invalid-target.out")"
fi
|
|
676
|
+
|
|
677
|
+
# Snapshot both sidecars so the rejected transition can be shown to leave them untouched.
cp "$ARTIFACT_DIR/state.json" "$TMPDIR_EVAL/terminal-jump-state.before"
cp "$ARTIFACT_DIR/handoff.json" "$TMPDIR_EVAL/terminal-jump-handoff.before"
if flow_agents_node "$WRITER" advance-state "$ARTIFACT_DIR" \
  --status archived \
  --phase done \
  --summary "Verifier terminal jump fixture." \
  --next-status done \
  --next-action "Should not become terminal before release and learning." \
  --target-phase done \
  --timestamp "2026-05-09T00:01:40Z" >"$TMPDIR_EVAL/terminal-jump.out" 2>&1; then
  _fail "transition guard should reject verifier terminal jumps"
# A pass requires: the code in the command output, the same code in the
# diagnostics log, and byte-identical state.json and handoff.json.
elif rg -q 'terminal_jump_rejected' "$TMPDIR_EVAL/terminal-jump.out" \
  && [[ -f "$ARTIFACT_DIR/transition-diagnostics.jsonl" ]] \
  && rg -q '"code": "terminal_jump_rejected"' "$ARTIFACT_DIR/transition-diagnostics.jsonl" \
  && cmp -s "$ARTIFACT_DIR/state.json" "$TMPDIR_EVAL/terminal-jump-state.before" \
  && cmp -s "$ARTIFACT_DIR/handoff.json" "$TMPDIR_EVAL/terminal-jump-handoff.before"; then
  _pass "transition guard rejects terminal jumps without mutating state or handoff"
else
  # Include the captured output so the failing condition can be diagnosed.
  _fail "terminal jump rejection lacked diagnostics or mutated authoritative sidecars: $(cat "$TMPDIR_EVAL/terminal-jump.out")"
fi
|
|
697
|
+
|
|
698
|
+
# Separate artifact for the Builder Kit transition-guard scenarios: copy the
# plan, initialise it, then advance to verification under builder.build.
# These setup commands are not checked here; their output is only captured.
BUILDER_TRANSITION_DIR="$TMPDIR_EVAL/repo/.flow-agents/builder-transition-guard"
mkdir -p "$BUILDER_TRANSITION_DIR"
cp "$ARTIFACT_DIR/auto-sidecars--deliver.md" "$BUILDER_TRANSITION_DIR/builder-transition-guard--deliver.md"
flow_agents_node "$WRITER" init-plan "$BUILDER_TRANSITION_DIR/builder-transition-guard--deliver.md" \
  --source-request "Builder transition guard fixture." \
  --summary "Builder transition guard fixture." \
  --next-action "Move into verification." \
  --timestamp "2026-05-09T00:00:00Z" >"$TMPDIR_EVAL/builder-transition-init.out" 2>"$TMPDIR_EVAL/builder-transition-init.err"
flow_agents_node "$WRITER" advance-state "$BUILDER_TRANSITION_DIR" \
  --status verifying \
  --phase verification \
  --summary "Builder verification fixture." \
  --next-action "Verify according to Builder Kit build flow." \
  --target-phase evidence \
  --flow-definition builder.build \
  --timestamp "2026-05-09T00:01:00Z" >"$TMPDIR_EVAL/builder-transition-verify.out" 2>"$TMPDIR_EVAL/builder-transition-verify.err"

# Routing back to execution without --route-back-reason must be rejected and
# must leave state.json and handoff.json unchanged.
cp "$BUILDER_TRANSITION_DIR/state.json" "$TMPDIR_EVAL/builder-missing-reason-state.before"
cp "$BUILDER_TRANSITION_DIR/handoff.json" "$TMPDIR_EVAL/builder-missing-reason-handoff.before"
if flow_agents_node "$WRITER" advance-state "$BUILDER_TRANSITION_DIR" \
  --status in_progress \
  --phase execution \
  --summary "Missing route-back reason fixture." \
  --next-action "Route back to execution." \
  --target-phase verification \
  --flow-definition builder.build >"$TMPDIR_EVAL/builder-missing-reason.out" 2>&1; then
  _fail "transition guard should reject Builder Kit route-back without reason"
elif rg -q 'route_back_reason_required' "$TMPDIR_EVAL/builder-missing-reason.out" \
  && rg -q 'implementation_defect' "$BUILDER_TRANSITION_DIR/transition-diagnostics.jsonl" \
  && cmp -s "$BUILDER_TRANSITION_DIR/state.json" "$TMPDIR_EVAL/builder-missing-reason-state.before" \
  && cmp -s "$BUILDER_TRANSITION_DIR/handoff.json" "$TMPDIR_EVAL/builder-missing-reason-handoff.before"; then
  _pass "transition guard rejects missing Builder Kit route-back reasons without mutation"
else
  # Include the captured output so the failing condition can be diagnosed.
  _fail "missing Builder Kit route-back reason was not fail-closed: $(cat "$TMPDIR_EVAL/builder-missing-reason.out")"
fi
|
|
733
|
+
|
|
734
|
+
# With an explicit --route-back-reason the route-back must succeed, put the
# state back in the execution phase, and record an attempt count of 1.
if flow_agents_node "$WRITER" advance-state "$BUILDER_TRANSITION_DIR" \
  --status in_progress \
  --phase execution \
  --summary "Allowed route-back fixture." \
  --next-action "Fix implementation defect." \
  --target-phase verification \
  --flow-definition builder.build \
  --route-back-reason implementation_defect \
  --timestamp "2026-05-09T00:01:10Z" >"$TMPDIR_EVAL/builder-route-back.out" 2>"$TMPDIR_EVAL/builder-route-back.err" \
  && rg -q '"phase": "execution"' "$BUILDER_TRANSITION_DIR/state.json" \
  && rg -q '"count": 1' "$BUILDER_TRANSITION_DIR/transition-attempts.json"; then
  _pass "transition guard allows Builder Kit route-back with deterministic attempt key"
else
  _fail "allowed Builder Kit route-back failed: $(cat "$TMPDIR_EVAL/builder-route-back.out" "$TMPDIR_EVAL/builder-route-back.err")"
fi
|
|
749
|
+
|
|
750
|
+
# Repeat the verification -> execution round trip for attempts 2 and 3.
# Timestamps come from the attempt number: :20/:25 and :30/:35.
for attempt in 2 3; do
  flow_agents_node "$WRITER" advance-state "$BUILDER_TRANSITION_DIR" \
    --status verifying \
    --phase verification \
    --summary "Return to verification attempt $attempt." \
    --next-action "Verify again." \
    --target-phase evidence \
    --flow-definition builder.build \
    --timestamp "2026-05-09T00:01:${attempt}0Z" >"$TMPDIR_EVAL/builder-forward-$attempt.out" 2>"$TMPDIR_EVAL/builder-forward-$attempt.err"
  flow_agents_node "$WRITER" advance-state "$BUILDER_TRANSITION_DIR" \
    --status in_progress \
    --phase execution \
    --summary "Route back attempt $attempt." \
    --next-action "Fix implementation defect again." \
    --target-phase verification \
    --flow-definition builder.build \
    --route-back-reason implementation_defect \
    --timestamp "2026-05-09T00:01:${attempt}5Z" >"$TMPDIR_EVAL/builder-route-back-$attempt.out" 2>"$TMPDIR_EVAL/builder-route-back-$attempt.err"
done

# Return to verification once more, then snapshot the attempts file so the
# rejected fourth route-back can be shown not to change it.
flow_agents_node "$WRITER" advance-state "$BUILDER_TRANSITION_DIR" \
  --status verifying \
  --phase verification \
  --summary "Return to verification before exceeded route-back." \
  --next-action "Verify again." \
  --target-phase evidence \
  --flow-definition builder.build \
  --timestamp "2026-05-09T00:01:50Z" >"$TMPDIR_EVAL/builder-forward-4.out" 2>"$TMPDIR_EVAL/builder-forward-4.err"
cp "$BUILDER_TRANSITION_DIR/transition-attempts.json" "$TMPDIR_EVAL/builder-attempts.before"
if flow_agents_node "$WRITER" advance-state "$BUILDER_TRANSITION_DIR" \
  --status in_progress \
  --phase execution \
  --summary "Exceeded route-back fixture." \
  --next-action "Should block after max attempts." \
  --target-phase verification \
  --flow-definition builder.build \
  --route-back-reason implementation_defect >"$TMPDIR_EVAL/builder-route-back-exceeded.out" 2>&1; then
  _fail "transition guard should block exceeded Builder Kit route-back attempts"
elif rg -q 'route_back_attempts_exceeded' "$TMPDIR_EVAL/builder-route-back-exceeded.out" \
  && rg -q '"count": 3' "$BUILDER_TRANSITION_DIR/transition-attempts.json" \
  && cmp -s "$BUILDER_TRANSITION_DIR/transition-attempts.json" "$TMPDIR_EVAL/builder-attempts.before"; then
  _pass "transition guard blocks route-back loops without double incrementing rejected attempts"
else
  # Include the captured output so the failing condition can be diagnosed.
  _fail "Builder Kit max-attempt route-back behavior was not deterministic: $(cat "$TMPDIR_EVAL/builder-route-back-exceeded.out")"
fi
|
|
795
|
+
|
|
796
|
+
# Same route-back sequence as the Builder Kit scenario but without
# --flow-definition or --route-back-reason. Setup commands are unchecked here.
LEGACY_TRANSITION_DIR="$TMPDIR_EVAL/repo/.flow-agents/legacy-transition-guard"
mkdir -p "$LEGACY_TRANSITION_DIR"
cp "$ARTIFACT_DIR/auto-sidecars--deliver.md" "$LEGACY_TRANSITION_DIR/legacy-transition-guard--deliver.md"
flow_agents_node "$WRITER" init-plan "$LEGACY_TRANSITION_DIR/legacy-transition-guard--deliver.md" \
  --source-request "Legacy transition guard fixture." \
  --summary "Legacy transition guard fixture." \
  --next-action "Move into verification." \
  --timestamp "2026-05-09T00:00:00Z" >"$TMPDIR_EVAL/legacy-transition-init.out" 2>"$TMPDIR_EVAL/legacy-transition-init.err"
flow_agents_node "$WRITER" advance-state "$LEGACY_TRANSITION_DIR" \
  --status verifying \
  --phase verification \
  --summary "Legacy verification fixture." \
  --next-action "Verify direct primitive workflow." \
  --target-phase evidence \
  --timestamp "2026-05-09T00:01:00Z" >"$TMPDIR_EVAL/legacy-transition-verify.out" 2>"$TMPDIR_EVAL/legacy-transition-verify.err"
# The route-back must succeed and must not create transition-attempts.json.
if flow_agents_node "$WRITER" advance-state "$LEGACY_TRANSITION_DIR" \
  --status in_progress \
  --phase execution \
  --summary "Legacy direct primitive route-back." \
  --next-action "Direct primitive can route back without Builder Kit metadata." \
  --target-phase verification \
  --timestamp "2026-05-09T00:01:10Z" >"$TMPDIR_EVAL/legacy-route-back.out" 2>"$TMPDIR_EVAL/legacy-route-back.err" \
  && rg -q '"phase": "execution"' "$LEGACY_TRANSITION_DIR/state.json" \
  && [[ ! -f "$LEGACY_TRANSITION_DIR/transition-attempts.json" ]]; then
  _pass "transition guard preserves backward-compatible legacy direct primitives"
else
  _fail "legacy-compatible direct primitive route-back failed: $(cat "$TMPDIR_EVAL/legacy-route-back.out" "$TMPDIR_EVAL/legacy-route-back.err")"
fi
|
|
824
|
+
|
|
825
|
+
# Plan fixture whose verification verdict is NOT_VERIFIED. The quoted heredoc
# delimiter keeps the Markdown literal (no expansion).
# NOTE(review): the checklist items under "Acceptance criteria" and
# "Usefulness checks" appear unindented in this view; confirm the intended
# Markdown nesting against the real script.
NV_DIR="$TMPDIR_EVAL/repo/.flow-agents/not-verified-sidecars"
mkdir -p "$NV_DIR"
cat > "$NV_DIR/not-verified-sidecars--deliver.md" <<'MARKDOWN'
# Route not verified evidence

status: needs-decision
type: deliver

## Plan

Record uncertain evidence without pretending it passed.

## Definition Of Done

- **User outcome:** Workflow agents can persist uncertain evidence for routing.
- **Scope:** Not-verified sidecar writer behavior.
- **Acceptance criteria:**
- [x] Not verified evidence is recorded - Evidence: evidence sidecar.
- **Usefulness checks:**
- [x] Unknown, NOT_VERIFIED, and TODO gaps are resolved or explicitly accepted
- **Stop-short risks:** Not verified evidence could be hidden as pass.
- **Durable docs target:** not needed
- **Sandbox mode:** local-edit

## Verification Report

Build: [NOT_VERIFIED] external service unavailable

### Acceptance Criteria
- [NOT_VERIFIED] Not verified evidence is recorded - Evidence collection unavailable.

### Verdict: NOT_VERIFIED

## Goal Fit Gate

- [x] Original user goal restated
- [ ] Every acceptance criterion has evidence

## Final Acceptance

- [ ] CI/relevant checks passed
MARKDOWN

if flow_agents_node "$WRITER" init-plan "$NV_DIR/not-verified-sidecars--deliver.md" \
  --source-request "Route not verified evidence." \
  --summary "Not verified fixture initialized." \
  --next-action "Record not verified evidence." \
  --timestamp "2026-05-09T00:00:00Z" >"$TMPDIR_EVAL/nv-init.out" 2>"$TMPDIR_EVAL/nv-init.err"; then
  _pass "sidecar writer initializes not-verified fixture"
else
  _fail "sidecar writer not-verified init failed: $(cat "$TMPDIR_EVAL/nv-init.out" "$TMPDIR_EVAL/nv-init.err")"
fi

# Record a not_verified verdict together with an explicit gap description.
if flow_agents_node "$WRITER" record-evidence "$NV_DIR" \
  --verdict not_verified \
  --check-json '{"id":"external-check","kind":"external","status":"not_verified","summary":"External service was unavailable."}' \
  --gap "External service was unavailable before user decision." \
  --timestamp "2026-05-09T00:01:00Z" >"$TMPDIR_EVAL/nv-evidence.out" 2>"$TMPDIR_EVAL/nv-evidence.err"; then
  _pass "sidecar writer records not-verified evidence for routing"
else
  _fail "sidecar writer not-verified evidence failed: $(cat "$TMPDIR_EVAL/nv-evidence.out" "$TMPDIR_EVAL/nv-evidence.err")"
fi

# state.json must carry the not_verified status and evidence.json the gaps key.
if rg -q '"status": "not_verified"' "$NV_DIR/state.json" && rg -q '"not_verified_gaps"' "$NV_DIR/evidence.json"; then
  _pass "sidecar writer preserves not-verified state and gaps"
else
  _fail "sidecar writer did not preserve not-verified state"
fi
|
|
893
|
+
|
|
894
|
+
# An invalid --verdict against a directory that was never created must fail
# and must not leave a lock behind.
NEW_INVALID_DIR="$TMPDIR_EVAL/repo/.flow-agents/new-invalid-artifact"
if flow_agents_node "$WRITER" record-evidence "$NEW_INVALID_DIR" \
  --verdict banana \
  --check-json '{"id":"invalid-new","kind":"test","status":"pass","summary":"Should fail."}' >"$TMPDIR_EVAL/new-invalid.out" 2>&1; then
  _fail "sidecar writer should reject invalid new artifact command"
# The lock-denied scenario in this script names the lock path
# ".workflow-sidecar.lockdir", so both spellings are checked here.
# NOTE(review): confirm which lock name(s) the writer actually uses.
elif [[ ! -e "$NEW_INVALID_DIR/.workflow-sidecar.lock" \
  && ! -e "$NEW_INVALID_DIR/.workflow-sidecar.lockdir" ]]; then
  _pass "sidecar writer does not leave lock files for invalid new artifact commands"
else
  _fail "sidecar writer left lock file for invalid new artifact command"
fi
|
|
904
|
+
|
|
905
|
+
# Make the artifact directory read/execute-only so the lock cannot be created,
# then require record-critique to fail quickly with actionable guidance.
LOCK_DENIED_DIR="$TMPDIR_EVAL/repo/.flow-agents/lock-denied"
mkdir -p "$LOCK_DENIED_DIR"
if ! chmod 500 "$LOCK_DENIED_DIR" 2>"$TMPDIR_EVAL/lock-denied-chmod.err"; then
  _pass "sidecar writer lock permission coverage skipped because chmod is unavailable: $(cat "$TMPDIR_EVAL/lock-denied-chmod.err")"
elif [[ -w "$LOCK_DENIED_DIR" ]]; then
  # Mode 500 does not stop this user from writing (e.g. running as root), so
  # the writer could not hit a permission error; skip instead of failing.
  chmod 700 "$LOCK_DENIED_DIR" 2>/dev/null || true
  _pass "sidecar writer lock permission coverage skipped because directory permissions are not enforced for this user"
elif run_bounded 5 flow_agents_node "$WRITER" record-critique "$LOCK_DENIED_DIR" \
  --id lock-denied-review \
  --reviewer tool-code-reviewer \
  --verdict pass \
  --summary "This lock acquisition should fail quickly." >"$TMPDIR_EVAL/lock-denied.out" 2>&1; then
  # Restore permissions (best effort) before reporting.
  chmod 700 "$LOCK_DENIED_DIR" 2>/dev/null || true
  _fail "sidecar writer should reject lock acquisition permission failures"
else
  chmod 700 "$LOCK_DENIED_DIR" 2>/dev/null || true
  # The lock path is matched as a fixed string (-F): it is a filesystem path,
  # not a regex, and may contain regex metacharacters such as ".".
  if rg -q 'failed to acquire workflow sidecar lock' "$TMPDIR_EVAL/lock-denied.out" \
    && rg -q 'record-critique' "$TMPDIR_EVAL/lock-denied.out" \
    && rg -qF -- "$LOCK_DENIED_DIR/.workflow-sidecar.lockdir" "$TMPDIR_EVAL/lock-denied.out" \
    && rg -q 'EPERM|EACCES|permission denied|operation not permitted' "$TMPDIR_EVAL/lock-denied.out" \
    && rg -q 'permissions, ownership, or sandbox restrictions' "$TMPDIR_EVAL/lock-denied.out" \
    && rg -q 'fix permissions or ownership' "$TMPDIR_EVAL/lock-denied.out" \
    && rg -q 'approved writable workspace' "$TMPDIR_EVAL/lock-denied.out" \
    && rg -q 'manually write schema-valid sidecars' "$TMPDIR_EVAL/lock-denied.out" \
    && rg -q 'workflow artifact validation rather than bypassing locks' "$TMPDIR_EVAL/lock-denied.out" \
    && [[ ! -e "$LOCK_DENIED_DIR/.workflow-sidecar.lockdir" ]] \
    && [[ ! -e "$LOCK_DENIED_DIR/critique.json" ]]; then
    _pass "sidecar writer fails fast with actionable lock acquisition permission guidance"
  else
    _fail "sidecar writer lock acquisition failure was not actionable: $(cat "$TMPDIR_EVAL/lock-denied.out")"
  fi
fi
|
|
936
|
+
|
|
937
|
+
# Record a passing critique against the shared artifact; the writer must succeed.
if flow_agents_node "$WRITER" record-critique "$ARTIFACT_DIR" \
  --id writer-review \
  --reviewer tool-code-reviewer \
  --verdict pass \
  --summary "No blocking findings." \
  --artifact-ref auto-sidecars--deliver.md \
  --timestamp "2026-05-09T00:02:00Z" >"$TMPDIR_EVAL/critique.out" 2>"$TMPDIR_EVAL/critique.err"; then
  _pass "sidecar writer records passing critique"
else
  _fail "sidecar writer critique failed: $(cat "$TMPDIR_EVAL/critique.out" "$TMPDIR_EVAL/critique.err")"
fi
|
|
948
|
+
|
|
949
|
+
# Fresh artifact for the concurrency scenario: copy the plan, initialise it and
# record one passing evidence check. Setup commands are unchecked here.
CONCURRENT_DIR="$TMPDIR_EVAL/repo/.flow-agents/concurrent-critiques"
mkdir -p "$CONCURRENT_DIR"
cp "$ARTIFACT_DIR/auto-sidecars--deliver.md" "$CONCURRENT_DIR/concurrent-critiques--deliver.md"
flow_agents_node "$WRITER" init-plan "$CONCURRENT_DIR/concurrent-critiques--deliver.md" \
  --source-request "Concurrent critique fixture." \
  --summary "Concurrent critique fixture." \
  --next-action "Record concurrent critique." \
  --timestamp "2026-05-09T00:00:00Z" >"$TMPDIR_EVAL/concurrent-init.out" 2>"$TMPDIR_EVAL/concurrent-init.err"
flow_agents_node "$WRITER" record-evidence "$CONCURRENT_DIR" \
  --verdict pass \
  --check-json '{"id":"concurrent-fixture","kind":"test","status":"pass","summary":"Concurrent fixture setup passed."}' \
  --timestamp "2026-05-09T00:01:00Z" >"$TMPDIR_EVAL/concurrent-evidence.out" 2>"$TMPDIR_EVAL/concurrent-evidence.err"

# Start two record-critique writers in the background against the same
# directory, each with FLOW_AGENTS_WORKFLOW_SIDECAR_LOCK_DELAY=0.2 set.
# NOTE(review): the delay presumably widens the window in which both writers
# contend for the lock; confirm against the writer implementation.
FLOW_AGENTS_WORKFLOW_SIDECAR_LOCK_DELAY=0.2 flow_agents_node "$WRITER" record-critique "$CONCURRENT_DIR" \
  --id concurrent-review-a \
  --reviewer tool-code-reviewer \
  --verdict pass \
  --summary "Concurrent review A passed." \
  --timestamp "2026-05-09T00:02:00Z" >"$TMPDIR_EVAL/concurrent-a.out" 2>"$TMPDIR_EVAL/concurrent-a.err" &
pid_a=$!
FLOW_AGENTS_WORKFLOW_SIDECAR_LOCK_DELAY=0.2 flow_agents_node "$WRITER" record-critique "$CONCURRENT_DIR" \
  --id concurrent-review-b \
  --reviewer tool-code-reviewer \
  --verdict pass \
  --summary "Concurrent review B passed." \
  --timestamp "2026-05-09T00:02:01Z" >"$TMPDIR_EVAL/concurrent-b.out" 2>"$TMPDIR_EVAL/concurrent-b.err" &
pid_b=$!
# Capture each exit status with "|| status=$?" so a failing writer is reported
# by the check below instead of aborting the script if errexit is enabled.
status_a=0
wait "$pid_a" || status_a=$?
status_b=0
wait "$pid_b" || status_b=$?

# Both writers must exit 0 and both critique ids must be present in critique.json.
if [[ "$status_a" -eq 0 && "$status_b" -eq 0 ]] \
  && rg -q '"id": "concurrent-review-a"' "$CONCURRENT_DIR/critique.json" \
  && rg -q '"id": "concurrent-review-b"' "$CONCURRENT_DIR/critique.json"; then
  _pass "sidecar writer serializes concurrent sidecar writes"
else
  _fail "sidecar writer lost concurrent critique writes: $(cat "$TMPDIR_EVAL/concurrent-a.out" "$TMPDIR_EVAL/concurrent-a.err" "$TMPDIR_EVAL/concurrent-b.out" "$TMPDIR_EVAL/concurrent-b.err")"
fi
|
|
988
|
+
|
|
989
|
+
# Record a merge release decision with gates, rollback, observability,
# post-deploy and docs sections plus an explicit summary.
if flow_agents_node "$WRITER" record-release "$ARTIFACT_DIR" \
  --decision merge \
  --scope "Workflow sidecar writer fixture." \
  --evidence-ref evidence.json \
  --gate-json '{"name":"merge","status":"pass","summary":"Evidence and critique passed.","evidence_refs":["writer-fixture"]}' \
  --gate-json '{"name":"docs","status":"pass","summary":"Workflow usage docs are the durable target."}' \
  --rollback-json '{"status":"not_required","summary":"No deployed runtime change.","owner":"codex"}' \
  --observability-json '{"status":"not_required","summary":"No production telemetry needed for this fixture."}' \
  --post-deploy-json '{"id":"post-merge-static","status":"planned","summary":"Run static checks after merge."}' \
  --docs-json '{"status":"updated","summary":"Workflow usage documentation covers sidecar use.","refs":["docs/workflow-usage-guide.md"]}' \
  --summary "Release readiness recorded for merge." \
  --timestamp "2026-05-09T00:03:00Z" >"$TMPDIR_EVAL/release.out" 2>"$TMPDIR_EVAL/release.err"; then
  _pass "sidecar writer records release readiness"
else
  _fail "sidecar writer release failed: $(cat "$TMPDIR_EVAL/release.out" "$TMPDIR_EVAL/release.err")"
fi

# release.json must hold the decision and state.json must be in the release phase.
if rg -q '"decision": "merge"' "$ARTIFACT_DIR/release.json" && rg -q '"phase": "release"' "$ARTIFACT_DIR/state.json"; then
  _pass "sidecar writer advances state from release readiness"
else
  _fail "sidecar writer did not update release state"
fi
|
|
1011
|
+
|
|
1012
|
+
# Fresh artifact for a release recorded without --summary. Setup commands
# (init-plan, record-evidence) are unchecked here.
NO_SUMMARY_RELEASE_DIR="$TMPDIR_EVAL/repo/.flow-agents/no-summary-release"
mkdir -p "$NO_SUMMARY_RELEASE_DIR"
cp "$ARTIFACT_DIR/auto-sidecars--deliver.md" "$NO_SUMMARY_RELEASE_DIR/no-summary-release--deliver.md"
flow_agents_node "$WRITER" init-plan "$NO_SUMMARY_RELEASE_DIR/no-summary-release--deliver.md" \
  --source-request "No-summary release fixture." \
  --summary "No-summary release fixture." \
  --next-action "Record release without an explicit summary." \
  --timestamp "2026-05-09T00:00:00Z" >"$TMPDIR_EVAL/no-summary-release-init.out" 2>"$TMPDIR_EVAL/no-summary-release-init.err"
flow_agents_node "$WRITER" record-evidence "$NO_SUMMARY_RELEASE_DIR" \
  --verdict pass \
  --check-json '{"id":"no-summary-release-fixture","kind":"test","status":"pass","summary":"No-summary release setup passed."}' \
  --timestamp "2026-05-09T00:01:00Z" >"$TMPDIR_EVAL/no-summary-release-evidence.out" 2>"$TMPDIR_EVAL/no-summary-release-evidence.err"

# Four conditions, all required: record-release succeeds; the inline node check
# finds phase "release" and next_action.summary equal to the default text
# "Release readiness recorded for merge."; release.json holds the decision;
# and the validator accepts the directory.
if flow_agents_node "$WRITER" record-release "$NO_SUMMARY_RELEASE_DIR" \
  --decision merge \
  --scope "No-summary release fixture." \
  --evidence-ref evidence.json \
  --gate-json '{"name":"merge","status":"pass","summary":"Evidence passed.","evidence_refs":["no-summary-release-fixture"]}' \
  --rollback-json '{"status":"not_required","summary":"No deployed runtime change.","owner":"codex"}' \
  --observability-json '{"status":"not_required","summary":"No production telemetry needed for this fixture."}' \
  --docs-json '{"status":"not_needed","summary":"No docs change needed."}' \
  --timestamp "2026-05-09T00:03:30Z" >"$TMPDIR_EVAL/no-summary-release.out" 2>"$TMPDIR_EVAL/no-summary-release.err" \
  && node -e 'const fs = require("fs"); const dir = process.argv[1]; const state = JSON.parse(fs.readFileSync(`${dir}/state.json`, "utf8")); if (state.phase !== "release") throw new Error(`expected release phase, got ${state.phase}`); if (state.next_action?.summary !== "Release readiness recorded for merge.") throw new Error(`unexpected summary: ${state.next_action?.summary}`);' "$NO_SUMMARY_RELEASE_DIR" \
  && rg -q '"decision": "merge"' "$NO_SUMMARY_RELEASE_DIR/release.json" \
  && flow_agents_node "$VALIDATOR" --skip-markdown-validation "$NO_SUMMARY_RELEASE_DIR" >"$TMPDIR_EVAL/no-summary-release-valid.out" 2>"$TMPDIR_EVAL/no-summary-release-valid.err"; then
  _pass "sidecar writer records valid release state without explicit summary"
else
  # The validator capture files may not exist if an earlier step failed, hence
  # the 2>/dev/null on cat.
  _fail "no-summary release state fallback failed: $(cat "$TMPDIR_EVAL/no-summary-release.out" "$TMPDIR_EVAL/no-summary-release.err" "$TMPDIR_EVAL/no-summary-release-valid.out" "$TMPDIR_EVAL/no-summary-release-valid.err" 2>/dev/null)"
fi
|
|
1041
|
+
|
|
1042
|
+
# Record a "learned" closeout against the main artifact directory.
# stdout and stderr are captured separately so a failure can be echoed
# back verbatim in the failure message.
if ! flow_agents_node "$WRITER" record-learning "$ARTIFACT_DIR" \
  --status learned \
  --record-json '{"id":"writer-loop","source_refs":["release.json","evidence.json"],"outcome":"success","facts":["Release sidecar validated."],"interpretation":"Writer commands can carry release and learning feedback without hand-authored JSON.","routing":[{"target":"none","action":"No follow-up required after intended-vs-observed closeout.","status":"completed"}],"correction":{"needed":false,"evidence":"Release, evidence, and learning closeout matched intended behavior."}}' \
  --summary "Learning recorded and no follow-up remains." \
  --timestamp "2026-05-09T00:04:00Z" \
  >"$TMPDIR_EVAL/learning.out" \
  2>"$TMPDIR_EVAL/learning.err"; then
  _fail "sidecar writer learning failed: $(cat "$TMPDIR_EVAL/learning.out" "$TMPDIR_EVAL/learning.err")"
else
  _pass "sidecar writer records learning feedback"
fi
|
|
1051
|
+
|
|
1052
|
+
# The learning sidecar must carry the "learned" status and the state
# sidecar must have moved to the "learning" phase; either string missing
# is a failure.
if ! rg -q '"status": "learned"' "$ARTIFACT_DIR/learning.json" ||
  ! rg -q '"phase": "learning"' "$ARTIFACT_DIR/state.json"; then
  _fail "sidecar writer did not update learning state"
else
  _pass "sidecar writer advances state from learning feedback"
fi
|
|
1057
|
+
|
|
1058
|
+
# Validate the recorded learning.json, then confirm it carries the
# no-correction decision ("needed": false) and the "none" routing target.
if flow_agents_node "$VALIDATOR" --skip-markdown-validation "$ARTIFACT_DIR/learning.json" \
  >"$TMPDIR_EVAL/learning-valid.out" \
  2>"$TMPDIR_EVAL/learning-valid.err" &&
  rg -q '"needed": false' "$ARTIFACT_DIR/learning.json" &&
  rg -q '"target": "none"' "$ARTIFACT_DIR/learning.json"; then
  _pass "sidecar writer records valid no-correction learning closeout"
else
  _fail "no-correction learning closeout failed validation: $(cat "$TMPDIR_EVAL/learning-valid.out" "$TMPDIR_EVAL/learning-valid.err")"
fi
|
|
1065
|
+
|
|
1066
|
+
# Correction-needed closeout: record a followup_required learning entry
# whose correction block has "needed": true, then run the validator on the
# resulting learning.json. Both steps must succeed for the check to pass.
CORRECTION_DIR="$TMPDIR_EVAL/repo/.flow-agents/correction-needed-learning"
mkdir -p "$CORRECTION_DIR"
if flow_agents_node "$WRITER" record-learning "$CORRECTION_DIR" \
  --task-slug correction-needed-learning \
  --status followup_required \
  --record-json '{"id":"stale-learning-route","source_refs":["release.json","issue-93"],"outcome":"mixed","facts":["A stale learning route remained local after durable tracking existed."],"interpretation":"Terminal learning review must force a correction or no-correction decision.","routing":[{"target":"skill","action":"Update learning-review closeout contract.","status":"open","ref":"https://github.com/kontourai/flow-agents/issues/93"}],"correction":{"needed":true,"type":"workflow","recurrence_key":"learning-review.stale-route-closeout","intended_behavior":"Terminal learning review routes or closes every actionable gap.","observed_behavior":"A stale learning route remained local after durable tracking existed.","gap":"Learning review did not force a correction/no-correction decision.","prevention":{"target":"skill","action":"Update learning-review closeout contract.","status":"open","ref":"https://github.com/kontourai/flow-agents/issues/93"}}}' \
  --summary "Correction-needed learning recorded." \
  --timestamp "2026-05-09T00:04:30Z" >"$TMPDIR_EVAL/correction-needed-learning.out" 2>"$TMPDIR_EVAL/correction-needed-learning.err" \
  && flow_agents_node "$VALIDATOR" --skip-markdown-validation "$CORRECTION_DIR/learning.json" >"$TMPDIR_EVAL/correction-needed-valid.out" 2>"$TMPDIR_EVAL/correction-needed-valid.err"; then
  _pass "sidecar writer records valid correction-needed learning closeout"
else
  # When the writer step fails, the && short-circuits and the validator
  # capture files are never created. Silence cat's "No such file" noise so
  # only the captured output that does exist lands in the failure message.
  _fail "correction-needed learning closeout failed: $(cat "$TMPDIR_EVAL/correction-needed-learning.out" "$TMPDIR_EVAL/correction-needed-learning.err" "$TMPDIR_EVAL/correction-needed-valid.out" "$TMPDIR_EVAL/correction-needed-valid.err" 2>/dev/null)"
fi
|
|
1079
|
+
|
|
1080
|
+
# Fixture: a fresh artifact directory seeded with a copy of the
# auto-sidecars deliver plan and initialised through init-plan.
DOGFOOD_DIR="$TMPDIR_EVAL/repo/.flow-agents/dogfood-pass"
mkdir -p "$DOGFOOD_DIR"
cp "$ARTIFACT_DIR/auto-sidecars--deliver.md" "$DOGFOOD_DIR/dogfood-pass--deliver.md"
flow_agents_node "$WRITER" init-plan "$DOGFOOD_DIR/dogfood-pass--deliver.md" \
  --source-request "Dogfood pass fixture." \
  --summary "Dogfood pass fixture." \
  --next-action "Run dogfood pass." \
  --timestamp "2026-05-09T00:00:00Z" \
  >"$TMPDIR_EVAL/dogfood-init.out" \
  2>"$TMPDIR_EVAL/dogfood-init.err"

# A clean "pass" verdict with no --check-json must be refused, and the
# refusal must carry the expected error text.
if flow_agents_node "$WRITER" dogfood-pass \
  --artifact-root "$SESSION_ROOT" \
  --artifact-dir "$DOGFOOD_DIR" \
  --verdict pass \
  --summary "Should fail without evidence." \
  >"$TMPDIR_EVAL/dogfood-no-evidence.out" 2>&1; then
  _fail "dogfood-pass should reject clean pass without evidence"
else
  if rg -q 'cannot mark clean without passing evidence' "$TMPDIR_EVAL/dogfood-no-evidence.out"; then
    _pass "dogfood-pass refuses clean completion without evidence"
  else
    _fail "dogfood-pass missing actionable no-evidence error"
  fi
fi
|
|
1100
|
+
|
|
1101
|
+
# Fixture: an initialised artifact directory that already contains an
# evidence.json claiming verdict "pass" while one of its checks is "fail".
DIRTY_EVIDENCE_DOGFOOD_DIR="$TMPDIR_EVAL/repo/.flow-agents/dogfood-dirty-evidence"
mkdir -p "$DIRTY_EVIDENCE_DOGFOOD_DIR"
cp "$ARTIFACT_DIR/auto-sidecars--deliver.md" "$DIRTY_EVIDENCE_DOGFOOD_DIR/dogfood-dirty-evidence--deliver.md"
flow_agents_node "$WRITER" init-plan "$DIRTY_EVIDENCE_DOGFOOD_DIR/dogfood-dirty-evidence--deliver.md" \
  --source-request "Dogfood dirty evidence fixture." \
  --summary "Dogfood dirty evidence fixture." \
  --next-action "Run dogfood pass against existing dirty evidence." \
  --timestamp "2026-05-09T00:00:00Z" \
  >"$TMPDIR_EVAL/dogfood-dirty-evidence-init.out" \
  2>"$TMPDIR_EVAL/dogfood-dirty-evidence-init.err"
cat <<'JSON' >"$DIRTY_EVIDENCE_DOGFOOD_DIR/evidence.json"
{
"schema_version": "1.0",
"task_slug": "dogfood-dirty-evidence",
"verdict": "pass",
"checks": [
{
"id": "existing-pass",
"kind": "test",
"status": "pass",
"summary": "Existing pass check."
},
{
"id": "existing-fail",
"kind": "test",
"status": "fail",
"summary": "Existing fail check."
}
],
"not_verified_gaps": []
}
JSON

# Snapshot state and handoff so the check can prove they were not touched.
cp "$DIRTY_EVIDENCE_DOGFOOD_DIR/state.json" "$TMPDIR_EVAL/dogfood-dirty-evidence-state.before"
cp "$DIRTY_EVIDENCE_DOGFOOD_DIR/handoff.json" "$TMPDIR_EVAL/dogfood-dirty-evidence-handoff.before"

if flow_agents_node "$WRITER" dogfood-pass \
  --artifact-root "$SESSION_ROOT" \
  --artifact-dir "$DIRTY_EVIDENCE_DOGFOOD_DIR" \
  --verdict pass \
  --summary "Should fail before state writes." \
  >"$TMPDIR_EVAL/dogfood-dirty-evidence.out" 2>&1; then
  _fail "dogfood-pass should reject existing dirty pass evidence before state writes"
else
  # Rejected as required: the error text must match and both sidecars must
  # be byte-identical to their snapshots.
  if rg -q 'cannot mark clean without passing evidence' "$TMPDIR_EVAL/dogfood-dirty-evidence.out" &&
    cmp -s "$DIRTY_EVIDENCE_DOGFOOD_DIR/state.json" "$TMPDIR_EVAL/dogfood-dirty-evidence-state.before" &&
    cmp -s "$DIRTY_EVIDENCE_DOGFOOD_DIR/handoff.json" "$TMPDIR_EVAL/dogfood-dirty-evidence-handoff.before"; then
    _pass "dogfood-pass rejects existing dirty evidence before state and handoff writes"
  else
    _fail "dogfood-pass existing dirty evidence was not fail-closed"
  fi
fi
|
|
1146
|
+
|
|
1147
|
+
# Fixture: an initialised artifact directory whose pre-existing
# evidence.json has a passing check carrying a standard_refs entry with
# standard "unknown" (the case this stanza treats as invalid metadata).
INVALID_EXISTING_EVIDENCE_DOGFOOD_DIR="$TMPDIR_EVAL/repo/.flow-agents/dogfood-existing-invalid-evidence"
mkdir -p "$INVALID_EXISTING_EVIDENCE_DOGFOOD_DIR"
cp "$ARTIFACT_DIR/auto-sidecars--deliver.md" "$INVALID_EXISTING_EVIDENCE_DOGFOOD_DIR/dogfood-existing-invalid-evidence--deliver.md"
flow_agents_node "$WRITER" init-plan "$INVALID_EXISTING_EVIDENCE_DOGFOOD_DIR/dogfood-existing-invalid-evidence--deliver.md" \
  --source-request "Dogfood existing invalid evidence fixture." \
  --summary "Dogfood existing invalid evidence fixture." \
  --next-action "Run dogfood pass against existing invalid evidence." \
  --timestamp "2026-05-09T00:00:00Z" \
  >"$TMPDIR_EVAL/dogfood-existing-invalid-evidence-init.out" \
  2>"$TMPDIR_EVAL/dogfood-existing-invalid-evidence-init.err"
cat <<'JSON' >"$INVALID_EXISTING_EVIDENCE_DOGFOOD_DIR/evidence.json"
{
"schema_version": "1.0",
"task_slug": "dogfood-existing-invalid-evidence",
"verdict": "pass",
"checks": [
{
"id": "existing-invalid-pass",
"kind": "test",
"status": "pass",
"summary": "Existing pass check with invalid metadata.",
"standard_refs": [
{
"standard": "unknown",
"ref": "bad-ref"
}
]
}
],
"not_verified_gaps": []
}
JSON

# Snapshot state and handoff so the check can prove they were not touched.
cp "$INVALID_EXISTING_EVIDENCE_DOGFOOD_DIR/state.json" "$TMPDIR_EVAL/dogfood-existing-invalid-evidence-state.before"
cp "$INVALID_EXISTING_EVIDENCE_DOGFOOD_DIR/handoff.json" "$TMPDIR_EVAL/dogfood-existing-invalid-evidence-handoff.before"

if flow_agents_node "$WRITER" dogfood-pass \
  --artifact-root "$SESSION_ROOT" \
  --artifact-dir "$INVALID_EXISTING_EVIDENCE_DOGFOOD_DIR" \
  --verdict pass \
  --summary "Should fail before state writes." \
  >"$TMPDIR_EVAL/dogfood-existing-invalid-evidence.out" 2>&1; then
  _fail "dogfood-pass should reject existing invalid pass evidence before state writes"
else
  # Rejected as required: the error text must match and both sidecars must
  # be byte-identical to their snapshots.
  if rg -q 'cannot mark clean without passing evidence' "$TMPDIR_EVAL/dogfood-existing-invalid-evidence.out" &&
    cmp -s "$INVALID_EXISTING_EVIDENCE_DOGFOOD_DIR/state.json" "$TMPDIR_EVAL/dogfood-existing-invalid-evidence-state.before" &&
    cmp -s "$INVALID_EXISTING_EVIDENCE_DOGFOOD_DIR/handoff.json" "$TMPDIR_EVAL/dogfood-existing-invalid-evidence-handoff.before"; then
    _pass "dogfood-pass rejects existing invalid evidence before state and handoff writes"
  else
    _fail "dogfood-pass existing invalid evidence was not fail-closed"
  fi
fi
|
|
1192
|
+
|
|
1193
|
+
# A "pass" verdict supplied together with a check whose status is "fail"
# must be refused, and no evidence.json may appear in the artifact dir.
if flow_agents_node "$WRITER" dogfood-pass \
  --artifact-root "$SESSION_ROOT" \
  --artifact-dir "$DOGFOOD_DIR" \
  --verdict pass \
  --check-json '{"id":"dogfood-failed-check","kind":"test","status":"fail","summary":"Should not write."}' \
  --summary "Should fail before evidence." \
  >"$TMPDIR_EVAL/dogfood-pass-failed-check.out" 2>&1; then
  _fail "dogfood-pass should reject failed checks on clean pass before evidence writes"
else
  if rg -q 'clean evidence requires all non-skipped checks to pass' "$TMPDIR_EVAL/dogfood-pass-failed-check.out" &&
    [[ ! -f "$DOGFOOD_DIR/evidence.json" ]]; then
    _pass "dogfood-pass rejects failed clean-pass checks before partial evidence writes"
  else
    _fail "dogfood-pass failed clean-pass check was not fail-closed"
  fi
fi
|
|
1206
|
+
|
|
1207
|
+
# Same shape as the failed-check case, but the supplied check has status
# "not_verified": the command must be refused and leave no evidence.json.
if flow_agents_node "$WRITER" dogfood-pass \
  --artifact-root "$SESSION_ROOT" \
  --artifact-dir "$DOGFOOD_DIR" \
  --verdict pass \
  --check-json '{"id":"dogfood-not-verified-check","kind":"test","status":"not_verified","summary":"Should not write."}' \
  --summary "Should fail before evidence." \
  >"$TMPDIR_EVAL/dogfood-pass-not-verified-check.out" 2>&1; then
  _fail "dogfood-pass should reject not_verified checks on clean pass before evidence writes"
else
  if rg -q 'clean evidence requires all non-skipped checks to pass' "$TMPDIR_EVAL/dogfood-pass-not-verified-check.out" &&
    [[ ! -f "$DOGFOOD_DIR/evidence.json" ]]; then
    _pass "dogfood-pass rejects not_verified clean-pass checks before partial evidence writes"
  else
    _fail "dogfood-pass not_verified clean-pass check was not fail-closed"
  fi
fi
|
|
1220
|
+
|
|
1221
|
+
# Fixture: an initialised artifact directory with no evidence.json yet.
# Unlike the neighbouring fixtures, init-plan is given --artifact-root here.
INVALID_EVIDENCE_DOGFOOD_DIR="$TMPDIR_EVAL/repo/.flow-agents/dogfood-invalid-evidence"
mkdir -p "$INVALID_EVIDENCE_DOGFOOD_DIR"
cp "$ARTIFACT_DIR/auto-sidecars--deliver.md" "$INVALID_EVIDENCE_DOGFOOD_DIR/dogfood-invalid-evidence--deliver.md"
flow_agents_node "$WRITER" init-plan "$INVALID_EVIDENCE_DOGFOOD_DIR/dogfood-invalid-evidence--deliver.md" \
  --artifact-root "$SESSION_ROOT" \
  --source-request "Dogfood invalid evidence fixture." \
  --summary "Dogfood invalid evidence fixture." \
  --next-action "Run dogfood pass with invalid evidence metadata." \
  --timestamp "2026-05-09T00:00:00Z" \
  >"$TMPDIR_EVAL/dogfood-invalid-evidence-init.out" \
  2>"$TMPDIR_EVAL/dogfood-invalid-evidence-init.err"

# Snapshot state and handoff so the check can prove they were not touched.
cp "$INVALID_EVIDENCE_DOGFOOD_DIR/state.json" "$TMPDIR_EVAL/dogfood-invalid-evidence-state.before"
cp "$INVALID_EVIDENCE_DOGFOOD_DIR/handoff.json" "$TMPDIR_EVAL/dogfood-invalid-evidence-handoff.before"

# The supplied check passes but carries a standard_refs entry with standard
# "unknown"; the command must be refused without writing any sidecar.
if flow_agents_node "$WRITER" dogfood-pass \
  --artifact-root "$SESSION_ROOT" \
  --artifact-dir "$INVALID_EVIDENCE_DOGFOOD_DIR" \
  --verdict pass \
  --check-json '{"id":"dogfood-check","kind":"test","status":"pass","summary":"Should not write invalid metadata.","standard_refs":[{"standard":"unknown","ref":"bad-ref"}]}' \
  --summary "Should fail before evidence." \
  >"$TMPDIR_EVAL/dogfood-invalid-evidence.out" 2>&1; then
  _fail "dogfood-pass should reject invalid evidence metadata before sidecar writes"
else
  if rg -q 'standard' "$TMPDIR_EVAL/dogfood-invalid-evidence.out" &&
    [[ ! -f "$INVALID_EVIDENCE_DOGFOOD_DIR/evidence.json" ]] &&
    cmp -s "$INVALID_EVIDENCE_DOGFOOD_DIR/state.json" "$TMPDIR_EVAL/dogfood-invalid-evidence-state.before" &&
    cmp -s "$INVALID_EVIDENCE_DOGFOOD_DIR/handoff.json" "$TMPDIR_EVAL/dogfood-invalid-evidence-handoff.before"; then
    _pass "dogfood-pass rejects invalid evidence metadata before partial sidecar writes"
  else
    _fail "dogfood-pass invalid evidence metadata was not fail-closed"
  fi
fi
|
|
1247
|
+
|
|
1248
|
+
# Fixture: an initialised artifact directory with no evidence or learning
# sidecars yet.
INVALID_LEARNING_DOGFOOD_DIR="$TMPDIR_EVAL/repo/.flow-agents/dogfood-invalid-learning"
mkdir -p "$INVALID_LEARNING_DOGFOOD_DIR"
cp "$ARTIFACT_DIR/auto-sidecars--deliver.md" "$INVALID_LEARNING_DOGFOOD_DIR/dogfood-invalid-learning--deliver.md"
flow_agents_node "$WRITER" init-plan "$INVALID_LEARNING_DOGFOOD_DIR/dogfood-invalid-learning--deliver.md" \
  --source-request "Dogfood invalid learning fixture." \
  --summary "Dogfood invalid learning fixture." \
  --next-action "Run dogfood pass with invalid learning." \
  --timestamp "2026-05-09T00:00:00Z" \
  >"$TMPDIR_EVAL/dogfood-invalid-learning-init.out" \
  2>"$TMPDIR_EVAL/dogfood-invalid-learning-init.err"

# The learning record is submitted with status "learned" while its routing
# entry is still "open"; the command must be refused and must not leave
# evidence.json or learning.json behind.
if flow_agents_node "$WRITER" dogfood-pass \
  --artifact-root "$SESSION_ROOT" \
  --artifact-dir "$INVALID_LEARNING_DOGFOOD_DIR" \
  --verdict pass \
  --check-json '{"id":"dogfood-check","kind":"test","status":"pass","summary":"Should not write before invalid learning."}' \
  --learning-status learned \
  --learning-record-json '{"id":"dogfood-invalid-learning","source_refs":["evidence.json"],"outcome":"mixed","facts":["Learning has open routing."],"interpretation":"Open routing cannot be learned.","routing":[{"target":"doc","action":"Close this follow-up later.","status":"open"}]}' \
  --learning-summary "Invalid learning should fail before writes." \
  --summary "Should fail before evidence." \
  >"$TMPDIR_EVAL/dogfood-invalid-learning.out" 2>&1; then
  _fail "dogfood-pass should reject invalid learning before evidence writes"
else
  if rg -q 'learned status cannot have open learning routing' "$TMPDIR_EVAL/dogfood-invalid-learning.out" &&
    [[ ! -f "$INVALID_LEARNING_DOGFOOD_DIR/evidence.json" && ! -f "$INVALID_LEARNING_DOGFOOD_DIR/learning.json" ]]; then
    _pass "dogfood-pass rejects invalid learning before partial sidecar writes"
  else
    _fail "dogfood-pass invalid learning was not fail-closed"
  fi
fi
|
|
1273
|
+
|
|
1274
|
+
# Fixture: an initialised artifact directory with no evidence or learning
# sidecars yet.
INVALID_LEARNING_SHAPE_DOGFOOD_DIR="$TMPDIR_EVAL/repo/.flow-agents/dogfood-invalid-learning-shape"
mkdir -p "$INVALID_LEARNING_SHAPE_DOGFOOD_DIR"
cp "$ARTIFACT_DIR/auto-sidecars--deliver.md" "$INVALID_LEARNING_SHAPE_DOGFOOD_DIR/dogfood-invalid-learning-shape--deliver.md"
flow_agents_node "$WRITER" init-plan "$INVALID_LEARNING_SHAPE_DOGFOOD_DIR/dogfood-invalid-learning-shape--deliver.md" \
  --source-request "Dogfood invalid learning shape fixture." \
  --summary "Dogfood invalid learning shape fixture." \
  --next-action "Run dogfood pass with invalid learning shape." \
  --timestamp "2026-05-09T00:00:00Z" \
  >"$TMPDIR_EVAL/dogfood-invalid-learning-shape-init.out" \
  2>"$TMPDIR_EVAL/dogfood-invalid-learning-shape-init.err"

# The learning record passes source_refs and facts as plain strings rather
# than arrays; the command must be refused with an error that mentions
# source_refs, and must not leave evidence.json or learning.json behind.
if flow_agents_node "$WRITER" dogfood-pass \
  --artifact-root "$SESSION_ROOT" \
  --artifact-dir "$INVALID_LEARNING_SHAPE_DOGFOOD_DIR" \
  --verdict pass \
  --check-json '{"id":"dogfood-check","kind":"test","status":"pass","summary":"Should not write before invalid learning shape."}' \
  --learning-status learned \
  --learning-record-json '{"id":"dogfood-invalid-learning-shape","source_refs":"evidence.json","outcome":"success","facts":"Learning facts must be an array.","interpretation":"Invalid shape cannot be learned.","routing":[{"target":"doc","action":"Already closed.","status":"completed"}]}' \
  --learning-summary "Invalid learning shape should fail before writes." \
  --summary "Should fail before evidence." \
  >"$TMPDIR_EVAL/dogfood-invalid-learning-shape.out" 2>&1; then
  _fail "dogfood-pass should reject invalid learning shape before evidence writes"
else
  if rg -q 'source_refs' "$TMPDIR_EVAL/dogfood-invalid-learning-shape.out" &&
    [[ ! -f "$INVALID_LEARNING_SHAPE_DOGFOOD_DIR/evidence.json" && ! -f "$INVALID_LEARNING_SHAPE_DOGFOOD_DIR/learning.json" ]]; then
    _pass "dogfood-pass rejects invalid learning shape before partial sidecar writes"
  else
    _fail "dogfood-pass invalid learning shape was not fail-closed"
  fi
fi
|
|
1299
|
+
|
|
1300
|
+
# Fixture: an initialised artifact directory that already holds a
# learning.json whose record gives source_refs as a string, not an array.
EXISTING_INVALID_LEARNING_DOGFOOD_DIR="$TMPDIR_EVAL/repo/.flow-agents/dogfood-existing-invalid-learning"
mkdir -p "$EXISTING_INVALID_LEARNING_DOGFOOD_DIR"
cp "$ARTIFACT_DIR/auto-sidecars--deliver.md" "$EXISTING_INVALID_LEARNING_DOGFOOD_DIR/dogfood-existing-invalid-learning--deliver.md"
flow_agents_node "$WRITER" init-plan "$EXISTING_INVALID_LEARNING_DOGFOOD_DIR/dogfood-existing-invalid-learning--deliver.md" \
  --source-request "Dogfood existing invalid learning fixture." \
  --summary "Dogfood existing invalid learning fixture." \
  --next-action "Run dogfood pass against existing invalid learning." \
  --timestamp "2026-05-09T00:00:00Z" \
  >"$TMPDIR_EVAL/dogfood-existing-invalid-learning-init.out" \
  2>"$TMPDIR_EVAL/dogfood-existing-invalid-learning-init.err"
cat <<'JSON' >"$EXISTING_INVALID_LEARNING_DOGFOOD_DIR/learning.json"
{
"schema_version": "1.0",
"task_slug": "dogfood-existing-invalid-learning",
"status": "learned",
"updated_at": "2026-05-09T00:01:00Z",
"records": [
{
"id": "existing-invalid-learning",
"recorded_at": "2026-05-09T00:01:00Z",
"source_refs": "evidence.json",
"outcome": "success",
"facts": [
"Existing learning has invalid source_refs shape."
],
"interpretation": "This should not be accepted by clean dogfood pass.",
"routing": [
{
"target": "none",
"action": "No follow-up.",
"status": "completed"
}
]
}
]
}
JSON

# Snapshot state and handoff so the check can prove they were not touched.
cp "$EXISTING_INVALID_LEARNING_DOGFOOD_DIR/state.json" "$TMPDIR_EVAL/dogfood-existing-invalid-learning-state.before"
cp "$EXISTING_INVALID_LEARNING_DOGFOOD_DIR/handoff.json" "$TMPDIR_EVAL/dogfood-existing-invalid-learning-handoff.before"

if flow_agents_node "$WRITER" dogfood-pass \
  --artifact-root "$SESSION_ROOT" \
  --artifact-dir "$EXISTING_INVALID_LEARNING_DOGFOOD_DIR" \
  --verdict pass \
  --check-json '{"id":"dogfood-check","kind":"test","status":"pass","summary":"Should not write before existing invalid learning."}' \
  --summary "Should fail before evidence." \
  >"$TMPDIR_EVAL/dogfood-existing-invalid-learning.out" 2>&1; then
  _fail "dogfood-pass should reject existing invalid learning before evidence writes"
else
  # Rejected as required: the error must mention source_refs, no
  # evidence.json may exist, and state/handoff must match their snapshots.
  if rg -q 'source_refs' "$TMPDIR_EVAL/dogfood-existing-invalid-learning.out" &&
    [[ ! -f "$EXISTING_INVALID_LEARNING_DOGFOOD_DIR/evidence.json" ]] &&
    cmp -s "$EXISTING_INVALID_LEARNING_DOGFOOD_DIR/state.json" "$TMPDIR_EVAL/dogfood-existing-invalid-learning-state.before" &&
    cmp -s "$EXISTING_INVALID_LEARNING_DOGFOOD_DIR/handoff.json" "$TMPDIR_EVAL/dogfood-existing-invalid-learning-handoff.before"; then
    _pass "dogfood-pass rejects existing invalid learning before partial sidecar writes"
  else
    _fail "dogfood-pass existing invalid learning was not fail-closed"
  fi
fi
|
|
1352
|
+
|
|
1353
|
+
# Fixture: an initialised artifact directory that already holds a
# well-shaped "learned" learning.json whose record has no correction block.
EXISTING_LEARNED_NO_CORRECTION_DIR="$TMPDIR_EVAL/repo/.flow-agents/dogfood-learned-no-correction"
mkdir -p "$EXISTING_LEARNED_NO_CORRECTION_DIR"
cp "$ARTIFACT_DIR/auto-sidecars--deliver.md" "$EXISTING_LEARNED_NO_CORRECTION_DIR/dogfood-learned-no-correction--deliver.md"
flow_agents_node "$WRITER" init-plan "$EXISTING_LEARNED_NO_CORRECTION_DIR/dogfood-learned-no-correction--deliver.md" \
  --source-request "Dogfood learned missing correction fixture." \
  --summary "Dogfood learned missing correction fixture." \
  --next-action "Run dogfood pass against terminal learning missing correction." \
  --timestamp "2026-05-09T00:00:00Z" \
  >"$TMPDIR_EVAL/dogfood-learned-no-correction-init.out" \
  2>"$TMPDIR_EVAL/dogfood-learned-no-correction-init.err"
cat <<'JSON' >"$EXISTING_LEARNED_NO_CORRECTION_DIR/learning.json"
{
"schema_version": "1.0",
"task_slug": "dogfood-learned-no-correction",
"status": "learned",
"updated_at": "2026-05-09T00:01:00Z",
"records": [
{
"id": "learned-without-correction",
"recorded_at": "2026-05-09T00:01:00Z",
"source_refs": [
"evidence.json"
],
"outcome": "success",
"facts": [
"Existing learning is otherwise well-shaped."
],
"interpretation": "Terminal learned records must include a correction or no-correction decision.",
"routing": [
{
"target": "none",
"action": "No follow-up.",
"status": "completed"
}
]
}
]
}
JSON

# Snapshot state and handoff so the check can prove they were not touched.
cp "$EXISTING_LEARNED_NO_CORRECTION_DIR/state.json" "$TMPDIR_EVAL/dogfood-learned-no-correction-state.before"
cp "$EXISTING_LEARNED_NO_CORRECTION_DIR/handoff.json" "$TMPDIR_EVAL/dogfood-learned-no-correction-handoff.before"

if flow_agents_node "$WRITER" dogfood-pass \
  --artifact-root "$SESSION_ROOT" \
  --artifact-dir "$EXISTING_LEARNED_NO_CORRECTION_DIR" \
  --verdict pass \
  --check-json '{"id":"dogfood-check","kind":"test","status":"pass","summary":"Should not write before existing learned learning is corrected."}' \
  --summary "Should fail before evidence." \
  >"$TMPDIR_EVAL/dogfood-learned-no-correction.out" 2>&1; then
  _fail "dogfood-pass should reject existing learned learning missing correction before evidence writes"
else
  # Rejected as required: the error text must match, no evidence.json may
  # exist, and state/handoff must match their snapshots.
  if rg -q 'learning status learned requires every record to include correction.needed' "$TMPDIR_EVAL/dogfood-learned-no-correction.out" &&
    [[ ! -f "$EXISTING_LEARNED_NO_CORRECTION_DIR/evidence.json" ]] &&
    cmp -s "$EXISTING_LEARNED_NO_CORRECTION_DIR/state.json" "$TMPDIR_EVAL/dogfood-learned-no-correction-state.before" &&
    cmp -s "$EXISTING_LEARNED_NO_CORRECTION_DIR/handoff.json" "$TMPDIR_EVAL/dogfood-learned-no-correction-handoff.before"; then
    _pass "dogfood-pass rejects existing learned learning missing correction before partial sidecar writes"
  else
    _fail "dogfood-pass existing learned learning missing correction was not fail-closed"
  fi
fi
|
|
1407
|
+
|
|
1408
|
+
# Fixture: an initialised artifact directory with no evidence or critique
# sidecars yet.
INVALID_CRITIQUE_DOGFOOD_DIR="$TMPDIR_EVAL/repo/.flow-agents/dogfood-invalid-critique"
mkdir -p "$INVALID_CRITIQUE_DOGFOOD_DIR"
cp "$ARTIFACT_DIR/auto-sidecars--deliver.md" "$INVALID_CRITIQUE_DOGFOOD_DIR/dogfood-invalid-critique--deliver.md"
flow_agents_node "$WRITER" init-plan "$INVALID_CRITIQUE_DOGFOOD_DIR/dogfood-invalid-critique--deliver.md" \
  --source-request "Dogfood invalid critique fixture." \
  --summary "Dogfood invalid critique fixture." \
  --next-action "Run dogfood pass with invalid critique metadata." \
  --timestamp "2026-05-09T00:00:00Z" \
  >"$TMPDIR_EVAL/dogfood-invalid-critique-init.out" \
  2>"$TMPDIR_EVAL/dogfood-invalid-critique-init.err"

# Snapshot state and handoff so the check can prove they were not touched.
cp "$INVALID_CRITIQUE_DOGFOOD_DIR/state.json" "$TMPDIR_EVAL/dogfood-invalid-critique-state.before"
cp "$INVALID_CRITIQUE_DOGFOOD_DIR/handoff.json" "$TMPDIR_EVAL/dogfood-invalid-critique-handoff.before"

# The critique finding passes file_refs as a string rather than an array;
# the command must be refused with an error that mentions file_refs and
# must not write evidence.json or critique.json.
if flow_agents_node "$WRITER" dogfood-pass \
  --artifact-root "$SESSION_ROOT" \
  --artifact-dir "$INVALID_CRITIQUE_DOGFOOD_DIR" \
  --verdict pass \
  --check-json '{"id":"dogfood-check","kind":"test","status":"pass","summary":"Should not write before invalid critique."}' \
  --require-critique \
  --critique-id dogfood-invalid-critique \
  --critique-verdict pass \
  --critique-summary "Invalid critique finding metadata should fail before writes." \
  --finding-json '{"id":"invalid-file-refs","severity":"low","status":"fixed","description":"file_refs must be an array.","file_refs":"not-an-array"}' \
  --summary "Should fail before evidence." \
  >"$TMPDIR_EVAL/dogfood-invalid-critique.out" 2>&1; then
  _fail "dogfood-pass should reject invalid critique metadata before evidence writes"
else
  if rg -q 'file_refs' "$TMPDIR_EVAL/dogfood-invalid-critique.out" &&
    [[ ! -f "$INVALID_CRITIQUE_DOGFOOD_DIR/evidence.json" && ! -f "$INVALID_CRITIQUE_DOGFOOD_DIR/critique.json" ]] &&
    cmp -s "$INVALID_CRITIQUE_DOGFOOD_DIR/state.json" "$TMPDIR_EVAL/dogfood-invalid-critique-state.before" &&
    cmp -s "$INVALID_CRITIQUE_DOGFOOD_DIR/handoff.json" "$TMPDIR_EVAL/dogfood-invalid-critique-handoff.before"; then
    _pass "dogfood-pass rejects invalid critique metadata before partial sidecar writes"
  else
    _fail "dogfood-pass invalid critique metadata was not fail-closed"
  fi
fi
|
|
1439
|
+
|
|
1440
|
+
# Fixture: an initialised artifact directory that already holds a
# critique.json with verdict "pass" whose finding gives file_refs as a
# string, not an array.
EXISTING_INVALID_CRITIQUE_DOGFOOD_DIR="$TMPDIR_EVAL/repo/.flow-agents/dogfood-existing-invalid-critique"
mkdir -p "$EXISTING_INVALID_CRITIQUE_DOGFOOD_DIR"
cp "$ARTIFACT_DIR/auto-sidecars--deliver.md" "$EXISTING_INVALID_CRITIQUE_DOGFOOD_DIR/dogfood-existing-invalid-critique--deliver.md"
flow_agents_node "$WRITER" init-plan "$EXISTING_INVALID_CRITIQUE_DOGFOOD_DIR/dogfood-existing-invalid-critique--deliver.md" \
  --source-request "Dogfood existing invalid critique fixture." \
  --summary "Dogfood existing invalid critique fixture." \
  --next-action "Run dogfood pass against existing invalid critique." \
  --timestamp "2026-05-09T00:00:00Z" \
  >"$TMPDIR_EVAL/dogfood-existing-invalid-critique-init.out" \
  2>"$TMPDIR_EVAL/dogfood-existing-invalid-critique-init.err"
cat <<'JSON' >"$EXISTING_INVALID_CRITIQUE_DOGFOOD_DIR/critique.json"
{
"schema_version": "1.0",
"task_slug": "dogfood-existing-invalid-critique",
"status": "pass",
"required": true,
"updated_at": "2026-05-09T00:01:00Z",
"critiques": [
{
"id": "existing-invalid-review",
"reviewer": "tool-code-reviewer",
"reviewed_at": "2026-05-09T00:01:00Z",
"verdict": "pass",
"summary": "Looks clean but has invalid finding shape.",
"findings": [
{
"id": "invalid-existing-file-refs",
"severity": "low",
"status": "fixed",
"description": "file_refs must be an array.",
"file_refs": "not-an-array"
}
]
}
]
}
JSON

# Snapshot state and handoff so the check can prove they were not touched.
cp "$EXISTING_INVALID_CRITIQUE_DOGFOOD_DIR/state.json" "$TMPDIR_EVAL/dogfood-existing-invalid-critique-state.before"
cp "$EXISTING_INVALID_CRITIQUE_DOGFOOD_DIR/handoff.json" "$TMPDIR_EVAL/dogfood-existing-invalid-critique-handoff.before"

if flow_agents_node "$WRITER" dogfood-pass \
  --artifact-root "$SESSION_ROOT" \
  --artifact-dir "$EXISTING_INVALID_CRITIQUE_DOGFOOD_DIR" \
  --verdict pass \
  --check-json '{"id":"dogfood-check","kind":"test","status":"pass","summary":"Should not write before existing invalid critique."}' \
  --require-critique \
  --summary "Should fail before evidence." \
  >"$TMPDIR_EVAL/dogfood-existing-invalid-critique.out" 2>&1; then
  _fail "dogfood-pass should reject existing invalid critique before evidence writes"
else
  # Rejected as required: the error text must match, no evidence.json may
  # exist, and state/handoff must match their snapshots.
  if rg -q 'requires passing critique' "$TMPDIR_EVAL/dogfood-existing-invalid-critique.out" &&
    [[ ! -f "$EXISTING_INVALID_CRITIQUE_DOGFOOD_DIR/evidence.json" ]] &&
    cmp -s "$EXISTING_INVALID_CRITIQUE_DOGFOOD_DIR/state.json" "$TMPDIR_EVAL/dogfood-existing-invalid-critique-state.before" &&
    cmp -s "$EXISTING_INVALID_CRITIQUE_DOGFOOD_DIR/handoff.json" "$TMPDIR_EVAL/dogfood-existing-invalid-critique-handoff.before"; then
    _pass "dogfood-pass rejects existing invalid critique before partial sidecar writes"
  else
    _fail "dogfood-pass existing invalid critique was not fail-closed"
  fi
fi
|
|
1493
|
+
|
|
1494
|
+
# --require-critique is set but no critique arguments are supplied; the
# command must be refused and must not leave evidence.json behind.
if flow_agents_node "$WRITER" dogfood-pass \
  --artifact-root "$SESSION_ROOT" \
  --artifact-dir "$DOGFOOD_DIR" \
  --verdict pass \
  --check-json '{"id":"dogfood-check","kind":"test","status":"pass","summary":"Dogfood check passed."}' \
  --require-critique \
  --summary "Should fail without critique." \
  >"$TMPDIR_EVAL/dogfood-no-critique.out" 2>&1; then
  _fail "dogfood-pass should reject required critique gaps before writing evidence"
else
  if rg -q 'requires passing critique' "$TMPDIR_EVAL/dogfood-no-critique.out" &&
    [[ ! -f "$DOGFOOD_DIR/evidence.json" ]]; then
    _pass "dogfood-pass requires critique when configured without partial evidence writes"
  else
    _fail "dogfood-pass critique requirement was not fail-closed"
  fi
fi
|
|
1508
|
+
|
|
1509
|
+
# Fail-closed check: an explicit --artifact-dir that does not exist (the path
# is a deliberate "dogfood-pas" typo) must be rejected without the directory
# being created as a side effect.
if ! flow_agents_node "$WRITER" dogfood-pass \
    --artifact-root "$SESSION_ROOT" \
    --artifact-dir "$SESSION_ROOT/dogfood-pas" \
    --verdict pass \
    --check-json '{"id":"dogfood-check","kind":"test","status":"pass","summary":"Should not write."}' \
    --summary "Should fail for typo artifact dir." >"$TMPDIR_EVAL/dogfood-bad-dir.out" 2>&1; then
  if rg -q 'artifact directory does not exist' "$TMPDIR_EVAL/dogfood-bad-dir.out" \
      && ! [[ -d "$SESSION_ROOT/dogfood-pas" ]]; then
    _pass "dogfood-pass rejects bad explicit artifact dirs without creating sidecars"
  else
    _fail "dogfood-pass bad artifact dir failure was not fail-closed"
  fi
else
  _fail "dogfood-pass should reject bad explicit artifact dirs"
fi
|
|
1522
|
+
|
|
1523
|
+
# Fixture: a deliver artifact placed directly under $TMPDIR_EVAL, used by the
# check that rejects artifact dirs outside the artifact root.
OUTSIDE_DOGFOOD_DIR="${TMPDIR_EVAL}/outside-dogfood"
mkdir -p "${OUTSIDE_DOGFOOD_DIR}"
cat > "${OUTSIDE_DOGFOOD_DIR}/outside--deliver.md" <<'MARKDOWN'
# Outside artifact

status: planning
type: deliver

## Plan

This should not be writable from a different artifact root.
MARKDOWN
|
|
1535
|
+
# Fail-closed check: an --artifact-dir that is not under --artifact-root must
# be rejected, and no evidence.json may be written into that directory.
if ! flow_agents_node "$WRITER" dogfood-pass \
    --artifact-root "$SESSION_ROOT" \
    --artifact-dir "$OUTSIDE_DOGFOOD_DIR" \
    --verdict pass \
    --check-json '{"id":"dogfood-check","kind":"test","status":"pass","summary":"Should not write."}' \
    --summary "Should fail outside root." >"$TMPDIR_EVAL/dogfood-outside-dir.out" 2>&1; then
  if rg -q 'artifact directory must be under artifact root' "$TMPDIR_EVAL/dogfood-outside-dir.out" \
      && ! [[ -f "$OUTSIDE_DOGFOOD_DIR/evidence.json" ]]; then
    _pass "dogfood-pass rejects outside-root artifact dirs without writes"
  else
    _fail "dogfood-pass outside-root failure was not fail-closed"
  fi
else
  _fail "dogfood-pass should reject artifact dirs outside artifact root"
fi
|
|
1548
|
+
|
|
1549
|
+
# Fail-closed check: an artifact dir that is a symlink (inside $SESSION_ROOT,
# pointing at a directory under $TMPDIR_EVAL) must be rejected, and nothing may
# be written through the link into its target.
DOGFOOD_SYMLINK_TARGET="$TMPDIR_EVAL/dogfood-symlink-target"
DOGFOOD_SYMLINK_DIR="$SESSION_ROOT/dogfood-symlink"
mkdir -p "$DOGFOOD_SYMLINK_TARGET"
if ! ln -s "$DOGFOOD_SYMLINK_TARGET" "$DOGFOOD_SYMLINK_DIR" 2>"$TMPDIR_EVAL/dogfood-symlink-create.err"; then
  # Symlink creation failed: the case is reported as a pass that carries the
  # ln error text rather than as a failure.
  _pass "dogfood-pass symlink artifact-dir coverage skipped because symlink creation is unavailable: $(cat "$TMPDIR_EVAL/dogfood-symlink-create.err")"
elif flow_agents_node "$WRITER" dogfood-pass \
    --artifact-root "$SESSION_ROOT" \
    --artifact-dir "$DOGFOOD_SYMLINK_DIR" \
    --verdict pass \
    --check-json '{"id":"dogfood-check","kind":"test","status":"pass","summary":"Should not write."}' \
    --summary "Should reject symlink artifact dir." >"$TMPDIR_EVAL/dogfood-symlink-dir.out" 2>&1; then
  _fail "dogfood-pass should reject symlink artifact dirs"
elif rg -q 'artifact directory must not be a symlink' "$TMPDIR_EVAL/dogfood-symlink-dir.out" \
    && ! [[ -f "$DOGFOOD_SYMLINK_TARGET/evidence.json" ]]; then
  _pass "dogfood-pass rejects symlink artifact dirs without writes"
else
  _fail "dogfood-pass symlink artifact-dir failure was not fail-closed"
fi
|
|
1569
|
+
|
|
1570
|
+
# Fail-closed check: a malformed --finding-json payload must be rejected with a
# JSON validity message, and no evidence.json may be written.
if ! flow_agents_node "$WRITER" dogfood-pass \
    --artifact-root "$SESSION_ROOT" \
    --artifact-dir "$DOGFOOD_DIR" \
    --verdict pass \
    --check-json '{"id":"dogfood-check","kind":"test","status":"pass","summary":"Should not write."}' \
    --require-critique \
    --critique-id dogfood-bad-json \
    --critique-summary "Invalid finding should fail before evidence." \
    --finding-json '{bad json' \
    --summary "Should fail before evidence." >"$TMPDIR_EVAL/dogfood-bad-finding.out" 2>&1; then
  # `--` stops rg from reading the pattern's leading dashes as an option.
  if rg -q -- '--finding-json must be valid JSON' "$TMPDIR_EVAL/dogfood-bad-finding.out" \
      && ! [[ -f "$DOGFOOD_DIR/evidence.json" ]]; then
    _pass "dogfood-pass rejects invalid critique JSON before partial evidence writes"
  else
    _fail "dogfood-pass invalid critique JSON was not fail-closed"
  fi
else
  _fail "dogfood-pass should reject invalid critique JSON before evidence writes"
fi
|
|
1587
|
+
|
|
1588
|
+
# Fail-closed check: a pass verdict combined with a required critique whose own
# verdict is "fail" must be rejected, and no evidence.json may be written.
if ! flow_agents_node "$WRITER" dogfood-pass \
    --artifact-root "$SESSION_ROOT" \
    --artifact-dir "$DOGFOOD_DIR" \
    --verdict pass \
    --check-json '{"id":"dogfood-check","kind":"test","status":"pass","summary":"Should not write."}' \
    --require-critique \
    --critique-id dogfood-failing-review \
    --critique-verdict fail \
    --critique-summary "Failing critique should fail before evidence." \
    --summary "Should fail before evidence." >"$TMPDIR_EVAL/dogfood-failing-critique.out" 2>&1; then
  if rg -q 'requires clean critique before recording pass evidence' "$TMPDIR_EVAL/dogfood-failing-critique.out" \
      && ! [[ -f "$DOGFOOD_DIR/evidence.json" ]]; then
    _pass "dogfood-pass rejects failing required critique before partial evidence writes"
  else
    _fail "dogfood-pass failing critique was not fail-closed"
  fi
else
  _fail "dogfood-pass should reject failing required critique before evidence writes"
fi
|
|
1605
|
+
|
|
1606
|
+
# Fixture: an initialised artifact dir that already holds a critique.json with
# a failing critique and one open high-severity finding.
DIRTY_CRITIQUE_DOGFOOD_DIR="${TMPDIR_EVAL}/repo/.flow-agents/dogfood-dirty-critique"
mkdir -p "${DIRTY_CRITIQUE_DOGFOOD_DIR}"
cp "${ARTIFACT_DIR}/auto-sidecars--deliver.md" "${DIRTY_CRITIQUE_DOGFOOD_DIR}/dogfood-dirty-critique--deliver.md"
flow_agents_node "${WRITER}" init-plan "${DIRTY_CRITIQUE_DOGFOOD_DIR}/dogfood-dirty-critique--deliver.md" \
  --source-request "Dogfood dirty critique fixture." \
  --summary "Dogfood dirty critique fixture." \
  --next-action "Run dogfood pass against existing open critique." \
  --timestamp "2026-05-09T00:00:00Z" >"${TMPDIR_EVAL}/dogfood-dirty-init.out" 2>"${TMPDIR_EVAL}/dogfood-dirty-init.err"
# Hand-written critique sidecar; the quoted delimiter keeps the body literal.
cat > "${DIRTY_CRITIQUE_DOGFOOD_DIR}/critique.json" <<'JSON'
{
"schema_version": "1.0",
"task_slug": "dogfood-dirty-critique",
"status": "fail",
"required": true,
"updated_at": "2026-05-09T00:01:00Z",
"critiques": [
{
"id": "existing-open-review",
"reviewer": "tool-code-reviewer",
"reviewed_at": "2026-05-09T00:01:00Z",
"verdict": "fail",
"summary": "Existing open finding blocks clean completion.",
"findings": [
{
"severity": "high",
"status": "open",
"summary": "Existing finding remains open."
}
]
}
]
}
JSON
|
|
1639
|
+
# Fail-closed check: supplying a new passing critique must not succeed while
# the directory's existing critique is still failing; no evidence.json may be
# written.
if ! flow_agents_node "$WRITER" dogfood-pass \
    --artifact-root "$SESSION_ROOT" \
    --artifact-dir "$DIRTY_CRITIQUE_DOGFOOD_DIR" \
    --verdict pass \
    --check-json '{"id":"dogfood-check","kind":"test","status":"pass","summary":"Should not write."}' \
    --require-critique \
    --critique-id dogfood-clean-review \
    --critique-verdict pass \
    --critique-summary "New critique is clean but prior critique is still open." \
    --summary "Should fail before evidence." >"$TMPDIR_EVAL/dogfood-existing-dirty-critique.out" 2>&1; then
  if rg -q 'requires clean critique before recording pass evidence' "$TMPDIR_EVAL/dogfood-existing-dirty-critique.out" \
      && ! [[ -f "$DIRTY_CRITIQUE_DOGFOOD_DIR/evidence.json" ]]; then
    _pass "dogfood-pass rejects existing dirty critique before partial evidence writes"
  else
    _fail "dogfood-pass existing dirty critique was not fail-closed"
  fi
else
  _fail "dogfood-pass should reject existing dirty critique before evidence writes"
fi
|
|
1656
|
+
|
|
1657
|
+
# Fixture plus positive case: a dogfood pass with verdict "fail" and a failing
# critique is an honest record, so the writer is expected to accept it.
FAILED_DOGFOOD_DIR="${TMPDIR_EVAL}/repo/.flow-agents/dogfood-failed-pass"
mkdir -p "${FAILED_DOGFOOD_DIR}"
cp "${ARTIFACT_DIR}/auto-sidecars--deliver.md" "${FAILED_DOGFOOD_DIR}/dogfood-failed-pass--deliver.md"
flow_agents_node "${WRITER}" init-plan "${FAILED_DOGFOOD_DIR}/dogfood-failed-pass--deliver.md" \
  --source-request "Dogfood failed pass fixture." \
  --summary "Dogfood failed pass fixture." \
  --next-action "Record failed dogfood pass." \
  --timestamp "2026-05-09T00:00:00Z" >"${TMPDIR_EVAL}/dogfood-failed-init.out" 2>"${TMPDIR_EVAL}/dogfood-failed-init.err"
if ! flow_agents_node "${WRITER}" dogfood-pass \
    --artifact-root "${SESSION_ROOT}" \
    --artifact-dir "${FAILED_DOGFOOD_DIR}" \
    --verdict fail \
    --check-json '{"id":"dogfood-failed-check","kind":"test","status":"fail","summary":"Dogfood check failed."}' \
    --require-critique \
    --critique-id dogfood-failed-review \
    --critique-verdict fail \
    --critique-summary "Failed critique should be recorded for routing." \
    --finding-json '{"id":"failed-dogfood-finding","severity":"high","status":"open","description":"Failed dogfood finding remains open."}' \
    --summary "Dogfood pass failed and should route back to execution." \
    --timestamp "2026-05-09T00:04:30Z" >"${TMPDIR_EVAL}/dogfood-failed-pass.out" 2>"${TMPDIR_EVAL}/dogfood-failed-pass.err"; then
  # Attach both captured streams so the rejection reason is visible in the report.
  _fail "dogfood-pass should allow honest failed records: $(cat "${TMPDIR_EVAL}/dogfood-failed-pass.out" "${TMPDIR_EVAL}/dogfood-failed-pass.err")"
else
  _pass "dogfood-pass records failed evidence and failing critique for routing"
fi
|
|
1681
|
+
|
|
1682
|
+
# After the failed dogfood pass, all four sidecars must reflect the failure:
# evidence verdict, critique status, state status, and the handoff blocker text.
if ! {
  rg -q '"verdict": "fail"' "$FAILED_DOGFOOD_DIR/evidence.json" \
    && rg -q '"status": "fail"' "$FAILED_DOGFOOD_DIR/critique.json" \
    && rg -q '"status": "failed"' "$FAILED_DOGFOOD_DIR/state.json" \
    && rg -q 'Required dogfood critique is not passing' "$FAILED_DOGFOOD_DIR/handoff.json"
}; then
  _fail "dogfood-pass failed record did not preserve routing state"
else
  _pass "dogfood-pass failed records preserve failed state and blockers"
fi
|
|
1690
|
+
|
|
1691
|
+
# The strict validator (--require-sidecars --require-critique) must still
# reject the failed record and name the critique as the blocker, on stdout or
# stderr.
if ! flow_agents_node "$VALIDATOR" --require-sidecars --require-critique "$FAILED_DOGFOOD_DIR" >"$TMPDIR_EVAL/dogfood-failed-valid.out" 2>"$TMPDIR_EVAL/dogfood-failed-valid.err"; then
  if rg -q 'required critique must pass' "$TMPDIR_EVAL/dogfood-failed-valid.out" "$TMPDIR_EVAL/dogfood-failed-valid.err"; then
    _pass "dogfood-pass failed records remain visibly blocked under strict validation"
  else
    _fail "dogfood-pass failed record strict validation did not expose critique blocker"
  fi
else
  _fail "strict validator should still reject failed required critique"
fi
|
|
1698
|
+
|
|
1699
|
+
# Positive case: a passing check, a passing required critique, and a learning
# record with completed routing are expected to be accepted in one
# dogfood-pass call.
if ! flow_agents_node "$WRITER" dogfood-pass \
    --artifact-root "$SESSION_ROOT" \
    --artifact-dir "$DOGFOOD_DIR" \
    --verdict pass \
    --check-json '{"id":"dogfood-check","kind":"test","status":"pass","summary":"Dogfood check passed.","artifact_refs":[{"kind":"artifact","file":"dogfood-pass--deliver.md","summary":"Dogfood pass deliver artifact."}]}' \
    --require-critique \
    --critique-id dogfood-review \
    --reviewer tool-code-reviewer \
    --critique-verdict pass \
    --critique-summary "Dogfood critique passed." \
    --learning-record-json '{"id":"dogfood-learning","source_refs":["evidence.json","critique.json"],"outcome":"success","facts":["Dogfood pass command recorded evidence and critique."],"interpretation":"Dogfood pass can close a clean local loop.","routing":[{"target":"none","action":"No follow-up required.","status":"completed"}],"correction":{"needed":false,"evidence":"Evidence, critique, and learning matched intended dogfood behavior."}}' \
    --learning-summary "Dogfood command learning recorded." \
    --summary "Dogfood pass completed." \
    --timestamp "2026-05-09T00:05:00Z" >"$TMPDIR_EVAL/dogfood-pass.out" 2>"$TMPDIR_EVAL/dogfood-pass.err"; then
  _fail "dogfood-pass failed: $(cat "$TMPDIR_EVAL/dogfood-pass.out" "$TMPDIR_EVAL/dogfood-pass.err")"
else
  _pass "sidecar writer records dogfood pass"
fi
|
|
1717
|
+
|
|
1718
|
+
# The clean pass must report "verified" in the command output and leave a
# passing critique, a learned learning record, and a verified state on disk.
if ! {
  rg -q '"state_status": "verified"' "$TMPDIR_EVAL/dogfood-pass.out" \
    && rg -q '"status": "pass"' "$DOGFOOD_DIR/critique.json" \
    && rg -q '"status": "learned"' "$DOGFOOD_DIR/learning.json" \
    && rg -q '"status": "verified"' "$DOGFOOD_DIR/state.json"
}; then
  _fail "dogfood-pass did not produce expected clean sidecars"
else
  _pass "dogfood-pass writes clean evidence, critique, learning, and state"
fi
|
|
1726
|
+
|
|
1727
|
+
# The sidecars written by the clean pass must satisfy the strict validator
# (--require-sidecars --require-critique).
if ! flow_agents_node "$VALIDATOR" --require-sidecars --require-critique "$DOGFOOD_DIR" >"$TMPDIR_EVAL/dogfood-valid.out" 2>"$TMPDIR_EVAL/dogfood-valid.err"; then
  _fail "dogfood-pass output failed validation: $(cat "$TMPDIR_EVAL/dogfood-valid.out" "$TMPDIR_EVAL/dogfood-valid.err")"
else
  _pass "dogfood-pass output passes strict sidecar validation"
fi
|
|
1732
|
+
|
|
1733
|
+
# Fail-closed check: a release decision requested alongside a failing critique
# must be rejected even though --require-critique is not passed here. No
# release.json may be written and state.json must still read "verified".
if ! flow_agents_node "$WRITER" dogfood-pass \
    --artifact-root "$SESSION_ROOT" \
    --artifact-dir "$DOGFOOD_DIR" \
    --verdict pass \
    --check-json '{"id":"dogfood-release-fail-check","kind":"test","status":"pass","summary":"Dogfood release failure fixture check passed."}' \
    --critique-id dogfood-release-failing-review \
    --reviewer tool-code-reviewer \
    --critique-verdict fail \
    --critique-summary "Dogfood release critique failed." \
    --finding-json '{"id":"dogfood-release-finding","severity":"high","status":"open","description":"Release readiness must not ignore failing critique."}' \
    --release-decision merge \
    --release-scope "Dogfood pass release readiness should fail." \
    --summary "Dogfood pass release readiness should be blocked." \
    --timestamp "2026-05-09T00:05:20Z" >"$TMPDIR_EVAL/dogfood-release-fail.out" 2>"$TMPDIR_EVAL/dogfood-release-fail.err"; then
  # stdout and stderr are captured separately here, so search both files.
  if rg -q 'requires clean critique' "$TMPDIR_EVAL/dogfood-release-fail.out" "$TMPDIR_EVAL/dogfood-release-fail.err" \
      && ! [[ -f "$DOGFOOD_DIR/release.json" ]] \
      && rg -q '"status": "verified"' "$DOGFOOD_DIR/state.json"; then
    _pass "dogfood-pass release readiness requires clean critique"
  else
    _fail "dogfood-pass release readiness failing critique was not fail-closed"
  fi
else
  _fail "dogfood-pass release readiness should reject failing critique even when critique is not explicitly required"
fi
|
|
1755
|
+
|
|
1756
|
+
# Positive case: with a passing check and a passing required critique, a
# "merge" release decision is expected to be accepted by dogfood-pass.
if ! flow_agents_node "$WRITER" dogfood-pass \
    --artifact-root "$SESSION_ROOT" \
    --artifact-dir "$DOGFOOD_DIR" \
    --verdict pass \
    --check-json '{"id":"dogfood-release-check","kind":"test","status":"pass","summary":"Dogfood release check passed.","artifact_refs":[{"kind":"artifact","file":"dogfood-pass--deliver.md","summary":"Dogfood pass deliver artifact."}]}' \
    --require-critique \
    --critique-id dogfood-release-review \
    --reviewer tool-code-reviewer \
    --critique-verdict pass \
    --critique-summary "Dogfood release critique passed." \
    --release-decision merge \
    --release-scope "Dogfood pass release readiness." \
    --release-summary "Dogfood pass can record release readiness after clean evidence and critique." \
    --release-doc-ref docs/workflow-usage-guide.md \
    --summary "Dogfood pass release readiness completed." \
    --timestamp "2026-05-09T00:05:30Z" >"$TMPDIR_EVAL/dogfood-release.out" 2>"$TMPDIR_EVAL/dogfood-release.err"; then
  _fail "dogfood-pass release readiness failed: $(cat "$TMPDIR_EVAL/dogfood-release.out" "$TMPDIR_EVAL/dogfood-release.err")"
else
  _pass "dogfood-pass records release readiness after clean pass"
fi
|
|
1776
|
+
|
|
1777
|
+
# The release run must echo the merge decision in its output, write it to
# release.json, and move state.json to the release phase.
if ! {
  rg -q '"release_decision": "merge"' "$TMPDIR_EVAL/dogfood-release.out" \
    && rg -q '"decision": "merge"' "$DOGFOOD_DIR/release.json" \
    && rg -q '"phase": "release"' "$DOGFOOD_DIR/state.json"
}; then
  _fail "dogfood-pass release readiness did not update expected sidecars"
else
  _pass "dogfood-pass release readiness updates release sidecar and state"
fi
|
|
1784
|
+
|
|
1785
|
+
# Fixture: a deliver artifact whose verification report is NOT_VERIFIED,
# initialised with init-plan so a not_verified dogfood pass can be recorded
# against it.
DOGFOOD_NV_DIR="${TMPDIR_EVAL}/repo/.flow-agents/dogfood-not-verified"
mkdir -p "${DOGFOOD_NV_DIR}"
cat > "${DOGFOOD_NV_DIR}/dogfood-not-verified--deliver.md" <<'MARKDOWN'
# Dogfood not verified fixture

status: needs-decision
type: deliver

## Plan

Record a dogfood pass with explicit not verified evidence.

## Definition Of Done

- **User outcome:** Dogfood pass preserves not verified evidence.
- **Scope:** Dogfood not verified fixture.
- **Acceptance criteria:**
- [x] Not verified evidence is preserved - Evidence: evidence.json
- **Usefulness checks:**
- [x] Unknown, NOT_VERIFIED, and TODO gaps are resolved or explicitly accepted
- **Stop-short risks:** Not verified evidence could be hidden as pass.
- **Durable docs target:** not needed
- **Sandbox mode:** local-edit

## Verification Report

Build: [NOT_VERIFIED] external live runtime unavailable

### Acceptance Criteria
- [NOT_VERIFIED] Not verified evidence is preserved - Evidence: external live runtime unavailable.

### Verdict: NOT_VERIFIED

## Goal Fit Gate

- [x] Original user goal restated
- [ ] Every acceptance criterion has evidence

## Final Acceptance

- [ ] CI/relevant checks passed
MARKDOWN
flow_agents_node "${WRITER}" init-plan "${DOGFOOD_NV_DIR}/dogfood-not-verified--deliver.md" \
  --source-request "Dogfood not verified fixture." \
  --summary "Dogfood not verified fixture." \
  --next-action "Record not verified dogfood pass." \
  --timestamp "2026-05-09T00:00:00Z" >"${TMPDIR_EVAL}/dogfood-nv-init.out" 2>"${TMPDIR_EVAL}/dogfood-nv-init.err"
|
|
1832
|
+
|
|
1833
|
+
# Positive case: a not_verified verdict with an explicit gap must be accepted,
# recorded in evidence.json (verdict and gap text), and reported as
# "not_verified" in the command output.
if ! {
  flow_agents_node "$WRITER" dogfood-pass \
    --artifact-root "$SESSION_ROOT" \
    --artifact-dir "$DOGFOOD_NV_DIR" \
    --verdict not_verified \
    --check-json '{"id":"dogfood-external","kind":"external","status":"not_verified","summary":"External live runtime was unavailable."}' \
    --gap "External live runtime unavailable." \
    --summary "Dogfood pass preserved not verified evidence." \
    --timestamp "2026-05-09T00:06:00Z" >"$TMPDIR_EVAL/dogfood-nv.out" 2>"$TMPDIR_EVAL/dogfood-nv.err" \
    && rg -q '"verdict": "not_verified"' "$DOGFOOD_NV_DIR/evidence.json" \
    && rg -q '"state_status": "not_verified"' "$TMPDIR_EVAL/dogfood-nv.out" \
    && rg -q '"External live runtime unavailable."' "$DOGFOOD_NV_DIR/evidence.json"
}; then
  _fail "dogfood-pass did not preserve not verified evidence: $(cat "$TMPDIR_EVAL/dogfood-nv.out" "$TMPDIR_EVAL/dogfood-nv.err")"
else
  _pass "dogfood-pass preserves NOT_VERIFIED evidence and routing"
fi
|
|
1848
|
+
|
|
1849
|
+
# Negative case: record-release is expected to reject the decision value
# "launch" and explain the allowed values ("decision must be one of ...").
if flow_agents_node "$WRITER" record-release "$ARTIFACT_DIR" \
    --decision launch \
    --scope "Invalid release fixture." \
    --gate-json '{"name":"merge","status":"pass","summary":"Should fail."}' \
    --rollback-json '{"status":"not_required","summary":"Should fail.","owner":"codex"}' \
    --observability-json '{"status":"not_required","summary":"Should fail."}' \
    --docs-json '{"status":"not_needed","summary":"Should fail."}' \
    --summary "Should fail." >"$TMPDIR_EVAL/release-invalid.out" 2>&1; then
  _fail "sidecar writer should reject invalid release decisions"
elif rg -q 'decision must be one of' "$TMPDIR_EVAL/release-invalid.out"; then
  _pass "sidecar writer rejects invalid release decisions"
else
  # The command failed for some other reason: attach its captured output so the
  # unexpected error is visible in the report, as the correction-record checks
  # in this script already do.
  _fail "invalid release decision failure was not actionable: $(cat "$TMPDIR_EVAL/release-invalid.out")"
fi
|
|
1863
|
+
|
|
1864
|
+
# Fixture: an initialised artifact dir with passing evidence recorded, so that
# only the release step below can fail.
SEMANTIC_RELEASE_DIR="${TMPDIR_EVAL}/repo/.flow-agents/semantic-release"
mkdir -p "${SEMANTIC_RELEASE_DIR}"
cp "${ARTIFACT_DIR}/auto-sidecars--deliver.md" "${SEMANTIC_RELEASE_DIR}/semantic-release--deliver.md"
flow_agents_node "${WRITER}" init-plan "${SEMANTIC_RELEASE_DIR}/semantic-release--deliver.md" \
  --source-request "Semantic release failure fixture." \
  --summary "Semantic release failure fixture." \
  --next-action "Record evidence." \
  --timestamp "2026-05-09T00:00:00Z" >"${TMPDIR_EVAL}/semantic-release-init.out" 2>"${TMPDIR_EVAL}/semantic-release-init.err"
flow_agents_node "${WRITER}" record-evidence "${SEMANTIC_RELEASE_DIR}" \
  --verdict pass \
  --check-json '{"id":"semantic-release-fixture","kind":"test","status":"pass","summary":"Semantic release setup passed."}' \
  --timestamp "2026-05-09T00:01:00Z" >"${TMPDIR_EVAL}/semantic-release-evidence.out" 2>"${TMPDIR_EVAL}/semantic-release-evidence.err"

# Negative case: a "merge" decision whose only gate is "docs" has no passing
# merge gate, so it must be rejected and state.json must stay in verification.
if ! flow_agents_node "${WRITER}" record-release "${SEMANTIC_RELEASE_DIR}" \
    --decision merge \
    --scope "Semantic release fixture." \
    --gate-json '{"name":"docs","status":"pass","summary":"Docs passed but merge gate is missing."}' \
    --rollback-json '{"status":"not_required","summary":"No deployed runtime change.","owner":"codex"}' \
    --observability-json '{"status":"not_required","summary":"No production telemetry needed."}' \
    --docs-json '{"status":"updated","summary":"Docs are updated."}' \
    --summary "Should fail before state advances." >"${TMPDIR_EVAL}/semantic-release-invalid.out" 2>&1; then
  if rg -q 'positive release decision requires merge gate to pass' "${TMPDIR_EVAL}/semantic-release-invalid.out" \
      && rg -q '"phase": "verification"' "${SEMANTIC_RELEASE_DIR}/state.json"; then
    _pass "sidecar writer does not advance state after invalid release semantics"
  else
    _fail "semantic release failure advanced state or lacked actionable output"
  fi
else
  _fail "sidecar writer should reject semantically invalid release decisions"
fi
|
|
1892
|
+
|
|
1893
|
+
# Negative case: record-learning is expected to reject the outcome value
# "celebration" and explain the allowed values
# ("learning outcome must be one of ...").
if flow_agents_node "$WRITER" record-learning "$ARTIFACT_DIR" \
    --status learned \
    --record-json '{"id":"bad-learning","source_refs":["release.json"],"outcome":"celebration","facts":["Should fail."],"interpretation":"Should fail.","routing":[{"target":"doc","action":"Should fail.","status":"completed"}]}' \
    --summary "Should fail." >"$TMPDIR_EVAL/learning-invalid.out" 2>&1; then
  _fail "sidecar writer should reject invalid learning outcomes"
elif rg -q 'learning outcome must be one of' "$TMPDIR_EVAL/learning-invalid.out"; then
  _pass "sidecar writer rejects invalid learning outcomes"
else
  # The command failed for some other reason: attach its captured output so the
  # unexpected error is visible in the report, as the correction-record checks
  # in this script already do.
  _fail "invalid learning outcome failure was not actionable: $(cat "$TMPDIR_EVAL/learning-invalid.out")"
fi
|
|
1903
|
+
|
|
1904
|
+
# Negative case: a correction marked needed but lacking recurrence_key must be
# rejected with a message that names the missing field.
if ! flow_agents_node "$WRITER" record-learning "$ARTIFACT_DIR" \
    --status followup_required \
    --record-json '{"id":"bad-correction-recurrence","source_refs":["release.json"],"outcome":"mixed","facts":["Should fail."],"interpretation":"Should fail.","routing":[{"target":"skill","action":"Should fail.","status":"open"}],"correction":{"needed":true,"type":"workflow","intended_behavior":"A recurrence key is recorded.","observed_behavior":"The recurrence key is missing.","gap":"Grouping would be impossible.","prevention":{"target":"skill","action":"Should fail.","status":"open"}}}' \
    --summary "Should fail." >"$TMPDIR_EVAL/correction-missing-recurrence.out" 2>&1; then
  if rg -q 'correction.recurrence_key is required' "$TMPDIR_EVAL/correction-missing-recurrence.out"; then
    _pass "sidecar writer rejects correction-needed records without recurrence key"
  else
    # Surface the captured output so the unexpected error text is visible.
    detail="$(cat "$TMPDIR_EVAL/correction-missing-recurrence.out")"
    _fail "missing correction recurrence key failure was not actionable: $detail"
  fi
else
  _fail "sidecar writer should reject correction-needed records without recurrence key"
fi
|
|
1915
|
+
|
|
1916
|
+
# Negative case: a correction marked needed that carries neither a prevention
# route nor a no-change rationale must be rejected with an actionable message.
if ! flow_agents_node "$WRITER" record-learning "$ARTIFACT_DIR" \
    --status followup_required \
    --record-json '{"id":"bad-correction-prevention","source_refs":["release.json"],"outcome":"mixed","facts":["Should fail."],"interpretation":"Should fail.","routing":[{"target":"none","action":"Should fail.","status":"completed"}],"correction":{"needed":true,"type":"workflow","recurrence_key":"learning-review.missing-prevention","intended_behavior":"A prevention route or no-change rationale is recorded.","observed_behavior":"Neither decision is present.","gap":"The mismatch has no closeout decision."}}' \
    --summary "Should fail." >"$TMPDIR_EVAL/correction-missing-prevention.out" 2>&1; then
  if rg -q 'correction requires prevention route or no_change_rationale' "$TMPDIR_EVAL/correction-missing-prevention.out"; then
    _pass "sidecar writer rejects correction-needed records without prevention or no-change rationale"
  else
    # Surface the captured output so the unexpected error text is visible.
    detail="$(cat "$TMPDIR_EVAL/correction-missing-prevention.out")"
    _fail "missing correction prevention failure was not actionable: $detail"
  fi
else
  _fail "sidecar writer should reject correction-needed records without prevention or no-change rationale"
fi
|
|
1927
|
+
|
|
1928
|
+
# Negative case: a prevention object that carries only "action" is incomplete;
# the writer must reject it and name the missing target field.
if ! flow_agents_node "$WRITER" record-learning "$ARTIFACT_DIR" \
    --status followup_required \
    --record-json '{"id":"bad-correction-prevention-shape","source_refs":["release.json"],"outcome":"mixed","facts":["Should fail."],"interpretation":"Should fail.","routing":[{"target":"none","action":"Should fail.","status":"completed"}],"correction":{"needed":true,"type":"workflow","recurrence_key":"learning-review.incomplete-prevention","intended_behavior":"A complete prevention route is recorded.","observed_behavior":"Prevention only named an action.","gap":"The prevention route was not actionable.","prevention":{"action":"Should fail."}}}' \
    --summary "Should fail." >"$TMPDIR_EVAL/correction-incomplete-prevention.out" 2>&1; then
  if rg -q 'correction.prevention.target is required' "$TMPDIR_EVAL/correction-incomplete-prevention.out"; then
    _pass "sidecar writer rejects incomplete correction prevention routes"
  else
    _fail "incomplete correction prevention failure was not actionable: $(cat "$TMPDIR_EVAL/correction-incomplete-prevention.out")"
  fi
else
  _fail "sidecar writer should reject incomplete correction prevention routes"
fi
|
|
1938
|
+
|
|
1939
|
+
# Fixture: an artifact dir taken through init-plan, passing evidence, and a
# merge release, so that only the learning step below can fail.
SEMANTIC_LEARNING_DIR="${TMPDIR_EVAL}/repo/.flow-agents/semantic-learning"
mkdir -p "${SEMANTIC_LEARNING_DIR}"
cp "${ARTIFACT_DIR}/auto-sidecars--deliver.md" "${SEMANTIC_LEARNING_DIR}/semantic-learning--deliver.md"
flow_agents_node "${WRITER}" init-plan "${SEMANTIC_LEARNING_DIR}/semantic-learning--deliver.md" \
  --source-request "Semantic learning failure fixture." \
  --summary "Semantic learning failure fixture." \
  --next-action "Record evidence." \
  --timestamp "2026-05-09T00:00:00Z" >"${TMPDIR_EVAL}/semantic-learning-init.out" 2>"${TMPDIR_EVAL}/semantic-learning-init.err"
flow_agents_node "${WRITER}" record-evidence "${SEMANTIC_LEARNING_DIR}" \
  --verdict pass \
  --check-json '{"id":"semantic-learning-fixture","kind":"test","status":"pass","summary":"Semantic learning setup passed."}' \
  --timestamp "2026-05-09T00:01:00Z" >"${TMPDIR_EVAL}/semantic-learning-evidence.out" 2>"${TMPDIR_EVAL}/semantic-learning-evidence.err"
flow_agents_node "${WRITER}" record-release "${SEMANTIC_LEARNING_DIR}" \
  --decision merge \
  --scope "Semantic learning fixture." \
  --gate-json '{"name":"merge","status":"pass","summary":"Merge gate passed."}' \
  --rollback-json '{"status":"not_required","summary":"No deployed runtime change.","owner":"codex"}' \
  --observability-json '{"status":"not_required","summary":"No production telemetry needed."}' \
  --docs-json '{"status":"updated","summary":"Docs are updated."}' \
  --summary "Release state exists before learning failure." \
  --timestamp "2026-05-09T00:02:00Z" >"${TMPDIR_EVAL}/semantic-learning-release.out" 2>"${TMPDIR_EVAL}/semantic-learning-release.err"

# Negative case: status "learned" combined with a routing entry that is still
# "open" must be rejected, and state.json must remain in the release phase.
if ! flow_agents_node "${WRITER}" record-learning "${SEMANTIC_LEARNING_DIR}" \
    --status learned \
    --record-json '{"id":"open-routing","source_refs":["release.json"],"outcome":"success","facts":["Should fail."],"interpretation":"Should fail before archiving.","routing":[{"target":"backlog","action":"Route open follow-up.","status":"open"}]}' \
    --summary "Should fail before state advances." >"${TMPDIR_EVAL}/semantic-learning-invalid.out" 2>&1; then
  if rg -q 'learning status learned cannot have open routing' "${TMPDIR_EVAL}/semantic-learning-invalid.out" \
      && rg -q '"phase": "release"' "${SEMANTIC_LEARNING_DIR}/state.json"; then
    _pass "sidecar writer does not archive state after invalid learning semantics"
  else
    _fail "semantic learning failure advanced state or lacked actionable output"
  fi
else
  _fail "sidecar writer should reject semantically invalid learning records"
fi
|
|
1972
|
+
|
|
1973
|
+
# Fixture setup for the critique-import tests: create the imported-critique
# directory, copy the deliver artifact from $ARTIFACT_DIR under a new name,
# then run init-plan and record-evidence against it. stdout and stderr of each
# writer call go to separate files under $TMPDIR_EVAL.
# NOTE(review): the exit statuses of these setup commands are not checked
# here; whether a failure aborts the script depends on shell options set
# outside this view.
REVIEW_DIR="$TMPDIR_EVAL/repo/.flow-agents/imported-critique"
|
|
1974
|
+
mkdir -p "$REVIEW_DIR"
|
|
1975
|
+
cp "$ARTIFACT_DIR/auto-sidecars--deliver.md" "$REVIEW_DIR/imported-critique--deliver.md"
|
|
1976
|
+
flow_agents_node "$WRITER" init-plan "$REVIEW_DIR/imported-critique--deliver.md" \
|
|
1977
|
+
--source-request "Imported critique fixture." \
|
|
1978
|
+
--summary "Imported critique fixture." \
|
|
1979
|
+
--next-action "Import critique." \
|
|
1980
|
+
--timestamp "2026-05-09T00:00:00Z" >"$TMPDIR_EVAL/import-init.out" 2>"$TMPDIR_EVAL/import-init.err"
|
|
1981
|
+
flow_agents_node "$WRITER" record-evidence "$REVIEW_DIR" \
|
|
1982
|
+
--verdict pass \
|
|
1983
|
+
--check-json '{"id":"import-fixture","kind":"test","status":"pass","summary":"Import fixture setup passed."}' \
|
|
1984
|
+
--timestamp "2026-05-09T00:01:00Z" >"$TMPDIR_EVAL/import-evidence.out" 2>"$TMPDIR_EVAL/import-evidence.err"
|
|
1985
|
+
# Write the review artifact that the import-critique test consumes: front
# matter with role code-review and verdict PASS, plus a single LOW finding.
# The here-document delimiter is quoted, so the body is written literally with
# no parameter or command expansion.
cat > "$REVIEW_DIR/imported-critique--review.md" <<'MARKDOWN'
|
|
1986
|
+
---
|
|
1987
|
+
role: code-review
|
|
1988
|
+
parent: imported-critique--deliver
|
|
1989
|
+
created: 2026-05-09T00:02:00Z
|
|
1990
|
+
verdict: PASS
|
|
1991
|
+
---
|
|
1992
|
+
|
|
1993
|
+
## Code Review
|
|
1994
|
+
|
|
1995
|
+
Findings: 1 LOW
|
|
1996
|
+
|
|
1997
|
+
### Findings
|
|
1998
|
+
|
|
1999
|
+
#### [LOW] src/cli/workflow-sidecar.ts - Minor style note
|
|
2000
|
+
This finding was already addressed before import.
|
|
2001
|
+
|
|
2002
|
+
### Verdict: PASS
|
|
2003
|
+
MARKDOWN
|
|
2004
|
+
|
|
2005
|
+
# Positive test: import the passing review artifact with --finding-status
# fixed. A zero exit status counts as a pass; on failure the captured stdout
# and stderr files are concatenated into the failure message.
if flow_agents_node "$WRITER" import-critique "$REVIEW_DIR" "$REVIEW_DIR/imported-critique--review.md" \
|
|
2006
|
+
--finding-status fixed >"$TMPDIR_EVAL/import-critique.out" 2>"$TMPDIR_EVAL/import-critique.err"; then
|
|
2007
|
+
_pass "sidecar writer imports passing critique artifact"
|
|
2008
|
+
else
|
|
2009
|
+
_fail "sidecar writer import critique failed: $(cat "$TMPDIR_EVAL/import-critique.out" "$TMPDIR_EVAL/import-critique.err")"
|
|
2010
|
+
fi
|
|
2011
|
+
|
|
2012
|
+
# Check that critique.json in the fixture directory contains a finding id
# "minor-style-note" and a "status": "fixed" entry. The two patterns are
# matched independently, so they are not required to be on the same finding.
# NOTE(review): the id is presumably derived from the finding title
# "Minor style note" in the imported review; confirm against the writer.
if rg -q '"id": "minor-style-note"' "$REVIEW_DIR/critique.json" && rg -q '"status": "fixed"' "$REVIEW_DIR/critique.json"; then
|
|
2013
|
+
_pass "sidecar writer extracts review findings"
|
|
2014
|
+
else
|
|
2015
|
+
_fail "sidecar writer did not extract review findings"
|
|
2016
|
+
fi
|
|
2017
|
+
|
|
2018
|
+
# Write a plain Markdown file with no review front matter; the next test
# expects import-critique to reject it. The quoted delimiter keeps the body
# literal (the "# Unrelated Note" line is file content, not a shell comment).
cat > "$REVIEW_DIR/unrelated-note.md" <<'MARKDOWN'
|
|
2019
|
+
# Unrelated Note
|
|
2020
|
+
|
|
2021
|
+
This is ordinary Markdown and must not satisfy required critique.
|
|
2022
|
+
MARKDOWN
|
|
2023
|
+
|
|
2024
|
+
# Negative test: importing the unrelated note must fail. stdout and stderr are
# merged into import-unrelated.out so the error text can be searched.
if flow_agents_node "$WRITER" import-critique "$REVIEW_DIR" "$REVIEW_DIR/unrelated-note.md" >"$TMPDIR_EVAL/import-unrelated.out" 2>&1; then
|
|
2025
|
+
_fail "sidecar writer should reject non-review Markdown imports"
|
|
2026
|
+
# The failure only counts as a pass if the output names the missing role.
elif rg -q 'review artifact must declare role' "$TMPDIR_EVAL/import-unrelated.out"; then
|
|
2027
|
+
_pass "sidecar writer rejects non-review Markdown imports"
|
|
2028
|
+
else
|
|
2029
|
+
_fail "non-review import failure was not actionable"
|
|
2030
|
+
fi
|
|
2031
|
+
|
|
2032
|
+
# Run the validator on $ARTIFACT_DIR with --require-sidecars and
# --require-critique. A zero exit status is a pass; otherwise the captured
# stdout and stderr are included in the failure message.
# NOTE(review): this validates $ARTIFACT_DIR, not the fixture directories
# created in this part of the script; $ARTIFACT_DIR is populated outside this
# view.
if flow_agents_node "$VALIDATOR" --require-sidecars --require-critique "$ARTIFACT_DIR" >"$TMPDIR_EVAL/valid.out" 2>"$TMPDIR_EVAL/valid.err"; then
|
|
2033
|
+
_pass "writer output passes strict sidecar validation"
|
|
2034
|
+
else
|
|
2035
|
+
_fail "writer output failed validation: $(cat "$TMPDIR_EVAL/valid.out" "$TMPDIR_EVAL/valid.err")"
|
|
2036
|
+
fi
|
|
2037
|
+
|
|
2038
|
+
# Fixture setup for the open-finding test: create the bad-critique directory,
# copy the deliver artifact from $ARTIFACT_DIR under a new name, then run
# init-plan and record-evidence against it, capturing output under
# $TMPDIR_EVAL.
# NOTE(review): the exit statuses of these setup commands are not checked
# here.
BAD_DIR="$TMPDIR_EVAL/repo/.flow-agents/bad-critique"
|
|
2039
|
+
mkdir -p "$BAD_DIR"
|
|
2040
|
+
cp "$ARTIFACT_DIR/auto-sidecars--deliver.md" "$BAD_DIR/bad-critique--deliver.md"
|
|
2041
|
+
|
|
2042
|
+
flow_agents_node "$WRITER" init-plan "$BAD_DIR/bad-critique--deliver.md" \
|
|
2043
|
+
--source-request "Bad critique fixture." \
|
|
2044
|
+
--summary "Bad critique fixture." \
|
|
2045
|
+
--next-action "Record evidence." \
|
|
2046
|
+
--timestamp "2026-05-09T00:00:00Z" >"$TMPDIR_EVAL/bad-init.out" 2>"$TMPDIR_EVAL/bad-init.err"
|
|
2047
|
+
flow_agents_node "$WRITER" record-evidence "$BAD_DIR" \
|
|
2048
|
+
--verdict pass \
|
|
2049
|
+
--check-json '{"id":"bad-fixture","kind":"test","status":"pass","summary":"Bad fixture setup passed."}' \
|
|
2050
|
+
--timestamp "2026-05-09T00:01:00Z" >"$TMPDIR_EVAL/bad-evidence.out" 2>"$TMPDIR_EVAL/bad-evidence.err"
|
|
2051
|
+
|
|
2052
|
+
# Negative test: record a critique with --verdict pass that carries a
# medium-severity finding whose status is still "open". The writer is expected
# to exit non-zero; stdout and stderr are merged into bad-critique.out.
if flow_agents_node "$WRITER" record-critique "$BAD_DIR" \
|
|
2053
|
+
--id bad-review \
|
|
2054
|
+
--reviewer tool-code-reviewer \
|
|
2055
|
+
--verdict pass \
|
|
2056
|
+
--summary "Open finding should fail." \
|
|
2057
|
+
--finding-json '{"id":"open-medium","severity":"medium","status":"open","description":"Open finding."}' \
|
|
2058
|
+
--timestamp "2026-05-09T00:02:00Z" >"$TMPDIR_EVAL/bad-critique.out" 2>&1; then
|
|
2059
|
+
_fail "sidecar writer should reject open critique findings"
|
|
2060
|
+
# NOTE(review): the phrase searched for is 'required critique must pass',
# while the fallback failure message below talks about "open findings".
elif rg -q 'required critique must pass' "$TMPDIR_EVAL/bad-critique.out"; then
|
|
2061
|
+
_pass "sidecar writer rejects open critique findings"
|
|
2062
|
+
else
|
|
2063
|
+
_fail "open critique failure did not mention open findings"
|
|
2064
|
+
fi
|
|
2065
|
+
|
|
2066
|
+
# Fixture setup for the failing-import test: create the imported-bad-critique
# directory, copy the deliver artifact from $ARTIFACT_DIR under a new name,
# then run init-plan and record-evidence against it, capturing output under
# $TMPDIR_EVAL.
# NOTE(review): the exit statuses of these setup commands are not checked
# here.
IMPORT_BAD="$TMPDIR_EVAL/repo/.flow-agents/imported-bad-critique"
|
|
2067
|
+
mkdir -p "$IMPORT_BAD"
|
|
2068
|
+
cp "$ARTIFACT_DIR/auto-sidecars--deliver.md" "$IMPORT_BAD/imported-bad-critique--deliver.md"
|
|
2069
|
+
flow_agents_node "$WRITER" init-plan "$IMPORT_BAD/imported-bad-critique--deliver.md" \
|
|
2070
|
+
--source-request "Bad imported critique fixture." \
|
|
2071
|
+
--summary "Bad imported critique fixture." \
|
|
2072
|
+
--next-action "Import failing critique." \
|
|
2073
|
+
--timestamp "2026-05-09T00:00:00Z" >"$TMPDIR_EVAL/import-bad-init.out" 2>"$TMPDIR_EVAL/import-bad-init.err"
|
|
2074
|
+
flow_agents_node "$WRITER" record-evidence "$IMPORT_BAD" \
|
|
2075
|
+
--verdict pass \
|
|
2076
|
+
--check-json '{"id":"import-bad-fixture","kind":"test","status":"pass","summary":"Bad import fixture setup passed."}' \
|
|
2077
|
+
--timestamp "2026-05-09T00:01:00Z" >"$TMPDIR_EVAL/import-bad-evidence.out" 2>"$TMPDIR_EVAL/import-bad-evidence.err"
|
|
2078
|
+
# Write a review artifact whose verdict is CHANGES_REQUESTED and which carries
# a single HIGH finding; the next test expects importing it to fail. The
# quoted delimiter keeps the here-document body literal.
cat > "$IMPORT_BAD/imported-bad-critique--review.md" <<'MARKDOWN'
|
|
2079
|
+
---
|
|
2080
|
+
role: code-review
|
|
2081
|
+
parent: imported-bad-critique--deliver
|
|
2082
|
+
created: 2026-05-09T00:02:00Z
|
|
2083
|
+
verdict: CHANGES_REQUESTED
|
|
2084
|
+
---
|
|
2085
|
+
|
|
2086
|
+
## Code Review
|
|
2087
|
+
|
|
2088
|
+
Findings: 1 HIGH
|
|
2089
|
+
|
|
2090
|
+
### Findings
|
|
2091
|
+
|
|
2092
|
+
#### [HIGH] src/cli/workflow-sidecar.ts - Imported blocker
|
|
2093
|
+
This finding should keep required critique from passing.
|
|
2094
|
+
|
|
2095
|
+
### Verdict: CHANGES_REQUESTED
|
|
2096
|
+
MARKDOWN
|
|
2097
|
+
|
|
2098
|
+
# Negative test: importing the CHANGES_REQUESTED review must exit non-zero.
# stdout and stderr are merged into import-bad-critique.out.
if flow_agents_node "$WRITER" import-critique "$IMPORT_BAD" "$IMPORT_BAD/imported-bad-critique--review.md" >"$TMPDIR_EVAL/import-bad-critique.out" 2>&1; then
|
|
2099
|
+
_fail "sidecar writer should reject imported failing critique"
|
|
2100
|
+
# Besides the error text, the finding must still have been written to
# critique.json even though the command failed.
elif rg -q 'required critique must pass' "$TMPDIR_EVAL/import-bad-critique.out" && rg -q '"id": "imported-blocker"' "$IMPORT_BAD/critique.json"; then
|
|
2101
|
+
_pass "sidecar writer persists and rejects imported failing critique"
|
|
2102
|
+
else
|
|
2103
|
+
_fail "imported failing critique did not persist actionable finding"
|
|
2104
|
+
fi
|
|
2105
|
+
|
|
2106
|
+
# Final verdict: exit 0 with a success line when $errors is zero; otherwise
# print the issue count and exit 1.
# NOTE(review): $errors is presumably incremented by _fail, which is defined
# outside this view; confirm.
if [[ "$errors" -eq 0 ]]; then
|
|
2107
|
+
echo "Workflow sidecar writer integration passed."
|
|
2108
|
+
exit 0
|
|
2109
|
+
fi
|
|
2110
|
+
|
|
2111
|
+
echo "Workflow sidecar writer integration failed: $errors issue(s)."
|
|
2112
|
+
exit 1
|