@kontourai/flow-agents 0.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.githooks/pre-push +11 -0
- package/.github/workflows/ci.yml +210 -0
- package/.github/workflows/docs-pages.yml +52 -0
- package/.github/workflows/publish-npm.yml +104 -0
- package/AGENTS.md +26 -0
- package/CHANGELOG.md +66 -0
- package/CODE_OF_CONDUCT.md +25 -0
- package/CONTEXT.md +300 -0
- package/CONTRIBUTING.md +44 -0
- package/LICENSE +201 -0
- package/README.md +129 -0
- package/SECURITY.md +33 -0
- package/agent-cards/dev.json +19 -0
- package/agents/dev.json +127 -0
- package/agents/tool-code-reviewer.json +61 -0
- package/agents/tool-dependencies-updater.json +118 -0
- package/agents/tool-explore-config.json +92 -0
- package/agents/tool-explore-deps.json +92 -0
- package/agents/tool-explore-entry.json +92 -0
- package/agents/tool-explore-patterns.json +92 -0
- package/agents/tool-explore-structure.json +92 -0
- package/agents/tool-explore-tests.json +92 -0
- package/agents/tool-planner.json +57 -0
- package/agents/tool-playwright.json +145 -0
- package/agents/tool-security-reviewer.json +56 -0
- package/agents/tool-verifier.json +61 -0
- package/agents/tool-worker.json +58 -0
- package/build/src/cli/console-learning-projection.js +123 -0
- package/build/src/cli/docs-preview.js +39 -0
- package/build/src/cli/effective-backlog-settings.js +102 -0
- package/build/src/cli/export-bookmarks.js +38 -0
- package/build/src/cli/fixture-retirement-audit.js +140 -0
- package/build/src/cli/flow-kit.js +138 -0
- package/build/src/cli/import-bookmarks.js +50 -0
- package/build/src/cli/init.js +239 -0
- package/build/src/cli/instinct-cli.js +93 -0
- package/build/src/cli/promote-workflow-artifact.js +63 -0
- package/build/src/cli/publish-change-helper.js +154 -0
- package/build/src/cli/pull-work-provider.js +469 -0
- package/build/src/cli/runtime-adapter.js +23 -0
- package/build/src/cli/telemetry-doctor.js +221 -0
- package/build/src/cli/usage-feedback.js +443 -0
- package/build/src/cli/validate-hook-influence.js +152 -0
- package/build/src/cli/validate-source-tree.js +31 -0
- package/build/src/cli/validate-workflow-artifacts.js +486 -0
- package/build/src/cli/veritas-governance.js +262 -0
- package/build/src/cli/workflow-artifact-cleanup-audit.js +272 -0
- package/build/src/cli/workflow-sidecar.js +816 -0
- package/build/src/cli.js +89 -0
- package/build/src/flow-kit/validate.js +75 -0
- package/build/src/lib/args.js +45 -0
- package/build/src/lib/fs.js +62 -0
- package/build/src/lib/workflow-learning-projection.js +334 -0
- package/build/src/runtime-adapters.js +146 -0
- package/build/src/tools/build-universal-bundles.js +397 -0
- package/build/src/tools/common.js +56 -0
- package/build/src/tools/filter-installed-packs.js +132 -0
- package/build/src/tools/generate-context-map.js +198 -0
- package/build/src/tools/validate-package.js +64 -0
- package/build/src/tools/validate-source-tree.js +622 -0
- package/console.telemetry.json +176 -0
- package/context/base-rules.md +17 -0
- package/context/code-review-standards.md +62 -0
- package/context/coding-standards.md +42 -0
- package/context/common/orchestrators.md +12 -0
- package/context/common/subagents.md +28 -0
- package/context/contracts/artifact-contract.md +182 -0
- package/context/contracts/builder-kit-workflow-state-contract.md +319 -0
- package/context/contracts/delivery-contract.md +69 -0
- package/context/contracts/execution-contract.md +53 -0
- package/context/contracts/governance-adapter-contract.md +67 -0
- package/context/contracts/planning-contract.md +85 -0
- package/context/contracts/review-contract.md +104 -0
- package/context/contracts/sandbox-policy.md +52 -0
- package/context/contracts/verification-contract.md +134 -0
- package/context/contracts/work-item-contract.md +215 -0
- package/context/deferred/demo-mode.md +33 -0
- package/context/deferred/languages/go.md +31 -0
- package/context/deferred/languages/python.md +31 -0
- package/context/deferred/languages/typescript.md +34 -0
- package/context/deferred/parallelization.md +35 -0
- package/context/deferred/worktree-isolation.md +24 -0
- package/context/development-workflow.md +50 -0
- package/context/scripts/context-budget/budget-scan.sh +166 -0
- package/context/scripts/detect-tools.sh +3 -0
- package/context/scripts/discover-agents.sh +28 -0
- package/context/scripts/git-status.sh +49 -0
- package/context/scripts/hooks/config-protection.js +79 -0
- package/context/scripts/hooks/desktop-notify.sh +39 -0
- package/context/scripts/hooks/governance-audit.sh +135 -0
- package/context/scripts/hooks/lib/audit-transport.sh +40 -0
- package/context/scripts/hooks/lib/hook-flags.js +49 -0
- package/context/scripts/hooks/lib/patterns.sh +57 -0
- package/context/scripts/hooks/lib/resolve-formatter.js +80 -0
- package/context/scripts/hooks/post-edit-accumulator.js +66 -0
- package/context/scripts/hooks/pre-commit-quality.js +194 -0
- package/context/scripts/hooks/quality-gate.js +93 -0
- package/context/scripts/hooks/report-only-guard.js +21 -0
- package/context/scripts/hooks/run-hook.js +136 -0
- package/context/scripts/hooks/stop-format-typecheck.js +141 -0
- package/context/scripts/hooks/stop-goal-fit.js +337 -0
- package/context/scripts/hooks/workflow-steering.js +250 -0
- package/context/scripts/telemetry/console-presets.sh +14 -0
- package/context/scripts/telemetry/install-console-config.sh +214 -0
- package/context/scripts/telemetry/lib/config.sh +85 -0
- package/context/scripts/telemetry/lib/enrich.sh +115 -0
- package/context/scripts/telemetry/lib/redact.sh +22 -0
- package/context/scripts/telemetry/lib/session.sh +63 -0
- package/context/scripts/telemetry/lib/transport.sh +183 -0
- package/context/scripts/telemetry/lib/usage.sh +29 -0
- package/context/scripts/telemetry/sync-agents.sh +173 -0
- package/context/scripts/telemetry/telemetry.conf +23 -0
- package/context/scripts/telemetry/telemetry.sh +387 -0
- package/context/scripts/validate-package.sh +89 -0
- package/context/settings/backlog-provider-settings.json +54 -0
- package/context/templates/core/identity.md +26 -0
- package/context/templates/core/user.md +15 -0
- package/docs/_config.yml +15 -0
- package/docs/_layouts/default.html +87 -0
- package/docs/adr/0001-flow-agents-consumes-flow.md +77 -0
- package/docs/adr/0002-flow-kits-as-extension-unit.md +13 -0
- package/docs/adr/0003-flow-agents-coordinates-kits-and-adapters.md +13 -0
- package/docs/adr/0004-gates-expect-surface-claims.md +15 -0
- package/docs/adr/0005-kubernetes-inspired-resource-contracts.md +48 -0
- package/docs/adr/0006-typescript-first-source-policy.md +98 -0
- package/docs/agent-system-guidebook.md +391 -0
- package/docs/agent-usage-feedback-loop.md +351 -0
- package/docs/assets/favicon.svg +13 -0
- package/docs/assets/og-image.png +0 -0
- package/docs/assets/site.css +774 -0
- package/docs/assets/site.js +139 -0
- package/docs/configurable-workflow-routing.md +174 -0
- package/docs/context-map.md +145 -0
- package/docs/developer-architecture.md +145 -0
- package/docs/developer-hook-setup.md +61 -0
- package/docs/fixture-ownership.md +44 -0
- package/docs/flow-kit-repository-contract.md +180 -0
- package/docs/index.md +129 -0
- package/docs/kontour-resource-contract.md +358 -0
- package/docs/migrations.md +64 -0
- package/docs/north-star.md +322 -0
- package/docs/operating-layers.md +110 -0
- package/docs/repository-structure.md +132 -0
- package/docs/sandbox-policy.md +56 -0
- package/docs/skills-map.md +203 -0
- package/docs/standards-register.md +96 -0
- package/docs/veritas-integration.md +165 -0
- package/docs/work-item-adapters.md +72 -0
- package/docs/workflow-artifact-lifecycle.md +141 -0
- package/docs/workflow-eval-strategy.md +295 -0
- package/docs/workflow-shared-contracts.md +51 -0
- package/docs/workflow-usage-guide.md +443 -0
- package/evals/ARCHITECTURE.md +143 -0
- package/evals/CONVENTIONS.md +58 -0
- package/evals/README.md +128 -0
- package/evals/acceptance/run.sh +29 -0
- package/evals/acceptance/test_claude_harness.sh +242 -0
- package/evals/acceptance/test_codex_harness.sh +108 -0
- package/evals/acceptance/test_kiro_harness.sh +128 -0
- package/evals/cases/dev/404.html +97 -0
- package/evals/cases/dev/code-review.yaml +44 -0
- package/evals/cases/dev/dashboard.html +300 -0
- package/evals/cases/dev/deliver.yaml +66 -0
- package/evals/cases/dev/dependency-update.yaml +16 -0
- package/evals/cases/dev/explore.yaml +20 -0
- package/evals/cases/dev/index.html +370 -0
- package/evals/cases/dev/package-lock.json +28 -0
- package/evals/cases/dev/package.json +16 -0
- package/evals/cases/dev/plan-work.yaml +20 -0
- package/evals/cases/dev/promptfooconfig.yaml +666 -0
- package/evals/cases/dev/search-first.yaml +20 -0
- package/evals/cases/dev/tdd-workflow.yaml +48 -0
- package/evals/cases/dev/verify-work.yaml +44 -0
- package/evals/cases/dev/workflow.yaml +34 -0
- package/evals/ci/run-baseline.sh +283 -0
- package/evals/fixtures/backlog-provider-settings/global-default.json +44 -0
- package/evals/fixtures/backlog-provider-settings/project-override.json +53 -0
- package/evals/fixtures/builder-kit-workflow-state/baseline-freshness-resolution-hint.json +139 -0
- package/evals/fixtures/builder-kit-workflow-state/direct-primitive-stop.json +59 -0
- package/evals/fixtures/builder-kit-workflow-state/empty-board-route-shape.json +55 -0
- package/evals/fixtures/builder-kit-workflow-state/happy-path.json +71 -0
- package/evals/fixtures/builder-kit-workflow-state/mid-work-resume.json +80 -0
- package/evals/fixtures/builder-kit-workflow-state/missing-prestep-recovery.json +65 -0
- package/evals/fixtures/builder-kit-workflow-state/product-build-chaining.json +60 -0
- package/evals/fixtures/builder-kit-workflow-state/stale-continuation-requires-new-probe.json +57 -0
- package/evals/fixtures/console-learning-projection/artifacts/console-learning-correction/learning.json +50 -0
- package/evals/fixtures/console-learning-projection/artifacts/console-learning-open-route/learning.json +41 -0
- package/evals/fixtures/flow-kit-repository/invalid-absolute-path/kit.json +8 -0
- package/evals/fixtures/flow-kit-repository/invalid-asset-section/flows/review.flow.json +6 -0
- package/evals/fixtures/flow-kit-repository/invalid-asset-section/kit.json +11 -0
- package/evals/fixtures/flow-kit-repository/invalid-duplicate-flow/flows/review.flow.json +6 -0
- package/evals/fixtures/flow-kit-repository/invalid-duplicate-flow/kit.json +9 -0
- package/evals/fixtures/flow-kit-repository/invalid-id/flows/review.flow.json +6 -0
- package/evals/fixtures/flow-kit-repository/invalid-id/kit.json +8 -0
- package/evals/fixtures/flow-kit-repository/invalid-malformed-json/kit.json +8 -0
- package/evals/fixtures/flow-kit-repository/invalid-missing-flow/kit.json +8 -0
- package/evals/fixtures/flow-kit-repository/invalid-missing-id/flows/review.flow.json +6 -0
- package/evals/fixtures/flow-kit-repository/invalid-missing-id/kit.json +7 -0
- package/evals/fixtures/flow-kit-repository/invalid-missing-schema-version/flows/review.flow.json +6 -0
- package/evals/fixtures/flow-kit-repository/invalid-missing-schema-version/kit.json +7 -0
- package/evals/fixtures/flow-kit-repository/invalid-name/flows/review.flow.json +6 -0
- package/evals/fixtures/flow-kit-repository/invalid-name/kit.json +8 -0
- package/evals/fixtures/flow-kit-repository/invalid-schema-version/flows/review.flow.json +6 -0
- package/evals/fixtures/flow-kit-repository/invalid-schema-version/kit.json +8 -0
- package/evals/fixtures/flow-kit-repository/invalid-traversal/kit.json +8 -0
- package/evals/fixtures/flow-kit-repository/mixed-runtime-kit/adapters/example.json +3 -0
- package/evals/fixtures/flow-kit-repository/mixed-runtime-kit/assets/example.txt +1 -0
- package/evals/fixtures/flow-kit-repository/mixed-runtime-kit/docs/README.md +3 -0
- package/evals/fixtures/flow-kit-repository/mixed-runtime-kit/flows/runtime.flow.json +26 -0
- package/evals/fixtures/flow-kit-repository/mixed-runtime-kit/kit-evals/example.json +3 -0
- package/evals/fixtures/flow-kit-repository/mixed-runtime-kit/kit-skills/mixed/SKILL.md +3 -0
- package/evals/fixtures/flow-kit-repository/mixed-runtime-kit/kit.json +44 -0
- package/evals/fixtures/flow-kit-repository/valid-local-kit/docs/README.md +3 -0
- package/evals/fixtures/flow-kit-repository/valid-local-kit/flows/review.flow.json +26 -0
- package/evals/fixtures/flow-kit-repository/valid-local-kit/kit.json +20 -0
- package/evals/fixtures/hook-influence/cases.json +336 -0
- package/evals/fixtures/pull-work-provider/github-issues.json +170 -0
- package/evals/fixtures/pull-work-wip-shepherding/global-wip-informs.json +43 -0
- package/evals/fixtures/pull-work-wip-shepherding/personal-wip-blocks.json +42 -0
- package/evals/fixtures/surface-trust/accepted-claim-trust-report.json +31 -0
- package/evals/fixtures/surface-trust/artifact-absent.json +19 -0
- package/evals/fixtures/surface-trust/integrity-mismatch-trust-report.json +32 -0
- package/evals/fixtures/surface-trust/missing-authority-trust-report.json +27 -0
- package/evals/fixtures/surface-trust/provider-absent.json +19 -0
- package/evals/fixtures/surface-trust/rejected-claim-trust-report.json +30 -0
- package/evals/fixtures/surface-trust/stale-claim-trust-snapshot.json +31 -0
- package/evals/fixtures/usage-feedback/sample-full.jsonl +11 -0
- package/evals/fixtures/usage-feedback/sample-outcomes.jsonl +1 -0
- package/evals/fixtures/veritas-governance-adapter/fake-veritas-pass.sh +18 -0
- package/evals/fixtures/veritas-governance-adapter/fake-veritas-secret-fail.sh +10 -0
- package/evals/fixtures/veritas-governance-adapter/fake-veritas-unconfigured.sh +4 -0
- package/evals/integration/test_bundle_install.sh +541 -0
- package/evals/integration/test_console_learning_projection.sh +192 -0
- package/evals/integration/test_context_map.sh +65 -0
- package/evals/integration/test_effective_backlog_settings.sh +58 -0
- package/evals/integration/test_fixture_retirement_audit.sh +58 -0
- package/evals/integration/test_flow_agents_statusline.sh +93 -0
- package/evals/integration/test_flow_kit_repository.sh +90 -0
- package/evals/integration/test_goal_fit_hook.sh +482 -0
- package/evals/integration/test_hook_category_behaviors.sh +190 -0
- package/evals/integration/test_hook_influence_cases.sh +69 -0
- package/evals/integration/test_local_flow_kit_install.sh +145 -0
- package/evals/integration/test_publish_change_helper.sh +176 -0
- package/evals/integration/test_pull_work_provider.sh +140 -0
- package/evals/integration/test_runtime_adapter_activation.sh +106 -0
- package/evals/integration/test_telemetry.sh +485 -0
- package/evals/integration/test_telemetry_doctor.sh +193 -0
- package/evals/integration/test_usage_feedback_dashboard.sh +169 -0
- package/evals/integration/test_usage_feedback_global.sh +117 -0
- package/evals/integration/test_usage_feedback_import.sh +227 -0
- package/evals/integration/test_usage_feedback_outcomes.sh +165 -0
- package/evals/integration/test_usage_feedback_report.sh +263 -0
- package/evals/integration/test_veritas_governance_adapter.sh +235 -0
- package/evals/integration/test_workflow_artifact_cleanup_audit.sh +287 -0
- package/evals/integration/test_workflow_artifacts.sh +1247 -0
- package/evals/integration/test_workflow_sidecar_writer.sh +2112 -0
- package/evals/integration/test_workflow_steering_hook.sh +337 -0
- package/evals/lib/assertions/delegated-to.js +40 -0
- package/evals/lib/assertions/max-tool-calls.js +15 -0
- package/evals/lib/assertions/no-write-tools.js +27 -0
- package/evals/lib/assertions/pass-at-k.js +39 -0
- package/evals/lib/assertions/telemetry-utils.js +105 -0
- package/evals/lib/assertions/tool-called.js +39 -0
- package/evals/lib/assertions/verify-after-fix.js +61 -0
- package/evals/lib/claude-judge.sh +40 -0
- package/evals/lib/claude-provider.sh +74 -0
- package/evals/lib/codex-judge.sh +39 -0
- package/evals/lib/codex-provider.sh +81 -0
- package/evals/lib/eval-dev.sh +5 -0
- package/evals/lib/eval-judge.sh +22 -0
- package/evals/lib/eval-provider.sh +26 -0
- package/evals/lib/eval-report.sh +73 -0
- package/evals/lib/kiro-dev.sh +4 -0
- package/evals/lib/kiro-judge.sh +17 -0
- package/evals/lib/kiro-provider.sh +62 -0
- package/evals/lib/node.sh +111 -0
- package/evals/promptfooconfig.yaml +70 -0
- package/evals/run.sh +309 -0
- package/evals/static/test_evidence_refs.sh +141 -0
- package/evals/static/test_package.sh +407 -0
- package/evals/static/test_repo_hooks.sh +68 -0
- package/evals/static/test_universal_bundles.sh +274 -0
- package/evals/static/test_workflow_skills.sh +1207 -0
- package/install.sh +64 -0
- package/integrations/veritas/flow-agents.adapter.json +138 -0
- package/integrations/veritas/flow-agents.authority-settings.json +26 -0
- package/integrations/veritas/flow-agents.repo-standards.json +82 -0
- package/kits/builder/flows/build.flow.json +218 -0
- package/kits/builder/flows/shape.flow.json +127 -0
- package/kits/builder/kit.json +19 -0
- package/kits/catalog.json +11 -0
- package/package.json +130 -0
- package/packaging/README.md +60 -0
- package/packaging/manifest.json +173 -0
- package/packaging/packs.json +69 -0
- package/powers/dependency-checker/POWER.md +20 -0
- package/powers/dependency-checker/mcp.json +20 -0
- package/powers/playwright/POWER.md +25 -0
- package/powers/playwright/mcp.json +12 -0
- package/prompts/code-audit.md +123 -0
- package/prompts/kcommit.md +88 -0
- package/schemas/backlog-provider-settings.schema.json +138 -0
- package/schemas/workflow-acceptance.schema.json +216 -0
- package/schemas/workflow-critique.schema.json +113 -0
- package/schemas/workflow-evidence.schema.json +357 -0
- package/schemas/workflow-handoff.schema.json +52 -0
- package/schemas/workflow-learning.schema.json +223 -0
- package/schemas/workflow-release.schema.json +172 -0
- package/schemas/workflow-state.schema.json +80 -0
- package/scripts/README.md +111 -0
- package/scripts/build-universal-bundles.js +3 -0
- package/scripts/check-content-boundary.cjs +99 -0
- package/scripts/context-budget/budget-scan.sh +166 -0
- package/scripts/detect-tools.sh +3 -0
- package/scripts/discover-agents.sh +28 -0
- package/scripts/effective-backlog-settings.js +2 -0
- package/scripts/filter-installed-packs.js +2 -0
- package/scripts/flow-kit.js +2 -0
- package/scripts/generate-context-map.js +2 -0
- package/scripts/git-status.sh +49 -0
- package/scripts/hooks/claude-hook-adapter.js +174 -0
- package/scripts/hooks/claude-telemetry-hook.js +115 -0
- package/scripts/hooks/codex-hook-adapter.js +176 -0
- package/scripts/hooks/codex-telemetry-hook.js +95 -0
- package/scripts/hooks/config-protection.js +79 -0
- package/scripts/hooks/desktop-notify.sh +39 -0
- package/scripts/hooks/governance-audit.sh +135 -0
- package/scripts/hooks/lib/audit-transport.sh +40 -0
- package/scripts/hooks/lib/hook-flags.js +49 -0
- package/scripts/hooks/lib/patterns.sh +57 -0
- package/scripts/hooks/lib/resolve-formatter.js +80 -0
- package/scripts/hooks/post-edit-accumulator.js +66 -0
- package/scripts/hooks/pre-commit-quality.js +194 -0
- package/scripts/hooks/quality-gate.js +93 -0
- package/scripts/hooks/report-only-guard.js +21 -0
- package/scripts/hooks/run-hook.js +136 -0
- package/scripts/hooks/stop-format-typecheck.js +141 -0
- package/scripts/hooks/stop-goal-fit.js +337 -0
- package/scripts/hooks/workflow-steering.js +250 -0
- package/scripts/install-codex-home.sh +106 -0
- package/scripts/package.json +3 -0
- package/scripts/promote-workflow-artifact.js +2 -0
- package/scripts/publish-change-helper.js +2 -0
- package/scripts/pull-work-provider.js +2 -0
- package/scripts/setup-repo-hooks.sh +8 -0
- package/scripts/statusline/flow-agents-statusline.js +157 -0
- package/scripts/telemetry/console-presets.sh +14 -0
- package/scripts/telemetry/install-console-config.sh +214 -0
- package/scripts/telemetry/lib/config.sh +85 -0
- package/scripts/telemetry/lib/enrich.sh +115 -0
- package/scripts/telemetry/lib/redact.sh +22 -0
- package/scripts/telemetry/lib/session.sh +63 -0
- package/scripts/telemetry/lib/transport.sh +183 -0
- package/scripts/telemetry/lib/usage.sh +29 -0
- package/scripts/telemetry/sync-agents.sh +173 -0
- package/scripts/telemetry/telemetry.conf +23 -0
- package/scripts/telemetry/telemetry.sh +387 -0
- package/scripts/usage-feedback.js +2 -0
- package/scripts/validate-hook-influence-cases.js +2 -0
- package/scripts/validate-package.sh +89 -0
- package/scripts/validate-source-tree.js +9 -0
- package/skills/agentic-engineering/SKILL.md +62 -0
- package/skills/browser-test/SKILL.md +51 -0
- package/skills/builder-shape/SKILL.md +76 -0
- package/skills/context-budget/SKILL.md +40 -0
- package/skills/deliver/SKILL.md +241 -0
- package/skills/dependency-update/SKILL.md +68 -0
- package/skills/design-probe/SKILL.md +107 -0
- package/skills/eval-rebuild/SKILL.md +39 -0
- package/skills/evidence-gate/SKILL.md +186 -0
- package/skills/execute-plan/SKILL.md +110 -0
- package/skills/explore/SKILL.md +137 -0
- package/skills/feedback-loop/SKILL.md +87 -0
- package/skills/fix-bug/SKILL.md +133 -0
- package/skills/frontend-design/SKILL.md +80 -0
- package/skills/github-cli/SKILL.md +63 -0
- package/skills/idea-to-backlog/SKILL.md +267 -0
- package/skills/knowledge-capture/SKILL.md +55 -0
- package/skills/learning-review/SKILL.md +115 -0
- package/skills/pickup-probe/SKILL.md +114 -0
- package/skills/plan-work/SKILL.md +176 -0
- package/skills/pull-work/SKILL.md +309 -0
- package/skills/release-readiness/SKILL.md +121 -0
- package/skills/review-work/SKILL.md +161 -0
- package/skills/search-first/SKILL.md +66 -0
- package/skills/tdd-workflow/SKILL.md +140 -0
- package/skills/verify-work/SKILL.md +109 -0
- package/src/cli/console-learning-projection.ts +140 -0
- package/src/cli/effective-backlog-settings.ts +99 -0
- package/src/cli/fixture-retirement-audit.ts +154 -0
- package/src/cli/flow-kit.ts +139 -0
- package/src/cli/init.ts +248 -0
- package/src/cli/promote-workflow-artifact.ts +64 -0
- package/src/cli/publish-change-helper.ts +143 -0
- package/src/cli/pull-work-provider.ts +481 -0
- package/src/cli/runtime-adapter.ts +24 -0
- package/src/cli/telemetry-doctor.ts +243 -0
- package/src/cli/usage-feedback.ts +418 -0
- package/src/cli/validate-hook-influence.ts +119 -0
- package/src/cli/validate-source-tree.ts +30 -0
- package/src/cli/validate-workflow-artifacts.ts +411 -0
- package/src/cli/veritas-governance.ts +322 -0
- package/src/cli/workflow-artifact-cleanup-audit.ts +281 -0
- package/src/cli/workflow-sidecar.ts +676 -0
- package/src/cli.ts +95 -0
- package/src/flow-kit/validate.ts +74 -0
- package/src/lib/args.ts +43 -0
- package/src/lib/fs.ts +62 -0
- package/src/lib/workflow-learning-projection.ts +491 -0
- package/src/runtime-adapters.ts +154 -0
- package/src/tools/build-universal-bundles.ts +366 -0
- package/src/tools/common.ts +61 -0
- package/src/tools/filter-installed-packs.ts +129 -0
- package/src/tools/generate-context-map.ts +199 -0
- package/src/tools/validate-package.ts +57 -0
- package/src/tools/validate-source-tree.ts +488 -0
- package/tsconfig.json +19 -0
- package/veritas.claims.json +6 -0
|
@@ -0,0 +1,193 @@
|
|
|
1
|
+
#!/usr/bin/env bash
|
|
2
|
+
set -euo pipefail
|
|
3
|
+
|
|
4
|
+
# Repository root: two directory levels above the directory holding this script.
ROOT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/../.." && pwd)"

# Launcher for the CLI under test; override it with the FLOW_AGENTS_CLI
# environment variable. Only the default `node` launcher is handed the built
# entry point as an argument; any other launcher gets no extra leading arguments.
FLOW_AGENTS_CLI="${FLOW_AGENTS_CLI:-node}"
FLOW_AGENTS_CLI_ARGS=()
if [[ "$FLOW_AGENTS_CLI" == "node" ]]; then
  FLOW_AGENTS_CLI_ARGS=("$ROOT_DIR/build/src/cli.js")
fi

# Scratch directory for this run. It is created under $TMPDIR when that is set
# (falling back to /tmp) so the test also works where /tmp is not the writable
# temp location. A trailing slash on $TMPDIR is dropped to avoid a "//" path.
_tmp_base="${TMPDIR:-/tmp}"
TMPDIR_EVAL="$(mktemp -d "${_tmp_base%/}/flow-agents-telemetry-doctor.XXXXXX")"

# Running tallies of passed and failed checks.
pass=0
fail=0
|
|
13
|
+
|
|
14
|
+
# Delete this run's scratch directory. Registered on EXIT right below, so it
# runs whichever way the script terminates.
cleanup() { rm -rf "$TMPDIR_EVAL"; }
trap 'cleanup' EXIT
|
|
18
|
+
|
|
19
|
+
# Print a check-marked line for the check named in $1 and bump the pass tally.
_pass() {
  printf ' ✓ %s\n' "$1"
  pass=$((pass + 1))
}
|
|
20
|
+
# Print a cross-marked line for the check named in $1 and bump the fail tally.
_fail() {
  printf ' ✗ %s\n' "$1"
  fail=$((fail + 1))
}
|
|
21
|
+
|
|
22
|
+
echo "=== Telemetry Doctor Integration Test ==="
echo ""

# Stage a throwaway install tree that holds only the repository's stock
# telemetry config.
DEST="$TMPDIR_EVAL/install"
mkdir -p "$DEST/scripts/telemetry"
cp "$ROOT_DIR/scripts/telemetry/telemetry.conf" "$DEST/scripts/telemetry/telemetry.conf"

# Case: stock config. The doctor must exit 0 and its JSON must show the config
# file as present, "local-files" among the active sinks, a "local-only" Console
# sink, and no reachability probe.
#
# ${arr[@]+"${arr[@]}"} expands to nothing when the argument array is empty
# (i.e. FLOW_AGENTS_CLI was overridden). A bare "${arr[@]}" would abort with
# "unbound variable" under `set -u` on bash releases older than 4.4.
# NOTE(review): $FLOW_AGENTS_CLI is left unquoted as in the original, presumably
# so an override may carry its own arguments — confirm.
if $FLOW_AGENTS_CLI ${FLOW_AGENTS_CLI_ARGS[@]+"${FLOW_AGENTS_CLI_ARGS[@]}"} \
  telemetry-doctor --dest "$DEST" --json --headless >"$TMPDIR_EVAL/local.json"; then
  if jq -e '
    .ok == true and
    .telemetry.configExists == true and
    (.telemetry.activeSinks | index("local-files")) and
    .console.sink == "local-only" and
    .console.reachability.checked == false
  ' "$TMPDIR_EVAL/local.json" >/dev/null; then
    _pass "doctor reports local-only telemetry as JSON"
  else
    _fail "doctor local-only JSON did not match expected shape"
  fi
else
  _fail "doctor failed for local-only telemetry config"
fi
|
|
44
|
+
|
|
45
|
+
# Case: a Console endpoint on loopback (127.0.0.1:9) plus a tenant id are
# appended to the staged config. The doctor must exit non-zero, and its JSON
# must show the endpoint as allowed, the tenant as configured, and a
# reachability probe that ran and failed.
cat >> "$DEST/scripts/telemetry/telemetry.conf" <<'CONF'
console_telemetry_url=http://127.0.0.1:9
console_tenant_id=tenant-a
CONF

# ${arr[@]+"${arr[@]}"} keeps an empty argument array from tripping `set -u` on
# bash releases older than 4.4.
if $FLOW_AGENTS_CLI ${FLOW_AGENTS_CLI_ARGS[@]+"${FLOW_AGENTS_CLI_ARGS[@]}"} \
  telemetry-doctor --dest "$DEST" --json --headless --timeout-ms 250 >"$TMPDIR_EVAL/console.json"; then
  _fail "doctor returned success for unreachable configured Console"
elif jq -e '
  .ok == false and
  (.telemetry.activeSinks | index("console")) and
  .console.sink == "console" and
  .console.endpointAllowed == true and
  .console.tenantConfigured == true and
  .console.reachability.checked == true and
  .console.reachability.ok == false
' "$TMPDIR_EVAL/console.json" >/dev/null; then
  _pass "doctor reports unreachable configured Console in JSON"
else
  _fail "doctor unreachable Console JSON did not match expected shape"
fi
|
|
67
|
+
|
|
68
|
+
# Case: reset the staged config to the stock file, then append a plain-http,
# non-loopback Console URL. The doctor must exit non-zero, mark the endpoint as
# not allowed, skip the reachability probe, and emit at least one warning.
cp "$ROOT_DIR/scripts/telemetry/telemetry.conf" "$DEST/scripts/telemetry/telemetry.conf"
cat >> "$DEST/scripts/telemetry/telemetry.conf" <<'CONF'
console_telemetry_url=http://example.test
CONF

# ${arr[@]+"${arr[@]}"} keeps an empty argument array from tripping `set -u` on
# bash releases older than 4.4.
if $FLOW_AGENTS_CLI ${FLOW_AGENTS_CLI_ARGS[@]+"${FLOW_AGENTS_CLI_ARGS[@]}"} \
  telemetry-doctor --dest "$DEST" --json --headless --timeout-ms 250 >"$TMPDIR_EVAL/unsafe.json"; then
  _fail "doctor returned success for unsafe Console URL"
elif jq -e '
  .ok == false and
  .console.sink == "console" and
  .console.endpointAllowed == false and
  .console.reachability.checked == false and
  (.warnings | length > 0)
' "$TMPDIR_EVAL/unsafe.json" >/dev/null; then
  _pass "doctor rejects unsafe non-local Console URL before reachability"
else
  _fail "doctor unsafe Console JSON did not match expected shape"
fi
|
|
88
|
+
|
|
89
|
+
# Case: overwrite the staged config with an https Console URL whose host
# contains a space. The doctor must exit non-zero yet still write JSON, with
# the endpoint not allowed, no reachability probe, and the endpoint URL
# reported as the literal "[malformed-url]".
cat > "$DEST/scripts/telemetry/telemetry.conf" <<'CONF'
enabled=true
channels=full,analytics
console_telemetry_url=https://bad host
CONF

# ${arr[@]+"${arr[@]}"} keeps an empty argument array from tripping `set -u` on
# bash releases older than 4.4.
if $FLOW_AGENTS_CLI ${FLOW_AGENTS_CLI_ARGS[@]+"${FLOW_AGENTS_CLI_ARGS[@]}"} \
  telemetry-doctor --dest "$DEST" --json --headless >"$TMPDIR_EVAL/malformed.json"; then
  _fail "doctor returned success for malformed HTTPS Console URL"
elif jq -e '
  .ok == false and
  .console.endpointAllowed == false and
  .console.reachability.checked == false and
  .console.endpointUrl == "[malformed-url]"
' "$TMPDIR_EVAL/malformed.json" >/dev/null; then
  _pass "doctor emits JSON for malformed HTTPS Console URL"
else
  _fail "doctor malformed HTTPS JSON did not match expected shape"
fi
|
|
109
|
+
|
|
110
|
+
# Case: overwrite the staged config with a Console URL carrying userinfo
# (user:pass) and a token query parameter. The doctor must exit non-zero, and
# the URL it reports must contain none of "user", "pass" or "secret" while
# showing the token value replaced by the percent-encoded "[redacted]" marker.
cat > "$DEST/scripts/telemetry/telemetry.conf" <<'CONF'
enabled=true
channels=full,analytics
console_telemetry_url=https://user:pass@console.example.test/path?token=secret&safe=yes
CONF

# ${arr[@]+"${arr[@]}"} keeps an empty argument array from tripping `set -u` on
# bash releases older than 4.4.
if $FLOW_AGENTS_CLI ${FLOW_AGENTS_CLI_ARGS[@]+"${FLOW_AGENTS_CLI_ARGS[@]}"} \
  telemetry-doctor --dest "$DEST" --json --headless >"$TMPDIR_EVAL/redacted.json"; then
  _fail "doctor returned success for credential-bearing Console URL"
elif jq -e '
  .ok == false and
  .console.endpointAllowed == false and
  (.console.url | contains("user") | not) and
  (.console.url | contains("pass") | not) and
  (.console.url | contains("secret") | not) and
  (.console.url | contains("token=%5Bredacted%5D"))
' "$TMPDIR_EVAL/redacted.json" >/dev/null; then
  _pass "doctor redacts credential-bearing Console URLs"
else
  _fail "doctor did not redact credential-bearing Console URLs"
fi
|
|
132
|
+
|
|
133
|
+
# Case: overwrite the staged config with a single "full" channel whose endpoint
# URL carries userinfo and a token query parameter (no Console URL at all).
# Here the doctor is expected to exit 0, and the first channel's reported
# endpoint URL must contain none of "user", "pass" or "secret" while showing
# the token value replaced by the percent-encoded "[redacted]" marker.
cat > "$DEST/scripts/telemetry/telemetry.conf" <<'CONF'
enabled=true
channels=full
channel.full.endpoint_url=https://user:pass@example.test/path?token=secret&safe=yes
CONF

# ${arr[@]+"${arr[@]}"} keeps an empty argument array from tripping `set -u` on
# bash releases older than 4.4.
if $FLOW_AGENTS_CLI ${FLOW_AGENTS_CLI_ARGS[@]+"${FLOW_AGENTS_CLI_ARGS[@]}"} \
  telemetry-doctor --dest "$DEST" --json --headless >"$TMPDIR_EVAL/channel-redacted.json"; then
  if jq -e '
    .ok == true and
    (.telemetry.channels[0].endpointUrl | contains("user") | not) and
    (.telemetry.channels[0].endpointUrl | contains("pass") | not) and
    (.telemetry.channels[0].endpointUrl | contains("secret") | not) and
    (.telemetry.channels[0].endpointUrl | contains("token=%5Bredacted%5D"))
  ' "$TMPDIR_EVAL/channel-redacted.json" >/dev/null; then
    _pass "doctor redacts channel endpoint URLs"
  else
    _fail "doctor did not redact channel endpoint URLs"
  fi
else
  _fail "doctor failed for channel endpoint redaction case"
fi
|
|
154
|
+
|
|
155
|
+
# Case: overwrite the staged config with a well-formed https Console URL on a
# non-loopback host and run the doctor WITHOUT --allow-network. The doctor must
# exit non-zero, mark the endpoint as not allowed, and skip the reachability
# probe.
cat > "$DEST/scripts/telemetry/telemetry.conf" <<'CONF'
enabled=true
channels=full,analytics
console_telemetry_url=https://console.example.test
CONF

# ${arr[@]+"${arr[@]}"} keeps an empty argument array from tripping `set -u` on
# bash releases older than 4.4.
if $FLOW_AGENTS_CLI ${FLOW_AGENTS_CLI_ARGS[@]+"${FLOW_AGENTS_CLI_ARGS[@]}"} \
  telemetry-doctor --dest "$DEST" --json --headless >"$TMPDIR_EVAL/nonlocal.json"; then
  _fail "doctor returned success for non-local Console without --allow-network"
elif jq -e '
  .ok == false and
  .console.endpointAllowed == false and
  .console.reachability.checked == false
' "$TMPDIR_EVAL/nonlocal.json" >/dev/null; then
  _pass "doctor blocks non-local Console reachability without opt-in"
else
  _fail "doctor non-local block JSON did not match expected shape"
fi
|
|
174
|
+
|
|
175
|
+
# Case: same staged config as the preceding check (nothing is rewritten here),
# but with --allow-network and a 50 ms timeout. The doctor must still exit
# non-zero, while now marking the endpoint as allowed and reporting that the
# reachability probe ran.
#
# ${arr[@]+"${arr[@]}"} keeps an empty argument array from tripping `set -u` on
# bash releases older than 4.4.
if $FLOW_AGENTS_CLI ${FLOW_AGENTS_CLI_ARGS[@]+"${FLOW_AGENTS_CLI_ARGS[@]}"} \
  telemetry-doctor --dest "$DEST" --json --headless --allow-network --timeout-ms 50 >"$TMPDIR_EVAL/allow-network.json"; then
  _fail "doctor returned success for unreachable non-local Console with --allow-network"
elif jq -e '
  .ok == false and
  .console.endpointAllowed == true and
  .console.reachability.checked == true
' "$TMPDIR_EVAL/allow-network.json" >/dev/null; then
  _pass "doctor checks non-local Console only with explicit network opt-in"
else
  _fail "doctor allow-network JSON did not match expected shape"
fi
|
|
188
|
+
|
|
189
|
+
# Print the tallies after a blank separator line. The closing arithmetic test is
# the last command, so its status (0 only when nothing failed) becomes the
# script's exit status.
printf '\n'
printf 'Passed: %s\n' "$pass"
printf 'Failed: %s\n' "$fail"

((fail == 0))
|
|
@@ -0,0 +1,169 @@
|
|
|
1
|
+
#!/usr/bin/env bash
|
|
2
|
+
# test_usage_feedback_dashboard.sh - Layer 2: automatic artifact sync + HTML dashboard validation
|
|
3
|
+
set -uo pipefail
|
|
4
|
+
|
|
5
|
+
ROOT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/../.." && pwd)"
|
|
6
|
+
source "$ROOT_DIR/evals/lib/node.sh"
|
|
7
|
+
USAGE_FEEDBACK="$ROOT_DIR/scripts/usage-feedback.js"
|
|
8
|
+
# Scratch directory for every fixture and captured output of this eval.
# Abort if it cannot be created: the script does not run under `set -e`, and
# with an empty TMPDIR_EVAL the fixture paths built from it later in the
# script would resolve against the filesystem root (e.g. "/telemetry").
TMPDIR_EVAL=$(mktemp -d /tmp/eval-usage-feedback-dashboard.XXXXXX) || {
  echo "failed to create temporary directory" >&2
  exit 1
}

# Running totals maintained by _pass and _fail.
pass=0
fail=0
|
|
10
|
+
|
|
11
|
+
# Delete the eval scratch directory and everything under it.
cleanup() {
  rm -rf "$TMPDIR_EVAL"
}
|
|
12
|
+
trap cleanup EXIT
|
|
13
|
+
|
|
14
|
+
# Print a check mark followed by the label in $1 and count one more passed check.
_pass() {
  printf ' ✓ %s\n' "$1"
  pass=$((pass + 1))
}
|
|
15
|
+
# Print a cross mark followed by the label in $1 and count one more failed check.
_fail() {
  printf ' ✗ %s\n' "$1"
  fail=$((fail + 1))
}
|
|
16
|
+
|
|
17
|
+
printf '%s\n' \
  "=== Layer 2: Usage Feedback Dashboard Validation ===" \
  "" \
  "--- Script Existence ---"

# Every later check invokes the CLI script, so stop right away when it is missing.
if [[ ! -f "$USAGE_FEEDBACK" ]]; then
  _fail "usage-feedback.js not found at $USAGE_FEEDBACK"
  printf '\nResult: %s passed, %s failed\n' "$pass" "$fail"
  exit 1
fi
_pass "usage-feedback.js exists"
|
|
29
|
+
|
|
30
|
+
telemetry="$TMPDIR_EVAL/telemetry"
artifacts="$TMPDIR_EVAL/.flow-agents"

# Write one deliver-type workflow artifact fixture to
# "$artifacts/<slug>/<slug>--deliver.md".
#   $1 - artifact slug (directory name and file-name prefix)
#   $2 - markdown title
#   $3 - status value
#   $4 - iteration value
_write_deliver_artifact() {
  mkdir -p "$artifacts/$1"
  printf '%s\n' \
    "# $2" \
    "" \
    "branch: main" \
    "worktree: /tmp/example" \
    "created: 2026-05-04" \
    "status: $3" \
    "type: deliver" \
    "iteration: $4" \
    > "$artifacts/$1/$1--deliver.md"
}

mkdir -p "$telemetry"
_write_deliver_artifact auto-feedback "Auto Feedback Delivery" delivered 1
_write_deliver_artifact open-feedback "Open Feedback Delivery" verifying 0
# This artifact's title is raw script markup.
_write_deliver_artifact html-feedback "<script>alert(1)</script>" delivered 1
|
|
65
|
+
|
|
66
|
+
printf '\n--- Artifact Sync ---\n'

# First sync run: stdout and stderr are captured so a failure can be reported.
sync_cmd=(
  flow_agents_node "$USAGE_FEEDBACK" sync-artifacts
  --telemetry-dir "$telemetry"
  --artifact-dir "$artifacts"
  --repo flow-agents
  --profile-id codex-default
  --prompt-id deliver-v1
  --skill-id deliver
)
if "${sync_cmd[@]}" >"$TMPDIR_EVAL/sync.out" 2>"$TMPDIR_EVAL/sync.err"; then
  _pass "sync-artifacts derives terminal artifact outcomes"
else
  _fail "sync-artifacts failed: $(cat "$TMPDIR_EVAL/sync.err" 2>/dev/null)"
fi

# The outcomes file must hold a successful deliver outcome whose quality_score is null.
outcomes_file="$telemetry/outcomes.jsonl"
success_filter='select(.result == "success" and .task_type == "deliver" and .quality_score == null)'
if [[ ! -f "$outcomes_file" ]] || ! jq -e "$success_filter" "$outcomes_file" >/dev/null 2>&1; then
  _fail "synced outcome missing expected success/null-quality fields"
else
  _pass "synced outcome records success without invented quality score"
fi
|
|
86
|
+
|
|
87
|
+
# Idempotence: repeating the sync with identical arguments must not append
# further lines to outcomes.jsonl.
#
# The comparison is only meaningful when the second run succeeded and the
# outcomes file exists both before and after it; otherwise two empty counts
# would compare equal and the check would pass without testing anything.
before_count=""
after_count=""
if [[ -f "$telemetry/outcomes.jsonl" ]]; then
  before_count=$(wc -l < "$telemetry/outcomes.jsonl" | tr -d ' ')
fi

second_sync_ok=true
flow_agents_node "$USAGE_FEEDBACK" sync-artifacts \
  --telemetry-dir "$telemetry" \
  --artifact-dir "$artifacts" \
  --repo flow-agents \
  --profile-id codex-default \
  --prompt-id deliver-v1 \
  --skill-id deliver >/dev/null 2>"$TMPDIR_EVAL/sync-second.err" || second_sync_ok=false

if [[ -f "$telemetry/outcomes.jsonl" ]]; then
  after_count=$(wc -l < "$telemetry/outcomes.jsonl" | tr -d ' ')
fi

# Exactly one pass/fail is recorded for this check, whichever branch is taken.
if [[ "$second_sync_ok" != true ]]; then
  _fail "sync-artifacts second run failed: $(cat "$TMPDIR_EVAL/sync-second.err" 2>/dev/null)"
elif [[ -z "$before_count" || -z "$after_count" ]]; then
  _fail "sync-artifacts idempotence could not be checked: outcomes.jsonl missing"
elif [[ "$before_count" == "$after_count" ]]; then
  _pass "sync-artifacts is idempotent by artifact outcome id"
else
  _fail "sync-artifacts duplicated outcomes on second run"
fi
|
|
101
|
+
|
|
102
|
+
# With --include-open the sync must succeed and the outcomes file must contain
# a "not_verified" outcome whose task_slug is "open-feedback-delivery".
open_sync_cmd=(
  flow_agents_node "$USAGE_FEEDBACK" sync-artifacts
  --telemetry-dir "$telemetry"
  --artifact-dir "$artifacts"
  --include-open
  --repo flow-agents
)
open_recorded=false
if "${open_sync_cmd[@]}" >/dev/null 2>"$TMPDIR_EVAL/sync-open.err"; then
  if jq -e 'select(.result == "not_verified" and .task_slug == "open-feedback-delivery")' "$telemetry/outcomes.jsonl" >/dev/null 2>&1; then
    open_recorded=true
  fi
fi

if [[ "$open_recorded" == true ]]; then
  _pass "sync-artifacts can include open artifacts as not_verified"
else
  _fail "sync-artifacts --include-open did not record open artifact"
fi
|
|
112
|
+
|
|
113
|
+
# Append a hand-written outcome whose task_slug is raw script markup.
printf '%s\n' \
  '{"schema_version":"1","outcome_id":"xss-outcome","recorded_at":"2026-05-04T11:30:00Z","session_id":"xss-session","runtime":"codex","repo":"flow-agents","task_type":"deliver","task_slug":"<script>alert(1)</script>","result":"success","quality_score":null,"human_minutes_saved":null,"rework_required":false,"evidence":[]}' \
  >> "$telemetry/outcomes.jsonl"

printf '\n--- Dashboard ---\n'

dashboard="$telemetry/reports/dashboard.html"
dashboard_cmd=(
  flow_agents_node "$USAGE_FEEDBACK" dashboard
  --telemetry-dir "$telemetry"
  --artifact-dir "$artifacts"
  --repo flow-agents
  --profile-id codex-default
  --prompt-id deliver-v1
  --skill-id deliver
  --force
)
if "${dashboard_cmd[@]}" >"$TMPDIR_EVAL/dashboard.out" 2>"$TMPDIR_EVAL/dashboard.err"; then
  _pass "dashboard syncs artifacts and writes HTML"
else
  _fail "dashboard failed: $(cat "$TMPDIR_EVAL/dashboard.err" 2>/dev/null)"
fi
|
|
132
|
+
|
|
133
|
+
# Return 0 when the HTML file at $1 exists, contains every expected heading
# and label, shows the injected script-tag slug in HTML-escaped form, and does
# not contain that tag verbatim.
#
# The escaped and the raw form have to be two different strings: checking for
# the raw tag both positively and negatively can never succeed.
_dashboard_html_ok() {
  local html=$1
  local needle

  [[ -f "$html" ]] || return 1

  # Fixed-string matching: every needle is literal text, not a pattern.
  for needle in \
    "<!doctype html>" \
    "Usage Dashboard" \
    "What Needs Attention" \
    "Measurement state" \
    "Data Coverage" \
    "Outcome Mix" \
    "Missing Label Drilldown" \
    "auto-feedback-delivery" \
    '&lt;script&gt;alert(1)&lt;/script&gt;'; do
    grep -qF -- "$needle" "$html" || return 1
  done

  # The unescaped tag must not appear anywhere in the page.
  ! grep -qF -- '<script>alert(1)</script>' "$html"
}

if _dashboard_html_ok "$dashboard"; then
  _pass "dashboard HTML contains expected sections and escapes artifact labels"
else
  _fail "dashboard HTML missing expected content or escaping"
fi
|
|
148
|
+
|
|
149
|
+
# The dashboard file already exists at this point, so a run without --force
# is expected to exit non-zero.
if ! flow_agents_node "$USAGE_FEEDBACK" dashboard \
  --telemetry-dir "$telemetry" \
  --artifact-dir "$artifacts" >/dev/null 2>"$TMPDIR_EVAL/dashboard-overwrite.err"; then
  _pass "dashboard rejects existing output without --force"
else
  _fail "dashboard overwrote existing output without --force"
fi

# `report --format html` must succeed and the file checked below must contain
# the dashboard heading.
report_cmd=(
  flow_agents_node "$USAGE_FEEDBACK" report
  --telemetry-dir "$telemetry"
  --format html
  --output reports/report.html
)
if "${report_cmd[@]}" >"$TMPDIR_EVAL/report-html.out" 2>"$TMPDIR_EVAL/report-html.err" \
  && grep -q "Usage Dashboard" "$telemetry/reports/report.html"; then
  _pass "report --format html writes dashboard-style HTML"
else
  _fail "report --format html failed: $(cat "$TMPDIR_EVAL/report-html.err" 2>/dev/null)"
fi

# Final tally; the last command makes the exit status non-zero on any failure.
printf '\nResult: %s passed, %s failed\n' "$pass" "$fail"
(( fail == 0 ))
|
|
@@ -0,0 +1,117 @@
|
|
|
1
|
+
#!/usr/bin/env bash
|
|
2
|
+
# test_usage_feedback_global.sh - Layer 2: global usage registry/dashboard validation
|
|
3
|
+
set -uo pipefail
|
|
4
|
+
|
|
5
|
+
ROOT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/../.." && pwd)"
|
|
6
|
+
source "$ROOT_DIR/evals/lib/node.sh"
|
|
7
|
+
USAGE_FEEDBACK="$ROOT_DIR/scripts/usage-feedback.js"
|
|
8
|
+
# Scratch directory for every fixture and captured output of this eval.
# Abort if it cannot be created: the script does not run under `set -e`, and
# with an empty TMPDIR_EVAL the fixture paths built from it later in the
# script would resolve against the filesystem root (e.g. "/global").
TMPDIR_EVAL=$(mktemp -d /tmp/eval-usage-feedback-global.XXXXXX) || {
  echo "failed to create temporary directory" >&2
  exit 1
}

# Running totals maintained by _pass and _fail.
pass=0
fail=0
|
|
10
|
+
|
|
11
|
+
# Delete the eval scratch directory and everything under it.
cleanup() {
  rm -rf "$TMPDIR_EVAL"
}
|
|
12
|
+
trap cleanup EXIT
|
|
13
|
+
|
|
14
|
+
# Print a check mark followed by the label in $1 and count one more passed check.
_pass() {
  printf ' ✓ %s\n' "$1"
  pass=$((pass + 1))
}
|
|
15
|
+
# Print a cross mark followed by the label in $1 and count one more failed check.
_fail() {
  printf ' ✗ %s\n' "$1"
  fail=$((fail + 1))
}
|
|
16
|
+
|
|
17
|
+
printf '%s\n' "=== Layer 2: Usage Feedback Global Validation ===" ""

# Locations inside the scratch directory: the global store plus two sample repos.
global="$TMPDIR_EVAL/global"
repo_a="$TMPDIR_EVAL/repo-a"
repo_b="$TMPDIR_EVAL/repo-b"
mkdir -p "$repo_a/.flow-agents/alpha" "$repo_b/.flow-agents/beta"

# repo-a: a delivered "deliver" artifact.
printf '%s\n' \
  '# Deliver Alpha' \
  '' \
  'status: delivered' \
  'type: deliver' \
  > "$repo_a/.flow-agents/alpha/alpha--deliver.md"

# repo-b: a failed "verify-work" artifact.
printf '%s\n' \
  '# Verify Beta' \
  '' \
  'status: failed' \
  'type: verify-work' \
  > "$repo_b/.flow-agents/beta/beta--verify.md"
|
|
38
|
+
|
|
39
|
+
printf '%s\n' "--- Registration ---"

# Register repo-a under the name "alpha" with explicit profile/prompt/skill labels.
register_cmd=(
  flow_agents_node "$USAGE_FEEDBACK" register-project
  --global-dir "$global"
  --repo-root "$repo_a"
  --name alpha
  --profile-id codex-default
  --prompt-id deliver-v1
  --skill-id deliver
)
if "${register_cmd[@]}" >"$TMPDIR_EVAL/register.out" 2>"$TMPDIR_EVAL/register.err"; then
  _pass "register-project writes global project registry"
else
  _fail "register-project failed: $(cat "$TMPDIR_EVAL/register.err" 2>/dev/null)"
fi

# projects.json must list "alpha" together with its profile_id.
registry_file="$global/projects.json"
if [[ ! -f "$registry_file" ]] \
  || ! jq -e '.projects[] | select(.name == "alpha" and .profile_id == "codex-default")' "$registry_file" >/dev/null 2>&1; then
  _fail "registered project metadata missing"
else
  _pass "registered project preserves profile metadata"
fi
|
|
57
|
+
|
|
58
|
+
printf '\n--- Sync Projects ---\n'

# Sync repo-b, passed explicitly on the command line, under the name "beta".
sync_projects_cmd=(
  flow_agents_node "$USAGE_FEEDBACK" sync-projects
  --global-dir "$global"
  --repo-root "$repo_b"
  --name beta
  --profile-id codex-experimental
  --prompt-id verify-v1
  --skill-id verify-work
)
if "${sync_projects_cmd[@]}" >"$TMPDIR_EVAL/sync.out" 2>"$TMPDIR_EVAL/sync.err"; then
  _pass "sync-projects syncs explicit project into global root"
else
  _fail "sync-projects failed: $(cat "$TMPDIR_EVAL/sync.err" 2>/dev/null)"
fi

# The beta store must contain a failure outcome labeled with repo and profile_id.
beta_outcomes="$global/projects/beta/outcomes.jsonl"
if [[ ! -f "$beta_outcomes" ]] \
  || ! jq -e 'select(.repo == "beta" and .result == "failure" and .profile_id == "codex-experimental")' "$beta_outcomes" >/dev/null 2>&1; then
  _fail "global project store missing synced labeled outcome"
else
  _pass "global project store contains synced outcome with labels"
fi
|
|
77
|
+
|
|
78
|
+
printf '\n--- Global Dashboard ---\n'

if flow_agents_node "$USAGE_FEEDBACK" global-dashboard --global-dir "$global" --force \
  >"$TMPDIR_EVAL/global-dashboard.out" 2>"$TMPDIR_EVAL/global-dashboard.err"; then
  _pass "global-dashboard syncs registered projects and writes HTML"
else
  _fail "global-dashboard failed: $(cat "$TMPDIR_EVAL/global-dashboard.err" 2>/dev/null)"
fi

# The generated page must exist and mention the heading and both project names.
dashboard="$global/reports/global-dashboard.html"
dashboard_ok=false
if [[ -f "$dashboard" ]]; then
  dashboard_ok=true
  for expected in "Usage Dashboard" "alpha" "beta"; do
    if ! grep -q "$expected" "$dashboard"; then
      dashboard_ok=false
      break
    fi
  done
fi

if [[ "$dashboard_ok" == true ]]; then
  _pass "global dashboard includes multiple project groups"
else
  _fail "global dashboard missing expected project groups"
fi
|
|
94
|
+
|
|
95
|
+
# A third repo that is never registered; it lives under a directory that is
# handed to --discover below.
discover_root="$TMPDIR_EVAL/discover-root"
repo_c="$discover_root/gamma"
mkdir -p "$repo_c/.flow-agents/gamma"
printf '%s\n' \
  '# Deliver Gamma' \
  '' \
  'status: delivered' \
  'type: deliver' \
  > "$repo_c/.flow-agents/gamma/gamma--deliver.md"

# The run must succeed and leave an outcomes file for "gamma" in the global store.
discovered=false
if flow_agents_node "$USAGE_FEEDBACK" global-dashboard \
  --global-dir "$global" \
  --discover "$discover_root" \
  --force >/dev/null 2>"$TMPDIR_EVAL/discover.err"; then
  if [[ -f "$global/projects/gamma/outcomes.jsonl" ]]; then
    discovered=true
  fi
fi

if [[ "$discovered" == true ]]; then
  _pass "global-dashboard can discover child project directories"
else
  _fail "global-dashboard discovery failed: $(cat "$TMPDIR_EVAL/discover.err" 2>/dev/null)"
fi

# Final tally; the last command makes the exit status non-zero on any failure.
printf '\nResult: %s passed, %s failed\n' "$pass" "$fail"
(( fail == 0 ))
|