@kontourai/flow-agents 0.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.githooks/pre-push +11 -0
- package/.github/workflows/ci.yml +210 -0
- package/.github/workflows/docs-pages.yml +52 -0
- package/.github/workflows/publish-npm.yml +104 -0
- package/AGENTS.md +26 -0
- package/CHANGELOG.md +66 -0
- package/CODE_OF_CONDUCT.md +25 -0
- package/CONTEXT.md +300 -0
- package/CONTRIBUTING.md +44 -0
- package/LICENSE +201 -0
- package/README.md +129 -0
- package/SECURITY.md +33 -0
- package/agent-cards/dev.json +19 -0
- package/agents/dev.json +127 -0
- package/agents/tool-code-reviewer.json +61 -0
- package/agents/tool-dependencies-updater.json +118 -0
- package/agents/tool-explore-config.json +92 -0
- package/agents/tool-explore-deps.json +92 -0
- package/agents/tool-explore-entry.json +92 -0
- package/agents/tool-explore-patterns.json +92 -0
- package/agents/tool-explore-structure.json +92 -0
- package/agents/tool-explore-tests.json +92 -0
- package/agents/tool-planner.json +57 -0
- package/agents/tool-playwright.json +145 -0
- package/agents/tool-security-reviewer.json +56 -0
- package/agents/tool-verifier.json +61 -0
- package/agents/tool-worker.json +58 -0
- package/build/src/cli/console-learning-projection.js +123 -0
- package/build/src/cli/docs-preview.js +39 -0
- package/build/src/cli/effective-backlog-settings.js +102 -0
- package/build/src/cli/export-bookmarks.js +38 -0
- package/build/src/cli/fixture-retirement-audit.js +140 -0
- package/build/src/cli/flow-kit.js +138 -0
- package/build/src/cli/import-bookmarks.js +50 -0
- package/build/src/cli/init.js +239 -0
- package/build/src/cli/instinct-cli.js +93 -0
- package/build/src/cli/promote-workflow-artifact.js +63 -0
- package/build/src/cli/publish-change-helper.js +154 -0
- package/build/src/cli/pull-work-provider.js +469 -0
- package/build/src/cli/runtime-adapter.js +23 -0
- package/build/src/cli/telemetry-doctor.js +221 -0
- package/build/src/cli/usage-feedback.js +443 -0
- package/build/src/cli/validate-hook-influence.js +152 -0
- package/build/src/cli/validate-source-tree.js +31 -0
- package/build/src/cli/validate-workflow-artifacts.js +486 -0
- package/build/src/cli/veritas-governance.js +262 -0
- package/build/src/cli/workflow-artifact-cleanup-audit.js +272 -0
- package/build/src/cli/workflow-sidecar.js +816 -0
- package/build/src/cli.js +89 -0
- package/build/src/flow-kit/validate.js +75 -0
- package/build/src/lib/args.js +45 -0
- package/build/src/lib/fs.js +62 -0
- package/build/src/lib/workflow-learning-projection.js +334 -0
- package/build/src/runtime-adapters.js +146 -0
- package/build/src/tools/build-universal-bundles.js +397 -0
- package/build/src/tools/common.js +56 -0
- package/build/src/tools/filter-installed-packs.js +132 -0
- package/build/src/tools/generate-context-map.js +198 -0
- package/build/src/tools/validate-package.js +64 -0
- package/build/src/tools/validate-source-tree.js +622 -0
- package/console.telemetry.json +176 -0
- package/context/base-rules.md +17 -0
- package/context/code-review-standards.md +62 -0
- package/context/coding-standards.md +42 -0
- package/context/common/orchestrators.md +12 -0
- package/context/common/subagents.md +28 -0
- package/context/contracts/artifact-contract.md +182 -0
- package/context/contracts/builder-kit-workflow-state-contract.md +319 -0
- package/context/contracts/delivery-contract.md +69 -0
- package/context/contracts/execution-contract.md +53 -0
- package/context/contracts/governance-adapter-contract.md +67 -0
- package/context/contracts/planning-contract.md +85 -0
- package/context/contracts/review-contract.md +104 -0
- package/context/contracts/sandbox-policy.md +52 -0
- package/context/contracts/verification-contract.md +134 -0
- package/context/contracts/work-item-contract.md +215 -0
- package/context/deferred/demo-mode.md +33 -0
- package/context/deferred/languages/go.md +31 -0
- package/context/deferred/languages/python.md +31 -0
- package/context/deferred/languages/typescript.md +34 -0
- package/context/deferred/parallelization.md +35 -0
- package/context/deferred/worktree-isolation.md +24 -0
- package/context/development-workflow.md +50 -0
- package/context/scripts/context-budget/budget-scan.sh +166 -0
- package/context/scripts/detect-tools.sh +3 -0
- package/context/scripts/discover-agents.sh +28 -0
- package/context/scripts/git-status.sh +49 -0
- package/context/scripts/hooks/config-protection.js +79 -0
- package/context/scripts/hooks/desktop-notify.sh +39 -0
- package/context/scripts/hooks/governance-audit.sh +135 -0
- package/context/scripts/hooks/lib/audit-transport.sh +40 -0
- package/context/scripts/hooks/lib/hook-flags.js +49 -0
- package/context/scripts/hooks/lib/patterns.sh +57 -0
- package/context/scripts/hooks/lib/resolve-formatter.js +80 -0
- package/context/scripts/hooks/post-edit-accumulator.js +66 -0
- package/context/scripts/hooks/pre-commit-quality.js +194 -0
- package/context/scripts/hooks/quality-gate.js +93 -0
- package/context/scripts/hooks/report-only-guard.js +21 -0
- package/context/scripts/hooks/run-hook.js +136 -0
- package/context/scripts/hooks/stop-format-typecheck.js +141 -0
- package/context/scripts/hooks/stop-goal-fit.js +337 -0
- package/context/scripts/hooks/workflow-steering.js +250 -0
- package/context/scripts/telemetry/console-presets.sh +14 -0
- package/context/scripts/telemetry/install-console-config.sh +214 -0
- package/context/scripts/telemetry/lib/config.sh +85 -0
- package/context/scripts/telemetry/lib/enrich.sh +115 -0
- package/context/scripts/telemetry/lib/redact.sh +22 -0
- package/context/scripts/telemetry/lib/session.sh +63 -0
- package/context/scripts/telemetry/lib/transport.sh +183 -0
- package/context/scripts/telemetry/lib/usage.sh +29 -0
- package/context/scripts/telemetry/sync-agents.sh +173 -0
- package/context/scripts/telemetry/telemetry.conf +23 -0
- package/context/scripts/telemetry/telemetry.sh +387 -0
- package/context/scripts/validate-package.sh +89 -0
- package/context/settings/backlog-provider-settings.json +54 -0
- package/context/templates/core/identity.md +26 -0
- package/context/templates/core/user.md +15 -0
- package/docs/_config.yml +15 -0
- package/docs/_layouts/default.html +87 -0
- package/docs/adr/0001-flow-agents-consumes-flow.md +77 -0
- package/docs/adr/0002-flow-kits-as-extension-unit.md +13 -0
- package/docs/adr/0003-flow-agents-coordinates-kits-and-adapters.md +13 -0
- package/docs/adr/0004-gates-expect-surface-claims.md +15 -0
- package/docs/adr/0005-kubernetes-inspired-resource-contracts.md +48 -0
- package/docs/adr/0006-typescript-first-source-policy.md +98 -0
- package/docs/agent-system-guidebook.md +391 -0
- package/docs/agent-usage-feedback-loop.md +351 -0
- package/docs/assets/favicon.svg +13 -0
- package/docs/assets/og-image.png +0 -0
- package/docs/assets/site.css +774 -0
- package/docs/assets/site.js +139 -0
- package/docs/configurable-workflow-routing.md +174 -0
- package/docs/context-map.md +145 -0
- package/docs/developer-architecture.md +145 -0
- package/docs/developer-hook-setup.md +61 -0
- package/docs/fixture-ownership.md +44 -0
- package/docs/flow-kit-repository-contract.md +180 -0
- package/docs/index.md +129 -0
- package/docs/kontour-resource-contract.md +358 -0
- package/docs/migrations.md +64 -0
- package/docs/north-star.md +322 -0
- package/docs/operating-layers.md +110 -0
- package/docs/repository-structure.md +132 -0
- package/docs/sandbox-policy.md +56 -0
- package/docs/skills-map.md +203 -0
- package/docs/standards-register.md +96 -0
- package/docs/veritas-integration.md +165 -0
- package/docs/work-item-adapters.md +72 -0
- package/docs/workflow-artifact-lifecycle.md +141 -0
- package/docs/workflow-eval-strategy.md +295 -0
- package/docs/workflow-shared-contracts.md +51 -0
- package/docs/workflow-usage-guide.md +443 -0
- package/evals/ARCHITECTURE.md +143 -0
- package/evals/CONVENTIONS.md +58 -0
- package/evals/README.md +128 -0
- package/evals/acceptance/run.sh +29 -0
- package/evals/acceptance/test_claude_harness.sh +242 -0
- package/evals/acceptance/test_codex_harness.sh +108 -0
- package/evals/acceptance/test_kiro_harness.sh +128 -0
- package/evals/cases/dev/404.html +97 -0
- package/evals/cases/dev/code-review.yaml +44 -0
- package/evals/cases/dev/dashboard.html +300 -0
- package/evals/cases/dev/deliver.yaml +66 -0
- package/evals/cases/dev/dependency-update.yaml +16 -0
- package/evals/cases/dev/explore.yaml +20 -0
- package/evals/cases/dev/index.html +370 -0
- package/evals/cases/dev/package-lock.json +28 -0
- package/evals/cases/dev/package.json +16 -0
- package/evals/cases/dev/plan-work.yaml +20 -0
- package/evals/cases/dev/promptfooconfig.yaml +666 -0
- package/evals/cases/dev/search-first.yaml +20 -0
- package/evals/cases/dev/tdd-workflow.yaml +48 -0
- package/evals/cases/dev/verify-work.yaml +44 -0
- package/evals/cases/dev/workflow.yaml +34 -0
- package/evals/ci/run-baseline.sh +283 -0
- package/evals/fixtures/backlog-provider-settings/global-default.json +44 -0
- package/evals/fixtures/backlog-provider-settings/project-override.json +53 -0
- package/evals/fixtures/builder-kit-workflow-state/baseline-freshness-resolution-hint.json +139 -0
- package/evals/fixtures/builder-kit-workflow-state/direct-primitive-stop.json +59 -0
- package/evals/fixtures/builder-kit-workflow-state/empty-board-route-shape.json +55 -0
- package/evals/fixtures/builder-kit-workflow-state/happy-path.json +71 -0
- package/evals/fixtures/builder-kit-workflow-state/mid-work-resume.json +80 -0
- package/evals/fixtures/builder-kit-workflow-state/missing-prestep-recovery.json +65 -0
- package/evals/fixtures/builder-kit-workflow-state/product-build-chaining.json +60 -0
- package/evals/fixtures/builder-kit-workflow-state/stale-continuation-requires-new-probe.json +57 -0
- package/evals/fixtures/console-learning-projection/artifacts/console-learning-correction/learning.json +50 -0
- package/evals/fixtures/console-learning-projection/artifacts/console-learning-open-route/learning.json +41 -0
- package/evals/fixtures/flow-kit-repository/invalid-absolute-path/kit.json +8 -0
- package/evals/fixtures/flow-kit-repository/invalid-asset-section/flows/review.flow.json +6 -0
- package/evals/fixtures/flow-kit-repository/invalid-asset-section/kit.json +11 -0
- package/evals/fixtures/flow-kit-repository/invalid-duplicate-flow/flows/review.flow.json +6 -0
- package/evals/fixtures/flow-kit-repository/invalid-duplicate-flow/kit.json +9 -0
- package/evals/fixtures/flow-kit-repository/invalid-id/flows/review.flow.json +6 -0
- package/evals/fixtures/flow-kit-repository/invalid-id/kit.json +8 -0
- package/evals/fixtures/flow-kit-repository/invalid-malformed-json/kit.json +8 -0
- package/evals/fixtures/flow-kit-repository/invalid-missing-flow/kit.json +8 -0
- package/evals/fixtures/flow-kit-repository/invalid-missing-id/flows/review.flow.json +6 -0
- package/evals/fixtures/flow-kit-repository/invalid-missing-id/kit.json +7 -0
- package/evals/fixtures/flow-kit-repository/invalid-missing-schema-version/flows/review.flow.json +6 -0
- package/evals/fixtures/flow-kit-repository/invalid-missing-schema-version/kit.json +7 -0
- package/evals/fixtures/flow-kit-repository/invalid-name/flows/review.flow.json +6 -0
- package/evals/fixtures/flow-kit-repository/invalid-name/kit.json +8 -0
- package/evals/fixtures/flow-kit-repository/invalid-schema-version/flows/review.flow.json +6 -0
- package/evals/fixtures/flow-kit-repository/invalid-schema-version/kit.json +8 -0
- package/evals/fixtures/flow-kit-repository/invalid-traversal/kit.json +8 -0
- package/evals/fixtures/flow-kit-repository/mixed-runtime-kit/adapters/example.json +3 -0
- package/evals/fixtures/flow-kit-repository/mixed-runtime-kit/assets/example.txt +1 -0
- package/evals/fixtures/flow-kit-repository/mixed-runtime-kit/docs/README.md +3 -0
- package/evals/fixtures/flow-kit-repository/mixed-runtime-kit/flows/runtime.flow.json +26 -0
- package/evals/fixtures/flow-kit-repository/mixed-runtime-kit/kit-evals/example.json +3 -0
- package/evals/fixtures/flow-kit-repository/mixed-runtime-kit/kit-skills/mixed/SKILL.md +3 -0
- package/evals/fixtures/flow-kit-repository/mixed-runtime-kit/kit.json +44 -0
- package/evals/fixtures/flow-kit-repository/valid-local-kit/docs/README.md +3 -0
- package/evals/fixtures/flow-kit-repository/valid-local-kit/flows/review.flow.json +26 -0
- package/evals/fixtures/flow-kit-repository/valid-local-kit/kit.json +20 -0
- package/evals/fixtures/hook-influence/cases.json +336 -0
- package/evals/fixtures/pull-work-provider/github-issues.json +170 -0
- package/evals/fixtures/pull-work-wip-shepherding/global-wip-informs.json +43 -0
- package/evals/fixtures/pull-work-wip-shepherding/personal-wip-blocks.json +42 -0
- package/evals/fixtures/surface-trust/accepted-claim-trust-report.json +31 -0
- package/evals/fixtures/surface-trust/artifact-absent.json +19 -0
- package/evals/fixtures/surface-trust/integrity-mismatch-trust-report.json +32 -0
- package/evals/fixtures/surface-trust/missing-authority-trust-report.json +27 -0
- package/evals/fixtures/surface-trust/provider-absent.json +19 -0
- package/evals/fixtures/surface-trust/rejected-claim-trust-report.json +30 -0
- package/evals/fixtures/surface-trust/stale-claim-trust-snapshot.json +31 -0
- package/evals/fixtures/usage-feedback/sample-full.jsonl +11 -0
- package/evals/fixtures/usage-feedback/sample-outcomes.jsonl +1 -0
- package/evals/fixtures/veritas-governance-adapter/fake-veritas-pass.sh +18 -0
- package/evals/fixtures/veritas-governance-adapter/fake-veritas-secret-fail.sh +10 -0
- package/evals/fixtures/veritas-governance-adapter/fake-veritas-unconfigured.sh +4 -0
- package/evals/integration/test_bundle_install.sh +541 -0
- package/evals/integration/test_console_learning_projection.sh +192 -0
- package/evals/integration/test_context_map.sh +65 -0
- package/evals/integration/test_effective_backlog_settings.sh +58 -0
- package/evals/integration/test_fixture_retirement_audit.sh +58 -0
- package/evals/integration/test_flow_agents_statusline.sh +93 -0
- package/evals/integration/test_flow_kit_repository.sh +90 -0
- package/evals/integration/test_goal_fit_hook.sh +482 -0
- package/evals/integration/test_hook_category_behaviors.sh +190 -0
- package/evals/integration/test_hook_influence_cases.sh +69 -0
- package/evals/integration/test_local_flow_kit_install.sh +145 -0
- package/evals/integration/test_publish_change_helper.sh +176 -0
- package/evals/integration/test_pull_work_provider.sh +140 -0
- package/evals/integration/test_runtime_adapter_activation.sh +106 -0
- package/evals/integration/test_telemetry.sh +485 -0
- package/evals/integration/test_telemetry_doctor.sh +193 -0
- package/evals/integration/test_usage_feedback_dashboard.sh +169 -0
- package/evals/integration/test_usage_feedback_global.sh +117 -0
- package/evals/integration/test_usage_feedback_import.sh +227 -0
- package/evals/integration/test_usage_feedback_outcomes.sh +165 -0
- package/evals/integration/test_usage_feedback_report.sh +263 -0
- package/evals/integration/test_veritas_governance_adapter.sh +235 -0
- package/evals/integration/test_workflow_artifact_cleanup_audit.sh +287 -0
- package/evals/integration/test_workflow_artifacts.sh +1247 -0
- package/evals/integration/test_workflow_sidecar_writer.sh +2112 -0
- package/evals/integration/test_workflow_steering_hook.sh +337 -0
- package/evals/lib/assertions/delegated-to.js +40 -0
- package/evals/lib/assertions/max-tool-calls.js +15 -0
- package/evals/lib/assertions/no-write-tools.js +27 -0
- package/evals/lib/assertions/pass-at-k.js +39 -0
- package/evals/lib/assertions/telemetry-utils.js +105 -0
- package/evals/lib/assertions/tool-called.js +39 -0
- package/evals/lib/assertions/verify-after-fix.js +61 -0
- package/evals/lib/claude-judge.sh +40 -0
- package/evals/lib/claude-provider.sh +74 -0
- package/evals/lib/codex-judge.sh +39 -0
- package/evals/lib/codex-provider.sh +81 -0
- package/evals/lib/eval-dev.sh +5 -0
- package/evals/lib/eval-judge.sh +22 -0
- package/evals/lib/eval-provider.sh +26 -0
- package/evals/lib/eval-report.sh +73 -0
- package/evals/lib/kiro-dev.sh +4 -0
- package/evals/lib/kiro-judge.sh +17 -0
- package/evals/lib/kiro-provider.sh +62 -0
- package/evals/lib/node.sh +111 -0
- package/evals/promptfooconfig.yaml +70 -0
- package/evals/run.sh +309 -0
- package/evals/static/test_evidence_refs.sh +141 -0
- package/evals/static/test_package.sh +407 -0
- package/evals/static/test_repo_hooks.sh +68 -0
- package/evals/static/test_universal_bundles.sh +274 -0
- package/evals/static/test_workflow_skills.sh +1207 -0
- package/install.sh +64 -0
- package/integrations/veritas/flow-agents.adapter.json +138 -0
- package/integrations/veritas/flow-agents.authority-settings.json +26 -0
- package/integrations/veritas/flow-agents.repo-standards.json +82 -0
- package/kits/builder/flows/build.flow.json +218 -0
- package/kits/builder/flows/shape.flow.json +127 -0
- package/kits/builder/kit.json +19 -0
- package/kits/catalog.json +11 -0
- package/package.json +130 -0
- package/packaging/README.md +60 -0
- package/packaging/manifest.json +173 -0
- package/packaging/packs.json +69 -0
- package/powers/dependency-checker/POWER.md +20 -0
- package/powers/dependency-checker/mcp.json +20 -0
- package/powers/playwright/POWER.md +25 -0
- package/powers/playwright/mcp.json +12 -0
- package/prompts/code-audit.md +123 -0
- package/prompts/kcommit.md +88 -0
- package/schemas/backlog-provider-settings.schema.json +138 -0
- package/schemas/workflow-acceptance.schema.json +216 -0
- package/schemas/workflow-critique.schema.json +113 -0
- package/schemas/workflow-evidence.schema.json +357 -0
- package/schemas/workflow-handoff.schema.json +52 -0
- package/schemas/workflow-learning.schema.json +223 -0
- package/schemas/workflow-release.schema.json +172 -0
- package/schemas/workflow-state.schema.json +80 -0
- package/scripts/README.md +111 -0
- package/scripts/build-universal-bundles.js +3 -0
- package/scripts/check-content-boundary.cjs +99 -0
- package/scripts/context-budget/budget-scan.sh +166 -0
- package/scripts/detect-tools.sh +3 -0
- package/scripts/discover-agents.sh +28 -0
- package/scripts/effective-backlog-settings.js +2 -0
- package/scripts/filter-installed-packs.js +2 -0
- package/scripts/flow-kit.js +2 -0
- package/scripts/generate-context-map.js +2 -0
- package/scripts/git-status.sh +49 -0
- package/scripts/hooks/claude-hook-adapter.js +174 -0
- package/scripts/hooks/claude-telemetry-hook.js +115 -0
- package/scripts/hooks/codex-hook-adapter.js +176 -0
- package/scripts/hooks/codex-telemetry-hook.js +95 -0
- package/scripts/hooks/config-protection.js +79 -0
- package/scripts/hooks/desktop-notify.sh +39 -0
- package/scripts/hooks/governance-audit.sh +135 -0
- package/scripts/hooks/lib/audit-transport.sh +40 -0
- package/scripts/hooks/lib/hook-flags.js +49 -0
- package/scripts/hooks/lib/patterns.sh +57 -0
- package/scripts/hooks/lib/resolve-formatter.js +80 -0
- package/scripts/hooks/post-edit-accumulator.js +66 -0
- package/scripts/hooks/pre-commit-quality.js +194 -0
- package/scripts/hooks/quality-gate.js +93 -0
- package/scripts/hooks/report-only-guard.js +21 -0
- package/scripts/hooks/run-hook.js +136 -0
- package/scripts/hooks/stop-format-typecheck.js +141 -0
- package/scripts/hooks/stop-goal-fit.js +337 -0
- package/scripts/hooks/workflow-steering.js +250 -0
- package/scripts/install-codex-home.sh +106 -0
- package/scripts/package.json +3 -0
- package/scripts/promote-workflow-artifact.js +2 -0
- package/scripts/publish-change-helper.js +2 -0
- package/scripts/pull-work-provider.js +2 -0
- package/scripts/setup-repo-hooks.sh +8 -0
- package/scripts/statusline/flow-agents-statusline.js +157 -0
- package/scripts/telemetry/console-presets.sh +14 -0
- package/scripts/telemetry/install-console-config.sh +214 -0
- package/scripts/telemetry/lib/config.sh +85 -0
- package/scripts/telemetry/lib/enrich.sh +115 -0
- package/scripts/telemetry/lib/redact.sh +22 -0
- package/scripts/telemetry/lib/session.sh +63 -0
- package/scripts/telemetry/lib/transport.sh +183 -0
- package/scripts/telemetry/lib/usage.sh +29 -0
- package/scripts/telemetry/sync-agents.sh +173 -0
- package/scripts/telemetry/telemetry.conf +23 -0
- package/scripts/telemetry/telemetry.sh +387 -0
- package/scripts/usage-feedback.js +2 -0
- package/scripts/validate-hook-influence-cases.js +2 -0
- package/scripts/validate-package.sh +89 -0
- package/scripts/validate-source-tree.js +9 -0
- package/skills/agentic-engineering/SKILL.md +62 -0
- package/skills/browser-test/SKILL.md +51 -0
- package/skills/builder-shape/SKILL.md +76 -0
- package/skills/context-budget/SKILL.md +40 -0
- package/skills/deliver/SKILL.md +241 -0
- package/skills/dependency-update/SKILL.md +68 -0
- package/skills/design-probe/SKILL.md +107 -0
- package/skills/eval-rebuild/SKILL.md +39 -0
- package/skills/evidence-gate/SKILL.md +186 -0
- package/skills/execute-plan/SKILL.md +110 -0
- package/skills/explore/SKILL.md +137 -0
- package/skills/feedback-loop/SKILL.md +87 -0
- package/skills/fix-bug/SKILL.md +133 -0
- package/skills/frontend-design/SKILL.md +80 -0
- package/skills/github-cli/SKILL.md +63 -0
- package/skills/idea-to-backlog/SKILL.md +267 -0
- package/skills/knowledge-capture/SKILL.md +55 -0
- package/skills/learning-review/SKILL.md +115 -0
- package/skills/pickup-probe/SKILL.md +114 -0
- package/skills/plan-work/SKILL.md +176 -0
- package/skills/pull-work/SKILL.md +309 -0
- package/skills/release-readiness/SKILL.md +121 -0
- package/skills/review-work/SKILL.md +161 -0
- package/skills/search-first/SKILL.md +66 -0
- package/skills/tdd-workflow/SKILL.md +140 -0
- package/skills/verify-work/SKILL.md +109 -0
- package/src/cli/console-learning-projection.ts +140 -0
- package/src/cli/effective-backlog-settings.ts +99 -0
- package/src/cli/fixture-retirement-audit.ts +154 -0
- package/src/cli/flow-kit.ts +139 -0
- package/src/cli/init.ts +248 -0
- package/src/cli/promote-workflow-artifact.ts +64 -0
- package/src/cli/publish-change-helper.ts +143 -0
- package/src/cli/pull-work-provider.ts +481 -0
- package/src/cli/runtime-adapter.ts +24 -0
- package/src/cli/telemetry-doctor.ts +243 -0
- package/src/cli/usage-feedback.ts +418 -0
- package/src/cli/validate-hook-influence.ts +119 -0
- package/src/cli/validate-source-tree.ts +30 -0
- package/src/cli/validate-workflow-artifacts.ts +411 -0
- package/src/cli/veritas-governance.ts +322 -0
- package/src/cli/workflow-artifact-cleanup-audit.ts +281 -0
- package/src/cli/workflow-sidecar.ts +676 -0
- package/src/cli.ts +95 -0
- package/src/flow-kit/validate.ts +74 -0
- package/src/lib/args.ts +43 -0
- package/src/lib/fs.ts +62 -0
- package/src/lib/workflow-learning-projection.ts +491 -0
- package/src/runtime-adapters.ts +154 -0
- package/src/tools/build-universal-bundles.ts +366 -0
- package/src/tools/common.ts +61 -0
- package/src/tools/filter-installed-packs.ts +129 -0
- package/src/tools/generate-context-map.ts +199 -0
- package/src/tools/validate-package.ts +57 -0
- package/src/tools/validate-source-tree.ts +488 -0
- package/tsconfig.json +19 -0
- package/veritas.claims.json +6 -0
|
@@ -0,0 +1,227 @@
|
|
|
1
|
+
#!/usr/bin/env bash
|
|
2
|
+
# test_usage_feedback_import.sh - Layer 2: Usage feedback Codex import validation
|
|
3
|
+
set -uo pipefail
|
|
4
|
+
|
|
5
|
+
ROOT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/../.." && pwd)"
|
|
6
|
+
source "$ROOT_DIR/evals/lib/node.sh"
|
|
7
|
+
USAGE_FEEDBACK="$ROOT_DIR/scripts/usage-feedback.js"
|
|
8
|
+
FIXTURE_FULL="$ROOT_DIR/evals/fixtures/usage-feedback/sample-full.jsonl"
|
|
9
|
+
# Per-run scratch directory: created under $TMPDIR when set (else /tmp).
# Abort if creation fails so later paths are never built from an empty prefix.
TMPDIR_EVAL=$(mktemp -d "${TMPDIR:-/tmp}/eval-usage-feedback-import.XXXXXX") || {
  echo "mktemp failed: cannot create scratch directory" >&2
  exit 1
}
|
|
10
|
+
pass=0; fail=0
|
|
11
|
+
|
|
12
|
+
# Delete the scratch directory tree named by TMPDIR_EVAL.
cleanup() {
  rm -rf "$TMPDIR_EVAL"
}
|
|
13
|
+
trap cleanup EXIT
|
|
14
|
+
|
|
15
|
+
# Print a check-marked line for the message in $1 and increment the global
# `pass` counter.
_pass() {
  printf '%s\n' " ✓ $1"
  pass=$(( 1 + pass ))
}
|
|
16
|
+
# Print a cross-marked line for the message in $1 and increment the global
# `fail` counter.
_fail() {
  printf '%s\n' " ✗ $1"
  fail=$(( 1 + fail ))
}
|
|
17
|
+
|
|
18
|
+
# Print the path of the session data file found under the directory in $1.
# Checks, in order: $1/sessions.jsonl, then $1/normalized-sessions.jsonl, then
# the first file with either name that find reports within two levels of $1.
# Prints nothing when no such file is found.
_sessions_file() {
  local base="$1"
  local candidate
  for candidate in "$base/sessions.jsonl" "$base/normalized-sessions.jsonl"; do
    if [[ -f "$candidate" ]]; then
      echo "$candidate"
      return
    fi
  done
  # No direct hit: search below the directory; find's errors are silenced.
  find "$base" -maxdepth 2 -type f \( -name 'sessions.jsonl' -o -name 'normalized-sessions.jsonl' \) 2>/dev/null | head -1
}
|
|
28
|
+
|
|
29
|
+
echo "=== Layer 2: Usage Feedback Runtime Import Validation ==="
|
|
30
|
+
echo ""
|
|
31
|
+
|
|
32
|
+
echo "--- Script Existence ---"
|
|
33
|
+
if [[ -f "$USAGE_FEEDBACK" ]]; then
|
|
34
|
+
_pass "usage-feedback.js exists"
|
|
35
|
+
else
|
|
36
|
+
_fail "usage-feedback.js not found at $USAGE_FEEDBACK"
|
|
37
|
+
echo ""
|
|
38
|
+
echo "Result: $pass passed, $fail failed"
|
|
39
|
+
exit 1
|
|
40
|
+
fi
|
|
41
|
+
|
|
42
|
+
echo ""
|
|
43
|
+
echo "--- Fixtures ---"
|
|
44
|
+
if [[ -f "$FIXTURE_FULL" ]]; then
|
|
45
|
+
_pass "sample Codex full.jsonl fixture exists"
|
|
46
|
+
else
|
|
47
|
+
_fail "sample Codex full.jsonl fixture missing"
|
|
48
|
+
fi
|
|
49
|
+
|
|
50
|
+
src_claude="$TMPDIR_EVAL/src-claude"
|
|
51
|
+
dst_claude="$TMPDIR_EVAL/dst-claude"
|
|
52
|
+
mkdir -p "$src_claude" "$dst_claude"
|
|
53
|
+
cat > "$src_claude/full.jsonl" <<'JSONL'
|
|
54
|
+
{"schema_version":"0.3.0","timestamp":"2026-05-04T12:00:00Z","session_id":"claude-session-1","event_id":"evt-1","event_type":"turn.user","agent":{"name":"dev","runtime":"claude-code"},"turn":{"prompt_text":"hello","prompt_length":5}}
|
|
55
|
+
{"schema_version":"0.3.0","timestamp":"2026-05-04T12:00:05Z","session_id":"claude-session-1","event_id":"evt-2","event_type":"tool.invoke","agent":{"name":"dev","runtime":"claude-code"},"tool":{"name":"Agent","normalized_name":"delegate to a specialist agent","input":{"subagent_type":"tool-planner"}}}
|
|
56
|
+
{"schema_version":"0.3.0","timestamp":"2026-05-04T12:00:05Z","session_id":"claude-session-1","event_id":"evt-2-delegate","event_type":"agent.delegate","agent":{"name":"dev","runtime":"claude-code"},"delegation":{"targets":["tool-planner"]}}
|
|
57
|
+
{"schema_version":"0.3.0","timestamp":"2026-05-04T12:01:00Z","session_id":"claude-session-1","event_id":"evt-3","event_type":"session.usage","agent":{"name":"dev","runtime":"claude-code"},"usage":{"model":"opus","duration_s":60,"tool_invocations":1,"delegations":1,"input_tokens":null,"output_tokens":null,"estimated_cost_usd":null}}
|
|
58
|
+
JSONL
|
|
59
|
+
|
|
60
|
+
src="$TMPDIR_EVAL/src"
|
|
61
|
+
dst_a="$TMPDIR_EVAL/dst-a"
|
|
62
|
+
dst_b="$TMPDIR_EVAL/dst-b"
|
|
63
|
+
mkdir -p "$src" "$dst_a" "$dst_b"
|
|
64
|
+
cp "$FIXTURE_FULL" "$src/full.jsonl"
|
|
65
|
+
|
|
66
|
+
echo ""
|
|
67
|
+
echo "--- Import Command ---"
|
|
68
|
+
if flow_agents_node "$USAGE_FEEDBACK" import-codex \
|
|
69
|
+
--input-telemetry-dir "$src" \
|
|
70
|
+
--telemetry-dir "$dst_a" \
|
|
71
|
+
--source-id "repo-a" \
|
|
72
|
+
--repo "repo-a" \
|
|
73
|
+
--repo-root "/tmp/repo-a" \
|
|
74
|
+
--profile-id "codex-default" \
|
|
75
|
+
--prompt-id "deliver-v1" \
|
|
76
|
+
--skill-id "deliver" >/dev/null 2>"$TMPDIR_EVAL/import-a.err"; then
|
|
77
|
+
_pass "import-codex imports from input telemetry dir"
|
|
78
|
+
else
|
|
79
|
+
_fail "import-codex failed for input dir: $(cat "$TMPDIR_EVAL/import-a.err" 2>/dev/null)"
|
|
80
|
+
fi
|
|
81
|
+
|
|
82
|
+
sessions_a="$(_sessions_file "$dst_a")"
|
|
83
|
+
if [[ -n "$sessions_a" && -f "$sessions_a" ]]; then
|
|
84
|
+
_pass "import-codex writes normalized session data"
|
|
85
|
+
else
|
|
86
|
+
_fail "import-codex did not write sessions.jsonl or normalized-sessions.jsonl"
|
|
87
|
+
fi
|
|
88
|
+
|
|
89
|
+
session_count=$(jq -s 'length' "$sessions_a" 2>/dev/null)
|
|
90
|
+
source_id=$(jq -r 'select(.session_id == "codex-session-1") | .source_id' "$sessions_a" 2>/dev/null | head -1)
|
|
91
|
+
repo=$(jq -r 'select(.session_id == "codex-session-1") | .repo' "$sessions_a" 2>/dev/null | head -1)
|
|
92
|
+
profile=$(jq -r 'select(.session_id == "codex-session-1") | .profile_id' "$sessions_a" 2>/dev/null | head -1)
|
|
93
|
+
prompt=$(jq -r 'select(.session_id == "codex-session-1") | .prompt_id' "$sessions_a" 2>/dev/null | head -1)
|
|
94
|
+
skill=$(jq -r 'select(.session_id == "codex-session-1") | .skill_ids[0]' "$sessions_a" 2>/dev/null | head -1)
|
|
95
|
+
if [[ "$session_count" -ge 2 && "$source_id" == "repo-a" && "$repo" == "repo-a" && "$profile" == "codex-default" && "$prompt" == "deliver-v1" && "$skill" == "deliver" ]]; then
|
|
96
|
+
_pass "normalized sessions include source/repo/profile/prompt/skill identifiers"
|
|
97
|
+
else
|
|
98
|
+
_fail "normalized identifiers mismatch: count='$session_count' source='$source_id' repo='$repo' profile='$profile' prompt='$prompt' skill='$skill'"
|
|
99
|
+
fi
|
|
100
|
+
|
|
101
|
+
turns=$(jq -r 'select(.session_id == "codex-session-1") | .turns' "$sessions_a" 2>/dev/null | head -1)
|
|
102
|
+
tools=$(jq -r 'select(.session_id == "codex-session-1") | .tool_invocations' "$sessions_a" 2>/dev/null | head -1)
|
|
103
|
+
delegations=$(jq -r 'select(.session_id == "codex-session-1") | .delegations' "$sessions_a" 2>/dev/null | head -1)
|
|
104
|
+
permissions=$(jq -r 'select(.session_id == "codex-session-1") | .permission_requests' "$sessions_a" 2>/dev/null | head -1)
|
|
105
|
+
if [[ "$turns" == "1" && "$tools" == "2" && "$delegations" == "1" && "$permissions" == "1" ]]; then
|
|
106
|
+
_pass "normalized sessions preserve Codex usage counts"
|
|
107
|
+
else
|
|
108
|
+
_fail "usage counts mismatch: turns='$turns' tools='$tools' delegations='$delegations' permissions='$permissions'"
|
|
109
|
+
fi
|
|
110
|
+
|
|
111
|
+
if flow_agents_node "$USAGE_FEEDBACK" import-telemetry \
|
|
112
|
+
--runtime claude-code \
|
|
113
|
+
--input-telemetry-dir "$src_claude" \
|
|
114
|
+
--telemetry-dir "$dst_claude" \
|
|
115
|
+
--source-id "repo-claude" \
|
|
116
|
+
--repo "repo-claude" \
|
|
117
|
+
--profile-id "claude-dev" \
|
|
118
|
+
--prompt-id "deliver-v1" \
|
|
119
|
+
--skill-id "deliver" >/dev/null 2>"$TMPDIR_EVAL/import-claude.err"; then
|
|
120
|
+
_pass "import-telemetry imports Claude Code full.jsonl"
|
|
121
|
+
else
|
|
122
|
+
_fail "import-telemetry failed for Claude Code: $(cat "$TMPDIR_EVAL/import-claude.err" 2>/dev/null)"
|
|
123
|
+
fi
|
|
124
|
+
|
|
125
|
+
sessions_claude="$(_sessions_file "$dst_claude")"
|
|
126
|
+
claude_runtime=$(jq -r 'select(.session_id == "claude-session-1") | .runtime' "$sessions_claude" 2>/dev/null | head -1)
|
|
127
|
+
claude_tools=$(jq -r 'select(.session_id == "claude-session-1") | .tool_invocations' "$sessions_claude" 2>/dev/null | head -1)
|
|
128
|
+
claude_delegations=$(jq -r 'select(.session_id == "claude-session-1") | .delegations' "$sessions_claude" 2>/dev/null | head -1)
|
|
129
|
+
if [[ "$claude_runtime" == "claude-code" && "$claude_tools" == "1" && "$claude_delegations" == "1" ]]; then
|
|
130
|
+
_pass "normalized sessions preserve Claude Code runtime and usage counts"
|
|
131
|
+
else
|
|
132
|
+
_fail "Claude import mismatch: runtime='$claude_runtime' tools='$claude_tools' delegations='$claude_delegations'"
|
|
133
|
+
fi
|
|
134
|
+
|
|
135
|
+
# Import the fixture's full.jsonl into $dst_b under source id "repo-b".
# stdout is discarded; stderr is kept so a failure can be reported verbatim.
import_b_err="$TMPDIR_EVAL/import-b.err"
import_b_args=(
  --input-full-jsonl "$src/full.jsonl"
  --telemetry-dir "$dst_b"
  --source-id "repo-b"
  --repo "repo-b"
  --profile-id "codex-experimental"
  --prompt-id "deliver-v2"
  --skill-id "deliver"
)
if ! flow_agents_node "$USAGE_FEEDBACK" import-codex "${import_b_args[@]}" >/dev/null 2>"$import_b_err"; then
  _fail "import-codex failed for explicit full.jsonl: $(cat "$import_b_err" 2>/dev/null)"
else
  _pass "import-codex imports from explicit full.jsonl"
fi

# The stored record for "codex-session-1" must carry the explicit source id.
sessions_b="$(_sessions_file "$dst_b")"
source_b="$(jq -r 'select(.session_id == "codex-session-1") | .source_id' "$sessions_b" 2>/dev/null | head -n 1)"
case "$source_b" in
  repo-b)
    _pass "same fixture can import with a distinct source_id"
    ;;
  *)
    _fail "second import source_id mismatch: '$source_b'"
    ;;
esac
|
|
155
|
+
|
|
156
|
+
# Import without --source-id and check which source id the record ends up
# with; the expected fallback value here is "flow-agents".
dst_fallback="$TMPDIR_EVAL/dst-fallback"
mkdir -p "$dst_fallback"
import_fallback_err="$TMPDIR_EVAL/import-fallback.err"
import_fallback_args=(
  --input-full-jsonl "$src/full.jsonl"
  --telemetry-dir "$dst_fallback"
)
if ! flow_agents_node "$USAGE_FEEDBACK" import-codex "${import_fallback_args[@]}" >/dev/null 2>"$import_fallback_err"; then
  _fail "import-codex failed without source-id: $(cat "$import_fallback_err" 2>/dev/null)"
else
  sessions_fallback="$(_sessions_file "$dst_fallback")"
  source_fallback="$(jq -r 'select(.session_id == "codex-session-1") | .source_id' "$sessions_fallback" 2>/dev/null | head -n 1)"
  case "$source_fallback" in
    flow-agents)
      _pass "import-codex preserves raw repo source fallback when source-id is omitted"
      ;;
    *)
      _fail "omitted source-id fallback mismatch: '$source_fallback'"
      ;;
  esac
fi
|
|
171
|
+
|
|
172
|
+
# Build a one-event telemetry dir whose event carries no repo/source fields,
# import it by directory, and expect the source id to equal the input
# directory's name ("src-no-metadata").
src_no_metadata="$TMPDIR_EVAL/src-no-metadata"
dst_no_metadata="$TMPDIR_EVAL/dst-no-metadata"
mkdir -p "$src_no_metadata" "$dst_no_metadata"
printf '%s\n' \
  '{"session_id":"no-metadata-session","event_type":"turn.user","timestamp":"2026-05-04T12:00:00Z"}' \
  > "$src_no_metadata/full.jsonl"

import_no_metadata_err="$TMPDIR_EVAL/import-no-metadata.err"
import_no_metadata_args=(
  --input-telemetry-dir "$src_no_metadata"
  --telemetry-dir "$dst_no_metadata"
)
if ! flow_agents_node "$USAGE_FEEDBACK" import-codex "${import_no_metadata_args[@]}" >/dev/null 2>"$import_no_metadata_err"; then
  _fail "import-codex failed for metadata-free input dir: $(cat "$import_no_metadata_err" 2>/dev/null)"
else
  sessions_no_metadata="$(_sessions_file "$dst_no_metadata")"
  source_no_metadata="$(jq -r 'select(.session_id == "no-metadata-session") | .source_id' "$sessions_no_metadata" 2>/dev/null | head -n 1)"
  case "$source_no_metadata" in
    src-no-metadata)
      _pass "import-codex uses input telemetry dir name when source metadata is absent"
      ;;
    *)
      _fail "input telemetry dir source fallback mismatch: '$source_no_metadata'"
      ;;
  esac
fi
|
|
191
|
+
|
|
192
|
+
# Negative test: the target telemetry dir is itself a symlink.
# The link must point at a real directory. With a dangling link the import
# can fail for unrelated reasons (the path cannot be created or opened), and
# the test would pass even if no symlink guard existed.
mkdir -p "$TMPDIR_EVAL/symlink-target"
ln -s "$TMPDIR_EVAL/symlink-target" "$TMPDIR_EVAL/symlink-dst"
if flow_agents_node "$USAGE_FEEDBACK" import-codex \
  --input-full-jsonl "$src/full.jsonl" \
  --telemetry-dir "$TMPDIR_EVAL/symlink-dst" >/dev/null 2>"$TMPDIR_EVAL/import-symlink.err"; then
  _fail "import-codex accepted symlinked target telemetry dir"
elif [[ -e "$TMPDIR_EVAL/symlink-target/normalized-sessions.jsonl" ]]; then
  # A non-zero exit is not enough: nothing may have been written through the
  # link (same file name the symlinked-parent test checks for).
  _fail "import-codex wrote through symlinked target telemetry dir"
else
  _pass "import-codex rejects symlinked target telemetry dir"
fi
|
|
200
|
+
|
|
201
|
+
# Negative test: the telemetry dir is a not-yet-existing child of a symlinked
# directory. The import must fail and must not have produced the sessions
# file underneath the link's real target.
mkdir -p "$TMPDIR_EVAL/import-intermediate-target"
ln -s "$TMPDIR_EVAL/import-intermediate-target" "$TMPDIR_EVAL/import-intermediate-link"
import_symlink_parent_args=(
  --input-full-jsonl "$src/full.jsonl"
  --telemetry-dir "$TMPDIR_EVAL/import-intermediate-link/nested"
)
if flow_agents_node "$USAGE_FEEDBACK" import-codex "${import_symlink_parent_args[@]}" >/dev/null 2>"$TMPDIR_EVAL/import-symlink-parent.err"; then
  _fail "import-codex accepted target telemetry dir with symlinked parent"
elif [[ ! -e "$TMPDIR_EVAL/import-intermediate-target/nested/normalized-sessions.jsonl" ]]; then
  _pass "import-codex rejects symlinked target telemetry parent before creating nested dirs"
else
  _fail "import-codex wrote through symlinked target telemetry parent"
fi
|
|
214
|
+
|
|
215
|
+
# Negative test: the input telemetry dir does not exist, so the import is
# expected to exit non-zero.
import_missing_args=(
  --input-telemetry-dir "$TMPDIR_EVAL/missing"
  --telemetry-dir "$TMPDIR_EVAL/missing-dst"
  --source-id "missing"
  --repo "missing"
)
if ! flow_agents_node "$USAGE_FEEDBACK" import-codex "${import_missing_args[@]}" >/dev/null 2>"$TMPDIR_EVAL/missing.err"; then
  _pass "import-codex rejects missing input telemetry"
else
  _fail "import-codex accepted missing input telemetry"
fi
|
|
224
|
+
|
|
225
|
+
# Print the tally; the final arithmetic test is the script's exit status
# (0 only when no check failed).
echo
echo "Result: $pass passed, $fail failed"
(( fail == 0 ))
|
|
@@ -0,0 +1,165 @@
|
|
|
1
|
+
#!/usr/bin/env bash
|
|
2
|
+
# test_usage_feedback_outcomes.sh - Layer 2: Usage feedback outcome validation
|
|
3
|
+
# No `-e`: individual checks are expected to fail and are tallied instead.
set -uo pipefail

# Repository root, two levels above this script's directory.
ROOT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/../.." && pwd)" || {
  echo "cannot resolve repository root from ${BASH_SOURCE[0]}" >&2
  exit 1
}
source "$ROOT_DIR/evals/lib/node.sh"
# Every check below runs flow_agents_node (presumably provided by node.sh).
# If it is missing, each call exits 127 and all the "rejects ..." checks would
# be counted as passes, so refuse to continue.
if ! command -v flow_agents_node >/dev/null 2>&1; then
  echo "flow_agents_node is not available after sourcing $ROOT_DIR/evals/lib/node.sh" >&2
  exit 1
fi
USAGE_FEEDBACK="$ROOT_DIR/scripts/usage-feedback.js"
# Without this guard an empty TMPDIR_EVAL would turn every "$TMPDIR_EVAL/x"
# below into "/x".
TMPDIR_EVAL=$(mktemp -d /tmp/eval-usage-feedback-outcomes.XXXXXX) || {
  echo "mktemp failed; cannot create scratch directory" >&2
  exit 1
}
# Counters updated by _pass / _fail.
pass=0
fail=0
|
|
10
|
+
|
|
11
|
+
# Delete the scratch directory and everything under it.
# Globals: TMPDIR_EVAL (read)
cleanup() {
  rm -rf "$TMPDIR_EVAL"
}
|
|
12
|
+
trap cleanup EXIT
|
|
13
|
+
|
|
14
|
+
# Print a passing check on stdout and add one to the global `pass` counter.
# Arguments: $1 - description of the check
_pass() {
  local message=$1
  echo " ✓ ${message}"
  pass=$((pass + 1))
}
|
|
15
|
+
# Print a failing check on stdout and add one to the global `fail` counter.
# Arguments: $1 - description of the failure
_fail() {
  local message=$1
  echo " ✗ ${message}"
  fail=$((fail + 1))
}
|
|
16
|
+
|
|
17
|
+
echo "=== Layer 2: Usage Feedback Outcome Validation ==="
echo

# Nothing else can run without the CLI script, so a missing file prints the
# tally and ends the run immediately.
echo "--- Script Existence ---"
if [[ ! -f "$USAGE_FEEDBACK" ]]; then
  _fail "usage-feedback.js not found at $USAGE_FEEDBACK"
  echo
  echo "Result: $pass passed, $fail failed"
  exit 1
fi
_pass "usage-feedback.js exists"
|
|
29
|
+
|
|
30
|
+
echo
echo "--- Outcome Recording ---"

# Record a fully populated "success" outcome. The telemetry location is
# passed through the TELEMETRY_DATA_DIR environment variable for this call.
success_err="$TMPDIR_EVAL/success.err"
success_args=(
  --session-id "session-success"
  --runtime "codex"
  --repo "flow-agents"
  --agent "dev"
  --profile-id "codex-default"
  --prompt-id "deliver-v1"
  --skill-id "deliver"
  --result "success"
  --quality-score 5
  --task-type "deliver"
  --task-slug "usage-feedback-success"
  --human-minutes-saved 12
  --evidence ".flow-agents/agent-usage-feedback-loop/agent-usage-feedback-loop--deliver.md"
)
if ! TELEMETRY_DATA_DIR="$TMPDIR_EVAL" flow_agents_node "$USAGE_FEEDBACK" record-outcome "${success_args[@]}" >/dev/null 2>"$success_err"; then
  _fail "record-outcome rejected success: $(cat "$success_err" 2>/dev/null)"
else
  _pass "record-outcome accepts success with profile/prompt/skill ids"
fi
|
|
50
|
+
|
|
51
|
+
# Record a "failure" outcome that repeats --skill-id and sets the rework
# flag; later checks read both back from outcomes.jsonl.
failure_err="$TMPDIR_EVAL/failure.err"
failure_args=(
  --session-id "session-failure"
  --runtime "codex"
  --repo "flow-agents-docs"
  --agent "dev"
  --profile-id "codex-experimental"
  --prompt-id "deliver-v2"
  --skill-id "deliver"
  --skill-id "verify-work"
  --result "failure"
  --quality-score 2
  --task-type "verify"
  --task-slug "usage-feedback-failure"
  --rework-required
  --notes "Fixture failure"
)
if ! TELEMETRY_DATA_DIR="$TMPDIR_EVAL" flow_agents_node "$USAGE_FEEDBACK" record-outcome "${failure_args[@]}" >/dev/null 2>"$failure_err"; then
  _fail "record-outcome rejected failure: $(cat "$failure_err" 2>/dev/null)"
else
  _pass "record-outcome accepts failure with multiple skill ids"
fi
|
|
70
|
+
|
|
71
|
+
# The two successful record-outcome calls above should have appended exactly
# two lines to outcomes.jsonl.
OUTCOMES="$TMPDIR_EVAL/outcomes.jsonl"
# Test for the file first. In `wc -l < file 2>/dev/null` the input redirect is
# applied before stderr is silenced, so a missing file prints a shell error
# and leaves the count empty.
line_count=0
if [[ -f "$OUTCOMES" ]]; then
  # tr strips the space padding some wc implementations add.
  line_count=$(wc -l < "$OUTCOMES" | tr -d ' ')
fi
if [[ "$line_count" == "2" ]]; then
  _pass "record-outcome appends two outcome records"
else
  _fail "expected 2 outcome records, found ${line_count:-0}"
fi
|
|
78
|
+
|
|
79
|
+
# Read individual fields of the two recorded outcomes back out of
# outcomes.jsonl and compare them with what was passed on the command line.
success_profile="$(jq -r 'select(.session_id == "session-success") | .profile_id' "$OUTCOMES" 2>/dev/null)"
success_prompt="$(jq -r 'select(.session_id == "session-success") | .prompt_id' "$OUTCOMES" 2>/dev/null)"
success_skill="$(jq -r 'select(.session_id == "session-success") | .skill_ids[0]' "$OUTCOMES" 2>/dev/null)"
if [[ "$success_profile" != "codex-default" || "$success_prompt" != "deliver-v1" || "$success_skill" != "deliver" ]]; then
  _fail "success identifiers mismatch: profile='$success_profile' prompt='$success_prompt' skill='$success_skill'"
else
  _pass "success outcome preserves profile, prompt, and skill ids"
fi

# The failure record was given --rework-required and two --skill-id values.
failure_rework="$(jq -r 'select(.session_id == "session-failure") | .rework_required' "$OUTCOMES" 2>/dev/null)"
failure_skill_count="$(jq -r 'select(.session_id == "session-failure") | .skill_ids | length' "$OUTCOMES" 2>/dev/null)"
if [[ "$failure_rework" != "true" || "$failure_skill_count" != "2" ]]; then
  _fail "failure fields mismatch: rework='$failure_rework' skill_count='$failure_skill_count'"
else
  _pass "failure outcome preserves rework flag and multiple skill ids"
fi
|
|
95
|
+
|
|
96
|
+
# Negative test: "excellent" is passed as the result value. The call must
# fail and outcomes.jsonl must have the same line count before and after.
before_invalid="$(wc -l < "$OUTCOMES" 2>/dev/null | tr -d ' ')"
invalid_args=(
  --session-id "session-invalid"
  --runtime "codex"
  --repo "flow-agents"
  --agent "dev"
  --profile-id "codex-default"
  --prompt-id "deliver-v1"
  --skill-id "deliver"
  --result "excellent"
)
if TELEMETRY_DATA_DIR="$TMPDIR_EVAL" flow_agents_node "$USAGE_FEEDBACK" record-outcome "${invalid_args[@]}" >/dev/null 2>"$TMPDIR_EVAL/invalid-result.err"; then
  _fail "record-outcome accepted invalid result"
else
  after_invalid="$(wc -l < "$OUTCOMES" 2>/dev/null | tr -d ' ')"
  case "$after_invalid" in
    "$before_invalid")
      _pass "record-outcome rejects invalid result without appending"
      ;;
    *)
      _fail "invalid result changed outcomes.jsonl line count from $before_invalid to $after_invalid"
      ;;
  esac
fi
|
|
115
|
+
|
|
116
|
+
# Negative test: no --session-id is supplied, so the call is expected to
# exit non-zero.
missing_session_args=(
  --runtime "codex"
  --repo "flow-agents"
  --agent "dev"
  --result "success"
)
if ! TELEMETRY_DATA_DIR="$TMPDIR_EVAL" flow_agents_node "$USAGE_FEEDBACK" record-outcome "${missing_session_args[@]}" >/dev/null 2>"$TMPDIR_EVAL/missing-session.err"; then
  _pass "record-outcome rejects missing session_id"
else
  _fail "record-outcome accepted missing session_id"
fi
|
|
125
|
+
|
|
126
|
+
# Negative test: --telemetry-dir is itself a symlink.
# The link must point at a real directory. With a dangling link the call can
# fail simply because the path cannot be created, and the test would pass
# even if no symlink guard existed.
mkdir -p "$TMPDIR_EVAL/symlink-target"
ln -s "$TMPDIR_EVAL/symlink-target" "$TMPDIR_EVAL/symlink-telemetry"
if flow_agents_node "$USAGE_FEEDBACK" record-outcome \
  --telemetry-dir "$TMPDIR_EVAL/symlink-telemetry" \
  --session-id "session-symlink-dir" \
  --result "success" >/dev/null 2>"$TMPDIR_EVAL/symlink-dir.err"; then
  _fail "record-outcome accepted symlinked telemetry dir"
elif [[ -e "$TMPDIR_EVAL/symlink-target/outcomes.jsonl" ]]; then
  # A non-zero exit is not enough: nothing may have been written through the
  # link.
  _fail "record-outcome wrote through symlinked telemetry dir"
else
  _pass "record-outcome rejects symlinked telemetry dir"
fi
|
|
135
|
+
|
|
136
|
+
# Negative test: --telemetry-dir is a not-yet-existing child of a symlinked
# directory. The call must fail and must not have created outcomes.jsonl
# underneath the link's real target.
mkdir -p "$TMPDIR_EVAL/intermediate-target"
ln -s "$TMPDIR_EVAL/intermediate-target" "$TMPDIR_EVAL/intermediate-link"
symlink_parent_args=(
  --telemetry-dir "$TMPDIR_EVAL/intermediate-link/nested"
  --session-id "session-symlink-parent"
  --result "success"
)
if flow_agents_node "$USAGE_FEEDBACK" record-outcome "${symlink_parent_args[@]}" >/dev/null 2>"$TMPDIR_EVAL/symlink-parent.err"; then
  _fail "record-outcome accepted telemetry dir with symlinked parent"
elif [[ ! -e "$TMPDIR_EVAL/intermediate-target/nested/outcomes.jsonl" ]]; then
  _pass "record-outcome rejects symlinked telemetry parent before creating nested dirs"
else
  _fail "record-outcome wrote through symlinked telemetry parent"
fi
|
|
150
|
+
|
|
151
|
+
# Negative test: the telemetry dir is a real directory, but outcomes.jsonl
# inside it is a symlink to a path elsewhere in the scratch area.
target_file_dir="$TMPDIR_EVAL/symlink-file-telemetry"
mkdir -p "$target_file_dir"
ln -s "$TMPDIR_EVAL/symlink-outcomes-target.jsonl" "$target_file_dir/outcomes.jsonl"
symlink_file_args=(
  --telemetry-dir "$target_file_dir"
  --session-id "session-symlink-file"
  --result "success"
)
if ! flow_agents_node "$USAGE_FEEDBACK" record-outcome "${symlink_file_args[@]}" >/dev/null 2>"$TMPDIR_EVAL/symlink-file.err"; then
  _pass "record-outcome rejects symlinked outcomes target"
else
  _fail "record-outcome accepted symlinked outcomes target"
fi
|
|
162
|
+
|
|
163
|
+
# Print the tally; the final arithmetic test is the script's exit status
# (0 only when no check failed).
echo
echo "Result: $pass passed, $fail failed"
(( fail == 0 ))
|
|
@@ -0,0 +1,263 @@
|
|
|
1
|
+
#!/usr/bin/env bash
|
|
2
|
+
# test_usage_feedback_report.sh - Layer 2: Usage feedback report validation
|
|
3
|
+
# No `-e`: individual checks are expected to fail and are tallied instead.
set -uo pipefail

# Repository root, two levels above this script's directory.
ROOT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/../.." && pwd)" || {
  echo "cannot resolve repository root from ${BASH_SOURCE[0]}" >&2
  exit 1
}
source "$ROOT_DIR/evals/lib/node.sh"
# Every check below runs flow_agents_node (presumably provided by node.sh).
# If it is missing, each call exits 127 and all the "rejects ..." checks would
# be counted as passes, so refuse to continue.
if ! command -v flow_agents_node >/dev/null 2>&1; then
  echo "flow_agents_node is not available after sourcing $ROOT_DIR/evals/lib/node.sh" >&2
  exit 1
fi
USAGE_FEEDBACK="$ROOT_DIR/scripts/usage-feedback.js"
FIXTURE_DIR="$ROOT_DIR/evals/fixtures/usage-feedback"
# Without this guard an empty TMPDIR_EVAL would turn every "$TMPDIR_EVAL/x"
# below into "/x".
TMPDIR_EVAL=$(mktemp -d /tmp/eval-usage-feedback-report.XXXXXX) || {
  echo "mktemp failed; cannot create scratch directory" >&2
  exit 1
}
# Counters updated by _pass / _fail.
pass=0
fail=0
|
|
11
|
+
|
|
12
|
+
# Delete the scratch directory and everything under it.
# Globals: TMPDIR_EVAL (read)
cleanup() {
  rm -rf "$TMPDIR_EVAL"
}
|
|
13
|
+
trap cleanup EXIT
|
|
14
|
+
|
|
15
|
+
# Print a passing check on stdout and add one to the global `pass` counter.
# Arguments: $1 - description of the check
_pass() {
  local message=$1
  echo " ✓ ${message}"
  pass=$((pass + 1))
}
|
|
16
|
+
# Print a failing check on stdout and add one to the global `fail` counter.
# Arguments: $1 - description of the failure
_fail() {
  local message=$1
  echo " ✗ ${message}"
  fail=$((fail + 1))
}
|
|
17
|
+
|
|
18
|
+
echo "=== Layer 2: Usage Feedback Report Validation ==="
echo

# Nothing else can run without the CLI script, so a missing file prints the
# tally and ends the run immediately.
echo "--- Script Existence ---"
if [[ ! -f "$USAGE_FEEDBACK" ]]; then
  _fail "usage-feedback.js not found at $USAGE_FEEDBACK"
  echo
  echo "Result: $pass passed, $fail failed"
  exit 1
fi
_pass "usage-feedback.js exists"
|
|
30
|
+
|
|
31
|
+
echo

# Two scratch telemetry dirs. Both get the sample event log; repo-a also gets
# the sample outcomes, while repo-b gets a single hand-written failure outcome.
tmp_a="$TMPDIR_EVAL/repo-a"
tmp_b="$TMPDIR_EVAL/repo-b"
mkdir -p "$tmp_a" "$tmp_b"
for telemetry_copy in "$tmp_a" "$tmp_b"; do
  cp "$FIXTURE_DIR/sample-full.jsonl" "$telemetry_copy/full.jsonl"
done
cp "$FIXTURE_DIR/sample-outcomes.jsonl" "$tmp_a/outcomes.jsonl"
printf '%s\n' \
  '{"schema_version":"1","outcome_id":"outcome-2","recorded_at":"2026-05-04T11:30:00Z","session_id":"codex-session-2","runtime":"codex","repo":"repo-b","agent":"dev","profile_id":"codex-experimental","prompt_id":"deliver-v2","prompt_variant":"concise","skill_ids":["deliver","verify-work"],"task_type":"verify","task_slug":"usage-feedback-report","result":"failure","quality_score":2,"human_minutes_saved":0,"rework_required":true,"notes":"Fixture failure outcome","evidence":["evals/integration/test_usage_feedback_report.sh"]}' \
  > "$tmp_b/outcomes.jsonl"
|
|
41
|
+
|
|
42
|
+
echo "--- JSON Report ---"
# Generate one JSON report across both scratch telemetry dirs, grouped by
# repo. stdout is the report itself and is captured for the jq checks below.
json_report="$TMPDIR_EVAL/report.json"
json_report_args=(
  --telemetry-dir "$tmp_a"
  --telemetry-dir "$tmp_b"
  --format json
  --group-by repo
)
if ! flow_agents_node "$USAGE_FEEDBACK" report "${json_report_args[@]}" >"$json_report" 2>"$TMPDIR_EVAL/report-json.err"; then
  _fail "JSON report failed: $(cat "$TMPDIR_EVAL/report-json.err" 2>/dev/null)"
else
  _pass "report emits JSON for multiple telemetry dirs"
fi
|
|
53
|
+
|
|
54
|
+
# Structural checks on the captured JSON report. `jq -e` exits non-zero when
# the filter's last output is false or null, or when it produces no output.

# Summary counts, a non-null success rate, and at least two sources.
if ! jq -e '.summary.sessions >= 4 and .summary.sessions_with_outcomes >= 2 and (.summary.success_rate != null) and (.sources | length >= 2)' "$json_report" >/dev/null 2>&1; then
  _fail "JSON report missing expected summary/source fields"
else
  _pass "JSON report includes summary sessions, outcomes, success rate, and sources"
fi

# A group labelled "flow-agents" under any of the three accepted field names.
if ! jq -e '.groups[]? | select((.key == "flow-agents") or (.group == "flow-agents") or (.name == "flow-agents"))' "$json_report" >/dev/null 2>&1; then
  _fail "JSON report did not include repo group"
else
  _pass "JSON report groups by repo"
fi

# More sessions overall than sessions that have an outcome attached.
if ! jq -e '.summary.sessions > .summary.sessions_with_outcomes' "$json_report" >/dev/null 2>&1; then
  _fail "report did not distinguish sessions without outcomes"
else
  _pass "report includes sessions without outcomes in usage totals"
fi
|
|
71
|
+
|
|
72
|
+
echo
echo "--- Markdown Report ---"
# Write a report grouped by profile into the telemetry dir's reports/ folder
# (no --format is given for this call).
markdown_report="$tmp_a/reports/usage.md"
markdown_report_args=(
  --telemetry-dir "$tmp_a"
  --group-by profile_id
  --output "$markdown_report"
)
if ! flow_agents_node "$USAGE_FEEDBACK" report "${markdown_report_args[@]}" >/dev/null 2>"$TMPDIR_EVAL/report-md.err"; then
  _fail "Markdown report failed: $(cat "$TMPDIR_EVAL/report-md.err" 2>/dev/null)"
else
  _pass "report writes Markdown output file"
fi

if [[ ! -f "$markdown_report" ]]; then
  _fail "Markdown report output file missing"
else
  _pass "Markdown report output file exists"
fi

# Every one of these strings must appear somewhere in the file; stop at the
# first one that is absent.
markdown_content_ok=1
for markdown_needle in \
  "# Agent Usage Feedback Report" \
  "Success rate" \
  "Avg tool invocations" \
  "Rework rate" \
  "codex-default"; do
  if ! grep -q "$markdown_needle" "$markdown_report"; then
    markdown_content_ok=0
    break
  fi
done
if [[ "$markdown_content_ok" == "1" ]]; then
  _pass "Markdown report includes required headings, metrics, and profile group"
else
  _fail "Markdown report missing required content"
fi
|
|
99
|
+
|
|
100
|
+
# Pass a relative --output of the form reports/<name>.md while the working
# directory is the scratch root; the file is expected under the telemetry
# dir's own reports/ folder, not under the working directory.
relative_report="$tmp_a/reports/relative.md"
relative_report_args=(
  --telemetry-dir "$tmp_a"
  --group-by profile_id
  --output reports/relative.md
)
if (
  cd "$TMPDIR_EVAL" &&
    flow_agents_node "$USAGE_FEEDBACK" report "${relative_report_args[@]}" >/dev/null 2>"$TMPDIR_EVAL/report-relative.err"
) && [[ -f "$relative_report" ]]; then
  _pass "report writes reports/name.md relative to telemetry reports directory"
else
  _fail "relative reports/name.md output failed: $(cat "$TMPDIR_EVAL/report-relative.err" 2>/dev/null)"
fi
|
|
109
|
+
|
|
110
|
+
# Pass a relative --output that already spells out
# "<telemetry-dir-name>/reports/...". The report must land at the normal
# location and no second, nested copy of that path may appear under reports/.
nested_guard_report="$tmp_a/reports/usage-feedback.md"
tmp_a_name="$(basename "$tmp_a")"
nested_guard_args=(
  --telemetry-dir "$tmp_a"
  --group-by profile_id
  --output "$tmp_a_name/reports/usage-feedback.md"
)
if (
  cd "$TMPDIR_EVAL" &&
    flow_agents_node "$USAGE_FEEDBACK" report "${nested_guard_args[@]}" >/dev/null 2>"$TMPDIR_EVAL/report-nested-guard.err"
) && [[ -f "$nested_guard_report" && ! -e "$tmp_a/reports/$tmp_a_name/reports/usage-feedback.md" ]]; then
  _pass "report prevents nested telemetry reports duplication for relative output"
else
  _fail "nested report output guard failed: $(cat "$TMPDIR_EVAL/report-nested-guard.err" 2>/dev/null)"
fi
|
|
120
|
+
|
|
121
|
+
# $markdown_report already exists from the earlier Markdown run. Writing to
# it again must fail without --force and succeed with it.
overwrite_args=(
  --telemetry-dir "$tmp_a"
  --group-by profile_id
  --output "$markdown_report"
)
if ! flow_agents_node "$USAGE_FEEDBACK" report "${overwrite_args[@]}" >/dev/null 2>"$TMPDIR_EVAL/report-overwrite.err"; then
  _pass "report rejects existing output without --force"
else
  _fail "report overwrote existing output without --force"
fi

if ! flow_agents_node "$USAGE_FEEDBACK" report "${overwrite_args[@]}" --force >/dev/null 2>"$TMPDIR_EVAL/report-force.err"; then
  _fail "report --force failed: $(cat "$TMPDIR_EVAL/report-force.err" 2>/dev/null)"
else
  _pass "report overwrites existing output with --force"
fi
|
|
139
|
+
|
|
140
|
+
# Negative test: an absolute --output that is not inside the telemetry dir's
# reports/ folder.
outside_args=(
  --telemetry-dir "$tmp_a"
  --output "$TMPDIR_EVAL/outside.md"
)
if ! flow_agents_node "$USAGE_FEEDBACK" report "${outside_args[@]}" >/dev/null 2>"$TMPDIR_EVAL/report-outside.err"; then
  _pass "report rejects output outside telemetry reports directory"
else
  _fail "report accepted output outside telemetry reports directory"
fi
|
|
147
|
+
|
|
148
|
+
# Negative test: the output path inside reports/ is a symlink pointing at a
# file elsewhere in the scratch area; --force must not make that acceptable.
ln -s "$TMPDIR_EVAL/symlink-report-target.md" "$tmp_a/reports/symlink.md"
symlink_output_args=(
  --telemetry-dir "$tmp_a"
  --output "$tmp_a/reports/symlink.md"
  --force
)
if ! flow_agents_node "$USAGE_FEEDBACK" report "${symlink_output_args[@]}" >/dev/null 2>"$TMPDIR_EVAL/report-symlink.err"; then
  _pass "report rejects symlinked output target"
else
  _fail "report accepted symlinked output target"
fi
|
|
157
|
+
|
|
158
|
+
# Negative test: the telemetry dir is real, but its reports/ entry is a
# symlink to a directory elsewhere.
tmp_symlink_reports="$TMPDIR_EVAL/symlink-reports"
mkdir -p "$tmp_symlink_reports"
cp "$FIXTURE_DIR/sample-full.jsonl" "$tmp_symlink_reports/full.jsonl"
# The link must point at a real directory. With a dangling link the report
# can fail simply because reports/ cannot be created or opened, and the test
# would pass even if no symlink guard existed.
mkdir -p "$TMPDIR_EVAL/report-parent-target"
ln -s "$TMPDIR_EVAL/report-parent-target" "$tmp_symlink_reports/reports"
if flow_agents_node "$USAGE_FEEDBACK" report \
  --telemetry-dir "$tmp_symlink_reports" \
  --output usage.md >/dev/null 2>"$TMPDIR_EVAL/report-symlink-parent.err"; then
  _fail "report accepted symlinked reports directory"
elif [[ -e "$TMPDIR_EVAL/report-parent-target/usage.md" ]]; then
  # A non-zero exit is not enough: nothing may have been written through the
  # link.
  _fail "report wrote through symlinked reports directory"
else
  _pass "report rejects symlinked reports directory"
fi
|
|
169
|
+
|
|
170
|
+
# Negative test: --telemetry-dir is a not-yet-existing child of a symlinked
# directory. The call must fail and must not have created the report
# underneath the link's real target.
mkdir -p "$TMPDIR_EVAL/report-intermediate-target"
ln -s "$TMPDIR_EVAL/report-intermediate-target" "$TMPDIR_EVAL/report-intermediate-link"
symlink_telemetry_parent_args=(
  --telemetry-dir "$TMPDIR_EVAL/report-intermediate-link/nested"
  --output usage.md
)
if flow_agents_node "$USAGE_FEEDBACK" report "${symlink_telemetry_parent_args[@]}" >/dev/null 2>"$TMPDIR_EVAL/report-symlink-telemetry-parent.err"; then
  _fail "report accepted telemetry dir with symlinked parent"
elif [[ ! -e "$TMPDIR_EVAL/report-intermediate-target/nested/reports/usage.md" ]]; then
  _pass "report rejects symlinked telemetry parent before creating report dirs"
else
  _fail "report wrote through symlinked telemetry parent"
fi
|
|
183
|
+
|
|
184
|
+
# A telemetry dir holding one raw event with no repo/source fields; the JSON
# report's `sources` array is expected to contain only the directory's name.
tmp_raw="$TMPDIR_EVAL/raw-source-name"
mkdir -p "$tmp_raw"
printf '%s\n' \
  '{"session_id":"raw-session","event_type":"turn.user","timestamp":"2026-05-04T12:00:00Z"}' \
  > "$tmp_raw/full.jsonl"

raw_report="$TMPDIR_EVAL/raw-report.json"
raw_report_args=(
  --telemetry-dir "$tmp_raw"
  --format json
)
if flow_agents_node "$USAGE_FEEDBACK" report "${raw_report_args[@]}" >"$raw_report" 2>"$TMPDIR_EVAL/report-raw.err" &&
  jq -e '.sources == ["raw-source-name"]' "$raw_report" >/dev/null 2>&1; then
  _pass "raw telemetry without source metadata groups by telemetry directory name"
else
  _fail "raw telemetry source fallback failed: $(cat "$TMPDIR_EVAL/report-raw.err" 2>/dev/null)"
fi
|
|
198
|
+
|
|
199
|
+
# A normalized session whose profile_id contains a table delimiter (|), an
# embedded newline (\n in the JSON), and HTML-significant characters.
tmp_escape="$TMPDIR_EVAL/escape-source"
mkdir -p "$tmp_escape"
cat > "$tmp_escape/normalized-sessions.jsonl" <<'JSONL'
{"schema_version":"1","source_id":"escape-source","runtime":"codex","session_id":"escape-session","profile_id":"alpha|beta\nbreak <tag> & value","skill_ids":[],"turns":0,"tool_invocations":0,"delegations":0,"permission_requests":0}
JSONL
escape_report="$TMPDIR_EVAL/escape.md"
# Expected label in the report: pipe backslash-escaped, newline collapsed to
# a space, and < > & written as HTML entities. The pattern is a BRE: `\\`
# matches one literal backslash and `|` is literal.
# NOTE(review): the entity form is inferred from the pass message ("escapes
# ... HTML chars") and from the HTML check on the same label; confirm against
# the report generator.
if flow_agents_node "$USAGE_FEEDBACK" report \
  --telemetry-dir "$tmp_escape" \
  --group-by profile_id >"$escape_report" 2>"$TMPDIR_EVAL/report-escape.err" && \
  grep -q 'alpha\\|beta break &lt;tag&gt; &amp; value' "$escape_report"; then
  _pass "Markdown report escapes table labels, HTML chars, and strips newlines"
else
  _fail "Markdown report label escaping failed: $(cat "$TMPDIR_EVAL/report-escape.err" 2>/dev/null)"
fi
|
|
213
|
+
|
|
214
|
+
# Render the same telemetry as HTML and check the label was entity-escaped.
# The positive check must look for the escaped text and the negative check
# for the raw tag. Requiring the raw "<tag>" to be both present and absent
# could never succeed.
escape_html="$tmp_escape/reports/escape.html"
if flow_agents_node "$USAGE_FEEDBACK" report \
  --telemetry-dir "$tmp_escape" \
  --group-by profile_id \
  --format html \
  --output "$escape_html" >"$TMPDIR_EVAL/report-html-escape.out" 2>"$TMPDIR_EVAL/report-html-escape.err" && \
  grep -qF -- '&lt;tag&gt; &amp; value' "$escape_html" && \
  ! grep -qF -- '<tag>' "$escape_html"; then
  _pass "HTML report escapes local telemetry labels"
else
  _fail "HTML report escaping failed: $(cat "$TMPDIR_EVAL/report-html-escape.err" 2>/dev/null)"
fi
|
|
226
|
+
|
|
227
|
+
echo
echo "--- Fixture Report Smoke ---"

# Report over the copied fixture telemetry, grouped by runtime; only the exit
# status is checked.
fixture_report="$TMPDIR_EVAL/fixture-runtime.md"
fixture_err="$TMPDIR_EVAL/fixture.err"
if ! flow_agents_node "$USAGE_FEEDBACK" report --telemetry-dir "$tmp_a" --group-by runtime >"$fixture_report" 2>"$fixture_err"; then
  _fail "fixture report failed: $(cat "$fixture_err" 2>/dev/null)"
else
  _pass "report works against copied fixture telemetry"
fi

# Point the report straight at the fixture directory (files named sample-*)
# and group by repo; the output must mention two sessions and the docs repo.
direct_fixture_report="$TMPDIR_EVAL/direct-fixture-repo.md"
direct_fixture_err="$TMPDIR_EVAL/direct-fixture-repo.err"
if flow_agents_node "$USAGE_FEEDBACK" report --telemetry-dir "$FIXTURE_DIR" --group-by repo >"$direct_fixture_report" 2>"$direct_fixture_err" &&
  grep -q "Sessions: 2" "$direct_fixture_report" &&
  grep -q "flow-agents-docs" "$direct_fixture_report"; then
  _pass "report reads sample fixture names directly for repo groups"
else
  _fail "direct fixture repo report failed: $(cat "$direct_fixture_err" 2>/dev/null)"
fi

# Same direct read, grouped by profile; both profile ids must be listed.
direct_profile_report="$TMPDIR_EVAL/direct-fixture-profile.md"
direct_profile_err="$TMPDIR_EVAL/direct-fixture-profile.err"
if flow_agents_node "$USAGE_FEEDBACK" report --telemetry-dir "$FIXTURE_DIR" --group-by profile_id >"$direct_profile_report" 2>"$direct_profile_err" &&
  grep -q "Sessions: 2" "$direct_profile_report" &&
  grep -q "codex-default" "$direct_profile_report" &&
  grep -q "codex-experimental" "$direct_profile_report"; then
  _pass "report reads sample fixture names directly for profile groups"
else
  _fail "direct fixture profile report failed: $(cat "$direct_profile_err" 2>/dev/null)"
fi
|
|
260
|
+
|
|
261
|
+
# Print the tally; the final arithmetic test is the script's exit status
# (0 only when no check failed).
echo
echo "Result: $pass passed, $fail failed"
(( fail == 0 ))
|