@kontourai/flow-agents 0.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.githooks/pre-push +11 -0
- package/.github/workflows/ci.yml +210 -0
- package/.github/workflows/docs-pages.yml +52 -0
- package/.github/workflows/publish-npm.yml +104 -0
- package/AGENTS.md +26 -0
- package/CHANGELOG.md +66 -0
- package/CODE_OF_CONDUCT.md +25 -0
- package/CONTEXT.md +300 -0
- package/CONTRIBUTING.md +44 -0
- package/LICENSE +201 -0
- package/README.md +129 -0
- package/SECURITY.md +33 -0
- package/agent-cards/dev.json +19 -0
- package/agents/dev.json +127 -0
- package/agents/tool-code-reviewer.json +61 -0
- package/agents/tool-dependencies-updater.json +118 -0
- package/agents/tool-explore-config.json +92 -0
- package/agents/tool-explore-deps.json +92 -0
- package/agents/tool-explore-entry.json +92 -0
- package/agents/tool-explore-patterns.json +92 -0
- package/agents/tool-explore-structure.json +92 -0
- package/agents/tool-explore-tests.json +92 -0
- package/agents/tool-planner.json +57 -0
- package/agents/tool-playwright.json +145 -0
- package/agents/tool-security-reviewer.json +56 -0
- package/agents/tool-verifier.json +61 -0
- package/agents/tool-worker.json +58 -0
- package/build/src/cli/console-learning-projection.js +123 -0
- package/build/src/cli/docs-preview.js +39 -0
- package/build/src/cli/effective-backlog-settings.js +102 -0
- package/build/src/cli/export-bookmarks.js +38 -0
- package/build/src/cli/fixture-retirement-audit.js +140 -0
- package/build/src/cli/flow-kit.js +138 -0
- package/build/src/cli/import-bookmarks.js +50 -0
- package/build/src/cli/init.js +239 -0
- package/build/src/cli/instinct-cli.js +93 -0
- package/build/src/cli/promote-workflow-artifact.js +63 -0
- package/build/src/cli/publish-change-helper.js +154 -0
- package/build/src/cli/pull-work-provider.js +469 -0
- package/build/src/cli/runtime-adapter.js +23 -0
- package/build/src/cli/telemetry-doctor.js +221 -0
- package/build/src/cli/usage-feedback.js +443 -0
- package/build/src/cli/validate-hook-influence.js +152 -0
- package/build/src/cli/validate-source-tree.js +31 -0
- package/build/src/cli/validate-workflow-artifacts.js +486 -0
- package/build/src/cli/veritas-governance.js +262 -0
- package/build/src/cli/workflow-artifact-cleanup-audit.js +272 -0
- package/build/src/cli/workflow-sidecar.js +816 -0
- package/build/src/cli.js +89 -0
- package/build/src/flow-kit/validate.js +75 -0
- package/build/src/lib/args.js +45 -0
- package/build/src/lib/fs.js +62 -0
- package/build/src/lib/workflow-learning-projection.js +334 -0
- package/build/src/runtime-adapters.js +146 -0
- package/build/src/tools/build-universal-bundles.js +397 -0
- package/build/src/tools/common.js +56 -0
- package/build/src/tools/filter-installed-packs.js +132 -0
- package/build/src/tools/generate-context-map.js +198 -0
- package/build/src/tools/validate-package.js +64 -0
- package/build/src/tools/validate-source-tree.js +622 -0
- package/console.telemetry.json +176 -0
- package/context/base-rules.md +17 -0
- package/context/code-review-standards.md +62 -0
- package/context/coding-standards.md +42 -0
- package/context/common/orchestrators.md +12 -0
- package/context/common/subagents.md +28 -0
- package/context/contracts/artifact-contract.md +182 -0
- package/context/contracts/builder-kit-workflow-state-contract.md +319 -0
- package/context/contracts/delivery-contract.md +69 -0
- package/context/contracts/execution-contract.md +53 -0
- package/context/contracts/governance-adapter-contract.md +67 -0
- package/context/contracts/planning-contract.md +85 -0
- package/context/contracts/review-contract.md +104 -0
- package/context/contracts/sandbox-policy.md +52 -0
- package/context/contracts/verification-contract.md +134 -0
- package/context/contracts/work-item-contract.md +215 -0
- package/context/deferred/demo-mode.md +33 -0
- package/context/deferred/languages/go.md +31 -0
- package/context/deferred/languages/python.md +31 -0
- package/context/deferred/languages/typescript.md +34 -0
- package/context/deferred/parallelization.md +35 -0
- package/context/deferred/worktree-isolation.md +24 -0
- package/context/development-workflow.md +50 -0
- package/context/scripts/context-budget/budget-scan.sh +166 -0
- package/context/scripts/detect-tools.sh +3 -0
- package/context/scripts/discover-agents.sh +28 -0
- package/context/scripts/git-status.sh +49 -0
- package/context/scripts/hooks/config-protection.js +79 -0
- package/context/scripts/hooks/desktop-notify.sh +39 -0
- package/context/scripts/hooks/governance-audit.sh +135 -0
- package/context/scripts/hooks/lib/audit-transport.sh +40 -0
- package/context/scripts/hooks/lib/hook-flags.js +49 -0
- package/context/scripts/hooks/lib/patterns.sh +57 -0
- package/context/scripts/hooks/lib/resolve-formatter.js +80 -0
- package/context/scripts/hooks/post-edit-accumulator.js +66 -0
- package/context/scripts/hooks/pre-commit-quality.js +194 -0
- package/context/scripts/hooks/quality-gate.js +93 -0
- package/context/scripts/hooks/report-only-guard.js +21 -0
- package/context/scripts/hooks/run-hook.js +136 -0
- package/context/scripts/hooks/stop-format-typecheck.js +141 -0
- package/context/scripts/hooks/stop-goal-fit.js +337 -0
- package/context/scripts/hooks/workflow-steering.js +250 -0
- package/context/scripts/telemetry/console-presets.sh +14 -0
- package/context/scripts/telemetry/install-console-config.sh +214 -0
- package/context/scripts/telemetry/lib/config.sh +85 -0
- package/context/scripts/telemetry/lib/enrich.sh +115 -0
- package/context/scripts/telemetry/lib/redact.sh +22 -0
- package/context/scripts/telemetry/lib/session.sh +63 -0
- package/context/scripts/telemetry/lib/transport.sh +183 -0
- package/context/scripts/telemetry/lib/usage.sh +29 -0
- package/context/scripts/telemetry/sync-agents.sh +173 -0
- package/context/scripts/telemetry/telemetry.conf +23 -0
- package/context/scripts/telemetry/telemetry.sh +387 -0
- package/context/scripts/validate-package.sh +89 -0
- package/context/settings/backlog-provider-settings.json +54 -0
- package/context/templates/core/identity.md +26 -0
- package/context/templates/core/user.md +15 -0
- package/docs/_config.yml +15 -0
- package/docs/_layouts/default.html +87 -0
- package/docs/adr/0001-flow-agents-consumes-flow.md +77 -0
- package/docs/adr/0002-flow-kits-as-extension-unit.md +13 -0
- package/docs/adr/0003-flow-agents-coordinates-kits-and-adapters.md +13 -0
- package/docs/adr/0004-gates-expect-surface-claims.md +15 -0
- package/docs/adr/0005-kubernetes-inspired-resource-contracts.md +48 -0
- package/docs/adr/0006-typescript-first-source-policy.md +98 -0
- package/docs/agent-system-guidebook.md +391 -0
- package/docs/agent-usage-feedback-loop.md +351 -0
- package/docs/assets/favicon.svg +13 -0
- package/docs/assets/og-image.png +0 -0
- package/docs/assets/site.css +774 -0
- package/docs/assets/site.js +139 -0
- package/docs/configurable-workflow-routing.md +174 -0
- package/docs/context-map.md +145 -0
- package/docs/developer-architecture.md +145 -0
- package/docs/developer-hook-setup.md +61 -0
- package/docs/fixture-ownership.md +44 -0
- package/docs/flow-kit-repository-contract.md +180 -0
- package/docs/index.md +129 -0
- package/docs/kontour-resource-contract.md +358 -0
- package/docs/migrations.md +64 -0
- package/docs/north-star.md +322 -0
- package/docs/operating-layers.md +110 -0
- package/docs/repository-structure.md +132 -0
- package/docs/sandbox-policy.md +56 -0
- package/docs/skills-map.md +203 -0
- package/docs/standards-register.md +96 -0
- package/docs/veritas-integration.md +165 -0
- package/docs/work-item-adapters.md +72 -0
- package/docs/workflow-artifact-lifecycle.md +141 -0
- package/docs/workflow-eval-strategy.md +295 -0
- package/docs/workflow-shared-contracts.md +51 -0
- package/docs/workflow-usage-guide.md +443 -0
- package/evals/ARCHITECTURE.md +143 -0
- package/evals/CONVENTIONS.md +58 -0
- package/evals/README.md +128 -0
- package/evals/acceptance/run.sh +29 -0
- package/evals/acceptance/test_claude_harness.sh +242 -0
- package/evals/acceptance/test_codex_harness.sh +108 -0
- package/evals/acceptance/test_kiro_harness.sh +128 -0
- package/evals/cases/dev/404.html +97 -0
- package/evals/cases/dev/code-review.yaml +44 -0
- package/evals/cases/dev/dashboard.html +300 -0
- package/evals/cases/dev/deliver.yaml +66 -0
- package/evals/cases/dev/dependency-update.yaml +16 -0
- package/evals/cases/dev/explore.yaml +20 -0
- package/evals/cases/dev/index.html +370 -0
- package/evals/cases/dev/package-lock.json +28 -0
- package/evals/cases/dev/package.json +16 -0
- package/evals/cases/dev/plan-work.yaml +20 -0
- package/evals/cases/dev/promptfooconfig.yaml +666 -0
- package/evals/cases/dev/search-first.yaml +20 -0
- package/evals/cases/dev/tdd-workflow.yaml +48 -0
- package/evals/cases/dev/verify-work.yaml +44 -0
- package/evals/cases/dev/workflow.yaml +34 -0
- package/evals/ci/run-baseline.sh +283 -0
- package/evals/fixtures/backlog-provider-settings/global-default.json +44 -0
- package/evals/fixtures/backlog-provider-settings/project-override.json +53 -0
- package/evals/fixtures/builder-kit-workflow-state/baseline-freshness-resolution-hint.json +139 -0
- package/evals/fixtures/builder-kit-workflow-state/direct-primitive-stop.json +59 -0
- package/evals/fixtures/builder-kit-workflow-state/empty-board-route-shape.json +55 -0
- package/evals/fixtures/builder-kit-workflow-state/happy-path.json +71 -0
- package/evals/fixtures/builder-kit-workflow-state/mid-work-resume.json +80 -0
- package/evals/fixtures/builder-kit-workflow-state/missing-prestep-recovery.json +65 -0
- package/evals/fixtures/builder-kit-workflow-state/product-build-chaining.json +60 -0
- package/evals/fixtures/builder-kit-workflow-state/stale-continuation-requires-new-probe.json +57 -0
- package/evals/fixtures/console-learning-projection/artifacts/console-learning-correction/learning.json +50 -0
- package/evals/fixtures/console-learning-projection/artifacts/console-learning-open-route/learning.json +41 -0
- package/evals/fixtures/flow-kit-repository/invalid-absolute-path/kit.json +8 -0
- package/evals/fixtures/flow-kit-repository/invalid-asset-section/flows/review.flow.json +6 -0
- package/evals/fixtures/flow-kit-repository/invalid-asset-section/kit.json +11 -0
- package/evals/fixtures/flow-kit-repository/invalid-duplicate-flow/flows/review.flow.json +6 -0
- package/evals/fixtures/flow-kit-repository/invalid-duplicate-flow/kit.json +9 -0
- package/evals/fixtures/flow-kit-repository/invalid-id/flows/review.flow.json +6 -0
- package/evals/fixtures/flow-kit-repository/invalid-id/kit.json +8 -0
- package/evals/fixtures/flow-kit-repository/invalid-malformed-json/kit.json +8 -0
- package/evals/fixtures/flow-kit-repository/invalid-missing-flow/kit.json +8 -0
- package/evals/fixtures/flow-kit-repository/invalid-missing-id/flows/review.flow.json +6 -0
- package/evals/fixtures/flow-kit-repository/invalid-missing-id/kit.json +7 -0
- package/evals/fixtures/flow-kit-repository/invalid-missing-schema-version/flows/review.flow.json +6 -0
- package/evals/fixtures/flow-kit-repository/invalid-missing-schema-version/kit.json +7 -0
- package/evals/fixtures/flow-kit-repository/invalid-name/flows/review.flow.json +6 -0
- package/evals/fixtures/flow-kit-repository/invalid-name/kit.json +8 -0
- package/evals/fixtures/flow-kit-repository/invalid-schema-version/flows/review.flow.json +6 -0
- package/evals/fixtures/flow-kit-repository/invalid-schema-version/kit.json +8 -0
- package/evals/fixtures/flow-kit-repository/invalid-traversal/kit.json +8 -0
- package/evals/fixtures/flow-kit-repository/mixed-runtime-kit/adapters/example.json +3 -0
- package/evals/fixtures/flow-kit-repository/mixed-runtime-kit/assets/example.txt +1 -0
- package/evals/fixtures/flow-kit-repository/mixed-runtime-kit/docs/README.md +3 -0
- package/evals/fixtures/flow-kit-repository/mixed-runtime-kit/flows/runtime.flow.json +26 -0
- package/evals/fixtures/flow-kit-repository/mixed-runtime-kit/kit-evals/example.json +3 -0
- package/evals/fixtures/flow-kit-repository/mixed-runtime-kit/kit-skills/mixed/SKILL.md +3 -0
- package/evals/fixtures/flow-kit-repository/mixed-runtime-kit/kit.json +44 -0
- package/evals/fixtures/flow-kit-repository/valid-local-kit/docs/README.md +3 -0
- package/evals/fixtures/flow-kit-repository/valid-local-kit/flows/review.flow.json +26 -0
- package/evals/fixtures/flow-kit-repository/valid-local-kit/kit.json +20 -0
- package/evals/fixtures/hook-influence/cases.json +336 -0
- package/evals/fixtures/pull-work-provider/github-issues.json +170 -0
- package/evals/fixtures/pull-work-wip-shepherding/global-wip-informs.json +43 -0
- package/evals/fixtures/pull-work-wip-shepherding/personal-wip-blocks.json +42 -0
- package/evals/fixtures/surface-trust/accepted-claim-trust-report.json +31 -0
- package/evals/fixtures/surface-trust/artifact-absent.json +19 -0
- package/evals/fixtures/surface-trust/integrity-mismatch-trust-report.json +32 -0
- package/evals/fixtures/surface-trust/missing-authority-trust-report.json +27 -0
- package/evals/fixtures/surface-trust/provider-absent.json +19 -0
- package/evals/fixtures/surface-trust/rejected-claim-trust-report.json +30 -0
- package/evals/fixtures/surface-trust/stale-claim-trust-snapshot.json +31 -0
- package/evals/fixtures/usage-feedback/sample-full.jsonl +11 -0
- package/evals/fixtures/usage-feedback/sample-outcomes.jsonl +1 -0
- package/evals/fixtures/veritas-governance-adapter/fake-veritas-pass.sh +18 -0
- package/evals/fixtures/veritas-governance-adapter/fake-veritas-secret-fail.sh +10 -0
- package/evals/fixtures/veritas-governance-adapter/fake-veritas-unconfigured.sh +4 -0
- package/evals/integration/test_bundle_install.sh +541 -0
- package/evals/integration/test_console_learning_projection.sh +192 -0
- package/evals/integration/test_context_map.sh +65 -0
- package/evals/integration/test_effective_backlog_settings.sh +58 -0
- package/evals/integration/test_fixture_retirement_audit.sh +58 -0
- package/evals/integration/test_flow_agents_statusline.sh +93 -0
- package/evals/integration/test_flow_kit_repository.sh +90 -0
- package/evals/integration/test_goal_fit_hook.sh +482 -0
- package/evals/integration/test_hook_category_behaviors.sh +190 -0
- package/evals/integration/test_hook_influence_cases.sh +69 -0
- package/evals/integration/test_local_flow_kit_install.sh +145 -0
- package/evals/integration/test_publish_change_helper.sh +176 -0
- package/evals/integration/test_pull_work_provider.sh +140 -0
- package/evals/integration/test_runtime_adapter_activation.sh +106 -0
- package/evals/integration/test_telemetry.sh +485 -0
- package/evals/integration/test_telemetry_doctor.sh +193 -0
- package/evals/integration/test_usage_feedback_dashboard.sh +169 -0
- package/evals/integration/test_usage_feedback_global.sh +117 -0
- package/evals/integration/test_usage_feedback_import.sh +227 -0
- package/evals/integration/test_usage_feedback_outcomes.sh +165 -0
- package/evals/integration/test_usage_feedback_report.sh +263 -0
- package/evals/integration/test_veritas_governance_adapter.sh +235 -0
- package/evals/integration/test_workflow_artifact_cleanup_audit.sh +287 -0
- package/evals/integration/test_workflow_artifacts.sh +1247 -0
- package/evals/integration/test_workflow_sidecar_writer.sh +2112 -0
- package/evals/integration/test_workflow_steering_hook.sh +337 -0
- package/evals/lib/assertions/delegated-to.js +40 -0
- package/evals/lib/assertions/max-tool-calls.js +15 -0
- package/evals/lib/assertions/no-write-tools.js +27 -0
- package/evals/lib/assertions/pass-at-k.js +39 -0
- package/evals/lib/assertions/telemetry-utils.js +105 -0
- package/evals/lib/assertions/tool-called.js +39 -0
- package/evals/lib/assertions/verify-after-fix.js +61 -0
- package/evals/lib/claude-judge.sh +40 -0
- package/evals/lib/claude-provider.sh +74 -0
- package/evals/lib/codex-judge.sh +39 -0
- package/evals/lib/codex-provider.sh +81 -0
- package/evals/lib/eval-dev.sh +5 -0
- package/evals/lib/eval-judge.sh +22 -0
- package/evals/lib/eval-provider.sh +26 -0
- package/evals/lib/eval-report.sh +73 -0
- package/evals/lib/kiro-dev.sh +4 -0
- package/evals/lib/kiro-judge.sh +17 -0
- package/evals/lib/kiro-provider.sh +62 -0
- package/evals/lib/node.sh +111 -0
- package/evals/promptfooconfig.yaml +70 -0
- package/evals/run.sh +309 -0
- package/evals/static/test_evidence_refs.sh +141 -0
- package/evals/static/test_package.sh +407 -0
- package/evals/static/test_repo_hooks.sh +68 -0
- package/evals/static/test_universal_bundles.sh +274 -0
- package/evals/static/test_workflow_skills.sh +1207 -0
- package/install.sh +64 -0
- package/integrations/veritas/flow-agents.adapter.json +138 -0
- package/integrations/veritas/flow-agents.authority-settings.json +26 -0
- package/integrations/veritas/flow-agents.repo-standards.json +82 -0
- package/kits/builder/flows/build.flow.json +218 -0
- package/kits/builder/flows/shape.flow.json +127 -0
- package/kits/builder/kit.json +19 -0
- package/kits/catalog.json +11 -0
- package/package.json +130 -0
- package/packaging/README.md +60 -0
- package/packaging/manifest.json +173 -0
- package/packaging/packs.json +69 -0
- package/powers/dependency-checker/POWER.md +20 -0
- package/powers/dependency-checker/mcp.json +20 -0
- package/powers/playwright/POWER.md +25 -0
- package/powers/playwright/mcp.json +12 -0
- package/prompts/code-audit.md +123 -0
- package/prompts/kcommit.md +88 -0
- package/schemas/backlog-provider-settings.schema.json +138 -0
- package/schemas/workflow-acceptance.schema.json +216 -0
- package/schemas/workflow-critique.schema.json +113 -0
- package/schemas/workflow-evidence.schema.json +357 -0
- package/schemas/workflow-handoff.schema.json +52 -0
- package/schemas/workflow-learning.schema.json +223 -0
- package/schemas/workflow-release.schema.json +172 -0
- package/schemas/workflow-state.schema.json +80 -0
- package/scripts/README.md +111 -0
- package/scripts/build-universal-bundles.js +3 -0
- package/scripts/check-content-boundary.cjs +99 -0
- package/scripts/context-budget/budget-scan.sh +166 -0
- package/scripts/detect-tools.sh +3 -0
- package/scripts/discover-agents.sh +28 -0
- package/scripts/effective-backlog-settings.js +2 -0
- package/scripts/filter-installed-packs.js +2 -0
- package/scripts/flow-kit.js +2 -0
- package/scripts/generate-context-map.js +2 -0
- package/scripts/git-status.sh +49 -0
- package/scripts/hooks/claude-hook-adapter.js +174 -0
- package/scripts/hooks/claude-telemetry-hook.js +115 -0
- package/scripts/hooks/codex-hook-adapter.js +176 -0
- package/scripts/hooks/codex-telemetry-hook.js +95 -0
- package/scripts/hooks/config-protection.js +79 -0
- package/scripts/hooks/desktop-notify.sh +39 -0
- package/scripts/hooks/governance-audit.sh +135 -0
- package/scripts/hooks/lib/audit-transport.sh +40 -0
- package/scripts/hooks/lib/hook-flags.js +49 -0
- package/scripts/hooks/lib/patterns.sh +57 -0
- package/scripts/hooks/lib/resolve-formatter.js +80 -0
- package/scripts/hooks/post-edit-accumulator.js +66 -0
- package/scripts/hooks/pre-commit-quality.js +194 -0
- package/scripts/hooks/quality-gate.js +93 -0
- package/scripts/hooks/report-only-guard.js +21 -0
- package/scripts/hooks/run-hook.js +136 -0
- package/scripts/hooks/stop-format-typecheck.js +141 -0
- package/scripts/hooks/stop-goal-fit.js +337 -0
- package/scripts/hooks/workflow-steering.js +250 -0
- package/scripts/install-codex-home.sh +106 -0
- package/scripts/package.json +3 -0
- package/scripts/promote-workflow-artifact.js +2 -0
- package/scripts/publish-change-helper.js +2 -0
- package/scripts/pull-work-provider.js +2 -0
- package/scripts/setup-repo-hooks.sh +8 -0
- package/scripts/statusline/flow-agents-statusline.js +157 -0
- package/scripts/telemetry/console-presets.sh +14 -0
- package/scripts/telemetry/install-console-config.sh +214 -0
- package/scripts/telemetry/lib/config.sh +85 -0
- package/scripts/telemetry/lib/enrich.sh +115 -0
- package/scripts/telemetry/lib/redact.sh +22 -0
- package/scripts/telemetry/lib/session.sh +63 -0
- package/scripts/telemetry/lib/transport.sh +183 -0
- package/scripts/telemetry/lib/usage.sh +29 -0
- package/scripts/telemetry/sync-agents.sh +173 -0
- package/scripts/telemetry/telemetry.conf +23 -0
- package/scripts/telemetry/telemetry.sh +387 -0
- package/scripts/usage-feedback.js +2 -0
- package/scripts/validate-hook-influence-cases.js +2 -0
- package/scripts/validate-package.sh +89 -0
- package/scripts/validate-source-tree.js +9 -0
- package/skills/agentic-engineering/SKILL.md +62 -0
- package/skills/browser-test/SKILL.md +51 -0
- package/skills/builder-shape/SKILL.md +76 -0
- package/skills/context-budget/SKILL.md +40 -0
- package/skills/deliver/SKILL.md +241 -0
- package/skills/dependency-update/SKILL.md +68 -0
- package/skills/design-probe/SKILL.md +107 -0
- package/skills/eval-rebuild/SKILL.md +39 -0
- package/skills/evidence-gate/SKILL.md +186 -0
- package/skills/execute-plan/SKILL.md +110 -0
- package/skills/explore/SKILL.md +137 -0
- package/skills/feedback-loop/SKILL.md +87 -0
- package/skills/fix-bug/SKILL.md +133 -0
- package/skills/frontend-design/SKILL.md +80 -0
- package/skills/github-cli/SKILL.md +63 -0
- package/skills/idea-to-backlog/SKILL.md +267 -0
- package/skills/knowledge-capture/SKILL.md +55 -0
- package/skills/learning-review/SKILL.md +115 -0
- package/skills/pickup-probe/SKILL.md +114 -0
- package/skills/plan-work/SKILL.md +176 -0
- package/skills/pull-work/SKILL.md +309 -0
- package/skills/release-readiness/SKILL.md +121 -0
- package/skills/review-work/SKILL.md +161 -0
- package/skills/search-first/SKILL.md +66 -0
- package/skills/tdd-workflow/SKILL.md +140 -0
- package/skills/verify-work/SKILL.md +109 -0
- package/src/cli/console-learning-projection.ts +140 -0
- package/src/cli/effective-backlog-settings.ts +99 -0
- package/src/cli/fixture-retirement-audit.ts +154 -0
- package/src/cli/flow-kit.ts +139 -0
- package/src/cli/init.ts +248 -0
- package/src/cli/promote-workflow-artifact.ts +64 -0
- package/src/cli/publish-change-helper.ts +143 -0
- package/src/cli/pull-work-provider.ts +481 -0
- package/src/cli/runtime-adapter.ts +24 -0
- package/src/cli/telemetry-doctor.ts +243 -0
- package/src/cli/usage-feedback.ts +418 -0
- package/src/cli/validate-hook-influence.ts +119 -0
- package/src/cli/validate-source-tree.ts +30 -0
- package/src/cli/validate-workflow-artifacts.ts +411 -0
- package/src/cli/veritas-governance.ts +322 -0
- package/src/cli/workflow-artifact-cleanup-audit.ts +281 -0
- package/src/cli/workflow-sidecar.ts +676 -0
- package/src/cli.ts +95 -0
- package/src/flow-kit/validate.ts +74 -0
- package/src/lib/args.ts +43 -0
- package/src/lib/fs.ts +62 -0
- package/src/lib/workflow-learning-projection.ts +491 -0
- package/src/runtime-adapters.ts +154 -0
- package/src/tools/build-universal-bundles.ts +366 -0
- package/src/tools/common.ts +61 -0
- package/src/tools/filter-installed-packs.ts +129 -0
- package/src/tools/generate-context-map.ts +199 -0
- package/src/tools/validate-package.ts +57 -0
- package/src/tools/validate-source-tree.ts +488 -0
- package/tsconfig.json +19 -0
- package/veritas.claims.json +6 -0
|
@@ -0,0 +1,106 @@
|
|
|
1
|
+
#!/usr/bin/env bash
|
|
2
|
+
# test_runtime_adapter_activation.sh - Exercise local runtime adapter activation.
|
|
3
|
+
set -uo pipefail
|
|
4
|
+
|
|
5
|
+
ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")/../.." && pwd)"
|
|
6
|
+
source "$ROOT/evals/lib/node.sh"
|
|
7
|
+
|
|
8
|
+
errors=0
|
|
9
|
+
# Abort if the scratch directory cannot be created: with an empty TMP_DIR, DEST and OUT would resolve directly under "/".
TMP_DIR="$(mktemp -d)" || { echo "mktemp -d failed; cannot create scratch directory" >&2; exit 1; }
|
|
10
|
+
trap 'rm -rf "$TMP_DIR"' EXIT
|
|
11
|
+
|
|
12
|
+
# Print a check-mark line for a passing check ($1 = description of the check).
pass() {
  printf ' ✓ %s\n' "$1"
}
|
|
13
|
+
# Print a cross-mark line for a failed check ($1 = description) and bump the global `errors` counter.
fail() {
  printf ' ✗ %s\n' "$1"
  errors=$((errors + 1))
}
|
|
14
|
+
|
|
15
|
+
CLI="$ROOT/scripts/flow-kit.js"
|
|
16
|
+
DEST="$TMP_DIR/runtime-dest"
|
|
17
|
+
MIXED_SRC="$ROOT/evals/fixtures/flow-kit-repository/mixed-runtime-kit"
|
|
18
|
+
OUT="$TMP_DIR/activation.json"
|
|
19
|
+
UNKNOWN_OUT="$TMP_DIR/unknown.json"
|
|
20
|
+
# Snapshot the catalog hash before activation so the later comparison can detect mutation of the source file.
CATALOG_HASH_BEFORE="$(shasum -a 256 "$ROOT/kits/catalog.json" | awk '{print $1}')"
# An empty hash (shasum unavailable or file unreadable) would make the later equality check pass vacuously.
[[ -n "$CATALOG_HASH_BEFORE" ]] || { echo "could not hash $ROOT/kits/catalog.json (is shasum available?)" >&2; exit 1; }
|
|
21
|
+
mkdir -p "$DEST"
|
|
22
|
+
|
|
23
|
+
echo "=== Runtime Adapter Activation Checks ==="
|
|
24
|
+
|
|
25
|
+
if flow_agents_node "$CLI" install-local "$MIXED_SRC" --dest "$DEST" >"$TMP_DIR/install.out" 2>&1; then
|
|
26
|
+
pass "mixed local kit installs into temp destination"
|
|
27
|
+
else
|
|
28
|
+
fail "mixed local kit install failed"
|
|
29
|
+
sed -n '1,160p' "$TMP_DIR/install.out"
|
|
30
|
+
fi
|
|
31
|
+
|
|
32
|
+
if flow_agents_node "$CLI" activate --dest "$DEST" --source-root "$ROOT" --format json >"$OUT" 2>&1; then
|
|
33
|
+
pass "activation succeeds with default adapter"
|
|
34
|
+
else
|
|
35
|
+
fail "activation failed"
|
|
36
|
+
sed -n '1,220p' "$OUT"
|
|
37
|
+
fi
|
|
38
|
+
|
|
39
|
+
if node - "$OUT" "$DEST" "$ROOT/kits/catalog.json" <<'NODE'
|
|
40
|
+
const fs = require("node:fs");
|
|
41
|
+
const path = require("node:path");
|
|
42
|
+
const data = JSON.parse(fs.readFileSync(process.argv[2], "utf8"));
|
|
43
|
+
const dest = process.argv[3];
|
|
44
|
+
const catalog = process.argv[4];
|
|
45
|
+
if (data.selected_adapter !== "codex-local") throw new Error(`unexpected selected_adapter: ${data.selected_adapter}`);
|
|
46
|
+
if (JSON.stringify(data.supported_asset_classes) !== JSON.stringify(["flows"])) throw new Error(`unexpected supported_asset_classes: ${data.supported_asset_classes}`);
|
|
47
|
+
const ids = new Set(data.generated_runtime_files.map((item) => item.asset_id));
|
|
48
|
+
for (const expected of ["builder.shape", "builder.build", "mixed.runtime", "codex-local.activation"]) {
|
|
49
|
+
if (!ids.has(expected)) throw new Error(`missing generated asset: ${expected}`);
|
|
50
|
+
}
|
|
51
|
+
for (const item of data.generated_runtime_files) {
|
|
52
|
+
const generatedPath = path.join(dest, item.path);
|
|
53
|
+
if (!fs.existsSync(generatedPath)) throw new Error(`generated file missing: ${generatedPath}`);
|
|
54
|
+
if (path.resolve(catalog) === path.resolve(generatedPath)) throw new Error("activation generated over kits/catalog.json");
|
|
55
|
+
}
|
|
56
|
+
const classes = new Set(data.skipped_assets.map((item) => item.asset_class));
|
|
57
|
+
for (const expected of ["skills", "docs", "adapters", "evals", "assets"]) {
|
|
58
|
+
if (!classes.has(expected)) throw new Error(`missing skipped asset class: ${expected}`);
|
|
59
|
+
}
|
|
60
|
+
for (const item of data.skipped_assets) {
|
|
61
|
+
for (const key of ["asset_class", "path", "kit_id", "asset_id", "reason"]) {
|
|
62
|
+
if (!(key in item)) throw new Error(`skipped asset missing ${key}: ${JSON.stringify(item)}`);
|
|
63
|
+
}
|
|
64
|
+
if (!item.reason.includes("diagnostic-only")) throw new Error(`unexpected skip reason: ${item.reason}`);
|
|
65
|
+
}
|
|
66
|
+
if (!fs.existsSync(path.join(dest, ".flow-agents/runtime/codex/activation.json"))) throw new Error("runtime activation manifest missing");
|
|
67
|
+
console.log("ok");
|
|
68
|
+
NODE
|
|
69
|
+
then
|
|
70
|
+
pass "diagnostics report default adapter, generated files, and skipped unsupported assets"
|
|
71
|
+
else
|
|
72
|
+
fail "activation diagnostics are incomplete"
|
|
73
|
+
sed -n '1,220p' "$OUT"
|
|
74
|
+
fi
|
|
75
|
+
|
|
76
|
+
if [[ "$CATALOG_HASH_BEFORE" == "$(shasum -a 256 "$ROOT/kits/catalog.json" | awk '{print $1}')" ]]; then
|
|
77
|
+
pass "activation does not mutate source kits/catalog.json"
|
|
78
|
+
else
|
|
79
|
+
fail "source kits/catalog.json changed during activation"
|
|
80
|
+
fi
|
|
81
|
+
|
|
82
|
+
if flow_agents_node "$CLI" activate --dest "$DEST" --source-root "$ROOT" --adapter unknown --format json >"$UNKNOWN_OUT" 2>&1; then
|
|
83
|
+
fail "unknown adapter should fail"
|
|
84
|
+
sed -n '1,120p' "$UNKNOWN_OUT"
|
|
85
|
+
elif node - "$UNKNOWN_OUT" <<'NODE'
|
|
86
|
+
const fs = require("node:fs");
|
|
87
|
+
const data = JSON.parse(fs.readFileSync(process.argv[2], "utf8"));
|
|
88
|
+
if (!data.available_adapters?.includes("codex-local")) throw new Error("available adapters missing codex-local");
|
|
89
|
+
if (!data.errors?.length) throw new Error("unknown adapter did not report errors");
|
|
90
|
+
console.log("ok");
|
|
91
|
+
NODE
|
|
92
|
+
then
|
|
93
|
+
pass "unknown adapter reports available adapters"
|
|
94
|
+
else
|
|
95
|
+
fail "unknown adapter diagnostics missing"
|
|
96
|
+
sed -n '1,120p' "$UNKNOWN_OUT"
|
|
97
|
+
fi
|
|
98
|
+
|
|
99
|
+
echo ""
|
|
100
|
+
if [[ "$errors" -eq 0 ]]; then
|
|
101
|
+
echo "Runtime adapter activation checks passed."
|
|
102
|
+
exit 0
|
|
103
|
+
fi
|
|
104
|
+
|
|
105
|
+
echo "Runtime adapter activation checks failed: $errors issue(s)."
|
|
106
|
+
exit 1
|
|
@@ -0,0 +1,485 @@
|
|
|
1
|
+
#!/usr/bin/env bash
|
|
2
|
+
# test_telemetry.sh — Layer 2: Telemetry contract validation
|
|
3
|
+
# Tests that the telemetry pipeline produces correct event schemas
|
|
4
|
+
#
|
|
5
|
+
# NOTE: telemetry.sh runs fire-and-forget (backgrounds main + disown) so stdout
|
|
6
|
+
# capture doesn't work. All tests write to a temp log file and read from there,
|
|
7
|
+
# with a short sleep to let the background process finish.
|
|
8
|
+
set -uo pipefail

# Repository root is two directories above this script.
ROOT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/../.." && pwd)"

# Prefer the telemetry scripts inside the repository; otherwise fall back to
# the copy under $HOME/.flow-agents.
if [[ -d "$ROOT_DIR/context/scripts/telemetry" ]]; then
  TELEMETRY_DIR="$ROOT_DIR/context/scripts/telemetry"
  DISCOVER_SCRIPT="$ROOT_DIR/context/scripts/discover-agents.sh"
else
  TELEMETRY_DIR="$HOME/.flow-agents/context/scripts/telemetry"
  DISCOVER_SCRIPT="$HOME/.flow-agents/context/scripts/discover-agents.sh"
fi
TELEMETRY_SH="${TELEMETRY_DIR}/telemetry.sh"

# Scratch directory for every log and capture file written by this script.
# Abort if it cannot be created: with an empty TMPDIR_EVAL the paths derived
# from it below would resolve to the filesystem root.
TMPDIR_EVAL=$(mktemp -d /tmp/eval-telemetry-test.XXXXXX) || {
  echo "Failed to create temporary directory" >&2
  exit 1
}
# Remove the scratch directory on every exit path, including interrupted or
# early exits; repeating the removal later in the script is harmless.
trap 'rm -rf -- "$TMPDIR_EVAL"' EXIT
TMPLOG="${TMPDIR_EVAL}/test-output.jsonl"

# Running totals maintained by _pass and _fail.
pass=0
fail=0
|
|
22
|
+
|
|
23
|
+
# Report a successful check ($1 = description) and add one to the pass counter.
_pass() {
  printf ' ✓ %s\n' "$1"
  pass=$((pass + 1))
}
|
|
24
|
+
# Report a failed check ($1 = description) and add one to the fail counter.
_fail() {
  printf ' ✗ %s\n' "$1"
  fail=$((fail + 1))
}
|
|
25
|
+
|
|
26
|
+
#######################################
# Run telemetry.sh for one hook event and print the newest line it logged.
# Globals:
#   TMPLOG, TMPDIR_EVAL, TELEMETRY_SH (read)
#   FLOW_AGENTS_TELEMETRY_RUNTIME (read; forwarded only when non-empty)
# Arguments:
#   $1 - hook type handed to telemetry.sh
#   $2 - agent name handed to telemetry.sh
#   $3 - hook payload, fed to telemetry.sh on stdin
#   $4 - channel name (default: full)
#   $5 - redaction setting for that channel (default: none)
# Outputs:
#   The last line appended to $TMPLOG since the call started, or nothing
#   if no new line showed up within the polling window.
#######################################
_run_telemetry() {
  local hook_type="$1" agent="$2" input="$3" channels="${4:-full}" redact="${5:-none}"

  # Per-channel variables are keyed by the upper-cased channel name.
  local channel_upper
  channel_upper=$(printf '%s' "$channels" | tr '[:lower:]' '[:upper:]')

  # Remember the current log length so only lines added by this call count.
  local before_lines=0
  touch "$TMPLOG"
  before_lines=$(wc -l < "$TMPLOG" | tr -d ' ')

  local env_vars=(
    TELEMETRY_ENABLED=true
    TELEMETRY_CHANNELS="$channels"
    "TELEMETRY_CHANNEL_${channel_upper}_LOG_FILE=$TMPLOG"
    "TELEMETRY_CHANNEL_${channel_upper}_REDACT=$redact"
    FLOW_AGENTS_TELEMETRY_CAPTURE_RAW_HOOK_INPUT=true
    FLOW_AGENTS_TELEMETRY_FOREGROUND=true
    TELEMETRY_CONFIG_FILE="$TMPDIR_EVAL/telemetry.conf"
    TELEMETRY_DATA_DIR="$TMPDIR_EVAL"
    TELEMETRY_SESSION_DIR="$TMPDIR_EVAL/sessions"
  )
  if [[ -n "${FLOW_AGENTS_TELEMETRY_RUNTIME:-}" ]]; then
    env_vars+=(FLOW_AGENTS_TELEMETRY_RUNTIME="$FLOW_AGENTS_TELEMETRY_RUNTIME")
  fi
  mkdir -p "$TMPDIR_EVAL/sessions"

  # printf, not echo: echo would treat a payload such as "-n" as an option
  # and send nothing to the script.
  printf '%s\n' "$input" \
    | env "${env_vars[@]}" bash "$TELEMETRY_SH" "$hook_type" "$agent" 2>/dev/null

  # Poll (up to 50 x 0.1s) until the log has grown past its starting length.
  local i=0 current_lines
  while [[ $i -lt 50 ]]; do
    current_lines=$(wc -l < "$TMPLOG" 2>/dev/null | tr -d ' ')
    [[ "${current_lines:-0}" -gt "$before_lines" ]] && break
    sleep 0.1
    i=$((i + 1))
  done

  # Return the latest new line. telemetry.sh writes asynchronously, so a
  # delayed event from the prior assertion can land after before_lines.
  tail -n +"$((before_lines + 1))" "$TMPLOG" 2>/dev/null | tail -1
}
|
|
63
|
+
|
|
64
|
+
echo "=== Layer 2: Telemetry Contract Validation ==="
echo ""

# --- 1. Telemetry script exists ---
# Nothing else can run without telemetry.sh, so a missing script cleans up
# the scratch directory and ends the run immediately.
echo "--- Script Existence ---"
if [[ ! -f "$TELEMETRY_SH" ]]; then
  _fail "telemetry.sh not found at $TELEMETRY_SH"
  echo "Cannot continue without telemetry script"
  rm -rf "$TMPDIR_EVAL"
  exit 1
fi
_pass "telemetry.sh exists"
|
|
77
|
+
|
|
78
|
+
# Each helper library must be present under the telemetry lib/ directory.
for lib in config.sh session.sh enrich.sh transport.sh redact.sh; do
  if [[ ! -f "${TELEMETRY_DIR}/lib/${lib}" ]]; then
    _fail "lib/${lib} missing"
  else
    _pass "lib/${lib} exists"
  fi
done
|
|
85
|
+
|
|
86
|
+
# --- 2. Event type mapping ---
echo ""
echo "--- Event Type Mapping ---"
mock_json='{"cwd":"/tmp/eval-test","prompt":"test prompt","tool_name":"test_tool","tool_input":{},"tool_response":{}}'

# Each entry is "<hook type>:<expected event_type>".
hook_event_pairs=(
  "agentSpawn:session.start"
  "SessionStart:session.start"
  "stop:session.end"
  "Stop:session.end"
  "SessionEnd:session.end"
  "userPromptSubmit:turn.user"
  "UserPromptSubmit:turn.user"
  "preToolUse:tool.invoke"
  "PreToolUse:tool.invoke"
  "permissionRequest:tool.permission_request"
  "PermissionRequest:tool.permission_request"
  "postToolUse:tool.result"
  "PostToolUse:tool.result"
  "PostToolUseFailure:tool.result"
)

for pair in "${hook_event_pairs[@]}"; do
  hook_type="${pair%%:*}"
  expected="${pair#*:}"

  output=$(_run_telemetry "$hook_type" "eval-test" "$mock_json")

  # No logged line at all is its own failure; skip the type comparison.
  if [[ -z "$output" ]]; then
    _fail "$hook_type → (no output)"
    continue
  fi

  actual_type=$(jq -r '.event_type // empty' <<<"$output" 2>/dev/null)
  if [[ "$actual_type" != "$expected" ]]; then
    _fail "$hook_type → expected '$expected', got '$actual_type'"
  else
    _pass "$hook_type → $actual_type"
  fi
done
|
|
123
|
+
|
|
124
|
+
# --- 3. Schema fields present ---
echo ""
echo "--- Schema Fields ---"
output=$(_run_telemetry "agentSpawn" "eval-test" '{"cwd":"/tmp/eval-test"}')

# Top-level fields expected on the agentSpawn event.
for field in schema_version timestamp session_id event_id event_type agent; do
  val=$(jq -r ".${field} // empty" <<<"$output" 2>/dev/null)
  if [[ -z "$val" ]]; then
    _fail "agentSpawn missing .$field"
  else
    _pass "agentSpawn has .$field = $val"
  fi
done

# Check agent sub-fields
for field in name runtime version; do
  val=$(jq -r ".agent.${field} // empty" <<<"$output" 2>/dev/null)
  if [[ -z "$val" ]]; then
    _fail "agentSpawn missing .agent.$field"
  else
    _pass "agentSpawn has .agent.$field"
  fi
done
|
|
147
|
+
|
|
148
|
+
# --- 4. userPromptSubmit captures prompt ---
echo ""
echo "--- Prompt Capture ---"
prompt_output=$(_run_telemetry "userPromptSubmit" "eval-test" '{"cwd":"/tmp","prompt":"Hello eval test"}')

prompt_text=$(jq -r '.turn.prompt_text // empty' <<<"$prompt_output" 2>/dev/null)
prompt_length=$(jq -r '.turn.prompt_length // empty' <<<"$prompt_output" 2>/dev/null)

# The prompt text must come through unchanged.
if [[ "$prompt_text" != "Hello eval test" ]]; then
  _fail "userPromptSubmit prompt_text: expected 'Hello eval test', got '$prompt_text'"
else
  _pass "userPromptSubmit captures prompt_text"
fi

# The length must be a positive number; stderr is silenced in case the value
# is not numeric.
if [[ "$prompt_length" -gt 0 ]] 2>/dev/null; then
  _pass "userPromptSubmit captures prompt_length ($prompt_length)"
else
  _fail "userPromptSubmit prompt_length missing or zero"
fi
|
|
167
|
+
|
|
168
|
+
# --- 5. preToolUse captures tool info ---
echo ""
echo "--- Tool Capture ---"
tool_output=$(_run_telemetry "preToolUse" "eval-test" '{"session_id":"runtime-session-1","turn_id":"turn-1","transcript_path":"/tmp/transcript.jsonl","hook_event_name":"PreToolUse","model":"test-model","cwd":"/tmp","tool_name":"run shell commands","tool_input":{"command":"echo hi"}}')

# Raw tool name as sent in the payload.
tool_name=$(jq -r '.tool.name // empty' <<<"$tool_output" 2>/dev/null)
if [[ "$tool_name" != "run shell commands" ]]; then
  _fail "preToolUse tool.name: expected 'run shell commands', got '$tool_name'"
else
  _pass "preToolUse captures tool.name"
fi

# For this tool name the normalized name is expected to equal the raw one.
tool_normalized_name=$(jq -r '.tool.normalized_name // empty' <<<"$tool_output" 2>/dev/null)
if [[ "$tool_normalized_name" != "run shell commands" ]]; then
  _fail "preToolUse tool.normalized_name: expected 'run shell commands', got '$tool_normalized_name'"
else
  _pass "preToolUse captures normalized tool name"
fi

# Hook envelope: turn id, runtime session id and the raw command must survive.
hook_turn_id=$(jq -r '.hook.turn_id // empty' <<<"$tool_output" 2>/dev/null)
hook_runtime_session_id=$(jq -r '.hook.runtime_session_id // empty' <<<"$tool_output" 2>/dev/null)
hook_raw_command=$(jq -r '.hook.raw_input.tool_input.command // empty' <<<"$tool_output" 2>/dev/null)
if [[ "$hook_turn_id" == "turn-1" ]] &&
  [[ "$hook_runtime_session_id" == "runtime-session-1" ]] &&
  [[ "$hook_raw_command" == "echo hi" ]]; then
  _pass "preToolUse preserves runtime hook envelope and raw input"
else
  _fail "preToolUse hook envelope incomplete: turn='$hook_turn_id' runtime_session='$hook_runtime_session_id' raw_command='$hook_raw_command'"
fi
|
|
195
|
+
|
|
196
|
+
# PascalCase hook name with a "Bash" tool: the event must still be tool.invoke
# and the normalized tool name must be execute_bash.
runtime_tool_output=$(_run_telemetry "PreToolUse" "eval-test" '{"session_id":"runtime-session-2","turn_id":"turn-runtime","transcript_path":"/tmp/transcript.jsonl","hook_event_name":"PreToolUse","model":"test-model","cwd":"/tmp","tool_name":"Bash","tool_input":{"command":"echo runtime"}}')
runtime_tool_type=$(jq -r '.event_type // empty' <<<"$runtime_tool_output" 2>/dev/null)
runtime_tool_name=$(jq -r '.tool.normalized_name // empty' <<<"$runtime_tool_output" 2>/dev/null)
runtime_turn_id=$(jq -r '.hook.turn_id // empty' <<<"$runtime_tool_output" 2>/dev/null)
if [[ "$runtime_tool_type" == "tool.invoke" ]] &&
  [[ "$runtime_tool_name" == "execute_bash" ]] &&
  [[ "$runtime_turn_id" == "turn-runtime" ]]; then
  _pass "PreToolUse captures runtime-native tool payload"
else
  _fail "PreToolUse runtime-native payload incomplete: type='$runtime_tool_type' tool='$runtime_tool_name' turn='$runtime_turn_id'"
fi
|
|
205
|
+
|
|
206
|
+
# permissionRequest: verify the event type, then the tool names and the
# description copied from tool_input.
permission_output=$(_run_telemetry "permissionRequest" "eval-test" '{"cwd":"/tmp","hook_event_name":"PermissionRequest","tool_name":"Bash","tool_input":{"command":"rm -rf /tmp/example","description":"Run escalated shell command"}}')
permission_event_type=$(jq -r '.event_type // empty' <<<"$permission_output" 2>/dev/null)
permission_tool_name=$(jq -r '.tool.name // empty' <<<"$permission_output" 2>/dev/null)
permission_tool_normalized_name=$(jq -r '.tool.normalized_name // empty' <<<"$permission_output" 2>/dev/null)
permission_description=$(jq -r '.permission.description // empty' <<<"$permission_output" 2>/dev/null)

if [[ "$permission_event_type" != "tool.permission_request" ]]; then
  _fail "permissionRequest event_type: expected 'tool.permission_request', got '$permission_event_type'"
else
  _pass "permissionRequest maps to tool.permission_request"
fi

if [[ "$permission_tool_name" == "Bash" ]] &&
  [[ "$permission_tool_normalized_name" == "execute_bash" ]] &&
  [[ "$permission_description" == "Run escalated shell command" ]]; then
  _pass "permissionRequest captures tool and approval reason"
else
  _fail "permissionRequest missing tool/description: tool='$permission_tool_name' normalized='$permission_tool_normalized_name' description='$permission_description'"
fi
|
|
223
|
+
|
|
224
|
+
# FLOW_AGENTS_TELEMETRY_RUNTIME, set for a single call, must appear as
# agent.runtime in the logged event. Checked for two runtime names.
runtime_output=$(FLOW_AGENTS_TELEMETRY_RUNTIME=codex _run_telemetry "agentSpawn" "eval-test" '{"cwd":"/tmp"}')
runtime_name=$(jq -r '.agent.runtime // empty' <<<"$runtime_output" 2>/dev/null)
if [[ "$runtime_name" != "codex" ]]; then
  _fail "runtime override: expected 'codex', got '$runtime_name'"
else
  _pass "FLOW_AGENTS_TELEMETRY_RUNTIME controls agent.runtime"
fi

claude_runtime_output=$(FLOW_AGENTS_TELEMETRY_RUNTIME=claude-code _run_telemetry "agentSpawn" "eval-test" '{"cwd":"/tmp"}')
claude_runtime_name=$(jq -r '.agent.runtime // empty' <<<"$claude_runtime_output" 2>/dev/null)
if [[ "$claude_runtime_name" != "claude-code" ]]; then
  _fail "claude runtime override: expected 'claude-code', got '$claude_runtime_name'"
else
  _pass "FLOW_AGENTS_TELEMETRY_RUNTIME supports claude-code runtime"
fi
|
|
239
|
+
|
|
240
|
+
# Send one preToolUse payload ($1), wait a second, then apply the jq
# expression in $2 to the first agent.delegate event logged after the call
# started and print the result (nothing if no such event was logged).
_first_delegate_value() {
  local payload="$1" jq_expr="$2" lines_before
  lines_before=$(wc -l < "$TMPLOG" 2>/dev/null | tr -d ' ')
  _run_telemetry "preToolUse" "eval-test" "$payload" >/dev/null
  sleep 1
  tail -n +"$((lines_before + 1))" "$TMPLOG" 2>/dev/null \
    | jq -r "select(.event_type == \"agent.delegate\") | ${jq_expr}" 2>/dev/null \
    | head -1
}

# spawn_agent tool with an agent_type input.
spawn_delegate=$(_first_delegate_value \
  '{"cwd":"/tmp","tool_name":"spawn_agent","tool_input":{"agent_type":"tool-worker"}}' \
  '.delegation.targets[0]')
if [[ "$spawn_delegate" != "tool-worker" ]]; then
  _fail "Codex spawn_agent delegation event missing"
else
  _pass "Codex spawn_agent emits agent.delegate"
fi

# Agent tool with a subagent_type input.
agent_delegate=$(_first_delegate_value \
  '{"cwd":"/tmp","tool_name":"Agent","tool_input":{"subagent_type":"tool-planner"}}' \
  '.delegation.targets[0]')
if [[ "$agent_delegate" != "tool-planner" ]]; then
  _fail "Claude Agent delegation event missing"
else
  _pass "Claude Agent tool emits agent.delegate"
fi

# "delegate to a specialist agent" tool with a list of subagents; all targets
# are compared as one comma-joined string.
kiro_subagent_targets=$(_first_delegate_value \
  '{"cwd":"/tmp","tool_name":"delegate to a specialist agent","tool_input":{"subagents":[{"agent_name":"tool-verifier"},{"agent_name":"tool-code-reviewer"}]}}' \
  '.delegation.targets | join(",")')
if [[ "$kiro_subagent_targets" != "tool-verifier,tool-code-reviewer" ]]; then
  _fail "Kiro delegate to a specialist agent delegation event missing: targets='$kiro_subagent_targets'"
else
  _pass "Kiro delegate to a specialist agent emits agent.delegate"
fi
|
|
269
|
+
|
|
270
|
+
# --- 6. Redaction on analytics channel ---
echo ""
echo "--- Redaction ---"
redacted=$(_run_telemetry "preToolUse" "eval-test" '{"cwd":"/tmp","tool_name":"test","tool_input":{"secret":"value"}}' "analytics" "tool.input,tool.output,turn.prompt_text,hook.raw_input")

# A redacted field reads back from jq as the literal string "null".
redacted_input=$(jq -r '.tool.input' <<<"$redacted" 2>/dev/null)
if [[ "$redacted_input" != "null" ]]; then
  _fail "Analytics channel did not redact tool.input: $redacted_input"
else
  _pass "Analytics channel redacts tool.input"
fi

redacted_raw_input=$(jq -r '.hook.raw_input' <<<"$redacted" 2>/dev/null)
if [[ "$redacted_raw_input" != "null" ]]; then
  _fail "Analytics channel did not redact hook.raw_input: $redacted_raw_input"
else
  _pass "Analytics channel redacts hook.raw_input"
fi
|
|
288
|
+
|
|
289
|
+
# Codex hook wrapper, run with an empty config file and no explicit redaction
# setting: the sensitive fields of the logged event must all read as null.
codex_log="${TMPDIR_EVAL}/codex-full.jsonl"
codex_stdout="${TMPDIR_EVAL}/codex-stdout.txt"
codex_stderr="${TMPDIR_EVAL}/codex-stderr.txt"
codex_config="${TMPDIR_EVAL}/codex-empty.conf"
: > "$codex_config"

codex_env=(
  TELEMETRY_CONFIG_FILE="$codex_config"
  TELEMETRY_DATA_DIR="$TMPDIR_EVAL"
  TELEMETRY_SESSION_DIR="$TMPDIR_EVAL/sessions"
  TELEMETRY_CHANNEL_FULL_LOG_FILE="$codex_log"
  FLOW_AGENTS_CODEX_TELEMETRY_CHANNELS=full
  FLOW_AGENTS_CODEX_TELEMETRY_FOREGROUND=true
  FLOW_AGENTS_TELEMETRY_CAPTURE_RAW_HOOK_INPUT=true
)
printf '%s\n' '{"hook_event_name":"UserPromptSubmit","cwd":"/tmp","prompt":"codex secret","tool_name":"test","tool_input":{"secret":"value"},"tool_response":{"secret":"out"}}' \
  | env "${codex_env[@]}" \
    node "$ROOT_DIR/scripts/hooks/codex-telemetry-hook.js" userPromptSubmit eval-test >"$codex_stdout" 2>"$codex_stderr"

# Wait up to 50 x 0.1s for the log file to become non-empty.
for ((i = 0; i < 50; i++)); do
  [[ -s "$codex_log" ]] && break
  sleep 0.1
done

codex_event=$(head -1 "$codex_log" 2>/dev/null)
codex_prompt=$(jq -r '.turn.prompt_text' <<<"$codex_event" 2>/dev/null)
codex_tool_input=$(jq -r '.tool.input' <<<"$codex_event" 2>/dev/null)
codex_tool_output=$(jq -r '.tool.output' <<<"$codex_event" 2>/dev/null)
codex_raw_input=$(jq -r '.hook.raw_input' <<<"$codex_event" 2>/dev/null)

if [[ "$codex_prompt" == "null" ]] &&
  [[ "$codex_tool_input" == "null" ]] &&
  [[ "$codex_tool_output" == "null" ]] &&
  [[ "$codex_raw_input" == "null" ]]; then
  _pass "Codex hook defaults redact full-channel sensitive fields"
else
  _fail "Codex hook default redaction incomplete: prompt='$codex_prompt' tool_input='$codex_tool_input' tool_output='$codex_tool_output' raw='$codex_raw_input'"
fi

# Source-level guard: the hook must not contain the "|| 'none'" fallback for
# the full channel's redaction setting.
if ! grep -q "TELEMETRY_CHANNEL_FULL_REDACT: process.env.TELEMETRY_CHANNEL_FULL_REDACT || 'none'" "$ROOT_DIR/scripts/hooks/codex-telemetry-hook.js"; then
  _pass "Codex hook source does not default full redaction to none"
else
  _fail "Codex hook still defaults full redaction to none"
fi
|
|
326
|
+
|
|
327
|
+
# Claude hook wrapper: one PreToolUse payload for the Agent tool must yield a
# tool.invoke event tagged with the claude-code runtime, an agent.delegate
# event naming the subagent, and a stdout document whose .continue is true.
claude_log="${TMPDIR_EVAL}/claude-full.jsonl"
claude_stdout="${TMPDIR_EVAL}/claude-stdout.txt"
claude_stderr="${TMPDIR_EVAL}/claude-stderr.txt"
# Reuses the empty config file ($codex_config) created for the Codex check.
printf '%s\n' '{"session_id":"claude-session-1","hook_event_name":"PreToolUse","cwd":"/tmp","tool_name":"Agent","tool_input":{"subagent_type":"tool-verifier","prompt":"verify"}}' \
  | env \
    TELEMETRY_CONFIG_FILE="$codex_config" \
    TELEMETRY_DATA_DIR="$TMPDIR_EVAL" \
    TELEMETRY_SESSION_DIR="$TMPDIR_EVAL/sessions" \
    TELEMETRY_CHANNEL_FULL_LOG_FILE="$claude_log" \
    FLOW_AGENTS_CLAUDE_TELEMETRY_CHANNELS=full \
    FLOW_AGENTS_CLAUDE_TELEMETRY_FOREGROUND=true \
    FLOW_AGENTS_TELEMETRY_CAPTURE_RAW_HOOK_INPUT=true \
    node "$ROOT_DIR/scripts/hooks/claude-telemetry-hook.js" PreToolUse eval-test >"$claude_stdout" 2>"$claude_stderr"

# Wait up to 50 x 0.1s for the log file to become non-empty.
i=0
while [[ $i -lt 50 && ! -s "$claude_log" ]]; do
  sleep 0.1
  i=$((i + 1))
done

claude_event=$(jq -c 'select(.event_type == "tool.invoke")' "$claude_log" 2>/dev/null | head -1)
claude_runtime=$(echo "$claude_event" | jq -r '.agent.runtime // empty' 2>/dev/null)
claude_event_type=$(echo "$claude_event" | jq -r '.event_type // empty' 2>/dev/null)
# Select the delegate event with jq rather than grepping for an exact
# '"event_type":"agent.delegate"' byte sequence, so the match does not depend
# on how the JSON was serialized (spacing, key order on the line).
claude_delegate=$(jq -r 'select(.event_type == "agent.delegate") | .delegation.targets[0]' "$claude_log" 2>/dev/null | head -1)
claude_continue=$(jq -r '.continue // empty' "$claude_stdout" 2>/dev/null)
if [[ "$claude_runtime" == "claude-code" && "$claude_event_type" == "tool.invoke" && "$claude_delegate" == "tool-verifier" && "$claude_continue" == "true" ]]; then
  _pass "Claude telemetry hook emits normalized tool and delegation events"
else
  _fail "Claude telemetry hook output mismatch: runtime='$claude_runtime' event='$claude_event_type' delegate='$claude_delegate' continue='$claude_continue'"
fi
|
|
355
|
+
|
|
356
|
+
# --- 7. Console telemetry transport ---
echo ""
echo "--- Console Transport ---"
console_capture="${TMPDIR_EVAL}/console-request.json"
fake_bin="${TMPDIR_EVAL}/fake-bin"
mkdir -p "$fake_bin"

# Stand-in for curl, put first on PATH below. It makes no request: it reads
# the file given via --config and writes the url, method, headers and JSON
# body it finds there to $FLOW_AGENTS_TEST_CONSOLE_CAPTURE.
cat > "${fake_bin}/curl" <<'SH'
#!/usr/bin/env bash
set -euo pipefail
config_file=""
while [[ $# -gt 0 ]]; do
case "$1" in
--config)
config_file="$2"
shift 2
;;
*)
shift
;;
esac
done
[[ -n "$config_file" && -n "${FLOW_AGENTS_TEST_CONSOLE_CAPTURE:-}" ]]
node - "$config_file" "$FLOW_AGENTS_TEST_CONSOLE_CAPTURE" <<'NODE'
const fs = require("fs");
const [configPath, capturePath] = process.argv.slice(2);
const config = fs.readFileSync(configPath, "utf8");
const lines = config.split(/\r?\n/).filter(Boolean);
const record = { headers: {}, config };
for (const line of lines) {
const match = line.match(/^([^=]+) = "(.*)"$/);
if (!match) continue;
const key = match[1].trim();
const value = match[2];
if (key === "url") record.url = value;
if (key === "request") record.method = value;
if (key === "header") {
const index = value.indexOf(":");
if (index >= 0) record.headers[value.slice(0, index).toLowerCase()] = value.slice(index + 1).trim();
}
if (key === "data-binary" && value.startsWith("@")) {
record.body = JSON.parse(fs.readFileSync(value.slice(1), "utf8"));
}
}
fs.writeFileSync(capturePath, JSON.stringify(record));
NODE
SH
chmod +x "${fake_bin}/curl"

# Environment for the telemetry run. The two timeout values embed a quote and
# a newline followed by config-style text; presumably this probes whether they
# can smuggle extra header/url lines into the curl config (confirm against
# transport.sh). The assertions below require the url and headers to be the
# intended ones.
console_env=(
  PATH="${fake_bin}:$PATH"
  FLOW_AGENTS_TEST_CONSOLE_CAPTURE="$console_capture"
  TELEMETRY_ENABLED=true
  TELEMETRY_CHANNELS=analytics
  TELEMETRY_CHANNEL_ANALYTICS_LOG_FILE="${TMPDIR_EVAL}/console-analytics.jsonl"
  TELEMETRY_CONFIG_FILE="$TMPDIR_EVAL/telemetry.conf"
  TELEMETRY_DATA_DIR="$TMPDIR_EVAL"
  TELEMETRY_SESSION_DIR="$TMPDIR_EVAL/sessions"
  FLOW_AGENTS_TELEMETRY_FOREGROUND=true
  CONSOLE_TELEMETRY_URL="http://127.0.0.1:3737"
  CONSOLE_TELEMETRY_TOKEN="console-token"
  CONSOLE_TENANT_ID="tenant-a"
  CONSOLE_TELEMETRY_CONNECT_TIMEOUT_SECONDS='1" header = "x-bad: bad'
  CONSOLE_TELEMETRY_MAX_TIME_SECONDS='5
url = "https://bad.example"'
)
printf '%s\n' '{"cwd":"/tmp","prompt":"console secret","hook_event_name":"UserPromptSubmit","transcript_path":"/tmp/private/transcript.jsonl","last_assistant_message":"sensitive assistant text"}' \
  | env "${console_env[@]}" bash "$TELEMETRY_SH" userPromptSubmit eval-test 2>/dev/null

# Wait up to 50 x 0.1s for the fake curl to write its capture file.
for ((i = 0; i < 50; i++)); do
  [[ -s "$console_capture" ]] && break
  sleep 0.1
done

console_url=$(jq -r '.url // empty' "$console_capture" 2>/dev/null)
console_method=$(jq -r '.method // empty' "$console_capture" 2>/dev/null)
console_auth=$(jq -r '.headers.authorization // empty' "$console_capture" 2>/dev/null)
console_tenant=$(jq -r '.headers["x-console-tenant-id"] // empty' "$console_capture" 2>/dev/null)
console_event_type=$(jq -r '.body.event_type // empty' "$console_capture" 2>/dev/null)
console_prompt=$(jq -r '.body.turn.prompt_text' "$console_capture" 2>/dev/null)
console_transcript=$(jq -r '.body.hook.transcript_path' "$console_capture" 2>/dev/null)
console_assistant=$(jq -r '.body.hook.last_assistant_message' "$console_capture" 2>/dev/null)

# Request target and headers must be the configured ones, and the prompt,
# transcript path and assistant message must read as null in the posted body.
if [[ "$console_url" == "http://127.0.0.1:3737/api/telemetry/records" ]] &&
  [[ "$console_method" == "POST" ]] &&
  [[ "$console_auth" == "Bearer console-token" ]] &&
  [[ "$console_tenant" == "tenant-a" ]] &&
  [[ "$console_event_type" == "turn.user" ]] &&
  [[ "$console_prompt" == "null" ]] &&
  [[ "$console_transcript" == "null" ]] &&
  [[ "$console_assistant" == "null" ]]; then
  _pass "Console telemetry transport posts redacted event with auth and tenant headers"
else
  _fail "Console telemetry transport mismatch: url='$console_url' method='$console_method' auth='$console_auth' tenant='$console_tenant' event='$console_event_type' prompt='$console_prompt' transcript='$console_transcript' assistant='$console_assistant'"
fi
|
|
438
|
+
|
|
439
|
+
# --- 8. discover-agents.sh finds agent cards ---
echo ""
echo "--- Agent Discovery ---"
if [[ ! -f "$DISCOVER_SCRIPT" ]]; then
  _fail "discover-agents.sh not found"
else
  # Count the card files shipped in the repository; when there are any, the
  # discovery output must report at least that many cards.
  repo_cards=$(find "$ROOT_DIR/agent-cards" -maxdepth 1 -name '*.json' 2>/dev/null | wc -l | tr -d ' ')
  if [[ "$repo_cards" -gt 0 ]]; then
    discover_output=$(bash "$DISCOVER_SCRIPT" 2>/dev/null)
    # grep -c exits non-zero on a count of 0, hence the "|| true".
    card_count=$(grep -c '📋' <<<"$discover_output" || true)
    if [[ "$card_count" -lt "$repo_cards" ]]; then
      _fail "discover-agents.sh found $card_count repo-local agent cards, expected at least $repo_cards"
    else
      _pass "discover-agents.sh found $card_count repo-local agent cards"
    fi
  else
    # Legacy source-package mode
    # Run discovery from the parent of a kiro-agents source directory under
    # $HOME/dev when one exists, otherwise from the current directory.
    workspace_dir="$(find "$HOME/dev" -maxdepth 5 -name "kiro-agents" -path "*/src/*" -type d 2>/dev/null | head -1)"
    if [[ -z "$workspace_dir" ]]; then
      discover_output=$(bash "$DISCOVER_SCRIPT" 2>/dev/null)
    else
      discover_output=$(cd "$(dirname "$workspace_dir")" && bash "$DISCOVER_SCRIPT" 2>/dev/null)
    fi
    card_count=$(grep -c '📋' <<<"$discover_output" || true)
    if [[ "$card_count" -ge 3 ]]; then
      _pass "discover-agents.sh found $card_count legacy agent cards"
    else
      # Fallback: accept the run if at least three agent-card.json files
      # exist in the legacy source tree.
      src_cards=$(find "$HOME/dev" -maxdepth 5 -name "agent-card.json" -path "*/src/*" 2>/dev/null | wc -l | tr -d ' ')
      if [[ "$src_cards" -lt 3 ]]; then
        _fail "discover-agents.sh found 0 cards and only $src_cards in legacy source locations"
      else
        _pass "discover-agents.sh: $src_cards agent cards exist in source (discovery works at runtime from workspace)"
      fi
    fi
  fi
fi
|
|
475
|
+
|
|
476
|
+
# --- Cleanup ---
rm -rf "$TMPDIR_EVAL"

# --- Summary ---
# Print the totals; the exit status is 1 if any check failed, otherwise 0.
echo ""
echo "==========================="
total=$((pass + fail))
echo "Results: ${pass}/${total} passed, ${fail} failed"
if [[ "$fail" -gt 0 ]]; then
  exit 1
fi
exit 0
|