@kontourai/flow-agents 0.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (418) hide show
  1. package/.githooks/pre-push +11 -0
  2. package/.github/workflows/ci.yml +210 -0
  3. package/.github/workflows/docs-pages.yml +52 -0
  4. package/.github/workflows/publish-npm.yml +104 -0
  5. package/AGENTS.md +26 -0
  6. package/CHANGELOG.md +66 -0
  7. package/CODE_OF_CONDUCT.md +25 -0
  8. package/CONTEXT.md +300 -0
  9. package/CONTRIBUTING.md +44 -0
  10. package/LICENSE +201 -0
  11. package/README.md +129 -0
  12. package/SECURITY.md +33 -0
  13. package/agent-cards/dev.json +19 -0
  14. package/agents/dev.json +127 -0
  15. package/agents/tool-code-reviewer.json +61 -0
  16. package/agents/tool-dependencies-updater.json +118 -0
  17. package/agents/tool-explore-config.json +92 -0
  18. package/agents/tool-explore-deps.json +92 -0
  19. package/agents/tool-explore-entry.json +92 -0
  20. package/agents/tool-explore-patterns.json +92 -0
  21. package/agents/tool-explore-structure.json +92 -0
  22. package/agents/tool-explore-tests.json +92 -0
  23. package/agents/tool-planner.json +57 -0
  24. package/agents/tool-playwright.json +145 -0
  25. package/agents/tool-security-reviewer.json +56 -0
  26. package/agents/tool-verifier.json +61 -0
  27. package/agents/tool-worker.json +58 -0
  28. package/build/src/cli/console-learning-projection.js +123 -0
  29. package/build/src/cli/docs-preview.js +39 -0
  30. package/build/src/cli/effective-backlog-settings.js +102 -0
  31. package/build/src/cli/export-bookmarks.js +38 -0
  32. package/build/src/cli/fixture-retirement-audit.js +140 -0
  33. package/build/src/cli/flow-kit.js +138 -0
  34. package/build/src/cli/import-bookmarks.js +50 -0
  35. package/build/src/cli/init.js +239 -0
  36. package/build/src/cli/instinct-cli.js +93 -0
  37. package/build/src/cli/promote-workflow-artifact.js +63 -0
  38. package/build/src/cli/publish-change-helper.js +154 -0
  39. package/build/src/cli/pull-work-provider.js +469 -0
  40. package/build/src/cli/runtime-adapter.js +23 -0
  41. package/build/src/cli/telemetry-doctor.js +221 -0
  42. package/build/src/cli/usage-feedback.js +443 -0
  43. package/build/src/cli/validate-hook-influence.js +152 -0
  44. package/build/src/cli/validate-source-tree.js +31 -0
  45. package/build/src/cli/validate-workflow-artifacts.js +486 -0
  46. package/build/src/cli/veritas-governance.js +262 -0
  47. package/build/src/cli/workflow-artifact-cleanup-audit.js +272 -0
  48. package/build/src/cli/workflow-sidecar.js +816 -0
  49. package/build/src/cli.js +89 -0
  50. package/build/src/flow-kit/validate.js +75 -0
  51. package/build/src/lib/args.js +45 -0
  52. package/build/src/lib/fs.js +62 -0
  53. package/build/src/lib/workflow-learning-projection.js +334 -0
  54. package/build/src/runtime-adapters.js +146 -0
  55. package/build/src/tools/build-universal-bundles.js +397 -0
  56. package/build/src/tools/common.js +56 -0
  57. package/build/src/tools/filter-installed-packs.js +132 -0
  58. package/build/src/tools/generate-context-map.js +198 -0
  59. package/build/src/tools/validate-package.js +64 -0
  60. package/build/src/tools/validate-source-tree.js +622 -0
  61. package/console.telemetry.json +176 -0
  62. package/context/base-rules.md +17 -0
  63. package/context/code-review-standards.md +62 -0
  64. package/context/coding-standards.md +42 -0
  65. package/context/common/orchestrators.md +12 -0
  66. package/context/common/subagents.md +28 -0
  67. package/context/contracts/artifact-contract.md +182 -0
  68. package/context/contracts/builder-kit-workflow-state-contract.md +319 -0
  69. package/context/contracts/delivery-contract.md +69 -0
  70. package/context/contracts/execution-contract.md +53 -0
  71. package/context/contracts/governance-adapter-contract.md +67 -0
  72. package/context/contracts/planning-contract.md +85 -0
  73. package/context/contracts/review-contract.md +104 -0
  74. package/context/contracts/sandbox-policy.md +52 -0
  75. package/context/contracts/verification-contract.md +134 -0
  76. package/context/contracts/work-item-contract.md +215 -0
  77. package/context/deferred/demo-mode.md +33 -0
  78. package/context/deferred/languages/go.md +31 -0
  79. package/context/deferred/languages/python.md +31 -0
  80. package/context/deferred/languages/typescript.md +34 -0
  81. package/context/deferred/parallelization.md +35 -0
  82. package/context/deferred/worktree-isolation.md +24 -0
  83. package/context/development-workflow.md +50 -0
  84. package/context/scripts/context-budget/budget-scan.sh +166 -0
  85. package/context/scripts/detect-tools.sh +3 -0
  86. package/context/scripts/discover-agents.sh +28 -0
  87. package/context/scripts/git-status.sh +49 -0
  88. package/context/scripts/hooks/config-protection.js +79 -0
  89. package/context/scripts/hooks/desktop-notify.sh +39 -0
  90. package/context/scripts/hooks/governance-audit.sh +135 -0
  91. package/context/scripts/hooks/lib/audit-transport.sh +40 -0
  92. package/context/scripts/hooks/lib/hook-flags.js +49 -0
  93. package/context/scripts/hooks/lib/patterns.sh +57 -0
  94. package/context/scripts/hooks/lib/resolve-formatter.js +80 -0
  95. package/context/scripts/hooks/post-edit-accumulator.js +66 -0
  96. package/context/scripts/hooks/pre-commit-quality.js +194 -0
  97. package/context/scripts/hooks/quality-gate.js +93 -0
  98. package/context/scripts/hooks/report-only-guard.js +21 -0
  99. package/context/scripts/hooks/run-hook.js +136 -0
  100. package/context/scripts/hooks/stop-format-typecheck.js +141 -0
  101. package/context/scripts/hooks/stop-goal-fit.js +337 -0
  102. package/context/scripts/hooks/workflow-steering.js +250 -0
  103. package/context/scripts/telemetry/console-presets.sh +14 -0
  104. package/context/scripts/telemetry/install-console-config.sh +214 -0
  105. package/context/scripts/telemetry/lib/config.sh +85 -0
  106. package/context/scripts/telemetry/lib/enrich.sh +115 -0
  107. package/context/scripts/telemetry/lib/redact.sh +22 -0
  108. package/context/scripts/telemetry/lib/session.sh +63 -0
  109. package/context/scripts/telemetry/lib/transport.sh +183 -0
  110. package/context/scripts/telemetry/lib/usage.sh +29 -0
  111. package/context/scripts/telemetry/sync-agents.sh +173 -0
  112. package/context/scripts/telemetry/telemetry.conf +23 -0
  113. package/context/scripts/telemetry/telemetry.sh +387 -0
  114. package/context/scripts/validate-package.sh +89 -0
  115. package/context/settings/backlog-provider-settings.json +54 -0
  116. package/context/templates/core/identity.md +26 -0
  117. package/context/templates/core/user.md +15 -0
  118. package/docs/_config.yml +15 -0
  119. package/docs/_layouts/default.html +87 -0
  120. package/docs/adr/0001-flow-agents-consumes-flow.md +77 -0
  121. package/docs/adr/0002-flow-kits-as-extension-unit.md +13 -0
  122. package/docs/adr/0003-flow-agents-coordinates-kits-and-adapters.md +13 -0
  123. package/docs/adr/0004-gates-expect-surface-claims.md +15 -0
  124. package/docs/adr/0005-kubernetes-inspired-resource-contracts.md +48 -0
  125. package/docs/adr/0006-typescript-first-source-policy.md +98 -0
  126. package/docs/agent-system-guidebook.md +391 -0
  127. package/docs/agent-usage-feedback-loop.md +351 -0
  128. package/docs/assets/favicon.svg +13 -0
  129. package/docs/assets/og-image.png +0 -0
  130. package/docs/assets/site.css +774 -0
  131. package/docs/assets/site.js +139 -0
  132. package/docs/configurable-workflow-routing.md +174 -0
  133. package/docs/context-map.md +145 -0
  134. package/docs/developer-architecture.md +145 -0
  135. package/docs/developer-hook-setup.md +61 -0
  136. package/docs/fixture-ownership.md +44 -0
  137. package/docs/flow-kit-repository-contract.md +180 -0
  138. package/docs/index.md +129 -0
  139. package/docs/kontour-resource-contract.md +358 -0
  140. package/docs/migrations.md +64 -0
  141. package/docs/north-star.md +322 -0
  142. package/docs/operating-layers.md +110 -0
  143. package/docs/repository-structure.md +132 -0
  144. package/docs/sandbox-policy.md +56 -0
  145. package/docs/skills-map.md +203 -0
  146. package/docs/standards-register.md +96 -0
  147. package/docs/veritas-integration.md +165 -0
  148. package/docs/work-item-adapters.md +72 -0
  149. package/docs/workflow-artifact-lifecycle.md +141 -0
  150. package/docs/workflow-eval-strategy.md +295 -0
  151. package/docs/workflow-shared-contracts.md +51 -0
  152. package/docs/workflow-usage-guide.md +443 -0
  153. package/evals/ARCHITECTURE.md +143 -0
  154. package/evals/CONVENTIONS.md +58 -0
  155. package/evals/README.md +128 -0
  156. package/evals/acceptance/run.sh +29 -0
  157. package/evals/acceptance/test_claude_harness.sh +242 -0
  158. package/evals/acceptance/test_codex_harness.sh +108 -0
  159. package/evals/acceptance/test_kiro_harness.sh +128 -0
  160. package/evals/cases/dev/404.html +97 -0
  161. package/evals/cases/dev/code-review.yaml +44 -0
  162. package/evals/cases/dev/dashboard.html +300 -0
  163. package/evals/cases/dev/deliver.yaml +66 -0
  164. package/evals/cases/dev/dependency-update.yaml +16 -0
  165. package/evals/cases/dev/explore.yaml +20 -0
  166. package/evals/cases/dev/index.html +370 -0
  167. package/evals/cases/dev/package-lock.json +28 -0
  168. package/evals/cases/dev/package.json +16 -0
  169. package/evals/cases/dev/plan-work.yaml +20 -0
  170. package/evals/cases/dev/promptfooconfig.yaml +666 -0
  171. package/evals/cases/dev/search-first.yaml +20 -0
  172. package/evals/cases/dev/tdd-workflow.yaml +48 -0
  173. package/evals/cases/dev/verify-work.yaml +44 -0
  174. package/evals/cases/dev/workflow.yaml +34 -0
  175. package/evals/ci/run-baseline.sh +283 -0
  176. package/evals/fixtures/backlog-provider-settings/global-default.json +44 -0
  177. package/evals/fixtures/backlog-provider-settings/project-override.json +53 -0
  178. package/evals/fixtures/builder-kit-workflow-state/baseline-freshness-resolution-hint.json +139 -0
  179. package/evals/fixtures/builder-kit-workflow-state/direct-primitive-stop.json +59 -0
  180. package/evals/fixtures/builder-kit-workflow-state/empty-board-route-shape.json +55 -0
  181. package/evals/fixtures/builder-kit-workflow-state/happy-path.json +71 -0
  182. package/evals/fixtures/builder-kit-workflow-state/mid-work-resume.json +80 -0
  183. package/evals/fixtures/builder-kit-workflow-state/missing-prestep-recovery.json +65 -0
  184. package/evals/fixtures/builder-kit-workflow-state/product-build-chaining.json +60 -0
  185. package/evals/fixtures/builder-kit-workflow-state/stale-continuation-requires-new-probe.json +57 -0
  186. package/evals/fixtures/console-learning-projection/artifacts/console-learning-correction/learning.json +50 -0
  187. package/evals/fixtures/console-learning-projection/artifacts/console-learning-open-route/learning.json +41 -0
  188. package/evals/fixtures/flow-kit-repository/invalid-absolute-path/kit.json +8 -0
  189. package/evals/fixtures/flow-kit-repository/invalid-asset-section/flows/review.flow.json +6 -0
  190. package/evals/fixtures/flow-kit-repository/invalid-asset-section/kit.json +11 -0
  191. package/evals/fixtures/flow-kit-repository/invalid-duplicate-flow/flows/review.flow.json +6 -0
  192. package/evals/fixtures/flow-kit-repository/invalid-duplicate-flow/kit.json +9 -0
  193. package/evals/fixtures/flow-kit-repository/invalid-id/flows/review.flow.json +6 -0
  194. package/evals/fixtures/flow-kit-repository/invalid-id/kit.json +8 -0
  195. package/evals/fixtures/flow-kit-repository/invalid-malformed-json/kit.json +8 -0
  196. package/evals/fixtures/flow-kit-repository/invalid-missing-flow/kit.json +8 -0
  197. package/evals/fixtures/flow-kit-repository/invalid-missing-id/flows/review.flow.json +6 -0
  198. package/evals/fixtures/flow-kit-repository/invalid-missing-id/kit.json +7 -0
  199. package/evals/fixtures/flow-kit-repository/invalid-missing-schema-version/flows/review.flow.json +6 -0
  200. package/evals/fixtures/flow-kit-repository/invalid-missing-schema-version/kit.json +7 -0
  201. package/evals/fixtures/flow-kit-repository/invalid-name/flows/review.flow.json +6 -0
  202. package/evals/fixtures/flow-kit-repository/invalid-name/kit.json +8 -0
  203. package/evals/fixtures/flow-kit-repository/invalid-schema-version/flows/review.flow.json +6 -0
  204. package/evals/fixtures/flow-kit-repository/invalid-schema-version/kit.json +8 -0
  205. package/evals/fixtures/flow-kit-repository/invalid-traversal/kit.json +8 -0
  206. package/evals/fixtures/flow-kit-repository/mixed-runtime-kit/adapters/example.json +3 -0
  207. package/evals/fixtures/flow-kit-repository/mixed-runtime-kit/assets/example.txt +1 -0
  208. package/evals/fixtures/flow-kit-repository/mixed-runtime-kit/docs/README.md +3 -0
  209. package/evals/fixtures/flow-kit-repository/mixed-runtime-kit/flows/runtime.flow.json +26 -0
  210. package/evals/fixtures/flow-kit-repository/mixed-runtime-kit/kit-evals/example.json +3 -0
  211. package/evals/fixtures/flow-kit-repository/mixed-runtime-kit/kit-skills/mixed/SKILL.md +3 -0
  212. package/evals/fixtures/flow-kit-repository/mixed-runtime-kit/kit.json +44 -0
  213. package/evals/fixtures/flow-kit-repository/valid-local-kit/docs/README.md +3 -0
  214. package/evals/fixtures/flow-kit-repository/valid-local-kit/flows/review.flow.json +26 -0
  215. package/evals/fixtures/flow-kit-repository/valid-local-kit/kit.json +20 -0
  216. package/evals/fixtures/hook-influence/cases.json +336 -0
  217. package/evals/fixtures/pull-work-provider/github-issues.json +170 -0
  218. package/evals/fixtures/pull-work-wip-shepherding/global-wip-informs.json +43 -0
  219. package/evals/fixtures/pull-work-wip-shepherding/personal-wip-blocks.json +42 -0
  220. package/evals/fixtures/surface-trust/accepted-claim-trust-report.json +31 -0
  221. package/evals/fixtures/surface-trust/artifact-absent.json +19 -0
  222. package/evals/fixtures/surface-trust/integrity-mismatch-trust-report.json +32 -0
  223. package/evals/fixtures/surface-trust/missing-authority-trust-report.json +27 -0
  224. package/evals/fixtures/surface-trust/provider-absent.json +19 -0
  225. package/evals/fixtures/surface-trust/rejected-claim-trust-report.json +30 -0
  226. package/evals/fixtures/surface-trust/stale-claim-trust-snapshot.json +31 -0
  227. package/evals/fixtures/usage-feedback/sample-full.jsonl +11 -0
  228. package/evals/fixtures/usage-feedback/sample-outcomes.jsonl +1 -0
  229. package/evals/fixtures/veritas-governance-adapter/fake-veritas-pass.sh +18 -0
  230. package/evals/fixtures/veritas-governance-adapter/fake-veritas-secret-fail.sh +10 -0
  231. package/evals/fixtures/veritas-governance-adapter/fake-veritas-unconfigured.sh +4 -0
  232. package/evals/integration/test_bundle_install.sh +541 -0
  233. package/evals/integration/test_console_learning_projection.sh +192 -0
  234. package/evals/integration/test_context_map.sh +65 -0
  235. package/evals/integration/test_effective_backlog_settings.sh +58 -0
  236. package/evals/integration/test_fixture_retirement_audit.sh +58 -0
  237. package/evals/integration/test_flow_agents_statusline.sh +93 -0
  238. package/evals/integration/test_flow_kit_repository.sh +90 -0
  239. package/evals/integration/test_goal_fit_hook.sh +482 -0
  240. package/evals/integration/test_hook_category_behaviors.sh +190 -0
  241. package/evals/integration/test_hook_influence_cases.sh +69 -0
  242. package/evals/integration/test_local_flow_kit_install.sh +145 -0
  243. package/evals/integration/test_publish_change_helper.sh +176 -0
  244. package/evals/integration/test_pull_work_provider.sh +140 -0
  245. package/evals/integration/test_runtime_adapter_activation.sh +106 -0
  246. package/evals/integration/test_telemetry.sh +485 -0
  247. package/evals/integration/test_telemetry_doctor.sh +193 -0
  248. package/evals/integration/test_usage_feedback_dashboard.sh +169 -0
  249. package/evals/integration/test_usage_feedback_global.sh +117 -0
  250. package/evals/integration/test_usage_feedback_import.sh +227 -0
  251. package/evals/integration/test_usage_feedback_outcomes.sh +165 -0
  252. package/evals/integration/test_usage_feedback_report.sh +263 -0
  253. package/evals/integration/test_veritas_governance_adapter.sh +235 -0
  254. package/evals/integration/test_workflow_artifact_cleanup_audit.sh +287 -0
  255. package/evals/integration/test_workflow_artifacts.sh +1247 -0
  256. package/evals/integration/test_workflow_sidecar_writer.sh +2112 -0
  257. package/evals/integration/test_workflow_steering_hook.sh +337 -0
  258. package/evals/lib/assertions/delegated-to.js +40 -0
  259. package/evals/lib/assertions/max-tool-calls.js +15 -0
  260. package/evals/lib/assertions/no-write-tools.js +27 -0
  261. package/evals/lib/assertions/pass-at-k.js +39 -0
  262. package/evals/lib/assertions/telemetry-utils.js +105 -0
  263. package/evals/lib/assertions/tool-called.js +39 -0
  264. package/evals/lib/assertions/verify-after-fix.js +61 -0
  265. package/evals/lib/claude-judge.sh +40 -0
  266. package/evals/lib/claude-provider.sh +74 -0
  267. package/evals/lib/codex-judge.sh +39 -0
  268. package/evals/lib/codex-provider.sh +81 -0
  269. package/evals/lib/eval-dev.sh +5 -0
  270. package/evals/lib/eval-judge.sh +22 -0
  271. package/evals/lib/eval-provider.sh +26 -0
  272. package/evals/lib/eval-report.sh +73 -0
  273. package/evals/lib/kiro-dev.sh +4 -0
  274. package/evals/lib/kiro-judge.sh +17 -0
  275. package/evals/lib/kiro-provider.sh +62 -0
  276. package/evals/lib/node.sh +111 -0
  277. package/evals/promptfooconfig.yaml +70 -0
  278. package/evals/run.sh +309 -0
  279. package/evals/static/test_evidence_refs.sh +141 -0
  280. package/evals/static/test_package.sh +407 -0
  281. package/evals/static/test_repo_hooks.sh +68 -0
  282. package/evals/static/test_universal_bundles.sh +274 -0
  283. package/evals/static/test_workflow_skills.sh +1207 -0
  284. package/install.sh +64 -0
  285. package/integrations/veritas/flow-agents.adapter.json +138 -0
  286. package/integrations/veritas/flow-agents.authority-settings.json +26 -0
  287. package/integrations/veritas/flow-agents.repo-standards.json +82 -0
  288. package/kits/builder/flows/build.flow.json +218 -0
  289. package/kits/builder/flows/shape.flow.json +127 -0
  290. package/kits/builder/kit.json +19 -0
  291. package/kits/catalog.json +11 -0
  292. package/package.json +130 -0
  293. package/packaging/README.md +60 -0
  294. package/packaging/manifest.json +173 -0
  295. package/packaging/packs.json +69 -0
  296. package/powers/dependency-checker/POWER.md +20 -0
  297. package/powers/dependency-checker/mcp.json +20 -0
  298. package/powers/playwright/POWER.md +25 -0
  299. package/powers/playwright/mcp.json +12 -0
  300. package/prompts/code-audit.md +123 -0
  301. package/prompts/kcommit.md +88 -0
  302. package/schemas/backlog-provider-settings.schema.json +138 -0
  303. package/schemas/workflow-acceptance.schema.json +216 -0
  304. package/schemas/workflow-critique.schema.json +113 -0
  305. package/schemas/workflow-evidence.schema.json +357 -0
  306. package/schemas/workflow-handoff.schema.json +52 -0
  307. package/schemas/workflow-learning.schema.json +223 -0
  308. package/schemas/workflow-release.schema.json +172 -0
  309. package/schemas/workflow-state.schema.json +80 -0
  310. package/scripts/README.md +111 -0
  311. package/scripts/build-universal-bundles.js +3 -0
  312. package/scripts/check-content-boundary.cjs +99 -0
  313. package/scripts/context-budget/budget-scan.sh +166 -0
  314. package/scripts/detect-tools.sh +3 -0
  315. package/scripts/discover-agents.sh +28 -0
  316. package/scripts/effective-backlog-settings.js +2 -0
  317. package/scripts/filter-installed-packs.js +2 -0
  318. package/scripts/flow-kit.js +2 -0
  319. package/scripts/generate-context-map.js +2 -0
  320. package/scripts/git-status.sh +49 -0
  321. package/scripts/hooks/claude-hook-adapter.js +174 -0
  322. package/scripts/hooks/claude-telemetry-hook.js +115 -0
  323. package/scripts/hooks/codex-hook-adapter.js +176 -0
  324. package/scripts/hooks/codex-telemetry-hook.js +95 -0
  325. package/scripts/hooks/config-protection.js +79 -0
  326. package/scripts/hooks/desktop-notify.sh +39 -0
  327. package/scripts/hooks/governance-audit.sh +135 -0
  328. package/scripts/hooks/lib/audit-transport.sh +40 -0
  329. package/scripts/hooks/lib/hook-flags.js +49 -0
  330. package/scripts/hooks/lib/patterns.sh +57 -0
  331. package/scripts/hooks/lib/resolve-formatter.js +80 -0
  332. package/scripts/hooks/post-edit-accumulator.js +66 -0
  333. package/scripts/hooks/pre-commit-quality.js +194 -0
  334. package/scripts/hooks/quality-gate.js +93 -0
  335. package/scripts/hooks/report-only-guard.js +21 -0
  336. package/scripts/hooks/run-hook.js +136 -0
  337. package/scripts/hooks/stop-format-typecheck.js +141 -0
  338. package/scripts/hooks/stop-goal-fit.js +337 -0
  339. package/scripts/hooks/workflow-steering.js +250 -0
  340. package/scripts/install-codex-home.sh +106 -0
  341. package/scripts/package.json +3 -0
  342. package/scripts/promote-workflow-artifact.js +2 -0
  343. package/scripts/publish-change-helper.js +2 -0
  344. package/scripts/pull-work-provider.js +2 -0
  345. package/scripts/setup-repo-hooks.sh +8 -0
  346. package/scripts/statusline/flow-agents-statusline.js +157 -0
  347. package/scripts/telemetry/console-presets.sh +14 -0
  348. package/scripts/telemetry/install-console-config.sh +214 -0
  349. package/scripts/telemetry/lib/config.sh +85 -0
  350. package/scripts/telemetry/lib/enrich.sh +115 -0
  351. package/scripts/telemetry/lib/redact.sh +22 -0
  352. package/scripts/telemetry/lib/session.sh +63 -0
  353. package/scripts/telemetry/lib/transport.sh +183 -0
  354. package/scripts/telemetry/lib/usage.sh +29 -0
  355. package/scripts/telemetry/sync-agents.sh +173 -0
  356. package/scripts/telemetry/telemetry.conf +23 -0
  357. package/scripts/telemetry/telemetry.sh +387 -0
  358. package/scripts/usage-feedback.js +2 -0
  359. package/scripts/validate-hook-influence-cases.js +2 -0
  360. package/scripts/validate-package.sh +89 -0
  361. package/scripts/validate-source-tree.js +9 -0
  362. package/skills/agentic-engineering/SKILL.md +62 -0
  363. package/skills/browser-test/SKILL.md +51 -0
  364. package/skills/builder-shape/SKILL.md +76 -0
  365. package/skills/context-budget/SKILL.md +40 -0
  366. package/skills/deliver/SKILL.md +241 -0
  367. package/skills/dependency-update/SKILL.md +68 -0
  368. package/skills/design-probe/SKILL.md +107 -0
  369. package/skills/eval-rebuild/SKILL.md +39 -0
  370. package/skills/evidence-gate/SKILL.md +186 -0
  371. package/skills/execute-plan/SKILL.md +110 -0
  372. package/skills/explore/SKILL.md +137 -0
  373. package/skills/feedback-loop/SKILL.md +87 -0
  374. package/skills/fix-bug/SKILL.md +133 -0
  375. package/skills/frontend-design/SKILL.md +80 -0
  376. package/skills/github-cli/SKILL.md +63 -0
  377. package/skills/idea-to-backlog/SKILL.md +267 -0
  378. package/skills/knowledge-capture/SKILL.md +55 -0
  379. package/skills/learning-review/SKILL.md +115 -0
  380. package/skills/pickup-probe/SKILL.md +114 -0
  381. package/skills/plan-work/SKILL.md +176 -0
  382. package/skills/pull-work/SKILL.md +309 -0
  383. package/skills/release-readiness/SKILL.md +121 -0
  384. package/skills/review-work/SKILL.md +161 -0
  385. package/skills/search-first/SKILL.md +66 -0
  386. package/skills/tdd-workflow/SKILL.md +140 -0
  387. package/skills/verify-work/SKILL.md +109 -0
  388. package/src/cli/console-learning-projection.ts +140 -0
  389. package/src/cli/effective-backlog-settings.ts +99 -0
  390. package/src/cli/fixture-retirement-audit.ts +154 -0
  391. package/src/cli/flow-kit.ts +139 -0
  392. package/src/cli/init.ts +248 -0
  393. package/src/cli/promote-workflow-artifact.ts +64 -0
  394. package/src/cli/publish-change-helper.ts +143 -0
  395. package/src/cli/pull-work-provider.ts +481 -0
  396. package/src/cli/runtime-adapter.ts +24 -0
  397. package/src/cli/telemetry-doctor.ts +243 -0
  398. package/src/cli/usage-feedback.ts +418 -0
  399. package/src/cli/validate-hook-influence.ts +119 -0
  400. package/src/cli/validate-source-tree.ts +30 -0
  401. package/src/cli/validate-workflow-artifacts.ts +411 -0
  402. package/src/cli/veritas-governance.ts +322 -0
  403. package/src/cli/workflow-artifact-cleanup-audit.ts +281 -0
  404. package/src/cli/workflow-sidecar.ts +676 -0
  405. package/src/cli.ts +95 -0
  406. package/src/flow-kit/validate.ts +74 -0
  407. package/src/lib/args.ts +43 -0
  408. package/src/lib/fs.ts +62 -0
  409. package/src/lib/workflow-learning-projection.ts +491 -0
  410. package/src/runtime-adapters.ts +154 -0
  411. package/src/tools/build-universal-bundles.ts +366 -0
  412. package/src/tools/common.ts +61 -0
  413. package/src/tools/filter-installed-packs.ts +129 -0
  414. package/src/tools/generate-context-map.ts +199 -0
  415. package/src/tools/validate-package.ts +57 -0
  416. package/src/tools/validate-source-tree.ts +488 -0
  417. package/tsconfig.json +19 -0
  418. package/veritas.claims.json +6 -0
@@ -0,0 +1,70 @@
1
+ description: Flow Agents Eval Suite — dev skill activation and workflow compliance
2
+ providers:
3
+ - id: 'exec: bash lib/kiro-dev.sh'
4
+ label: dev
5
+ prompts:
6
+ - '{{prompt}}'
7
+ tests:
8
+ - vars:
9
+ prompt: Explore this codebase and tell me what it does
10
+ assert:
11
+ - type: javascript
12
+ value: file://lib/assertions/delegated-to.js
13
+ config:
14
+ expected:
15
+ - tool-explore-structure
16
+ - tool-explore-entry
17
+ - tool-explore-deps
18
+ - type: javascript
19
+ value: file://lib/assertions/tool-called.js
20
+ config:
21
+ tool: delegate to a specialist agent
22
+ - type: javascript
23
+ value: file://lib/assertions/no-write-tools.js
24
+ - type: llm-rubric
25
+ value: 'The agent activated the ''explore'' skill and fanned out parallel subagents
26
+
27
+ (tool-explore-structure, entry, deps, patterns, config, tests) in Wave 1.
28
+
29
+ Response contains a synthesis of directory structure, entry points, dependencies, and patterns.
30
+
31
+ '
32
+ options:
33
+ provider: 'exec: bash lib/kiro-dev.sh'
34
+ - vars:
35
+ prompt: Create a simple hello.py script that prints hello world
36
+ assert:
37
+ - type: javascript
38
+ value: file://lib/assertions/tool-called.js
39
+ config:
40
+ tool: todo tool
41
+ - type: javascript
42
+ value: file://lib/assertions/no-write-tools.js
43
+ - type: llm-rubric
44
+ value: 'The dev agent followed its Phase 0-5 workflow: checked existing TODOs,
45
+
46
+ ran git status, created a plan, then attempted implementation.
47
+
48
+ Write tools are blocked so implementation may fail — check it ATTEMPTED the workflow phases.
49
+
50
+ '
51
+ options:
52
+ provider: 'exec: bash lib/kiro-dev.sh'
53
+ - vars:
54
+ prompt: Check this project for outdated dependencies and security vulnerabilities
55
+ assert:
56
+ - type: javascript
57
+ value: file://lib/assertions/delegated-to.js
58
+ config:
59
+ expected:
60
+ - tool-dependencies-updater
61
+ - type: javascript
62
+ value: file://lib/assertions/no-write-tools.js
63
+ - type: llm-rubric
64
+ value: 'The agent activated the dependency-update skill and delegated to
65
+
66
+ tool-dependencies-updater rather than manually checking package files.
67
+
68
+ '
69
+ options:
70
+ provider: 'exec: bash lib/kiro-dev.sh'
package/evals/run.sh ADDED
@@ -0,0 +1,309 @@
1
#!/usr/bin/env bash
# run.sh — Entry point for the agent eval suite
#
# Usage:
#   bash run.sh                                   # Run layers 1+2 (fast, no LLM)
#   bash run.sh static                            # Layer 1 only
#   bash run.sh integration                       # Layer 2 only
#   bash run.sh acceptance                        # Layer 4: harness-native smoke tests
#   bash run.sh acceptance kiro
#   bash run.sh llm                               # Layer 3: all agents
#   bash run.sh llm dev                           # Layer 3: dev agent only
#   bash run.sh llm dev --runtime codex           # Run dev evals through Codex
#   bash run.sh llm dev --runtime claude --judge-runtime codex
#   bash run.sh llm dev --suite regression
#   bash run.sh report dev                        # Generate report from last run
#   bash run.sh llm dev --repeat 3                # Run with pass@k measurement
#
# Note: errexit (-e) is deliberately not enabled here; only nounset and
# pipefail are turned on.
set -uo pipefail

# Absolute path of the directory holding this script, and of its parent.
EVAL_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
ROOT_DIR="$(cd "$EVAL_DIR/.." && pwd)"

# Positional arguments: the layer to run (defaults to "all") and an optional agent.
LAYER="${1:-all}"
AGENT="${2:-}"

# Runtime selection: the FLOW_AGENTS_* variable wins over the shorter EVAL_*
# variable; the target runtime falls back to kiro, the judge runtime to empty.
legacy_runtime="${EVAL_RUNTIME:-kiro}"
RUNTIME="${FLOW_AGENTS_EVAL_RUNTIME:-$legacy_runtime}"
legacy_judge_runtime="${EVAL_JUDGE_RUNTIME:-}"
JUDGE_RUNTIME="${FLOW_AGENTS_EVAL_JUDGE_RUNTIME:-$legacy_judge_runtime}"
unset legacy_runtime legacy_judge_runtime

SUITE="${FLOW_AGENTS_EVAL_SUITE:-full}"

# Use the repo-local promptfoo unless the caller supplied PROMPTFOO_BIN; when
# that path is not executable, fall back to whatever is on PATH (possibly empty).
: "${PROMPTFOO_BIN:=$ROOT_DIR/node_modules/.bin/promptfoo}"
[[ -x "$PROMPTFOO_BIN" ]] || PROMPTFOO_BIN="$(command -v promptfoo 2>/dev/null || true)"
29
+
30
# run_promptfoo ARGS...
#
# Invoke the promptfoo binary resolved into PROMPTFOO_BIN, forwarding every
# argument unchanged. The config directory defaults to "$ROOT_DIR/.promptfoo"
# (created if missing), and the WAL-mode / telemetry switches default to
# "true" unless the caller already exported a value.
#
# Returns 127 with a hint on stderr when no promptfoo binary was found;
# otherwise returns promptfoo's own exit status.
run_promptfoo() {
  [[ -n "$PROMPTFOO_BIN" ]] || {
    echo "promptfoo is not installed. Run 'npm install' from the repo root." >&2
    return 127
  }

  local state_dir="${PROMPTFOO_CONFIG_DIR:-$ROOT_DIR/.promptfoo}"
  mkdir -p "$state_dir"

  local wal_off="${PROMPTFOO_DISABLE_WAL_MODE:-true}"
  local telemetry_off="${PROMPTFOO_DISABLE_TELEMETRY:-true}"

  # Per-command assignments: visible to promptfoo only, not to this shell.
  PROMPTFOO_CONFIG_DIR="$state_dir" \
  PROMPTFOO_DISABLE_WAL_MODE="$wal_off" \
  PROMPTFOO_DISABLE_TELEMETRY="$telemetry_off" \
    "$PROMPTFOO_BIN" "$@"
}
42
+
43
# parse_runtime_args ARGS...
#
# Consume the eval-runner flags from ARGS and leave everything else for
# promptfoo.
#
# Recognised flags (both "--flag value" and "--flag=value" forms):
#   --runtime        sets RUNTIME
#   --judge-runtime  sets JUDGE_RUNTIME (defaults to RUNTIME when left empty)
#   --suite          sets SUITE (smoke | regression | capability | full)
#
# Globals written: RUNTIME, JUDGE_RUNTIME, SUITE, EVAL_ARGS.
# EVAL_ARGS receives the suite-specific promptfoo filter (if any) followed by
# every unrecognised argument in its original order.
#
# Exits the shell with status 1 on a missing flag value or an unsupported
# runtime / judge runtime / suite.
parse_runtime_args() {
  local rest=()
  while [[ $# -gt 0 ]]; do
    case "$1" in
      --runtime)
        if [[ -z "${2:-}" ]]; then
          echo "--runtime requires kiro, codex, or claude" >&2
          exit 1
        fi
        RUNTIME="$2"
        shift 2
        ;;
      --runtime=*)
        RUNTIME="${1#--runtime=}"
        shift
        ;;
      --judge-runtime)
        if [[ -z "${2:-}" ]]; then
          echo "--judge-runtime requires kiro, codex, or claude" >&2
          exit 1
        fi
        JUDGE_RUNTIME="$2"
        shift 2
        ;;
      --judge-runtime=*)
        JUDGE_RUNTIME="${1#--judge-runtime=}"
        shift
        ;;
      --suite)
        if [[ -z "${2:-}" ]]; then
          echo "--suite requires smoke, regression, capability, or full" >&2
          exit 1
        fi
        SUITE="$2"
        shift 2
        ;;
      --suite=*)
        SUITE="${1#--suite=}"
        shift
        ;;
      *)
        rest+=("$1")
        shift
        ;;
    esac
  done

  case "$RUNTIME" in
    kiro|Claude\ Code|codex|claude|claude-code) ;;
    *)
      echo "Unsupported eval runtime '$RUNTIME' (expected kiro, codex, or claude)" >&2
      exit 1
      ;;
  esac

  # The judge grades with the same runtime as the target unless overridden.
  JUDGE_RUNTIME="${JUDGE_RUNTIME:-$RUNTIME}"
  case "$JUDGE_RUNTIME" in
    kiro|Claude\ Code|codex|claude|claude-code) ;;
    *)
      echo "Unsupported judge runtime '$JUDGE_RUNTIME' (expected kiro, codex, or claude)" >&2
      exit 1
      ;;
  esac

  # ${rest[@]+"${rest[@]}"} expands to nothing when the array is empty.
  # A plain "${rest[@]}" aborts with "unbound variable" under `set -u` on
  # bash < 4.4 (e.g. the macOS system bash) when no extra arguments were given.
  case "$SUITE" in
    smoke)
      rest=(--filter-first-n 3 ${rest[@]+"${rest[@]}"})
      ;;
    regression)
      rest=(--filter-metadata type=regression ${rest[@]+"${rest[@]}"})
      ;;
    capability)
      rest=(--filter-metadata type=capability ${rest[@]+"${rest[@]}"})
      ;;
    full|"")
      ;;
    *)
      echo "Unsupported suite '$SUITE' (expected smoke, regression, capability, or full)" >&2
      exit 1
      ;;
  esac

  EVAL_ARGS=(${rest[@]+"${rest[@]}"})
}
123
+
124
# run_static
#
# Layer 1: run every static check script under "$EVAL_DIR/static" in a fixed
# order, separated by a blank line. A failing script does not stop the
# remaining ones; the function returns 1 if any script failed, 0 otherwise.
run_static() {
  local -a checks=(
    test_package.sh
    test_universal_bundles.sh
    test_workflow_skills.sh
    test_evidence_refs.sh
    test_repo_hooks.sh
  )
  local status=0
  local idx

  echo ""
  echo "╔══════════════════════════════════════╗"
  echo "║ Layer 1: Static Package Validation ║"
  echo "╚══════════════════════════════════════╝"

  for idx in "${!checks[@]}"; do
    # Blank separator between scripts, none before the first or after the last.
    if (( idx > 0 )); then
      echo ""
    fi
    if ! bash "$EVAL_DIR/static/${checks[idx]}"; then
      status=1
    fi
  done

  return $status
}
141
+
142
# Layer 2: run each integration script under $EVAL_DIR/integration, in the
# order listed below.
#
# Every script is executed even if an earlier one fails, so a single run
# reports all failures.
#
# Returns: 0 when every script succeeded, 1 when at least one failed.
run_integration() {
  local status=0
  local is_first=1
  local script
  local -a scripts=(
    test_telemetry.sh
    test_telemetry_doctor.sh
    test_usage_feedback_outcomes.sh
    test_usage_feedback_import.sh
    test_usage_feedback_report.sh
    test_usage_feedback_dashboard.sh
    test_usage_feedback_global.sh
    test_goal_fit_hook.sh
    test_hook_category_behaviors.sh
    test_workflow_artifacts.sh
    test_workflow_artifact_cleanup_audit.sh
    test_fixture_retirement_audit.sh
    test_publish_change_helper.sh
    test_workflow_sidecar_writer.sh
    test_veritas_governance_adapter.sh
    test_workflow_steering_hook.sh
    test_hook_influence_cases.sh
    test_flow_agents_statusline.sh
    test_context_map.sh
    test_flow_kit_repository.sh
    test_local_flow_kit_install.sh
    test_runtime_adapter_activation.sh
    test_bundle_install.sh
  )

  echo ""
  echo "╔══════════════════════════════════════════╗"
  echo "║ Layer 2: Telemetry Contract Validation ║"
  echo "╚══════════════════════════════════════════╝"

  for script in "${scripts[@]}"; do
    # A blank line separates consecutive scripts; none is printed before the first.
    if [[ $is_first -eq 0 ]]; then
      echo ""
    fi
    is_first=0

    if ! bash "$EVAL_DIR/integration/$script"; then
      status=1
    fi
  done

  return "$status"
}
195
+
196
# Launch the promptfoo eval for one agent.
#
# Arguments:
#   $1 - agent name; its cases live in $EVAL_DIR/cases/<agent>
#   $2 - path of the JSON results file to write
#
# Reads RUNTIME, JUDGE_RUNTIME, SUITE and EVAL_ARGS, and exports the
# FLOW_AGENTS_EVAL_* / KIRO_EVAL_AGENT variables for the eval process only.
# Exits the script if the case directory cannot be entered. The exit status of
# the eval itself is passed back to the caller unchanged.
_run_llm_agent_eval() {
  local agent_name="$1"
  local results_path="$2"
  local case_dir="$EVAL_DIR/cases/$agent_name"

  # The eval is launched from the agent's case directory; never run it from
  # whatever directory we happened to be in if the cd fails.
  if ! cd "$case_dir"; then
    echo "Cannot enter eval case directory: $case_dir" >&2
    exit 1
  fi
  mkdir -p "$EVAL_DIR/results"

  # ${arr[@]+"${arr[@]}"} expands to nothing for an empty array without
  # tripping `set -u` on Bash releases older than 4.4.
  FLOW_AGENTS_EVAL_RUNTIME="$RUNTIME" \
    FLOW_AGENTS_EVAL_JUDGE_RUNTIME="${JUDGE_RUNTIME:-$RUNTIME}" \
    FLOW_AGENTS_EVAL_SUITE="$SUITE" \
    FLOW_AGENTS_EVAL_AGENT="$agent_name" \
    KIRO_EVAL_AGENT="$agent_name" \
    run_promptfoo eval --no-cache --output "$results_path" ${EVAL_ARGS[@]+"${EVAL_ARGS[@]}"}
}

# Layer 3: run the LLM behavioral evals.
#
# Arguments: runtime/suite flags plus pass-through options, all handed to
# parse_runtime_args, which populates RUNTIME, JUDGE_RUNTIME, SUITE and
# EVAL_ARGS.
#
# When AGENT is non-empty only that agent is evaluated, and the script exits
# with status 1 if it has no promptfooconfig.yaml. Otherwise every directory
# under $EVAL_DIR/cases that contains a promptfooconfig.yaml is evaluated in
# turn. Results are written to $EVAL_DIR/results/<agent>-<runtime>-<date>.json.
#
# The exit status of individual eval runs is not inspected, so one failing
# agent does not stop the remaining agents from running.
run_llm() {
  parse_runtime_args "$@"

  local agent_name
  local case_dir
  local config_path
  local output_file

  echo ""
  echo "╔═══════════════════════════════════════╗"
  echo "║ Layer 3: LLM Behavioral Evals ║"
  echo "╚═══════════════════════════════════════╝"
  echo ""
  echo "Runtime: $RUNTIME"
  echo "Judge Runtime: ${JUDGE_RUNTIME:-$RUNTIME}"
  echo "Suite: $SUITE"
  echo ""

  if [[ -n "$AGENT" ]]; then
    config_path="$EVAL_DIR/cases/$AGENT/promptfooconfig.yaml"
    if [[ ! -f "$config_path" ]]; then
      # Diagnostics go to stderr, like the argument-validation errors.
      echo "No config found for agent '$AGENT' at $config_path" >&2
      exit 1
    fi

    echo "Running evals for: $AGENT"
    output_file="$EVAL_DIR/results/${AGENT}-${RUNTIME}-$(date +%Y-%m-%d).json"
    _run_llm_agent_eval "$AGENT" "$output_file"
    echo ""
    echo "Results saved to: $output_file"
  else
    echo "Running all agent evals..."
    for case_dir in "$EVAL_DIR"/cases/*/; do
      # Directories without a promptfoo config are not eval suites.
      [[ -f "$case_dir/promptfooconfig.yaml" ]] || continue

      agent_name=$(basename "$case_dir")
      echo ""
      echo "--- $agent_name ---"
      output_file="$EVAL_DIR/results/${agent_name}-${RUNTIME}-$(date +%Y-%m-%d).json"
      _run_llm_agent_eval "$agent_name" "$output_file"
    done
  fi

  echo ""
  echo "View results: npm run promptfoo:view"
}
237
+
238
# Layer 4: print the banner and delegate to the acceptance harness runner.
#
# The harness target is taken from AGENT and falls back to "all" when AGENT is
# unset or empty. The function's status is the status of the harness script.
run_acceptance() {
  local target
  target="${AGENT:-all}"

  printf '%s\n' \
    "" \
    "╔═══════════════════════════════════════╗" \
    "║ Layer 4: Harness Acceptance ║" \
    "╚═══════════════════════════════════════╝" \
    ""

  bash "$EVAL_DIR/acceptance/run.sh" "$target"
}
247
+
248
# Render a report for an agent from its most recently modified result files.
#
# Arguments:
#   $1 - agent name (required; the shell aborts with a usage message if missing)
#
# The newest matching result file and, when present, the second newest are
# passed to lib/eval-report.sh. Its output is shown on stdout and also saved
# to $EVAL_DIR/results/reports/<date>-<agent>.md. Exits with status 1 when the
# agent has no result files.
run_report() {
  local agent_name="${1:?Usage: bash run.sh report <agent>}"
  local listing
  local latest
  local previous
  local report_file

  # One newest-first listing; line 1 is the latest run, line 2 the one before it.
  listing=$(ls -t "$EVAL_DIR/results/${agent_name}"-*.json 2>/dev/null)
  latest=$(printf '%s\n' "$listing" | head -1)
  if [[ -z "$latest" ]]; then
    echo "No results found for agent '$agent_name' in $EVAL_DIR/results/"
    exit 1
  fi
  previous=$(printf '%s\n' "$listing" | sed -n '2p')

  echo ""
  echo "╔══════════════════════════════╗"
  echo "║ Eval Report: $agent_name"
  echo "╚══════════════════════════════╝"
  echo ""

  mkdir -p "$EVAL_DIR/results/reports"
  report_file="$EVAL_DIR/results/reports/$(date +%Y-%m-%d)-${agent_name}.md"
  bash "$EVAL_DIR/lib/eval-report.sh" "$latest" "$previous" | tee "$report_file"

  echo ""
  echo "Report saved to: $report_file"
}
271
+
272
# Entry point: dispatch on the requested layer.
case "$LAYER" in
  static)
    run_static
    ;;
  integration)
    run_integration
    ;;
  llm)
    shift
    # The word after "llm" is the agent name unless it is already a flag.
    case "${1:-}" in
      --*)
        AGENT=""
        ;;
      *)
        AGENT="${1:-}"
        if [[ $# -gt 0 ]]; then
          shift
        fi
        ;;
    esac
    run_llm "$@"
    ;;
  acceptance)
    shift
    AGENT="${1:-all}"
    run_acceptance
    ;;
  report)
    shift
    run_report "$@"
    ;;
  all)
    # Run both layers before reporting, so one summary covers both of them.
    run_static
    static_exit=$?
    run_integration
    integration_exit=$?

    static_label=PASS
    if [[ $static_exit -ne 0 ]]; then
      static_label=FAIL
    fi
    integration_label=PASS
    if [[ $integration_exit -ne 0 ]]; then
      integration_label=FAIL
    fi

    echo ""
    echo "╔══════════════════════════╗"
    echo "║ Summary: Layers 1 + 2 ║"
    echo "╚══════════════════════════╝"
    echo " Static: $static_label"
    echo " Integration: $integration_label"
    echo ""

    if ! [[ $static_exit -eq 0 && $integration_exit -eq 0 ]]; then
      echo "Fix Layer 1/2 failures before running Layer 3."
      exit 1
    fi
    echo "Layers 1+2 passed. Run 'bash run.sh acceptance [kiro|claude|codex]' for harness smoke tests or 'bash run.sh llm [dev] [--runtime kiro|codex|claude] [--judge-runtime kiro|codex|claude]' for behavioral evals."
    ;;
  *)
    echo "Usage: bash run.sh [static|integration|acceptance|llm|report|all] [target]"
    exit 1
    ;;
esac
@@ -0,0 +1,141 @@
1
#!/usr/bin/env bash
# test_evidence_refs.sh — Structured evidence reference schema checks
#
# Compiles schemas/workflow-acceptance.schema.json and
# schemas/workflow-evidence.schema.json with Ajv (JSON Schema 2020-12 build)
# and checks that:
#   * fixtures using structured evidence refs validate, and
#   * legacy string refs and incomplete structured refs are rejected.
#
# Exit status: 0 when the check passes, 1 otherwise.
set -uo pipefail

ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")/../.." && pwd)"
source "$ROOT/evals/lib/node.sh"

# The embedded Node script builds its schema paths from process.cwd(), so pin
# the working directory to the package root. Without this the check only works
# when launched from that directory.
# NOTE(review): the bare "ajv" import is presumably resolved from the working
# directory as well — confirm.
if ! cd "$ROOT"; then
  echo "Cannot enter package root: $ROOT" >&2
  exit 1
fi

errors=0
pass() { echo " ✓ $1"; }
fail() { echo " ✗ $1"; errors=$((errors + 1)); }

echo "=== Evidence Ref Schema Checks ==="

if node --input-type=module <<'NODE'
import Ajv2020 from "ajv/dist/2020.js";
import fs from "node:fs";
import path from "node:path";

const root = process.cwd();
const ajv = new Ajv2020({ allErrors: true });

// Read a schema file from <root>/schemas and parse it as JSON.
const loadSchema = (fileName) =>
  JSON.parse(fs.readFileSync(path.join(root, "schemas", fileName), "utf8"));

const acceptanceSchema = loadSchema("workflow-acceptance.schema.json");
const evidenceSchema = loadSchema("workflow-evidence.schema.json");
const validateAcceptance = ajv.compile(acceptanceSchema);
const validateEvidence = ajv.compile(evidenceSchema);

// Valid acceptance document: one criterion citing a command ref and a source ref.
const acceptance = {
  schema_version: "1.0",
  task_slug: "structured-evidence-ref-fixture",
  criteria: [
    {
      id: "AC1",
      description: "Behavior claim cites command and source evidence.",
      status: "pass",
      evidence_refs: [
        {
          kind: "command",
          excerpt: "npm run eval:static --silent",
          summary: "Static evals passed."
        },
        {
          kind: "source",
          url: "https://github.com/example/repo/blob/0123456789abcdef0123456789abcdef01234567/src/index.ts#L10-L18",
          file: "src/index.ts",
          line_start: 10,
          line_end: 18,
          excerpt: "export function implementedBehavior() { return true; }"
        }
      ]
    }
  ],
  goal_fit: {
    status: "pass",
    summary: "Structured refs validate."
  }
};

// Valid evidence document: one check with a source artifact ref plus one
// provider ref under external_evidence.
const evidence = {
  schema_version: "1.0",
  task_slug: "structured-evidence-ref-fixture",
  verdict: "pass",
  checks: [
    {
      id: "static-eval",
      kind: "test",
      status: "pass",
      command: "npm run eval:static --silent",
      summary: "Static evals passed.",
      artifact_refs: [
        {
          kind: "source",
          file: "evals/static/test_evidence_refs.sh",
          line_start: 1,
          line_end: 1,
          excerpt: "test_evidence_refs.sh - Structured evidence reference schema checks"
        }
      ]
    }
  ],
  external_evidence: [
    {
      system: "github",
      ref: {
        kind: "provider",
        url: "https://github.com/example/repo/actions/runs/1",
        summary: "Provider check run."
      },
      summary: "Provider evidence."
    }
  ]
};

// Each invalid fixture is a deep copy of a valid one with a single defect, so
// a rejection can only be caused by that defect.
const legacyAcceptance = structuredClone(acceptance);
legacyAcceptance.criteria[0].evidence_refs = ["legacy-string-ref"];

const invalidSource = structuredClone(evidence);
delete invalidSource.checks[0].artifact_refs[0].excerpt;

const emptyArtifact = structuredClone(evidence);
emptyArtifact.checks[0].artifact_refs = [{ kind: "artifact" }];

const emptyCommand = structuredClone(acceptance);
emptyCommand.criteria[0].evidence_refs = [{ kind: "command" }];

const emptyProvider = structuredClone(evidence);
emptyProvider.external_evidence[0].ref = { kind: "provider" };

if (!validateAcceptance(acceptance)) {
  throw new Error(`structured acceptance refs should validate: ${ajv.errorsText(validateAcceptance.errors)}`);
}
if (!validateEvidence(evidence)) {
  throw new Error(`structured evidence refs should validate: ${ajv.errorsText(validateEvidence.errors)}`);
}

// [validator, fixture, message thrown if the fixture is wrongly accepted]
const mustReject = [
  [validateAcceptance, legacyAcceptance, "legacy string evidence refs should fail"],
  [validateEvidence, invalidSource, "source refs missing excerpt should fail"],
  [validateEvidence, emptyArtifact, "artifact refs without file/url and summary/excerpt should fail"],
  [validateAcceptance, emptyCommand, "command refs without excerpt/summary/url should fail"],
  [validateEvidence, emptyProvider, "provider refs without url should fail"]
];

for (const [validate, fixture, message] of mustReject) {
  if (validate(fixture)) {
    throw new Error(message);
  }
}
NODE
then
  pass "structured refs validate and incomplete refs fail"
else
  fail "structured evidence ref schema check failed"
fi

if [[ $errors -eq 0 ]]; then
  echo "=== PASS ==="
  exit 0
else
  echo "=== FAIL ($errors) ==="
  exit 1
fi