@kontourai/flow-agents 0.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (418) hide show
  1. package/.githooks/pre-push +11 -0
  2. package/.github/workflows/ci.yml +210 -0
  3. package/.github/workflows/docs-pages.yml +52 -0
  4. package/.github/workflows/publish-npm.yml +104 -0
  5. package/AGENTS.md +26 -0
  6. package/CHANGELOG.md +66 -0
  7. package/CODE_OF_CONDUCT.md +25 -0
  8. package/CONTEXT.md +300 -0
  9. package/CONTRIBUTING.md +44 -0
  10. package/LICENSE +201 -0
  11. package/README.md +129 -0
  12. package/SECURITY.md +33 -0
  13. package/agent-cards/dev.json +19 -0
  14. package/agents/dev.json +127 -0
  15. package/agents/tool-code-reviewer.json +61 -0
  16. package/agents/tool-dependencies-updater.json +118 -0
  17. package/agents/tool-explore-config.json +92 -0
  18. package/agents/tool-explore-deps.json +92 -0
  19. package/agents/tool-explore-entry.json +92 -0
  20. package/agents/tool-explore-patterns.json +92 -0
  21. package/agents/tool-explore-structure.json +92 -0
  22. package/agents/tool-explore-tests.json +92 -0
  23. package/agents/tool-planner.json +57 -0
  24. package/agents/tool-playwright.json +145 -0
  25. package/agents/tool-security-reviewer.json +56 -0
  26. package/agents/tool-verifier.json +61 -0
  27. package/agents/tool-worker.json +58 -0
  28. package/build/src/cli/console-learning-projection.js +123 -0
  29. package/build/src/cli/docs-preview.js +39 -0
  30. package/build/src/cli/effective-backlog-settings.js +102 -0
  31. package/build/src/cli/export-bookmarks.js +38 -0
  32. package/build/src/cli/fixture-retirement-audit.js +140 -0
  33. package/build/src/cli/flow-kit.js +138 -0
  34. package/build/src/cli/import-bookmarks.js +50 -0
  35. package/build/src/cli/init.js +239 -0
  36. package/build/src/cli/instinct-cli.js +93 -0
  37. package/build/src/cli/promote-workflow-artifact.js +63 -0
  38. package/build/src/cli/publish-change-helper.js +154 -0
  39. package/build/src/cli/pull-work-provider.js +469 -0
  40. package/build/src/cli/runtime-adapter.js +23 -0
  41. package/build/src/cli/telemetry-doctor.js +221 -0
  42. package/build/src/cli/usage-feedback.js +443 -0
  43. package/build/src/cli/validate-hook-influence.js +152 -0
  44. package/build/src/cli/validate-source-tree.js +31 -0
  45. package/build/src/cli/validate-workflow-artifacts.js +486 -0
  46. package/build/src/cli/veritas-governance.js +262 -0
  47. package/build/src/cli/workflow-artifact-cleanup-audit.js +272 -0
  48. package/build/src/cli/workflow-sidecar.js +816 -0
  49. package/build/src/cli.js +89 -0
  50. package/build/src/flow-kit/validate.js +75 -0
  51. package/build/src/lib/args.js +45 -0
  52. package/build/src/lib/fs.js +62 -0
  53. package/build/src/lib/workflow-learning-projection.js +334 -0
  54. package/build/src/runtime-adapters.js +146 -0
  55. package/build/src/tools/build-universal-bundles.js +397 -0
  56. package/build/src/tools/common.js +56 -0
  57. package/build/src/tools/filter-installed-packs.js +132 -0
  58. package/build/src/tools/generate-context-map.js +198 -0
  59. package/build/src/tools/validate-package.js +64 -0
  60. package/build/src/tools/validate-source-tree.js +622 -0
  61. package/console.telemetry.json +176 -0
  62. package/context/base-rules.md +17 -0
  63. package/context/code-review-standards.md +62 -0
  64. package/context/coding-standards.md +42 -0
  65. package/context/common/orchestrators.md +12 -0
  66. package/context/common/subagents.md +28 -0
  67. package/context/contracts/artifact-contract.md +182 -0
  68. package/context/contracts/builder-kit-workflow-state-contract.md +319 -0
  69. package/context/contracts/delivery-contract.md +69 -0
  70. package/context/contracts/execution-contract.md +53 -0
  71. package/context/contracts/governance-adapter-contract.md +67 -0
  72. package/context/contracts/planning-contract.md +85 -0
  73. package/context/contracts/review-contract.md +104 -0
  74. package/context/contracts/sandbox-policy.md +52 -0
  75. package/context/contracts/verification-contract.md +134 -0
  76. package/context/contracts/work-item-contract.md +215 -0
  77. package/context/deferred/demo-mode.md +33 -0
  78. package/context/deferred/languages/go.md +31 -0
  79. package/context/deferred/languages/python.md +31 -0
  80. package/context/deferred/languages/typescript.md +34 -0
  81. package/context/deferred/parallelization.md +35 -0
  82. package/context/deferred/worktree-isolation.md +24 -0
  83. package/context/development-workflow.md +50 -0
  84. package/context/scripts/context-budget/budget-scan.sh +166 -0
  85. package/context/scripts/detect-tools.sh +3 -0
  86. package/context/scripts/discover-agents.sh +28 -0
  87. package/context/scripts/git-status.sh +49 -0
  88. package/context/scripts/hooks/config-protection.js +79 -0
  89. package/context/scripts/hooks/desktop-notify.sh +39 -0
  90. package/context/scripts/hooks/governance-audit.sh +135 -0
  91. package/context/scripts/hooks/lib/audit-transport.sh +40 -0
  92. package/context/scripts/hooks/lib/hook-flags.js +49 -0
  93. package/context/scripts/hooks/lib/patterns.sh +57 -0
  94. package/context/scripts/hooks/lib/resolve-formatter.js +80 -0
  95. package/context/scripts/hooks/post-edit-accumulator.js +66 -0
  96. package/context/scripts/hooks/pre-commit-quality.js +194 -0
  97. package/context/scripts/hooks/quality-gate.js +93 -0
  98. package/context/scripts/hooks/report-only-guard.js +21 -0
  99. package/context/scripts/hooks/run-hook.js +136 -0
  100. package/context/scripts/hooks/stop-format-typecheck.js +141 -0
  101. package/context/scripts/hooks/stop-goal-fit.js +337 -0
  102. package/context/scripts/hooks/workflow-steering.js +250 -0
  103. package/context/scripts/telemetry/console-presets.sh +14 -0
  104. package/context/scripts/telemetry/install-console-config.sh +214 -0
  105. package/context/scripts/telemetry/lib/config.sh +85 -0
  106. package/context/scripts/telemetry/lib/enrich.sh +115 -0
  107. package/context/scripts/telemetry/lib/redact.sh +22 -0
  108. package/context/scripts/telemetry/lib/session.sh +63 -0
  109. package/context/scripts/telemetry/lib/transport.sh +183 -0
  110. package/context/scripts/telemetry/lib/usage.sh +29 -0
  111. package/context/scripts/telemetry/sync-agents.sh +173 -0
  112. package/context/scripts/telemetry/telemetry.conf +23 -0
  113. package/context/scripts/telemetry/telemetry.sh +387 -0
  114. package/context/scripts/validate-package.sh +89 -0
  115. package/context/settings/backlog-provider-settings.json +54 -0
  116. package/context/templates/core/identity.md +26 -0
  117. package/context/templates/core/user.md +15 -0
  118. package/docs/_config.yml +15 -0
  119. package/docs/_layouts/default.html +87 -0
  120. package/docs/adr/0001-flow-agents-consumes-flow.md +77 -0
  121. package/docs/adr/0002-flow-kits-as-extension-unit.md +13 -0
  122. package/docs/adr/0003-flow-agents-coordinates-kits-and-adapters.md +13 -0
  123. package/docs/adr/0004-gates-expect-surface-claims.md +15 -0
  124. package/docs/adr/0005-kubernetes-inspired-resource-contracts.md +48 -0
  125. package/docs/adr/0006-typescript-first-source-policy.md +98 -0
  126. package/docs/agent-system-guidebook.md +391 -0
  127. package/docs/agent-usage-feedback-loop.md +351 -0
  128. package/docs/assets/favicon.svg +13 -0
  129. package/docs/assets/og-image.png +0 -0
  130. package/docs/assets/site.css +774 -0
  131. package/docs/assets/site.js +139 -0
  132. package/docs/configurable-workflow-routing.md +174 -0
  133. package/docs/context-map.md +145 -0
  134. package/docs/developer-architecture.md +145 -0
  135. package/docs/developer-hook-setup.md +61 -0
  136. package/docs/fixture-ownership.md +44 -0
  137. package/docs/flow-kit-repository-contract.md +180 -0
  138. package/docs/index.md +129 -0
  139. package/docs/kontour-resource-contract.md +358 -0
  140. package/docs/migrations.md +64 -0
  141. package/docs/north-star.md +322 -0
  142. package/docs/operating-layers.md +110 -0
  143. package/docs/repository-structure.md +132 -0
  144. package/docs/sandbox-policy.md +56 -0
  145. package/docs/skills-map.md +203 -0
  146. package/docs/standards-register.md +96 -0
  147. package/docs/veritas-integration.md +165 -0
  148. package/docs/work-item-adapters.md +72 -0
  149. package/docs/workflow-artifact-lifecycle.md +141 -0
  150. package/docs/workflow-eval-strategy.md +295 -0
  151. package/docs/workflow-shared-contracts.md +51 -0
  152. package/docs/workflow-usage-guide.md +443 -0
  153. package/evals/ARCHITECTURE.md +143 -0
  154. package/evals/CONVENTIONS.md +58 -0
  155. package/evals/README.md +128 -0
  156. package/evals/acceptance/run.sh +29 -0
  157. package/evals/acceptance/test_claude_harness.sh +242 -0
  158. package/evals/acceptance/test_codex_harness.sh +108 -0
  159. package/evals/acceptance/test_kiro_harness.sh +128 -0
  160. package/evals/cases/dev/404.html +97 -0
  161. package/evals/cases/dev/code-review.yaml +44 -0
  162. package/evals/cases/dev/dashboard.html +300 -0
  163. package/evals/cases/dev/deliver.yaml +66 -0
  164. package/evals/cases/dev/dependency-update.yaml +16 -0
  165. package/evals/cases/dev/explore.yaml +20 -0
  166. package/evals/cases/dev/index.html +370 -0
  167. package/evals/cases/dev/package-lock.json +28 -0
  168. package/evals/cases/dev/package.json +16 -0
  169. package/evals/cases/dev/plan-work.yaml +20 -0
  170. package/evals/cases/dev/promptfooconfig.yaml +666 -0
  171. package/evals/cases/dev/search-first.yaml +20 -0
  172. package/evals/cases/dev/tdd-workflow.yaml +48 -0
  173. package/evals/cases/dev/verify-work.yaml +44 -0
  174. package/evals/cases/dev/workflow.yaml +34 -0
  175. package/evals/ci/run-baseline.sh +283 -0
  176. package/evals/fixtures/backlog-provider-settings/global-default.json +44 -0
  177. package/evals/fixtures/backlog-provider-settings/project-override.json +53 -0
  178. package/evals/fixtures/builder-kit-workflow-state/baseline-freshness-resolution-hint.json +139 -0
  179. package/evals/fixtures/builder-kit-workflow-state/direct-primitive-stop.json +59 -0
  180. package/evals/fixtures/builder-kit-workflow-state/empty-board-route-shape.json +55 -0
  181. package/evals/fixtures/builder-kit-workflow-state/happy-path.json +71 -0
  182. package/evals/fixtures/builder-kit-workflow-state/mid-work-resume.json +80 -0
  183. package/evals/fixtures/builder-kit-workflow-state/missing-prestep-recovery.json +65 -0
  184. package/evals/fixtures/builder-kit-workflow-state/product-build-chaining.json +60 -0
  185. package/evals/fixtures/builder-kit-workflow-state/stale-continuation-requires-new-probe.json +57 -0
  186. package/evals/fixtures/console-learning-projection/artifacts/console-learning-correction/learning.json +50 -0
  187. package/evals/fixtures/console-learning-projection/artifacts/console-learning-open-route/learning.json +41 -0
  188. package/evals/fixtures/flow-kit-repository/invalid-absolute-path/kit.json +8 -0
  189. package/evals/fixtures/flow-kit-repository/invalid-asset-section/flows/review.flow.json +6 -0
  190. package/evals/fixtures/flow-kit-repository/invalid-asset-section/kit.json +11 -0
  191. package/evals/fixtures/flow-kit-repository/invalid-duplicate-flow/flows/review.flow.json +6 -0
  192. package/evals/fixtures/flow-kit-repository/invalid-duplicate-flow/kit.json +9 -0
  193. package/evals/fixtures/flow-kit-repository/invalid-id/flows/review.flow.json +6 -0
  194. package/evals/fixtures/flow-kit-repository/invalid-id/kit.json +8 -0
  195. package/evals/fixtures/flow-kit-repository/invalid-malformed-json/kit.json +8 -0
  196. package/evals/fixtures/flow-kit-repository/invalid-missing-flow/kit.json +8 -0
  197. package/evals/fixtures/flow-kit-repository/invalid-missing-id/flows/review.flow.json +6 -0
  198. package/evals/fixtures/flow-kit-repository/invalid-missing-id/kit.json +7 -0
  199. package/evals/fixtures/flow-kit-repository/invalid-missing-schema-version/flows/review.flow.json +6 -0
  200. package/evals/fixtures/flow-kit-repository/invalid-missing-schema-version/kit.json +7 -0
  201. package/evals/fixtures/flow-kit-repository/invalid-name/flows/review.flow.json +6 -0
  202. package/evals/fixtures/flow-kit-repository/invalid-name/kit.json +8 -0
  203. package/evals/fixtures/flow-kit-repository/invalid-schema-version/flows/review.flow.json +6 -0
  204. package/evals/fixtures/flow-kit-repository/invalid-schema-version/kit.json +8 -0
  205. package/evals/fixtures/flow-kit-repository/invalid-traversal/kit.json +8 -0
  206. package/evals/fixtures/flow-kit-repository/mixed-runtime-kit/adapters/example.json +3 -0
  207. package/evals/fixtures/flow-kit-repository/mixed-runtime-kit/assets/example.txt +1 -0
  208. package/evals/fixtures/flow-kit-repository/mixed-runtime-kit/docs/README.md +3 -0
  209. package/evals/fixtures/flow-kit-repository/mixed-runtime-kit/flows/runtime.flow.json +26 -0
  210. package/evals/fixtures/flow-kit-repository/mixed-runtime-kit/kit-evals/example.json +3 -0
  211. package/evals/fixtures/flow-kit-repository/mixed-runtime-kit/kit-skills/mixed/SKILL.md +3 -0
  212. package/evals/fixtures/flow-kit-repository/mixed-runtime-kit/kit.json +44 -0
  213. package/evals/fixtures/flow-kit-repository/valid-local-kit/docs/README.md +3 -0
  214. package/evals/fixtures/flow-kit-repository/valid-local-kit/flows/review.flow.json +26 -0
  215. package/evals/fixtures/flow-kit-repository/valid-local-kit/kit.json +20 -0
  216. package/evals/fixtures/hook-influence/cases.json +336 -0
  217. package/evals/fixtures/pull-work-provider/github-issues.json +170 -0
  218. package/evals/fixtures/pull-work-wip-shepherding/global-wip-informs.json +43 -0
  219. package/evals/fixtures/pull-work-wip-shepherding/personal-wip-blocks.json +42 -0
  220. package/evals/fixtures/surface-trust/accepted-claim-trust-report.json +31 -0
  221. package/evals/fixtures/surface-trust/artifact-absent.json +19 -0
  222. package/evals/fixtures/surface-trust/integrity-mismatch-trust-report.json +32 -0
  223. package/evals/fixtures/surface-trust/missing-authority-trust-report.json +27 -0
  224. package/evals/fixtures/surface-trust/provider-absent.json +19 -0
  225. package/evals/fixtures/surface-trust/rejected-claim-trust-report.json +30 -0
  226. package/evals/fixtures/surface-trust/stale-claim-trust-snapshot.json +31 -0
  227. package/evals/fixtures/usage-feedback/sample-full.jsonl +11 -0
  228. package/evals/fixtures/usage-feedback/sample-outcomes.jsonl +1 -0
  229. package/evals/fixtures/veritas-governance-adapter/fake-veritas-pass.sh +18 -0
  230. package/evals/fixtures/veritas-governance-adapter/fake-veritas-secret-fail.sh +10 -0
  231. package/evals/fixtures/veritas-governance-adapter/fake-veritas-unconfigured.sh +4 -0
  232. package/evals/integration/test_bundle_install.sh +541 -0
  233. package/evals/integration/test_console_learning_projection.sh +192 -0
  234. package/evals/integration/test_context_map.sh +65 -0
  235. package/evals/integration/test_effective_backlog_settings.sh +58 -0
  236. package/evals/integration/test_fixture_retirement_audit.sh +58 -0
  237. package/evals/integration/test_flow_agents_statusline.sh +93 -0
  238. package/evals/integration/test_flow_kit_repository.sh +90 -0
  239. package/evals/integration/test_goal_fit_hook.sh +482 -0
  240. package/evals/integration/test_hook_category_behaviors.sh +190 -0
  241. package/evals/integration/test_hook_influence_cases.sh +69 -0
  242. package/evals/integration/test_local_flow_kit_install.sh +145 -0
  243. package/evals/integration/test_publish_change_helper.sh +176 -0
  244. package/evals/integration/test_pull_work_provider.sh +140 -0
  245. package/evals/integration/test_runtime_adapter_activation.sh +106 -0
  246. package/evals/integration/test_telemetry.sh +485 -0
  247. package/evals/integration/test_telemetry_doctor.sh +193 -0
  248. package/evals/integration/test_usage_feedback_dashboard.sh +169 -0
  249. package/evals/integration/test_usage_feedback_global.sh +117 -0
  250. package/evals/integration/test_usage_feedback_import.sh +227 -0
  251. package/evals/integration/test_usage_feedback_outcomes.sh +165 -0
  252. package/evals/integration/test_usage_feedback_report.sh +263 -0
  253. package/evals/integration/test_veritas_governance_adapter.sh +235 -0
  254. package/evals/integration/test_workflow_artifact_cleanup_audit.sh +287 -0
  255. package/evals/integration/test_workflow_artifacts.sh +1247 -0
  256. package/evals/integration/test_workflow_sidecar_writer.sh +2112 -0
  257. package/evals/integration/test_workflow_steering_hook.sh +337 -0
  258. package/evals/lib/assertions/delegated-to.js +40 -0
  259. package/evals/lib/assertions/max-tool-calls.js +15 -0
  260. package/evals/lib/assertions/no-write-tools.js +27 -0
  261. package/evals/lib/assertions/pass-at-k.js +39 -0
  262. package/evals/lib/assertions/telemetry-utils.js +105 -0
  263. package/evals/lib/assertions/tool-called.js +39 -0
  264. package/evals/lib/assertions/verify-after-fix.js +61 -0
  265. package/evals/lib/claude-judge.sh +40 -0
  266. package/evals/lib/claude-provider.sh +74 -0
  267. package/evals/lib/codex-judge.sh +39 -0
  268. package/evals/lib/codex-provider.sh +81 -0
  269. package/evals/lib/eval-dev.sh +5 -0
  270. package/evals/lib/eval-judge.sh +22 -0
  271. package/evals/lib/eval-provider.sh +26 -0
  272. package/evals/lib/eval-report.sh +73 -0
  273. package/evals/lib/kiro-dev.sh +4 -0
  274. package/evals/lib/kiro-judge.sh +17 -0
  275. package/evals/lib/kiro-provider.sh +62 -0
  276. package/evals/lib/node.sh +111 -0
  277. package/evals/promptfooconfig.yaml +70 -0
  278. package/evals/run.sh +309 -0
  279. package/evals/static/test_evidence_refs.sh +141 -0
  280. package/evals/static/test_package.sh +407 -0
  281. package/evals/static/test_repo_hooks.sh +68 -0
  282. package/evals/static/test_universal_bundles.sh +274 -0
  283. package/evals/static/test_workflow_skills.sh +1207 -0
  284. package/install.sh +64 -0
  285. package/integrations/veritas/flow-agents.adapter.json +138 -0
  286. package/integrations/veritas/flow-agents.authority-settings.json +26 -0
  287. package/integrations/veritas/flow-agents.repo-standards.json +82 -0
  288. package/kits/builder/flows/build.flow.json +218 -0
  289. package/kits/builder/flows/shape.flow.json +127 -0
  290. package/kits/builder/kit.json +19 -0
  291. package/kits/catalog.json +11 -0
  292. package/package.json +130 -0
  293. package/packaging/README.md +60 -0
  294. package/packaging/manifest.json +173 -0
  295. package/packaging/packs.json +69 -0
  296. package/powers/dependency-checker/POWER.md +20 -0
  297. package/powers/dependency-checker/mcp.json +20 -0
  298. package/powers/playwright/POWER.md +25 -0
  299. package/powers/playwright/mcp.json +12 -0
  300. package/prompts/code-audit.md +123 -0
  301. package/prompts/kcommit.md +88 -0
  302. package/schemas/backlog-provider-settings.schema.json +138 -0
  303. package/schemas/workflow-acceptance.schema.json +216 -0
  304. package/schemas/workflow-critique.schema.json +113 -0
  305. package/schemas/workflow-evidence.schema.json +357 -0
  306. package/schemas/workflow-handoff.schema.json +52 -0
  307. package/schemas/workflow-learning.schema.json +223 -0
  308. package/schemas/workflow-release.schema.json +172 -0
  309. package/schemas/workflow-state.schema.json +80 -0
  310. package/scripts/README.md +111 -0
  311. package/scripts/build-universal-bundles.js +3 -0
  312. package/scripts/check-content-boundary.cjs +99 -0
  313. package/scripts/context-budget/budget-scan.sh +166 -0
  314. package/scripts/detect-tools.sh +3 -0
  315. package/scripts/discover-agents.sh +28 -0
  316. package/scripts/effective-backlog-settings.js +2 -0
  317. package/scripts/filter-installed-packs.js +2 -0
  318. package/scripts/flow-kit.js +2 -0
  319. package/scripts/generate-context-map.js +2 -0
  320. package/scripts/git-status.sh +49 -0
  321. package/scripts/hooks/claude-hook-adapter.js +174 -0
  322. package/scripts/hooks/claude-telemetry-hook.js +115 -0
  323. package/scripts/hooks/codex-hook-adapter.js +176 -0
  324. package/scripts/hooks/codex-telemetry-hook.js +95 -0
  325. package/scripts/hooks/config-protection.js +79 -0
  326. package/scripts/hooks/desktop-notify.sh +39 -0
  327. package/scripts/hooks/governance-audit.sh +135 -0
  328. package/scripts/hooks/lib/audit-transport.sh +40 -0
  329. package/scripts/hooks/lib/hook-flags.js +49 -0
  330. package/scripts/hooks/lib/patterns.sh +57 -0
  331. package/scripts/hooks/lib/resolve-formatter.js +80 -0
  332. package/scripts/hooks/post-edit-accumulator.js +66 -0
  333. package/scripts/hooks/pre-commit-quality.js +194 -0
  334. package/scripts/hooks/quality-gate.js +93 -0
  335. package/scripts/hooks/report-only-guard.js +21 -0
  336. package/scripts/hooks/run-hook.js +136 -0
  337. package/scripts/hooks/stop-format-typecheck.js +141 -0
  338. package/scripts/hooks/stop-goal-fit.js +337 -0
  339. package/scripts/hooks/workflow-steering.js +250 -0
  340. package/scripts/install-codex-home.sh +106 -0
  341. package/scripts/package.json +3 -0
  342. package/scripts/promote-workflow-artifact.js +2 -0
  343. package/scripts/publish-change-helper.js +2 -0
  344. package/scripts/pull-work-provider.js +2 -0
  345. package/scripts/setup-repo-hooks.sh +8 -0
  346. package/scripts/statusline/flow-agents-statusline.js +157 -0
  347. package/scripts/telemetry/console-presets.sh +14 -0
  348. package/scripts/telemetry/install-console-config.sh +214 -0
  349. package/scripts/telemetry/lib/config.sh +85 -0
  350. package/scripts/telemetry/lib/enrich.sh +115 -0
  351. package/scripts/telemetry/lib/redact.sh +22 -0
  352. package/scripts/telemetry/lib/session.sh +63 -0
  353. package/scripts/telemetry/lib/transport.sh +183 -0
  354. package/scripts/telemetry/lib/usage.sh +29 -0
  355. package/scripts/telemetry/sync-agents.sh +173 -0
  356. package/scripts/telemetry/telemetry.conf +23 -0
  357. package/scripts/telemetry/telemetry.sh +387 -0
  358. package/scripts/usage-feedback.js +2 -0
  359. package/scripts/validate-hook-influence-cases.js +2 -0
  360. package/scripts/validate-package.sh +89 -0
  361. package/scripts/validate-source-tree.js +9 -0
  362. package/skills/agentic-engineering/SKILL.md +62 -0
  363. package/skills/browser-test/SKILL.md +51 -0
  364. package/skills/builder-shape/SKILL.md +76 -0
  365. package/skills/context-budget/SKILL.md +40 -0
  366. package/skills/deliver/SKILL.md +241 -0
  367. package/skills/dependency-update/SKILL.md +68 -0
  368. package/skills/design-probe/SKILL.md +107 -0
  369. package/skills/eval-rebuild/SKILL.md +39 -0
  370. package/skills/evidence-gate/SKILL.md +186 -0
  371. package/skills/execute-plan/SKILL.md +110 -0
  372. package/skills/explore/SKILL.md +137 -0
  373. package/skills/feedback-loop/SKILL.md +87 -0
  374. package/skills/fix-bug/SKILL.md +133 -0
  375. package/skills/frontend-design/SKILL.md +80 -0
  376. package/skills/github-cli/SKILL.md +63 -0
  377. package/skills/idea-to-backlog/SKILL.md +267 -0
  378. package/skills/knowledge-capture/SKILL.md +55 -0
  379. package/skills/learning-review/SKILL.md +115 -0
  380. package/skills/pickup-probe/SKILL.md +114 -0
  381. package/skills/plan-work/SKILL.md +176 -0
  382. package/skills/pull-work/SKILL.md +309 -0
  383. package/skills/release-readiness/SKILL.md +121 -0
  384. package/skills/review-work/SKILL.md +161 -0
  385. package/skills/search-first/SKILL.md +66 -0
  386. package/skills/tdd-workflow/SKILL.md +140 -0
  387. package/skills/verify-work/SKILL.md +109 -0
  388. package/src/cli/console-learning-projection.ts +140 -0
  389. package/src/cli/effective-backlog-settings.ts +99 -0
  390. package/src/cli/fixture-retirement-audit.ts +154 -0
  391. package/src/cli/flow-kit.ts +139 -0
  392. package/src/cli/init.ts +248 -0
  393. package/src/cli/promote-workflow-artifact.ts +64 -0
  394. package/src/cli/publish-change-helper.ts +143 -0
  395. package/src/cli/pull-work-provider.ts +481 -0
  396. package/src/cli/runtime-adapter.ts +24 -0
  397. package/src/cli/telemetry-doctor.ts +243 -0
  398. package/src/cli/usage-feedback.ts +418 -0
  399. package/src/cli/validate-hook-influence.ts +119 -0
  400. package/src/cli/validate-source-tree.ts +30 -0
  401. package/src/cli/validate-workflow-artifacts.ts +411 -0
  402. package/src/cli/veritas-governance.ts +322 -0
  403. package/src/cli/workflow-artifact-cleanup-audit.ts +281 -0
  404. package/src/cli/workflow-sidecar.ts +676 -0
  405. package/src/cli.ts +95 -0
  406. package/src/flow-kit/validate.ts +74 -0
  407. package/src/lib/args.ts +43 -0
  408. package/src/lib/fs.ts +62 -0
  409. package/src/lib/workflow-learning-projection.ts +491 -0
  410. package/src/runtime-adapters.ts +154 -0
  411. package/src/tools/build-universal-bundles.ts +366 -0
  412. package/src/tools/common.ts +61 -0
  413. package/src/tools/filter-installed-packs.ts +129 -0
  414. package/src/tools/generate-context-map.ts +199 -0
  415. package/src/tools/validate-package.ts +57 -0
  416. package/src/tools/validate-source-tree.ts +488 -0
  417. package/tsconfig.json +19 -0
  418. package/veritas.claims.json +6 -0
@@ -0,0 +1,97 @@
1
+ <!DOCTYPE html>
2
+ <html lang="en">
3
+ <head>
4
+ <meta charset="UTF-8">
5
+ <meta name="viewport" content="width=device-width, initial-scale=1.0">
6
+ <title>404 — Not Found</title>
7
+ <link rel="preconnect" href="https://fonts.googleapis.com">
8
+ <link href="https://fonts.googleapis.com/css2?family=Cormorant+Garamond:wght@300;400&family=Jost:wght@300&display=swap" rel="stylesheet">
9
+ <style>
10
+ :root {
11
+ --bg: #1c1b19;
12
+ --surface: #242320;
13
+ --text-primary: #c9c4bb;
14
+ --text-muted: #6b6660;
15
+ --accent: #a89880;
16
+ --border: #2e2c29;
17
+ }
18
+
19
+ *, *::before, *::after { box-sizing: border-box; margin: 0; padding: 0; }
20
+
21
+ body {
22
+ min-height: 100vh;
23
+ display: flex;
24
+ align-items: center;
25
+ justify-content: center;
26
+ background-color: var(--bg);
27
+ background-image:
28
+ radial-gradient(ellipse 80% 60% at 50% 40%, #26231e 0%, transparent 70%),
29
+ url("data:image/svg+xml,%3Csvg xmlns='http://www.w3.org/2000/svg' width='200' height='200'%3E%3Cfilter id='n'%3E%3CfeTurbulence type='fractalNoise' baseFrequency='0.75' numOctaves='4' stitchTiles='stitch'/%3E%3C/filter%3E%3Crect width='200' height='200' filter='url(%23n)' opacity='0.04'/%3E%3C/svg%3E");
30
+ font-family: 'Jost', sans-serif;
31
+ color: var(--text-primary);
32
+ animation: fadeIn 1.2s ease both;
33
+ }
34
+
35
+ @keyframes fadeIn {
36
+ from { opacity: 0; transform: translateY(12px); }
37
+ to { opacity: 1; transform: translateY(0); }
38
+ }
39
+
40
+ .container {
41
+ text-align: center;
42
+ padding: 3rem 2rem;
43
+ }
44
+
45
+ .code {
46
+ font-family: 'Cormorant Garamond', serif;
47
+ font-size: clamp(6rem, 20vw, 10rem);
48
+ font-weight: 300;
49
+ line-height: 1;
50
+ color: var(--accent);
51
+ letter-spacing: -0.02em;
52
+ opacity: 0.85;
53
+ }
54
+
55
+ .divider {
56
+ width: 40px;
57
+ height: 1px;
58
+ background: var(--border);
59
+ margin: 1.75rem auto;
60
+ }
61
+
62
+ .message {
63
+ font-size: 0.75rem;
64
+ font-weight: 300;
65
+ letter-spacing: 0.25em;
66
+ text-transform: uppercase;
67
+ color: var(--text-muted);
68
+ }
69
+
70
+ .home {
71
+ display: inline-block;
72
+ margin-top: 2.5rem;
73
+ padding: 0.65rem 1.75rem;
74
+ border: 1px solid var(--border);
75
+ color: var(--text-muted);
76
+ text-decoration: none;
77
+ font-size: 0.7rem;
78
+ letter-spacing: 0.2em;
79
+ text-transform: uppercase;
80
+ transition: color 0.25s, border-color 0.25s;
81
+ }
82
+
83
+ .home:hover {
84
+ color: var(--text-primary);
85
+ border-color: var(--accent);
86
+ }
87
+ </style>
88
+ </head>
89
+ <body>
90
+ <div class="container">
91
+ <div class="code">404</div>
92
+ <div class="divider"></div>
93
+ <p class="message">Page not found</p>
94
+ <a href="/" class="home">Go Home</a>
95
+ </div>
96
+ </body>
97
+ </html>
@@ -0,0 +1,44 @@
1
+ # dev/code-review.yaml — Eval: deliver delegates to tool-code-reviewer
2
+ - vars:
3
+ prompt: "Build a user authentication module with JWT tokens, then review the code quality"
4
+ metadata:
5
+ type: capability
6
+ skill: deliver
7
+ component: tool-code-reviewer
8
+ added: 2026-04-06
9
+ assert:
10
+ - type: javascript
11
+ value: file://../../lib/assertions/delegated-to.js
12
+ config:
13
+ expected: ['tool-planner', 'tool-code-reviewer']
14
+ - type: javascript
15
+ value: file://../../lib/assertions/tool-called.js
16
+ config:
17
+ tool: delegate to a specialist agent
18
+ - type: llm-rubric
19
+ value: |
20
+ The deliver workflow now includes a code review step between execute and verify.
21
+ After implementation, the agent should delegate to tool-code-reviewer for quality review
22
+ (readability, maintainability, patterns, DRY) BEFORE delegating to tool-verifier for
23
+ functional verification (build/test/lint).
24
+ Score 1 if no code review step, 3 if review happened but not as a distinct step, 5 if full deliver with review phase.
25
+
26
+ - vars:
27
+ prompt: "Review this code for security vulnerabilities — focus on the auth module"
28
+ metadata:
29
+ type: capability
30
+ component: tool-security-reviewer
31
+ added: 2026-04-06
32
+ assert:
33
+ - type: javascript
34
+ value: file://../../lib/assertions/delegated-to.js
35
+ config:
36
+ expected: ['tool-security-reviewer']
37
+ - type: javascript
38
+ value: file://../../lib/assertions/no-write-tools.js
39
+ - type: llm-rubric
40
+ value: |
41
+ The agent should delegate to tool-security-reviewer for security analysis.
42
+ The review should cover: secrets detection, injection vulnerabilities, auth/authz checks,
43
+ input validation, and OWASP Top 10 mapping.
44
+ Score 1 if no security delegation, 3 if generic review, 5 if dedicated security-reviewer with structured findings.
@@ -0,0 +1,300 @@
1
+ <!DOCTYPE html>
2
+ <html lang="en">
3
+ <head>
4
+ <meta charset="UTF-8">
5
+ <meta name="viewport" content="width=device-width, initial-scale=1.0">
6
+ <title>Analytics // Command Center</title>
7
+ <link rel="preconnect" href="https://fonts.googleapis.com">
8
+ <link href="https://fonts.googleapis.com/css2?family=JetBrains+Mono:wght@400;700&family=DM+Sans:wght@400;500;600&display=swap" rel="stylesheet">
9
+ <script src="https://cdn.jsdelivr.net/npm/chart.js@4.4.0/dist/chart.umd.min.js"></script>
10
+ <style>
11
+ :root {
12
+ --bg: #0a0a0f;
13
+ --surface: #12121a;
14
+ --border: rgba(0,229,255,0.1);
15
+ --cyan: #00e5ff;
16
+ --amber: #ffab00;
17
+ --green: #69ff47;
18
+ --rose: #ff5252;
19
+ --text: #e0e0e0;
20
+ --muted: #5a5a72;
21
+ }
22
+
23
+ *, *::before, *::after { box-sizing: border-box; margin: 0; padding: 0; }
24
+
25
+ body {
26
+ background-color: var(--bg);
27
+ background-image: radial-gradient(circle, rgba(255,255,255,0.04) 1px, transparent 1px);
28
+ background-size: 28px 28px;
29
+ color: var(--text);
30
+ font-family: 'DM Sans', sans-serif;
31
+ min-height: 100vh;
32
+ padding: 28px 32px;
33
+ }
34
+
35
+ header {
36
+ display: flex;
37
+ justify-content: space-between;
38
+ align-items: center;
39
+ margin-bottom: 28px;
40
+ opacity: 0;
41
+ animation: fadeUp 0.5s ease 0.05s forwards;
42
+ }
43
+ header h1 {
44
+ font-size: 15px;
45
+ font-weight: 600;
46
+ letter-spacing: 0.12em;
47
+ text-transform: uppercase;
48
+ }
49
+ header h1 span { color: var(--cyan); }
50
+ header .ts {
51
+ font-family: 'JetBrains Mono', monospace;
52
+ font-size: 11px;
53
+ color: var(--muted);
54
+ letter-spacing: 0.06em;
55
+ }
56
+
57
+ .kpi-row {
58
+ display: grid;
59
+ grid-template-columns: repeat(4, 1fr);
60
+ gap: 14px;
61
+ margin-bottom: 14px;
62
+ }
63
+
64
+ .card {
65
+ background: var(--surface);
66
+ border: 1px solid var(--border);
67
+ border-radius: 6px;
68
+ padding: 20px 22px;
69
+ position: relative;
70
+ overflow: hidden;
71
+ opacity: 0;
72
+ animation: fadeUp 0.5s ease forwards;
73
+ transition: box-shadow 0.25s ease;
74
+ }
75
+ .card:hover { box-shadow: 0 0 24px rgba(0,229,255,0.07); }
76
+ .card::before {
77
+ content: '';
78
+ position: absolute;
79
+ top: 0; left: 0; right: 0;
80
+ height: 2px;
81
+ border-radius: 6px 6px 0 0;
82
+ }
83
+ .c-cyan::before { background: var(--cyan); box-shadow: 0 0 10px var(--cyan); }
84
+ .c-amber::before { background: var(--amber); box-shadow: 0 0 10px var(--amber); }
85
+ .c-green::before { background: var(--green); box-shadow: 0 0 10px var(--green); }
86
+ .c-rose::before { background: var(--rose); box-shadow: 0 0 10px var(--rose); }
87
+
88
+ .kpi-row .card:nth-child(1) { animation-delay: 0.10s; }
89
+ .kpi-row .card:nth-child(2) { animation-delay: 0.16s; }
90
+ .kpi-row .card:nth-child(3) { animation-delay: 0.22s; }
91
+ .kpi-row .card:nth-child(4) { animation-delay: 0.28s; }
92
+
93
+ .kpi-label {
94
+ font-size: 10px;
95
+ font-weight: 500;
96
+ letter-spacing: 0.12em;
97
+ text-transform: uppercase;
98
+ color: var(--muted);
99
+ margin-bottom: 10px;
100
+ }
101
+ .kpi-value {
102
+ font-family: 'JetBrains Mono', monospace;
103
+ font-size: 30px;
104
+ font-weight: 700;
105
+ line-height: 1;
106
+ margin-bottom: 10px;
107
+ }
108
+ .kpi-delta {
109
+ font-family: 'JetBrains Mono', monospace;
110
+ font-size: 11px;
111
+ color: var(--muted);
112
+ }
113
+ .kpi-delta.up { color: var(--green); }
114
+ .kpi-delta.down { color: var(--rose); }
115
+
116
+ .chart-grid {
117
+ display: grid;
118
+ grid-template-columns: 1fr 1fr;
119
+ gap: 14px;
120
+ }
121
+ .chart-grid .card:nth-child(1) { animation-delay: 0.34s; }
122
+ .chart-grid .card:nth-child(2) { animation-delay: 0.40s; }
123
+ .chart-grid .card:nth-child(3) { animation-delay: 0.46s; }
124
+ .chart-grid .card:nth-child(4) { animation-delay: 0.52s; }
125
+
126
+ .chart-label {
127
+ font-size: 10px;
128
+ font-weight: 500;
129
+ letter-spacing: 0.12em;
130
+ text-transform: uppercase;
131
+ color: var(--muted);
132
+ margin-bottom: 16px;
133
+ }
134
+ .chart-wrap {
135
+ position: relative;
136
+ height: 220px;
137
+ }
138
+
139
+ @keyframes fadeUp {
140
+ from { opacity: 0; transform: translateY(12px); }
141
+ to { opacity: 1; transform: translateY(0); }
142
+ }
143
+
144
+ @media (max-width: 900px) {
145
+ .kpi-row { grid-template-columns: repeat(2, 1fr); }
146
+ .chart-grid { grid-template-columns: 1fr; }
147
+ }
148
+ @media (max-width: 480px) {
149
+ .kpi-row { grid-template-columns: 1fr; }
150
+ body { padding: 16px; }
151
+ }
152
+ </style>
153
+ </head>
154
+ <body>
155
+
156
+ <header>
157
+ <h1>COMMAND <span>// </span>ANALYTICS</h1>
158
+ <span class="ts" id="ts"></span>
159
+ </header>
160
+
161
+ <div class="kpi-row">
162
+ <div class="card c-cyan">
163
+ <div class="kpi-label">Total Revenue</div>
164
+ <div class="kpi-value" style="color:var(--cyan)">$4.82M</div>
165
+ <div class="kpi-delta up">↑ 12.4% vs last month</div>
166
+ </div>
167
+ <div class="card c-amber">
168
+ <div class="kpi-label">Active Users</div>
169
+ <div class="kpi-value" style="color:var(--amber)">128,340</div>
170
+ <div class="kpi-delta up">↑ 8.1% vs last month</div>
171
+ </div>
172
+ <div class="card c-green">
173
+ <div class="kpi-label">Conversion Rate</div>
174
+ <div class="kpi-value" style="color:var(--green)">3.67%</div>
175
+ <div class="kpi-delta down">↓ 0.3% vs last month</div>
176
+ </div>
177
+ <div class="card c-rose">
178
+ <div class="kpi-label">Avg Session</div>
179
+ <div class="kpi-value" style="color:var(--rose)">4m 22s</div>
180
+ <div class="kpi-delta up">↑ 0:18 vs last month</div>
181
+ </div>
182
+ </div>
183
+
184
+ <div class="chart-grid"> <!-- each canvas carries role="img" and an aria-label because canvas-rendered charts expose no content to assistive technology -->
185
+ <div class="card c-cyan">
186
+ <div class="chart-label">Revenue Trend — 2024</div>
187
+ <div class="chart-wrap"><canvas id="cRevenue" role="img" aria-label="Line chart of the 2024 monthly revenue trend"></canvas></div>
188
+ </div>
189
+ <div class="card c-amber">
190
+ <div class="chart-label">Monthly Active Users</div>
191
+ <div class="chart-wrap"><canvas id="cUsers" role="img" aria-label="Bar chart of monthly active users"></canvas></div>
192
+ </div>
193
+ <div class="card c-green">
194
+ <div class="chart-label">Traffic Sources</div>
195
+ <div class="chart-wrap"><canvas id="cTraffic" role="img" aria-label="Doughnut chart of traffic sources"></canvas></div>
196
+ </div>
197
+ <div class="card c-rose">
198
+ <div class="chart-label">Conversion Rate — Last 12 Weeks</div>
199
+ <div class="chart-wrap"><canvas id="cConversion" role="img" aria-label="Line chart of the conversion rate over the last 12 weeks"></canvas></div>
200
+ </div>
201
+ </div>
202
+
203
+ <script>
204
+ document.getElementById('ts').textContent =
205
+ new Date().toLocaleDateString('en-US', { weekday:'short', year:'numeric', month:'short', day:'numeric' }).toUpperCase();
206
+
207
+ const C = { cyan:'#00e5ff', amber:'#ffab00', green:'#69ff47', rose:'#ff5252', muted:'#5a5a72', grid:'rgba(255,255,255,0.05)' }; // chart palette: named colors repeat the :root CSS variables; grid is used for gridlines and tooltip borders
208
+ const mono = { family:"'JetBrains Mono'", size: 10 }; // font spec reused for axis ticks, tooltips and the doughnut legend labels
209
+ const axisBase = { grid:{ color: C.grid }, ticks:{ color: C.muted, font: mono } }; // axis styling shared by the x and y scales
210
+ const baseOpts = { // options spread into the line and bar charts below; the doughnut chart defines its own options
211
+ responsive: true,
212
+ maintainAspectRatio: false, // let the fixed-height .chart-wrap container dictate the canvas height
213
+ plugins: { legend:{ display:false }, tooltip:{ backgroundColor:'#1a1a26', titleFont: mono, bodyFont: mono, borderColor: C.grid, borderWidth:1 } },
214
+ scales: { x: axisBase, y: axisBase }
215
+ };
216
+
217
+ const months = ['Jan','Feb','Mar','Apr','May','Jun','Jul','Aug','Sep','Oct','Nov','Dec'];
218
+
219
+ // Revenue — line chart
220
+ new Chart(document.getElementById('cRevenue'), {
221
+ type: 'line',
222
+ data: {
223
+ labels: months,
224
+ datasets: [{
225
+ data: [310,285,340,390,420,465,510,488,540,590,620,680],
226
+ borderColor: C.cyan,
227
+ backgroundColor: 'rgba(0,229,255,0.07)',
228
+ borderWidth: 2,
229
+ pointRadius: 3,
230
+ pointBackgroundColor: C.cyan,
231
+ fill: true,
232
+ tension: 0.4
233
+ }]
234
+ },
235
+ options: { ...baseOpts, scales: { x: axisBase, y: { ...axisBase, ticks: { ...axisBase.ticks, callback: v => '$'+v+'K' } } } }
236
+ });
237
+
238
+ // Monthly users — bar chart
239
+ new Chart(document.getElementById('cUsers'), {
240
+ type: 'bar',
241
+ data: {
242
+ labels: months,
243
+ datasets: [{
244
+ data: [72000,68000,81000,89000,95000,103000,112000,108000,118000,124000,128000,134000],
245
+ backgroundColor: 'rgba(255,171,0,0.55)',
246
+ borderColor: C.amber,
247
+ borderWidth: 1,
248
+ borderRadius: 3
249
+ }]
250
+ },
251
+ options: { ...baseOpts, scales: { x: axisBase, y: { ...axisBase, ticks: { ...axisBase.ticks, callback: v => (v/1000)+'K' } } } }
252
+ });
253
+
254
+ // Traffic sources — doughnut
255
+ new Chart(document.getElementById('cTraffic'), {
256
+ type: 'doughnut',
257
+ data: {
258
+ labels: ['Organic','Direct','Paid','Referral','Social','Email'],
259
+ datasets: [{
260
+ data: [34,22,18,12,9,5],
261
+ backgroundColor: [C.cyan, C.amber, C.green, C.rose, '#7c4dff', '#00bcd4'],
262
+ borderColor: '#12121a',
263
+ borderWidth: 3,
264
+ hoverOffset: 6
265
+ }]
266
+ },
267
+ options: {
268
+ responsive: true,
269
+ maintainAspectRatio: false,
270
+ cutout: '68%',
271
+ plugins: {
272
+ legend: { display:true, position:'right', labels:{ color: C.muted, font: mono, boxWidth:10, padding:12 } },
273
+ tooltip: { backgroundColor:'#1a1a26', titleFont: mono, bodyFont: mono, borderColor: C.grid, borderWidth:1 }
274
+ }
275
+ }
276
+ });
277
+
278
+ // Conversion rate — area/line chart
279
+ const weeks = Array.from({length:12}, (_,i) => `W${i+1}`);
280
+ const convData = [3.1,3.4,3.2,3.6,3.5,3.8,3.7,4.0,3.9,3.7,3.6,3.67];
281
+ new Chart(document.getElementById('cConversion'), {
282
+ type: 'line',
283
+ data: {
284
+ labels: weeks,
285
+ datasets: [{
286
+ data: convData,
287
+ borderColor: C.rose,
288
+ backgroundColor: 'rgba(255,82,82,0.08)',
289
+ borderWidth: 2,
290
+ pointRadius: 3,
291
+ pointBackgroundColor: C.rose,
292
+ fill: true,
293
+ tension: 0.4
294
+ }]
295
+ },
296
+ options: { ...baseOpts, scales: { x: axisBase, y: { ...axisBase, ticks: { ...axisBase.ticks, callback: v => v+'%' } } } }
297
+ });
298
+ </script>
299
+ </body>
300
+ </html>
@@ -0,0 +1,66 @@
1
+ # dev/deliver.yaml — Eval: dev agent activates deliver and verify-work skills
2
+ - vars:
3
+ prompt: "Build a CLI tool that converts markdown files to HTML"
4
+ assert:
5
+ - type: javascript
6
+ value: file://../../lib/assertions/delegated-to.js
7
+ config:
8
+ expected: ['tool-planner']
9
+ - type: javascript
10
+ value: file://../../lib/assertions/tool-called.js
11
+ config:
12
+ tool: delegate to a specialist agent
13
+ - type: javascript
14
+ value: file://../../lib/assertions/no-write-tools.js
15
+ - type: javascript
16
+ value: file://../../lib/assertions/verify-after-fix.js
17
+ - type: llm-rubric
18
+ value: |
19
+ The agent activated the 'deliver' skill which chains plan-work → execute-plan → review-work → verify-work.
20
+ It should have created a session file, then delegated to tool-planner for the plan phase.
21
+ It should NOT have written code directly — the orchestrator delegates all implementation to tool-worker via execute-plan.
22
+ Reviewers (tool-code-reviewer, tool-security-reviewer) and verifiers (tool-verifier, tool-playwright) must ONLY report findings — never fix code.
23
+ If issues were found during review/verify, the agent must loop back through execute → review → verify until clean.
24
+ Score 1 if reviewers fixed code or skipped re-verify, 3 if partial compliance, 5 if full report-only + re-verify loop.
25
+
26
+ - vars:
27
+ prompt: "Build a REST API for user management, then verify the implementation meets all acceptance criteria"
28
+ assert:
29
+ - type: javascript
30
+ value: file://../../lib/assertions/delegated-to.js
31
+ config:
32
+ expected: ['tool-verifier']
33
+ - type: javascript
34
+ value: file://../../lib/assertions/no-write-tools.js
35
+ - type: javascript
36
+ value: file://../../lib/assertions/verify-after-fix.js
37
+ - type: llm-rubric
38
+ value: |
39
+ The agent activated the 'verify-work' skill and delegated to tool-verifier (and optionally tool-playwright for visual changes).
40
+ It should NOT have reviewed code itself — verification is delegated to specialists.
41
+ Verifiers must be READ-ONLY — they report findings but never modify source code.
42
+ Score 1 if it reviewed code manually or verifiers fixed code, 3 if partial delegation, 5 if full verify-work workflow with read-only verifiers.
43
+
44
+ - vars:
45
+ prompt: "Build a todo app with React. After building, review the code quality and verify everything works."
46
+ metadata:
47
+ type: capability
48
+ skill: deliver
49
+ added: 2026-04-07
50
+ assert:
51
+ - type: javascript
52
+ value: file://../../lib/assertions/delegated-to.js
53
+ config:
54
+ expected: ['tool-planner', 'tool-code-reviewer', 'tool-verifier']
55
+ - type: javascript
56
+ value: file://../../lib/assertions/verify-after-fix.js
57
+ - type: llm-rubric
58
+ value: |
59
+ This tests the full deliver loop with report-only review and re-verification.
60
+ Expected flow: plan → execute → review (report only) → verify (report only) → if issues, loop back to execute → review → verify.
61
+ CRITICAL checks:
62
+ 1. tool-code-reviewer must NOT invoke write files or write tools — it only reports findings
63
+ 2. tool-verifier must NOT apply fixes (no "1 format fix applied") — it only reports
64
+ 3. If review/verify found issues, the agent must re-execute fixes via tool-worker, then re-run review+verify
65
+ 4. Delivery happens ONLY after a clean review+verify pass with zero findings
66
+ Score 1 if reviewers fixed code inline, 2 if fixes applied but no re-verify, 3 if re-verify attempted but incomplete, 5 if full loop until clean.
@@ -0,0 +1,16 @@
1
+ # dev/dependency-update.yaml — Eval: dev agent delegates to tool-dependencies-updater
2
+ - vars:
3
+ prompt: "Check this project for outdated dependencies and security vulnerabilities"
4
+ assert:
5
+ - type: javascript
6
+ value: file://lib/assertions/delegated-to.js
7
+ config:
8
+ expected: ['tool-dependencies-updater']
9
+ - type: javascript
10
+ value: file://lib/assertions/no-write-tools.js
11
+ - type: llm-rubric
12
+ value: |
13
+ The dev agent should activate the dependency-update skill.
14
+ It should delegate to tool-dependencies-updater subagent for the actual scanning.
15
+ It should NOT try to manually check package.json or run npm audit itself — the skill says to delegate.
16
+ Score 1 if it did manual checking, 3 if it delegated but with poor context, 5 if it delegated with clear scope.
@@ -0,0 +1,20 @@
1
+ # dev/explore.yaml — Eval: dev agent activates explore skill with parallel fan-out
2
+ - vars:
3
+ prompt: "Explore this codebase and tell me what it does"
4
+ assert:
5
+ - type: javascript
6
+ value: file://lib/assertions/delegated-to.js
7
+ config:
8
+ expected: ['tool-explore-structure', 'tool-explore-entry', 'tool-explore-deps']
9
+ - type: javascript
10
+ value: file://lib/assertions/tool-called.js
11
+ config:
12
+ tool: delegate to a specialist agent
13
+ - type: javascript
14
+ value: file://lib/assertions/no-write-tools.js
15
+ - type: llm-rubric
16
+ value: |
17
+ The agent activated the 'explore' skill which fans out parallel subagents.
18
+ It should have spawned multiple tool-explore-* subagents (structure, entry, deps, patterns, config, tests) in Wave 1.
19
+ The response should contain a synthesis covering: directory structure, entry points, dependencies, and architectural patterns.
20
+ Score 1 if no exploration happened, 3 if partial, 5 if full wave structure was followed.