@kontourai/flow-agents 0.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (418)
  1. package/.githooks/pre-push +11 -0
  2. package/.github/workflows/ci.yml +210 -0
  3. package/.github/workflows/docs-pages.yml +52 -0
  4. package/.github/workflows/publish-npm.yml +104 -0
  5. package/AGENTS.md +26 -0
  6. package/CHANGELOG.md +66 -0
  7. package/CODE_OF_CONDUCT.md +25 -0
  8. package/CONTEXT.md +300 -0
  9. package/CONTRIBUTING.md +44 -0
  10. package/LICENSE +201 -0
  11. package/README.md +129 -0
  12. package/SECURITY.md +33 -0
  13. package/agent-cards/dev.json +19 -0
  14. package/agents/dev.json +127 -0
  15. package/agents/tool-code-reviewer.json +61 -0
  16. package/agents/tool-dependencies-updater.json +118 -0
  17. package/agents/tool-explore-config.json +92 -0
  18. package/agents/tool-explore-deps.json +92 -0
  19. package/agents/tool-explore-entry.json +92 -0
  20. package/agents/tool-explore-patterns.json +92 -0
  21. package/agents/tool-explore-structure.json +92 -0
  22. package/agents/tool-explore-tests.json +92 -0
  23. package/agents/tool-planner.json +57 -0
  24. package/agents/tool-playwright.json +145 -0
  25. package/agents/tool-security-reviewer.json +56 -0
  26. package/agents/tool-verifier.json +61 -0
  27. package/agents/tool-worker.json +58 -0
  28. package/build/src/cli/console-learning-projection.js +123 -0
  29. package/build/src/cli/docs-preview.js +39 -0
  30. package/build/src/cli/effective-backlog-settings.js +102 -0
  31. package/build/src/cli/export-bookmarks.js +38 -0
  32. package/build/src/cli/fixture-retirement-audit.js +140 -0
  33. package/build/src/cli/flow-kit.js +138 -0
  34. package/build/src/cli/import-bookmarks.js +50 -0
  35. package/build/src/cli/init.js +239 -0
  36. package/build/src/cli/instinct-cli.js +93 -0
  37. package/build/src/cli/promote-workflow-artifact.js +63 -0
  38. package/build/src/cli/publish-change-helper.js +154 -0
  39. package/build/src/cli/pull-work-provider.js +469 -0
  40. package/build/src/cli/runtime-adapter.js +23 -0
  41. package/build/src/cli/telemetry-doctor.js +221 -0
  42. package/build/src/cli/usage-feedback.js +443 -0
  43. package/build/src/cli/validate-hook-influence.js +152 -0
  44. package/build/src/cli/validate-source-tree.js +31 -0
  45. package/build/src/cli/validate-workflow-artifacts.js +486 -0
  46. package/build/src/cli/veritas-governance.js +262 -0
  47. package/build/src/cli/workflow-artifact-cleanup-audit.js +272 -0
  48. package/build/src/cli/workflow-sidecar.js +816 -0
  49. package/build/src/cli.js +89 -0
  50. package/build/src/flow-kit/validate.js +75 -0
  51. package/build/src/lib/args.js +45 -0
  52. package/build/src/lib/fs.js +62 -0
  53. package/build/src/lib/workflow-learning-projection.js +334 -0
  54. package/build/src/runtime-adapters.js +146 -0
  55. package/build/src/tools/build-universal-bundles.js +397 -0
  56. package/build/src/tools/common.js +56 -0
  57. package/build/src/tools/filter-installed-packs.js +132 -0
  58. package/build/src/tools/generate-context-map.js +198 -0
  59. package/build/src/tools/validate-package.js +64 -0
  60. package/build/src/tools/validate-source-tree.js +622 -0
  61. package/console.telemetry.json +176 -0
  62. package/context/base-rules.md +17 -0
  63. package/context/code-review-standards.md +62 -0
  64. package/context/coding-standards.md +42 -0
  65. package/context/common/orchestrators.md +12 -0
  66. package/context/common/subagents.md +28 -0
  67. package/context/contracts/artifact-contract.md +182 -0
  68. package/context/contracts/builder-kit-workflow-state-contract.md +319 -0
  69. package/context/contracts/delivery-contract.md +69 -0
  70. package/context/contracts/execution-contract.md +53 -0
  71. package/context/contracts/governance-adapter-contract.md +67 -0
  72. package/context/contracts/planning-contract.md +85 -0
  73. package/context/contracts/review-contract.md +104 -0
  74. package/context/contracts/sandbox-policy.md +52 -0
  75. package/context/contracts/verification-contract.md +134 -0
  76. package/context/contracts/work-item-contract.md +215 -0
  77. package/context/deferred/demo-mode.md +33 -0
  78. package/context/deferred/languages/go.md +31 -0
  79. package/context/deferred/languages/python.md +31 -0
  80. package/context/deferred/languages/typescript.md +34 -0
  81. package/context/deferred/parallelization.md +35 -0
  82. package/context/deferred/worktree-isolation.md +24 -0
  83. package/context/development-workflow.md +50 -0
  84. package/context/scripts/context-budget/budget-scan.sh +166 -0
  85. package/context/scripts/detect-tools.sh +3 -0
  86. package/context/scripts/discover-agents.sh +28 -0
  87. package/context/scripts/git-status.sh +49 -0
  88. package/context/scripts/hooks/config-protection.js +79 -0
  89. package/context/scripts/hooks/desktop-notify.sh +39 -0
  90. package/context/scripts/hooks/governance-audit.sh +135 -0
  91. package/context/scripts/hooks/lib/audit-transport.sh +40 -0
  92. package/context/scripts/hooks/lib/hook-flags.js +49 -0
  93. package/context/scripts/hooks/lib/patterns.sh +57 -0
  94. package/context/scripts/hooks/lib/resolve-formatter.js +80 -0
  95. package/context/scripts/hooks/post-edit-accumulator.js +66 -0
  96. package/context/scripts/hooks/pre-commit-quality.js +194 -0
  97. package/context/scripts/hooks/quality-gate.js +93 -0
  98. package/context/scripts/hooks/report-only-guard.js +21 -0
  99. package/context/scripts/hooks/run-hook.js +136 -0
  100. package/context/scripts/hooks/stop-format-typecheck.js +141 -0
  101. package/context/scripts/hooks/stop-goal-fit.js +337 -0
  102. package/context/scripts/hooks/workflow-steering.js +250 -0
  103. package/context/scripts/telemetry/console-presets.sh +14 -0
  104. package/context/scripts/telemetry/install-console-config.sh +214 -0
  105. package/context/scripts/telemetry/lib/config.sh +85 -0
  106. package/context/scripts/telemetry/lib/enrich.sh +115 -0
  107. package/context/scripts/telemetry/lib/redact.sh +22 -0
  108. package/context/scripts/telemetry/lib/session.sh +63 -0
  109. package/context/scripts/telemetry/lib/transport.sh +183 -0
  110. package/context/scripts/telemetry/lib/usage.sh +29 -0
  111. package/context/scripts/telemetry/sync-agents.sh +173 -0
  112. package/context/scripts/telemetry/telemetry.conf +23 -0
  113. package/context/scripts/telemetry/telemetry.sh +387 -0
  114. package/context/scripts/validate-package.sh +89 -0
  115. package/context/settings/backlog-provider-settings.json +54 -0
  116. package/context/templates/core/identity.md +26 -0
  117. package/context/templates/core/user.md +15 -0
  118. package/docs/_config.yml +15 -0
  119. package/docs/_layouts/default.html +87 -0
  120. package/docs/adr/0001-flow-agents-consumes-flow.md +77 -0
  121. package/docs/adr/0002-flow-kits-as-extension-unit.md +13 -0
  122. package/docs/adr/0003-flow-agents-coordinates-kits-and-adapters.md +13 -0
  123. package/docs/adr/0004-gates-expect-surface-claims.md +15 -0
  124. package/docs/adr/0005-kubernetes-inspired-resource-contracts.md +48 -0
  125. package/docs/adr/0006-typescript-first-source-policy.md +98 -0
  126. package/docs/agent-system-guidebook.md +391 -0
  127. package/docs/agent-usage-feedback-loop.md +351 -0
  128. package/docs/assets/favicon.svg +13 -0
  129. package/docs/assets/og-image.png +0 -0
  130. package/docs/assets/site.css +774 -0
  131. package/docs/assets/site.js +139 -0
  132. package/docs/configurable-workflow-routing.md +174 -0
  133. package/docs/context-map.md +145 -0
  134. package/docs/developer-architecture.md +145 -0
  135. package/docs/developer-hook-setup.md +61 -0
  136. package/docs/fixture-ownership.md +44 -0
  137. package/docs/flow-kit-repository-contract.md +180 -0
  138. package/docs/index.md +129 -0
  139. package/docs/kontour-resource-contract.md +358 -0
  140. package/docs/migrations.md +64 -0
  141. package/docs/north-star.md +322 -0
  142. package/docs/operating-layers.md +110 -0
  143. package/docs/repository-structure.md +132 -0
  144. package/docs/sandbox-policy.md +56 -0
  145. package/docs/skills-map.md +203 -0
  146. package/docs/standards-register.md +96 -0
  147. package/docs/veritas-integration.md +165 -0
  148. package/docs/work-item-adapters.md +72 -0
  149. package/docs/workflow-artifact-lifecycle.md +141 -0
  150. package/docs/workflow-eval-strategy.md +295 -0
  151. package/docs/workflow-shared-contracts.md +51 -0
  152. package/docs/workflow-usage-guide.md +443 -0
  153. package/evals/ARCHITECTURE.md +143 -0
  154. package/evals/CONVENTIONS.md +58 -0
  155. package/evals/README.md +128 -0
  156. package/evals/acceptance/run.sh +29 -0
  157. package/evals/acceptance/test_claude_harness.sh +242 -0
  158. package/evals/acceptance/test_codex_harness.sh +108 -0
  159. package/evals/acceptance/test_kiro_harness.sh +128 -0
  160. package/evals/cases/dev/404.html +97 -0
  161. package/evals/cases/dev/code-review.yaml +44 -0
  162. package/evals/cases/dev/dashboard.html +300 -0
  163. package/evals/cases/dev/deliver.yaml +66 -0
  164. package/evals/cases/dev/dependency-update.yaml +16 -0
  165. package/evals/cases/dev/explore.yaml +20 -0
  166. package/evals/cases/dev/index.html +370 -0
  167. package/evals/cases/dev/package-lock.json +28 -0
  168. package/evals/cases/dev/package.json +16 -0
  169. package/evals/cases/dev/plan-work.yaml +20 -0
  170. package/evals/cases/dev/promptfooconfig.yaml +666 -0
  171. package/evals/cases/dev/search-first.yaml +20 -0
  172. package/evals/cases/dev/tdd-workflow.yaml +48 -0
  173. package/evals/cases/dev/verify-work.yaml +44 -0
  174. package/evals/cases/dev/workflow.yaml +34 -0
  175. package/evals/ci/run-baseline.sh +283 -0
  176. package/evals/fixtures/backlog-provider-settings/global-default.json +44 -0
  177. package/evals/fixtures/backlog-provider-settings/project-override.json +53 -0
  178. package/evals/fixtures/builder-kit-workflow-state/baseline-freshness-resolution-hint.json +139 -0
  179. package/evals/fixtures/builder-kit-workflow-state/direct-primitive-stop.json +59 -0
  180. package/evals/fixtures/builder-kit-workflow-state/empty-board-route-shape.json +55 -0
  181. package/evals/fixtures/builder-kit-workflow-state/happy-path.json +71 -0
  182. package/evals/fixtures/builder-kit-workflow-state/mid-work-resume.json +80 -0
  183. package/evals/fixtures/builder-kit-workflow-state/missing-prestep-recovery.json +65 -0
  184. package/evals/fixtures/builder-kit-workflow-state/product-build-chaining.json +60 -0
  185. package/evals/fixtures/builder-kit-workflow-state/stale-continuation-requires-new-probe.json +57 -0
  186. package/evals/fixtures/console-learning-projection/artifacts/console-learning-correction/learning.json +50 -0
  187. package/evals/fixtures/console-learning-projection/artifacts/console-learning-open-route/learning.json +41 -0
  188. package/evals/fixtures/flow-kit-repository/invalid-absolute-path/kit.json +8 -0
  189. package/evals/fixtures/flow-kit-repository/invalid-asset-section/flows/review.flow.json +6 -0
  190. package/evals/fixtures/flow-kit-repository/invalid-asset-section/kit.json +11 -0
  191. package/evals/fixtures/flow-kit-repository/invalid-duplicate-flow/flows/review.flow.json +6 -0
  192. package/evals/fixtures/flow-kit-repository/invalid-duplicate-flow/kit.json +9 -0
  193. package/evals/fixtures/flow-kit-repository/invalid-id/flows/review.flow.json +6 -0
  194. package/evals/fixtures/flow-kit-repository/invalid-id/kit.json +8 -0
  195. package/evals/fixtures/flow-kit-repository/invalid-malformed-json/kit.json +8 -0
  196. package/evals/fixtures/flow-kit-repository/invalid-missing-flow/kit.json +8 -0
  197. package/evals/fixtures/flow-kit-repository/invalid-missing-id/flows/review.flow.json +6 -0
  198. package/evals/fixtures/flow-kit-repository/invalid-missing-id/kit.json +7 -0
  199. package/evals/fixtures/flow-kit-repository/invalid-missing-schema-version/flows/review.flow.json +6 -0
  200. package/evals/fixtures/flow-kit-repository/invalid-missing-schema-version/kit.json +7 -0
  201. package/evals/fixtures/flow-kit-repository/invalid-name/flows/review.flow.json +6 -0
  202. package/evals/fixtures/flow-kit-repository/invalid-name/kit.json +8 -0
  203. package/evals/fixtures/flow-kit-repository/invalid-schema-version/flows/review.flow.json +6 -0
  204. package/evals/fixtures/flow-kit-repository/invalid-schema-version/kit.json +8 -0
  205. package/evals/fixtures/flow-kit-repository/invalid-traversal/kit.json +8 -0
  206. package/evals/fixtures/flow-kit-repository/mixed-runtime-kit/adapters/example.json +3 -0
  207. package/evals/fixtures/flow-kit-repository/mixed-runtime-kit/assets/example.txt +1 -0
  208. package/evals/fixtures/flow-kit-repository/mixed-runtime-kit/docs/README.md +3 -0
  209. package/evals/fixtures/flow-kit-repository/mixed-runtime-kit/flows/runtime.flow.json +26 -0
  210. package/evals/fixtures/flow-kit-repository/mixed-runtime-kit/kit-evals/example.json +3 -0
  211. package/evals/fixtures/flow-kit-repository/mixed-runtime-kit/kit-skills/mixed/SKILL.md +3 -0
  212. package/evals/fixtures/flow-kit-repository/mixed-runtime-kit/kit.json +44 -0
  213. package/evals/fixtures/flow-kit-repository/valid-local-kit/docs/README.md +3 -0
  214. package/evals/fixtures/flow-kit-repository/valid-local-kit/flows/review.flow.json +26 -0
  215. package/evals/fixtures/flow-kit-repository/valid-local-kit/kit.json +20 -0
  216. package/evals/fixtures/hook-influence/cases.json +336 -0
  217. package/evals/fixtures/pull-work-provider/github-issues.json +170 -0
  218. package/evals/fixtures/pull-work-wip-shepherding/global-wip-informs.json +43 -0
  219. package/evals/fixtures/pull-work-wip-shepherding/personal-wip-blocks.json +42 -0
  220. package/evals/fixtures/surface-trust/accepted-claim-trust-report.json +31 -0
  221. package/evals/fixtures/surface-trust/artifact-absent.json +19 -0
  222. package/evals/fixtures/surface-trust/integrity-mismatch-trust-report.json +32 -0
  223. package/evals/fixtures/surface-trust/missing-authority-trust-report.json +27 -0
  224. package/evals/fixtures/surface-trust/provider-absent.json +19 -0
  225. package/evals/fixtures/surface-trust/rejected-claim-trust-report.json +30 -0
  226. package/evals/fixtures/surface-trust/stale-claim-trust-snapshot.json +31 -0
  227. package/evals/fixtures/usage-feedback/sample-full.jsonl +11 -0
  228. package/evals/fixtures/usage-feedback/sample-outcomes.jsonl +1 -0
  229. package/evals/fixtures/veritas-governance-adapter/fake-veritas-pass.sh +18 -0
  230. package/evals/fixtures/veritas-governance-adapter/fake-veritas-secret-fail.sh +10 -0
  231. package/evals/fixtures/veritas-governance-adapter/fake-veritas-unconfigured.sh +4 -0
  232. package/evals/integration/test_bundle_install.sh +541 -0
  233. package/evals/integration/test_console_learning_projection.sh +192 -0
  234. package/evals/integration/test_context_map.sh +65 -0
  235. package/evals/integration/test_effective_backlog_settings.sh +58 -0
  236. package/evals/integration/test_fixture_retirement_audit.sh +58 -0
  237. package/evals/integration/test_flow_agents_statusline.sh +93 -0
  238. package/evals/integration/test_flow_kit_repository.sh +90 -0
  239. package/evals/integration/test_goal_fit_hook.sh +482 -0
  240. package/evals/integration/test_hook_category_behaviors.sh +190 -0
  241. package/evals/integration/test_hook_influence_cases.sh +69 -0
  242. package/evals/integration/test_local_flow_kit_install.sh +145 -0
  243. package/evals/integration/test_publish_change_helper.sh +176 -0
  244. package/evals/integration/test_pull_work_provider.sh +140 -0
  245. package/evals/integration/test_runtime_adapter_activation.sh +106 -0
  246. package/evals/integration/test_telemetry.sh +485 -0
  247. package/evals/integration/test_telemetry_doctor.sh +193 -0
  248. package/evals/integration/test_usage_feedback_dashboard.sh +169 -0
  249. package/evals/integration/test_usage_feedback_global.sh +117 -0
  250. package/evals/integration/test_usage_feedback_import.sh +227 -0
  251. package/evals/integration/test_usage_feedback_outcomes.sh +165 -0
  252. package/evals/integration/test_usage_feedback_report.sh +263 -0
  253. package/evals/integration/test_veritas_governance_adapter.sh +235 -0
  254. package/evals/integration/test_workflow_artifact_cleanup_audit.sh +287 -0
  255. package/evals/integration/test_workflow_artifacts.sh +1247 -0
  256. package/evals/integration/test_workflow_sidecar_writer.sh +2112 -0
  257. package/evals/integration/test_workflow_steering_hook.sh +337 -0
  258. package/evals/lib/assertions/delegated-to.js +40 -0
  259. package/evals/lib/assertions/max-tool-calls.js +15 -0
  260. package/evals/lib/assertions/no-write-tools.js +27 -0
  261. package/evals/lib/assertions/pass-at-k.js +39 -0
  262. package/evals/lib/assertions/telemetry-utils.js +105 -0
  263. package/evals/lib/assertions/tool-called.js +39 -0
  264. package/evals/lib/assertions/verify-after-fix.js +61 -0
  265. package/evals/lib/claude-judge.sh +40 -0
  266. package/evals/lib/claude-provider.sh +74 -0
  267. package/evals/lib/codex-judge.sh +39 -0
  268. package/evals/lib/codex-provider.sh +81 -0
  269. package/evals/lib/eval-dev.sh +5 -0
  270. package/evals/lib/eval-judge.sh +22 -0
  271. package/evals/lib/eval-provider.sh +26 -0
  272. package/evals/lib/eval-report.sh +73 -0
  273. package/evals/lib/kiro-dev.sh +4 -0
  274. package/evals/lib/kiro-judge.sh +17 -0
  275. package/evals/lib/kiro-provider.sh +62 -0
  276. package/evals/lib/node.sh +111 -0
  277. package/evals/promptfooconfig.yaml +70 -0
  278. package/evals/run.sh +309 -0
  279. package/evals/static/test_evidence_refs.sh +141 -0
  280. package/evals/static/test_package.sh +407 -0
  281. package/evals/static/test_repo_hooks.sh +68 -0
  282. package/evals/static/test_universal_bundles.sh +274 -0
  283. package/evals/static/test_workflow_skills.sh +1207 -0
  284. package/install.sh +64 -0
  285. package/integrations/veritas/flow-agents.adapter.json +138 -0
  286. package/integrations/veritas/flow-agents.authority-settings.json +26 -0
  287. package/integrations/veritas/flow-agents.repo-standards.json +82 -0
  288. package/kits/builder/flows/build.flow.json +218 -0
  289. package/kits/builder/flows/shape.flow.json +127 -0
  290. package/kits/builder/kit.json +19 -0
  291. package/kits/catalog.json +11 -0
  292. package/package.json +130 -0
  293. package/packaging/README.md +60 -0
  294. package/packaging/manifest.json +173 -0
  295. package/packaging/packs.json +69 -0
  296. package/powers/dependency-checker/POWER.md +20 -0
  297. package/powers/dependency-checker/mcp.json +20 -0
  298. package/powers/playwright/POWER.md +25 -0
  299. package/powers/playwright/mcp.json +12 -0
  300. package/prompts/code-audit.md +123 -0
  301. package/prompts/kcommit.md +88 -0
  302. package/schemas/backlog-provider-settings.schema.json +138 -0
  303. package/schemas/workflow-acceptance.schema.json +216 -0
  304. package/schemas/workflow-critique.schema.json +113 -0
  305. package/schemas/workflow-evidence.schema.json +357 -0
  306. package/schemas/workflow-handoff.schema.json +52 -0
  307. package/schemas/workflow-learning.schema.json +223 -0
  308. package/schemas/workflow-release.schema.json +172 -0
  309. package/schemas/workflow-state.schema.json +80 -0
  310. package/scripts/README.md +111 -0
  311. package/scripts/build-universal-bundles.js +3 -0
  312. package/scripts/check-content-boundary.cjs +99 -0
  313. package/scripts/context-budget/budget-scan.sh +166 -0
  314. package/scripts/detect-tools.sh +3 -0
  315. package/scripts/discover-agents.sh +28 -0
  316. package/scripts/effective-backlog-settings.js +2 -0
  317. package/scripts/filter-installed-packs.js +2 -0
  318. package/scripts/flow-kit.js +2 -0
  319. package/scripts/generate-context-map.js +2 -0
  320. package/scripts/git-status.sh +49 -0
  321. package/scripts/hooks/claude-hook-adapter.js +174 -0
  322. package/scripts/hooks/claude-telemetry-hook.js +115 -0
  323. package/scripts/hooks/codex-hook-adapter.js +176 -0
  324. package/scripts/hooks/codex-telemetry-hook.js +95 -0
  325. package/scripts/hooks/config-protection.js +79 -0
  326. package/scripts/hooks/desktop-notify.sh +39 -0
  327. package/scripts/hooks/governance-audit.sh +135 -0
  328. package/scripts/hooks/lib/audit-transport.sh +40 -0
  329. package/scripts/hooks/lib/hook-flags.js +49 -0
  330. package/scripts/hooks/lib/patterns.sh +57 -0
  331. package/scripts/hooks/lib/resolve-formatter.js +80 -0
  332. package/scripts/hooks/post-edit-accumulator.js +66 -0
  333. package/scripts/hooks/pre-commit-quality.js +194 -0
  334. package/scripts/hooks/quality-gate.js +93 -0
  335. package/scripts/hooks/report-only-guard.js +21 -0
  336. package/scripts/hooks/run-hook.js +136 -0
  337. package/scripts/hooks/stop-format-typecheck.js +141 -0
  338. package/scripts/hooks/stop-goal-fit.js +337 -0
  339. package/scripts/hooks/workflow-steering.js +250 -0
  340. package/scripts/install-codex-home.sh +106 -0
  341. package/scripts/package.json +3 -0
  342. package/scripts/promote-workflow-artifact.js +2 -0
  343. package/scripts/publish-change-helper.js +2 -0
  344. package/scripts/pull-work-provider.js +2 -0
  345. package/scripts/setup-repo-hooks.sh +8 -0
  346. package/scripts/statusline/flow-agents-statusline.js +157 -0
  347. package/scripts/telemetry/console-presets.sh +14 -0
  348. package/scripts/telemetry/install-console-config.sh +214 -0
  349. package/scripts/telemetry/lib/config.sh +85 -0
  350. package/scripts/telemetry/lib/enrich.sh +115 -0
  351. package/scripts/telemetry/lib/redact.sh +22 -0
  352. package/scripts/telemetry/lib/session.sh +63 -0
  353. package/scripts/telemetry/lib/transport.sh +183 -0
  354. package/scripts/telemetry/lib/usage.sh +29 -0
  355. package/scripts/telemetry/sync-agents.sh +173 -0
  356. package/scripts/telemetry/telemetry.conf +23 -0
  357. package/scripts/telemetry/telemetry.sh +387 -0
  358. package/scripts/usage-feedback.js +2 -0
  359. package/scripts/validate-hook-influence-cases.js +2 -0
  360. package/scripts/validate-package.sh +89 -0
  361. package/scripts/validate-source-tree.js +9 -0
  362. package/skills/agentic-engineering/SKILL.md +62 -0
  363. package/skills/browser-test/SKILL.md +51 -0
  364. package/skills/builder-shape/SKILL.md +76 -0
  365. package/skills/context-budget/SKILL.md +40 -0
  366. package/skills/deliver/SKILL.md +241 -0
  367. package/skills/dependency-update/SKILL.md +68 -0
  368. package/skills/design-probe/SKILL.md +107 -0
  369. package/skills/eval-rebuild/SKILL.md +39 -0
  370. package/skills/evidence-gate/SKILL.md +186 -0
  371. package/skills/execute-plan/SKILL.md +110 -0
  372. package/skills/explore/SKILL.md +137 -0
  373. package/skills/feedback-loop/SKILL.md +87 -0
  374. package/skills/fix-bug/SKILL.md +133 -0
  375. package/skills/frontend-design/SKILL.md +80 -0
  376. package/skills/github-cli/SKILL.md +63 -0
  377. package/skills/idea-to-backlog/SKILL.md +267 -0
  378. package/skills/knowledge-capture/SKILL.md +55 -0
  379. package/skills/learning-review/SKILL.md +115 -0
  380. package/skills/pickup-probe/SKILL.md +114 -0
  381. package/skills/plan-work/SKILL.md +176 -0
  382. package/skills/pull-work/SKILL.md +309 -0
  383. package/skills/release-readiness/SKILL.md +121 -0
  384. package/skills/review-work/SKILL.md +161 -0
  385. package/skills/search-first/SKILL.md +66 -0
  386. package/skills/tdd-workflow/SKILL.md +140 -0
  387. package/skills/verify-work/SKILL.md +109 -0
  388. package/src/cli/console-learning-projection.ts +140 -0
  389. package/src/cli/effective-backlog-settings.ts +99 -0
  390. package/src/cli/fixture-retirement-audit.ts +154 -0
  391. package/src/cli/flow-kit.ts +139 -0
  392. package/src/cli/init.ts +248 -0
  393. package/src/cli/promote-workflow-artifact.ts +64 -0
  394. package/src/cli/publish-change-helper.ts +143 -0
  395. package/src/cli/pull-work-provider.ts +481 -0
  396. package/src/cli/runtime-adapter.ts +24 -0
  397. package/src/cli/telemetry-doctor.ts +243 -0
  398. package/src/cli/usage-feedback.ts +418 -0
  399. package/src/cli/validate-hook-influence.ts +119 -0
  400. package/src/cli/validate-source-tree.ts +30 -0
  401. package/src/cli/validate-workflow-artifacts.ts +411 -0
  402. package/src/cli/veritas-governance.ts +322 -0
  403. package/src/cli/workflow-artifact-cleanup-audit.ts +281 -0
  404. package/src/cli/workflow-sidecar.ts +676 -0
  405. package/src/cli.ts +95 -0
  406. package/src/flow-kit/validate.ts +74 -0
  407. package/src/lib/args.ts +43 -0
  408. package/src/lib/fs.ts +62 -0
  409. package/src/lib/workflow-learning-projection.ts +491 -0
  410. package/src/runtime-adapters.ts +154 -0
  411. package/src/tools/build-universal-bundles.ts +366 -0
  412. package/src/tools/common.ts +61 -0
  413. package/src/tools/filter-installed-packs.ts +129 -0
  414. package/src/tools/generate-context-map.ts +199 -0
  415. package/src/tools/validate-package.ts +57 -0
  416. package/src/tools/validate-source-tree.ts +488 -0
  417. package/tsconfig.json +19 -0
  418. package/veritas.claims.json +6 -0
@@ -0,0 +1,227 @@
1
#!/usr/bin/env bash
# test_usage_feedback_import.sh - Layer 2: Usage feedback Codex import validation
#
# Drives scripts/usage-feedback.js import commands against telemetry fixtures
# and inspects the written session files with jq. Each check is tallied in the
# `pass` / `fail` counters instead of stopping the run, which is why `-e` is
# deliberately not enabled below.
set -uo pipefail

# Repository root: two directories above the directory holding this script.
ROOT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/../.." && pwd)"
# NOTE(review): presumably provides the flow_agents_node launcher used by the
# import checks further down - confirm against evals/lib/node.sh.
# shellcheck source=/dev/null
source "$ROOT_DIR/evals/lib/node.sh"
USAGE_FEEDBACK="$ROOT_DIR/scripts/usage-feedback.js"
FIXTURE_FULL="$ROOT_DIR/evals/fixtures/usage-feedback/sample-full.jsonl"

# Without `set -e` a failed mktemp would leave TMPDIR_EVAL empty, and every
# "$TMPDIR_EVAL/..." path used later would then resolve under "/". Stop here.
if ! TMPDIR_EVAL="$(mktemp -d /tmp/eval-usage-feedback-import.XXXXXX)" \
  || [[ -z "$TMPDIR_EVAL" ]]; then
  echo "test_usage_feedback_import.sh: could not create a temporary directory" >&2
  exit 1
fi
pass=0; fail=0

# Remove the scratch directory. Registered on EXIT so it runs on every exit path.
# Globals: TMPDIR_EVAL (read)
cleanup() {
  if [[ -n "${TMPDIR_EVAL:-}" ]]; then
    rm -rf -- "$TMPDIR_EVAL"
  fi
}
trap cleanup EXIT
14
+
15
# Record a passing check: print " ✓ <message>" and increment the pass counter.
# Globals:   pass (written)
# Arguments: $1 - description of the check (optional; defaults to empty so a
#                 call without a message does not abort the run under `set -u`)
# Outputs:   one line on stdout
_pass() {
  printf ' ✓ %s\n' "${1:-}"
  pass=$((pass + 1))
}
16
# Record a failing check: print " ✗ <message>" and increment the fail counter.
# Globals:   fail (written)
# Arguments: $1 - description of the failure (optional; defaults to empty so a
#                 call without a message does not abort the run under `set -u`)
# Outputs:   one line on stdout
_fail() {
  printf ' ✗ %s\n' "${1:-}"
  fail=$((fail + 1))
}
17
+
18
#######################################
# Locate the session file inside a telemetry directory.
# Lookup order: <dir>/sessions.jsonl, then <dir>/normalized-sessions.jsonl,
# then the first match (in byte-wise sorted order) for either file name that
# `find -maxdepth 2` reports under <dir>.
# Arguments: $1 - directory to search
# Outputs:   path of the file on stdout; nothing when no file is found or the
#            directory argument is empty
#######################################
_sessions_file() {
  local dir="${1:-}"
  # An empty argument would turn the checks below into lookups rooted at "/".
  [[ -n "$dir" ]] || return 0

  if [[ -f "$dir/sessions.jsonl" ]]; then
    printf '%s\n' "$dir/sessions.jsonl"
  elif [[ -f "$dir/normalized-sessions.jsonl" ]]; then
    printf '%s\n' "$dir/normalized-sessions.jsonl"
  else
    # find's traversal order is unspecified; sort so the pick is reproducible.
    find "$dir" -maxdepth 2 -type f \
      \( -name 'sessions.jsonl' -o -name 'normalized-sessions.jsonl' \) 2>/dev/null \
      | LC_ALL=C sort \
      | head -n 1
  fi
}
28
+
29
echo "=== Layer 2: Usage Feedback Runtime Import Validation ==="
echo ""

echo "--- Script Existence ---"
# Nothing else can run without the CLI under test, so a missing script prints
# the tally and ends the run immediately.
if [[ ! -f "$USAGE_FEEDBACK" ]]; then
  _fail "usage-feedback.js not found at $USAGE_FEEDBACK"
  echo ""
  echo "Result: $pass passed, $fail failed"
  exit 1
fi
_pass "usage-feedback.js exists"
41
+
42
printf '\n%s\n' "--- Fixtures ---"
# A missing fixture is recorded as a failure but does not stop the run.
if [[ ! -f "$FIXTURE_FULL" ]]; then
  _fail "sample Codex full.jsonl fixture missing"
else
  _pass "sample Codex full.jsonl fixture exists"
fi
49
+
50
# Scratch layout under the temporary directory: one input directory per
# runtime and one output directory per import performed below.
src_claude="$TMPDIR_EVAL/src-claude"
dst_claude="$TMPDIR_EVAL/dst-claude"
src="$TMPDIR_EVAL/src"
dst_a="$TMPDIR_EVAL/dst-a"
dst_b="$TMPDIR_EVAL/dst-b"
mkdir -p "$src_claude" "$dst_claude" "$src" "$dst_a" "$dst_b"

# Inline Claude Code telemetry for session claude-session-1: one user turn,
# one tool invocation, one delegation, and one usage summary event.
cat <<'JSONL' > "$src_claude/full.jsonl"
{"schema_version":"0.3.0","timestamp":"2026-05-04T12:00:00Z","session_id":"claude-session-1","event_id":"evt-1","event_type":"turn.user","agent":{"name":"dev","runtime":"claude-code"},"turn":{"prompt_text":"hello","prompt_length":5}}
{"schema_version":"0.3.0","timestamp":"2026-05-04T12:00:05Z","session_id":"claude-session-1","event_id":"evt-2","event_type":"tool.invoke","agent":{"name":"dev","runtime":"claude-code"},"tool":{"name":"Agent","normalized_name":"delegate to a specialist agent","input":{"subagent_type":"tool-planner"}}}
{"schema_version":"0.3.0","timestamp":"2026-05-04T12:00:05Z","session_id":"claude-session-1","event_id":"evt-2-delegate","event_type":"agent.delegate","agent":{"name":"dev","runtime":"claude-code"},"delegation":{"targets":["tool-planner"]}}
{"schema_version":"0.3.0","timestamp":"2026-05-04T12:01:00Z","session_id":"claude-session-1","event_id":"evt-3","event_type":"session.usage","agent":{"name":"dev","runtime":"claude-code"},"usage":{"model":"opus","duration_s":60,"tool_invocations":1,"delegations":1,"input_tokens":null,"output_tokens":null,"estimated_cost_usd":null}}
JSONL

# The Codex input is the checked-in sample fixture, copied under the name the
# importer is pointed at.
cp "$FIXTURE_FULL" "$src/full.jsonl"
65
+
66
echo ""
echo "--- Import Command ---"
# Import the staged Codex telemetry directory into dst_a with every identifier
# flag given explicitly; stderr is kept so a failure can be reported verbatim.
import_a_args=(
  import-codex
  --input-telemetry-dir "$src"
  --telemetry-dir "$dst_a"
  --source-id "repo-a"
  --repo "repo-a"
  --repo-root "/tmp/repo-a"
  --profile-id "codex-default"
  --prompt-id "deliver-v1"
  --skill-id "deliver"
)
if flow_agents_node "$USAGE_FEEDBACK" "${import_a_args[@]}" >/dev/null 2>"$TMPDIR_EVAL/import-a.err"; then
  _pass "import-codex imports from input telemetry dir"
else
  _fail "import-codex failed for input dir: $(cat "$TMPDIR_EVAL/import-a.err" 2>/dev/null)"
fi

# The import must have produced a session file somewhere under dst_a.
sessions_a="$(_sessions_file "$dst_a")"
if [[ -z "$sessions_a" || ! -f "$sessions_a" ]]; then
  _fail "import-codex did not write sessions.jsonl or normalized-sessions.jsonl"
else
  _pass "import-codex writes normalized session data"
fi
88
+
89
# Identifier checks on the repo-a import: total record count, plus the
# identifier fields of the first record whose session_id is codex-session-1.
# jq errors are silenced on purpose; an unreadable file simply yields empty
# values, which the comparison below reports as a mismatch.
codex_record='select(.session_id == "codex-session-1")'
session_count="$(jq -s 'length' "$sessions_a" 2>/dev/null)"
source_id="$(jq -r "$codex_record | .source_id" "$sessions_a" 2>/dev/null | head -n 1)"
repo="$(jq -r "$codex_record | .repo" "$sessions_a" 2>/dev/null | head -n 1)"
profile="$(jq -r "$codex_record | .profile_id" "$sessions_a" 2>/dev/null | head -n 1)"
prompt="$(jq -r "$codex_record | .prompt_id" "$sessions_a" 2>/dev/null | head -n 1)"
skill="$(jq -r "$codex_record | .skill_ids[0]" "$sessions_a" 2>/dev/null | head -n 1)"

if [[ "$session_count" -ge 2 \
  && "$source_id" == "repo-a" \
  && "$repo" == "repo-a" \
  && "$profile" == "codex-default" \
  && "$prompt" == "deliver-v1" \
  && "$skill" == "deliver" ]]; then
  _pass "normalized sessions include source/repo/profile/prompt/skill identifiers"
else
  _fail "normalized identifiers mismatch: count='$session_count' source='$source_id' repo='$repo' profile='$profile' prompt='$prompt' skill='$skill'"
fi
100
+
101
# Usage counters of codex-session-1 in the repo-a import.
# NOTE(review): the expected values (1/2/1/1) presumably mirror the events in
# sample-full.jsonl, which is not visible here - confirm against the fixture.
codex_record='select(.session_id == "codex-session-1")'
turns="$(jq -r "$codex_record | .turns" "$sessions_a" 2>/dev/null | head -n 1)"
tools="$(jq -r "$codex_record | .tool_invocations" "$sessions_a" 2>/dev/null | head -n 1)"
delegations="$(jq -r "$codex_record | .delegations" "$sessions_a" 2>/dev/null | head -n 1)"
permissions="$(jq -r "$codex_record | .permission_requests" "$sessions_a" 2>/dev/null | head -n 1)"

if [[ "$turns" == "1" \
  && "$tools" == "2" \
  && "$delegations" == "1" \
  && "$permissions" == "1" ]]; then
  _pass "normalized sessions preserve Codex usage counts"
else
  _fail "usage counts mismatch: turns='$turns' tools='$tools' delegations='$delegations' permissions='$permissions'"
fi
110
+
111
# Import the inline Claude Code telemetry through the runtime-generic
# import-telemetry command, writing into dst_claude.
import_claude_args=(
  import-telemetry
  --runtime claude-code
  --input-telemetry-dir "$src_claude"
  --telemetry-dir "$dst_claude"
  --source-id "repo-claude"
  --repo "repo-claude"
  --profile-id "claude-dev"
  --prompt-id "deliver-v1"
  --skill-id "deliver"
)
if flow_agents_node "$USAGE_FEEDBACK" "${import_claude_args[@]}" >/dev/null 2>"$TMPDIR_EVAL/import-claude.err"; then
  _pass "import-telemetry imports Claude Code full.jsonl"
else
  _fail "import-telemetry failed for Claude Code: $(cat "$TMPDIR_EVAL/import-claude.err" 2>/dev/null)"
fi

# claude-session-1 must keep its runtime label and the single tool invocation
# and delegation present in the staged events.
sessions_claude="$(_sessions_file "$dst_claude")"
claude_record='select(.session_id == "claude-session-1")'
claude_runtime="$(jq -r "$claude_record | .runtime" "$sessions_claude" 2>/dev/null | head -n 1)"
claude_tools="$(jq -r "$claude_record | .tool_invocations" "$sessions_claude" 2>/dev/null | head -n 1)"
claude_delegations="$(jq -r "$claude_record | .delegations" "$sessions_claude" 2>/dev/null | head -n 1)"

if [[ "$claude_runtime" == "claude-code" \
  && "$claude_tools" == "1" \
  && "$claude_delegations" == "1" ]]; then
  _pass "normalized sessions preserve Claude Code runtime and usage counts"
else
  _fail "Claude import mismatch: runtime='$claude_runtime' tools='$claude_tools' delegations='$claude_delegations'"
fi
134
+
# Import the same $src/full.jsonl a second time, this time passed directly via
# --input-full-jsonl and labelled with a different source/repo/profile/prompt.
135
+ if flow_agents_node "$USAGE_FEEDBACK" import-codex \
136
+ --input-full-jsonl "$src/full.jsonl" \
137
+ --telemetry-dir "$dst_b" \
138
+ --source-id "repo-b" \
139
+ --repo "repo-b" \
140
+ --profile-id "codex-experimental" \
141
+ --prompt-id "deliver-v2" \
142
+ --skill-id "deliver" >/dev/null 2>"$TMPDIR_EVAL/import-b.err"; then
143
+ _pass "import-codex imports from explicit full.jsonl"
144
+ else
145
+ _fail "import-codex failed for explicit full.jsonl: $(cat "$TMPDIR_EVAL/import-b.err" 2>/dev/null)"
146
+ fi
147
+
# The session imported into $dst_b must carry the source_id given on this
# second invocation ("repo-b").
148
+ sessions_b="$(_sessions_file "$dst_b")"
149
+ source_b=$(jq -r 'select(.session_id == "codex-session-1") | .source_id' "$sessions_b" 2>/dev/null | head -1)
150
+ if [[ "$source_b" == "repo-b" ]]; then
151
+ _pass "same fixture can import with a distinct source_id"
152
+ else
153
+ _fail "second import source_id mismatch: '$source_b'"
154
+ fi
155
+
# Fallback 1: import without --source-id. The check expects the resulting
# source_id to be "flow-agents".
# NOTE(review): that value presumably comes from metadata inside the fixture
# full.jsonl, which is not visible here - confirm.
156
+ dst_fallback="$TMPDIR_EVAL/dst-fallback"
157
+ mkdir -p "$dst_fallback"
158
+ if flow_agents_node "$USAGE_FEEDBACK" import-codex \
159
+ --input-full-jsonl "$src/full.jsonl" \
160
+ --telemetry-dir "$dst_fallback" >/dev/null 2>"$TMPDIR_EVAL/import-fallback.err"; then
161
+ sessions_fallback="$(_sessions_file "$dst_fallback")"
162
+ source_fallback=$(jq -r 'select(.session_id == "codex-session-1") | .source_id' "$sessions_fallback" 2>/dev/null | head -1)
163
+ if [[ "$source_fallback" == "flow-agents" ]]; then
164
+ _pass "import-codex preserves raw repo source fallback when source-id is omitted"
165
+ else
166
+ _fail "omitted source-id fallback mismatch: '$source_fallback'"
167
+ fi
168
+ else
169
+ _fail "import-codex failed without source-id: $(cat "$TMPDIR_EVAL/import-fallback.err" 2>/dev/null)"
170
+ fi
171
+
# Fallback 2: a one-event full.jsonl that carries only session_id, event_type
# and timestamp, imported from a directory named "src-no-metadata" with no
# --source-id. The check expects source_id to equal that directory name.
172
+ src_no_metadata="$TMPDIR_EVAL/src-no-metadata"
173
+ dst_no_metadata="$TMPDIR_EVAL/dst-no-metadata"
174
+ mkdir -p "$src_no_metadata" "$dst_no_metadata"
175
+ cat > "$src_no_metadata/full.jsonl" <<'JSONL'
176
+ {"session_id":"no-metadata-session","event_type":"turn.user","timestamp":"2026-05-04T12:00:00Z"}
177
+ JSONL
178
+ if flow_agents_node "$USAGE_FEEDBACK" import-codex \
179
+ --input-telemetry-dir "$src_no_metadata" \
180
+ --telemetry-dir "$dst_no_metadata" >/dev/null 2>"$TMPDIR_EVAL/import-no-metadata.err"; then
181
+ sessions_no_metadata="$(_sessions_file "$dst_no_metadata")"
182
+ source_no_metadata=$(jq -r 'select(.session_id == "no-metadata-session") | .source_id' "$sessions_no_metadata" 2>/dev/null | head -1)
183
+ if [[ "$source_no_metadata" == "src-no-metadata" ]]; then
184
+ _pass "import-codex uses input telemetry dir name when source metadata is absent"
185
+ else
186
+ _fail "input telemetry dir source fallback mismatch: '$source_no_metadata'"
187
+ fi
188
+ else
189
+ _fail "import-codex failed for metadata-free input dir: $(cat "$TMPDIR_EVAL/import-no-metadata.err" 2>/dev/null)"
190
+ fi
191
+
# Negative check: the target telemetry dir is itself a symlink. Here a
# non-zero exit is the passing outcome.
# NOTE(review): symlink-target is not created anywhere in the visible part of
# this script; if it is not created earlier either, the link dangles and the
# command could fail for a reason other than a symlink check - confirm.
192
+ ln -s "$TMPDIR_EVAL/symlink-target" "$TMPDIR_EVAL/symlink-dst"
193
+ if flow_agents_node "$USAGE_FEEDBACK" import-codex \
194
+ --input-full-jsonl "$src/full.jsonl" \
195
+ --telemetry-dir "$TMPDIR_EVAL/symlink-dst" >/dev/null 2>"$TMPDIR_EVAL/import-symlink.err"; then
196
+ _fail "import-codex accepted symlinked target telemetry dir"
197
+ else
198
+ _pass "import-codex rejects symlinked target telemetry dir"
199
+ fi
200
+
# Negative check: the target is a not-yet-existing "nested" directory below a
# symlinked parent. Besides the non-zero exit, the real directory behind the
# link must not have received nested/normalized-sessions.jsonl.
201
+ mkdir -p "$TMPDIR_EVAL/import-intermediate-target"
202
+ ln -s "$TMPDIR_EVAL/import-intermediate-target" "$TMPDIR_EVAL/import-intermediate-link"
203
+ if flow_agents_node "$USAGE_FEEDBACK" import-codex \
204
+ --input-full-jsonl "$src/full.jsonl" \
205
+ --telemetry-dir "$TMPDIR_EVAL/import-intermediate-link/nested" >/dev/null 2>"$TMPDIR_EVAL/import-symlink-parent.err"; then
206
+ _fail "import-codex accepted target telemetry dir with symlinked parent"
207
+ else
208
+ if [[ ! -e "$TMPDIR_EVAL/import-intermediate-target/nested/normalized-sessions.jsonl" ]]; then
209
+ _pass "import-codex rejects symlinked target telemetry parent before creating nested dirs"
210
+ else
211
+ _fail "import-codex wrote through symlinked target telemetry parent"
212
+ fi
213
+ fi
214
+
# Negative check: the input telemetry directory is a path this excerpt never
# creates ("missing"); the import is expected to exit non-zero.
215
+ if flow_agents_node "$USAGE_FEEDBACK" import-codex \
216
+ --input-telemetry-dir "$TMPDIR_EVAL/missing" \
217
+ --telemetry-dir "$TMPDIR_EVAL/missing-dst" \
218
+ --source-id "missing" \
219
+ --repo "missing" >/dev/null 2>"$TMPDIR_EVAL/missing.err"; then
220
+ _fail "import-codex accepted missing input telemetry"
221
+ else
222
+ _pass "import-codex rejects missing input telemetry"
223
+ fi
224
+
# Print the totals. The closing test is the last command of the script, so
# the script exits non-zero whenever at least one check failed.
225
+ echo ""
226
+ echo "Result: $pass passed, $fail failed"
227
+ [[ $fail -eq 0 ]]
@@ -0,0 +1,165 @@
#!/usr/bin/env bash
# test_usage_feedback_outcomes.sh - Layer 2: Usage feedback outcome validation
#
# Exercises `usage-feedback.js record-outcome` against a scratch telemetry
# directory and tallies the results in the `pass` / `fail` counters.
# errexit (-e) is not enabled: a failing check is counted, not fatal.
set -uo pipefail

# Repository root: two directories above the directory holding this script.
ROOT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/../.." && pwd)"
# Shared helper library; flow_agents_node, used by every check, is presumably
# defined there (not visible in this file).
source "$ROOT_DIR/evals/lib/node.sh"
USAGE_FEEDBACK="$ROOT_DIR/scripts/usage-feedback.js"
# Scratch directory for telemetry and captured stderr. Stop right away when
# it cannot be created: with an empty TMPDIR_EVAL every later path would
# collapse to "/<name>" and the checks would write at the filesystem root.
TMPDIR_EVAL=$(mktemp -d /tmp/eval-usage-feedback-outcomes.XXXXXX) || {
  echo "failed to create scratch directory under /tmp" >&2
  exit 1
}
pass=0; fail=0
10
+
# Remove the scratch directory. Registered on EXIT below, so it runs on every
# exit path of the script.
# The guard keeps an empty or unset TMPDIR_EVAL from ever reaching `rm -rf`,
# and `--` stops the path from being parsed as an option.
cleanup() {
  if [[ -n "${TMPDIR_EVAL:-}" ]]; then
    rm -rf -- "$TMPDIR_EVAL"
  fi
}
trap cleanup EXIT
13
+
# Report a passing check: print its check-marked description and count it.
#   $1 - description of the check that passed
_pass() {
  local label=$1
  echo " ✓ ${label}"
  pass=$(( pass + 1 ))
}
# Report a failing check: print its cross-marked description and count it.
#   $1 - description of the check that failed
_fail() {
  local label=$1
  echo " ✗ ${label}"
  fail=$(( fail + 1 ))
}
16
+
# Banner, then a precondition check: without usage-feedback.js none of the
# later checks can run, so the script prints the totals and exits 1 here.
17
+ echo "=== Layer 2: Usage Feedback Outcome Validation ==="
18
+ echo ""
19
+
20
+ echo "--- Script Existence ---"
21
+ if [[ -f "$USAGE_FEEDBACK" ]]; then
22
+ _pass "usage-feedback.js exists"
23
+ else
24
+ _fail "usage-feedback.js not found at $USAGE_FEEDBACK"
25
+ echo ""
26
+ echo "Result: $pass passed, $fail failed"
27
+ exit 1
28
+ fi
29
+
# Record one "success" outcome. The telemetry location is passed through the
# TELEMETRY_DATA_DIR environment variable for this single command; stdout is
# discarded and stderr is kept in success.err for the failure message.
30
+ echo ""
31
+ echo "--- Outcome Recording ---"
32
+ if TELEMETRY_DATA_DIR="$TMPDIR_EVAL" flow_agents_node "$USAGE_FEEDBACK" record-outcome \
33
+ --session-id "session-success" \
34
+ --runtime "codex" \
35
+ --repo "flow-agents" \
36
+ --agent "dev" \
37
+ --profile-id "codex-default" \
38
+ --prompt-id "deliver-v1" \
39
+ --skill-id "deliver" \
40
+ --result "success" \
41
+ --quality-score 5 \
42
+ --task-type "deliver" \
43
+ --task-slug "usage-feedback-success" \
44
+ --human-minutes-saved 12 \
45
+ --evidence ".flow-agents/agent-usage-feedback-loop/agent-usage-feedback-loop--deliver.md" >/dev/null 2>"$TMPDIR_EVAL/success.err"; then
46
+ _pass "record-outcome accepts success with profile/prompt/skill ids"
47
+ else
48
+ _fail "record-outcome rejected success: $(cat "$TMPDIR_EVAL/success.err" 2>/dev/null)"
49
+ fi
50
+
# Record one "failure" outcome. --skill-id is given twice and
# --rework-required is set; both are verified further down against
# outcomes.jsonl.
51
+ if TELEMETRY_DATA_DIR="$TMPDIR_EVAL" flow_agents_node "$USAGE_FEEDBACK" record-outcome \
52
+ --session-id "session-failure" \
53
+ --runtime "codex" \
54
+ --repo "flow-agents-docs" \
55
+ --agent "dev" \
56
+ --profile-id "codex-experimental" \
57
+ --prompt-id "deliver-v2" \
58
+ --skill-id "deliver" \
59
+ --skill-id "verify-work" \
60
+ --result "failure" \
61
+ --quality-score 2 \
62
+ --task-type "verify" \
63
+ --task-slug "usage-feedback-failure" \
64
+ --rework-required \
65
+ --notes "Fixture failure" >/dev/null 2>"$TMPDIR_EVAL/failure.err"; then
66
+ _pass "record-outcome accepts failure with multiple skill ids"
67
+ else
68
+ _fail "record-outcome rejected failure: $(cat "$TMPDIR_EVAL/failure.err" 2>/dev/null)"
69
+ fi
70
+
# Count the records that the record-outcome calls wrote to outcomes.jsonl.
OUTCOMES="$TMPDIR_EVAL/outcomes.jsonl"
# stderr is redirected before the input file is opened. Redirections are
# applied left to right, so with the previous order (`< file 2>/dev/null`)
# the shell still printed its own "No such file or directory" message when
# outcomes.jsonl was missing. With the file absent, line_count stays empty
# and the failure message reports 0. `tr` strips the padding some `wc`
# implementations add.
line_count=$(wc -l 2>/dev/null < "$OUTCOMES" | tr -d ' ')
if [[ "$line_count" == "2" ]]; then
  _pass "record-outcome appends two outcome records"
else
  _fail "expected 2 outcome records, found ${line_count:-0}"
fi
78
+
# Read back the "session-success" record. jq's stderr is discarded, so an
# unreadable file simply leaves the variables empty and the comparison fails.
# NOTE(review): unlike the import checks there is no `head -1` here, so
# several matching records would yield multi-line values - presumably each
# session id appears only once; confirm.
79
+ success_profile=$(jq -r 'select(.session_id == "session-success") | .profile_id' "$OUTCOMES" 2>/dev/null)
80
+ success_prompt=$(jq -r 'select(.session_id == "session-success") | .prompt_id' "$OUTCOMES" 2>/dev/null)
81
+ success_skill=$(jq -r 'select(.session_id == "session-success") | .skill_ids[0]' "$OUTCOMES" 2>/dev/null)
82
+ if [[ "$success_profile" == "codex-default" && "$success_prompt" == "deliver-v1" && "$success_skill" == "deliver" ]]; then
83
+ _pass "success outcome preserves profile, prompt, and skill ids"
84
+ else
85
+ _fail "success identifiers mismatch: profile='$success_profile' prompt='$success_prompt' skill='$success_skill'"
86
+ fi
87
+
# Read back the "session-failure" record: rework_required must be true and
# skill_ids must hold the two ids passed on the command line.
88
+ failure_rework=$(jq -r 'select(.session_id == "session-failure") | .rework_required' "$OUTCOMES" 2>/dev/null)
89
+ failure_skill_count=$(jq -r 'select(.session_id == "session-failure") | .skill_ids | length' "$OUTCOMES" 2>/dev/null)
90
+ if [[ "$failure_rework" == "true" && "$failure_skill_count" == "2" ]]; then
91
+ _pass "failure outcome preserves rework flag and multiple skill ids"
92
+ else
93
+ _fail "failure fields mismatch: rework='$failure_rework' skill_count='$failure_skill_count'"
94
+ fi
95
+
# Negative check: --result "excellent" is expected to be rejected, and the
# rejection must not append a record. The line count of outcomes.jsonl is
# taken before and after the call and compared.
# In both counts stderr is redirected before the input file is opened:
# redirections apply left to right, so `< file 2>/dev/null` would still let
# the shell print "No such file or directory" when outcomes.jsonl is missing.
before_invalid=$(wc -l 2>/dev/null < "$OUTCOMES" | tr -d ' ')
if TELEMETRY_DATA_DIR="$TMPDIR_EVAL" flow_agents_node "$USAGE_FEEDBACK" record-outcome \
  --session-id "session-invalid" \
  --runtime "codex" \
  --repo "flow-agents" \
  --agent "dev" \
  --profile-id "codex-default" \
  --prompt-id "deliver-v1" \
  --skill-id "deliver" \
  --result "excellent" >/dev/null 2>"$TMPDIR_EVAL/invalid-result.err"; then
  _fail "record-outcome accepted invalid result"
else
  after_invalid=$(wc -l 2>/dev/null < "$OUTCOMES" | tr -d ' ')
  if [[ "$after_invalid" == "$before_invalid" ]]; then
    _pass "record-outcome rejects invalid result without appending"
  else
    _fail "invalid result changed outcomes.jsonl line count from $before_invalid to $after_invalid"
  fi
fi
115
+
# Negative check: the same kind of call without --session-id is expected to
# exit non-zero.
116
+ if TELEMETRY_DATA_DIR="$TMPDIR_EVAL" flow_agents_node "$USAGE_FEEDBACK" record-outcome \
117
+ --runtime "codex" \
118
+ --repo "flow-agents" \
119
+ --agent "dev" \
120
+ --result "success" >/dev/null 2>"$TMPDIR_EVAL/missing-session.err"; then
121
+ _fail "record-outcome accepted missing session_id"
122
+ else
123
+ _pass "record-outcome rejects missing session_id"
124
+ fi
125
+
# Negative check: --telemetry-dir points at a symlink. Only the exit status
# is examined; non-zero counts as a pass.
# NOTE(review): symlink-target is never created in this script, so the link
# dangles, and the call passes fewer options than the successful calls above.
# A failure for either of those reasons would also satisfy this check -
# confirm the rejection really comes from the symlink guard.
126
+ ln -s "$TMPDIR_EVAL/symlink-target" "$TMPDIR_EVAL/symlink-telemetry"
127
+ if flow_agents_node "$USAGE_FEEDBACK" record-outcome \
128
+ --telemetry-dir "$TMPDIR_EVAL/symlink-telemetry" \
129
+ --session-id "session-symlink-dir" \
130
+ --result "success" >/dev/null 2>"$TMPDIR_EVAL/symlink-dir.err"; then
131
+ _fail "record-outcome accepted symlinked telemetry dir"
132
+ else
133
+ _pass "record-outcome rejects symlinked telemetry dir"
134
+ fi
135
+
# Negative check: the telemetry dir is a not-yet-existing "nested" directory
# below a symlinked parent. Besides the non-zero exit, the real directory
# behind the link must not have received nested/outcomes.jsonl.
136
+ mkdir -p "$TMPDIR_EVAL/intermediate-target"
137
+ ln -s "$TMPDIR_EVAL/intermediate-target" "$TMPDIR_EVAL/intermediate-link"
138
+ if flow_agents_node "$USAGE_FEEDBACK" record-outcome \
139
+ --telemetry-dir "$TMPDIR_EVAL/intermediate-link/nested" \
140
+ --session-id "session-symlink-parent" \
141
+ --result "success" >/dev/null 2>"$TMPDIR_EVAL/symlink-parent.err"; then
142
+ _fail "record-outcome accepted telemetry dir with symlinked parent"
143
+ else
144
+ if [[ ! -e "$TMPDIR_EVAL/intermediate-target/nested/outcomes.jsonl" ]]; then
145
+ _pass "record-outcome rejects symlinked telemetry parent before creating nested dirs"
146
+ else
147
+ _fail "record-outcome wrote through symlinked telemetry parent"
148
+ fi
149
+ fi
150
+
# Negative check: the telemetry dir is a real directory, but the
# outcomes.jsonl inside it is a symlink.
# NOTE(review): symlink-outcomes-target.jsonl is never created in this
# script, so this link dangles as well.
151
+ target_file_dir="$TMPDIR_EVAL/symlink-file-telemetry"
152
+ mkdir -p "$target_file_dir"
153
+ ln -s "$TMPDIR_EVAL/symlink-outcomes-target.jsonl" "$target_file_dir/outcomes.jsonl"
154
+ if flow_agents_node "$USAGE_FEEDBACK" record-outcome \
155
+ --telemetry-dir "$target_file_dir" \
156
+ --session-id "session-symlink-file" \
157
+ --result "success" >/dev/null 2>"$TMPDIR_EVAL/symlink-file.err"; then
158
+ _fail "record-outcome accepted symlinked outcomes target"
159
+ else
160
+ _pass "record-outcome rejects symlinked outcomes target"
161
+ fi
162
+
# Print the totals. The closing test is the last command of the script, so
# the script exits non-zero whenever at least one check failed.
163
+ echo ""
164
+ echo "Result: $pass passed, $fail failed"
165
+ [[ $fail -eq 0 ]]
@@ -0,0 +1,263 @@
#!/usr/bin/env bash
# test_usage_feedback_report.sh - Layer 2: Usage feedback report validation
#
# Exercises `usage-feedback.js report` against fixture telemetry copied into a
# scratch directory and tallies the results in the `pass` / `fail` counters.
# errexit (-e) is not enabled: a failing check is counted, not fatal.
set -uo pipefail

# Repository root: two directories above the directory holding this script.
ROOT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/../.." && pwd)"
# Shared helper library; flow_agents_node, used by every check, is presumably
# defined there (not visible in this file).
source "$ROOT_DIR/evals/lib/node.sh"
USAGE_FEEDBACK="$ROOT_DIR/scripts/usage-feedback.js"
# Sample telemetry (sample-full.jsonl, sample-outcomes.jsonl) used below.
FIXTURE_DIR="$ROOT_DIR/evals/fixtures/usage-feedback"
# Scratch directory for copied fixtures, reports and captured stderr. Stop
# right away when it cannot be created: with an empty TMPDIR_EVAL every later
# path would collapse to "/<name>".
TMPDIR_EVAL=$(mktemp -d /tmp/eval-usage-feedback-report.XXXXXX) || {
  echo "failed to create scratch directory under /tmp" >&2
  exit 1
}
pass=0; fail=0
11
+
# Delete the scratch directory; installed as the EXIT trap below so it runs
# however the script terminates.
# An empty or unset TMPDIR_EVAL is a no-op instead of being handed to
# `rm -rf`, and `--` ends option parsing before the path.
cleanup() {
  [[ -n "${TMPDIR_EVAL:-}" ]] || return 0
  rm -rf -- "$TMPDIR_EVAL"
}
trap cleanup EXIT
14
+
# Mark one check as passed: emit the check-marked line and bump `pass`.
#   $1 - text describing the check
_pass() {
  local text=$1
  echo " ✓ ${text}"
  pass=$(( 1 + pass ))
}
# Mark one check as failed: emit the cross-marked line and bump `fail`.
#   $1 - text describing the check
_fail() {
  local text=$1
  echo " ✗ ${text}"
  fail=$(( 1 + fail ))
}
17
+
# Banner, then a precondition check: without usage-feedback.js none of the
# later checks can run, so the script prints the totals and exits 1 here.
18
+ echo "=== Layer 2: Usage Feedback Report Validation ==="
19
+ echo ""
20
+
21
+ echo "--- Script Existence ---"
22
+ if [[ -f "$USAGE_FEEDBACK" ]]; then
23
+ _pass "usage-feedback.js exists"
24
+ else
25
+ _fail "usage-feedback.js not found at $USAGE_FEEDBACK"
26
+ echo ""
27
+ echo "Result: $pass passed, $fail failed"
28
+ exit 1
29
+ fi
30
+
# Build two telemetry directories in the scratch area. repo-a gets the
# fixture's sample-full.jsonl and sample-outcomes.jsonl; repo-b gets the same
# sample-full.jsonl plus a single hand-written failure outcome for
# codex-session-2.
# NOTE(review): the cp/mkdir results are not checked; a missing fixture would
# only show up through the later report checks.
31
+ echo ""
32
+ tmp_a="$TMPDIR_EVAL/repo-a"
33
+ tmp_b="$TMPDIR_EVAL/repo-b"
34
+ mkdir -p "$tmp_a" "$tmp_b"
35
+ cp "$FIXTURE_DIR/sample-full.jsonl" "$tmp_a/full.jsonl"
36
+ cp "$FIXTURE_DIR/sample-outcomes.jsonl" "$tmp_a/outcomes.jsonl"
37
+ cp "$FIXTURE_DIR/sample-full.jsonl" "$tmp_b/full.jsonl"
38
+ cat > "$tmp_b/outcomes.jsonl" <<'JSONL'
39
+ {"schema_version":"1","outcome_id":"outcome-2","recorded_at":"2026-05-04T11:30:00Z","session_id":"codex-session-2","runtime":"codex","repo":"repo-b","agent":"dev","profile_id":"codex-experimental","prompt_id":"deliver-v2","prompt_variant":"concise","skill_ids":["deliver","verify-work"],"task_type":"verify","task_slug":"usage-feedback-report","result":"failure","quality_score":2,"human_minutes_saved":0,"rework_required":true,"notes":"Fixture failure outcome","evidence":["evals/integration/test_usage_feedback_report.sh"]}
40
+ JSONL
41
+
# Produce one JSON report over both telemetry directories, grouped by repo.
# The report is captured from stdout into report.json; stderr goes to
# report-json.err for the failure message.
42
+ echo "--- JSON Report ---"
43
+ json_report="$TMPDIR_EVAL/report.json"
44
+ if flow_agents_node "$USAGE_FEEDBACK" report \
45
+ --telemetry-dir "$tmp_a" \
46
+ --telemetry-dir "$tmp_b" \
47
+ --format json \
48
+ --group-by repo >"$json_report" 2>"$TMPDIR_EVAL/report-json.err"; then
49
+ _pass "report emits JSON for multiple telemetry dirs"
50
+ else
51
+ _fail "JSON report failed: $(cat "$TMPDIR_EVAL/report-json.err" 2>/dev/null)"
52
+ fi
53
+
# Summary shape: at least 4 sessions, at least 2 with outcomes, a non-null
# success_rate, and at least 2 sources. `jq -e` turns a false/null result
# into a non-zero exit status.
54
+ if jq -e '.summary.sessions >= 4 and .summary.sessions_with_outcomes >= 2 and (.summary.success_rate != null) and (.sources | length >= 2)' "$json_report" >/dev/null 2>&1; then
55
+ _pass "JSON report includes summary sessions, outcomes, success rate, and sources"
56
+ else
57
+ _fail "JSON report missing expected summary/source fields"
58
+ fi
59
+
# A group labelled "flow-agents" must exist; the label is accepted under any
# of the keys .key, .group or .name.
60
+ if jq -e '.groups[]? | select((.key == "flow-agents") or (.group == "flow-agents") or (.name == "flow-agents"))' "$json_report" >/dev/null 2>&1; then
61
+ _pass "JSON report groups by repo"
62
+ else
63
+ _fail "JSON report did not include repo group"
64
+ fi
65
+
# Sessions without a recorded outcome must still be counted: the session
# total has to exceed the number of sessions with outcomes.
66
+ if jq -e '.summary.sessions > .summary.sessions_with_outcomes' "$json_report" >/dev/null 2>&1; then
67
+ _pass "report includes sessions without outcomes in usage totals"
68
+ else
69
+ _fail "report did not distinguish sessions without outcomes"
70
+ fi
71
+
# Write a report grouped by profile_id to $tmp_a/reports/usage.md through
# --output. No --format is passed; the checks below expect Markdown.
72
+ echo ""
73
+ echo "--- Markdown Report ---"
74
+ markdown_report="$tmp_a/reports/usage.md"
75
+ if flow_agents_node "$USAGE_FEEDBACK" report \
76
+ --telemetry-dir "$tmp_a" \
77
+ --group-by profile_id \
78
+ --output "$markdown_report" >/dev/null 2>"$TMPDIR_EVAL/report-md.err"; then
79
+ _pass "report writes Markdown output file"
80
+ else
81
+ _fail "Markdown report failed: $(cat "$TMPDIR_EVAL/report-md.err" 2>/dev/null)"
82
+ fi
83
+
# The output file must exist as a regular file.
84
+ if [[ -f "$markdown_report" ]]; then
85
+ _pass "Markdown report output file exists"
86
+ else
87
+ _fail "Markdown report output file missing"
88
+ fi
89
+
# Content check: the title heading, three metric labels and the
# "codex-default" profile group must all be present.
# NOTE(review): when the file is missing each grep prints its own error to
# stderr, since only stdout is silenced by -q.
90
+ if grep -q "# Agent Usage Feedback Report" "$markdown_report" && \
91
+ grep -q "Success rate" "$markdown_report" && \
92
+ grep -q "Avg tool invocations" "$markdown_report" && \
93
+ grep -q "Rework rate" "$markdown_report" && \
94
+ grep -q "codex-default" "$markdown_report"; then
95
+ _pass "Markdown report includes required headings, metrics, and profile group"
96
+ else
97
+ _fail "Markdown report missing required content"
98
+ fi
99
+
# Relative --output "reports/relative.md", run from $TMPDIR_EVAL in a
# subshell so the working directory change does not leak. The file is
# expected under the telemetry dir ($tmp_a/reports/relative.md), not under
# the current directory.
100
+ relative_report="$tmp_a/reports/relative.md"
101
+ if (cd "$TMPDIR_EVAL" && flow_agents_node "$USAGE_FEEDBACK" report \
102
+ --telemetry-dir "$tmp_a" \
103
+ --group-by profile_id \
104
+ --output reports/relative.md >/dev/null 2>"$TMPDIR_EVAL/report-relative.err") && [[ -f "$relative_report" ]]; then
105
+ _pass "report writes reports/name.md relative to telemetry reports directory"
106
+ else
107
+ _fail "relative reports/name.md output failed: $(cat "$TMPDIR_EVAL/report-relative.err" 2>/dev/null)"
108
+ fi
109
+
# Relative --output that already starts with the telemetry dir's own name
# ("repo-a/reports/usage-feedback.md"). The file must land at
# $tmp_a/reports/usage-feedback.md, and the doubled path
# $tmp_a/reports/repo-a/reports/usage-feedback.md must not exist.
110
+ nested_guard_report="$tmp_a/reports/usage-feedback.md"
111
+ if (cd "$TMPDIR_EVAL" && flow_agents_node "$USAGE_FEEDBACK" report \
112
+ --telemetry-dir "$tmp_a" \
113
+ --group-by profile_id \
114
+ --output "$(basename "$tmp_a")/reports/usage-feedback.md" >/dev/null 2>"$TMPDIR_EVAL/report-nested-guard.err") && \
115
+ [[ -f "$nested_guard_report" && ! -e "$tmp_a/reports/$(basename "$tmp_a")/reports/usage-feedback.md" ]]; then
116
+ _pass "report prevents nested telemetry reports duplication for relative output"
117
+ else
118
+ _fail "nested report output guard failed: $(cat "$TMPDIR_EVAL/report-nested-guard.err" 2>/dev/null)"
119
+ fi
120
+
# Negative check: $markdown_report was already written by the Markdown report
# check, so a second write to the same path without --force is expected to
# exit non-zero.
121
+ if flow_agents_node "$USAGE_FEEDBACK" report \
122
+ --telemetry-dir "$tmp_a" \
123
+ --group-by profile_id \
124
+ --output "$markdown_report" >/dev/null 2>"$TMPDIR_EVAL/report-overwrite.err"; then
125
+ _fail "report overwrote existing output without --force"
126
+ else
127
+ _pass "report rejects existing output without --force"
128
+ fi
129
+
# The same write with --force is expected to succeed.
130
+ if flow_agents_node "$USAGE_FEEDBACK" report \
131
+ --telemetry-dir "$tmp_a" \
132
+ --group-by profile_id \
133
+ --output "$markdown_report" \
134
+ --force >/dev/null 2>"$TMPDIR_EVAL/report-force.err"; then
135
+ _pass "report overwrites existing output with --force"
136
+ else
137
+ _fail "report --force failed: $(cat "$TMPDIR_EVAL/report-force.err" 2>/dev/null)"
138
+ fi
139
+
# Negative check: an absolute --output outside $tmp_a/reports
# ($TMPDIR_EVAL/outside.md) is expected to be refused.
140
+ if flow_agents_node "$USAGE_FEEDBACK" report \
141
+ --telemetry-dir "$tmp_a" \
142
+ --output "$TMPDIR_EVAL/outside.md" >/dev/null 2>"$TMPDIR_EVAL/report-outside.err"; then
143
+ _fail "report accepted output outside telemetry reports directory"
144
+ else
145
+ _pass "report rejects output outside telemetry reports directory"
146
+ fi
147
+
# Negative check: the output file itself is a symlink, and --force is given.
# Only the exit status is examined; non-zero counts as a pass.
# NOTE(review): symlink-report-target.md is never created in this script, so
# the link dangles.
148
+ ln -s "$TMPDIR_EVAL/symlink-report-target.md" "$tmp_a/reports/symlink.md"
149
+ if flow_agents_node "$USAGE_FEEDBACK" report \
150
+ --telemetry-dir "$tmp_a" \
151
+ --output "$tmp_a/reports/symlink.md" \
152
+ --force >/dev/null 2>"$TMPDIR_EVAL/report-symlink.err"; then
153
+ _fail "report accepted symlinked output target"
154
+ else
155
+ _pass "report rejects symlinked output target"
156
+ fi
157
+
# Negative check: the "reports" directory inside the telemetry dir is a
# symlink.
# NOTE(review): report-parent-target is never created in this script, so this
# link dangles too; a failure to create the output for that reason would also
# satisfy the check.
158
+ tmp_symlink_reports="$TMPDIR_EVAL/symlink-reports"
159
+ mkdir -p "$tmp_symlink_reports"
160
+ cp "$FIXTURE_DIR/sample-full.jsonl" "$tmp_symlink_reports/full.jsonl"
161
+ ln -s "$TMPDIR_EVAL/report-parent-target" "$tmp_symlink_reports/reports"
162
+ if flow_agents_node "$USAGE_FEEDBACK" report \
163
+ --telemetry-dir "$tmp_symlink_reports" \
164
+ --output usage.md >/dev/null 2>"$TMPDIR_EVAL/report-symlink-parent.err"; then
165
+ _fail "report accepted symlinked reports directory"
166
+ else
167
+ _pass "report rejects symlinked reports directory"
168
+ fi
169
+
# Negative check: the telemetry dir is a "nested" path below a symlinked
# parent. Besides the non-zero exit, nothing may have been written to
# nested/reports/usage.md behind the link.
# NOTE(review): "nested" is never created and holds no telemetry, so the
# command may also fail for lack of input - confirm which guard triggers.
170
+ mkdir -p "$TMPDIR_EVAL/report-intermediate-target"
171
+ ln -s "$TMPDIR_EVAL/report-intermediate-target" "$TMPDIR_EVAL/report-intermediate-link"
172
+ if flow_agents_node "$USAGE_FEEDBACK" report \
173
+ --telemetry-dir "$TMPDIR_EVAL/report-intermediate-link/nested" \
174
+ --output usage.md >/dev/null 2>"$TMPDIR_EVAL/report-symlink-telemetry-parent.err"; then
175
+ _fail "report accepted telemetry dir with symlinked parent"
176
+ else
177
+ if [[ ! -e "$TMPDIR_EVAL/report-intermediate-target/nested/reports/usage.md" ]]; then
178
+ _pass "report rejects symlinked telemetry parent before creating report dirs"
179
+ else
180
+ _fail "report wrote through symlinked telemetry parent"
181
+ fi
182
+ fi
183
+
# A telemetry dir named "raw-source-name" holding a single event with only
# session_id, event_type and timestamp. The JSON report's .sources array is
# expected to be exactly ["raw-source-name"].
# NOTE(review): the failure message quotes only the command's stderr, so it
# is empty when the command succeeded and the jq comparison failed.
184
+ tmp_raw="$TMPDIR_EVAL/raw-source-name"
185
+ mkdir -p "$tmp_raw"
186
+ cat > "$tmp_raw/full.jsonl" <<'JSONL'
187
+ {"session_id":"raw-session","event_type":"turn.user","timestamp":"2026-05-04T12:00:00Z"}
188
+ JSONL
189
+ raw_report="$TMPDIR_EVAL/raw-report.json"
190
+ if flow_agents_node "$USAGE_FEEDBACK" report \
191
+ --telemetry-dir "$tmp_raw" \
192
+ --format json >"$raw_report" 2>"$TMPDIR_EVAL/report-raw.err" && \
193
+ jq -e '.sources == ["raw-source-name"]' "$raw_report" >/dev/null 2>&1; then
194
+ _pass "raw telemetry without source metadata groups by telemetry directory name"
195
+ else
196
+ _fail "raw telemetry source fallback failed: $(cat "$TMPDIR_EVAL/report-raw.err" 2>/dev/null)"
197
+ fi
198
+
# A normalized session whose profile_id contains a pipe, an escaped newline
# (\n inside the JSON string), angle brackets and an ampersand.
199
+ tmp_escape="$TMPDIR_EVAL/escape-source"
200
+ mkdir -p "$tmp_escape"
201
+ cat > "$tmp_escape/normalized-sessions.jsonl" <<'JSONL'
202
+ {"schema_version":"1","source_id":"escape-source","runtime":"codex","session_id":"escape-session","profile_id":"alpha|beta\nbreak <tag> & value","skill_ids":[],"turns":0,"tool_invocations":0,"delegations":0,"permission_requests":0}
203
+ JSONL
# Report written to stdout (no --format). The grep pattern is a basic
# regular expression in which `\\` matches one literal backslash, so the
# text looked for is: alpha\|beta break &lt;tag&gt; &amp; value
# That is the pipe backslash-escaped, the newline replaced by a space, and
# <, >, & written as HTML entities.
204
+ escape_report="$TMPDIR_EVAL/escape.md"
205
+ if flow_agents_node "$USAGE_FEEDBACK" report \
206
+ --telemetry-dir "$tmp_escape" \
207
+ --group-by profile_id >"$escape_report" 2>"$TMPDIR_EVAL/report-escape.err" && \
208
+ grep -q 'alpha\\|beta break &lt;tag&gt; &amp; value' "$escape_report"; then
209
+ _pass "Markdown report escapes table labels, HTML chars, and strips newlines"
210
+ else
211
+ _fail "Markdown report label escaping failed: $(cat "$TMPDIR_EVAL/report-escape.err" 2>/dev/null)"
212
+ fi
213
+
# The same data as an HTML report written through --output: the entity form
# must be present and the raw "<tag>" must not appear anywhere in the file.
214
+ escape_html="$tmp_escape/reports/escape.html"
215
+ if flow_agents_node "$USAGE_FEEDBACK" report \
216
+ --telemetry-dir "$tmp_escape" \
217
+ --group-by profile_id \
218
+ --format html \
219
+ --output "$escape_html" >"$TMPDIR_EVAL/report-html-escape.out" 2>"$TMPDIR_EVAL/report-html-escape.err" && \
220
+ grep -q '&lt;tag&gt; &amp; value' "$escape_html" && \
221
+ ! grep -q '<tag>' "$escape_html"; then
222
+ _pass "HTML report escapes local telemetry labels"
223
+ else
224
+ _fail "HTML report escaping failed: $(cat "$TMPDIR_EVAL/report-html-escape.err" 2>/dev/null)"
225
+ fi
226
+
# Smoke test: a report over the copied fixture in $tmp_a, grouped by runtime,
# only has to exit successfully.
227
+ echo ""
228
+ echo "--- Fixture Report Smoke ---"
229
+ fixture_report="$TMPDIR_EVAL/fixture-runtime.md"
230
+ if flow_agents_node "$USAGE_FEEDBACK" report \
231
+ --telemetry-dir "$tmp_a" \
232
+ --group-by runtime >"$fixture_report" 2>"$TMPDIR_EVAL/fixture.err"; then
233
+ _pass "report works against copied fixture telemetry"
234
+ else
235
+ _fail "fixture report failed: $(cat "$TMPDIR_EVAL/fixture.err" 2>/dev/null)"
236
+ fi
237
+
# Point the report straight at $FIXTURE_DIR, where the files carry their
# sample-* names rather than full.jsonl / outcomes.jsonl. Grouped by repo,
# the output must contain "Sessions: 2" and "flow-agents-docs".
238
+ direct_fixture_report="$TMPDIR_EVAL/direct-fixture-repo.md"
239
+ if flow_agents_node "$USAGE_FEEDBACK" report \
240
+ --telemetry-dir "$FIXTURE_DIR" \
241
+ --group-by repo >"$direct_fixture_report" 2>"$TMPDIR_EVAL/direct-fixture-repo.err" && \
242
+ grep -q "Sessions: 2" "$direct_fixture_report" && \
243
+ grep -q "flow-agents-docs" "$direct_fixture_report"; then
244
+ _pass "report reads sample fixture names directly for repo groups"
245
+ else
246
+ _fail "direct fixture repo report failed: $(cat "$TMPDIR_EVAL/direct-fixture-repo.err" 2>/dev/null)"
247
+ fi
248
+
# Same direct read, grouped by profile_id: both "codex-default" and
# "codex-experimental" must be listed.
249
+ direct_profile_report="$TMPDIR_EVAL/direct-fixture-profile.md"
250
+ if flow_agents_node "$USAGE_FEEDBACK" report \
251
+ --telemetry-dir "$FIXTURE_DIR" \
252
+ --group-by profile_id >"$direct_profile_report" 2>"$TMPDIR_EVAL/direct-fixture-profile.err" && \
253
+ grep -q "Sessions: 2" "$direct_profile_report" && \
254
+ grep -q "codex-default" "$direct_profile_report" && \
255
+ grep -q "codex-experimental" "$direct_profile_report"; then
256
+ _pass "report reads sample fixture names directly for profile groups"
257
+ else
258
+ _fail "direct fixture profile report failed: $(cat "$TMPDIR_EVAL/direct-fixture-profile.err" 2>/dev/null)"
259
+ fi
260
+
# Print the totals. The closing test is the last command of the script, so
# the script exits non-zero whenever at least one check failed.
261
+ echo ""
262
+ echo "Result: $pass passed, $fail failed"
263
+ [[ $fail -eq 0 ]]