@sanity/ailf 2.0.0 → 2.0.2

This diff shows the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (444)
  1. package/canonical/grader-references/README.md +2 -2
  2. package/canonical/reference-solutions/content-lake/mutations.ts +160 -0
  3. package/canonical/reference-solutions/content-lake/realtime.ts +187 -0
  4. package/canonical/reference-solutions/image-handling/asset-pipeline.tsx +166 -0
  5. package/canonical/reference-solutions/portable-text/custom-blocks.ts +204 -0
  6. package/canonical/reference-solutions/portable-text/rendering.tsx +163 -0
  7. package/config/features.ts +1 -1
  8. package/config/models.ts +28 -23
  9. package/config/sources.ts +1 -1
  10. package/config/thresholds.ts +1 -1
  11. package/dist/_vendor/ailf-core/__tests__/comparison-formatters.test.d.ts +10 -0
  12. package/dist/_vendor/ailf-core/__tests__/comparison-formatters.test.js +185 -0
  13. package/dist/_vendor/ailf-core/artifact-capture/__tests__/noop-collector.test.d.ts +6 -0
  14. package/dist/_vendor/ailf-core/artifact-capture/__tests__/noop-collector.test.js +42 -0
  15. package/dist/_vendor/ailf-core/artifact-capture/noop-collector.d.ts +14 -0
  16. package/dist/_vendor/ailf-core/artifact-capture/noop-collector.js +25 -0
  17. package/dist/_vendor/ailf-core/config-helpers.d.ts +6 -0
  18. package/dist/_vendor/ailf-core/config-helpers.js +29 -0
  19. package/dist/_vendor/ailf-core/examples/index.d.ts +164 -94
  20. package/dist/_vendor/ailf-core/examples/index.js +208 -114
  21. package/dist/_vendor/ailf-core/index.d.ts +1 -0
  22. package/dist/_vendor/ailf-core/index.js +1 -0
  23. package/dist/_vendor/ailf-core/ports/artifact-collector.d.ts +94 -0
  24. package/dist/_vendor/ailf-core/ports/artifact-collector.js +13 -0
  25. package/dist/_vendor/ailf-core/ports/capture-comparator.d.ts +138 -0
  26. package/dist/_vendor/ailf-core/ports/capture-comparator.js +10 -0
  27. package/dist/_vendor/ailf-core/ports/context.d.ts +20 -1
  28. package/dist/_vendor/ailf-core/ports/eval-runner.d.ts +6 -0
  29. package/dist/_vendor/ailf-core/ports/index.d.ts +2 -0
  30. package/dist/_vendor/ailf-core/ports/pipeline-step.d.ts +11 -0
  31. package/dist/_vendor/ailf-core/ports/task-source.d.ts +3 -3
  32. package/dist/_vendor/ailf-core/ports/task-source.js +3 -3
  33. package/dist/_vendor/ailf-core/schemas/pipeline-request.d.ts +6 -1
  34. package/dist/_vendor/ailf-core/schemas/pipeline-request.js +14 -2
  35. package/dist/_vendor/ailf-core/services/config-helpers.d.ts +16 -1
  36. package/dist/_vendor/ailf-core/services/config-helpers.js +21 -0
  37. package/dist/_vendor/ailf-core/services/index.d.ts +1 -1
  38. package/dist/_vendor/ailf-core/services/index.js +1 -1
  39. package/dist/_vendor/ailf-core/services/scoring.js +9 -0
  40. package/dist/_vendor/ailf-core/types/generalized-task.d.ts +12 -1
  41. package/dist/_vendor/ailf-core/types/generalized-task.js +1 -1
  42. package/dist/_vendor/ailf-core/types/index.d.ts +47 -4
  43. package/dist/_vendor/ailf-core/types/plugin-registry.d.ts +27 -0
  44. package/dist/_vendor/ailf-shared/eval-modes.d.ts +15 -0
  45. package/dist/_vendor/ailf-shared/eval-modes.js +18 -0
  46. package/dist/_vendor/ailf-tasks/cli.d.ts +8 -0
  47. package/dist/_vendor/ailf-tasks/cli.js +61 -0
  48. package/dist/_vendor/ailf-tasks/index.d.ts +13 -0
  49. package/dist/_vendor/ailf-tasks/index.js +16 -0
  50. package/dist/_vendor/ailf-tasks/parser.d.ts +27 -0
  51. package/dist/_vendor/ailf-tasks/parser.js +73 -0
  52. package/dist/_vendor/ailf-tasks/schemas.d.ts +198 -0
  53. package/dist/_vendor/ailf-tasks/schemas.js +180 -0
  54. package/dist/_vendor/ailf-tasks/validation.d.ts +47 -0
  55. package/dist/_vendor/ailf-tasks/validation.js +162 -0
  56. package/dist/adapters/api-client/remediation.js +2 -2
  57. package/dist/adapters/config-sources/file-config-adapter.js +6 -1
  58. package/dist/adapters/eval-runners/promptfoo-eval-adapter.js +8 -2
  59. package/dist/adapters/index.d.ts +0 -1
  60. package/dist/adapters/index.js +0 -1
  61. package/dist/adapters/task-sources/composite-task-source.d.ts +1 -1
  62. package/dist/adapters/task-sources/composite-task-source.js +1 -1
  63. package/dist/adapters/task-sources/content-lake-task-source.d.ts +4 -6
  64. package/dist/adapters/task-sources/content-lake-task-source.js +4 -6
  65. package/dist/adapters/task-sources/index.d.ts +1 -2
  66. package/dist/adapters/task-sources/index.js +1 -2
  67. package/dist/adapters/task-sources/repo-schemas.d.ts +1 -1
  68. package/dist/adapters/task-sources/repo-schemas.js +2 -2
  69. package/dist/adapters/task-sources/repo-task-source.js +1 -1
  70. package/dist/adapters/task-sources/repo-trigger.d.ts +1 -1
  71. package/dist/adapters/task-sources/repo-trigger.js +1 -1
  72. package/dist/adapters/task-sources/task-file-loader.d.ts +9 -6
  73. package/dist/adapters/task-sources/task-file-loader.js +20 -6
  74. package/dist/agent-observer/test-imports.d.ts +7 -0
  75. package/dist/agent-observer/test-imports.js +185 -0
  76. package/dist/artifact-capture/comparator.d.ts +22 -0
  77. package/dist/artifact-capture/comparator.js +493 -0
  78. package/dist/artifact-capture/filesystem-collector.d.ts +42 -0
  79. package/dist/artifact-capture/filesystem-collector.js +237 -0
  80. package/dist/artifact-capture/redact-artifact.d.ts +20 -0
  81. package/dist/artifact-capture/redact-artifact.js +115 -0
  82. package/dist/assertions/source-isolation.d.ts +1 -1
  83. package/dist/assertions/source-isolation.js +1 -1
  84. package/dist/cli.js +4 -0
  85. package/dist/commands/calculate-scores.js +1 -0
  86. package/dist/commands/capture-compare.d.ts +15 -0
  87. package/dist/commands/capture-compare.js +253 -0
  88. package/dist/commands/capture-list.d.ts +12 -0
  89. package/dist/commands/capture-list.js +147 -0
  90. package/dist/commands/capture.d.ts +9 -0
  91. package/dist/commands/capture.js +16 -0
  92. package/dist/commands/chronic-failures.d.ts +8 -0
  93. package/dist/commands/chronic-failures.js +33 -0
  94. package/dist/commands/explain-handler.d.ts +1 -1
  95. package/dist/commands/explain-handler.js +37 -8
  96. package/dist/commands/fetch-docs.js +1 -0
  97. package/dist/commands/generate-configs.d.ts +3 -3
  98. package/dist/commands/generate-configs.js +20 -8
  99. package/dist/commands/init.d.ts +2 -3
  100. package/dist/commands/init.js +56 -170
  101. package/dist/commands/pipeline-action.d.ts +7 -1
  102. package/dist/commands/pipeline-action.js +43 -19
  103. package/dist/commands/pipeline.d.ts +6 -1
  104. package/dist/commands/pipeline.js +7 -2
  105. package/dist/commands/pr-comment.js +1 -0
  106. package/dist/commands/publish.js +1 -0
  107. package/dist/commands/shared/help.js +2 -2
  108. package/dist/commands/update-quality-scores.d.ts +5 -0
  109. package/dist/commands/update-quality-scores.js +20 -0
  110. package/dist/composition-root.d.ts +2 -3
  111. package/dist/composition-root.js +27 -14
  112. package/dist/config/features.ts +23 -0
  113. package/dist/config/models.ts +100 -0
  114. package/dist/config/prompts.ts +16 -0
  115. package/dist/config/rubrics.ts +225 -0
  116. package/dist/config/schedules.ts +47 -0
  117. package/dist/config/sinks.ts +37 -0
  118. package/dist/config/sources.ts +21 -0
  119. package/dist/config/thresholds.ts +61 -0
  120. package/dist/lib/agent-behavior-report.d.ts +8 -0
  121. package/dist/lib/agent-behavior-report.js +185 -0
  122. package/dist/lib/baseline.d.ts +19 -0
  123. package/dist/lib/baseline.js +153 -0
  124. package/dist/lib/calculate-scores.d.ts +23 -0
  125. package/dist/lib/calculate-scores.js +42 -0
  126. package/dist/lib/compare.d.ts +18 -0
  127. package/dist/lib/compare.js +170 -0
  128. package/dist/lib/coverage-audit.d.ts +4 -0
  129. package/dist/lib/coverage-audit.js +42 -0
  130. package/dist/lib/discovery-report.d.ts +13 -0
  131. package/dist/lib/discovery-report.js +57 -0
  132. package/dist/lib/fetch-docs.d.ts +30 -0
  133. package/dist/lib/fetch-docs.js +171 -0
  134. package/dist/lib/generate-configs.d.ts +25 -0
  135. package/dist/lib/generate-configs.js +42 -0
  136. package/dist/lib/grader-api.d.ts +21 -0
  137. package/dist/lib/grader-api.js +34 -0
  138. package/dist/lib/grader-compare.d.ts +19 -0
  139. package/dist/lib/grader-compare.js +91 -0
  140. package/dist/lib/grader-consistency.d.ts +27 -0
  141. package/dist/lib/grader-consistency.js +79 -0
  142. package/dist/lib/grader-sensitivity.d.ts +19 -0
  143. package/dist/lib/grader-sensitivity.js +75 -0
  144. package/dist/lib/grader-validate.d.ts +19 -0
  145. package/dist/lib/grader-validate.js +78 -0
  146. package/dist/lib/measure-retrieval.d.ts +14 -0
  147. package/dist/lib/measure-retrieval.js +71 -0
  148. package/dist/lib/pr-comment.d.ts +16 -0
  149. package/dist/lib/pr-comment.js +28 -0
  150. package/dist/lib/readiness-report.d.ts +13 -0
  151. package/dist/lib/readiness-report.js +108 -0
  152. package/dist/lib/webhook-server.d.ts +11 -0
  153. package/dist/lib/webhook-server.js +24 -0
  154. package/dist/lib/weekly-digest.d.ts +24 -0
  155. package/dist/lib/weekly-digest.js +148 -0
  156. package/dist/orchestration/build-app-context.js +13 -0
  157. package/dist/orchestration/cache-context.d.ts +23 -0
  158. package/dist/orchestration/cache-context.js +43 -0
  159. package/dist/orchestration/env-bridge.d.ts +21 -0
  160. package/dist/orchestration/env-bridge.js +66 -0
  161. package/dist/orchestration/load-pipeline-tasks.d.ts +34 -0
  162. package/dist/orchestration/load-pipeline-tasks.js +52 -0
  163. package/dist/orchestration/pipeline-orchestrator.js +75 -5
  164. package/dist/orchestration/step-runner.js +5 -1
  165. package/dist/orchestration/steps/calculate-scores-step.d.ts +1 -0
  166. package/dist/orchestration/steps/calculate-scores-step.js +13 -0
  167. package/dist/orchestration/steps/callback-step.js +10 -1
  168. package/dist/orchestration/steps/compare-step.js +6 -3
  169. package/dist/orchestration/steps/discovery-report-step.js +6 -2
  170. package/dist/orchestration/steps/fetch-docs-shell.d.ts +17 -0
  171. package/dist/orchestration/steps/fetch-docs-shell.js +30 -0
  172. package/dist/orchestration/steps/fetch-docs-step.d.ts +1 -0
  173. package/dist/orchestration/steps/fetch-docs-step.js +30 -16
  174. package/dist/orchestration/steps/gap-analysis-step.js +13 -2
  175. package/dist/orchestration/steps/generate-configs-step.d.ts +1 -0
  176. package/dist/orchestration/steps/generate-configs-step.js +50 -15
  177. package/dist/orchestration/steps/mirror-repo-tasks-step.d.ts +1 -1
  178. package/dist/orchestration/steps/mirror-repo-tasks-step.js +1 -1
  179. package/dist/orchestration/steps/publish-report-step.js +19 -0
  180. package/dist/orchestration/steps/readiness-step.js +8 -3
  181. package/dist/orchestration/steps/report-step.js +17 -4
  182. package/dist/orchestration/steps/run-eval-step.d.ts +1 -0
  183. package/dist/orchestration/steps/run-eval-step.js +52 -32
  184. package/dist/pipeline/agent-behavior-report.js +6 -0
  185. package/dist/pipeline/attribution.d.ts +1 -1
  186. package/dist/pipeline/attribution.js +1 -1
  187. package/dist/pipeline/cache.js +29 -15
  188. package/dist/pipeline/calculate-scores.d.ts +2 -0
  189. package/dist/pipeline/calculate-scores.js +70 -33
  190. package/dist/pipeline/checks.d.ts +8 -3
  191. package/dist/pipeline/checks.js +23 -3
  192. package/dist/pipeline/chronic-failures.d.ts +55 -0
  193. package/dist/pipeline/chronic-failures.js +110 -0
  194. package/dist/pipeline/compiler/__tests__/mcp-server-handler.test.js +33 -0
  195. package/dist/pipeline/compiler/__tests__/promptfoo-compiler.test.js +2 -3
  196. package/dist/pipeline/compiler/__tests__/task-bridge.test.d.ts +9 -0
  197. package/dist/pipeline/compiler/__tests__/task-bridge.test.js +339 -0
  198. package/dist/pipeline/compiler/__tests__/tool-loop-openai.test.d.ts +10 -0
  199. package/dist/pipeline/compiler/__tests__/tool-loop-openai.test.js +509 -0
  200. package/dist/pipeline/compiler/assertion-mapper.d.ts +1 -1
  201. package/dist/pipeline/compiler/assertion-mapper.js +1 -1
  202. package/dist/pipeline/compiler/compiler-to-yaml.d.ts +2 -7
  203. package/dist/pipeline/compiler/compiler-to-yaml.js +2 -7
  204. package/dist/pipeline/compiler/config-loader.d.ts +14 -0
  205. package/dist/pipeline/compiler/config-loader.js +42 -2
  206. package/dist/pipeline/compiler/fixture-resolver.d.ts +1 -1
  207. package/dist/pipeline/compiler/fixture-resolver.js +1 -1
  208. package/dist/pipeline/compiler/ignore-fields.d.ts +1 -1
  209. package/dist/pipeline/compiler/ignore-fields.js +1 -1
  210. package/dist/pipeline/compiler/index.d.ts +2 -5
  211. package/dist/pipeline/compiler/index.js +2 -5
  212. package/dist/pipeline/compiler/literacy-bridge.d.ts +1 -1
  213. package/dist/pipeline/compiler/literacy-bridge.js +1 -1
  214. package/dist/pipeline/compiler/mode-bases/agent-harness.d.ts +1 -1
  215. package/dist/pipeline/compiler/mode-bases/agent-harness.js +1 -1
  216. package/dist/pipeline/compiler/mode-bases/knowledge-probe.d.ts +1 -1
  217. package/dist/pipeline/compiler/mode-bases/knowledge-probe.js +1 -1
  218. package/dist/pipeline/compiler/mode-bases/literacy.d.ts +13 -2
  219. package/dist/pipeline/compiler/mode-bases/literacy.js +55 -1
  220. package/dist/pipeline/compiler/mode-bases/mcp-server.d.ts +1 -1
  221. package/dist/pipeline/compiler/mode-bases/mcp-server.js +1 -1
  222. package/dist/pipeline/compiler/mode-handlers/agent-harness/index.d.ts +1 -1
  223. package/dist/pipeline/compiler/mode-handlers/agent-harness/index.js +1 -1
  224. package/dist/pipeline/compiler/mode-handlers/agent-harness-handler.d.ts +70 -0
  225. package/dist/pipeline/compiler/mode-handlers/agent-harness-handler.js +485 -0
  226. package/dist/pipeline/compiler/mode-handlers/index.d.ts +2 -2
  227. package/dist/pipeline/compiler/mode-handlers/index.js +2 -2
  228. package/dist/pipeline/compiler/mode-handlers/knowledge-probe/index.d.ts +1 -1
  229. package/dist/pipeline/compiler/mode-handlers/knowledge-probe/index.js +1 -1
  230. package/dist/pipeline/compiler/mode-handlers/knowledge-probe-handler.d.ts +76 -0
  231. package/dist/pipeline/compiler/mode-handlers/knowledge-probe-handler.js +245 -0
  232. package/dist/pipeline/compiler/mode-handlers/literacy/index.d.ts +1 -1
  233. package/dist/pipeline/compiler/mode-handlers/literacy/index.js +1 -1
  234. package/dist/pipeline/compiler/mode-handlers/literacy-handler.d.ts +89 -0
  235. package/dist/pipeline/compiler/mode-handlers/literacy-handler.js +379 -0
  236. package/dist/pipeline/compiler/mode-handlers/mcp-assertions.d.ts +50 -0
  237. package/dist/pipeline/compiler/mode-handlers/mcp-assertions.js +334 -0
  238. package/dist/pipeline/compiler/mode-handlers/mcp-server/assertions.d.ts +1 -1
  239. package/dist/pipeline/compiler/mode-handlers/mcp-server/assertions.js +1 -1
  240. package/dist/pipeline/compiler/mode-handlers/mcp-server/index.d.ts +1 -1
  241. package/dist/pipeline/compiler/mode-handlers/mcp-server/index.js +1 -1
  242. package/dist/pipeline/compiler/mode-handlers/mcp-server/provider-config.js +4 -0
  243. package/dist/pipeline/compiler/mode-handlers/mcp-server-handler.d.ts +69 -0
  244. package/dist/pipeline/compiler/mode-handlers/mcp-server-handler.js +307 -0
  245. package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/index.js +22 -5
  246. package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/mcp-connection.js +6 -0
  247. package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/tool-loop-openai.d.ts +10 -5
  248. package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/tool-loop-openai.js +314 -7
  249. package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/types.d.ts +10 -0
  250. package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider.d.ts +65 -0
  251. package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider.js +368 -0
  252. package/dist/pipeline/compiler/presets/sanity-literacy.d.ts +1 -1
  253. package/dist/pipeline/compiler/presets/sanity-literacy.js +1 -1
  254. package/dist/pipeline/compiler/promptfoo-compiler.d.ts +1 -4
  255. package/dist/pipeline/compiler/promptfoo-compiler.js +3 -12
  256. package/dist/pipeline/compiler/provider-assembler.js +13 -7
  257. package/dist/pipeline/compiler/sandbox/docker-sandbox.d.ts +1 -1
  258. package/dist/pipeline/compiler/sandbox/docker-sandbox.js +1 -1
  259. package/dist/pipeline/compiler/sandbox/fixture-provisioner.d.ts +1 -1
  260. package/dist/pipeline/compiler/sandbox/fixture-provisioner.js +1 -1
  261. package/dist/pipeline/compiler/sandbox/git-worktree-sandbox.d.ts +1 -1
  262. package/dist/pipeline/compiler/sandbox/git-worktree-sandbox.js +1 -1
  263. package/dist/pipeline/compiler/sandbox/index.d.ts +1 -1
  264. package/dist/pipeline/compiler/sandbox/index.js +1 -1
  265. package/dist/pipeline/compiler/sandbox/sandbox-selector.d.ts +1 -1
  266. package/dist/pipeline/compiler/sandbox/sandbox-selector.js +1 -1
  267. package/dist/pipeline/compiler/sandbox/sandbox-strategy.d.ts +1 -1
  268. package/dist/pipeline/compiler/sandbox/sandbox-strategy.js +1 -1
  269. package/dist/pipeline/compiler/sandbox/tempdir-sandbox.d.ts +1 -1
  270. package/dist/pipeline/compiler/sandbox/tempdir-sandbox.js +1 -1
  271. package/dist/pipeline/compiler/scoring-bridge.d.ts +1 -1
  272. package/dist/pipeline/compiler/scoring-bridge.js +1 -1
  273. package/dist/pipeline/compiler/task-bridge.d.ts +41 -0
  274. package/dist/pipeline/compiler/task-bridge.js +92 -0
  275. package/dist/pipeline/compiler/task-graph-builder.d.ts +1 -4
  276. package/dist/pipeline/compiler/task-graph-builder.js +1 -4
  277. package/dist/pipeline/compiler/telemetry/index.d.ts +1 -1
  278. package/dist/pipeline/compiler/telemetry/index.js +1 -1
  279. package/dist/pipeline/compiler/variable-resolver.d.ts +1 -1
  280. package/dist/pipeline/compiler/variable-resolver.js +1 -1
  281. package/dist/pipeline/coverage-audit.d.ts +1 -1
  282. package/dist/pipeline/coverage-audit.js +1 -1
  283. package/dist/pipeline/degradations.d.ts +1 -1
  284. package/dist/pipeline/degradations.js +1 -1
  285. package/dist/pipeline/failure-modes.d.ts +1 -1
  286. package/dist/pipeline/failure-modes.js +13 -1
  287. package/dist/pipeline/gap-analysis.d.ts +1 -1
  288. package/dist/pipeline/gap-analysis.js +3 -1
  289. package/dist/pipeline/generate-configs.d.ts +2 -2
  290. package/dist/pipeline/generate-configs.js +15 -8
  291. package/dist/pipeline/grader-compare-runner.d.ts +1 -1
  292. package/dist/pipeline/grader-compare-runner.js +7 -1
  293. package/dist/pipeline/grader-comparison.d.ts +1 -1
  294. package/dist/pipeline/grader-comparison.js +1 -1
  295. package/dist/pipeline/grader-consistency-runner.d.ts +1 -1
  296. package/dist/pipeline/grader-consistency-runner.js +7 -1
  297. package/dist/pipeline/grader-consistency.d.ts +1 -1
  298. package/dist/pipeline/grader-consistency.js +1 -1
  299. package/dist/pipeline/grader-sensitivity-runner.d.ts +1 -1
  300. package/dist/pipeline/grader-sensitivity-runner.js +1 -1
  301. package/dist/pipeline/grader-sensitivity.d.ts +1 -1
  302. package/dist/pipeline/grader-sensitivity.js +1 -1
  303. package/dist/pipeline/grader-validate-runner.d.ts +1 -1
  304. package/dist/pipeline/grader-validate-runner.js +2 -2
  305. package/dist/pipeline/grader-validation.d.ts +1 -1
  306. package/dist/pipeline/grader-validation.js +1 -1
  307. package/dist/pipeline/map-request-to-config.js +15 -2
  308. package/dist/pipeline/mirror-repo-tasks.d.ts +1 -1
  309. package/dist/pipeline/mirror-repo-tasks.js +1 -1
  310. package/dist/pipeline/plan-format.d.ts +1 -1
  311. package/dist/pipeline/plan-format.js +1 -1
  312. package/dist/pipeline/plan.d.ts +1 -1
  313. package/dist/pipeline/plan.js +67 -29
  314. package/dist/pipeline/probe.d.ts +1 -1
  315. package/dist/pipeline/probe.js +1 -1
  316. package/dist/pipeline/readiness-report.d.ts +2 -2
  317. package/dist/pipeline/readiness-report.js +2 -2
  318. package/dist/pipeline/release-classification.d.ts +1 -1
  319. package/dist/pipeline/release-classification.js +1 -1
  320. package/dist/pipeline/release-report.d.ts +1 -1
  321. package/dist/pipeline/release-report.js +1 -1
  322. package/dist/pipeline/repo-eval-comment.d.ts +1 -1
  323. package/dist/pipeline/repo-eval-comment.js +1 -1
  324. package/dist/pipeline/repo-threshold-evaluator.d.ts +1 -1
  325. package/dist/pipeline/repo-threshold-evaluator.js +1 -1
  326. package/dist/pipeline/resolve-mappings.d.ts +6 -6
  327. package/dist/pipeline/resolve-mappings.js +44 -44
  328. package/dist/pipeline/retrieval-metrics.d.ts +3 -3
  329. package/dist/pipeline/retrieval-metrics.js +28 -20
  330. package/dist/pipeline/steps/calculate-scores-step.d.ts +11 -0
  331. package/dist/pipeline/steps/calculate-scores-step.js +89 -0
  332. package/dist/pipeline/steps/compare-step.d.ts +18 -0
  333. package/dist/pipeline/steps/compare-step.js +90 -0
  334. package/dist/pipeline/steps/eval-step.d.ts +53 -0
  335. package/dist/pipeline/steps/eval-step.js +347 -0
  336. package/dist/pipeline/steps/fetch-docs-step.d.ts +11 -0
  337. package/dist/pipeline/steps/fetch-docs-step.js +84 -0
  338. package/dist/pipeline/steps/generate-configs-step.d.ts +11 -0
  339. package/dist/pipeline/steps/generate-configs-step.js +98 -0
  340. package/dist/pipeline/steps/grader-consistency-step.d.ts +21 -0
  341. package/dist/pipeline/steps/grader-consistency-step.js +74 -0
  342. package/dist/pipeline/steps/publish-report-step.d.ts +57 -0
  343. package/dist/pipeline/steps/publish-report-step.js +243 -0
  344. package/dist/pipeline/steps/report-step.d.ts +13 -0
  345. package/dist/pipeline/steps/report-step.js +56 -0
  346. package/dist/pipeline/steps/update-scores-step.d.ts +11 -0
  347. package/dist/pipeline/steps/update-scores-step.js +42 -0
  348. package/dist/pipeline/targeted-loo.d.ts +1 -1
  349. package/dist/pipeline/targeted-loo.js +1 -1
  350. package/dist/pipeline/thresholds.d.ts +1 -1
  351. package/dist/pipeline/thresholds.js +1 -1
  352. package/dist/pipeline/validate.js +13 -0
  353. package/dist/report-store.d.ts +17 -0
  354. package/dist/report-store.js +24 -0
  355. package/dist/scripts/agent-behavior-report.d.ts +19 -0
  356. package/dist/scripts/agent-behavior-report.js +315 -0
  357. package/dist/scripts/baseline.d.ts +43 -0
  358. package/dist/scripts/baseline.js +267 -0
  359. package/dist/scripts/calculate-scores.d.ts +166 -0
  360. package/dist/scripts/calculate-scores.js +1296 -0
  361. package/dist/scripts/compare.d.ts +22 -0
  362. package/dist/scripts/compare.js +334 -0
  363. package/dist/scripts/coverage-audit.d.ts +44 -0
  364. package/dist/scripts/coverage-audit.js +209 -0
  365. package/dist/scripts/debug-eval.d.ts +19 -0
  366. package/dist/scripts/debug-eval.js +73 -0
  367. package/dist/scripts/discovery-report.d.ts +58 -0
  368. package/dist/scripts/discovery-report.js +250 -0
  369. package/dist/scripts/fetch-docs.d.ts +35 -0
  370. package/dist/scripts/fetch-docs.js +472 -0
  371. package/dist/scripts/generate-configs.d.ts +66 -0
  372. package/dist/scripts/generate-configs.js +459 -0
  373. package/dist/scripts/grader-api.d.ts +27 -0
  374. package/dist/scripts/grader-api.js +206 -0
  375. package/dist/scripts/grader-compare.d.ts +22 -0
  376. package/dist/scripts/grader-compare.js +368 -0
  377. package/dist/scripts/grader-consistency.d.ts +20 -0
  378. package/dist/scripts/grader-consistency.js +313 -0
  379. package/dist/scripts/grader-sensitivity.d.ts +22 -0
  380. package/dist/scripts/grader-sensitivity.js +354 -0
  381. package/dist/scripts/grader-validate.d.ts +19 -0
  382. package/dist/scripts/grader-validate.js +267 -0
  383. package/dist/scripts/measure-retrieval.d.ts +10 -0
  384. package/dist/scripts/measure-retrieval.js +145 -0
  385. package/dist/scripts/migrate-task-mode.d.ts +1 -1
  386. package/dist/scripts/migrate-task-mode.js +1 -1
  387. package/dist/scripts/migrate-tasks-to-content-lake.d.ts +1 -1
  388. package/dist/scripts/migrate-tasks-to-content-lake.js +1 -1
  389. package/dist/scripts/pipeline.d.ts +76 -0
  390. package/dist/scripts/pipeline.js +1031 -0
  391. package/dist/scripts/pr-comment.d.ts +10 -0
  392. package/dist/scripts/pr-comment.js +510 -0
  393. package/dist/scripts/readiness-report.d.ts +88 -0
  394. package/dist/scripts/readiness-report.js +342 -0
  395. package/dist/scripts/update-quality-scores.d.ts +15 -0
  396. package/dist/scripts/update-quality-scores.js +184 -0
  397. package/dist/scripts/validate-task-sources.d.ts +1 -1
  398. package/dist/scripts/validate-task-sources.js +1 -1
  399. package/dist/scripts/validate.d.ts +13 -0
  400. package/dist/scripts/validate.js +79 -0
  401. package/dist/scripts/webhook-server.d.ts +26 -0
  402. package/dist/scripts/webhook-server.js +147 -0
  403. package/dist/scripts/weekly-digest.d.ts +24 -0
  404. package/dist/scripts/weekly-digest.js +144 -0
  405. package/dist/sinks/format-slack.d.ts +64 -0
  406. package/dist/sinks/format-slack.js +306 -0
  407. package/dist/sinks/slack-sink.d.ts +27 -0
  408. package/dist/sinks/slack-sink.js +78 -0
  409. package/dist/sinks/types.d.ts +1 -1
  410. package/dist/sinks/types.js +1 -1
  411. package/dist/sinks/webhook-sink.d.ts +19 -0
  412. package/dist/sinks/webhook-sink.js +50 -0
  413. package/dist/tasks/knowledge-probe/define-type-api.task.ts +66 -0
  414. package/dist/tasks/knowledge-probe/groq-projections.task.ts +62 -0
  415. package/dist/tasks/literacy/content-lake.task.ts +181 -0
  416. package/dist/tasks/literacy/frameworks.task.ts +129 -0
  417. package/dist/tasks/literacy/functions.task.ts +70 -0
  418. package/dist/tasks/literacy/groq.task.ts +259 -0
  419. package/dist/tasks/literacy/image-handling.task.ts +95 -0
  420. package/dist/tasks/literacy/nextjs-live.task.ts +76 -0
  421. package/dist/tasks/literacy/portable-text.task.ts +169 -0
  422. package/dist/tasks/literacy/studio-setup.task.ts +134 -0
  423. package/dist/tasks/literacy/visual-editing.task.ts +147 -0
  424. package/package.json +25 -25
  425. package/tasks/.expanded.agentic.yaml +280 -0
  426. package/tasks/.expanded.yaml +565 -0
  427. package/tasks/knowledge-probe/define-type-api.task.ts +11 -0
  428. package/tasks/knowledge-probe/groq-projections.task.ts +3 -0
  429. package/tasks/literacy/content-lake.task.ts +181 -0
  430. package/tasks/literacy/frameworks.task.ts +1 -0
  431. package/tasks/literacy/functions.task.ts +1 -0
  432. package/tasks/literacy/groq.task.ts +1 -0
  433. package/tasks/literacy/image-handling.task.ts +95 -0
  434. package/tasks/literacy/nextjs-live.task.ts +2 -1
  435. package/tasks/literacy/portable-text.task.ts +169 -0
  436. package/tasks/literacy/studio-setup.task.ts +5 -2
  437. package/tasks/literacy/visual-editing.task.ts +1 -0
  438. package/LICENSE +0 -21
  439. package/tasks/frameworks.yaml +0 -98
  440. package/tasks/functions.yaml +0 -51
  441. package/tasks/groq.yaml +0 -216
  442. package/tasks/nextjs-live.yaml +0 -62
  443. package/tasks/studio-setup.yaml +0 -111
  444. package/tasks/visual-editing.yaml +0 -120
@@ -2,13 +2,14 @@
2
2
  * pipeline/retrieval-metrics.ts
3
3
  *
4
4
  * Computes retrieval precision and recall by comparing agent-retrieved
5
- * doc slugs against canonical_docs defined in task YAML files.
5
+ * doc slugs against canonical_docs defined in task definitions.
6
6
  *
7
- * This is a pure computation module — no file I/O beyond reading task YAMLs.
7
+ * This is a pure computation module — no file I/O beyond reading task files.
8
8
  */
9
- import { existsSync, readFileSync, readdirSync } from "fs";
10
- import { resolve } from "path";
11
- import { load } from "js-yaml";
9
+ import { existsSync } from "fs";
10
+ import { discoverTsTaskFiles, loadTsTaskFileSync, } from "../adapters/task-sources/task-file-loader.js";
11
+ import { resolveVendoredSubdir } from "./compiler/config-loader.js";
12
+ // --- Types for task definitions (just the canonical_docs part) ---
12
13
  /**
13
14
  * Compute retrieval metrics from agentic behavior data.
14
15
  *
@@ -107,28 +108,35 @@ export function computeTaskMetrics(taskId, area, retrieved, canonical) {
107
108
  };
108
109
  }
109
110
  /**
110
- * Load canonical_docs from all task YAML files.
111
+ * Load canonical docs from *.task.ts files in tasks/literacy/.
111
112
  * Returns a map of taskId → { slugs: Set<string>, area: string }.
112
113
  */
113
114
  export function loadCanonicalDocs(rootDir) {
114
- const tasksDir = resolve(rootDir, "tasks");
115
+ const tasksDir = resolveVendoredSubdir(rootDir, "tasks/literacy");
115
116
  if (!existsSync(tasksDir))
116
117
  return new Map();
117
118
  const result = new Map();
118
- const files = readdirSync(tasksDir).filter((f) => (f.endsWith(".yaml") || f.endsWith(".yml")) && !f.startsWith("."));
119
+ const files = discoverTsTaskFiles(tasksDir);
119
120
  for (const file of files) {
120
- const area = file.replace(/\.ya?ml$/, "");
121
- const raw = readFileSync(resolve(tasksDir, file), "utf-8");
122
- const parsed = load(raw);
123
- if (!Array.isArray(parsed))
124
- continue;
125
- for (const entry of parsed) {
126
- const task = entry;
127
- if (task.id && task.canonical_docs && task.canonical_docs.length > 0) {
128
- result.set(task.id, {
129
- area,
130
- slugs: new Set(task.canonical_docs.map((d) => d.slug)),
131
- });
121
+ const loaded = loadTsTaskFileSync(file);
122
+ for (const task of loaded.tasks) {
123
+ const t = task;
124
+ const id = typeof t.id === "string" ? t.id : undefined;
125
+ const area = typeof t.area === "string" ? t.area : undefined;
126
+ if (!id || !area)
127
+ continue;
128
+ // Extract slugs from context.docs
129
+ const context = t.context;
130
+ if (!context?.docs || !Array.isArray(context.docs))
131
+ continue;
132
+ const slugs = new Set();
133
+ for (const doc of context.docs) {
134
+ const d = doc;
135
+ if (typeof d.slug === "string")
136
+ slugs.add(d.slug);
137
+ }
138
+ if (slugs.size > 0) {
139
+ result.set(id, { area, slugs });
132
140
  }
133
141
  }
134
142
  }
@@ -0,0 +1,11 @@
1
+ /**
2
+ * Pipeline step: Calculate AI Literacy Scores from eval results.
3
+ *
4
+ * Preconditions: eval-results.json exists and is valid
5
+ * Postconditions: score-summary.json exists and is valid
6
+ *
7
+ * Cache key: eval results JSON file(s)
8
+ * Cache outputs: results/latest/score-summary.json
9
+ */
10
+ import type { EvalMode, StepResult } from "../types.js";
11
+ export declare function runCalculateScores(source?: string, mode?: EvalMode, noCache?: boolean): StepResult;
@@ -0,0 +1,89 @@
1
+ /**
2
+ * Pipeline step: Calculate AI Literacy Scores from eval results.
3
+ *
4
+ * Preconditions: eval-results.json exists and is valid
5
+ * Postconditions: score-summary.json exists and is valid
6
+ *
7
+ * Cache key: eval results JSON file(s)
8
+ * Cache outputs: results/latest/score-summary.json
9
+ */
10
+ import { execSync } from "child_process";
11
+ import { dirname, resolve } from "path";
12
+ import { fileURLToPath } from "url";
13
+ import { getStepInputPaths, hashFiles, lookupCache, recordCache, } from "../cache.js";
14
+ import { checkResultsExist, checkScoreSummaryValid } from "../checks.js";
15
+ import { RESULTS_FILES } from "./eval-step.js";
16
+ const __dirname = dirname(fileURLToPath(import.meta.url));
17
+ const ROOT = resolve(__dirname, "..", "..", "..");
18
/**
 * Pipeline step: run `calculate-scores` over the eval results and verify that
 * a valid score summary was produced.
 *
 * Flow: precondition check (results file present) -> cache lookup ->
 * execute `tsx src/lib/calculate-scores.ts` -> postcondition check
 * (score summary valid) -> record cache entry.
 *
 * @param source   Optional value forwarded to the script as `--source <source>`.
 * @param mode     Eval mode; "full" is scored from the baseline results file.
 * @param noCache  When true, the step cache is neither consulted nor updated.
 * @returns A step result with `status` "success" or "failed" and the elapsed
 *          `durationMs`; failures carry an `error` message, successes a `summary`.
 */
export function runCalculateScores(source, mode = "baseline", noCache = false) {
    const start = Date.now();
    const fail = (error) => ({
        durationMs: Date.now() - start,
        error,
        status: "failed",
    });
    // For full mode, use the baseline results file as the primary input
    // (calculate-scores reads all available results files internally)
    const primaryMode = mode === "full" ? "baseline" : mode;
    const resultsFile = RESULTS_FILES[primaryMode];
    // Precondition: the results file for this mode must exist.
    const resultsErrors = checkResultsExist(ROOT, resultsFile).filter((i) => i.severity === "error");
    if (resultsErrors.length > 0) {
        return fail(`Results missing: ${resultsErrors.map((e) => e.message).join("; ")}`);
    }
    // Cache check
    if (!noCache) {
        const cacheResult = lookupCache(ROOT, "calculate-scores");
        if (cacheResult.hit) {
            return {
                durationMs: Date.now() - start,
                status: "success",
                summary: `Skipped (cached) — ${cacheResult.entry.summary}`,
            };
        }
    }
    // Execute — note: calculate-scores exits 1 when areas are below critical,
    // which is expected behavior, not an error
    try {
        // NOTE(review): `source` is interpolated into a shell command without
        // quoting; callers must only pass trusted values without shell metacharacters.
        const sourceArg = source ? ` --source ${source}` : "";
        const resultsArg = primaryMode !== "baseline" ? ` ${resultsFile}` : "";
        execSync(`tsx src/lib/calculate-scores.ts${resultsArg}${sourceArg}`, {
            cwd: ROOT,
            env: process.env,
            stdio: "inherit",
        });
    }
    catch (err) {
        const exitStatus = err !== null && typeof err === "object" && "status" in err
            ? err.status
            : undefined;
        // Only a real exit code of 1 means "areas below critical". Anything
        // else (another exit code, a signal, a spawn failure) is a genuine
        // failure and must not fall through to the postcondition check, where
        // a stale summary from an earlier run could mask it.
        if (exitStatus !== 1) {
            if (typeof exitStatus === "number") {
                return fail(`calculate-scores failed with exit code ${exitStatus}`);
            }
            const detail = err instanceof Error ? err.message : String(err);
            return fail(`calculate-scores failed to run: ${detail}`);
        }
    }
    // Postcondition: score summary exists and is valid
    const summaryErrors = checkScoreSummaryValid(ROOT).filter((i) => i.severity === "error");
    if (summaryErrors.length > 0) {
        return fail(`Postcondition failed: ${summaryErrors.map((e) => e.message).join("; ")}`);
    }
    const durationMs = Date.now() - start;
    const summary = "Scores calculated and summary written";
    // Record cache
    if (!noCache) {
        const inputHash = hashFiles(getStepInputPaths(ROOT, "calculate-scores"));
        recordCache(ROOT, "calculate-scores", inputHash, summary, durationMs, [
            "results/latest/score-summary.json",
        ]);
    }
    return { durationMs, status: "success", summary };
}
@@ -0,0 +1,18 @@
1
+ /**
2
+ * Pipeline step: Compare current scores against a baseline.
3
+ *
4
+ * Preconditions: score-summary.json exists
5
+ * Postconditions: comparison-report.json written to results/latest/
6
+ *
7
+ * This step is optional — it only runs when --compare is passed
8
+ * (or a baseline exists and auto-compare is enabled).
9
+ */
10
+ import type { CompareOptions, StepResult } from "../types.js";
11
+ /**
12
+ * Run comparison against a baseline.
13
+ *
14
+ * @param rootDir Package root directory
15
+ * @param baselinePath Explicit baseline file path (optional — uses latest if omitted)
16
+ * @param options Compare options (noise threshold, etc.)
17
+ */
18
+ export declare function runCompare(rootDir: string, baselinePath?: string, options?: CompareOptions): StepResult;
@@ -0,0 +1,90 @@
1
+ /**
2
+ * Pipeline step: Compare current scores against a baseline.
3
+ *
4
+ * Preconditions: score-summary.json exists
5
+ * Postconditions: comparison-report.json written to results/latest/
6
+ *
7
+ * This step is optional — it only runs when --compare is passed
8
+ * (or a baseline exists and auto-compare is enabled).
9
+ */
10
+ import { existsSync, readFileSync, readdirSync, writeFileSync } from "fs";
11
+ import { join, resolve } from "path";
12
+ import { compare } from "../compare.js";
13
+ /**
14
+ * Run comparison against a baseline.
15
+ *
16
+ * @param rootDir Package root directory
17
+ * @param baselinePath Explicit baseline file path (optional — uses latest if omitted)
18
+ * @param options Compare options (noise threshold, etc.)
19
+ */
20
+ export function runCompare(rootDir, baselinePath, options) {
21
+ const start = Date.now();
22
+ const scoreSummaryPath = resolve(rootDir, "results", "latest", "score-summary.json");
23
+ if (!existsSync(scoreSummaryPath)) {
24
+ return {
25
+ durationMs: Date.now() - start,
26
+ error: "score-summary.json not found. Run calculate-scores first.",
27
+ status: "failed",
28
+ };
29
+ }
30
+ // Load experiment (current run)
31
+ const experiment = JSON.parse(readFileSync(scoreSummaryPath, "utf-8"));
32
+ // Resolve baseline
33
+ let resolvedBaselinePath;
34
+ if (baselinePath) {
35
+ resolvedBaselinePath = resolve(baselinePath);
36
+ }
37
+ else {
38
+ const baselinesDir = resolve(rootDir, "results", "baselines");
39
+ if (!existsSync(baselinesDir)) {
40
+ return {
41
+ reason: "No baselines directory found. Run 'pnpm baseline:save' first.",
42
+ status: "skipped",
43
+ };
44
+ }
45
+ const files = readdirSync(baselinesDir)
46
+ .filter((f) => f.endsWith(".json"))
47
+ .sort()
48
+ .reverse();
49
+ if (files.length === 0) {
50
+ return {
51
+ reason: "No baseline files found. Run 'pnpm baseline:save' first.",
52
+ status: "skipped",
53
+ };
54
+ }
55
+ resolvedBaselinePath = join(baselinesDir, files[0]);
56
+ }
57
+ if (!existsSync(resolvedBaselinePath)) {
58
+ return {
59
+ durationMs: Date.now() - start,
60
+ error: `Baseline file not found: ${resolvedBaselinePath}`,
61
+ status: "failed",
62
+ };
63
+ }
64
+ const baseline = JSON.parse(readFileSync(resolvedBaselinePath, "utf-8"));
65
+ // Run comparison
66
+ const report = compare(baseline, experiment, options);
67
+ // Write report
68
+ const reportPath = resolve(rootDir, "results", "latest", "comparison-report.json");
69
+ writeFileSync(reportPath, JSON.stringify(report, null, 2));
70
+ // Build summary
71
+ const improved = report.improved.length;
72
+ const regressed = report.regressed.length;
73
+ const unchanged = report.unchanged.length;
74
+ const overallDelta = report.deltas.overall;
75
+ const deltaStr = overallDelta > 0
76
+ ? `+${Math.round(overallDelta)}`
77
+ : String(Math.round(overallDelta));
78
+ const parts = [`Overall: ${deltaStr}`];
79
+ if (improved > 0)
80
+ parts.push(`${improved} improved`);
81
+ if (regressed > 0)
82
+ parts.push(`${regressed} regressed`);
83
+ if (unchanged > 0)
84
+ parts.push(`${unchanged} unchanged`);
85
+ return {
86
+ durationMs: Date.now() - start,
87
+ status: "success",
88
+ summary: parts.join(", "),
89
+ };
90
+ }
@@ -0,0 +1,53 @@
1
+ /**
2
+ * Pipeline step: Run promptfoo evaluation.
3
+ *
4
+ * Preconditions: config files and context files exist
5
+ * Postconditions: eval-results.json exists and is valid
6
+ *
7
+ * Cache key: promptfooconfig*.yaml + contexts/*.md + tasks/*.yaml +
8
+ * canonical contexts + reference solutions + config/models.yaml
9
+ * Cache outputs: results/latest/eval-results*.json
10
+ *
11
+ * Remote cache: When local cache misses and a Sanity token is available,
12
+ * the step queries the Content Lake for a report with a matching eval
13
+ * fingerprint. On a hit, the cached score-summary.json is written to disk
14
+ * and the eval + calculate-scores steps are skipped entirely.
15
+ *
16
+ * @see docs/design-docs/content-lake-eval-caching.md
17
+ */
18
+ import type { ConcreteEvalMode, DebugOptions, FilterOptions, StepResult } from "../types.js";
19
+ /** Each mode writes eval results to a different file (set in the config's outputPath) */
20
+ export declare const RESULTS_FILES: Record<ConcreteEvalMode, string>;
21
/**
 * Outcome of the eval step together with the cache metadata that
 * downstream steps read.
 */
export interface EvalStepResult {
    /** Result of the step itself. */
    stepResult: StepResult;
    /** Eval fingerprint computed for the run, intended for publishing in provenance. */
    evalFingerprint?: string;
    /** Set when the result was obtained from a remote cache hit. */
    remoteCacheHit?: boolean;
}
30
/**
 * Settings for the remote cache, i.e. the Content Lake lookup keyed by
 * eval fingerprint.
 */
export interface RemoteCacheOptions {
    /** Identifier of the grader model, as given in models.yaml. */
    graderModel: string;
    /** Marks a debug run; debug runs do not use the remote cache. */
    debug?: boolean;
    /** Filter options that go into the fingerprint computation. */
    filter?: FilterOptions;
    /** Turns the remote cache lookup off (--no-remote-cache). */
    noRemoteCache?: boolean;
    /** Sanity API token used to read cached reports. */
    sanityToken?: string;
}
43
+ export declare function buildFilterFlags(debug?: DebugOptions): string;
44
+ /**
45
+ * Extract the Promptfoo share URL from the eval results JSON.
46
+ *
47
+ * Promptfoo writes a `shareableUrl` field into the results file when
48
+ * `PROMPTFOO_API_KEY` is set. This replaces the previous approach of
49
+ * scraping the URL from a captured log file (which required piping
50
+ * through `tee` and broke TTY progress reporting).
51
+ */
52
+ export declare function extractShareUrl(mode: ConcreteEvalMode): string | undefined;
53
+ export declare function runEval(mode: ConcreteEvalMode, debug?: DebugOptions, concurrency?: number, noCache?: boolean, remoteCacheOpts?: RemoteCacheOptions): Promise<EvalStepResult>;