@sanity/ailf 0.5.0 → 2.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (377) hide show
  1. package/README.md +0 -1
  2. package/config/features.ts +23 -0
  3. package/config/models.ts +95 -0
  4. package/config/prompts.ts +16 -0
  5. package/config/rubrics.ts +225 -0
  6. package/config/schedules.ts +47 -0
  7. package/config/sinks.ts +37 -0
  8. package/config/sources.ts +21 -0
  9. package/config/thresholds.ts +61 -0
  10. package/dist/_vendor/ailf-core/config-helpers.d.ts +171 -0
  11. package/dist/_vendor/ailf-core/config-helpers.js +170 -0
  12. package/dist/_vendor/ailf-core/env-helper.d.ts +35 -0
  13. package/dist/_vendor/ailf-core/env-helper.js +45 -0
  14. package/dist/_vendor/ailf-core/examples/index.d.ts +16 -0
  15. package/dist/_vendor/ailf-core/examples/index.js +25 -0
  16. package/dist/_vendor/ailf-core/index.d.ts +3 -0
  17. package/dist/_vendor/ailf-core/index.js +5 -0
  18. package/dist/_vendor/ailf-core/ports/context.d.ts +17 -2
  19. package/dist/_vendor/ailf-core/ports/doc-fetcher.d.ts +2 -2
  20. package/dist/_vendor/ailf-core/ports/index.d.ts +2 -1
  21. package/dist/_vendor/ailf-core/ports/mode-handler.d.ts +129 -0
  22. package/dist/_vendor/ailf-core/ports/mode-handler.js +19 -0
  23. package/dist/_vendor/ailf-core/ports/task-source.d.ts +16 -122
  24. package/dist/_vendor/ailf-core/ports/task-source.js +7 -7
  25. package/dist/_vendor/ailf-core/schemas/eval-config.d.ts +8 -2
  26. package/dist/_vendor/ailf-core/schemas/eval-config.js +17 -2
  27. package/dist/_vendor/ailf-core/schemas/pipeline-request.d.ts +9 -3
  28. package/dist/_vendor/ailf-core/schemas/pipeline-request.js +8 -1
  29. package/dist/_vendor/ailf-core/schemas/pipeline.d.ts +14 -31
  30. package/dist/_vendor/ailf-core/schemas/pipeline.js +17 -9
  31. package/dist/_vendor/ailf-core/schemas/schedules.d.ts +14 -4
  32. package/dist/_vendor/ailf-core/schemas/schedules.js +6 -2
  33. package/dist/_vendor/ailf-core/schemas/sinks.d.ts +1 -1
  34. package/dist/_vendor/ailf-core/services/comparison-formatters.js +57 -19
  35. package/dist/_vendor/ailf-core/services/index.d.ts +2 -1
  36. package/dist/_vendor/ailf-core/services/index.js +2 -1
  37. package/dist/_vendor/ailf-core/services/scoring-engine.d.ts +153 -0
  38. package/dist/_vendor/ailf-core/services/scoring-engine.js +237 -0
  39. package/dist/_vendor/ailf-core/services/scoring.d.ts +15 -2
  40. package/dist/_vendor/ailf-core/services/scoring.js +25 -15
  41. package/dist/_vendor/ailf-core/types/branded-ids.d.ts +137 -0
  42. package/dist/_vendor/ailf-core/types/branded-ids.js +136 -0
  43. package/dist/_vendor/ailf-core/types/eval-mode-config.d.ts +150 -0
  44. package/dist/_vendor/ailf-core/types/eval-mode-config.js +24 -0
  45. package/dist/_vendor/ailf-core/types/generalized-task.d.ts +332 -0
  46. package/dist/_vendor/ailf-core/types/generalized-task.js +13 -0
  47. package/dist/_vendor/ailf-core/types/index.d.ts +45 -83
  48. package/dist/_vendor/ailf-core/types/index.js +8 -1
  49. package/dist/_vendor/ailf-core/types/plugin-registry.d.ts +257 -0
  50. package/dist/_vendor/ailf-core/types/plugin-registry.js +185 -0
  51. package/dist/_vendor/ailf-core/types/storage-schema.d.ts +199 -0
  52. package/dist/_vendor/ailf-core/types/storage-schema.js +39 -0
  53. package/dist/_vendor/ailf-core/types/task-graph.d.ts +86 -0
  54. package/dist/_vendor/ailf-core/types/task-graph.js +20 -0
  55. package/dist/_vendor/ailf-core/types/trace.d.ts +118 -0
  56. package/dist/_vendor/ailf-core/types/trace.js +18 -0
  57. package/dist/_vendor/ailf-core/types/variable-envelope.d.ts +80 -0
  58. package/dist/_vendor/ailf-core/types/variable-envelope.js +16 -0
  59. package/dist/_vendor/ailf-shared/dimension-names.d.ts +5 -18
  60. package/dist/_vendor/ailf-shared/dimension-names.js +6 -24
  61. package/dist/_vendor/ailf-shared/eval-modes.d.ts +38 -6
  62. package/dist/_vendor/ailf-shared/eval-modes.js +26 -2
  63. package/dist/_vendor/ailf-shared/index.d.ts +0 -1
  64. package/dist/_vendor/ailf-shared/index.js +0 -1
  65. package/dist/adapters/api-client/build-request.js +14 -13
  66. package/dist/adapters/config-sources/file-config-adapter.d.ts +20 -11
  67. package/dist/adapters/config-sources/file-config-adapter.js +39 -12
  68. package/dist/adapters/config-sources/index.d.ts +2 -0
  69. package/dist/adapters/config-sources/index.js +1 -0
  70. package/dist/adapters/config-sources/ts-config-loader.d.ts +59 -0
  71. package/dist/adapters/config-sources/ts-config-loader.js +141 -0
  72. package/dist/adapters/doc-fetchers/sanity-doc-fetcher.d.ts +3 -2
  73. package/dist/adapters/doc-fetchers/sanity-doc-fetcher.js +7 -2
  74. package/dist/adapters/task-sources/composite-task-source.d.ts +3 -3
  75. package/dist/adapters/task-sources/composite-task-source.js +1 -1
  76. package/dist/adapters/task-sources/content-lake-task-source.d.ts +7 -6
  77. package/dist/adapters/task-sources/content-lake-task-source.js +35 -39
  78. package/dist/adapters/task-sources/index.d.ts +3 -2
  79. package/dist/adapters/task-sources/index.js +3 -2
  80. package/dist/adapters/task-sources/repo-schemas.d.ts +218 -16
  81. package/dist/adapters/task-sources/repo-schemas.js +227 -19
  82. package/dist/adapters/task-sources/repo-task-source.d.ts +16 -12
  83. package/dist/adapters/task-sources/repo-task-source.js +92 -80
  84. package/dist/adapters/task-sources/repo-validation.d.ts +36 -5
  85. package/dist/adapters/task-sources/repo-validation.js +126 -5
  86. package/dist/adapters/task-sources/task-file-loader.d.ts +64 -0
  87. package/dist/adapters/task-sources/task-file-loader.js +83 -0
  88. package/dist/adapters/task-sources/yaml-task-source.d.ts +6 -6
  89. package/dist/adapters/task-sources/yaml-task-source.js +19 -16
  90. package/dist/cli.js +0 -2
  91. package/dist/commands/baseline.js +4 -1
  92. package/dist/commands/calculate-scores.js +1 -1
  93. package/dist/commands/coverage-audit.js +9 -1
  94. package/dist/commands/explain-handler.js +25 -23
  95. package/dist/commands/fetch-docs.js +3 -2
  96. package/dist/commands/generate-configs.js +1 -1
  97. package/dist/commands/init.d.ts +6 -4
  98. package/dist/commands/init.js +302 -23
  99. package/dist/commands/interactive.js +11 -7
  100. package/dist/commands/pipeline-action.d.ts +2 -0
  101. package/dist/commands/pipeline-action.js +16 -6
  102. package/dist/commands/pipeline.d.ts +1 -0
  103. package/dist/commands/pipeline.js +4 -2
  104. package/dist/commands/pr-comment.js +1 -1
  105. package/dist/commands/publish.js +2 -2
  106. package/dist/commands/readiness-report.js +13 -6
  107. package/dist/commands/validate-tasks.d.ts +2 -2
  108. package/dist/commands/validate-tasks.js +26 -15
  109. package/dist/composition-root.d.ts +13 -1
  110. package/dist/composition-root.js +99 -4
  111. package/dist/index.d.ts +41 -0
  112. package/dist/index.js +48 -0
  113. package/dist/orchestration/build-app-context.js +1 -0
  114. package/dist/orchestration/build-step-sequence.js +28 -8
  115. package/dist/orchestration/steps/calculate-scores-step.js +24 -11
  116. package/dist/orchestration/steps/fetch-docs-step.js +8 -7
  117. package/dist/orchestration/steps/gap-analysis-step.js +8 -7
  118. package/dist/orchestration/steps/generate-configs-step.d.ts +16 -3
  119. package/dist/orchestration/steps/generate-configs-step.js +261 -51
  120. package/dist/orchestration/steps/grader-consistency-step.js +7 -4
  121. package/dist/orchestration/steps/mirror-repo-tasks-step.js +1 -1
  122. package/dist/orchestration/steps/readiness-step.js +5 -6
  123. package/dist/orchestration/steps/run-eval-step.d.ts +1 -2
  124. package/dist/orchestration/steps/run-eval-step.js +8 -7
  125. package/dist/pipeline/cache.d.ts +1 -1
  126. package/dist/pipeline/cache.js +36 -8
  127. package/dist/pipeline/calculate-scores.d.ts +2 -4
  128. package/dist/pipeline/calculate-scores.js +43 -113
  129. package/dist/pipeline/checks.js +2 -2
  130. package/dist/pipeline/compare.js +8 -8
  131. package/dist/pipeline/compiler/__tests__/agent-harness-handler.test.d.ts +10 -0
  132. package/dist/pipeline/compiler/__tests__/agent-harness-handler.test.js +288 -0
  133. package/dist/pipeline/compiler/__tests__/assertion-mapper.test.d.ts +9 -0
  134. package/dist/pipeline/compiler/__tests__/assertion-mapper.test.js +145 -0
  135. package/dist/pipeline/compiler/__tests__/knowledge-probe-handler.test.d.ts +10 -0
  136. package/dist/pipeline/compiler/__tests__/knowledge-probe-handler.test.js +314 -0
  137. package/dist/pipeline/compiler/__tests__/literacy-handler.test.d.ts +10 -0
  138. package/dist/pipeline/compiler/__tests__/literacy-handler.test.js +486 -0
  139. package/dist/pipeline/compiler/__tests__/mcp-server-handler.test.d.ts +10 -0
  140. package/dist/pipeline/compiler/__tests__/mcp-server-handler.test.js +392 -0
  141. package/dist/pipeline/compiler/__tests__/promptfoo-compiler.test.d.ts +9 -0
  142. package/dist/pipeline/compiler/__tests__/promptfoo-compiler.test.js +333 -0
  143. package/dist/pipeline/compiler/__tests__/sandbox-and-fixtures.test.d.ts +12 -0
  144. package/dist/pipeline/compiler/__tests__/sandbox-and-fixtures.test.js +210 -0
  145. package/dist/pipeline/compiler/__tests__/scoring-and-presets.test.d.ts +7 -0
  146. package/dist/pipeline/compiler/__tests__/scoring-and-presets.test.js +404 -0
  147. package/dist/pipeline/compiler/__tests__/scoring-bridge.test.d.ts +10 -0
  148. package/dist/pipeline/compiler/__tests__/scoring-bridge.test.js +184 -0
  149. package/dist/pipeline/compiler/__tests__/task-graph-builder.test.d.ts +8 -0
  150. package/dist/pipeline/compiler/__tests__/task-graph-builder.test.js +301 -0
  151. package/dist/pipeline/compiler/__tests__/telemetry.test.d.ts +9 -0
  152. package/dist/pipeline/compiler/__tests__/telemetry.test.js +503 -0
  153. package/dist/pipeline/compiler/assertion-mapper.d.ts +58 -0
  154. package/dist/pipeline/compiler/assertion-mapper.js +175 -0
  155. package/dist/pipeline/compiler/compiler-to-yaml.d.ts +51 -0
  156. package/dist/pipeline/compiler/compiler-to-yaml.js +222 -0
  157. package/dist/pipeline/compiler/config-loader.d.ts +56 -0
  158. package/dist/pipeline/compiler/config-loader.js +111 -0
  159. package/dist/pipeline/compiler/fixture-resolver.d.ts +41 -0
  160. package/dist/pipeline/compiler/fixture-resolver.js +113 -0
  161. package/dist/pipeline/compiler/hash.d.ts +11 -0
  162. package/dist/pipeline/compiler/hash.js +18 -0
  163. package/dist/pipeline/compiler/ignore-fields.d.ts +53 -0
  164. package/dist/pipeline/compiler/ignore-fields.js +113 -0
  165. package/dist/pipeline/compiler/index.d.ts +29 -0
  166. package/dist/pipeline/compiler/index.js +45 -0
  167. package/dist/pipeline/compiler/literacy-bridge.d.ts +102 -0
  168. package/dist/pipeline/compiler/literacy-bridge.js +172 -0
  169. package/dist/pipeline/compiler/mode-bases/agent-harness.d.ts +10 -0
  170. package/dist/pipeline/compiler/mode-bases/agent-harness.js +21 -0
  171. package/dist/pipeline/compiler/mode-bases/index.d.ts +4 -0
  172. package/dist/pipeline/compiler/mode-bases/index.js +4 -0
  173. package/dist/pipeline/compiler/mode-bases/knowledge-probe.d.ts +10 -0
  174. package/dist/pipeline/compiler/mode-bases/knowledge-probe.js +22 -0
  175. package/dist/pipeline/compiler/mode-bases/literacy.d.ts +12 -0
  176. package/dist/pipeline/compiler/mode-bases/literacy.js +78 -0
  177. package/dist/pipeline/compiler/mode-bases/mcp-server.d.ts +10 -0
  178. package/dist/pipeline/compiler/mode-bases/mcp-server.js +70 -0
  179. package/dist/pipeline/compiler/mode-handlers/__fixtures__/agent-harness-example-tasks.d.ts +14 -0
  180. package/dist/pipeline/compiler/mode-handlers/__fixtures__/agent-harness-example-tasks.js +152 -0
  181. package/dist/pipeline/compiler/mode-handlers/__fixtures__/knowledge-probe-example-tasks.d.ts +32 -0
  182. package/dist/pipeline/compiler/mode-handlers/__fixtures__/knowledge-probe-example-tasks.js +176 -0
  183. package/dist/pipeline/compiler/mode-handlers/__fixtures__/mcp-example-tasks.d.ts +49 -0
  184. package/dist/pipeline/compiler/mode-handlers/__fixtures__/mcp-example-tasks.js +259 -0
  185. package/dist/pipeline/compiler/mode-handlers/agent-harness/assertions.d.ts +43 -0
  186. package/dist/pipeline/compiler/mode-handlers/agent-harness/assertions.js +187 -0
  187. package/dist/pipeline/compiler/mode-handlers/agent-harness/compiler.d.ts +19 -0
  188. package/dist/pipeline/compiler/mode-handlers/agent-harness/compiler.js +138 -0
  189. package/dist/pipeline/compiler/mode-handlers/agent-harness/index.d.ts +16 -0
  190. package/dist/pipeline/compiler/mode-handlers/agent-harness/index.js +43 -0
  191. package/dist/pipeline/compiler/mode-handlers/agent-harness/prompts.d.ts +9 -0
  192. package/dist/pipeline/compiler/mode-handlers/agent-harness/prompts.js +29 -0
  193. package/dist/pipeline/compiler/mode-handlers/agent-harness/sandbox.d.ts +12 -0
  194. package/dist/pipeline/compiler/mode-handlers/agent-harness/sandbox.js +82 -0
  195. package/dist/pipeline/compiler/mode-handlers/agent-harness/tool-presets.d.ts +4 -0
  196. package/dist/pipeline/compiler/mode-handlers/agent-harness/tool-presets.js +19 -0
  197. package/dist/pipeline/compiler/mode-handlers/agent-harness/types.d.ts +49 -0
  198. package/dist/pipeline/compiler/mode-handlers/agent-harness/types.js +4 -0
  199. package/dist/pipeline/compiler/mode-handlers/agent-harness/validation.d.ts +9 -0
  200. package/dist/pipeline/compiler/mode-handlers/agent-harness/validation.js +16 -0
  201. package/dist/pipeline/compiler/mode-handlers/index.d.ts +15 -0
  202. package/dist/pipeline/compiler/mode-handlers/index.js +19 -0
  203. package/dist/pipeline/compiler/mode-handlers/knowledge-probe/assertions.d.ts +16 -0
  204. package/dist/pipeline/compiler/mode-handlers/knowledge-probe/assertions.js +61 -0
  205. package/dist/pipeline/compiler/mode-handlers/knowledge-probe/compiler.d.ts +18 -0
  206. package/dist/pipeline/compiler/mode-handlers/knowledge-probe/compiler.js +112 -0
  207. package/dist/pipeline/compiler/mode-handlers/knowledge-probe/index.d.ts +26 -0
  208. package/dist/pipeline/compiler/mode-handlers/knowledge-probe/index.js +49 -0
  209. package/dist/pipeline/compiler/mode-handlers/knowledge-probe/prompts.d.ts +9 -0
  210. package/dist/pipeline/compiler/mode-handlers/knowledge-probe/prompts.js +28 -0
  211. package/dist/pipeline/compiler/mode-handlers/knowledge-probe/types.d.ts +44 -0
  212. package/dist/pipeline/compiler/mode-handlers/knowledge-probe/types.js +4 -0
  213. package/dist/pipeline/compiler/mode-handlers/knowledge-probe/validation.d.ts +9 -0
  214. package/dist/pipeline/compiler/mode-handlers/knowledge-probe/validation.js +24 -0
  215. package/dist/pipeline/compiler/mode-handlers/literacy/assertions.d.ts +18 -0
  216. package/dist/pipeline/compiler/mode-handlers/literacy/assertions.js +118 -0
  217. package/dist/pipeline/compiler/mode-handlers/literacy/compiler.d.ts +14 -0
  218. package/dist/pipeline/compiler/mode-handlers/literacy/compiler.js +105 -0
  219. package/dist/pipeline/compiler/mode-handlers/literacy/index.d.ts +11 -0
  220. package/dist/pipeline/compiler/mode-handlers/literacy/index.js +38 -0
  221. package/dist/pipeline/compiler/mode-handlers/literacy/prompts.d.ts +9 -0
  222. package/dist/pipeline/compiler/mode-handlers/literacy/prompts.js +74 -0
  223. package/dist/pipeline/compiler/mode-handlers/literacy/types.d.ts +41 -0
  224. package/dist/pipeline/compiler/mode-handlers/literacy/types.js +4 -0
  225. package/dist/pipeline/compiler/mode-handlers/literacy/validation.d.ts +12 -0
  226. package/dist/pipeline/compiler/mode-handlers/literacy/validation.js +28 -0
  227. package/dist/pipeline/compiler/mode-handlers/mcp-server/assertions.d.ts +42 -0
  228. package/dist/pipeline/compiler/mode-handlers/mcp-server/assertions.js +334 -0
  229. package/dist/pipeline/compiler/mode-handlers/mcp-server/compiler.d.ts +19 -0
  230. package/dist/pipeline/compiler/mode-handlers/mcp-server/compiler.js +100 -0
  231. package/dist/pipeline/compiler/mode-handlers/mcp-server/index.d.ts +27 -0
  232. package/dist/pipeline/compiler/mode-handlers/mcp-server/index.js +54 -0
  233. package/dist/pipeline/compiler/mode-handlers/mcp-server/prompts.d.ts +8 -0
  234. package/dist/pipeline/compiler/mode-handlers/mcp-server/prompts.js +28 -0
  235. package/dist/pipeline/compiler/mode-handlers/mcp-server/provider-config.d.ts +28 -0
  236. package/dist/pipeline/compiler/mode-handlers/mcp-server/provider-config.js +104 -0
  237. package/dist/pipeline/compiler/mode-handlers/mcp-server/types.d.ts +37 -0
  238. package/dist/pipeline/compiler/mode-handlers/mcp-server/types.js +4 -0
  239. package/dist/pipeline/compiler/mode-handlers/mcp-server/validation.d.ts +9 -0
  240. package/dist/pipeline/compiler/mode-handlers/mcp-server/validation.js +43 -0
  241. package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/index.d.ts +33 -0
  242. package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/index.js +174 -0
  243. package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/mcp-connection.d.ts +19 -0
  244. package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/mcp-connection.js +95 -0
  245. package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/tool-loop-anthropic.d.ts +19 -0
  246. package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/tool-loop-anthropic.js +172 -0
  247. package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/tool-loop-openai.d.ts +14 -0
  248. package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/tool-loop-openai.js +16 -0
  249. package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/types.d.ts +93 -0
  250. package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/types.js +4 -0
  251. package/dist/pipeline/compiler/preset-loader.d.ts +22 -0
  252. package/dist/pipeline/compiler/preset-loader.js +99 -0
  253. package/dist/pipeline/compiler/presets/index.d.ts +9 -0
  254. package/dist/pipeline/compiler/presets/index.js +8 -0
  255. package/dist/pipeline/compiler/presets/sanity-literacy.d.ts +42 -0
  256. package/dist/pipeline/compiler/presets/sanity-literacy.js +208 -0
  257. package/dist/pipeline/compiler/promptfoo-compiler.d.ts +96 -0
  258. package/dist/pipeline/compiler/promptfoo-compiler.js +230 -0
  259. package/dist/pipeline/compiler/provider-assembler.d.ts +39 -0
  260. package/dist/pipeline/compiler/provider-assembler.js +137 -0
  261. package/dist/pipeline/compiler/sandbox/docker-sandbox.d.ts +21 -0
  262. package/dist/pipeline/compiler/sandbox/docker-sandbox.js +136 -0
  263. package/dist/pipeline/compiler/sandbox/fixture-provisioner.d.ts +69 -0
  264. package/dist/pipeline/compiler/sandbox/fixture-provisioner.js +189 -0
  265. package/dist/pipeline/compiler/sandbox/git-worktree-sandbox.d.ts +20 -0
  266. package/dist/pipeline/compiler/sandbox/git-worktree-sandbox.js +114 -0
  267. package/dist/pipeline/compiler/sandbox/index.d.ts +10 -0
  268. package/dist/pipeline/compiler/sandbox/index.js +11 -0
  269. package/dist/pipeline/compiler/sandbox/sandbox-selector.d.ts +35 -0
  270. package/dist/pipeline/compiler/sandbox/sandbox-selector.js +86 -0
  271. package/dist/pipeline/compiler/sandbox/sandbox-strategy.d.ts +81 -0
  272. package/dist/pipeline/compiler/sandbox/sandbox-strategy.js +15 -0
  273. package/dist/pipeline/compiler/sandbox/tempdir-sandbox.d.ts +20 -0
  274. package/dist/pipeline/compiler/sandbox/tempdir-sandbox.js +74 -0
  275. package/dist/pipeline/compiler/scoring-bridge.d.ts +49 -0
  276. package/dist/pipeline/compiler/scoring-bridge.js +114 -0
  277. package/dist/pipeline/compiler/task-graph-builder.d.ts +54 -0
  278. package/dist/pipeline/compiler/task-graph-builder.js +291 -0
  279. package/dist/pipeline/compiler/telemetry/cost-tracker.d.ts +90 -0
  280. package/dist/pipeline/compiler/telemetry/cost-tracker.js +146 -0
  281. package/dist/pipeline/compiler/telemetry/index.d.ts +14 -0
  282. package/dist/pipeline/compiler/telemetry/index.js +19 -0
  283. package/dist/pipeline/compiler/telemetry/redactor.d.ts +58 -0
  284. package/dist/pipeline/compiler/telemetry/redactor.js +222 -0
  285. package/dist/pipeline/compiler/telemetry/tool-classifier.d.ts +32 -0
  286. package/dist/pipeline/compiler/telemetry/tool-classifier.js +120 -0
  287. package/dist/pipeline/compiler/telemetry/trace-collector.d.ts +75 -0
  288. package/dist/pipeline/compiler/telemetry/trace-collector.js +297 -0
  289. package/dist/pipeline/compiler/telemetry/trace-store.d.ts +78 -0
  290. package/dist/pipeline/compiler/telemetry/trace-store.js +85 -0
  291. package/dist/pipeline/compiler/variable-resolver.d.ts +46 -0
  292. package/dist/pipeline/compiler/variable-resolver.js +115 -0
  293. package/dist/pipeline/coverage-audit.d.ts +15 -5
  294. package/dist/pipeline/coverage-audit.js +41 -22
  295. package/dist/pipeline/eval-constants.d.ts +16 -6
  296. package/dist/pipeline/eval-constants.js +25 -4
  297. package/dist/pipeline/eval-fingerprint.d.ts +2 -2
  298. package/dist/pipeline/eval-fingerprint.js +8 -9
  299. package/dist/pipeline/expand-tasks.d.ts +19 -10
  300. package/dist/pipeline/expand-tasks.js +34 -28
  301. package/dist/pipeline/gap-analysis.d.ts +1 -1
  302. package/dist/pipeline/gap-analysis.js +2 -2
  303. package/dist/pipeline/generate-configs.d.ts +22 -4
  304. package/dist/pipeline/generate-configs.js +53 -24
  305. package/dist/pipeline/grader-api.d.ts +3 -3
  306. package/dist/pipeline/grader-api.js +5 -12
  307. package/dist/pipeline/grader-compare-runner.js +20 -27
  308. package/dist/pipeline/grader-comparison.d.ts +4 -8
  309. package/dist/pipeline/grader-comparison.js +11 -17
  310. package/dist/pipeline/grader-consistency-runner.d.ts +2 -3
  311. package/dist/pipeline/grader-consistency-runner.js +16 -20
  312. package/dist/pipeline/grader-consistency.d.ts +6 -10
  313. package/dist/pipeline/grader-consistency.js +13 -32
  314. package/dist/pipeline/grader-sensitivity-runner.js +7 -5
  315. package/dist/pipeline/grader-sensitivity.d.ts +2 -6
  316. package/dist/pipeline/grader-sensitivity.js +10 -10
  317. package/dist/pipeline/grader-validate-runner.js +7 -5
  318. package/dist/pipeline/grader-validation.d.ts +2 -6
  319. package/dist/pipeline/grader-validation.js +14 -22
  320. package/dist/pipeline/map-request-to-config.js +7 -1
  321. package/dist/pipeline/mirror-repo-tasks.d.ts +13 -13
  322. package/dist/pipeline/mirror-repo-tasks.js +22 -21
  323. package/dist/pipeline/normalize-mode.d.ts +49 -0
  324. package/dist/pipeline/normalize-mode.js +64 -0
  325. package/dist/pipeline/plan.d.ts +5 -2
  326. package/dist/pipeline/plan.js +134 -78
  327. package/dist/pipeline/pr-comment.js +2 -0
  328. package/dist/pipeline/profile-resolution.d.ts +22 -14
  329. package/dist/pipeline/profile-resolution.js +41 -19
  330. package/dist/pipeline/provenance.d.ts +2 -2
  331. package/dist/pipeline/provenance.js +12 -17
  332. package/dist/pipeline/release-report.js +4 -4
  333. package/dist/pipeline/repo-threshold-evaluator.d.ts +1 -1
  334. package/dist/pipeline/repo-threshold-evaluator.js +1 -1
  335. package/dist/pipeline/rubric-loader.d.ts +20 -0
  336. package/dist/pipeline/rubric-loader.js +37 -0
  337. package/dist/pipeline/validate.d.ts +4 -4
  338. package/dist/pipeline/validate.js +64 -53
  339. package/dist/schedules/loader.js +18 -8
  340. package/dist/scripts/migrate-task-mode.d.ts +24 -0
  341. package/dist/scripts/migrate-task-mode.js +85 -0
  342. package/dist/scripts/migrate-tasks-to-content-lake.js +11 -10
  343. package/dist/scripts/validate-task-sources.d.ts +1 -1
  344. package/dist/scripts/validate-task-sources.js +15 -15
  345. package/dist/sinks/loader.js +5 -7
  346. package/dist/sources.d.ts +7 -7
  347. package/dist/sources.js +22 -24
  348. package/dist/webhook/dispatch.js +2 -1
  349. package/package.json +15 -4
  350. package/tasks/knowledge-probe/define-type-api.task.ts +55 -0
  351. package/tasks/knowledge-probe/groq-projections.task.ts +59 -0
  352. package/tasks/literacy/frameworks.task.ts +128 -0
  353. package/tasks/literacy/functions.task.ts +69 -0
  354. package/tasks/literacy/groq.task.ts +258 -0
  355. package/tasks/literacy/nextjs-live.task.ts +75 -0
  356. package/tasks/literacy/studio-setup.task.ts +131 -0
  357. package/tasks/literacy/visual-editing.task.ts +146 -0
  358. package/config/features.yaml +0 -116
  359. package/config/models.yaml +0 -116
  360. package/config/prompts.yaml +0 -75
  361. package/config/rubrics.yaml +0 -81
  362. package/config/schedules.yaml +0 -43
  363. package/config/sinks.yaml +0 -54
  364. package/config/sources.yaml +0 -51
  365. package/config/thresholds.yaml +0 -49
  366. package/dist/_vendor/ailf-tasks/cli.d.ts +0 -8
  367. package/dist/_vendor/ailf-tasks/cli.js +0 -61
  368. package/dist/_vendor/ailf-tasks/index.d.ts +0 -13
  369. package/dist/_vendor/ailf-tasks/index.js +0 -16
  370. package/dist/_vendor/ailf-tasks/parser.d.ts +0 -27
  371. package/dist/_vendor/ailf-tasks/parser.js +0 -73
  372. package/dist/_vendor/ailf-tasks/schemas.d.ts +0 -198
  373. package/dist/_vendor/ailf-tasks/schemas.js +0 -180
  374. package/dist/_vendor/ailf-tasks/validation.d.ts +0 -47
  375. package/dist/_vendor/ailf-tasks/validation.js +0 -162
  376. package/dist/agent-observer/test-imports.d.ts +0 -7
  377. package/dist/agent-observer/test-imports.js +0 -185
@@ -1,27 +0,0 @@
1
- /**
2
- * parser.ts — Standalone task file and directory parsing.
3
- *
4
- * High-level functions for loading and validating .ailf/tasks/ YAML
5
- * files without any dependency on the eval pipeline.
6
- *
7
- * Usage:
8
- * import { parseTaskFile, loadTaskDir } from '@sanity/ailf-tasks'
9
- */
10
- import { type RepoTask } from "./schemas.js";
11
- /**
12
- * Parse a single task YAML string and return validated tasks.
13
- *
14
- * @param content - Raw YAML string content
15
- * @param filename - Source filename (for error messages)
16
- * @returns Validated array of RepoTask objects
17
- * @throws Error if YAML parsing or Zod validation fails
18
- */
19
- export declare function parseTaskFile(content: string, filename?: string): RepoTask[];
20
- /**
21
- * Load and parse all task YAML files from a directory.
22
- *
23
- * @param dirPath - Path to directory containing .yaml/.yml files
24
- * @returns All validated tasks, sorted by filename
25
- * @throws Error if directory not found, no YAML files, or validation fails
26
- */
27
- export declare function loadTaskDir(dirPath: string): RepoTask[];
@@ -1,73 +0,0 @@
1
- /**
2
- * parser.ts — Standalone task file and directory parsing.
3
- *
4
- * High-level functions for loading and validating .ailf/tasks/ YAML
5
- * files without any dependency on the eval pipeline.
6
- *
7
- * Usage:
8
- * import { parseTaskFile, loadTaskDir } from '@sanity/ailf-tasks'
9
- */
10
- import { existsSync, readdirSync, readFileSync } from "fs";
11
- import { resolve } from "path";
12
- import { load } from "js-yaml";
13
- import { RepoTaskFileSchema } from "./schemas.js";
14
- // ---------------------------------------------------------------------------
15
- // Public API
16
- // ---------------------------------------------------------------------------
17
- /**
18
- * Parse a single task YAML string and return validated tasks.
19
- *
20
- * @param content - Raw YAML string content
21
- * @param filename - Source filename (for error messages)
22
- * @returns Validated array of RepoTask objects
23
- * @throws Error if YAML parsing or Zod validation fails
24
- */
25
- export function parseTaskFile(content, filename = "<string>") {
26
- const parsed = load(content);
27
- if (!Array.isArray(parsed)) {
28
- throw new Error(`${filename} did not parse to an array of tasks. ` +
29
- "Task files must contain a YAML array of task definitions.");
30
- }
31
- const result = RepoTaskFileSchema.safeParse(parsed);
32
- if (!result.success) {
33
- const messages = result.error.issues
34
- .map((i) => ` [${i.path.join(".")}]: ${i.message}`)
35
- .join("\n");
36
- throw new Error(`Invalid task file "${filename}":\n${messages}`);
37
- }
38
- return result.data;
39
- }
40
- /**
41
- * Load and parse all task YAML files from a directory.
42
- *
43
- * @param dirPath - Path to directory containing .yaml/.yml files
44
- * @returns All validated tasks, sorted by filename
45
- * @throws Error if directory not found, no YAML files, or validation fails
46
- */
47
- export function loadTaskDir(dirPath) {
48
- if (!existsSync(dirPath)) {
49
- throw new Error(`Tasks directory not found: ${dirPath}\n` +
50
- " Expected a directory containing .ailf/tasks/*.yaml files.");
51
- }
52
- const yamlFiles = readdirSync(dirPath)
53
- .filter((f) => (f.endsWith(".yaml") || f.endsWith(".yml")) && !f.startsWith("."))
54
- .sort();
55
- if (yamlFiles.length === 0) {
56
- throw new Error(`No YAML files found in ${dirPath}\n` +
57
- " Expected .ailf/tasks/*.yaml files with task definitions.");
58
- }
59
- const allTasks = [];
60
- for (const file of yamlFiles) {
61
- const filePath = resolve(dirPath, file);
62
- const content = readFileSync(filePath, "utf-8");
63
- try {
64
- const tasks = parseTaskFile(content, file);
65
- allTasks.push(...tasks);
66
- }
67
- catch (err) {
68
- const msg = err instanceof Error ? err.message : String(err);
69
- throw new Error(`Failed to load ${file}:\n${msg}`, { cause: err });
70
- }
71
- }
72
- return allTasks;
73
- }
@@ -1,198 +0,0 @@
1
- /**
2
- * schemas.ts — Zod schemas for repo-based task definitions.
3
- *
4
- * Validates .ailf/tasks/*.yaml task files from external repositories.
5
- * These schemas are the contract between external repos and the AILF eval
6
- * pipeline — they define exactly what fields are accepted, with friendly
7
- * error messages for authors writing task YAML by hand.
8
- *
9
- * This module is the single source of truth for task schemas. The eval
10
- * package re-exports from here to avoid duplication.
11
- *
12
- * @see docs/exec-plans/tasks-as-content/phase-4-repo-based-tasks.md
13
- */
14
- import { z } from "zod";
15
- /**
16
- * The set of assertion types allowed in repo-based task files.
17
- *
18
- * This is a curated subset of Promptfoo assertion types — we expose only the
19
- * types that are stable, well-documented, and useful for external authors.
20
- */
21
- export declare const CURATED_ASSERTION_TYPES: readonly ["llm-rubric", "contains", "contains-any", "contains-all", "not-contains", "icontains", "icontains-any", "regex", "javascript", "similar", "cost", "latency"];
22
- export type CuratedAssertionType = (typeof CURATED_ASSERTION_TYPES)[number];
23
- /**
24
- * Valid rubric template names — must match keys in config/rubrics.yaml.
25
- */
26
- export declare const RUBRIC_TEMPLATE_NAMES: readonly ["task-completion", "code-correctness", "doc-coverage"];
27
- export type RubricTemplateName = (typeof RUBRIC_TEMPLATE_NAMES)[number];
28
- /**
29
- * Zod schema for a single repo-based task definition.
30
- *
31
- * This is the external-author-facing contract. Field names are camelCase
32
- * to match the Content Lake document schema (ailf.task).
33
- */
34
- export declare const RepoTaskSchema: z.ZodObject<{
35
- id: z.ZodString;
36
- description: z.ZodString;
37
- status: z.ZodDefault<z.ZodOptional<z.ZodEnum<{
38
- active: "active";
39
- draft: "draft";
40
- paused: "paused";
41
- archived: "archived";
42
- }>>>;
43
- featureArea: z.ZodString;
44
- tags: z.ZodOptional<z.ZodArray<z.ZodString>>;
45
- canonicalDocs: z.ZodDefault<z.ZodOptional<z.ZodArray<z.ZodUnion<readonly [z.ZodObject<{
46
- id: z.ZodString;
47
- reason: z.ZodDefault<z.ZodOptional<z.ZodString>>;
48
- slug: z.ZodOptional<z.ZodString>;
49
- path: z.ZodOptional<z.ZodString>;
50
- }, z.core.$strip>, z.ZodObject<{
51
- slug: z.ZodString;
52
- reason: z.ZodDefault<z.ZodOptional<z.ZodString>>;
53
- }, z.core.$strip>, z.ZodObject<{
54
- path: z.ZodString;
55
- reason: z.ZodDefault<z.ZodOptional<z.ZodString>>;
56
- }, z.core.$strip>, z.ZodObject<{
57
- perspective: z.ZodString;
58
- reason: z.ZodDefault<z.ZodOptional<z.ZodString>>;
59
- }, z.core.$strip>]>>>>;
60
- vars: z.ZodOptional<z.ZodObject<{
61
- task: z.ZodString;
62
- }, z.core.$loose>>;
63
- assert: z.ZodArray<z.ZodUnion<readonly [z.ZodObject<{
64
- type: z.ZodLiteral<"llm-rubric">;
65
- template: z.ZodEnum<{
66
- "task-completion": "task-completion";
67
- "code-correctness": "code-correctness";
68
- "doc-coverage": "doc-coverage";
69
- }>;
70
- criteria: z.ZodArray<z.ZodString>;
71
- weight: z.ZodOptional<z.ZodNumber>;
72
- }, z.core.$strip>, z.ZodObject<{
73
- type: z.ZodEnum<{
74
- "llm-rubric": "llm-rubric";
75
- contains: "contains";
76
- "contains-any": "contains-any";
77
- "contains-all": "contains-all";
78
- "not-contains": "not-contains";
79
- icontains: "icontains";
80
- "icontains-any": "icontains-any";
81
- regex: "regex";
82
- javascript: "javascript";
83
- similar: "similar";
84
- cost: "cost";
85
- latency: "latency";
86
- }>;
87
- value: z.ZodOptional<z.ZodUnknown>;
88
- threshold: z.ZodOptional<z.ZodNumber>;
89
- weight: z.ZodOptional<z.ZodNumber>;
90
- }, z.core.$loose>]>>;
91
- baseline: z.ZodOptional<z.ZodObject<{
92
- enabled: z.ZodOptional<z.ZodBoolean>;
93
- rubric: z.ZodOptional<z.ZodEnum<{
94
- abbreviated: "abbreviated";
95
- full: "full";
96
- none: "none";
97
- }>>;
98
- }, z.core.$strip>>;
99
- docCoverage: z.ZodDefault<z.ZodOptional<z.ZodBoolean>>;
100
- referenceSolution: z.ZodOptional<z.ZodString>;
101
- execution: z.ZodOptional<z.ZodObject<{
102
- enabled: z.ZodDefault<z.ZodOptional<z.ZodBoolean>>;
103
- blocking: z.ZodDefault<z.ZodOptional<z.ZodBoolean>>;
104
- threshold: z.ZodOptional<z.ZodObject<{
105
- score: z.ZodOptional<z.ZodNumber>;
106
- }, z.core.$strip>>;
107
- trigger: z.ZodOptional<z.ZodObject<{
108
- branches: z.ZodOptional<z.ZodArray<z.ZodString>>;
109
- paths: z.ZodOptional<z.ZodArray<z.ZodString>>;
110
- }, z.core.$strip>>;
111
- source: z.ZodOptional<z.ZodString>;
112
- }, z.core.$strip>>;
113
- }, z.core.$strip>;
114
- export type RepoTask = z.infer<typeof RepoTaskSchema>;
115
- /**
116
- * Schema for an array of repo tasks — what a single .ailf/tasks/*.yaml file
117
- * contains. Each file must define at least one task.
118
- */
119
- export declare const RepoTaskFileSchema: z.ZodArray<z.ZodObject<{
120
- id: z.ZodString;
121
- description: z.ZodString;
122
- status: z.ZodDefault<z.ZodOptional<z.ZodEnum<{
123
- active: "active";
124
- draft: "draft";
125
- paused: "paused";
126
- archived: "archived";
127
- }>>>;
128
- featureArea: z.ZodString;
129
- tags: z.ZodOptional<z.ZodArray<z.ZodString>>;
130
- canonicalDocs: z.ZodDefault<z.ZodOptional<z.ZodArray<z.ZodUnion<readonly [z.ZodObject<{
131
- id: z.ZodString;
132
- reason: z.ZodDefault<z.ZodOptional<z.ZodString>>;
133
- slug: z.ZodOptional<z.ZodString>;
134
- path: z.ZodOptional<z.ZodString>;
135
- }, z.core.$strip>, z.ZodObject<{
136
- slug: z.ZodString;
137
- reason: z.ZodDefault<z.ZodOptional<z.ZodString>>;
138
- }, z.core.$strip>, z.ZodObject<{
139
- path: z.ZodString;
140
- reason: z.ZodDefault<z.ZodOptional<z.ZodString>>;
141
- }, z.core.$strip>, z.ZodObject<{
142
- perspective: z.ZodString;
143
- reason: z.ZodDefault<z.ZodOptional<z.ZodString>>;
144
- }, z.core.$strip>]>>>>;
145
- vars: z.ZodOptional<z.ZodObject<{
146
- task: z.ZodString;
147
- }, z.core.$loose>>;
148
- assert: z.ZodArray<z.ZodUnion<readonly [z.ZodObject<{
149
- type: z.ZodLiteral<"llm-rubric">;
150
- template: z.ZodEnum<{
151
- "task-completion": "task-completion";
152
- "code-correctness": "code-correctness";
153
- "doc-coverage": "doc-coverage";
154
- }>;
155
- criteria: z.ZodArray<z.ZodString>;
156
- weight: z.ZodOptional<z.ZodNumber>;
157
- }, z.core.$strip>, z.ZodObject<{
158
- type: z.ZodEnum<{
159
- "llm-rubric": "llm-rubric";
160
- contains: "contains";
161
- "contains-any": "contains-any";
162
- "contains-all": "contains-all";
163
- "not-contains": "not-contains";
164
- icontains: "icontains";
165
- "icontains-any": "icontains-any";
166
- regex: "regex";
167
- javascript: "javascript";
168
- similar: "similar";
169
- cost: "cost";
170
- latency: "latency";
171
- }>;
172
- value: z.ZodOptional<z.ZodUnknown>;
173
- threshold: z.ZodOptional<z.ZodNumber>;
174
- weight: z.ZodOptional<z.ZodNumber>;
175
- }, z.core.$loose>]>>;
176
- baseline: z.ZodOptional<z.ZodObject<{
177
- enabled: z.ZodOptional<z.ZodBoolean>;
178
- rubric: z.ZodOptional<z.ZodEnum<{
179
- abbreviated: "abbreviated";
180
- full: "full";
181
- none: "none";
182
- }>>;
183
- }, z.core.$strip>>;
184
- docCoverage: z.ZodDefault<z.ZodOptional<z.ZodBoolean>>;
185
- referenceSolution: z.ZodOptional<z.ZodString>;
186
- execution: z.ZodOptional<z.ZodObject<{
187
- enabled: z.ZodDefault<z.ZodOptional<z.ZodBoolean>>;
188
- blocking: z.ZodDefault<z.ZodOptional<z.ZodBoolean>>;
189
- threshold: z.ZodOptional<z.ZodObject<{
190
- score: z.ZodOptional<z.ZodNumber>;
191
- }, z.core.$strip>>;
192
- trigger: z.ZodOptional<z.ZodObject<{
193
- branches: z.ZodOptional<z.ZodArray<z.ZodString>>;
194
- paths: z.ZodOptional<z.ZodArray<z.ZodString>>;
195
- }, z.core.$strip>>;
196
- source: z.ZodOptional<z.ZodString>;
197
- }, z.core.$strip>>;
198
- }, z.core.$strip>>;
@@ -1,180 +0,0 @@
1
- /**
2
- * schemas.ts — Zod schemas for repo-based task definitions.
3
- *
4
- * Validates .ailf/tasks/*.yaml task files from external repositories.
5
- * These schemas are the contract between external repos and the AILF eval
6
- * pipeline — they define exactly what fields are accepted, with friendly
7
- * error messages for authors writing task YAML by hand.
8
- *
9
- * This module is the single source of truth for task schemas. The eval
10
- * package re-exports from here to avoid duplication.
11
- *
12
- * @see docs/exec-plans/tasks-as-content/phase-4-repo-based-tasks.md
13
- */
14
- import { z } from "zod";
15
- // ---------------------------------------------------------------------------
16
- // Constants — curated assertion types and rubric template names
17
- // ---------------------------------------------------------------------------
18
- /**
19
- * The set of assertion types allowed in repo-based task files.
20
- *
21
- * This is a curated subset of Promptfoo assertion types — we expose only the
22
- * types that are stable, well-documented, and useful for external authors.
23
- */
24
- export const CURATED_ASSERTION_TYPES = [
25
- "llm-rubric",
26
- "contains",
27
- "contains-any",
28
- "contains-all",
29
- "not-contains",
30
- "icontains",
31
- "icontains-any",
32
- "regex",
33
- "javascript",
34
- "similar",
35
- "cost",
36
- "latency",
37
- ];
38
- /**
39
- * Valid rubric template names — must match keys in config/rubrics.yaml.
40
- */
41
- export const RUBRIC_TEMPLATE_NAMES = [
42
- "task-completion",
43
- "code-correctness",
44
- "doc-coverage",
45
- ];
46
- // ---------------------------------------------------------------------------
47
- // Assertion schemas
48
- // ---------------------------------------------------------------------------
49
- /**
50
- * Polymorphic canonical doc reference — discriminated by key presence.
51
- * Exactly one resolution key (slug, path, id, or perspective) must be present.
52
- *
53
- * @see docs/design-docs/canonical-doc-resolution.md
54
- */
55
- const SlugDocRefSchema = z.object({
56
- slug: z.string().min(1),
57
- reason: z.string().optional().default(""),
58
- });
59
- const PathDocRefSchema = z.object({
60
- path: z.string().min(1),
61
- reason: z.string().optional().default(""),
62
- });
63
- const IdDocRefSchema = z.object({
64
- id: z.string().min(1),
65
- reason: z.string().optional().default(""),
66
- /** Human-readable slug annotation (not used for resolution) */
67
- slug: z.string().optional(),
68
- /** Human-readable path annotation (not used for resolution) */
69
- path: z.string().optional(),
70
- });
71
- const PerspectiveDocRefSchema = z.object({
72
- perspective: z.string().min(1),
73
- reason: z.string().optional().default(""),
74
- });
75
- // Order matters: IdDocRefSchema first because it may also carry `slug`
76
- // and `path` as optional annotations. Zod tries schemas in order, so
77
- // entries like `{ id: "...", slug: "..." }` must match IdDocRefSchema
78
- // (not SlugDocRefSchema).
79
- const CanonicalDocRefSchema = z.union([
80
- IdDocRefSchema,
81
- SlugDocRefSchema,
82
- PathDocRefSchema,
83
- PerspectiveDocRefSchema,
84
- ]);
85
- /**
86
- * A templated LLM-rubric assertion — uses one of the predefined rubric
87
- * templates with author-supplied criteria.
88
- */
89
- const TemplatedAssertionSchema = z.object({
90
- type: z.literal("llm-rubric"),
91
- template: z.enum(RUBRIC_TEMPLATE_NAMES),
92
- criteria: z.array(z.string().min(1)).min(1),
93
- weight: z.number().optional(),
94
- });
95
- /**
96
- * A value-based assertion (contains, regex, cost, etc.). Uses .passthrough()
97
- * to allow extra fields for future extension without schema breakage.
98
- */
99
- const ValueAssertionSchema = z
100
- .object({
101
- type: z.enum(CURATED_ASSERTION_TYPES),
102
- value: z.unknown().optional(),
103
- threshold: z.number().optional(),
104
- weight: z.number().optional(),
105
- })
106
- .passthrough();
107
- /** Union of all supported assertion shapes. */
108
- const AssertionSchema = z.union([
109
- TemplatedAssertionSchema,
110
- ValueAssertionSchema,
111
- ]);
112
- // ---------------------------------------------------------------------------
113
- // Nested config schemas
114
- // ---------------------------------------------------------------------------
115
- const BaselineConfigSchema = z
116
- .object({
117
- enabled: z.boolean().optional(),
118
- rubric: z.enum(["abbreviated", "full", "none"]).optional(),
119
- })
120
- .optional();
121
- const ExecutionConfigSchema = z
122
- .object({
123
- enabled: z.boolean().optional().default(true),
124
- blocking: z.boolean().optional().default(false),
125
- threshold: z
126
- .object({
127
- score: z.number().min(0).max(100).optional(),
128
- })
129
- .optional(),
130
- trigger: z
131
- .object({
132
- branches: z.array(z.string()).optional(),
133
- paths: z.array(z.string()).optional(),
134
- })
135
- .optional(),
136
- source: z.string().optional(),
137
- })
138
- .optional();
139
- // ---------------------------------------------------------------------------
140
- // RepoTaskSchema — a single task definition from .ailf/tasks/*.yaml
141
- // ---------------------------------------------------------------------------
142
- /**
143
- * Zod schema for a single repo-based task definition.
144
- *
145
- * This is the external-author-facing contract. Field names are camelCase
146
- * to match the Content Lake document schema (ailf.task).
147
- */
148
- export const RepoTaskSchema = z.object({
149
- id: z
150
- .string()
151
- .min(1)
152
- .regex(/^[a-z0-9][a-z0-9-]*$/, "Task ID must be lowercase alphanumeric with hyphens"),
153
- description: z.string().min(1),
154
- status: z
155
- .enum(["active", "draft", "paused", "archived"])
156
- .optional()
157
- .default("active"),
158
- featureArea: z
159
- .string()
160
- .min(1)
161
- .regex(/^[a-z0-9][a-z0-9-]*$/, "Feature area must be lowercase alphanumeric with hyphens"),
162
- tags: z.array(z.string()).optional(),
163
- canonicalDocs: z.array(CanonicalDocRefSchema).optional().default([]),
164
- vars: z
165
- .object({
166
- task: z.string().min(1),
167
- })
168
- .passthrough()
169
- .optional(),
170
- assert: z.array(AssertionSchema).min(1),
171
- baseline: BaselineConfigSchema,
172
- docCoverage: z.boolean().optional().default(false),
173
- referenceSolution: z.string().optional(),
174
- execution: ExecutionConfigSchema,
175
- });
176
- /**
177
- * Schema for an array of repo tasks — what a single .ailf/tasks/*.yaml file
178
- * contains. Each file must define at least one task.
179
- */
180
- export const RepoTaskFileSchema = z.array(RepoTaskSchema).min(1);
@@ -1,47 +0,0 @@
1
- /**
2
- * validation.ts — Semantic validation for repo-based tasks.
3
- *
4
- * Checks that go beyond Zod schema parsing:
5
- * - Assertion types are in the curated set
6
- * - Rubric template names resolve to known templates
7
- * - Feature area strings are well-formed
8
- * - Canonical doc slugs look reasonable (slugs, not URLs)
9
- *
10
- * These produce warnings, not errors — the pipeline can still run
11
- * with imperfect tasks. Only structural failures (caught by Zod) block.
12
- */
13
- import { type RepoTask } from "./schemas.js";
14
- export interface ValidationResult {
15
- valid: boolean;
16
- errors: ValidationMessage[];
17
- warnings: ValidationMessage[];
18
- }
19
- export interface ValidationMessage {
20
- taskId: string;
21
- field: string;
22
- message: string;
23
- }
24
- /**
25
- * Run semantic validation on an array of parsed repo tasks.
26
- *
27
- * Returns warnings for issues that don't block execution (unknown feature
28
- * areas, unresolved slugs) and errors for issues that would cause pipeline
29
- * failures (completely missing required fields — though Zod catches most).
30
- */
31
- export declare function validateRepoTasks(tasks: RepoTask[]): ValidationResult;
32
- /**
33
- * Format validation results for console output.
34
- */
35
- export declare function formatValidationResult(result: ValidationResult): string;
36
- /**
37
- * Detect snake_case field names in raw task YAML data.
38
- *
39
- * This runs BEFORE Zod parsing to provide a user-friendly error message
40
- * when authors use framework-internal snake_case names instead of the
41
- * camelCase names expected in repo task files.
42
- *
43
- * @param raw - Raw parsed YAML (before Zod validation)
44
- * @param filename - Source filename for error messages
45
- * @returns Array of warning messages (empty if no issues)
46
- */
47
- export declare function detectSnakeCaseFields(raw: unknown, filename: string): string[];
@@ -1,162 +0,0 @@
1
- /**
2
- * validation.ts — Semantic validation for repo-based tasks.
3
- *
4
- * Checks that go beyond Zod schema parsing:
5
- * - Assertion types are in the curated set
6
- * - Rubric template names resolve to known templates
7
- * - Feature area strings are well-formed
8
- * - Canonical doc slugs look reasonable (slugs, not URLs)
9
- *
10
- * These produce warnings, not errors — the pipeline can still run
11
- * with imperfect tasks. Only structural failures (caught by Zod) block.
12
- */
13
- import { CURATED_ASSERTION_TYPES, RUBRIC_TEMPLATE_NAMES, } from "./schemas.js";
14
- // ---------------------------------------------------------------------------
15
- // Public API
16
- // ---------------------------------------------------------------------------
17
- /**
18
- * Run semantic validation on an array of parsed repo tasks.
19
- *
20
- * Returns warnings for issues that don't block execution (unknown feature
21
- * areas, unresolved slugs) and errors for issues that would cause pipeline
22
- * failures (completely missing required fields — though Zod catches most).
23
- */
24
- export function validateRepoTasks(tasks) {
25
- const errors = [];
26
- const warnings = [];
27
- // Check for duplicate IDs
28
- const seenIds = new Set();
29
- for (const task of tasks) {
30
- if (seenIds.has(task.id)) {
31
- errors.push({
32
- taskId: task.id,
33
- field: "id",
34
- message: `Duplicate task ID "${task.id}"`,
35
- });
36
- }
37
- seenIds.add(task.id);
38
- }
39
- for (const task of tasks) {
40
- // Check assertion types
41
- for (let i = 0; i < task.assert.length; i++) {
42
- const assertion = task.assert[i];
43
- if (!CURATED_ASSERTION_TYPES.includes(assertion.type)) {
44
- warnings.push({
45
- taskId: task.id,
46
- field: `assert[${i}].type`,
47
- message: `Unknown assertion type "${assertion.type}". ` +
48
- `Valid types: ${CURATED_ASSERTION_TYPES.join(", ")}`,
49
- });
50
- }
51
- // Check rubric template for llm-rubric assertions
52
- if (assertion.type === "llm-rubric" && "template" in assertion) {
53
- const template = assertion.template;
54
- if (!RUBRIC_TEMPLATE_NAMES.includes(template)) {
55
- warnings.push({
56
- taskId: task.id,
57
- field: `assert[${i}].template`,
58
- message: `Unknown rubric template "${template}". ` +
59
- `Valid templates: ${RUBRIC_TEMPLATE_NAMES.join(", ")}`,
60
- });
61
- }
62
- }
63
- }
64
- // Check canonical doc refs look reasonable
65
- for (let i = 0; i < (task.canonicalDocs?.length ?? 0); i++) {
66
- const doc = task.canonicalDocs[i];
67
- // Slug refs: warn if they look like URLs or paths
68
- if ("slug" in doc && !("id" in doc) && typeof doc.slug === "string") {
69
- if (doc.slug.includes("/") || doc.slug.includes("http")) {
70
- warnings.push({
71
- taskId: task.id,
72
- field: `canonicalDocs[${i}].slug`,
73
- message: `Slug "${doc.slug}" looks like a URL or path — use 'path' type for paths or 'slug' for document slugs (e.g., "groq-introduction")`,
74
- });
75
- }
76
- }
77
- }
78
- // Check task has at least one llm-rubric assertion (recommended but not required)
79
- const hasLlmRubric = task.assert.some((a) => a.type === "llm-rubric");
80
- if (!hasLlmRubric) {
81
- warnings.push({
82
- taskId: task.id,
83
- field: "assert",
84
- message: "No llm-rubric assertion found. Tasks should have at least one scored rubric for meaningful evaluation.",
85
- });
86
- }
87
- // Check taskPrompt exists in vars (vars.task)
88
- if (!task.vars?.task) {
89
- warnings.push({
90
- taskId: task.id,
91
- field: "vars.task",
92
- message: "No task prompt found in vars.task. The LLM will receive an empty implementation request.",
93
- });
94
- }
95
- }
96
- return {
97
- valid: errors.length === 0,
98
- errors,
99
- warnings,
100
- };
101
- }
102
- /**
103
- * Format validation results for console output.
104
- */
105
- export function formatValidationResult(result) {
106
- const lines = [];
107
- if (result.errors.length > 0) {
108
- lines.push("❌ Errors:");
109
- for (const e of result.errors) {
110
- lines.push(` [${e.taskId}] ${e.field}: ${e.message}`);
111
- }
112
- }
113
- if (result.warnings.length > 0) {
114
- lines.push("⚠️ Warnings:");
115
- for (const w of result.warnings) {
116
- lines.push(` [${w.taskId}] ${w.field}: ${w.message}`);
117
- }
118
- }
119
- if (result.valid && result.warnings.length === 0) {
120
- lines.push("✅ All repo tasks pass validation");
121
- }
122
- return lines.join("\n");
123
- }
124
- // ---------------------------------------------------------------------------
125
- // Snake_case detection (pre-parse helper)
126
- // ---------------------------------------------------------------------------
127
- /** Known snake_case → camelCase field mappings for common errors */
128
- const SNAKE_TO_CAMEL = {
129
- feature_area: "featureArea",
130
- canonical_docs: "canonicalDocs",
131
- doc_coverage: "docCoverage",
132
- reference_solution: "referenceSolution",
133
- };
134
- /**
135
- * Detect snake_case field names in raw task YAML data.
136
- *
137
- * This runs BEFORE Zod parsing to provide a user-friendly error message
138
- * when authors use framework-internal snake_case names instead of the
139
- * camelCase names expected in repo task files.
140
- *
141
- * @param raw - Raw parsed YAML (before Zod validation)
142
- * @param filename - Source filename for error messages
143
- * @returns Array of warning messages (empty if no issues)
144
- */
145
- export function detectSnakeCaseFields(raw, filename) {
146
- const warnings = [];
147
- if (!Array.isArray(raw))
148
- return warnings;
149
- for (let i = 0; i < raw.length; i++) {
150
- const entry = raw[i];
151
- if (typeof entry !== "object" || entry === null)
152
- continue;
153
- const obj = entry;
154
- const taskId = typeof obj.id === "string" ? obj.id : `task[${i}]`;
155
- for (const [snake, camel] of Object.entries(SNAKE_TO_CAMEL)) {
156
- if (snake in obj) {
157
- warnings.push(`[${filename}] ${taskId}: Found "${snake}" — repo tasks use camelCase. Did you mean "${camel}"?`);
158
- }
159
- }
160
- }
161
- return warnings;
162
- }