@sanity/ailf 0.5.0 → 2.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (377) hide show
  1. package/README.md +0 -1
  2. package/config/features.ts +23 -0
  3. package/config/models.ts +95 -0
  4. package/config/prompts.ts +16 -0
  5. package/config/rubrics.ts +225 -0
  6. package/config/schedules.ts +47 -0
  7. package/config/sinks.ts +37 -0
  8. package/config/sources.ts +21 -0
  9. package/config/thresholds.ts +61 -0
  10. package/dist/_vendor/ailf-core/config-helpers.d.ts +171 -0
  11. package/dist/_vendor/ailf-core/config-helpers.js +170 -0
  12. package/dist/_vendor/ailf-core/env-helper.d.ts +35 -0
  13. package/dist/_vendor/ailf-core/env-helper.js +45 -0
  14. package/dist/_vendor/ailf-core/examples/index.d.ts +16 -0
  15. package/dist/_vendor/ailf-core/examples/index.js +25 -0
  16. package/dist/_vendor/ailf-core/index.d.ts +3 -0
  17. package/dist/_vendor/ailf-core/index.js +5 -0
  18. package/dist/_vendor/ailf-core/ports/context.d.ts +17 -2
  19. package/dist/_vendor/ailf-core/ports/doc-fetcher.d.ts +2 -2
  20. package/dist/_vendor/ailf-core/ports/index.d.ts +2 -1
  21. package/dist/_vendor/ailf-core/ports/mode-handler.d.ts +129 -0
  22. package/dist/_vendor/ailf-core/ports/mode-handler.js +19 -0
  23. package/dist/_vendor/ailf-core/ports/task-source.d.ts +16 -122
  24. package/dist/_vendor/ailf-core/ports/task-source.js +7 -7
  25. package/dist/_vendor/ailf-core/schemas/eval-config.d.ts +8 -2
  26. package/dist/_vendor/ailf-core/schemas/eval-config.js +17 -2
  27. package/dist/_vendor/ailf-core/schemas/pipeline-request.d.ts +9 -3
  28. package/dist/_vendor/ailf-core/schemas/pipeline-request.js +8 -1
  29. package/dist/_vendor/ailf-core/schemas/pipeline.d.ts +14 -31
  30. package/dist/_vendor/ailf-core/schemas/pipeline.js +17 -9
  31. package/dist/_vendor/ailf-core/schemas/schedules.d.ts +14 -4
  32. package/dist/_vendor/ailf-core/schemas/schedules.js +6 -2
  33. package/dist/_vendor/ailf-core/schemas/sinks.d.ts +1 -1
  34. package/dist/_vendor/ailf-core/services/comparison-formatters.js +57 -19
  35. package/dist/_vendor/ailf-core/services/index.d.ts +2 -1
  36. package/dist/_vendor/ailf-core/services/index.js +2 -1
  37. package/dist/_vendor/ailf-core/services/scoring-engine.d.ts +153 -0
  38. package/dist/_vendor/ailf-core/services/scoring-engine.js +237 -0
  39. package/dist/_vendor/ailf-core/services/scoring.d.ts +15 -2
  40. package/dist/_vendor/ailf-core/services/scoring.js +25 -15
  41. package/dist/_vendor/ailf-core/types/branded-ids.d.ts +137 -0
  42. package/dist/_vendor/ailf-core/types/branded-ids.js +136 -0
  43. package/dist/_vendor/ailf-core/types/eval-mode-config.d.ts +150 -0
  44. package/dist/_vendor/ailf-core/types/eval-mode-config.js +24 -0
  45. package/dist/_vendor/ailf-core/types/generalized-task.d.ts +332 -0
  46. package/dist/_vendor/ailf-core/types/generalized-task.js +13 -0
  47. package/dist/_vendor/ailf-core/types/index.d.ts +45 -83
  48. package/dist/_vendor/ailf-core/types/index.js +8 -1
  49. package/dist/_vendor/ailf-core/types/plugin-registry.d.ts +257 -0
  50. package/dist/_vendor/ailf-core/types/plugin-registry.js +185 -0
  51. package/dist/_vendor/ailf-core/types/storage-schema.d.ts +199 -0
  52. package/dist/_vendor/ailf-core/types/storage-schema.js +39 -0
  53. package/dist/_vendor/ailf-core/types/task-graph.d.ts +86 -0
  54. package/dist/_vendor/ailf-core/types/task-graph.js +20 -0
  55. package/dist/_vendor/ailf-core/types/trace.d.ts +118 -0
  56. package/dist/_vendor/ailf-core/types/trace.js +18 -0
  57. package/dist/_vendor/ailf-core/types/variable-envelope.d.ts +80 -0
  58. package/dist/_vendor/ailf-core/types/variable-envelope.js +16 -0
  59. package/dist/_vendor/ailf-shared/dimension-names.d.ts +5 -18
  60. package/dist/_vendor/ailf-shared/dimension-names.js +6 -24
  61. package/dist/_vendor/ailf-shared/eval-modes.d.ts +38 -6
  62. package/dist/_vendor/ailf-shared/eval-modes.js +26 -2
  63. package/dist/_vendor/ailf-shared/index.d.ts +0 -1
  64. package/dist/_vendor/ailf-shared/index.js +0 -1
  65. package/dist/adapters/api-client/build-request.js +14 -13
  66. package/dist/adapters/config-sources/file-config-adapter.d.ts +20 -11
  67. package/dist/adapters/config-sources/file-config-adapter.js +39 -12
  68. package/dist/adapters/config-sources/index.d.ts +2 -0
  69. package/dist/adapters/config-sources/index.js +1 -0
  70. package/dist/adapters/config-sources/ts-config-loader.d.ts +59 -0
  71. package/dist/adapters/config-sources/ts-config-loader.js +141 -0
  72. package/dist/adapters/doc-fetchers/sanity-doc-fetcher.d.ts +3 -2
  73. package/dist/adapters/doc-fetchers/sanity-doc-fetcher.js +7 -2
  74. package/dist/adapters/task-sources/composite-task-source.d.ts +3 -3
  75. package/dist/adapters/task-sources/composite-task-source.js +1 -1
  76. package/dist/adapters/task-sources/content-lake-task-source.d.ts +7 -6
  77. package/dist/adapters/task-sources/content-lake-task-source.js +35 -39
  78. package/dist/adapters/task-sources/index.d.ts +3 -2
  79. package/dist/adapters/task-sources/index.js +3 -2
  80. package/dist/adapters/task-sources/repo-schemas.d.ts +218 -16
  81. package/dist/adapters/task-sources/repo-schemas.js +227 -19
  82. package/dist/adapters/task-sources/repo-task-source.d.ts +16 -12
  83. package/dist/adapters/task-sources/repo-task-source.js +92 -80
  84. package/dist/adapters/task-sources/repo-validation.d.ts +36 -5
  85. package/dist/adapters/task-sources/repo-validation.js +126 -5
  86. package/dist/adapters/task-sources/task-file-loader.d.ts +64 -0
  87. package/dist/adapters/task-sources/task-file-loader.js +83 -0
  88. package/dist/adapters/task-sources/yaml-task-source.d.ts +6 -6
  89. package/dist/adapters/task-sources/yaml-task-source.js +19 -16
  90. package/dist/cli.js +0 -2
  91. package/dist/commands/baseline.js +4 -1
  92. package/dist/commands/calculate-scores.js +1 -1
  93. package/dist/commands/coverage-audit.js +9 -1
  94. package/dist/commands/explain-handler.js +25 -23
  95. package/dist/commands/fetch-docs.js +3 -2
  96. package/dist/commands/generate-configs.js +1 -1
  97. package/dist/commands/init.d.ts +6 -4
  98. package/dist/commands/init.js +302 -23
  99. package/dist/commands/interactive.js +11 -7
  100. package/dist/commands/pipeline-action.d.ts +2 -0
  101. package/dist/commands/pipeline-action.js +16 -6
  102. package/dist/commands/pipeline.d.ts +1 -0
  103. package/dist/commands/pipeline.js +4 -2
  104. package/dist/commands/pr-comment.js +1 -1
  105. package/dist/commands/publish.js +2 -2
  106. package/dist/commands/readiness-report.js +13 -6
  107. package/dist/commands/validate-tasks.d.ts +2 -2
  108. package/dist/commands/validate-tasks.js +26 -15
  109. package/dist/composition-root.d.ts +13 -1
  110. package/dist/composition-root.js +99 -4
  111. package/dist/index.d.ts +41 -0
  112. package/dist/index.js +48 -0
  113. package/dist/orchestration/build-app-context.js +1 -0
  114. package/dist/orchestration/build-step-sequence.js +28 -8
  115. package/dist/orchestration/steps/calculate-scores-step.js +24 -11
  116. package/dist/orchestration/steps/fetch-docs-step.js +8 -7
  117. package/dist/orchestration/steps/gap-analysis-step.js +8 -7
  118. package/dist/orchestration/steps/generate-configs-step.d.ts +16 -3
  119. package/dist/orchestration/steps/generate-configs-step.js +261 -51
  120. package/dist/orchestration/steps/grader-consistency-step.js +7 -4
  121. package/dist/orchestration/steps/mirror-repo-tasks-step.js +1 -1
  122. package/dist/orchestration/steps/readiness-step.js +5 -6
  123. package/dist/orchestration/steps/run-eval-step.d.ts +1 -2
  124. package/dist/orchestration/steps/run-eval-step.js +8 -7
  125. package/dist/pipeline/cache.d.ts +1 -1
  126. package/dist/pipeline/cache.js +36 -8
  127. package/dist/pipeline/calculate-scores.d.ts +2 -4
  128. package/dist/pipeline/calculate-scores.js +43 -113
  129. package/dist/pipeline/checks.js +2 -2
  130. package/dist/pipeline/compare.js +8 -8
  131. package/dist/pipeline/compiler/__tests__/agent-harness-handler.test.d.ts +10 -0
  132. package/dist/pipeline/compiler/__tests__/agent-harness-handler.test.js +288 -0
  133. package/dist/pipeline/compiler/__tests__/assertion-mapper.test.d.ts +9 -0
  134. package/dist/pipeline/compiler/__tests__/assertion-mapper.test.js +145 -0
  135. package/dist/pipeline/compiler/__tests__/knowledge-probe-handler.test.d.ts +10 -0
  136. package/dist/pipeline/compiler/__tests__/knowledge-probe-handler.test.js +314 -0
  137. package/dist/pipeline/compiler/__tests__/literacy-handler.test.d.ts +10 -0
  138. package/dist/pipeline/compiler/__tests__/literacy-handler.test.js +486 -0
  139. package/dist/pipeline/compiler/__tests__/mcp-server-handler.test.d.ts +10 -0
  140. package/dist/pipeline/compiler/__tests__/mcp-server-handler.test.js +392 -0
  141. package/dist/pipeline/compiler/__tests__/promptfoo-compiler.test.d.ts +9 -0
  142. package/dist/pipeline/compiler/__tests__/promptfoo-compiler.test.js +333 -0
  143. package/dist/pipeline/compiler/__tests__/sandbox-and-fixtures.test.d.ts +12 -0
  144. package/dist/pipeline/compiler/__tests__/sandbox-and-fixtures.test.js +210 -0
  145. package/dist/pipeline/compiler/__tests__/scoring-and-presets.test.d.ts +7 -0
  146. package/dist/pipeline/compiler/__tests__/scoring-and-presets.test.js +404 -0
  147. package/dist/pipeline/compiler/__tests__/scoring-bridge.test.d.ts +10 -0
  148. package/dist/pipeline/compiler/__tests__/scoring-bridge.test.js +184 -0
  149. package/dist/pipeline/compiler/__tests__/task-graph-builder.test.d.ts +8 -0
  150. package/dist/pipeline/compiler/__tests__/task-graph-builder.test.js +301 -0
  151. package/dist/pipeline/compiler/__tests__/telemetry.test.d.ts +9 -0
  152. package/dist/pipeline/compiler/__tests__/telemetry.test.js +503 -0
  153. package/dist/pipeline/compiler/assertion-mapper.d.ts +58 -0
  154. package/dist/pipeline/compiler/assertion-mapper.js +175 -0
  155. package/dist/pipeline/compiler/compiler-to-yaml.d.ts +51 -0
  156. package/dist/pipeline/compiler/compiler-to-yaml.js +222 -0
  157. package/dist/pipeline/compiler/config-loader.d.ts +56 -0
  158. package/dist/pipeline/compiler/config-loader.js +111 -0
  159. package/dist/pipeline/compiler/fixture-resolver.d.ts +41 -0
  160. package/dist/pipeline/compiler/fixture-resolver.js +113 -0
  161. package/dist/pipeline/compiler/hash.d.ts +11 -0
  162. package/dist/pipeline/compiler/hash.js +18 -0
  163. package/dist/pipeline/compiler/ignore-fields.d.ts +53 -0
  164. package/dist/pipeline/compiler/ignore-fields.js +113 -0
  165. package/dist/pipeline/compiler/index.d.ts +29 -0
  166. package/dist/pipeline/compiler/index.js +45 -0
  167. package/dist/pipeline/compiler/literacy-bridge.d.ts +102 -0
  168. package/dist/pipeline/compiler/literacy-bridge.js +172 -0
  169. package/dist/pipeline/compiler/mode-bases/agent-harness.d.ts +10 -0
  170. package/dist/pipeline/compiler/mode-bases/agent-harness.js +21 -0
  171. package/dist/pipeline/compiler/mode-bases/index.d.ts +4 -0
  172. package/dist/pipeline/compiler/mode-bases/index.js +4 -0
  173. package/dist/pipeline/compiler/mode-bases/knowledge-probe.d.ts +10 -0
  174. package/dist/pipeline/compiler/mode-bases/knowledge-probe.js +22 -0
  175. package/dist/pipeline/compiler/mode-bases/literacy.d.ts +12 -0
  176. package/dist/pipeline/compiler/mode-bases/literacy.js +78 -0
  177. package/dist/pipeline/compiler/mode-bases/mcp-server.d.ts +10 -0
  178. package/dist/pipeline/compiler/mode-bases/mcp-server.js +70 -0
  179. package/dist/pipeline/compiler/mode-handlers/__fixtures__/agent-harness-example-tasks.d.ts +14 -0
  180. package/dist/pipeline/compiler/mode-handlers/__fixtures__/agent-harness-example-tasks.js +152 -0
  181. package/dist/pipeline/compiler/mode-handlers/__fixtures__/knowledge-probe-example-tasks.d.ts +32 -0
  182. package/dist/pipeline/compiler/mode-handlers/__fixtures__/knowledge-probe-example-tasks.js +176 -0
  183. package/dist/pipeline/compiler/mode-handlers/__fixtures__/mcp-example-tasks.d.ts +49 -0
  184. package/dist/pipeline/compiler/mode-handlers/__fixtures__/mcp-example-tasks.js +259 -0
  185. package/dist/pipeline/compiler/mode-handlers/agent-harness/assertions.d.ts +43 -0
  186. package/dist/pipeline/compiler/mode-handlers/agent-harness/assertions.js +187 -0
  187. package/dist/pipeline/compiler/mode-handlers/agent-harness/compiler.d.ts +19 -0
  188. package/dist/pipeline/compiler/mode-handlers/agent-harness/compiler.js +138 -0
  189. package/dist/pipeline/compiler/mode-handlers/agent-harness/index.d.ts +16 -0
  190. package/dist/pipeline/compiler/mode-handlers/agent-harness/index.js +43 -0
  191. package/dist/pipeline/compiler/mode-handlers/agent-harness/prompts.d.ts +9 -0
  192. package/dist/pipeline/compiler/mode-handlers/agent-harness/prompts.js +29 -0
  193. package/dist/pipeline/compiler/mode-handlers/agent-harness/sandbox.d.ts +12 -0
  194. package/dist/pipeline/compiler/mode-handlers/agent-harness/sandbox.js +82 -0
  195. package/dist/pipeline/compiler/mode-handlers/agent-harness/tool-presets.d.ts +4 -0
  196. package/dist/pipeline/compiler/mode-handlers/agent-harness/tool-presets.js +19 -0
  197. package/dist/pipeline/compiler/mode-handlers/agent-harness/types.d.ts +49 -0
  198. package/dist/pipeline/compiler/mode-handlers/agent-harness/types.js +4 -0
  199. package/dist/pipeline/compiler/mode-handlers/agent-harness/validation.d.ts +9 -0
  200. package/dist/pipeline/compiler/mode-handlers/agent-harness/validation.js +16 -0
  201. package/dist/pipeline/compiler/mode-handlers/index.d.ts +15 -0
  202. package/dist/pipeline/compiler/mode-handlers/index.js +19 -0
  203. package/dist/pipeline/compiler/mode-handlers/knowledge-probe/assertions.d.ts +16 -0
  204. package/dist/pipeline/compiler/mode-handlers/knowledge-probe/assertions.js +61 -0
  205. package/dist/pipeline/compiler/mode-handlers/knowledge-probe/compiler.d.ts +18 -0
  206. package/dist/pipeline/compiler/mode-handlers/knowledge-probe/compiler.js +112 -0
  207. package/dist/pipeline/compiler/mode-handlers/knowledge-probe/index.d.ts +26 -0
  208. package/dist/pipeline/compiler/mode-handlers/knowledge-probe/index.js +49 -0
  209. package/dist/pipeline/compiler/mode-handlers/knowledge-probe/prompts.d.ts +9 -0
  210. package/dist/pipeline/compiler/mode-handlers/knowledge-probe/prompts.js +28 -0
  211. package/dist/pipeline/compiler/mode-handlers/knowledge-probe/types.d.ts +44 -0
  212. package/dist/pipeline/compiler/mode-handlers/knowledge-probe/types.js +4 -0
  213. package/dist/pipeline/compiler/mode-handlers/knowledge-probe/validation.d.ts +9 -0
  214. package/dist/pipeline/compiler/mode-handlers/knowledge-probe/validation.js +24 -0
  215. package/dist/pipeline/compiler/mode-handlers/literacy/assertions.d.ts +18 -0
  216. package/dist/pipeline/compiler/mode-handlers/literacy/assertions.js +118 -0
  217. package/dist/pipeline/compiler/mode-handlers/literacy/compiler.d.ts +14 -0
  218. package/dist/pipeline/compiler/mode-handlers/literacy/compiler.js +105 -0
  219. package/dist/pipeline/compiler/mode-handlers/literacy/index.d.ts +11 -0
  220. package/dist/pipeline/compiler/mode-handlers/literacy/index.js +38 -0
  221. package/dist/pipeline/compiler/mode-handlers/literacy/prompts.d.ts +9 -0
  222. package/dist/pipeline/compiler/mode-handlers/literacy/prompts.js +74 -0
  223. package/dist/pipeline/compiler/mode-handlers/literacy/types.d.ts +41 -0
  224. package/dist/pipeline/compiler/mode-handlers/literacy/types.js +4 -0
  225. package/dist/pipeline/compiler/mode-handlers/literacy/validation.d.ts +12 -0
  226. package/dist/pipeline/compiler/mode-handlers/literacy/validation.js +28 -0
  227. package/dist/pipeline/compiler/mode-handlers/mcp-server/assertions.d.ts +42 -0
  228. package/dist/pipeline/compiler/mode-handlers/mcp-server/assertions.js +334 -0
  229. package/dist/pipeline/compiler/mode-handlers/mcp-server/compiler.d.ts +19 -0
  230. package/dist/pipeline/compiler/mode-handlers/mcp-server/compiler.js +100 -0
  231. package/dist/pipeline/compiler/mode-handlers/mcp-server/index.d.ts +27 -0
  232. package/dist/pipeline/compiler/mode-handlers/mcp-server/index.js +54 -0
  233. package/dist/pipeline/compiler/mode-handlers/mcp-server/prompts.d.ts +8 -0
  234. package/dist/pipeline/compiler/mode-handlers/mcp-server/prompts.js +28 -0
  235. package/dist/pipeline/compiler/mode-handlers/mcp-server/provider-config.d.ts +28 -0
  236. package/dist/pipeline/compiler/mode-handlers/mcp-server/provider-config.js +104 -0
  237. package/dist/pipeline/compiler/mode-handlers/mcp-server/types.d.ts +37 -0
  238. package/dist/pipeline/compiler/mode-handlers/mcp-server/types.js +4 -0
  239. package/dist/pipeline/compiler/mode-handlers/mcp-server/validation.d.ts +9 -0
  240. package/dist/pipeline/compiler/mode-handlers/mcp-server/validation.js +43 -0
  241. package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/index.d.ts +33 -0
  242. package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/index.js +174 -0
  243. package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/mcp-connection.d.ts +19 -0
  244. package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/mcp-connection.js +95 -0
  245. package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/tool-loop-anthropic.d.ts +19 -0
  246. package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/tool-loop-anthropic.js +172 -0
  247. package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/tool-loop-openai.d.ts +14 -0
  248. package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/tool-loop-openai.js +16 -0
  249. package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/types.d.ts +93 -0
  250. package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/types.js +4 -0
  251. package/dist/pipeline/compiler/preset-loader.d.ts +22 -0
  252. package/dist/pipeline/compiler/preset-loader.js +99 -0
  253. package/dist/pipeline/compiler/presets/index.d.ts +9 -0
  254. package/dist/pipeline/compiler/presets/index.js +8 -0
  255. package/dist/pipeline/compiler/presets/sanity-literacy.d.ts +42 -0
  256. package/dist/pipeline/compiler/presets/sanity-literacy.js +208 -0
  257. package/dist/pipeline/compiler/promptfoo-compiler.d.ts +96 -0
  258. package/dist/pipeline/compiler/promptfoo-compiler.js +230 -0
  259. package/dist/pipeline/compiler/provider-assembler.d.ts +39 -0
  260. package/dist/pipeline/compiler/provider-assembler.js +137 -0
  261. package/dist/pipeline/compiler/sandbox/docker-sandbox.d.ts +21 -0
  262. package/dist/pipeline/compiler/sandbox/docker-sandbox.js +136 -0
  263. package/dist/pipeline/compiler/sandbox/fixture-provisioner.d.ts +69 -0
  264. package/dist/pipeline/compiler/sandbox/fixture-provisioner.js +189 -0
  265. package/dist/pipeline/compiler/sandbox/git-worktree-sandbox.d.ts +20 -0
  266. package/dist/pipeline/compiler/sandbox/git-worktree-sandbox.js +114 -0
  267. package/dist/pipeline/compiler/sandbox/index.d.ts +10 -0
  268. package/dist/pipeline/compiler/sandbox/index.js +11 -0
  269. package/dist/pipeline/compiler/sandbox/sandbox-selector.d.ts +35 -0
  270. package/dist/pipeline/compiler/sandbox/sandbox-selector.js +86 -0
  271. package/dist/pipeline/compiler/sandbox/sandbox-strategy.d.ts +81 -0
  272. package/dist/pipeline/compiler/sandbox/sandbox-strategy.js +15 -0
  273. package/dist/pipeline/compiler/sandbox/tempdir-sandbox.d.ts +20 -0
  274. package/dist/pipeline/compiler/sandbox/tempdir-sandbox.js +74 -0
  275. package/dist/pipeline/compiler/scoring-bridge.d.ts +49 -0
  276. package/dist/pipeline/compiler/scoring-bridge.js +114 -0
  277. package/dist/pipeline/compiler/task-graph-builder.d.ts +54 -0
  278. package/dist/pipeline/compiler/task-graph-builder.js +291 -0
  279. package/dist/pipeline/compiler/telemetry/cost-tracker.d.ts +90 -0
  280. package/dist/pipeline/compiler/telemetry/cost-tracker.js +146 -0
  281. package/dist/pipeline/compiler/telemetry/index.d.ts +14 -0
  282. package/dist/pipeline/compiler/telemetry/index.js +19 -0
  283. package/dist/pipeline/compiler/telemetry/redactor.d.ts +58 -0
  284. package/dist/pipeline/compiler/telemetry/redactor.js +222 -0
  285. package/dist/pipeline/compiler/telemetry/tool-classifier.d.ts +32 -0
  286. package/dist/pipeline/compiler/telemetry/tool-classifier.js +120 -0
  287. package/dist/pipeline/compiler/telemetry/trace-collector.d.ts +75 -0
  288. package/dist/pipeline/compiler/telemetry/trace-collector.js +297 -0
  289. package/dist/pipeline/compiler/telemetry/trace-store.d.ts +78 -0
  290. package/dist/pipeline/compiler/telemetry/trace-store.js +85 -0
  291. package/dist/pipeline/compiler/variable-resolver.d.ts +46 -0
  292. package/dist/pipeline/compiler/variable-resolver.js +115 -0
  293. package/dist/pipeline/coverage-audit.d.ts +15 -5
  294. package/dist/pipeline/coverage-audit.js +41 -22
  295. package/dist/pipeline/eval-constants.d.ts +16 -6
  296. package/dist/pipeline/eval-constants.js +25 -4
  297. package/dist/pipeline/eval-fingerprint.d.ts +2 -2
  298. package/dist/pipeline/eval-fingerprint.js +8 -9
  299. package/dist/pipeline/expand-tasks.d.ts +19 -10
  300. package/dist/pipeline/expand-tasks.js +34 -28
  301. package/dist/pipeline/gap-analysis.d.ts +1 -1
  302. package/dist/pipeline/gap-analysis.js +2 -2
  303. package/dist/pipeline/generate-configs.d.ts +22 -4
  304. package/dist/pipeline/generate-configs.js +53 -24
  305. package/dist/pipeline/grader-api.d.ts +3 -3
  306. package/dist/pipeline/grader-api.js +5 -12
  307. package/dist/pipeline/grader-compare-runner.js +20 -27
  308. package/dist/pipeline/grader-comparison.d.ts +4 -8
  309. package/dist/pipeline/grader-comparison.js +11 -17
  310. package/dist/pipeline/grader-consistency-runner.d.ts +2 -3
  311. package/dist/pipeline/grader-consistency-runner.js +16 -20
  312. package/dist/pipeline/grader-consistency.d.ts +6 -10
  313. package/dist/pipeline/grader-consistency.js +13 -32
  314. package/dist/pipeline/grader-sensitivity-runner.js +7 -5
  315. package/dist/pipeline/grader-sensitivity.d.ts +2 -6
  316. package/dist/pipeline/grader-sensitivity.js +10 -10
  317. package/dist/pipeline/grader-validate-runner.js +7 -5
  318. package/dist/pipeline/grader-validation.d.ts +2 -6
  319. package/dist/pipeline/grader-validation.js +14 -22
  320. package/dist/pipeline/map-request-to-config.js +7 -1
  321. package/dist/pipeline/mirror-repo-tasks.d.ts +13 -13
  322. package/dist/pipeline/mirror-repo-tasks.js +22 -21
  323. package/dist/pipeline/normalize-mode.d.ts +49 -0
  324. package/dist/pipeline/normalize-mode.js +64 -0
  325. package/dist/pipeline/plan.d.ts +5 -2
  326. package/dist/pipeline/plan.js +134 -78
  327. package/dist/pipeline/pr-comment.js +2 -0
  328. package/dist/pipeline/profile-resolution.d.ts +22 -14
  329. package/dist/pipeline/profile-resolution.js +41 -19
  330. package/dist/pipeline/provenance.d.ts +2 -2
  331. package/dist/pipeline/provenance.js +12 -17
  332. package/dist/pipeline/release-report.js +4 -4
  333. package/dist/pipeline/repo-threshold-evaluator.d.ts +1 -1
  334. package/dist/pipeline/repo-threshold-evaluator.js +1 -1
  335. package/dist/pipeline/rubric-loader.d.ts +20 -0
  336. package/dist/pipeline/rubric-loader.js +37 -0
  337. package/dist/pipeline/validate.d.ts +4 -4
  338. package/dist/pipeline/validate.js +64 -53
  339. package/dist/schedules/loader.js +18 -8
  340. package/dist/scripts/migrate-task-mode.d.ts +24 -0
  341. package/dist/scripts/migrate-task-mode.js +85 -0
  342. package/dist/scripts/migrate-tasks-to-content-lake.js +11 -10
  343. package/dist/scripts/validate-task-sources.d.ts +1 -1
  344. package/dist/scripts/validate-task-sources.js +15 -15
  345. package/dist/sinks/loader.js +5 -7
  346. package/dist/sources.d.ts +7 -7
  347. package/dist/sources.js +22 -24
  348. package/dist/webhook/dispatch.js +2 -1
  349. package/package.json +15 -4
  350. package/tasks/knowledge-probe/define-type-api.task.ts +55 -0
  351. package/tasks/knowledge-probe/groq-projections.task.ts +59 -0
  352. package/tasks/literacy/frameworks.task.ts +128 -0
  353. package/tasks/literacy/functions.task.ts +69 -0
  354. package/tasks/literacy/groq.task.ts +258 -0
  355. package/tasks/literacy/nextjs-live.task.ts +75 -0
  356. package/tasks/literacy/studio-setup.task.ts +131 -0
  357. package/tasks/literacy/visual-editing.task.ts +146 -0
  358. package/config/features.yaml +0 -116
  359. package/config/models.yaml +0 -116
  360. package/config/prompts.yaml +0 -75
  361. package/config/rubrics.yaml +0 -81
  362. package/config/schedules.yaml +0 -43
  363. package/config/sinks.yaml +0 -54
  364. package/config/sources.yaml +0 -51
  365. package/config/thresholds.yaml +0 -49
  366. package/dist/_vendor/ailf-tasks/cli.d.ts +0 -8
  367. package/dist/_vendor/ailf-tasks/cli.js +0 -61
  368. package/dist/_vendor/ailf-tasks/index.d.ts +0 -13
  369. package/dist/_vendor/ailf-tasks/index.js +0 -16
  370. package/dist/_vendor/ailf-tasks/parser.d.ts +0 -27
  371. package/dist/_vendor/ailf-tasks/parser.js +0 -73
  372. package/dist/_vendor/ailf-tasks/schemas.d.ts +0 -198
  373. package/dist/_vendor/ailf-tasks/schemas.js +0 -180
  374. package/dist/_vendor/ailf-tasks/validation.d.ts +0 -47
  375. package/dist/_vendor/ailf-tasks/validation.js +0 -162
  376. package/dist/agent-observer/test-imports.d.ts +0 -7
  377. package/dist/agent-observer/test-imports.js +0 -185
@@ -0,0 +1,404 @@
1
+ /**
2
+ * scoring-and-presets.test.ts — Tests for 4-tier scoring engine,
3
+ * storage schema, and plugin registry / presets.
4
+ *
5
+ * Run: npx tsx --test src/pipeline/compiler/__tests__/scoring-and-presets.test.ts
6
+ */
7
+ import assert from "node:assert/strict";
8
+ import { dirname, resolve } from "node:path";
9
+ import { describe, it } from "node:test";
10
+ import { fileURLToPath } from "node:url";
11
+ const __dirname = dirname(fileURLToPath(import.meta.url));
12
+ import { aggregateAreas, aggregateDimensions, computeEnsembleScore, computeTaskScore, normalizeScore, } from "../../../_vendor/ailf-core/index.js";
13
+ import { CURRENT_SCHEMA_VERSION, InMemoryPluginRegistry, isSchemaVersioned, migrateDocument, } from "../../../_vendor/ailf-core/index.js";
14
+ import { createSanityLiteracyPreset, sanityLiteracyPreset, } from "../presets/sanity-literacy.js";
// ---------------------------------------------------------------------------
// Helpers
// ---------------------------------------------------------------------------
/**
 * Builds an assertion-result fixture for the scoring tests.
 *
 * The defaults describe a passing "llm-rubric" assertion on the
 * "task-completion" dimension (score 0.8, weight 1.0, latency 100 ms).
 * Any field supplied in `overrides` replaces the matching default.
 *
 * @param {object} [overrides] - Fields that replace the defaults.
 * @returns {object} A fresh assertion-result object.
 */
function makeAssertion(overrides = {}) {
    return {
        pass: true,
        score: 0.8,
        reason: "Good",
        assertionType: "llm-rubric",
        dimension: "task-completion",
        latencyMs: 100,
        weight: 1.0,
        // Spread last so caller-supplied fields win over the defaults.
        ...overrides,
    };
}
/**
 * Builds a dimension-score fixture for the scoring tests.
 *
 * The defaults describe a "task-completion" dimension scored 0.8 from two
 * passing assertions using "weighted-mean" aggregation. Any field supplied
 * in `overrides` replaces the matching default.
 *
 * @param {object} [overrides] - Fields that replace the defaults.
 * @returns {object} A fresh dimension-score object.
 */
function makeDimension(overrides = {}) {
    return {
        dimensionId: "task-completion",
        label: "Task Completion",
        score: 0.8,
        assertionCount: 2,
        passCount: 2,
        aggregation: "weighted-mean",
        assertions: [],
        // Spread last so caller-supplied fields win over the defaults.
        ...overrides,
    };
}
// ---------------------------------------------------------------------------
// Tier 1 → Tier 2: Assertion → Dimension aggregation
// ---------------------------------------------------------------------------
describe("aggregateDimensions", () => {
    it("groups assertions by dimension", () => {
        const assertions = [
            makeAssertion({ dimension: "code-correctness", score: 0.9 }),
            makeAssertion({ dimension: "code-correctness", score: 0.7 }),
            makeAssertion({ dimension: "task-completion", score: 0.8 }),
        ];
        const dims = aggregateDimensions(assertions);
        // Three assertions across two distinct dimensions.
        assert.equal(dims.length, 2);
        const cc = dims.find((d) => d.dimensionId === "code-correctness");
        assert.ok(cc);
        assert.equal(cc.assertionCount, 2);
    });
    it("uses weighted-mean by default", () => {
        const assertions = [
            makeAssertion({ score: 0.6, weight: 1.0 }),
            makeAssertion({ score: 0.8, weight: 3.0 }),
        ];
        const dims = aggregateDimensions(assertions);
        // Weighted mean: (0.6*1 + 0.8*3) / (1+3) = 3.0/4 = 0.75
        // Compared with a tolerance because the result is a float.
        assert.ok(Math.abs(dims[0].score - 0.75) < 0.01);
    });
    it("falls back to pass rate when no numeric scores", () => {
        const assertions = [
            makeAssertion({ score: null, pass: true }),
            makeAssertion({ score: null, pass: false }),
        ];
        const dims = aggregateDimensions(assertions);
        // One of two assertions passes → pass rate 0.5.
        assert.equal(dims[0].score, 0.5);
    });
    it("applies custom dimension labels", () => {
        const assertions = [makeAssertion({ dimension: "tc" })];
        const dims = aggregateDimensions(assertions, {
            dimensionLabels: { tc: "Task Completion" },
        });
        assert.equal(dims[0].label, "Task Completion");
    });
});
// ---------------------------------------------------------------------------
// Tier 2 → Tier 3: Dimension → Task scoring
// ---------------------------------------------------------------------------
describe("computeTaskScore", () => {
    it("computes weighted score from dimensions", () => {
        const dims = [
            makeDimension({ dimensionId: "tc", score: 0.8 }),
            makeDimension({ dimensionId: "cc", score: 0.6 }),
        ];
        const task = computeTaskScore(dims, {
            taskId: "test-task",
            weights: { tc: 0.6, cc: 0.4 },
        });
        // 0.8*0.6 + 0.6*0.4 = 0.48 + 0.24 = 0.72
        assert.ok(Math.abs(task.score - 0.72) < 0.01);
    });
    it("normalizes weights that don't sum to 1", () => {
        const dims = [
            makeDimension({ dimensionId: "tc", score: 1.0 }),
            makeDimension({ dimensionId: "cc", score: 0.0 }),
        ];
        const task = computeTaskScore(dims, {
            taskId: "test-task",
            weights: { tc: 2, cc: 2 },
        });
        // (1.0*2 + 0.0*2) / (2+2) = 2/4 = 0.5
        assert.ok(Math.abs(task.score - 0.5) < 0.01);
    });
    it("checks against threshold", () => {
        // The same 0.6 dimension score is checked against a lower and a
        // higher threshold.
        const dims = [makeDimension({ dimensionId: "tc", score: 0.6 })];
        const passing = computeTaskScore(dims, {
            taskId: "t1",
            weights: { tc: 1.0 },
            threshold: 0.5,
        });
        assert.equal(passing.passesThreshold, true);
        const failing = computeTaskScore(dims, {
            taskId: "t2",
            weights: { tc: 1.0 },
            threshold: 0.7,
        });
        assert.equal(failing.passesThreshold, false);
    });
    it("records weight source", () => {
        const task = computeTaskScore([makeDimension()], {
            taskId: "t1",
            weights: { "task-completion": 1.0 },
            weightSource: "rubrics.yaml:default",
        });
        assert.equal(task.weightSource, "rubrics.yaml:default");
    });
});
// ---------------------------------------------------------------------------
// Tier 3 → Tier 4: Task → Area aggregation
// ---------------------------------------------------------------------------
describe("aggregateAreas", () => {
    it("groups tasks by area prefix", () => {
        // Two "groq-*" tasks and one "studio-*" task.
        const tasks = [
            computeTaskScore([makeDimension({ score: 0.8 })], {
                taskId: "groq-basic",
                weights: { "task-completion": 1.0 },
            }),
            computeTaskScore([makeDimension({ score: 0.6 })], {
                taskId: "groq-advanced",
                weights: { "task-completion": 1.0 },
            }),
            computeTaskScore([makeDimension({ score: 0.9 })], {
                taskId: "studio-schema",
                weights: { "task-completion": 1.0 },
            }),
        ];
        const areas = aggregateAreas(tasks);
        assert.equal(areas.length, 2);
        const groq = areas.find((a) => a.areaId === "groq");
        assert.ok(groq);
        assert.equal(groq.taskCount, 2);
        assert.ok(Math.abs(groq.score - 0.7) < 0.01); // (0.8+0.6)/2
        const studio = areas.find((a) => a.areaId === "studio");
        assert.ok(studio);
        assert.equal(studio.taskCount, 1);
    });
    it("computes delta from previous scores", () => {
        const tasks = [
            computeTaskScore([makeDimension({ score: 0.8 })], {
                taskId: "groq-basic",
                weights: { "task-completion": 1.0 },
            }),
        ];
        // Previous "groq" score of 0.6 against a current 0.8 → delta 0.2.
        const areas = aggregateAreas(tasks, { groq: 0.6 });
        assert.ok(areas[0].delta !== null);
        assert.ok(Math.abs(areas[0].delta - 0.2) < 0.01);
    });
});
176
+ // ---------------------------------------------------------------------------
177
+ // Score normalization
178
+ // ---------------------------------------------------------------------------
179
// Tests for normalizeScore(rawScore, assertionType): every case expects a
// result on the 0–1 scale.
describe("normalizeScore", () => {
  // Absolute tolerance used for the floating-point comparisons below.
  const TOLERANCE = 0.01;

  it("normalizes LLM rubric scores (0-100 → 0-1)", () => {
    const normalized = normalizeScore(75, "llm-rubric");
    assert.ok(Math.abs(normalized - 0.75) < TOLERANCE);
  });

  it("passes through already-normalized scores", () => {
    const normalized = normalizeScore(0.75, "llm-rubric");
    assert.ok(Math.abs(normalized - 0.75) < TOLERANCE);
  });

  it("normalizes boolean assertions to 0 or 1", () => {
    // [raw score, expected normalized score]
    const cases = [
      [1, 1],
      [0, 0],
    ];
    for (const [raw, expected] of cases) {
      assert.equal(normalizeScore(raw, "contains"), expected);
    }
  });

  it("clamps similarity scores to [0, 1]", () => {
    // Out-of-range inputs on either side must land on the nearest bound.
    const cases = [
      [1.5, 1],
      [-0.1, 0],
    ];
    for (const [raw, expected] of cases) {
      assert.equal(normalizeScore(raw, "similar"), expected);
    }
  });
});
195
+ // ---------------------------------------------------------------------------
196
+ // Ensemble grading
197
+ // ---------------------------------------------------------------------------
198
// Tests for computeEnsembleScore(scores, strategy): the returned object is
// read for `score` (the combined value) and `agreement`.
describe("computeEnsembleScore", () => {
  // True when `actual` is within 0.01 of `expected`.
  const isClose = (actual, expected) => Math.abs(actual - expected) < 0.01;

  it("computes mean ensemble score", () => {
    const ensemble = computeEnsembleScore([0.8, 0.6, 0.7], "mean");
    assert.ok(isClose(ensemble.score, 0.7));
    assert.ok(ensemble.agreement > 0);
  });

  it("computes median ensemble score", () => {
    const ensemble = computeEnsembleScore([0.9, 0.5, 0.7], "median");
    assert.ok(isClose(ensemble.score, 0.7));
  });

  it("computes max ensemble score", () => {
    const ensemble = computeEnsembleScore([0.9, 0.5, 0.7], "max");
    assert.ok(isClose(ensemble.score, 0.9));
  });

  it("agreement is 1 for identical scores", () => {
    // No strategy argument: exercises whatever default the function applies.
    const ensemble = computeEnsembleScore([0.8, 0.8, 0.8]);
    assert.ok(isClose(ensemble.agreement, 1.0));
  });

  it("agreement decreases with divergent scores", () => {
    // Maximally divergent pair on the 0–1 scale.
    const ensemble = computeEnsembleScore([0.0, 1.0]);
    assert.ok(ensemble.agreement < 0.6);
  });
});
221
+ // ---------------------------------------------------------------------------
222
+ // Storage schema
223
+ // ---------------------------------------------------------------------------
224
// Tests for the storage schema helpers: the version constant, the
// "is this document versioned" predicate, and document migration.
describe("storage schema", () => {
  it("CURRENT_SCHEMA_VERSION is 1", () => {
    assert.equal(CURRENT_SCHEMA_VERSION, 1);
  });

  it("isSchemaVersioned detects versioned docs", () => {
    // [candidate document, expected verdict]
    const cases = [
      [{ schemaVersion: 1 }, true],
      [{}, false],
      [null, false],
    ];
    for (const [candidate, expected] of cases) {
      assert.equal(isSchemaVersioned(candidate), expected);
    }
  });

  it("migrateDocument is no-op for current version", () => {
    const currentDoc = { schemaVersion: 1, _type: "ailf.run" };
    const { schemaVersion } = migrateDocument(currentDoc);
    assert.equal(schemaVersion, 1);
  });
});
239
+ // ---------------------------------------------------------------------------
240
+ // Plugin registry
241
+ // ---------------------------------------------------------------------------
242
// Tests for InMemoryPluginRegistry: registering modes, assertions, and a
// complete preset layered on top of a mode base.
describe("InMemoryPluginRegistry", () => {
  it("registers and retrieves modes", () => {
    const registry = new InMemoryPluginRegistry();
    registry.registerMode({
      id: "custom",
      label: "Custom Mode",
      validProviderPatterns: [".*"],
      rubricTemplateIds: [],
      handlerModule: "./custom.js",
    });
    assert.equal(registry.getModes().length, 1);
    assert.equal(registry.getMode("custom")?.label, "Custom Mode");
  });

  it("registers and retrieves assertions", () => {
    const registry = new InMemoryPluginRegistry();
    registry.registerAssertion({
      type: "api-match",
      label: "API Match",
      compatibleModes: ["custom"],
      handlerModule: "./api-match.js",
    });
    assert.equal(registry.getAssertions().length, 1);
  });

  it("registers a complete preset with mode base", async () => {
    const registry = new InMemoryPluginRegistry();
    // Must register mode base first.
    // Loaded with dynamic import() rather than require(): `require` is not
    // defined in a plain Node ES module, and the other lazy loads in this
    // file already use `await import(...)`.
    const { createLiteracyModeBase } = await import("../mode-bases/literacy.js");
    registry.registerModeBase(createLiteracyModeBase());
    registry.registerPreset(sanityLiteracyPreset);
    // Mode + rubrics from mode base, domain config from preset
    assert.ok(registry.getMode("literacy"));
    assert.ok(registry.getRubricTemplates().length > 0);
    // assert.equal reports the actual count on failure, unlike assert.ok(a === b).
    assert.equal(registry.getPresets().length, 1);
  });
});
277
+ // ---------------------------------------------------------------------------
278
+ // sanity-literacy preset
279
+ // ---------------------------------------------------------------------------
280
// Tests for the exported `sanityLiteracyPreset` object: identity, which
// concerns it deliberately does NOT carry, and the domain configuration
// (fixture resolvers, sources, features, doc fetcher) it does carry.
describe("sanityLiteracyPreset", () => {
  it("has correct manifest", () => {
    const { name, manifest } = sanityLiteracyPreset;
    assert.equal(name, "sanity-literacy");
    assert.equal(manifest.pluginApiVersion, 1);
  });

  it("targets literacy mode base", () => {
    const { mode } = sanityLiteracyPreset;
    assert.equal(mode, "literacy");
  });

  it("does not bundle assertions (now framework built-ins)", () => {
    const { assertions } = sanityLiteracyPreset;
    assert.equal(assertions, undefined);
  });

  it("does not bundle rubrics/scoring/prompts (now in literacy mode base)", () => {
    // Evaluation methodology moved to mode-bases/literacy.ts
    const methodologyKeys = ["rubricTemplates", "scoringProfiles", "promptTemplates"];
    for (const key of methodologyKeys) {
      assert.equal(sanityLiteracyPreset[key], undefined);
    }
  });

  it("includes sanity:// fixture resolver", () => {
    const hasSanityResolver = sanityLiteracyPreset.fixtureResolvers?.some(
      ({ scheme }) => scheme === "sanity://",
    );
    assert.ok(hasSanityResolver);
  });

  it("includes 3 source definitions", () => {
    const sourceDefs = sanityLiteracyPreset.sourceDefs;
    assert.ok(sourceDefs);
    assert.equal(sourceDefs.length, 3);
    const sourceNames = sourceDefs.map(({ name }) => name);
    for (const expectedName of ["production", "branch", "local"]) {
      assert.ok(sourceNames.includes(expectedName));
    }
  });

  it("production source has correct baseUrl", () => {
    const production = sanityLiteracyPreset.sourceDefs.find(
      ({ name }) => name === "production",
    );
    assert.ok(production);
    assert.equal(production.baseUrl, "https://www.sanity.io/docs");
  });

  it("includes feature registry with all features", () => {
    const featureDefs = sanityLiteracyPreset.featureDefs;
    assert.ok(featureDefs);
    assert.equal(featureDefs.features.length, 14);
    // Spot-check a handful of ids rather than pinning the full list.
    const featureIds = featureDefs.features.map(({ id }) => id);
    for (const expectedId of ["groq", "visual-editing", "portable-text", "ai-assist"]) {
      assert.ok(featureIds.includes(expectedId));
    }
  });

  it("includes a docFetcher factory", () => {
    assert.equal(typeof sanityLiteracyPreset.docFetcher, "function");
    // Invoked as a method on the preset, exactly as a consumer would call it.
    const docFetcher = sanityLiteracyPreset.docFetcher();
    assert.ok(docFetcher);
    assert.equal(typeof docFetcher.fetch, "function");
  });
});
331
+ // ---------------------------------------------------------------------------
332
+ // createSanityLiteracyPreset factory
333
+ // ---------------------------------------------------------------------------
334
// Tests for the createSanityLiteracyPreset factory: the preset it returns
// carries domain config only, and registering it on top of the literacy mode
// base populates every registry extension point.
describe("createSanityLiteracyPreset", () => {
  it("returns a domain-only preset targeting literacy mode", () => {
    const preset = createSanityLiteracyPreset({ rootDir: "/tmp/test" });
    assert.equal(preset.name, "sanity-literacy");
    assert.equal(preset.mode, "literacy");
    // Domain config present
    assert.ok(preset.fixtureResolvers);
    assert.ok(preset.docFetcher);
    assert.ok(preset.sourceDefs);
    assert.ok(preset.featureDefs);
    // Methodology inherited from mode base, not on preset
    assert.equal(preset.rubricTemplates, undefined);
    assert.equal(preset.scoringProfiles, undefined);
    assert.equal(preset.promptTemplates, undefined);
  });

  it("registers all extension points via mode base + domain config", async () => {
    const registry = new InMemoryPluginRegistry();
    // Must register mode base first (composition root does this).
    // Loaded with dynamic import() rather than require(): `require` is not
    // defined in a plain Node ES module, and the other lazy loads in this
    // file already use `await import(...)`.
    const { createLiteracyModeBase } = await import("../mode-bases/literacy.js");
    registry.registerModeBase(createLiteracyModeBase());
    const preset = createSanityLiteracyPreset({ rootDir: "/tmp/test" });
    registry.registerPreset(preset);
    // Mode from mode base
    assert.ok(registry.getMode("literacy"));
    // Rubrics, scoring, prompts inherited from mode base
    assert.equal(registry.getRubricTemplates().length, 3);
    assert.equal(Object.keys(registry.getPromptTemplates()).length, 3);
    assert.equal(Object.keys(registry.getScoringProfiles()).length, 2);
    // Domain config from preset
    assert.ok(registry.getDocFetcherFactory());
    assert.equal(registry.getSourceDefs().length, 3);
    assert.ok(registry.getFeatureDefs());
    assert.equal(registry.getFeatureDefs().features.length, 14);
  });
});
369
+ // ---------------------------------------------------------------------------
370
+ // Preset is single source of truth for sources and features
371
+ // ---------------------------------------------------------------------------
372
// Guards the split of responsibilities between the repo-level config files
// and the preset: config/sources and config/features must load but be empty,
// while the preset itself carries the 3 sources and 14 features.
describe("preset is single source of truth for Sanity config", () => {
  // Lazily imports the config loader and loads the named config file from the
  // directory four levels above this test file.
  const loadRootConfig = async (configName) => {
    const { tryLoadConfigFile } = await import("../../compiler/config-loader.js");
    const ROOT = resolve(__dirname, "..", "..", "..", "..");
    return tryLoadConfigFile(configName, ROOT);
  };

  it("config/sources.ts exports an empty array", async () => {
    const loaded = await loadRootConfig("sources");
    assert.ok(loaded, "config/sources.ts should exist");
    const { data: sources } = loaded;
    assert.ok(Array.isArray(sources), "should export an array");
    assert.equal(sources.length, 0, "config/sources should be empty (preset provides sources)");
  });

  it("config/features.ts exports an empty features array", async () => {
    const loaded = await loadRootConfig("features");
    assert.ok(loaded, "config/features.ts should exist");
    assert.ok(Array.isArray(loaded.data.features), "should have a features array");
    assert.equal(loaded.data.features.length, 0, "config/features should be empty (preset provides features)");
  });

  it("preset contains all 3 source entries", () => {
    const { sourceDefs } = sanityLiteracyPreset;
    assert.equal(sourceDefs.length, 3);
    // Sort the extracted names so the comparison ignores declaration order.
    const sortedNames = sourceDefs.map(({ name }) => name);
    sortedNames.sort();
    assert.deepEqual(sortedNames, ["branch", "local", "production"]);
  });

  it("preset contains all 14 feature entries", () => {
    const { features } = sanityLiteracyPreset.featureDefs;
    assert.equal(features.length, 14);
    const countWithStatus = (wanted) =>
      features.filter(({ status }) => status === wanted).length;
    const coveredCount = countWithStatus("covered");
    const uncoveredCount = countWithStatus("uncovered");
    assert.equal(coveredCount, 6, "should have 6 covered features");
    assert.equal(uncoveredCount, 8, "should have 8 uncovered features");
  });
});
@@ -0,0 +1,10 @@
1
+ /**
2
+ * scoring-bridge.test.ts — Tests for the 4-tier scoring engine bridge.
3
+ *
4
+ * Verifies that `scoreTestGroup` produces the same 0–100 output as the
5
+ * legacy `accumulateDimensions → averageDimensions → weightedComposite`
6
+ * chain when given identical inputs.
7
+ *
8
+ * Run: npx tsx --test src/pipeline/compiler/__tests__/scoring-bridge.test.ts
9
+ */
10
+ export {};
@@ -0,0 +1,184 @@
1
+ /**
2
+ * scoring-bridge.test.ts — Tests for the 4-tier scoring engine bridge.
3
+ *
4
+ * Verifies that `scoreTestGroup` produces the same 0–100 output as the
5
+ * legacy `accumulateDimensions → averageDimensions → weightedComposite`
6
+ * chain when given identical inputs.
7
+ *
8
+ * Run: npx tsx --test src/pipeline/compiler/__tests__/scoring-bridge.test.ts
9
+ */
10
+ import assert from "node:assert/strict";
11
+ import { describe, it } from "node:test";
12
+ import { scoreTestGroup } from "../scoring-bridge.js";
13
+ // ---------------------------------------------------------------------------
14
+ // Helpers
15
+ // ---------------------------------------------------------------------------
16
/**
 * Builds a test-result fixture of the shape this file passes to
 * `scoreTestGroup`.
 *
 * For every rubric dimension present in `overrides.dimensions`, one
 * "llm-rubric" component result is emitted. The value is given on a 0–100
 * scale: it is stored verbatim inside the JSON `reason` payload and divided by
 * 100 for the component's `score`.
 *
 * @param {object} [overrides] - Optional fixture overrides.
 * @param {{taskCompletion?: number, codeCorrectness?: number, docCoverage?: number}} [overrides.dimensions]
 *   Rubric scores (0–100). Dimensions left `undefined` or `null` produce no
 *   component result; `0` is a valid score and is kept.
 * @param {number} [overrides.cost] - Cost of the test; defaults to 0.01.
 * @param {string} [overrides.description] - Defaults to "test".
 * @param {object} [overrides.vars] - Defaults to `{ task: "test", docs: "" }`.
 * @returns {object} Fixture with `cost`, `description`, `gradingResult`,
 *   `response` and `vars` fields.
 */
function makeTestResult(overrides) {
  const dims = overrides?.dimensions ?? {};
  // Override key → rubric dimension id. The array order is the order in which
  // component results are emitted, so it must stay stable.
  const rubricDimensions = [
    ["taskCompletion", "task-completion"],
    ["codeCorrectness", "code-correctness"],
    ["docCoverage", "doc-coverage"],
  ];
  const componentResults = [];
  for (const [overrideKey, dimension] of rubricDimensions) {
    const rawScore = dims[overrideKey];
    // `== null` skips both undefined and null: a null score would otherwise
    // yield a component whose reason says {"score":null} while its score is 0.
    if (rawScore == null) {
      continue;
    }
    componentResults.push({
      assertion: {
        type: "llm-rubric",
        metadata: { dimension },
      },
      pass: true,
      reason: JSON.stringify({ score: rawScore }),
      score: rawScore / 100,
    });
  }
  return {
    cost: overrides?.cost ?? 0.01,
    description: overrides?.description ?? "test",
    gradingResult: {
      componentResults,
      pass: true,
    },
    response: { output: "mock output" },
    vars: overrides?.vars ?? { task: "test", docs: "" },
  };
}
63
// Scoring profiles used as shared fixtures by the tests below. Each maps a
// rubric dimension id to its weight in the composite score; the weights in
// each profile sum to 1. Both objects are frozen so that no test (or the code
// under test) can mutate a fixture that later tests also rely on. Key order is
// kept as-is because it determines property iteration order for consumers.

// Weights all three rubric dimensions.
const DEFAULT_PROFILE = Object.freeze({
  "code-correctness": 0.35,
  "doc-coverage": 0.25,
  "task-completion": 0.4,
});

// Omits "doc-coverage": only the two output-quality dimensions are weighted.
const OUTPUT_ONLY_PROFILE = Object.freeze({
  "code-correctness": 0.55,
  "task-completion": 0.45,
});
72
+ // ---------------------------------------------------------------------------
73
+ // Tests
74
+ // ---------------------------------------------------------------------------
75
// Basic behaviour of scoreTestGroup(tests, profile): empty input, a single
// test, averaging over several tests, and cost accumulation. Dimension and
// composite values are asserted on the 0–100 scale.
describe("scoreTestGroup — basic scoring", () => {
  it("returns zeroes for empty test array", () => {
    const { composite, totalCost, dimensions } = scoreTestGroup([], DEFAULT_PROFILE);
    assert.equal(composite, 0);
    assert.equal(totalCost, 0);
    assert.deepEqual(dimensions, {});
  });

  it("scores a single test with all dimensions", () => {
    const singleTest = makeTestResult({
      dimensions: {
        taskCompletion: 80,
        codeCorrectness: 70,
        docCoverage: 60,
      },
    });
    const { dimensions, composite } = scoreTestGroup([singleTest], DEFAULT_PROFILE);
    // Expected: 80*0.4 + 70*0.35 + 60*0.25 = 32 + 24.5 + 15 = 71.5 → 72
    assert.equal(dimensions.taskCompletion, 80);
    assert.equal(dimensions.codeCorrectness, 70);
    assert.equal(dimensions.docCoverage, 60);
    assert.equal(composite, 72);
  });

  it("averages across multiple tests", () => {
    // Two tests with mirrored scores, so both dimensions average to 70.
    const mirroredScores = [
      { taskCompletion: 80, codeCorrectness: 60 },
      { taskCompletion: 60, codeCorrectness: 80 },
    ];
    const tests = mirroredScores.map((dimensions) => makeTestResult({ dimensions }));
    const { dimensions, composite } = scoreTestGroup(tests, OUTPUT_ONLY_PROFILE);
    // taskCompletion avg = 70, codeCorrectness avg = 70
    // Expected: 70*0.45 + 70*0.55 = 31.5 + 38.5 = 70
    assert.equal(dimensions.taskCompletion, 70);
    assert.equal(dimensions.codeCorrectness, 70);
    assert.equal(composite, 70);
  });

  it("accumulates cost across tests", () => {
    const cheaper = makeTestResult({ cost: 0.03, dimensions: { taskCompletion: 70 } });
    const pricier = makeTestResult({ cost: 0.05, dimensions: { taskCompletion: 80 } });
    // Same order as the summed costs are expected in: 0.05 first, then 0.03.
    const { totalCost } = scoreTestGroup([pricier, cheaper], DEFAULT_PROFILE);
    assert.ok(Math.abs(totalCost - 0.08) < 0.001);
  });
});
124
// How the weight profile passed to scoreTestGroup shapes the composite:
// dimensions missing from the profile are still reported but carry no weight.
describe("scoreTestGroup — profile handling", () => {
  it("uses output-only profile (excludes doc-coverage)", () => {
    const scoredEverywhere = makeTestResult({
      dimensions: {
        taskCompletion: 80,
        codeCorrectness: 60,
        docCoverage: 100,
      },
    });
    const { dimensions, composite } = scoreTestGroup([scoredEverywhere], OUTPUT_ONLY_PROFILE);
    // doc-coverage should be present in dimensions but NOT affect composite
    // Expected: 80*0.45 + 60*0.55 = 36 + 33 = 69
    assert.equal(dimensions.docCoverage, 100);
    assert.equal(composite, 69);
  });

  it("handles profile with only one dimension", () => {
    const twoDimensionTest = makeTestResult({
      dimensions: { taskCompletion: 90, codeCorrectness: 50 },
    });
    const taskCompletionOnly = { "task-completion": 1.0 };
    const { composite } = scoreTestGroup([twoDimensionTest], taskCompletionOnly);
    // Only taskCompletion should count
    assert.equal(composite, 90);
  });
});
152
// Edge cases for scoreTestGroup: results that contain no rubric components,
// and the raw per-dimension objects exposed next to the rounded summary.
describe("scoreTestGroup — edge cases", () => {
  it("handles tests with no rubric components", () => {
    // Hand-built result whose only component is a non-rubric assertion.
    const nonRubricComponent = { assertion: { type: "javascript" }, pass: true, score: 1 };
    const rubricFreeResult = {
      cost: 0.01,
      description: "no rubrics",
      gradingResult: {
        componentResults: [nonRubricComponent],
        pass: true,
      },
      response: { output: "mock" },
      vars: { task: "test", docs: "" },
    };
    const { composite, totalCost } = scoreTestGroup([rubricFreeResult], DEFAULT_PROFILE);
    // No llm-rubric components → 0 composite
    assert.equal(composite, 0);
    assert.equal(totalCost, 0.01);
  });

  it("provides raw DimensionScore objects for advanced consumers", () => {
    const scored = makeTestResult({
      dimensions: { taskCompletion: 80, codeCorrectness: 60 },
    });
    const { rawDimensions } = scoreTestGroup([scored], DEFAULT_PROFILE);
    assert.ok(rawDimensions.length >= 2);
    const taskCompletionDim = rawDimensions.find(
      ({ dimensionId }) => dimensionId === "task-completion",
    );
    assert.ok(taskCompletionDim);
    // Raw scores are expected on the 0–1 scale, unlike the 0–100 summary.
    assert.ok(taskCompletionDim.score >= 0 && taskCompletionDim.score <= 1);
    assert.equal(taskCompletionDim.assertionCount, 1);
  });
});
@@ -0,0 +1,8 @@
1
+ /**
2
+ * task-graph-builder.test.ts — Unit tests for TaskGraphBuilder.
3
+ *
4
+ * Tests DAG construction, cycle detection, filtering, and priority assignment.
5
+ *
6
+ * Run: npx tsx --test src/pipeline/compiler/__tests__/task-graph-builder.test.ts
7
+ */
8
+ export {};