@sanity/ailf 0.5.0 → 2.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (377)
  1. package/README.md +0 -1
  2. package/config/features.ts +23 -0
  3. package/config/models.ts +95 -0
  4. package/config/prompts.ts +16 -0
  5. package/config/rubrics.ts +225 -0
  6. package/config/schedules.ts +47 -0
  7. package/config/sinks.ts +37 -0
  8. package/config/sources.ts +21 -0
  9. package/config/thresholds.ts +61 -0
  10. package/dist/_vendor/ailf-core/config-helpers.d.ts +171 -0
  11. package/dist/_vendor/ailf-core/config-helpers.js +170 -0
  12. package/dist/_vendor/ailf-core/env-helper.d.ts +35 -0
  13. package/dist/_vendor/ailf-core/env-helper.js +45 -0
  14. package/dist/_vendor/ailf-core/examples/index.d.ts +16 -0
  15. package/dist/_vendor/ailf-core/examples/index.js +25 -0
  16. package/dist/_vendor/ailf-core/index.d.ts +3 -0
  17. package/dist/_vendor/ailf-core/index.js +5 -0
  18. package/dist/_vendor/ailf-core/ports/context.d.ts +17 -2
  19. package/dist/_vendor/ailf-core/ports/doc-fetcher.d.ts +2 -2
  20. package/dist/_vendor/ailf-core/ports/index.d.ts +2 -1
  21. package/dist/_vendor/ailf-core/ports/mode-handler.d.ts +129 -0
  22. package/dist/_vendor/ailf-core/ports/mode-handler.js +19 -0
  23. package/dist/_vendor/ailf-core/ports/task-source.d.ts +16 -122
  24. package/dist/_vendor/ailf-core/ports/task-source.js +7 -7
  25. package/dist/_vendor/ailf-core/schemas/eval-config.d.ts +8 -2
  26. package/dist/_vendor/ailf-core/schemas/eval-config.js +17 -2
  27. package/dist/_vendor/ailf-core/schemas/pipeline-request.d.ts +9 -3
  28. package/dist/_vendor/ailf-core/schemas/pipeline-request.js +8 -1
  29. package/dist/_vendor/ailf-core/schemas/pipeline.d.ts +14 -31
  30. package/dist/_vendor/ailf-core/schemas/pipeline.js +17 -9
  31. package/dist/_vendor/ailf-core/schemas/schedules.d.ts +14 -4
  32. package/dist/_vendor/ailf-core/schemas/schedules.js +6 -2
  33. package/dist/_vendor/ailf-core/schemas/sinks.d.ts +1 -1
  34. package/dist/_vendor/ailf-core/services/comparison-formatters.js +57 -19
  35. package/dist/_vendor/ailf-core/services/index.d.ts +2 -1
  36. package/dist/_vendor/ailf-core/services/index.js +2 -1
  37. package/dist/_vendor/ailf-core/services/scoring-engine.d.ts +153 -0
  38. package/dist/_vendor/ailf-core/services/scoring-engine.js +237 -0
  39. package/dist/_vendor/ailf-core/services/scoring.d.ts +15 -2
  40. package/dist/_vendor/ailf-core/services/scoring.js +25 -15
  41. package/dist/_vendor/ailf-core/types/branded-ids.d.ts +137 -0
  42. package/dist/_vendor/ailf-core/types/branded-ids.js +136 -0
  43. package/dist/_vendor/ailf-core/types/eval-mode-config.d.ts +150 -0
  44. package/dist/_vendor/ailf-core/types/eval-mode-config.js +24 -0
  45. package/dist/_vendor/ailf-core/types/generalized-task.d.ts +332 -0
  46. package/dist/_vendor/ailf-core/types/generalized-task.js +13 -0
  47. package/dist/_vendor/ailf-core/types/index.d.ts +45 -83
  48. package/dist/_vendor/ailf-core/types/index.js +8 -1
  49. package/dist/_vendor/ailf-core/types/plugin-registry.d.ts +257 -0
  50. package/dist/_vendor/ailf-core/types/plugin-registry.js +185 -0
  51. package/dist/_vendor/ailf-core/types/storage-schema.d.ts +199 -0
  52. package/dist/_vendor/ailf-core/types/storage-schema.js +39 -0
  53. package/dist/_vendor/ailf-core/types/task-graph.d.ts +86 -0
  54. package/dist/_vendor/ailf-core/types/task-graph.js +20 -0
  55. package/dist/_vendor/ailf-core/types/trace.d.ts +118 -0
  56. package/dist/_vendor/ailf-core/types/trace.js +18 -0
  57. package/dist/_vendor/ailf-core/types/variable-envelope.d.ts +80 -0
  58. package/dist/_vendor/ailf-core/types/variable-envelope.js +16 -0
  59. package/dist/_vendor/ailf-shared/dimension-names.d.ts +5 -18
  60. package/dist/_vendor/ailf-shared/dimension-names.js +6 -24
  61. package/dist/_vendor/ailf-shared/eval-modes.d.ts +38 -6
  62. package/dist/_vendor/ailf-shared/eval-modes.js +26 -2
  63. package/dist/_vendor/ailf-shared/index.d.ts +0 -1
  64. package/dist/_vendor/ailf-shared/index.js +0 -1
  65. package/dist/adapters/api-client/build-request.js +14 -13
  66. package/dist/adapters/config-sources/file-config-adapter.d.ts +20 -11
  67. package/dist/adapters/config-sources/file-config-adapter.js +39 -12
  68. package/dist/adapters/config-sources/index.d.ts +2 -0
  69. package/dist/adapters/config-sources/index.js +1 -0
  70. package/dist/adapters/config-sources/ts-config-loader.d.ts +59 -0
  71. package/dist/adapters/config-sources/ts-config-loader.js +141 -0
  72. package/dist/adapters/doc-fetchers/sanity-doc-fetcher.d.ts +3 -2
  73. package/dist/adapters/doc-fetchers/sanity-doc-fetcher.js +7 -2
  74. package/dist/adapters/task-sources/composite-task-source.d.ts +3 -3
  75. package/dist/adapters/task-sources/composite-task-source.js +1 -1
  76. package/dist/adapters/task-sources/content-lake-task-source.d.ts +7 -6
  77. package/dist/adapters/task-sources/content-lake-task-source.js +35 -39
  78. package/dist/adapters/task-sources/index.d.ts +3 -2
  79. package/dist/adapters/task-sources/index.js +3 -2
  80. package/dist/adapters/task-sources/repo-schemas.d.ts +218 -16
  81. package/dist/adapters/task-sources/repo-schemas.js +227 -19
  82. package/dist/adapters/task-sources/repo-task-source.d.ts +16 -12
  83. package/dist/adapters/task-sources/repo-task-source.js +92 -80
  84. package/dist/adapters/task-sources/repo-validation.d.ts +36 -5
  85. package/dist/adapters/task-sources/repo-validation.js +126 -5
  86. package/dist/adapters/task-sources/task-file-loader.d.ts +64 -0
  87. package/dist/adapters/task-sources/task-file-loader.js +83 -0
  88. package/dist/adapters/task-sources/yaml-task-source.d.ts +6 -6
  89. package/dist/adapters/task-sources/yaml-task-source.js +19 -16
  90. package/dist/cli.js +0 -2
  91. package/dist/commands/baseline.js +4 -1
  92. package/dist/commands/calculate-scores.js +1 -1
  93. package/dist/commands/coverage-audit.js +9 -1
  94. package/dist/commands/explain-handler.js +25 -23
  95. package/dist/commands/fetch-docs.js +3 -2
  96. package/dist/commands/generate-configs.js +1 -1
  97. package/dist/commands/init.d.ts +6 -4
  98. package/dist/commands/init.js +302 -23
  99. package/dist/commands/interactive.js +11 -7
  100. package/dist/commands/pipeline-action.d.ts +2 -0
  101. package/dist/commands/pipeline-action.js +16 -6
  102. package/dist/commands/pipeline.d.ts +1 -0
  103. package/dist/commands/pipeline.js +4 -2
  104. package/dist/commands/pr-comment.js +1 -1
  105. package/dist/commands/publish.js +2 -2
  106. package/dist/commands/readiness-report.js +13 -6
  107. package/dist/commands/validate-tasks.d.ts +2 -2
  108. package/dist/commands/validate-tasks.js +26 -15
  109. package/dist/composition-root.d.ts +13 -1
  110. package/dist/composition-root.js +99 -4
  111. package/dist/index.d.ts +41 -0
  112. package/dist/index.js +48 -0
  113. package/dist/orchestration/build-app-context.js +1 -0
  114. package/dist/orchestration/build-step-sequence.js +28 -8
  115. package/dist/orchestration/steps/calculate-scores-step.js +24 -11
  116. package/dist/orchestration/steps/fetch-docs-step.js +8 -7
  117. package/dist/orchestration/steps/gap-analysis-step.js +8 -7
  118. package/dist/orchestration/steps/generate-configs-step.d.ts +16 -3
  119. package/dist/orchestration/steps/generate-configs-step.js +261 -51
  120. package/dist/orchestration/steps/grader-consistency-step.js +7 -4
  121. package/dist/orchestration/steps/mirror-repo-tasks-step.js +1 -1
  122. package/dist/orchestration/steps/readiness-step.js +5 -6
  123. package/dist/orchestration/steps/run-eval-step.d.ts +1 -2
  124. package/dist/orchestration/steps/run-eval-step.js +8 -7
  125. package/dist/pipeline/cache.d.ts +1 -1
  126. package/dist/pipeline/cache.js +36 -8
  127. package/dist/pipeline/calculate-scores.d.ts +2 -4
  128. package/dist/pipeline/calculate-scores.js +43 -113
  129. package/dist/pipeline/checks.js +2 -2
  130. package/dist/pipeline/compare.js +8 -8
  131. package/dist/pipeline/compiler/__tests__/agent-harness-handler.test.d.ts +10 -0
  132. package/dist/pipeline/compiler/__tests__/agent-harness-handler.test.js +288 -0
  133. package/dist/pipeline/compiler/__tests__/assertion-mapper.test.d.ts +9 -0
  134. package/dist/pipeline/compiler/__tests__/assertion-mapper.test.js +145 -0
  135. package/dist/pipeline/compiler/__tests__/knowledge-probe-handler.test.d.ts +10 -0
  136. package/dist/pipeline/compiler/__tests__/knowledge-probe-handler.test.js +314 -0
  137. package/dist/pipeline/compiler/__tests__/literacy-handler.test.d.ts +10 -0
  138. package/dist/pipeline/compiler/__tests__/literacy-handler.test.js +486 -0
  139. package/dist/pipeline/compiler/__tests__/mcp-server-handler.test.d.ts +10 -0
  140. package/dist/pipeline/compiler/__tests__/mcp-server-handler.test.js +392 -0
  141. package/dist/pipeline/compiler/__tests__/promptfoo-compiler.test.d.ts +9 -0
  142. package/dist/pipeline/compiler/__tests__/promptfoo-compiler.test.js +333 -0
  143. package/dist/pipeline/compiler/__tests__/sandbox-and-fixtures.test.d.ts +12 -0
  144. package/dist/pipeline/compiler/__tests__/sandbox-and-fixtures.test.js +210 -0
  145. package/dist/pipeline/compiler/__tests__/scoring-and-presets.test.d.ts +7 -0
  146. package/dist/pipeline/compiler/__tests__/scoring-and-presets.test.js +404 -0
  147. package/dist/pipeline/compiler/__tests__/scoring-bridge.test.d.ts +10 -0
  148. package/dist/pipeline/compiler/__tests__/scoring-bridge.test.js +184 -0
  149. package/dist/pipeline/compiler/__tests__/task-graph-builder.test.d.ts +8 -0
  150. package/dist/pipeline/compiler/__tests__/task-graph-builder.test.js +301 -0
  151. package/dist/pipeline/compiler/__tests__/telemetry.test.d.ts +9 -0
  152. package/dist/pipeline/compiler/__tests__/telemetry.test.js +503 -0
  153. package/dist/pipeline/compiler/assertion-mapper.d.ts +58 -0
  154. package/dist/pipeline/compiler/assertion-mapper.js +175 -0
  155. package/dist/pipeline/compiler/compiler-to-yaml.d.ts +51 -0
  156. package/dist/pipeline/compiler/compiler-to-yaml.js +222 -0
  157. package/dist/pipeline/compiler/config-loader.d.ts +56 -0
  158. package/dist/pipeline/compiler/config-loader.js +111 -0
  159. package/dist/pipeline/compiler/fixture-resolver.d.ts +41 -0
  160. package/dist/pipeline/compiler/fixture-resolver.js +113 -0
  161. package/dist/pipeline/compiler/hash.d.ts +11 -0
  162. package/dist/pipeline/compiler/hash.js +18 -0
  163. package/dist/pipeline/compiler/ignore-fields.d.ts +53 -0
  164. package/dist/pipeline/compiler/ignore-fields.js +113 -0
  165. package/dist/pipeline/compiler/index.d.ts +29 -0
  166. package/dist/pipeline/compiler/index.js +45 -0
  167. package/dist/pipeline/compiler/literacy-bridge.d.ts +102 -0
  168. package/dist/pipeline/compiler/literacy-bridge.js +172 -0
  169. package/dist/pipeline/compiler/mode-bases/agent-harness.d.ts +10 -0
  170. package/dist/pipeline/compiler/mode-bases/agent-harness.js +21 -0
  171. package/dist/pipeline/compiler/mode-bases/index.d.ts +4 -0
  172. package/dist/pipeline/compiler/mode-bases/index.js +4 -0
  173. package/dist/pipeline/compiler/mode-bases/knowledge-probe.d.ts +10 -0
  174. package/dist/pipeline/compiler/mode-bases/knowledge-probe.js +22 -0
  175. package/dist/pipeline/compiler/mode-bases/literacy.d.ts +12 -0
  176. package/dist/pipeline/compiler/mode-bases/literacy.js +78 -0
  177. package/dist/pipeline/compiler/mode-bases/mcp-server.d.ts +10 -0
  178. package/dist/pipeline/compiler/mode-bases/mcp-server.js +70 -0
  179. package/dist/pipeline/compiler/mode-handlers/__fixtures__/agent-harness-example-tasks.d.ts +14 -0
  180. package/dist/pipeline/compiler/mode-handlers/__fixtures__/agent-harness-example-tasks.js +152 -0
  181. package/dist/pipeline/compiler/mode-handlers/__fixtures__/knowledge-probe-example-tasks.d.ts +32 -0
  182. package/dist/pipeline/compiler/mode-handlers/__fixtures__/knowledge-probe-example-tasks.js +176 -0
  183. package/dist/pipeline/compiler/mode-handlers/__fixtures__/mcp-example-tasks.d.ts +49 -0
  184. package/dist/pipeline/compiler/mode-handlers/__fixtures__/mcp-example-tasks.js +259 -0
  185. package/dist/pipeline/compiler/mode-handlers/agent-harness/assertions.d.ts +43 -0
  186. package/dist/pipeline/compiler/mode-handlers/agent-harness/assertions.js +187 -0
  187. package/dist/pipeline/compiler/mode-handlers/agent-harness/compiler.d.ts +19 -0
  188. package/dist/pipeline/compiler/mode-handlers/agent-harness/compiler.js +138 -0
  189. package/dist/pipeline/compiler/mode-handlers/agent-harness/index.d.ts +16 -0
  190. package/dist/pipeline/compiler/mode-handlers/agent-harness/index.js +43 -0
  191. package/dist/pipeline/compiler/mode-handlers/agent-harness/prompts.d.ts +9 -0
  192. package/dist/pipeline/compiler/mode-handlers/agent-harness/prompts.js +29 -0
  193. package/dist/pipeline/compiler/mode-handlers/agent-harness/sandbox.d.ts +12 -0
  194. package/dist/pipeline/compiler/mode-handlers/agent-harness/sandbox.js +82 -0
  195. package/dist/pipeline/compiler/mode-handlers/agent-harness/tool-presets.d.ts +4 -0
  196. package/dist/pipeline/compiler/mode-handlers/agent-harness/tool-presets.js +19 -0
  197. package/dist/pipeline/compiler/mode-handlers/agent-harness/types.d.ts +49 -0
  198. package/dist/pipeline/compiler/mode-handlers/agent-harness/types.js +4 -0
  199. package/dist/pipeline/compiler/mode-handlers/agent-harness/validation.d.ts +9 -0
  200. package/dist/pipeline/compiler/mode-handlers/agent-harness/validation.js +16 -0
  201. package/dist/pipeline/compiler/mode-handlers/index.d.ts +15 -0
  202. package/dist/pipeline/compiler/mode-handlers/index.js +19 -0
  203. package/dist/pipeline/compiler/mode-handlers/knowledge-probe/assertions.d.ts +16 -0
  204. package/dist/pipeline/compiler/mode-handlers/knowledge-probe/assertions.js +61 -0
  205. package/dist/pipeline/compiler/mode-handlers/knowledge-probe/compiler.d.ts +18 -0
  206. package/dist/pipeline/compiler/mode-handlers/knowledge-probe/compiler.js +112 -0
  207. package/dist/pipeline/compiler/mode-handlers/knowledge-probe/index.d.ts +26 -0
  208. package/dist/pipeline/compiler/mode-handlers/knowledge-probe/index.js +49 -0
  209. package/dist/pipeline/compiler/mode-handlers/knowledge-probe/prompts.d.ts +9 -0
  210. package/dist/pipeline/compiler/mode-handlers/knowledge-probe/prompts.js +28 -0
  211. package/dist/pipeline/compiler/mode-handlers/knowledge-probe/types.d.ts +44 -0
  212. package/dist/pipeline/compiler/mode-handlers/knowledge-probe/types.js +4 -0
  213. package/dist/pipeline/compiler/mode-handlers/knowledge-probe/validation.d.ts +9 -0
  214. package/dist/pipeline/compiler/mode-handlers/knowledge-probe/validation.js +24 -0
  215. package/dist/pipeline/compiler/mode-handlers/literacy/assertions.d.ts +18 -0
  216. package/dist/pipeline/compiler/mode-handlers/literacy/assertions.js +118 -0
  217. package/dist/pipeline/compiler/mode-handlers/literacy/compiler.d.ts +14 -0
  218. package/dist/pipeline/compiler/mode-handlers/literacy/compiler.js +105 -0
  219. package/dist/pipeline/compiler/mode-handlers/literacy/index.d.ts +11 -0
  220. package/dist/pipeline/compiler/mode-handlers/literacy/index.js +38 -0
  221. package/dist/pipeline/compiler/mode-handlers/literacy/prompts.d.ts +9 -0
  222. package/dist/pipeline/compiler/mode-handlers/literacy/prompts.js +74 -0
  223. package/dist/pipeline/compiler/mode-handlers/literacy/types.d.ts +41 -0
  224. package/dist/pipeline/compiler/mode-handlers/literacy/types.js +4 -0
  225. package/dist/pipeline/compiler/mode-handlers/literacy/validation.d.ts +12 -0
  226. package/dist/pipeline/compiler/mode-handlers/literacy/validation.js +28 -0
  227. package/dist/pipeline/compiler/mode-handlers/mcp-server/assertions.d.ts +42 -0
  228. package/dist/pipeline/compiler/mode-handlers/mcp-server/assertions.js +334 -0
  229. package/dist/pipeline/compiler/mode-handlers/mcp-server/compiler.d.ts +19 -0
  230. package/dist/pipeline/compiler/mode-handlers/mcp-server/compiler.js +100 -0
  231. package/dist/pipeline/compiler/mode-handlers/mcp-server/index.d.ts +27 -0
  232. package/dist/pipeline/compiler/mode-handlers/mcp-server/index.js +54 -0
  233. package/dist/pipeline/compiler/mode-handlers/mcp-server/prompts.d.ts +8 -0
  234. package/dist/pipeline/compiler/mode-handlers/mcp-server/prompts.js +28 -0
  235. package/dist/pipeline/compiler/mode-handlers/mcp-server/provider-config.d.ts +28 -0
  236. package/dist/pipeline/compiler/mode-handlers/mcp-server/provider-config.js +104 -0
  237. package/dist/pipeline/compiler/mode-handlers/mcp-server/types.d.ts +37 -0
  238. package/dist/pipeline/compiler/mode-handlers/mcp-server/types.js +4 -0
  239. package/dist/pipeline/compiler/mode-handlers/mcp-server/validation.d.ts +9 -0
  240. package/dist/pipeline/compiler/mode-handlers/mcp-server/validation.js +43 -0
  241. package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/index.d.ts +33 -0
  242. package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/index.js +174 -0
  243. package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/mcp-connection.d.ts +19 -0
  244. package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/mcp-connection.js +95 -0
  245. package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/tool-loop-anthropic.d.ts +19 -0
  246. package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/tool-loop-anthropic.js +172 -0
  247. package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/tool-loop-openai.d.ts +14 -0
  248. package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/tool-loop-openai.js +16 -0
  249. package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/types.d.ts +93 -0
  250. package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider/types.js +4 -0
  251. package/dist/pipeline/compiler/preset-loader.d.ts +22 -0
  252. package/dist/pipeline/compiler/preset-loader.js +99 -0
  253. package/dist/pipeline/compiler/presets/index.d.ts +9 -0
  254. package/dist/pipeline/compiler/presets/index.js +8 -0
  255. package/dist/pipeline/compiler/presets/sanity-literacy.d.ts +42 -0
  256. package/dist/pipeline/compiler/presets/sanity-literacy.js +208 -0
  257. package/dist/pipeline/compiler/promptfoo-compiler.d.ts +96 -0
  258. package/dist/pipeline/compiler/promptfoo-compiler.js +230 -0
  259. package/dist/pipeline/compiler/provider-assembler.d.ts +39 -0
  260. package/dist/pipeline/compiler/provider-assembler.js +137 -0
  261. package/dist/pipeline/compiler/sandbox/docker-sandbox.d.ts +21 -0
  262. package/dist/pipeline/compiler/sandbox/docker-sandbox.js +136 -0
  263. package/dist/pipeline/compiler/sandbox/fixture-provisioner.d.ts +69 -0
  264. package/dist/pipeline/compiler/sandbox/fixture-provisioner.js +189 -0
  265. package/dist/pipeline/compiler/sandbox/git-worktree-sandbox.d.ts +20 -0
  266. package/dist/pipeline/compiler/sandbox/git-worktree-sandbox.js +114 -0
  267. package/dist/pipeline/compiler/sandbox/index.d.ts +10 -0
  268. package/dist/pipeline/compiler/sandbox/index.js +11 -0
  269. package/dist/pipeline/compiler/sandbox/sandbox-selector.d.ts +35 -0
  270. package/dist/pipeline/compiler/sandbox/sandbox-selector.js +86 -0
  271. package/dist/pipeline/compiler/sandbox/sandbox-strategy.d.ts +81 -0
  272. package/dist/pipeline/compiler/sandbox/sandbox-strategy.js +15 -0
  273. package/dist/pipeline/compiler/sandbox/tempdir-sandbox.d.ts +20 -0
  274. package/dist/pipeline/compiler/sandbox/tempdir-sandbox.js +74 -0
  275. package/dist/pipeline/compiler/scoring-bridge.d.ts +49 -0
  276. package/dist/pipeline/compiler/scoring-bridge.js +114 -0
  277. package/dist/pipeline/compiler/task-graph-builder.d.ts +54 -0
  278. package/dist/pipeline/compiler/task-graph-builder.js +291 -0
  279. package/dist/pipeline/compiler/telemetry/cost-tracker.d.ts +90 -0
  280. package/dist/pipeline/compiler/telemetry/cost-tracker.js +146 -0
  281. package/dist/pipeline/compiler/telemetry/index.d.ts +14 -0
  282. package/dist/pipeline/compiler/telemetry/index.js +19 -0
  283. package/dist/pipeline/compiler/telemetry/redactor.d.ts +58 -0
  284. package/dist/pipeline/compiler/telemetry/redactor.js +222 -0
  285. package/dist/pipeline/compiler/telemetry/tool-classifier.d.ts +32 -0
  286. package/dist/pipeline/compiler/telemetry/tool-classifier.js +120 -0
  287. package/dist/pipeline/compiler/telemetry/trace-collector.d.ts +75 -0
  288. package/dist/pipeline/compiler/telemetry/trace-collector.js +297 -0
  289. package/dist/pipeline/compiler/telemetry/trace-store.d.ts +78 -0
  290. package/dist/pipeline/compiler/telemetry/trace-store.js +85 -0
  291. package/dist/pipeline/compiler/variable-resolver.d.ts +46 -0
  292. package/dist/pipeline/compiler/variable-resolver.js +115 -0
  293. package/dist/pipeline/coverage-audit.d.ts +15 -5
  294. package/dist/pipeline/coverage-audit.js +41 -22
  295. package/dist/pipeline/eval-constants.d.ts +16 -6
  296. package/dist/pipeline/eval-constants.js +25 -4
  297. package/dist/pipeline/eval-fingerprint.d.ts +2 -2
  298. package/dist/pipeline/eval-fingerprint.js +8 -9
  299. package/dist/pipeline/expand-tasks.d.ts +19 -10
  300. package/dist/pipeline/expand-tasks.js +34 -28
  301. package/dist/pipeline/gap-analysis.d.ts +1 -1
  302. package/dist/pipeline/gap-analysis.js +2 -2
  303. package/dist/pipeline/generate-configs.d.ts +22 -4
  304. package/dist/pipeline/generate-configs.js +53 -24
  305. package/dist/pipeline/grader-api.d.ts +3 -3
  306. package/dist/pipeline/grader-api.js +5 -12
  307. package/dist/pipeline/grader-compare-runner.js +20 -27
  308. package/dist/pipeline/grader-comparison.d.ts +4 -8
  309. package/dist/pipeline/grader-comparison.js +11 -17
  310. package/dist/pipeline/grader-consistency-runner.d.ts +2 -3
  311. package/dist/pipeline/grader-consistency-runner.js +16 -20
  312. package/dist/pipeline/grader-consistency.d.ts +6 -10
  313. package/dist/pipeline/grader-consistency.js +13 -32
  314. package/dist/pipeline/grader-sensitivity-runner.js +7 -5
  315. package/dist/pipeline/grader-sensitivity.d.ts +2 -6
  316. package/dist/pipeline/grader-sensitivity.js +10 -10
  317. package/dist/pipeline/grader-validate-runner.js +7 -5
  318. package/dist/pipeline/grader-validation.d.ts +2 -6
  319. package/dist/pipeline/grader-validation.js +14 -22
  320. package/dist/pipeline/map-request-to-config.js +7 -1
  321. package/dist/pipeline/mirror-repo-tasks.d.ts +13 -13
  322. package/dist/pipeline/mirror-repo-tasks.js +22 -21
  323. package/dist/pipeline/normalize-mode.d.ts +49 -0
  324. package/dist/pipeline/normalize-mode.js +64 -0
  325. package/dist/pipeline/plan.d.ts +5 -2
  326. package/dist/pipeline/plan.js +134 -78
  327. package/dist/pipeline/pr-comment.js +2 -0
  328. package/dist/pipeline/profile-resolution.d.ts +22 -14
  329. package/dist/pipeline/profile-resolution.js +41 -19
  330. package/dist/pipeline/provenance.d.ts +2 -2
  331. package/dist/pipeline/provenance.js +12 -17
  332. package/dist/pipeline/release-report.js +4 -4
  333. package/dist/pipeline/repo-threshold-evaluator.d.ts +1 -1
  334. package/dist/pipeline/repo-threshold-evaluator.js +1 -1
  335. package/dist/pipeline/rubric-loader.d.ts +20 -0
  336. package/dist/pipeline/rubric-loader.js +37 -0
  337. package/dist/pipeline/validate.d.ts +4 -4
  338. package/dist/pipeline/validate.js +64 -53
  339. package/dist/schedules/loader.js +18 -8
  340. package/dist/scripts/migrate-task-mode.d.ts +24 -0
  341. package/dist/scripts/migrate-task-mode.js +85 -0
  342. package/dist/scripts/migrate-tasks-to-content-lake.js +11 -10
  343. package/dist/scripts/validate-task-sources.d.ts +1 -1
  344. package/dist/scripts/validate-task-sources.js +15 -15
  345. package/dist/sinks/loader.js +5 -7
  346. package/dist/sources.d.ts +7 -7
  347. package/dist/sources.js +22 -24
  348. package/dist/webhook/dispatch.js +2 -1
  349. package/package.json +15 -4
  350. package/tasks/knowledge-probe/define-type-api.task.ts +55 -0
  351. package/tasks/knowledge-probe/groq-projections.task.ts +59 -0
  352. package/tasks/literacy/frameworks.task.ts +128 -0
  353. package/tasks/literacy/functions.task.ts +69 -0
  354. package/tasks/literacy/groq.task.ts +258 -0
  355. package/tasks/literacy/nextjs-live.task.ts +75 -0
  356. package/tasks/literacy/studio-setup.task.ts +131 -0
  357. package/tasks/literacy/visual-editing.task.ts +146 -0
  358. package/config/features.yaml +0 -116
  359. package/config/models.yaml +0 -116
  360. package/config/prompts.yaml +0 -75
  361. package/config/rubrics.yaml +0 -81
  362. package/config/schedules.yaml +0 -43
  363. package/config/sinks.yaml +0 -54
  364. package/config/sources.yaml +0 -51
  365. package/config/thresholds.yaml +0 -49
  366. package/dist/_vendor/ailf-tasks/cli.d.ts +0 -8
  367. package/dist/_vendor/ailf-tasks/cli.js +0 -61
  368. package/dist/_vendor/ailf-tasks/index.d.ts +0 -13
  369. package/dist/_vendor/ailf-tasks/index.js +0 -16
  370. package/dist/_vendor/ailf-tasks/parser.d.ts +0 -27
  371. package/dist/_vendor/ailf-tasks/parser.js +0 -73
  372. package/dist/_vendor/ailf-tasks/schemas.d.ts +0 -198
  373. package/dist/_vendor/ailf-tasks/schemas.js +0 -180
  374. package/dist/_vendor/ailf-tasks/validation.d.ts +0 -47
  375. package/dist/_vendor/ailf-tasks/validation.js +0 -162
  376. package/dist/agent-observer/test-imports.d.ts +0 -7
  377. package/dist/agent-observer/test-imports.js +0 -185
@@ -0,0 +1,114 @@
1
+ /**
2
+ * GitWorktreeSandboxStrategy — sandbox using `git worktree` for repo-based tasks.
3
+ *
4
+ * Creates a git worktree at a specific ref, providing a deterministic
5
+ * starting state for tasks that modify a git repository.
6
+ *
7
+ * All git CLI calls use `execFileSync` (array form, no shell) to prevent
8
+ * injection from task-supplied values like git refs or repo paths.
9
+ *
10
+ * @see docs/exec-plans/architecture-overhaul/phase-4-agent-harness.md
11
+ */
12
+ import { randomUUID } from "crypto";
13
+ import { execFileSync } from "child_process";
14
+ import { existsSync, rmSync } from "fs";
15
+ import { tmpdir } from "os";
16
+ import { resolve } from "path";
/**
 * Validate a task-supplied git ref before it is handed to the git CLI.
 *
 * A ref is accepted only when it:
 *  - is a non-empty string made of alphanumerics and `-`, `_`, `/`, `.`, `~`, `^`
 *  - does not start with `-` (git would parse it as a command-line option,
 *    e.g. `--force`, even though no shell is involved)
 *  - does not contain `..` (path traversal / range syntax)
 *
 * @param ref - Candidate git ref (branch, tag, commit-ish).
 * @throws {Error} When the ref violates any of the rules above.
 */
function validateGitRef(ref) {
    // Git refs: alphanumeric, -, _, /, ., ~, ^ — no spaces, no shell metacharacters
    if (typeof ref !== "string" || !/^[a-zA-Z0-9._\-/~^]+$/.test(ref)) {
        throw new Error(`Invalid git ref: "${ref}" — must contain only alphanumeric, -, _, /, ., ~, ^ characters`);
    }
    // A leading dash passes the character whitelist but would be interpreted
    // by git as an option rather than a revision (argument injection).
    if (ref.startsWith("-")) {
        throw new Error(`Invalid git ref: "${ref}" — must not start with "-" (would be parsed as a git option)`);
    }
    // Disallow path traversal via ".." segments
    if (ref.includes("..")) {
        throw new Error(`Invalid git ref: "${ref}" — must not contain ".." (path traversal)`);
    }
}
/**
 * Extract file paths from `git status --porcelain` output.
 *
 * Each entry has the form `XY <path>`: two status columns, one space, then
 * the path. The raw output must NOT be left-trimmed before parsing, because
 * the first status column is frequently a space (e.g. ` M file.ts`) and
 * trimming it would shift the path by one character.
 *
 * Rename/copy entries (`R  old -> new`) yield the resulting (new) path.
 *
 * @param output - Raw stdout of `git status --porcelain`.
 * @returns The paths listed in the output, in order.
 */
function parsePorcelainStatus(output) {
    const files = [];
    for (const line of output.split("\n")) {
        const status = line.slice(0, 2);
        let file = line.slice(3).trim();
        if (/[RC]/.test(status)) {
            const arrow = file.indexOf(" -> ");
            if (arrow !== -1)
                file = file.slice(arrow + 4);
        }
        if (file)
            files.push(file);
    }
    return files;
}
/**
 * Report whether `child` is a strict descendant of `parent`.
 *
 * Walks up from `child` one directory at a time instead of comparing string
 * prefixes, so a sibling such as `/tmp-evil` is not mistaken for something
 * inside `/tmp`, and `parent` itself is never considered "inside".
 *
 * @param parent - Directory that must contain `child`.
 * @param child - Path to test.
 * @returns `true` only when `child` is located below `parent`.
 */
function isStrictlyInside(parent, child) {
    const root = resolve(parent);
    let current = resolve(child);
    for (;;) {
        const up = resolve(current, "..");
        if (up === current)
            return false; // reached the filesystem root without meeting `parent`
        if (up === root)
            return true;
        current = up;
    }
}
/**
 * Sandbox strategy that provisions a `git worktree` in the OS temp directory.
 *
 * All git invocations use `execFileSync` with an argument array (no shell).
 */
export class GitWorktreeSandboxStrategy {
    name = "Git Worktree";
    type = "git-worktree";
    /**
     * Sandbox id → repository path, recorded by `provision()` so that
     * `teardown()` can run `git worktree remove` against the owning repository
     * regardless of the process working directory.
     */
    #repoPathBySandboxId = new Map();
    /**
     * Check whether the `git` executable can be invoked.
     *
     * @returns `true` when `git --version` succeeds within 5 seconds.
     */
    async isAvailable() {
        try {
            execFileSync("git", ["--version"], { stdio: "ignore", timeout: 5000 });
            return true;
        }
        catch {
            return false;
        }
    }
    /**
     * Create a worktree of `options.repoPath` at `options.gitRef` (default `HEAD`).
     *
     * @param options - Provision options; `repoPath` is required, `gitRef` optional.
     * @returns Sandbox descriptor (`id`, `workingDir`, `strategy`, `gitRef`, `createdAt`).
     * @throws {Error} When `repoPath` is missing, the ref is invalid, or git fails.
     */
    async provision(options) {
        const repoPath = options.repoPath;
        if (!repoPath) {
            throw new Error("GitWorktreeSandboxStrategy requires repoPath — " +
                "the path to the git repository to create a worktree from");
        }
        const ref = options.gitRef ?? "HEAD";
        validateGitRef(ref);
        const id = `ailf-worktree-${randomUUID().slice(0, 8)}`;
        const workingDir = resolve(tmpdir(), id);
        try {
            // Array form — no shell, prevents injection via repoPath/workingDir/ref
            execFileSync("git", ["-C", repoPath, "worktree", "add", workingDir, ref], {
                encoding: "utf-8",
                timeout: 30_000,
            });
            this.#repoPathBySandboxId.set(id, repoPath);
            return {
                id,
                workingDir,
                strategy: "git-worktree",
                gitRef: ref,
                createdAt: new Date().toISOString(),
            };
        }
        catch (err) {
            const msg = err instanceof Error ? err.message : String(err);
            throw new Error(`Failed to create git worktree at "${ref}": ${msg}`, {
                cause: err,
            });
        }
    }
    /**
     * Gather the worktree's diff and the list of changed files (best effort).
     *
     * @param sandbox - Descriptor returned by `provision()`.
     * @returns `modifiedFiles` from `git status --porcelain`, `diff` from
     *   `git diff` (or `undefined` when empty/unavailable), and `durationMs`
     *   measured from `sandbox.createdAt`.
     */
    async collectArtifacts(sandbox) {
        let modifiedFiles = [];
        let diff;
        if (existsSync(sandbox.workingDir)) {
            try {
                diff = execFileSync("git", ["-C", sandbox.workingDir, "diff"], {
                    encoding: "utf-8",
                    timeout: 10_000,
                }).trim();
                // Do not trim the status output: leading spaces are status columns.
                const statusOutput = execFileSync("git", ["-C", sandbox.workingDir, "status", "--porcelain"], { encoding: "utf-8", timeout: 10_000 });
                modifiedFiles = parsePorcelainStatus(statusOutput);
            }
            catch {
                // Best-effort artifact collection
            }
        }
        return {
            modifiedFiles,
            diff: diff || undefined,
            durationMs: Date.now() - new Date(sandbox.createdAt).getTime(),
        };
    }
    /**
     * Remove the worktree created by `provision()`.
     *
     * Uses `git -C <repoPath> worktree remove` when this instance provisioned
     * the sandbox; otherwise falls back to running git in the current working
     * directory. If git fails, the directory is deleted manually, but only
     * when it lies strictly inside the OS temp directory.
     *
     * @param sandbox - Descriptor returned by `provision()`.
     */
    async teardown(sandbox) {
        const repoPath = this.#repoPathBySandboxId.get(sandbox.id);
        this.#repoPathBySandboxId.delete(sandbox.id);
        if (!existsSync(sandbox.workingDir)) {
            return;
        }
        const removeArgs = ["worktree", "remove", sandbox.workingDir, "--force"];
        try {
            execFileSync("git", repoPath ? ["-C", repoPath, ...removeArgs] : removeArgs, { stdio: "ignore", timeout: 10_000 });
        }
        catch {
            // If worktree remove fails, fall back to manual cleanup.
            // Guard: only delete strictly below tmpdir to prevent accidental deletion
            // (a plain prefix check would also match siblings like "/tmp-x" and tmpdir itself).
            if (isStrictlyInside(tmpdir(), sandbox.workingDir)) {
                rmSync(sandbox.workingDir, { recursive: true, force: true });
            }
        }
    }
}
@@ -0,0 +1,10 @@
1
+ /**
2
+ * Sandbox infrastructure — isolated execution environments for agent harness mode.
3
+ *
4
+ * @see docs/exec-plans/architecture-overhaul/phase-4-agent-harness.md
5
+ */
6
+ export type { SandboxArtifacts, SandboxInfo, SandboxProvisionOptions, SandboxStrategy, SandboxType, } from "./sandbox-strategy.js";
7
+ export { DockerSandboxStrategy } from "./docker-sandbox.js";
8
+ export { GitWorktreeSandboxStrategy } from "./git-worktree-sandbox.js";
9
+ export { TempDirSandboxStrategy } from "./tempdir-sandbox.js";
10
+ export { createSandboxStrategy, selectSandboxStrategy, type SandboxSelectionResult, } from "./sandbox-selector.js";
@@ -0,0 +1,11 @@
1
+ /**
2
+ * Sandbox infrastructure — isolated execution environments for agent harness mode.
3
+ *
4
+ * @see docs/exec-plans/architecture-overhaul/phase-4-agent-harness.md
5
+ */
6
+ // Implementations
7
+ export { DockerSandboxStrategy } from "./docker-sandbox.js";
8
+ export { GitWorktreeSandboxStrategy } from "./git-worktree-sandbox.js";
9
+ export { TempDirSandboxStrategy } from "./tempdir-sandbox.js";
10
+ // Selector
11
+ export { createSandboxStrategy, selectSandboxStrategy, } from "./sandbox-selector.js";
@@ -0,0 +1,35 @@
1
+ /**
2
+ * Sandbox selector — chooses the best available sandbox strategy.
3
+ *
4
+ * Selection logic:
5
+ * 1. If task config specifies a strategy, use it
6
+ * 2. If Docker is available, prefer Docker (better isolation)
7
+ * 3. Fall back to TempDir (always available)
8
+ *
9
+ * CI environments (detected via CI env var) always prefer Docker.
10
+ *
11
+ * @see docs/exec-plans/architecture-overhaul/phase-4-agent-harness.md
12
+ */
13
+ import type { SandboxStrategy, SandboxType } from "./sandbox-strategy.js";
14
+ /** Result of sandbox selection */
15
+ export interface SandboxSelectionResult {
16
+ /** The selected strategy */
17
+ strategy: SandboxStrategy;
18
+ /** Whether this was a fallback from the preferred strategy */
19
+ isFallback: boolean;
20
+ /** Warning message if fallback was used */
21
+ warning?: string;
22
+ }
/**
 * Pick the best sandbox strategy that is currently available.
 *
 * @param preferred - Sandbox type requested by the task config, if any
 * @param log - Optional callback that receives diagnostic messages
 * @returns The chosen strategy together with fallback metadata
 */
export declare function selectSandboxStrategy(preferred?: SandboxType, log?: (msg: string) => void): Promise<SandboxSelectionResult>;
/**
 * Instantiate the sandbox strategy for the given type.
 * Availability is NOT checked here — the caller should verify it first.
 */
export declare function createSandboxStrategy(type: SandboxType): SandboxStrategy;
@@ -0,0 +1,86 @@
1
+ /**
2
+ * Sandbox selector — chooses the best available sandbox strategy.
3
+ *
4
+ * Selection logic:
5
+ * 1. If task config specifies a strategy, use it
6
+ * 2. If Docker is available, prefer Docker (better isolation)
7
+ * 3. Fall back to TempDir (always available)
8
+ *
9
+ * CI environments (detected via the CI or GITHUB_ACTIONS env vars) prefer Docker unless an available strategy was explicitly requested.
10
+ *
11
+ * @see docs/exec-plans/architecture-overhaul/phase-4-agent-harness.md
12
+ */
13
+ import { DockerSandboxStrategy } from "./docker-sandbox.js";
14
+ import { GitWorktreeSandboxStrategy } from "./git-worktree-sandbox.js";
15
+ import { TempDirSandboxStrategy } from "./tempdir-sandbox.js";
16
+ // ---------------------------------------------------------------------------
17
+ // Strategy registry
18
+ // ---------------------------------------------------------------------------
19
/** Factory per sandbox type; every call yields a fresh strategy instance. */
const strategies = {
    docker() {
        return new DockerSandboxStrategy();
    },
    "git-worktree"() {
        return new GitWorktreeSandboxStrategy();
    },
    // "none" has no strategy of its own and shares the temp-directory one
    none() {
        return new TempDirSandboxStrategy();
    },
    tempdir() {
        return new TempDirSandboxStrategy();
    },
};
25
+ /**
26
+ * Select the best available sandbox strategy.
27
+ *
28
+ * @param preferred - Preferred sandbox type from task config
29
+ * @param log - Optional log function for diagnostics
30
+ * @returns The selected strategy with fallback metadata
31
+ */
32
+ export async function selectSandboxStrategy(preferred, log) {
33
+ const emit = log ?? (() => { });
34
+ // If a specific strategy is requested, try it first
35
+ if (preferred && preferred !== "none") {
36
+ const strategy = strategies[preferred]();
37
+ const available = await strategy.isAvailable();
38
+ if (available) {
39
+ emit(`Using ${strategy.name} sandbox strategy (requested)`);
40
+ return { strategy, isFallback: false };
41
+ }
42
+ emit(`${strategy.name} is not available, falling back...`);
43
+ }
44
+ // CI environments prefer Docker
45
+ const isCI = Boolean(process.env.CI || process.env.GITHUB_ACTIONS);
46
+ if (isCI) {
47
+ const docker = new DockerSandboxStrategy();
48
+ if (await docker.isAvailable()) {
49
+ emit("Using Docker sandbox strategy (CI environment)");
50
+ return { strategy: docker, isFallback: preferred !== "docker" };
51
+ }
52
+ }
53
+ // Default fallback: Docker → TempDir
54
+ const docker = new DockerSandboxStrategy();
55
+ if (await docker.isAvailable()) {
56
+ emit("Using Docker sandbox strategy (auto-detected)");
57
+ return {
58
+ strategy: docker,
59
+ isFallback: preferred !== undefined && preferred !== "docker",
60
+ ...(preferred && preferred !== "docker"
61
+ ? {
62
+ warning: `Preferred sandbox "${preferred}" unavailable, using Docker instead`,
63
+ }
64
+ : {}),
65
+ };
66
+ }
67
+ // Universal fallback
68
+ const tempdir = new TempDirSandboxStrategy();
69
+ emit("Using TempDir sandbox strategy (fallback)");
70
+ return {
71
+ strategy: tempdir,
72
+ isFallback: preferred !== undefined && preferred !== "tempdir",
73
+ ...(preferred && preferred !== "tempdir" && preferred !== "none"
74
+ ? {
75
+ warning: `Preferred sandbox "${preferred}" unavailable, using temp directory instead`,
76
+ }
77
+ : {}),
78
+ };
79
+ }
80
+ /**
81
+ * Create a specific sandbox strategy by type.
82
+ * Does NOT check availability — caller should verify first.
83
+ */
84
+ export function createSandboxStrategy(type) {
85
+ return strategies[type]();
86
+ }
@@ -0,0 +1,81 @@
1
+ /**
2
+ * SandboxStrategy — port interface for isolated agent execution environments.
3
+ *
4
+ * Three implementations, selected by task config with automatic fallback:
5
+ *
6
+ * - DockerSandboxStrategy — full isolation via Docker containers
7
+ * - TempDirSandboxStrategy — lightweight fallback using OS temp directories
8
+ * - GitWorktreeSandboxStrategy — uses `git worktree` for repo-based tasks
9
+ *
10
+ * Selection: task config specifies preferred strategy; runtime falls back
11
+ * Docker → TempDir if Docker is unavailable. CI environments prefer Docker.
12
+ *
13
+ * @see docs/exec-plans/architecture-overhaul/phase-4-agent-harness.md
14
+ */
15
+ /** Metadata describing a provisioned sandbox; returned by `provision` and passed back to `collectArtifacts` and `teardown` */
16
+ export interface SandboxInfo {
17
+ /** Unique sandbox identifier */
18
+ id: string;
19
+ /** Absolute path to the sandbox working directory */
20
+ workingDir: string;
21
+ /** Which strategy created this sandbox */
22
+ strategy: SandboxType;
23
+ /** Docker container ID (when strategy is "docker") */
24
+ containerId?: string;
25
+ /** Git worktree ref (when strategy is "git-worktree") */
26
+ gitRef?: string;
27
+ /** Timestamp when the sandbox was provisioned (TempDirSandboxStrategy records `new Date().toISOString()`) */
28
+ createdAt: string;
29
+ }
30
+ /** Supported sandbox strategies; the selector's registry maps "none" to the temp-directory strategy */
31
+ export type SandboxType = "docker" | "git-worktree" | "none" | "tempdir";
32
+ /** Configuration for sandbox provisioning, passed to `SandboxStrategy.provision` */
33
+ export interface SandboxProvisionOptions {
34
+ /** Sandbox type to use */
35
+ type: SandboxType;
36
+ /** Task ID (used for naming; TempDirSandboxStrategy strips characters outside [a-zA-Z0-9_-] and keeps at most 20) */
37
+ taskId: string;
38
+ /** Docker image (for docker strategy) */
39
+ image?: string;
40
+ /** Resource limits */
41
+ limits?: {
42
+ cpus?: number; // presumably a CPU-count cap — TODO confirm against the Docker strategy
43
+ memoryBytes?: number; // presumably a memory cap in bytes — TODO confirm
44
+ diskBytes?: number; // presumably a disk cap in bytes — TODO confirm
45
+ networkAccess?: boolean; // presumably toggles network access inside the sandbox — TODO confirm
46
+ };
47
+ /** Git ref for git-worktree strategy */
48
+ gitRef?: string;
49
+ /** Git repo path for git-worktree strategy */
50
+ repoPath?: string;
51
+ }
52
+ /** Artifacts collected from sandbox after execution, as returned by `SandboxStrategy.collectArtifacts` */
53
+ export interface SandboxArtifacts {
54
+ /** Files modified during execution (relative paths); TempDirSandboxStrategy lists every non-symlink file under the working directory */
55
+ modifiedFiles: string[];
56
+ /** Git diff output (if applicable) */
57
+ diff?: string;
58
+ /** Stdout/stderr captured during execution */
59
+ output?: string;
60
+ /** Total execution time in milliseconds (TempDirSandboxStrategy measures from `createdAt` to the moment artifacts are collected) */
61
+ durationMs: number;
62
+ }
63
+ /**
64
+ * Port interface for sandbox lifecycle management.
65
+ *
66
+ * Implementations handle the full lifecycle: provision → use → collect → teardown.
67
+ */
68
+ export interface SandboxStrategy {
69
+ /** Human-readable strategy name (the selector interpolates it into its log messages) */
70
+ readonly name: string;
71
+ /** Strategy type identifier */
72
+ readonly type: SandboxType;
73
+ /** Check if this strategy is available in the current environment; the selector awaits this before returning a requested strategy */
74
+ isAvailable(): Promise<boolean>;
75
+ /** Provision a new sandbox environment and describe it as a `SandboxInfo` */
76
+ provision(options: SandboxProvisionOptions): Promise<SandboxInfo>;
77
+ /** Collect artifacts from the sandbox after execution */
78
+ collectArtifacts(sandbox: SandboxInfo): Promise<SandboxArtifacts>;
79
+ /** Tear down the sandbox (remove files, stop containers) */
80
+ teardown(sandbox: SandboxInfo): Promise<void>;
81
+ }
@@ -0,0 +1,15 @@
1
+ /**
2
+ * SandboxStrategy — port interface for isolated agent execution environments.
3
+ *
4
+ * Three implementations, selected by task config with automatic fallback:
5
+ *
6
+ * - DockerSandboxStrategy — full isolation via Docker containers
7
+ * - TempDirSandboxStrategy — lightweight fallback using OS temp directories
8
+ * - GitWorktreeSandboxStrategy — uses `git worktree` for repo-based tasks
9
+ *
10
+ * Selection: task config specifies preferred strategy; runtime falls back
11
+ * Docker → TempDir if Docker is unavailable. CI environments prefer Docker.
12
+ *
13
+ * @see docs/exec-plans/architecture-overhaul/phase-4-agent-harness.md
14
+ */
15
+ export {};
@@ -0,0 +1,20 @@
1
+ /**
2
+ * TempDirSandboxStrategy — lightweight sandbox using OS temp directories.
3
+ *
4
+ * Creates a temporary directory for each test case, provides it as
5
+ * the working directory, and cleans up after execution. No isolation
6
+ * guarantees — the agent has full access to the host filesystem.
7
+ *
8
+ * This is the universal fallback when Docker is unavailable.
9
+ *
10
+ * @see docs/exec-plans/architecture-overhaul/phase-4-agent-harness.md
11
+ */
12
+ import type { SandboxArtifacts, SandboxInfo, SandboxProvisionOptions, SandboxStrategy } from "./sandbox-strategy.js";
13
+ export declare class TempDirSandboxStrategy implements SandboxStrategy { // sandbox backed by a directory under os.tmpdir(); no isolation guarantees
14
+ readonly name = "Temporary Directory";
15
+ readonly type: "tempdir";
16
+ isAvailable(): Promise<boolean>; // the implementation always resolves to true
17
+ provision(options: SandboxProvisionOptions): Promise<SandboxInfo>; // creates an `ailf-sandbox-…` directory under os.tmpdir()
18
+ collectArtifacts(sandbox: SandboxInfo): Promise<SandboxArtifacts>; // lists non-symlink files under sandbox.workingDir
19
+ teardown(sandbox: SandboxInfo): Promise<void>; // removes sandbox.workingDir only when it resolves under os.tmpdir()
20
+ }
@@ -0,0 +1,74 @@
1
+ /**
2
+ * TempDirSandboxStrategy — lightweight sandbox using OS temp directories.
3
+ *
4
+ * Creates a temporary directory for each test case, provides it as
5
+ * the working directory, and cleans up after execution. No isolation
6
+ * guarantees — the agent has full access to the host filesystem.
7
+ *
8
+ * This is the universal fallback when Docker is unavailable.
9
+ *
10
+ * @see docs/exec-plans/architecture-overhaul/phase-4-agent-harness.md
11
+ */
12
+ import { randomUUID } from "crypto";
13
+ import { existsSync, mkdirSync, readdirSync, rmSync } from "fs";
14
+ import { tmpdir } from "os";
15
+ import { resolve } from "path";
16
+ export class TempDirSandboxStrategy {
17
+ name = "Temporary Directory";
18
+ type = "tempdir";
19
+ async isAvailable() {
20
+ // Always available — every OS has a temp directory
21
+ return true;
22
+ }
23
+ async provision(options) {
24
+ // Include sanitized taskId for debugging (strip non-alphanumeric for safety)
25
+ const safeTaskId = (options.taskId ?? "anon")
26
+ .replace(/[^a-zA-Z0-9_-]/g, "")
27
+ .slice(0, 20);
28
+ const id = `ailf-sandbox-${safeTaskId}-${randomUUID().slice(0, 8)}`;
29
+ const workingDir = resolve(tmpdir(), id);
30
+ mkdirSync(workingDir, { recursive: true });
31
+ return {
32
+ id,
33
+ workingDir,
34
+ strategy: "tempdir",
35
+ createdAt: new Date().toISOString(),
36
+ };
37
+ }
38
+ async collectArtifacts(sandbox) {
39
+ const modifiedFiles = [];
40
+ if (existsSync(sandbox.workingDir)) {
41
+ collectFilesRecursive(sandbox.workingDir, "", modifiedFiles);
42
+ }
43
+ return {
44
+ modifiedFiles,
45
+ durationMs: Date.now() - new Date(sandbox.createdAt).getTime(),
46
+ };
47
+ }
48
+ async teardown(sandbox) {
49
+ const workDir = resolve(sandbox.workingDir);
50
+ // Guard: only delete directories under os.tmpdir() to prevent
51
+ // accidental deletion if workingDir is corrupted
52
+ if (existsSync(workDir) && workDir.startsWith(resolve(tmpdir()))) {
53
+ rmSync(workDir, { recursive: true, force: true });
54
+ }
55
+ }
56
+ }
57
+ // ---------------------------------------------------------------------------
58
+ // Helpers
59
+ // ---------------------------------------------------------------------------
60
+ function collectFilesRecursive(dir, prefix, files, maxDepth = 20) {
61
+ if (maxDepth <= 0)
62
+ return;
63
+ for (const entry of readdirSync(dir, { withFileTypes: true })) {
64
+ if (entry.isSymbolicLink())
65
+ continue; // Skip symlinks to prevent traversal
66
+ const relative = prefix ? `${prefix}/${entry.name}` : entry.name;
67
+ if (entry.isDirectory()) {
68
+ collectFilesRecursive(resolve(dir, entry.name), relative, files, maxDepth - 1);
69
+ }
70
+ else {
71
+ files.push(relative);
72
+ }
73
+ }
74
+ }
@@ -0,0 +1,49 @@
1
+ /**
2
+ * scoring-bridge.ts — Bridge between Promptfoo raw results and the
3
+ * 4-tier scoring engine.
4
+ *
5
+ * Converts Promptfoo `ComponentResult[]` (from test results) into the
6
+ * scoring engine's `AssertionScore[]` format, then delegates aggregation
7
+ * to `aggregateDimensions` and `computeTaskScore` from core.
8
+ *
9
+ * This bridge replaces the three legacy scoring primitives in
10
+ * `calculate-scores.ts`:
11
+ * - `accumulateDimensions` → `convertToAssertionScores` + `aggregateDimensions`
12
+ * - `averageDimensions` → (handled internally by `aggregateDimensions`)
13
+ * - `weightedComposite` → `computeTaskScore`
14
+ *
15
+ * The bridge preserves the existing 0–100 output scale. The 4-tier
16
+ * engine works in [0, 1]; this module handles the conversion at
17
+ * boundaries.
18
+ *
19
+ * @see packages/core/src/services/scoring-engine.ts — the 4-tier engine
20
+ * @see packages/eval/src/pipeline/calculate-scores.ts — the consumer
21
+ * @see docs/exec-plans/architecture-overhaul/phase-7-migrate-literacy.md
22
+ */
23
+ import { type DimensionScore } from "../../_vendor/ailf-core/index.d.ts";
24
+ import type { TestResult } from "../../_vendor/ailf-core/index.d.ts";
25
+ /** Result of scoring a group of tests via the 4-tier engine, as returned by `scoreTestGroup` */
26
+ export interface BridgedScoreResult {
27
+ /** Per-dimension breakdown keyed by camelCase dimension name (0–100 scale, rounded to an integer) */
28
+ dimensions: Record<string, number>;
29
+ /** Weighted composite score (0–100 scale, rounded to an integer) */
30
+ composite: number;
31
+ /** Total cost across all tests (sum of each test's `cost`) */
32
+ totalCost: number;
33
+ /** Raw DimensionScore objects from the engine (0–1 scale) */
34
+ rawDimensions: DimensionScore[];
35
+ }
36
+ /**
37
+ * Score a group of test results using the 4-tier scoring engine.
38
+ *
39
+ * This replaces the legacy `accumulateDimensions → averageDimensions →
40
+ * weightedComposite` chain with the new engine's `aggregateDimensions →
41
+ * computeTaskScore` chain. Only `llm-rubric` components contribute to the scores.
42
+ *
43
+ * @param tests Pre-filtered test results (e.g., all gold or all baseline)
44
+ * @param profile Weight profile mapping kebab-case dimension names to weights
45
+ * (e.g., `{ "task-completion": 0.4, "code-correctness": 0.35, "doc-coverage": 0.25 }`)
46
+ * @param taskId Optional task identifier for traceability in TaskScore output ("aggregate" is used when omitted)
47
+ * @returns Dimensions (0–100, camelCase keys) and composite (0–100), matching legacy output format
48
+ */
49
+ export declare function scoreTestGroup(tests: TestResult[], profile: Record<string, number>, taskId?: string): BridgedScoreResult;
@@ -0,0 +1,114 @@
1
+ /**
2
+ * scoring-bridge.ts — Bridge between Promptfoo raw results and the
3
+ * 4-tier scoring engine.
4
+ *
5
+ * Converts Promptfoo `ComponentResult[]` (from test results) into the
6
+ * scoring engine's `AssertionScore[]` format, then delegates aggregation
7
+ * to `aggregateDimensions` and `computeTaskScore` from core.
8
+ *
9
+ * This bridge replaces the three legacy scoring primitives in
10
+ * `calculate-scores.ts`:
11
+ * - `accumulateDimensions` → `convertToAssertionScores` + `aggregateDimensions`
12
+ * - `averageDimensions` → (handled internally by `aggregateDimensions`)
13
+ * - `weightedComposite` → `computeTaskScore`
14
+ *
15
+ * The bridge preserves the existing 0–100 output scale. The 4-tier
16
+ * engine works in [0, 1]; this module handles the conversion at
17
+ * boundaries.
18
+ *
19
+ * @see packages/core/src/services/scoring-engine.ts — the 4-tier engine
20
+ * @see packages/eval/src/pipeline/calculate-scores.ts — the consumer
21
+ * @see docs/exec-plans/architecture-overhaul/phase-7-migrate-literacy.md
22
+ */
23
+ import { aggregateDimensions, computeTaskScore, normalizeScore, } from "../../_vendor/ailf-core/index.js";
24
+ import { classifyRubric, parseRubricScore } from "../../_vendor/ailf-core/index.js";
25
+ // ---------------------------------------------------------------------------
26
+ // Public API
27
+ // ---------------------------------------------------------------------------
28
+ /**
29
+ * Score a group of test results using the 4-tier scoring engine.
30
+ *
31
+ * This replaces the legacy `accumulateDimensions → averageDimensions →
32
+ * weightedComposite` chain with the new engine's `aggregateDimensions →
33
+ * computeTaskScore` chain.
34
+ *
35
+ * @param tests Pre-filtered test results (e.g., all gold or all baseline)
36
+ * @param profile Weight profile mapping kebab-case dimension names to weights
37
+ * (e.g., `{ "task-completion": 0.4, "code-correctness": 0.35, "doc-coverage": 0.25 }`)
38
+ * @param taskId Optional task identifier for traceability in TaskScore output
39
+ * @returns Dimensions (0–100) and composite (0–100), matching legacy output format
40
+ */
41
+ export function scoreTestGroup(tests, profile, taskId) {
42
+ let totalCost = 0;
43
+ // Step 1: Convert all ComponentResults into AssertionScore[] (0–1 scale)
44
+ const assertionScores = [];
45
+ for (const test of tests) {
46
+ totalCost += test.cost;
47
+ for (const comp of test.gradingResult.componentResults) {
48
+ if (comp.assertion?.type !== "llm-rubric")
49
+ continue;
50
+ const converted = componentToAssertionScore(comp);
51
+ if (converted)
52
+ assertionScores.push(converted);
53
+ }
54
+ }
55
+ // Step 2: Aggregate into DimensionScores (0–1 scale)
56
+ const dimensionLabels = {
57
+ "code-correctness": "Code Correctness",
58
+ "doc-coverage": "Doc Coverage",
59
+ "task-completion": "Task Completion",
60
+ };
61
+ const rawDimensions = aggregateDimensions(assertionScores, {
62
+ defaultAggregation: "mean",
63
+ dimensionLabels,
64
+ });
65
+ // Step 3: Compute weighted composite via TaskScore (0–1 scale)
66
+ const taskScoreResult = computeTaskScore(rawDimensions, {
67
+ taskId: taskId ?? "aggregate",
68
+ weights: profile,
69
+ weightSource: "scoring-bridge",
70
+ });
71
+ // Step 4: Convert back to 0–100 scale for legacy compatibility
72
+ const dimensions = {};
73
+ for (const dim of rawDimensions) {
74
+ // Map kebab-case dimension IDs to camelCase for legacy compatibility
75
+ const camelKey = kebabToCamel(dim.dimensionId);
76
+ dimensions[camelKey] = Math.round(dim.score * 100);
77
+ }
78
+ return {
79
+ composite: Math.round(taskScoreResult.score * 100),
80
+ dimensions,
81
+ rawDimensions,
82
+ totalCost,
83
+ };
84
+ }
85
+ // ---------------------------------------------------------------------------
86
+ // Conversion helpers
87
+ // ---------------------------------------------------------------------------
88
+ /**
89
+ * Convert a single Promptfoo ComponentResult into the scoring engine's
90
+ * AssertionScore format.
91
+ *
92
+ * Returns null if the component doesn't map to a known dimension.
93
+ */
94
+ function componentToAssertionScore(comp) {
95
+ const dim = classifyRubric(comp);
96
+ if (!dim)
97
+ return null;
98
+ // Parse the raw score (0–100 from the grader) and normalize to [0, 1]
99
+ const rawScore = parseRubricScore(comp);
100
+ const normalized = normalizeScore(rawScore, "llm-rubric");
101
+ return {
102
+ assertionType: comp.assertion?.type ?? "llm-rubric",
103
+ dimension: dim,
104
+ latencyMs: 0,
105
+ pass: comp.pass,
106
+ reason: comp.reason ?? "",
107
+ score: normalized,
108
+ weight: 1.0,
109
+ };
110
+ }
111
/**
 * Turn a kebab-case dimension key into camelCase by upper-casing each
 * lowercase ASCII letter that directly follows a hyphen and dropping that
 * hyphen (e.g., "task-completion" → "taskCompletion").
 */
function kebabToCamel(kebab) {
    const upperAfterHyphen = (_match, letter) => letter.toUpperCase();
    return kebab.replace(/-([a-z])/g, upperAfterHyphen);
}
@@ -0,0 +1,54 @@
1
+ /**
2
+ * TaskGraphBuilder — converts task definitions into a TaskGraph IR.
3
+ *
4
+ * The builder is the first stage of the compilation pipeline:
5
+ * GeneralizedTaskDefinitions → TaskGraphBuilder → TaskGraph → PromptfooCompiler → YAML
6
+ *
7
+ * Responsibilities:
8
+ * - Accept tasks from any source (TS, YAML, Content Lake)
9
+ * - Apply area/tag/mode filtering
10
+ * - Resolve inter-task dependencies into edges
11
+ * - Validate the graph is a DAG (reject cycles)
12
+ * - Assign execution priority via topological sort
13
+ *
14
+ * This module exists alongside `generate-configs.ts` — it does NOT replace
15
+ * the existing codegen path. Phase 7 will swap callers over to the compiler.
16
+ *
17
+ * @see packages/core/src/types/task-graph.ts — TaskGraph types
18
+ * @see docs/exec-plans/architecture-overhaul/phase-2-config-compiler.md
19
+ */
20
+ import type { DependencyEdge, FilterOptions, GeneralizedTaskDefinition, TaskGraph, TaskNode } from "../../_vendor/ailf-core/index.d.ts";
21
+ /** Options accepted by `buildTaskGraph` */
22
+ export interface TaskGraphBuildOptions {
23
+ /** Task definitions from any source (the module header lists TS, YAML, and Content Lake) */
24
+ tasks: GeneralizedTaskDefinition[];
25
+ /** Optional filter to narrow the task set before the graph is built */
26
+ filter?: FilterOptions;
27
+ /** Compilation target backend (the default applied when omitted is not visible in this declaration) */
28
+ compilationTarget?: "custom" | "promptfoo";
29
+ }
30
+ /** Result of building a task graph, as returned by `buildTaskGraph` */
31
+ export interface TaskGraphBuildResult {
32
+ /** The built graph (null if no tasks survived filtering) */
33
+ graph: TaskGraph | null;
34
+ /** Warnings emitted during build (non-fatal) */
35
+ warnings: string[];
36
+ /** Tasks that were filtered out (presumably task IDs — TODO confirm against the implementation) */
37
+ filteredOut: string[];
38
+ }
39
+ /**
40
+ * Build a TaskGraph from task definitions; the result's `graph` is null when no tasks survive filtering.
41
+ *
42
+ * 1. Filters tasks by area, tags, task IDs, and status
43
+ * 2. Creates TaskNodes with resolved variables
44
+ * 3. Discovers dependency edges from task metadata
45
+ * 4. Validates the graph is acyclic
46
+ * 5. Assigns topological priority
47
+ */
48
+ export declare function buildTaskGraph(options: TaskGraphBuildOptions): TaskGraphBuildResult;
49
+ /**
50
+ * Detect cycles in the task graph using Kahn's algorithm.
51
+ * NOTE(review): `nodes` is presumably keyed by task ID and the returned strings are presumably task IDs — confirm against the implementation.
52
+ * @returns null if acyclic, or the cycle path as a string array
53
+ */
54
+ export declare function detectCycle(nodes: Map<string, TaskNode>, edges: DependencyEdge[]): string[] | null;