@sanity/ailf 0.5.0 → 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (288) hide show
  1. package/config/features.ts +23 -0
  2. package/config/models.ts +83 -0
  3. package/config/prompts.ts +16 -0
  4. package/config/rubrics.ts +225 -0
  5. package/config/schedules.ts +47 -0
  6. package/config/sinks.ts +37 -0
  7. package/config/sources.ts +21 -0
  8. package/config/thresholds.ts +61 -0
  9. package/dist/_vendor/ailf-core/config-helpers.d.ts +174 -0
  10. package/dist/_vendor/ailf-core/config-helpers.js +150 -0
  11. package/dist/_vendor/ailf-core/env-helper.d.ts +35 -0
  12. package/dist/_vendor/ailf-core/env-helper.js +45 -0
  13. package/dist/_vendor/ailf-core/index.d.ts +3 -0
  14. package/dist/_vendor/ailf-core/index.js +5 -0
  15. package/dist/_vendor/ailf-core/ports/context.d.ts +15 -2
  16. package/dist/_vendor/ailf-core/ports/doc-fetcher.d.ts +2 -2
  17. package/dist/_vendor/ailf-core/ports/index.d.ts +2 -1
  18. package/dist/_vendor/ailf-core/ports/mode-handler.d.ts +129 -0
  19. package/dist/_vendor/ailf-core/ports/mode-handler.js +19 -0
  20. package/dist/_vendor/ailf-core/ports/task-source.d.ts +16 -122
  21. package/dist/_vendor/ailf-core/ports/task-source.js +7 -7
  22. package/dist/_vendor/ailf-core/schemas/eval-config.d.ts +7 -2
  23. package/dist/_vendor/ailf-core/schemas/eval-config.js +7 -2
  24. package/dist/_vendor/ailf-core/schemas/pipeline-request.d.ts +8 -3
  25. package/dist/_vendor/ailf-core/schemas/pipeline-request.js +6 -1
  26. package/dist/_vendor/ailf-core/schemas/pipeline.d.ts +14 -29
  27. package/dist/_vendor/ailf-core/schemas/pipeline.js +17 -8
  28. package/dist/_vendor/ailf-core/schemas/schedules.d.ts +14 -4
  29. package/dist/_vendor/ailf-core/schemas/schedules.js +6 -2
  30. package/dist/_vendor/ailf-core/schemas/sinks.d.ts +1 -1
  31. package/dist/_vendor/ailf-core/services/comparison-formatters.js +57 -19
  32. package/dist/_vendor/ailf-core/services/index.d.ts +2 -1
  33. package/dist/_vendor/ailf-core/services/index.js +2 -1
  34. package/dist/_vendor/ailf-core/services/scoring-engine.d.ts +153 -0
  35. package/dist/_vendor/ailf-core/services/scoring-engine.js +237 -0
  36. package/dist/_vendor/ailf-core/services/scoring.d.ts +15 -2
  37. package/dist/_vendor/ailf-core/services/scoring.js +25 -15
  38. package/dist/_vendor/ailf-core/types/branded-ids.d.ts +137 -0
  39. package/dist/_vendor/ailf-core/types/branded-ids.js +136 -0
  40. package/dist/_vendor/ailf-core/types/eval-mode-config.d.ts +150 -0
  41. package/dist/_vendor/ailf-core/types/eval-mode-config.js +24 -0
  42. package/dist/_vendor/ailf-core/types/generalized-task.d.ts +319 -0
  43. package/dist/_vendor/ailf-core/types/generalized-task.js +13 -0
  44. package/dist/_vendor/ailf-core/types/index.d.ts +45 -81
  45. package/dist/_vendor/ailf-core/types/index.js +8 -1
  46. package/dist/_vendor/ailf-core/types/plugin-registry.d.ts +202 -0
  47. package/dist/_vendor/ailf-core/types/plugin-registry.js +132 -0
  48. package/dist/_vendor/ailf-core/types/storage-schema.d.ts +199 -0
  49. package/dist/_vendor/ailf-core/types/storage-schema.js +39 -0
  50. package/dist/_vendor/ailf-core/types/task-graph.d.ts +86 -0
  51. package/dist/_vendor/ailf-core/types/task-graph.js +20 -0
  52. package/dist/_vendor/ailf-core/types/trace.d.ts +118 -0
  53. package/dist/_vendor/ailf-core/types/trace.js +18 -0
  54. package/dist/_vendor/ailf-core/types/variable-envelope.d.ts +80 -0
  55. package/dist/_vendor/ailf-core/types/variable-envelope.js +16 -0
  56. package/dist/_vendor/ailf-shared/dimension-names.d.ts +5 -18
  57. package/dist/_vendor/ailf-shared/dimension-names.js +6 -24
  58. package/dist/_vendor/ailf-shared/eval-modes.d.ts +38 -6
  59. package/dist/_vendor/ailf-shared/eval-modes.js +26 -2
  60. package/dist/_vendor/ailf-shared/index.d.ts +0 -1
  61. package/dist/_vendor/ailf-shared/index.js +0 -1
  62. package/dist/adapters/api-client/build-request.js +14 -13
  63. package/dist/adapters/config-sources/file-config-adapter.d.ts +20 -11
  64. package/dist/adapters/config-sources/file-config-adapter.js +38 -12
  65. package/dist/adapters/config-sources/index.d.ts +2 -0
  66. package/dist/adapters/config-sources/index.js +1 -0
  67. package/dist/adapters/config-sources/ts-config-loader.d.ts +59 -0
  68. package/dist/adapters/config-sources/ts-config-loader.js +133 -0
  69. package/dist/adapters/doc-fetchers/sanity-doc-fetcher.d.ts +3 -2
  70. package/dist/adapters/doc-fetchers/sanity-doc-fetcher.js +7 -2
  71. package/dist/adapters/task-sources/composite-task-source.d.ts +3 -3
  72. package/dist/adapters/task-sources/composite-task-source.js +1 -1
  73. package/dist/adapters/task-sources/content-lake-task-source.d.ts +7 -6
  74. package/dist/adapters/task-sources/content-lake-task-source.js +22 -23
  75. package/dist/adapters/task-sources/index.d.ts +1 -0
  76. package/dist/adapters/task-sources/index.js +1 -0
  77. package/dist/adapters/task-sources/repo-task-source.d.ts +4 -4
  78. package/dist/adapters/task-sources/repo-task-source.js +69 -16
  79. package/dist/adapters/task-sources/task-file-loader.d.ts +64 -0
  80. package/dist/adapters/task-sources/task-file-loader.js +83 -0
  81. package/dist/adapters/task-sources/yaml-task-source.d.ts +6 -6
  82. package/dist/adapters/task-sources/yaml-task-source.js +19 -16
  83. package/dist/cli.js +0 -2
  84. package/dist/commands/baseline.js +4 -1
  85. package/dist/commands/calculate-scores.js +1 -1
  86. package/dist/commands/coverage-audit.js +7 -1
  87. package/dist/commands/explain-handler.js +25 -23
  88. package/dist/commands/fetch-docs.js +3 -2
  89. package/dist/commands/generate-configs.js +1 -1
  90. package/dist/commands/interactive.js +11 -7
  91. package/dist/commands/pipeline-action.d.ts +2 -0
  92. package/dist/commands/pipeline-action.js +16 -6
  93. package/dist/commands/pipeline.d.ts +1 -0
  94. package/dist/commands/pipeline.js +4 -2
  95. package/dist/commands/pr-comment.js +1 -1
  96. package/dist/commands/publish.js +2 -2
  97. package/dist/commands/readiness-report.js +13 -6
  98. package/dist/composition-root.d.ts +1 -1
  99. package/dist/composition-root.js +67 -4
  100. package/dist/orchestration/build-app-context.js +1 -0
  101. package/dist/orchestration/build-step-sequence.js +24 -6
  102. package/dist/orchestration/steps/calculate-scores-step.js +24 -11
  103. package/dist/orchestration/steps/fetch-docs-step.js +6 -4
  104. package/dist/orchestration/steps/gap-analysis-step.js +8 -7
  105. package/dist/orchestration/steps/generate-configs-step.d.ts +16 -3
  106. package/dist/orchestration/steps/generate-configs-step.js +245 -51
  107. package/dist/orchestration/steps/grader-consistency-step.js +7 -4
  108. package/dist/orchestration/steps/mirror-repo-tasks-step.js +1 -1
  109. package/dist/orchestration/steps/readiness-step.js +5 -6
  110. package/dist/orchestration/steps/run-eval-step.d.ts +1 -2
  111. package/dist/orchestration/steps/run-eval-step.js +8 -7
  112. package/dist/pipeline/cache.d.ts +1 -1
  113. package/dist/pipeline/cache.js +36 -8
  114. package/dist/pipeline/calculate-scores.d.ts +2 -4
  115. package/dist/pipeline/calculate-scores.js +43 -113
  116. package/dist/pipeline/checks.js +2 -2
  117. package/dist/pipeline/compare.js +8 -8
  118. package/dist/pipeline/compiler/__tests__/agent-harness-handler.test.d.ts +10 -0
  119. package/dist/pipeline/compiler/__tests__/agent-harness-handler.test.js +288 -0
  120. package/dist/pipeline/compiler/__tests__/assertion-mapper.test.d.ts +9 -0
  121. package/dist/pipeline/compiler/__tests__/assertion-mapper.test.js +145 -0
  122. package/dist/pipeline/compiler/__tests__/knowledge-probe-handler.test.d.ts +10 -0
  123. package/dist/pipeline/compiler/__tests__/knowledge-probe-handler.test.js +314 -0
  124. package/dist/pipeline/compiler/__tests__/literacy-handler.test.d.ts +10 -0
  125. package/dist/pipeline/compiler/__tests__/literacy-handler.test.js +486 -0
  126. package/dist/pipeline/compiler/__tests__/mcp-server-handler.test.d.ts +10 -0
  127. package/dist/pipeline/compiler/__tests__/mcp-server-handler.test.js +355 -0
  128. package/dist/pipeline/compiler/__tests__/promptfoo-compiler.test.d.ts +9 -0
  129. package/dist/pipeline/compiler/__tests__/promptfoo-compiler.test.js +333 -0
  130. package/dist/pipeline/compiler/__tests__/sandbox-and-fixtures.test.d.ts +12 -0
  131. package/dist/pipeline/compiler/__tests__/sandbox-and-fixtures.test.js +210 -0
  132. package/dist/pipeline/compiler/__tests__/scoring-and-presets.test.d.ts +7 -0
  133. package/dist/pipeline/compiler/__tests__/scoring-and-presets.test.js +471 -0
  134. package/dist/pipeline/compiler/__tests__/scoring-bridge.test.d.ts +10 -0
  135. package/dist/pipeline/compiler/__tests__/scoring-bridge.test.js +184 -0
  136. package/dist/pipeline/compiler/__tests__/task-graph-builder.test.d.ts +8 -0
  137. package/dist/pipeline/compiler/__tests__/task-graph-builder.test.js +301 -0
  138. package/dist/pipeline/compiler/__tests__/telemetry.test.d.ts +9 -0
  139. package/dist/pipeline/compiler/__tests__/telemetry.test.js +503 -0
  140. package/dist/pipeline/compiler/assertion-mapper.d.ts +58 -0
  141. package/dist/pipeline/compiler/assertion-mapper.js +175 -0
  142. package/dist/pipeline/compiler/compiler-to-yaml.d.ts +51 -0
  143. package/dist/pipeline/compiler/compiler-to-yaml.js +222 -0
  144. package/dist/pipeline/compiler/config-loader.d.ts +56 -0
  145. package/dist/pipeline/compiler/config-loader.js +111 -0
  146. package/dist/pipeline/compiler/fixture-resolver.d.ts +41 -0
  147. package/dist/pipeline/compiler/fixture-resolver.js +113 -0
  148. package/dist/pipeline/compiler/hash.d.ts +11 -0
  149. package/dist/pipeline/compiler/hash.js +18 -0
  150. package/dist/pipeline/compiler/ignore-fields.d.ts +53 -0
  151. package/dist/pipeline/compiler/ignore-fields.js +113 -0
  152. package/dist/pipeline/compiler/index.d.ts +29 -0
  153. package/dist/pipeline/compiler/index.js +45 -0
  154. package/dist/pipeline/compiler/literacy-bridge.d.ts +102 -0
  155. package/dist/pipeline/compiler/literacy-bridge.js +172 -0
  156. package/dist/pipeline/compiler/mode-handlers/__fixtures__/agent-harness-example-tasks.d.ts +14 -0
  157. package/dist/pipeline/compiler/mode-handlers/__fixtures__/agent-harness-example-tasks.js +152 -0
  158. package/dist/pipeline/compiler/mode-handlers/__fixtures__/knowledge-probe-example-tasks.d.ts +32 -0
  159. package/dist/pipeline/compiler/mode-handlers/__fixtures__/knowledge-probe-example-tasks.js +176 -0
  160. package/dist/pipeline/compiler/mode-handlers/__fixtures__/mcp-example-tasks.d.ts +49 -0
  161. package/dist/pipeline/compiler/mode-handlers/__fixtures__/mcp-example-tasks.js +259 -0
  162. package/dist/pipeline/compiler/mode-handlers/agent-harness-handler.d.ts +70 -0
  163. package/dist/pipeline/compiler/mode-handlers/agent-harness-handler.js +485 -0
  164. package/dist/pipeline/compiler/mode-handlers/index.d.ts +16 -0
  165. package/dist/pipeline/compiler/mode-handlers/index.js +21 -0
  166. package/dist/pipeline/compiler/mode-handlers/knowledge-probe-handler.d.ts +76 -0
  167. package/dist/pipeline/compiler/mode-handlers/knowledge-probe-handler.js +245 -0
  168. package/dist/pipeline/compiler/mode-handlers/literacy-handler.d.ts +89 -0
  169. package/dist/pipeline/compiler/mode-handlers/literacy-handler.js +379 -0
  170. package/dist/pipeline/compiler/mode-handlers/mcp-assertions.d.ts +50 -0
  171. package/dist/pipeline/compiler/mode-handlers/mcp-assertions.js +277 -0
  172. package/dist/pipeline/compiler/mode-handlers/mcp-server-handler.d.ts +67 -0
  173. package/dist/pipeline/compiler/mode-handlers/mcp-server-handler.js +309 -0
  174. package/dist/pipeline/compiler/presets/index.d.ts +9 -0
  175. package/dist/pipeline/compiler/presets/index.js +8 -0
  176. package/dist/pipeline/compiler/presets/sanity-literacy.d.ts +45 -0
  177. package/dist/pipeline/compiler/presets/sanity-literacy.js +354 -0
  178. package/dist/pipeline/compiler/promptfoo-compiler.d.ts +96 -0
  179. package/dist/pipeline/compiler/promptfoo-compiler.js +230 -0
  180. package/dist/pipeline/compiler/provider-assembler.d.ts +39 -0
  181. package/dist/pipeline/compiler/provider-assembler.js +137 -0
  182. package/dist/pipeline/compiler/sandbox/docker-sandbox.d.ts +21 -0
  183. package/dist/pipeline/compiler/sandbox/docker-sandbox.js +136 -0
  184. package/dist/pipeline/compiler/sandbox/fixture-provisioner.d.ts +69 -0
  185. package/dist/pipeline/compiler/sandbox/fixture-provisioner.js +189 -0
  186. package/dist/pipeline/compiler/sandbox/git-worktree-sandbox.d.ts +20 -0
  187. package/dist/pipeline/compiler/sandbox/git-worktree-sandbox.js +114 -0
  188. package/dist/pipeline/compiler/sandbox/index.d.ts +10 -0
  189. package/dist/pipeline/compiler/sandbox/index.js +11 -0
  190. package/dist/pipeline/compiler/sandbox/sandbox-selector.d.ts +35 -0
  191. package/dist/pipeline/compiler/sandbox/sandbox-selector.js +86 -0
  192. package/dist/pipeline/compiler/sandbox/sandbox-strategy.d.ts +81 -0
  193. package/dist/pipeline/compiler/sandbox/sandbox-strategy.js +15 -0
  194. package/dist/pipeline/compiler/sandbox/tempdir-sandbox.d.ts +20 -0
  195. package/dist/pipeline/compiler/sandbox/tempdir-sandbox.js +74 -0
  196. package/dist/pipeline/compiler/scoring-bridge.d.ts +49 -0
  197. package/dist/pipeline/compiler/scoring-bridge.js +114 -0
  198. package/dist/pipeline/compiler/task-graph-builder.d.ts +54 -0
  199. package/dist/pipeline/compiler/task-graph-builder.js +291 -0
  200. package/dist/pipeline/compiler/telemetry/cost-tracker.d.ts +90 -0
  201. package/dist/pipeline/compiler/telemetry/cost-tracker.js +146 -0
  202. package/dist/pipeline/compiler/telemetry/index.d.ts +14 -0
  203. package/dist/pipeline/compiler/telemetry/index.js +19 -0
  204. package/dist/pipeline/compiler/telemetry/redactor.d.ts +58 -0
  205. package/dist/pipeline/compiler/telemetry/redactor.js +222 -0
  206. package/dist/pipeline/compiler/telemetry/tool-classifier.d.ts +32 -0
  207. package/dist/pipeline/compiler/telemetry/tool-classifier.js +120 -0
  208. package/dist/pipeline/compiler/telemetry/trace-collector.d.ts +75 -0
  209. package/dist/pipeline/compiler/telemetry/trace-collector.js +297 -0
  210. package/dist/pipeline/compiler/telemetry/trace-store.d.ts +78 -0
  211. package/dist/pipeline/compiler/telemetry/trace-store.js +85 -0
  212. package/dist/pipeline/compiler/variable-resolver.d.ts +46 -0
  213. package/dist/pipeline/compiler/variable-resolver.js +115 -0
  214. package/dist/pipeline/coverage-audit.d.ts +15 -5
  215. package/dist/pipeline/coverage-audit.js +41 -22
  216. package/dist/pipeline/eval-constants.d.ts +16 -6
  217. package/dist/pipeline/eval-constants.js +25 -4
  218. package/dist/pipeline/eval-fingerprint.d.ts +2 -2
  219. package/dist/pipeline/eval-fingerprint.js +8 -9
  220. package/dist/pipeline/expand-tasks.d.ts +19 -10
  221. package/dist/pipeline/expand-tasks.js +34 -28
  222. package/dist/pipeline/gap-analysis.d.ts +1 -1
  223. package/dist/pipeline/gap-analysis.js +2 -2
  224. package/dist/pipeline/generate-configs.d.ts +22 -4
  225. package/dist/pipeline/generate-configs.js +53 -24
  226. package/dist/pipeline/grader-api.d.ts +3 -3
  227. package/dist/pipeline/grader-api.js +5 -12
  228. package/dist/pipeline/grader-compare-runner.js +20 -27
  229. package/dist/pipeline/grader-comparison.d.ts +4 -8
  230. package/dist/pipeline/grader-comparison.js +11 -17
  231. package/dist/pipeline/grader-consistency-runner.d.ts +2 -3
  232. package/dist/pipeline/grader-consistency-runner.js +16 -20
  233. package/dist/pipeline/grader-consistency.d.ts +6 -10
  234. package/dist/pipeline/grader-consistency.js +13 -32
  235. package/dist/pipeline/grader-sensitivity-runner.js +7 -5
  236. package/dist/pipeline/grader-sensitivity.d.ts +2 -6
  237. package/dist/pipeline/grader-sensitivity.js +10 -10
  238. package/dist/pipeline/grader-validate-runner.js +7 -5
  239. package/dist/pipeline/grader-validation.d.ts +2 -6
  240. package/dist/pipeline/grader-validation.js +14 -22
  241. package/dist/pipeline/map-request-to-config.js +6 -1
  242. package/dist/pipeline/mirror-repo-tasks.d.ts +6 -6
  243. package/dist/pipeline/mirror-repo-tasks.js +16 -15
  244. package/dist/pipeline/normalize-mode.d.ts +49 -0
  245. package/dist/pipeline/normalize-mode.js +64 -0
  246. package/dist/pipeline/plan.d.ts +5 -2
  247. package/dist/pipeline/plan.js +134 -78
  248. package/dist/pipeline/pr-comment.js +2 -0
  249. package/dist/pipeline/profile-resolution.d.ts +22 -14
  250. package/dist/pipeline/profile-resolution.js +41 -19
  251. package/dist/pipeline/provenance.d.ts +2 -2
  252. package/dist/pipeline/provenance.js +12 -17
  253. package/dist/pipeline/release-report.js +4 -4
  254. package/dist/pipeline/repo-threshold-evaluator.d.ts +1 -1
  255. package/dist/pipeline/repo-threshold-evaluator.js +1 -1
  256. package/dist/pipeline/rubric-loader.d.ts +20 -0
  257. package/dist/pipeline/rubric-loader.js +37 -0
  258. package/dist/pipeline/validate.d.ts +4 -4
  259. package/dist/pipeline/validate.js +64 -53
  260. package/dist/schedules/loader.js +18 -8
  261. package/dist/scripts/migrate-task-mode.d.ts +24 -0
  262. package/dist/scripts/migrate-task-mode.js +85 -0
  263. package/dist/scripts/migrate-tasks-to-content-lake.js +11 -10
  264. package/dist/scripts/validate-task-sources.d.ts +1 -1
  265. package/dist/scripts/validate-task-sources.js +15 -15
  266. package/dist/sinks/loader.js +5 -7
  267. package/dist/sources.d.ts +7 -7
  268. package/dist/sources.js +22 -24
  269. package/dist/webhook/dispatch.js +2 -1
  270. package/package.json +6 -3
  271. package/tasks/knowledge-probe/define-type-api.task.ts +55 -0
  272. package/tasks/knowledge-probe/groq-projections.task.ts +59 -0
  273. package/tasks/literacy/frameworks.task.ts +128 -0
  274. package/tasks/literacy/functions.task.ts +69 -0
  275. package/tasks/literacy/groq.task.ts +258 -0
  276. package/tasks/literacy/nextjs-live.task.ts +75 -0
  277. package/tasks/literacy/studio-setup.task.ts +131 -0
  278. package/tasks/literacy/visual-editing.task.ts +146 -0
  279. package/config/features.yaml +0 -116
  280. package/config/models.yaml +0 -116
  281. package/config/prompts.yaml +0 -75
  282. package/config/rubrics.yaml +0 -81
  283. package/config/schedules.yaml +0 -43
  284. package/config/sinks.yaml +0 -54
  285. package/config/sources.yaml +0 -51
  286. package/config/thresholds.yaml +0 -49
  287. package/dist/agent-observer/test-imports.d.ts +0 -7
  288. package/dist/agent-observer/test-imports.js +0 -185
@@ -0,0 +1,230 @@
1
+ /**
2
+ * PromptfooCompiler — compiles a TaskGraph into Promptfoo YAML configuration.
3
+ *
4
+ * The compiler is the core of the new architecture. It takes a validated
5
+ * TaskGraph and produces a Promptfoo config that can be executed via
6
+ * `promptfoo eval`.
7
+ *
8
+ * Compilation pipeline:
9
+ * TaskGraph → resolve fixtures → resolve variables → map assertions
10
+ * → assemble prompts → assemble providers → emit YAML
11
+ *
12
+ * This module exists alongside `generate-configs.ts` — it does NOT replace
13
+ * the existing codegen path. Phase 7 will swap callers over to the compiler.
14
+ *
15
+ * @see docs/exec-plans/architecture-overhaul/phase-2-config-compiler.md
16
+ */
17
+ import { mapAssertions } from "./assertion-mapper.js";
18
+ import { resolveTaskFixtures } from "./fixture-resolver.js";
19
+ import { LiteracyVariant } from "../normalize-mode.js";
20
+ import { resolveVariables } from "./variable-resolver.js";
21
+ // ---------------------------------------------------------------------------
22
+ // Public API
23
+ // ---------------------------------------------------------------------------
24
/**
 * Turn a TaskGraph into a Promptfoo configuration object.
 *
 * Nodes are visited in ascending `priority`; each node is expanded into
 * one or more test cases by `compileNode`. Providers are derived from
 * `options.models`, prompts from `resolvePrompts(options)`.
 *
 * @param graph - Task graph; `graph.nodes.values()` supplies the nodes.
 * @param options - Compile options. Read here: `mode`, `models`,
 *   `outputPath`, `graderProvider`; the whole object is also forwarded to
 *   `compileNode` and `resolvePrompts`.
 * @returns `{ config, taskCount, testCaseCount, warnings }` — `taskCount`
 *   is the number of nodes, `testCaseCount` the number of emitted tests,
 *   and `warnings` whatever the per-node helpers appended.
 */
export function compileToPromptfoo(graph, options) {
    const warnings = [];
    // Copy the node collection before sorting so the graph itself is not reordered.
    const orderedNodes = Array.from(graph.nodes.values());
    orderedNodes.sort((left, right) => left.priority - right.priority);
    // Every node contributes an array of test cases; flatten them in node order.
    const tests = orderedNodes.flatMap((node) => compileNode(node, graph, options, warnings));
    const providers = buildProviders(options.models, options.mode);
    // Fallback chain lives in resolvePrompts: handler-owned → explicit → defaults.
    const prompts = resolvePrompts(options);
    // Optional sections are only added when the caller supplied a value,
    // so the emitted config carries no empty keys.
    const outputSection = options.outputPath
        ? { outputPath: options.outputPath }
        : {};
    const graderSection = options.graderProvider
        ? { defaultTest: { options: { provider: options.graderProvider } } }
        : {};
    const config = {
        description: `AILF evaluation — ${options.mode} mode (${tests.length} test cases)`,
        prompts,
        providers,
        tests,
        ...outputSection,
        ...graderSection,
    };
    return {
        config,
        taskCount: orderedNodes.length,
        testCaseCount: tests.length,
        warnings,
    };
}
68
+ // ---------------------------------------------------------------------------
69
+ // Node compilation
70
+ // ---------------------------------------------------------------------------
71
/**
 * Expand a single graph node into Promptfoo test cases.
 *
 * Steps: resolve fixtures for a minimal task stub, resolve variables,
 * map the node's raw assertions, then emit a "gold" test. When the node's
 * effective mode is `LiteracyVariant.STANDARD` or `"literacy"`, a second
 * `[baseline]` test with `docs` blanked and pinned to the `without-docs`
 * prompt is emitted as well.
 *
 * @param node - Graph node; reads `mode`, `taskId`, `resolvedPrompt`,
 *   `resolvedVariables`.
 * @param graph - Unused here; kept for signature compatibility.
 * @param options - Reads `mode`, `rootDir`, `graderProvider`.
 * @param warnings - Mutated in place: warnings from every helper are appended.
 * @returns One or two test-case objects for this node.
 */
function compileNode(node, graph, options, warnings) {
    // Effective mode: the node's own mode wins, then the global mode.
    const nodeMode = node.mode ?? options.mode ?? "literacy";
    // Minimal task stub passed to the fixture resolver.
    const taskStub = {
        mode: nodeMode,
        id: node.taskId,
        title: node.taskId,
        prompt: { text: node.resolvedPrompt },
    };
    const fixtures = resolveTaskFixtures(taskStub, node.resolvedVariables, { rootDir: options.rootDir });
    warnings.push(...fixtures.warnings);
    const resolved = resolveVariables(fixtures.updatedVars);
    warnings.push(...resolved.warnings);
    const values = resolved.envelope.values;
    // Raw assertions travel in the reserved `__assertions` variable
    // (presumably populated by the TaskGraphBuilder — not visible here).
    // NOTE(review): assertions are mapped with the global `options.mode`,
    // whereas fixtures and the baseline branch use `nodeMode` — confirm this
    // is intended for graphs that mix modes.
    const mapping = mapAssertions(values.__assertions ?? [], { mode: options.mode, graderProvider: options.graderProvider });
    warnings.push(...mapping.warnings);
    const assertions = mapping.mapped;
    // Keys starting with "__" are internal and never reach the test vars.
    const vars = Object.fromEntries(Object.entries(values).filter(([key]) => !key.startsWith("__")));
    // `assert` is omitted entirely when nothing was mapped.
    const assertSection = assertions.length > 0 ? { assert: assertions } : {};
    const compiled = [
        {
            description: node.taskId,
            vars,
            ...assertSection,
        },
    ];
    const wantsBaseline = nodeMode === LiteracyVariant.STANDARD || nodeMode === "literacy";
    if (!wantsBaseline) {
        return compiled;
    }
    // Baseline variant: same task, no documentation context.
    compiled.push({
        description: `${node.taskId} [baseline]`,
        vars: { ...vars, docs: "" },
        prompts: ["without-docs"],
        ...assertSection,
    });
    return compiled;
}
122
+ // ---------------------------------------------------------------------------
123
+ // Provider assembly
124
+ // ---------------------------------------------------------------------------
125
/**
 * Derive the Promptfoo `providers` array from the model registry.
 *
 * A model is dropped when it declares a non-empty `modes` list that
 * `modelMatchesMode` rejects for `mode`, or when it names an `env`
 * variable that is unset/empty in `process.env`. Each surviving model
 * becomes `{ id, label, config }`, where `config` is `models.defaults`
 * shallowly overridden by the model's own `config`.
 *
 * @param models - Registry object; reads `models.models` and `models.defaults`.
 * @param mode - Evaluation mode forwarded to `modelMatchesMode`.
 * @returns Provider entries in registry order.
 */
function buildProviders(models, mode) {
    const isUsable = (model) => {
        const declaresModes = model.modes && model.modes.length > 0;
        if (declaresModes && !modelMatchesMode(model, mode)) {
            return false;
        }
        // Env gate: skip models whose API key variable isn't set.
        const gatedOut = model.env && !process.env[model.env];
        return !gatedOut;
    };
    const toProvider = (model) => ({
        id: model.id,
        label: model.label,
        config: {
            ...models.defaults,
            ...model.config,
        },
    });
    return models.models.filter(isUsable).map(toProvider);
}
151
/**
 * Decide whether a model entry may be used for the given evaluation mode.
 *
 * - A model without a (non-empty) `modes` list matches every mode.
 * - For `"literacy"`, the model must list `LiteracyVariant.STANDARD`.
 * - Every other mode accepts the model regardless of its `modes` list.
 *
 * Variant-specific provider filtering is done elsewhere (the original
 * note points at the provider-assembler and generate-configs-step).
 *
 * @param model - Model entry; only `model.modes` is inspected.
 * @param mode - Evaluation mode name.
 * @returns `true` when the model should be kept.
 */
function modelMatchesMode(model, mode) {
    const declaredModes = model.modes;
    const unrestricted = !declaredModes || declaredModes.length === 0;
    if (unrestricted) {
        return true;
    }
    if (mode === "literacy") {
        return declaredModes.includes(LiteracyVariant.STANDARD);
    }
    // Non-literacy modes accept all models by default.
    return true;
}
169
+ // ---------------------------------------------------------------------------
170
+ // Prompt resolution
171
+ // ---------------------------------------------------------------------------
172
/**
 * Pick the prompt list for the compiled config.
 *
 * Precedence:
 *   1. `options.handler.getPrompts()` — used when it returns an object
 *      with at least one key; its values are converted with
 *      `promptTemplateToPromptfoo`.
 *   2. `options.prompts` — returned as-is when truthy.
 *   3. `buildDefaultPrompts(options.mode)` — built-in fallback.
 *
 * @param options - Compile options; reads `handler`, `prompts`, `mode`.
 * @returns Array of Promptfoo prompt entries.
 */
function resolvePrompts(options) {
    // Both the handler and its getPrompts method are optional.
    const fromHandler = options.handler?.getPrompts?.();
    const handlerHasPrompts = Boolean(fromHandler) && Object.keys(fromHandler).length > 0;
    if (handlerHasPrompts) {
        return Object.values(fromHandler).map(promptTemplateToPromptfoo);
    }
    return options.prompts
        ? options.prompts
        : buildDefaultPrompts(options.mode);
}
190
/**
 * Adapt a PromptTemplate (core port type) to the compiler's prompt shape.
 *
 * `id` and `label` are copied through; `template` is exposed as `raw`.
 * Any other properties on the input are dropped.
 *
 * @param pt - Prompt template with `id`, `label`, `template`.
 * @returns `{ id, label, raw }`.
 */
function promptTemplateToPromptfoo(pt) {
    const { id, label, template } = pt;
    return { id, label, raw: template };
}
196
+ // ---------------------------------------------------------------------------
197
+ // Default prompts
198
+ // ---------------------------------------------------------------------------
199
/**
 * Built-in prompt entries used when neither the mode handler nor the
 * caller supplies prompts.
 *
 * - `"literacy"` gets two prompts: `with-docs` (task plus documentation
 *   context) and `without-docs` (task only).
 * - Any other mode gets a single `default` prompt containing the task.
 *
 * @param mode - Evaluation mode name.
 * @returns Array of `{ id, label, raw }` prompt entries.
 */
function buildDefaultPrompts(mode) {
    if (mode !== "literacy") {
        return [
            { id: "default", label: "Default prompt", raw: "{{task}}" },
        ];
    }
    const withDocs = {
        id: "with-docs",
        label: "With documentation context",
        raw: "{{task}}\n\nDocumentation context:\n{{docs}}",
    };
    const withoutDocs = {
        id: "without-docs",
        label: "Without documentation context",
        raw: "{{task}}",
    };
    return [withDocs, withoutDocs];
}
@@ -0,0 +1,39 @@
1
+ /**
2
+ * provider-assembler.ts — Build per-literacy-variant provider arrays from models config.
3
+ *
4
+ * Replicates the provider-building logic from the legacy generate-configs.ts
5
+ * so the new compiler produces identical provider configurations.
6
+ *
7
+ * Separated into its own module so GenerateConfigsStep can import it
8
+ * without pulling in the full legacy generate-configs machinery.
9
+ */
10
+ import { type ModelsConfig } from "../../_vendor/ailf-core/index.d.ts";
11
+ import type { ResolvedSourceConfig } from "../../sources.js";
12
/**
 * Provider lists, one per literacy variant.
 *
 * The property names are literacy variant names, not EvalMode values.
 * Each variant gets its own list because the provider config differs per
 * variant (e.g. agentic providers carry tool-use config, observed
 * providers carry observer instrumentation).
 */
export interface LiteracyVariantProviders {
    /** Providers for the `baseline` variant. */
    baseline: Array<Record<string, unknown>>;
    /** Providers for the `agentic` variant. */
    agentic: Array<Record<string, unknown>>;
    /** Providers for the `observed` variant. */
    observed: Array<Record<string, unknown>>;
}
25
/**
 * Legacy name for {@link LiteracyVariantProviders}.
 *
 * @deprecated Use LiteracyVariantProviders — kept for backward compatibility.
 */
export type AssembledProviders = LiteracyVariantProviders;
27
/**
 * Return value of `loadModelsAndProviders`: the loaded models config
 * together with the provider arrays assembled from it.
 */
export interface ModelsAndProviders {
    /** The models configuration the providers were built from. */
    models: ModelsConfig;
    /** Provider arrays keyed by literacy variant. */
    providers: LiteracyVariantProviders;
}
32
/**
 * Load the models config and assemble the provider arrays for each
 * literacy variant (baseline, agentic, observed). The result feeds the
 * YAML writer that produces the per-variant promptfoo config files.
 *
 * @param rootDir - Directory the models config is loaded from.
 * @param source - Optional resolved source config; forwarded to the
 *   agentic provider builder.
 * @param searchMode - Optional search mode; forwarded to the agentic
 *   provider builder.
 * @param allowedOrigins - Optional origin list; forwarded to the agentic
 *   provider builder.
 * @returns The loaded models config plus the assembled provider arrays.
 */
export declare function loadModelsAndProviders(
    rootDir: string,
    source?: ResolvedSourceConfig,
    searchMode?: string,
    allowedOrigins?: string[],
): ModelsAndProviders;
@@ -0,0 +1,137 @@
1
+ /**
2
+ * provider-assembler.ts — Build per-literacy-variant provider arrays from models config.
3
+ *
4
+ * Replicates the provider-building logic from the legacy generate-configs.ts
5
+ * so the new compiler produces identical provider configurations.
6
+ *
7
+ * Separated into its own module so GenerateConfigsStep can import it
8
+ * without pulling in the full legacy generate-configs machinery.
9
+ */
10
+ import { extractModelName, extractProvider, mergeConfig, modelMatchesMode, } from "../../_vendor/ailf-core/index.js";
11
+ import { LiteracyVariant } from "../normalize-mode.js";
12
+ import { loadConfigFile } from "./config-loader.js";
13
+ // ---------------------------------------------------------------------------
14
+ // Public API
15
+ // ---------------------------------------------------------------------------
16
/**
 * Read the models config found under `rootDir` and derive one provider
 * array per literacy variant (baseline, agentic, observed).
 *
 * The arrays are what the YAML writer turns into the per-variant promptfoo
 * config files. `source`, `searchMode` and `allowedOrigins` are forwarded
 * to the agentic builder only; baseline and observed providers depend on
 * the models config alone.
 *
 * @param rootDir - Directory passed to the config loader.
 * @param source - Optional resolved source config for agentic providers.
 * @param searchMode - Optional agentic search mode.
 * @param allowedOrigins - Forwarded to the agentic builder.
 * @returns `{ models, providers }` where `providers` holds the three arrays.
 */
export function loadModelsAndProviders(rootDir, source, searchMode, allowedOrigins) {
    const models = loadModelsYaml(rootDir);
    // Built in the same order as the keys of the returned object.
    const baseline = buildBaselineProviders(models);
    const agentic = buildAgenticProviders(models, source, searchMode, allowedOrigins);
    const observed = buildObservedProviders(models);
    return { models, providers: { baseline, agentic, observed } };
}
34
+ // ---------------------------------------------------------------------------
35
+ // Baseline providers
36
+ // ---------------------------------------------------------------------------
37
/**
 * Build the baseline provider list.
 *
 * Keeps the models that match `LiteracyVariant.STANDARD` and emits, for
 * each one, its own id and label plus the defaults merged with the
 * model-specific config.
 *
 * @param models - Loaded models config (`defaults` + `models` array).
 * @returns One `{ config, id, label }` record per matching model.
 */
function buildBaselineProviders(models) {
    const { defaults } = models;
    const selected = models.models.filter((candidate) => modelMatchesMode(candidate, LiteracyVariant.STANDARD));
    const toProvider = (entry) => {
        const config = mergeConfig(defaults, entry.config);
        return { config, id: entry.id, label: entry.label };
    };
    return selected.map((entry) => toProvider(entry));
}
46
+ // ---------------------------------------------------------------------------
47
+ // Observed providers
48
+ // ---------------------------------------------------------------------------
49
/**
 * Build the observed provider list.
 *
 * Each model matching `LiteracyVariant.OBSERVED` is routed through the
 * agent-observer provider script. The real model name is carried in
 * `config.modelName`, and the defaults' `observerOptions` are passed along
 * as `recordOptions` (an empty object when none are configured).
 *
 * @param models - Loaded models config (`defaults` + `models` array).
 * @returns One observer-wrapped provider record per matching model.
 */
function buildObservedProviders(models) {
    const isObserved = (candidate) => modelMatchesMode(candidate, LiteracyVariant.OBSERVED);
    const wrapWithObserver = (entry) => {
        const modelName = extractModelName(entry.id);
        const config = {
            ...mergeConfig(models.defaults, entry.config),
            modelName,
            observe: true,
            recordOptions: models.defaults.observerOptions ?? {},
        };
        return {
            config,
            id: "file://dist/agent-observer/provider.js",
            label: `${entry.label} (Observed)`,
        };
    };
    return models.models
        .filter((candidate) => isObserved(candidate))
        .map((entry) => wrapWithObserver(entry));
}
66
+ // ---------------------------------------------------------------------------
67
+ // Agentic providers
68
+ // ---------------------------------------------------------------------------
69
/**
 * Build the agentic provider list: every "agentic-naive" model first,
 * followed by every "agentic-optimized" model.
 *
 * All entries point at the agentic provider script and share the same
 * source-derived settings. Key order inside each config is kept stable
 * (merged model config, then source settings, then observer flags).
 *
 * @param models - Loaded models config (`defaults` + `models` array).
 * @param source - Optional resolved source; without it no source settings
 *   (including `searchMode`) are added to the config.
 * @param searchMode - Search mode; "open" when omitted. Only emitted when it
 *   differs from "open".
 * @param _allowedOrigins - Not read here; `source.allowedOrigins` is used instead.
 * @returns Provider records for naive agents, then optimized agents.
 */
function buildAgenticProviders(models, source, searchMode, _allowedOrigins) {
    const naiveModels = models.models.filter((m) => modelMatchesMode(m, "agentic-naive"));
    const optimizedModels = models.models.filter((m) => modelMatchesMode(m, "agentic-optimized"));
    const resolvedSearchMode = searchMode ?? "open";
    // Collect source-derived settings as ordered entries so optional keys
    // are simply left out while the mandatory ones keep their position.
    const sourceEntries = [];
    if (source) {
        if (source.allowedOrigins?.length) {
            sourceEntries.push(["allowedOrigins", source.allowedOrigins]);
        }
        sourceEntries.push(["docBaseUrl", source.baseUrl]);
        if (source.headers && Object.keys(source.headers).length > 0) {
            sourceEntries.push(["customHeaders", source.headers]);
        }
        sourceEntries.push(["llmsTxtUrl", source.llmsTxt]);
        if (source.priorityDomain) {
            sourceEntries.push(["priorityDomain", source.priorityDomain]);
        }
        if (resolvedSearchMode !== "open") {
            sourceEntries.push(["searchMode", resolvedSearchMode]);
        }
    }
    const sourceConfig = Object.fromEntries(sourceEntries);
    // Naive and optimized agents differ only in `agentMode` and label suffix.
    const toAgenticProvider = (entry, agentMode, labelSuffix) => {
        const modelName = extractModelName(entry.id);
        const provider = extractProvider(entry.id);
        const overrides = {
            agentMode,
            maxToolRounds: models.defaults.maxToolRounds ?? 5,
            model: modelName,
            provider,
        };
        return {
            config: {
                ...mergeConfig(models.defaults, entry.config, overrides),
                ...sourceConfig,
                observe: true,
                observerOptions: models.defaults.observerOptions ?? {},
            },
            id: "file://dist/agent-observer/agentic-provider.js",
            label: `${entry.label} ${labelSuffix}`,
        };
    };
    return [
        ...naiveModels.map((entry) => toAgenticProvider(entry, "naive", "(Naive Agent)")),
        ...optimizedModels.map((entry) => toAgenticProvider(entry, "optimized", "(Optimized Agent)")),
    ];
}
132
+ // ---------------------------------------------------------------------------
133
+ // Helpers
134
+ // ---------------------------------------------------------------------------
135
/**
 * Load the "models" config through the shared config loader and return
 * only its `data` payload.
 *
 * @param rootDir - Directory the config loader resolves the config from.
 * @returns The loaded models config.
 */
function loadModelsYaml(rootDir) {
    const { data } = loadConfigFile("models", rootDir);
    return data;
}
@@ -0,0 +1,21 @@
1
+ /**
2
+ * DockerSandboxStrategy — full isolation via Docker containers.
3
+ *
4
+ * Provides deterministic filesystem, network control, and resource limits.
5
+ * The class does not fall back on its own: callers check `isAvailable()` and are expected to use TempDirSandboxStrategy when Docker is unavailable.
6
+ *
7
+ * Docker interaction uses the `docker` CLI via `execFileSync` (array form,
8
+ * no shell) to prevent shell injection from task-supplied values like
9
+ * image names or task IDs.
10
+ *
11
+ * @see docs/exec-plans/architecture-overhaul/phase-4-agent-harness.md
12
+ */
13
+ import type { SandboxArtifacts, SandboxInfo, SandboxProvisionOptions, SandboxStrategy } from "./sandbox-strategy.js";
14
+ export declare class DockerSandboxStrategy implements SandboxStrategy { // sandbox strategy backed by the `docker` CLI
15
+ readonly name = "Docker Container"; // human-readable strategy name
16
+ readonly type: "docker"; // strategy discriminator
17
+ isAvailable(): Promise<boolean>; // true when `docker info` succeeds within 5 seconds, false otherwise (never rejects)
18
+ provision(options: SandboxProvisionOptions): Promise<SandboxInfo>; // validates the image against the allowlist, then creates and starts a hardened container with a host staging directory mounted at /workspace; rejects on failure
19
+ collectArtifacts(sandbox: SandboxInfo): Promise<SandboxArtifacts>; // derives modified files from `docker diff`; resolves with an empty file list if the command fails
20
+ teardown(sandbox: SandboxInfo): Promise<void>; // force-removes the container; cleanup errors are ignored
21
+ }
@@ -0,0 +1,136 @@
1
+ /**
2
+ * DockerSandboxStrategy — full isolation via Docker containers.
3
+ *
4
+ * Provides deterministic filesystem, network control, and resource limits.
5
+ * The class does not fall back on its own: callers check `isAvailable()` and are expected to use TempDirSandboxStrategy when Docker is unavailable.
6
+ *
7
+ * Docker interaction uses the `docker` CLI via `execFileSync` (array form,
8
+ * no shell) to prevent shell injection from task-supplied values like
9
+ * image names or task IDs.
10
+ *
11
+ * @see docs/exec-plans/architecture-overhaul/phase-4-agent-harness.md
12
+ */
13
+ import { randomUUID } from "crypto";
14
+ import { execFileSync } from "child_process";
15
+ import { mkdirSync, rmSync } from "fs";
16
+ import { tmpdir } from "os";
17
+ import { resolve } from "path";
18
+ const DEFAULT_IMAGE = "node:22-slim";
19
+ const DEFAULT_WORKDIR = "/workspace";
20
/**
 * Allowlist of image references a sandbox may run.
 *
 * Accepts a bare official base image name (node, python, ubuntu, alpine,
 * debian, rust, golang) or a single-segment `mcr.microsoft.com/<name>`
 * image, each optionally followed by `:<tag>`. Registry hosts, user
 * namespaces and digests are rejected so that nothing is pulled from an
 * untrusted registry.
 */
const ALLOWED_IMAGE_PATTERN = /^(node|python|ubuntu|alpine|debian|rust|golang|mcr\.microsoft\.com\/[a-z]+)(:[a-zA-Z0-9._-]+)?$/;
/**
 * Reject any Docker image reference that is not on the allowlist.
 *
 * @param image - Image reference requested for the sandbox. Must be a
 *   string; other values are rejected rather than coerced, so an array
 *   such as `["node"]` cannot slip through via its string form.
 * @throws {Error} When `image` is not a string or does not match
 *   `ALLOWED_IMAGE_PATTERN`.
 */
function validateDockerImage(image) {
    if (typeof image !== "string" || !ALLOWED_IMAGE_PATTERN.test(image)) {
        throw new Error(`Docker image "${String(image)}" is not in the allowlist. ` +
            `Only official base images (node, python, ubuntu, alpine, debian, rust, golang) ` +
            `and single-segment mcr.microsoft.com images are permitted.`);
    }
}
28
/**
 * Extract workspace-relative paths from `docker diff` output.
 *
 * Each line looks like `C /workspace/file.ts`, where the leading letter is
 * C (changed), A (added) or D (deleted). Only entries strictly below
 * `workdir` are kept: the `workdir` entry itself and sibling paths that
 * merely share its prefix (e.g. `/workspace2/x`) are ignored.
 *
 * @param diffOutput - Raw stdout of `docker diff` (may be empty).
 * @param workdir - Absolute container path of the workspace, no trailing slash.
 * @returns Paths relative to `workdir`, in the order they were reported.
 */
function parseWorkspaceDiff(diffOutput, workdir) {
    const prefix = `${workdir}/`;
    const linePattern = /^[ACD]\s+(.+)$/;
    const files = [];
    for (const line of diffOutput.split("\n")) {
        const match = linePattern.exec(line.trim());
        if (match && match[1].startsWith(prefix)) {
            files.push(match[1].slice(prefix.length));
        }
    }
    return files;
}
/** Force-remove a container by name or id; failures are ignored (best-effort cleanup). */
function removeContainerQuietly(nameOrId) {
    try {
        execFileSync("docker", ["rm", "-f", nameOrId], {
            stdio: "ignore",
            timeout: 10_000,
        });
    }
    catch {
        // Best-effort cleanup
    }
}
/** Recursively delete a directory; failures are ignored (best-effort cleanup). */
function removeDirQuietly(dir) {
    try {
        rmSync(dir, { recursive: true, force: true });
    }
    catch {
        // Best-effort cleanup
    }
}
/**
 * Sandbox strategy that runs each task in its own Docker container.
 *
 * All Docker interaction goes through `execFileSync` with an argument
 * array (no shell), so task-supplied values cannot inject shell syntax.
 */
export class DockerSandboxStrategy {
    name = "Docker Container";
    type = "docker";
    /**
     * Report whether a usable Docker daemon is reachable.
     *
     * @returns `true` when `docker info` exits successfully within 5 seconds,
     *   `false` on any failure (missing CLI, daemon down, timeout).
     */
    async isAvailable() {
        try {
            execFileSync("docker", ["info"], { stdio: "ignore", timeout: 5000 });
            return true;
        }
        catch {
            return false;
        }
    }
    /**
     * Create and start a hardened container with a fresh host staging
     * directory bind-mounted as its workspace.
     *
     * @param options - Provisioning options; reads `image` (defaults to
     *   `DEFAULT_IMAGE`) and `limits` (`cpus`, `memoryBytes`, `networkAccess`).
     * @returns Sandbox info whose `workingDir` is the host staging directory.
     * @throws {Error} When the image is not allowlisted or when creating or
     *   starting the container fails. On failure the partially created
     *   container and the staging directory are removed before rethrowing.
     */
    async provision(options) {
        const image = options.image ?? DEFAULT_IMAGE;
        validateDockerImage(image);
        const id = `ailf-${randomUUID().slice(0, 12)}`;
        // Create a local staging directory for fixture injection
        const stagingDir = resolve(tmpdir(), `${id}-staging`);
        mkdirSync(stagingDir, { recursive: true });
        // Build docker create command as array (no shell, prevents injection)
        const args = ["create", "--name", id, "--workdir", DEFAULT_WORKDIR];
        // Security hardening — defense-in-depth against container escape
        args.push("--cap-drop", "ALL");
        args.push("--security-opt", "no-new-privileges");
        args.push("--read-only");
        args.push("--tmpfs", "/tmp:rw,noexec,nosuid,size=100m");
        // Resource limits
        if (options.limits) {
            if (options.limits.cpus) {
                args.push("--cpus", String(options.limits.cpus));
            }
            if (options.limits.memoryBytes) {
                args.push("--memory", String(options.limits.memoryBytes));
            }
            if (options.limits.networkAccess === false) {
                args.push("--network", "none");
            }
        }
        // Bind mount staging directory
        args.push("-v", `${stagingDir}:${DEFAULT_WORKDIR}`);
        args.push(image);
        args.push("sleep", "infinity"); // Keep container alive
        try {
            const containerId = execFileSync("docker", args, {
                encoding: "utf-8",
                timeout: 30_000,
            }).trim();
            // Start the container
            execFileSync("docker", ["start", id], {
                stdio: "ignore",
                timeout: 10_000,
            });
            return {
                id,
                workingDir: stagingDir,
                strategy: "docker",
                containerId: containerId || id,
                createdAt: new Date().toISOString(),
            };
        }
        catch (err) {
            // `docker create` may have succeeded before `docker start` failed;
            // the caller never receives a SandboxInfo to tear down, so release
            // the container and staging directory here to avoid leaking them.
            removeContainerQuietly(id);
            removeDirQuietly(stagingDir);
            const msg = err instanceof Error ? err.message : String(err);
            throw new Error(`Failed to provision Docker sandbox "${id}": ${msg}`, {
                cause: err,
            });
        }
    }
    /**
     * Collect the files the container changed under the workspace.
     *
     * NOTE(review): `docker diff` reports changes to the container's own
     * filesystem layer; changes inside the bind-mounted workspace may not be
     * listed at all — confirm against the Docker documentation and consider
     * scanning `sandbox.workingDir` instead.
     *
     * @param sandbox - Sandbox returned by `provision`; reads `id` and `createdAt`.
     * @returns `modifiedFiles` (workspace-relative), the raw `diff` when
     *   non-empty, and `durationMs` since `createdAt`. If `docker diff` fails
     *   the result carries an empty `modifiedFiles` and no `diff`.
     */
    async collectArtifacts(sandbox) {
        const elapsedMs = () => Date.now() - new Date(sandbox.createdAt).getTime();
        try {
            // Get list of modified files via docker diff (array form)
            const diff = execFileSync("docker", ["diff", sandbox.id], {
                encoding: "utf-8",
                timeout: 10_000,
            }).trim();
            return {
                modifiedFiles: parseWorkspaceDiff(diff, DEFAULT_WORKDIR),
                diff: diff || undefined,
                durationMs: elapsedMs(),
            };
        }
        catch {
            return {
                modifiedFiles: [],
                durationMs: elapsedMs(),
            };
        }
    }
    /**
     * Remove the container and the staging directory created by `provision`.
     * Best-effort: cleanup failures are ignored and never reject.
     *
     * @param sandbox - Sandbox returned by `provision`; reads `id` and `workingDir`.
     */
    async teardown(sandbox) {
        removeContainerQuietly(sandbox.id);
        // Only delete the directory this strategy itself would have created for
        // this sandbox id, so a foreign or tampered `workingDir` is never removed.
        const expectedStagingDir = resolve(tmpdir(), `${sandbox.id}-staging`);
        if (sandbox.workingDir === expectedStagingDir) {
            removeDirQuietly(expectedStagingDir);
        }
    }
}
@@ -0,0 +1,69 @@
1
+ /**
2
+ * Fixture provisioner — five-stage pipeline for preparing sandbox state.
3
+ *
4
+ * Pipeline stages:
5
+ * Resolve → Fetch → Cache → Transform → Inject
6
+ *
7
+ * Handles three URI schemes for v1:
8
+ * - file:// — local filesystem path (relative to task)
9
+ * - template:// — built-in project templates
10
+ * - sanity:// — Content Lake document by ID or query
11
+ *
12
+ * @see docs/design-docs/architecture-overhaul/fixtures-artifacts.md
13
+ * @see docs/exec-plans/architecture-overhaul/phase-4-agent-harness.md
14
+ */
15
+ import type { SandboxInfo } from "./sandbox-strategy.js";
16
+ /** A fixture reference from a task definition: where the content comes from (`uri`) and where it is injected (`inject`, `key`) */
17
+ export interface FixtureRef {
18
+ /** URI pointing to the fixture source (v1 schemes per the module header: file://, template://, sanity://) */
19
+ uri: string;
20
+ /** Injection target */
21
+ inject: "provider_config" | "system_prompt" | "vars" | "working_dir";
22
+ /** Key name (for vars injection) or relative path (for working_dir); optional */
23
+ key?: string;
24
+ /** Content transform to apply before injection; optional (presumably no transform when omitted — TODO confirm) */
25
+ transform?: FixtureTransform;
26
+ }
27
+ /** Available fixture transforms (names only; what each transform does is defined by the provisioner implementation, not by this declaration) */
28
+ export type FixtureTransform = "extract-text" | "none" | "strip-html" | "truncate";
29
+ /** A resolved and fetched fixture ready for injection */
30
+ export interface ProvisionedFixture {
31
+ /** Original URI */
32
+ uri: string;
33
+ /** Resolved content */
34
+ content: string;
35
+ /** SHA-256 hash of the content */
36
+ contentHash: string;
37
+ /** Injection target (same union as FixtureRef.inject) */
38
+ inject: FixtureRef["inject"];
39
+ /** Key or path; optional (see FixtureRef.key for how it is interpreted per injection target) */
40
+ key?: string;
41
+ }
42
+ /** Result of the provisioning pipeline */
43
+ export interface ProvisioningResult {
44
+ /** Successfully provisioned fixtures */
45
+ fixtures: ProvisionedFixture[];
46
+ /** Variable overrides from vars-injected fixtures */
47
+ vars: Record<string, unknown>;
48
+ /** Warnings (non-fatal issues) */
49
+ warnings: string[];
50
+ /** Fixture manifest for reproducibility (string-to-string map; presumably fixture URI to content hash — TODO confirm) */
51
+ manifest: Record<string, string>;
52
+ }
53
+ /** Options for the provisioning pipeline */
54
+ export interface ProvisioningOptions {
55
+ /** Root directory for resolving relative paths */
56
+ rootDir: string;
57
+ /** Sandbox to inject working_dir fixtures into; optional */
58
+ sandbox?: SandboxInfo;
59
+ /** Cache directory for content-addressable storage; optional */
60
+ cacheDir?: string;
61
+ }
62
+ /**
63
+ * Run the five-stage fixture provisioning pipeline
+ * (Resolve → Fetch → Cache → Transform → Inject).
64
+ *
65
+ * @param refs - Fixture references from the task definition
66
+ * @param options - Provisioning configuration
67
+ * @returns Promise of the provisioned fixtures and injection metadata
+ *   (vars overrides, warnings and the fixture manifest)
68
+ */
69
+ export declare function provisionFixtures(refs: FixtureRef[], options: ProvisioningOptions): Promise<ProvisioningResult>;