@sanity/ailf 0.5.0 → 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (288) hide show
  1. package/config/features.ts +23 -0
  2. package/config/models.ts +83 -0
  3. package/config/prompts.ts +16 -0
  4. package/config/rubrics.ts +225 -0
  5. package/config/schedules.ts +47 -0
  6. package/config/sinks.ts +37 -0
  7. package/config/sources.ts +21 -0
  8. package/config/thresholds.ts +61 -0
  9. package/dist/_vendor/ailf-core/config-helpers.d.ts +174 -0
  10. package/dist/_vendor/ailf-core/config-helpers.js +150 -0
  11. package/dist/_vendor/ailf-core/env-helper.d.ts +35 -0
  12. package/dist/_vendor/ailf-core/env-helper.js +45 -0
  13. package/dist/_vendor/ailf-core/index.d.ts +3 -0
  14. package/dist/_vendor/ailf-core/index.js +5 -0
  15. package/dist/_vendor/ailf-core/ports/context.d.ts +15 -2
  16. package/dist/_vendor/ailf-core/ports/doc-fetcher.d.ts +2 -2
  17. package/dist/_vendor/ailf-core/ports/index.d.ts +2 -1
  18. package/dist/_vendor/ailf-core/ports/mode-handler.d.ts +129 -0
  19. package/dist/_vendor/ailf-core/ports/mode-handler.js +19 -0
  20. package/dist/_vendor/ailf-core/ports/task-source.d.ts +16 -122
  21. package/dist/_vendor/ailf-core/ports/task-source.js +7 -7
  22. package/dist/_vendor/ailf-core/schemas/eval-config.d.ts +7 -2
  23. package/dist/_vendor/ailf-core/schemas/eval-config.js +7 -2
  24. package/dist/_vendor/ailf-core/schemas/pipeline-request.d.ts +8 -3
  25. package/dist/_vendor/ailf-core/schemas/pipeline-request.js +6 -1
  26. package/dist/_vendor/ailf-core/schemas/pipeline.d.ts +14 -29
  27. package/dist/_vendor/ailf-core/schemas/pipeline.js +17 -8
  28. package/dist/_vendor/ailf-core/schemas/schedules.d.ts +14 -4
  29. package/dist/_vendor/ailf-core/schemas/schedules.js +6 -2
  30. package/dist/_vendor/ailf-core/schemas/sinks.d.ts +1 -1
  31. package/dist/_vendor/ailf-core/services/comparison-formatters.js +57 -19
  32. package/dist/_vendor/ailf-core/services/index.d.ts +2 -1
  33. package/dist/_vendor/ailf-core/services/index.js +2 -1
  34. package/dist/_vendor/ailf-core/services/scoring-engine.d.ts +153 -0
  35. package/dist/_vendor/ailf-core/services/scoring-engine.js +237 -0
  36. package/dist/_vendor/ailf-core/services/scoring.d.ts +15 -2
  37. package/dist/_vendor/ailf-core/services/scoring.js +25 -15
  38. package/dist/_vendor/ailf-core/types/branded-ids.d.ts +137 -0
  39. package/dist/_vendor/ailf-core/types/branded-ids.js +136 -0
  40. package/dist/_vendor/ailf-core/types/eval-mode-config.d.ts +150 -0
  41. package/dist/_vendor/ailf-core/types/eval-mode-config.js +24 -0
  42. package/dist/_vendor/ailf-core/types/generalized-task.d.ts +319 -0
  43. package/dist/_vendor/ailf-core/types/generalized-task.js +13 -0
  44. package/dist/_vendor/ailf-core/types/index.d.ts +45 -81
  45. package/dist/_vendor/ailf-core/types/index.js +8 -1
  46. package/dist/_vendor/ailf-core/types/plugin-registry.d.ts +202 -0
  47. package/dist/_vendor/ailf-core/types/plugin-registry.js +132 -0
  48. package/dist/_vendor/ailf-core/types/storage-schema.d.ts +199 -0
  49. package/dist/_vendor/ailf-core/types/storage-schema.js +39 -0
  50. package/dist/_vendor/ailf-core/types/task-graph.d.ts +86 -0
  51. package/dist/_vendor/ailf-core/types/task-graph.js +20 -0
  52. package/dist/_vendor/ailf-core/types/trace.d.ts +118 -0
  53. package/dist/_vendor/ailf-core/types/trace.js +18 -0
  54. package/dist/_vendor/ailf-core/types/variable-envelope.d.ts +80 -0
  55. package/dist/_vendor/ailf-core/types/variable-envelope.js +16 -0
  56. package/dist/_vendor/ailf-shared/dimension-names.d.ts +5 -18
  57. package/dist/_vendor/ailf-shared/dimension-names.js +6 -24
  58. package/dist/_vendor/ailf-shared/eval-modes.d.ts +38 -6
  59. package/dist/_vendor/ailf-shared/eval-modes.js +26 -2
  60. package/dist/_vendor/ailf-shared/index.d.ts +0 -1
  61. package/dist/_vendor/ailf-shared/index.js +0 -1
  62. package/dist/adapters/api-client/build-request.js +14 -13
  63. package/dist/adapters/config-sources/file-config-adapter.d.ts +20 -11
  64. package/dist/adapters/config-sources/file-config-adapter.js +38 -12
  65. package/dist/adapters/config-sources/index.d.ts +2 -0
  66. package/dist/adapters/config-sources/index.js +1 -0
  67. package/dist/adapters/config-sources/ts-config-loader.d.ts +59 -0
  68. package/dist/adapters/config-sources/ts-config-loader.js +133 -0
  69. package/dist/adapters/doc-fetchers/sanity-doc-fetcher.d.ts +3 -2
  70. package/dist/adapters/doc-fetchers/sanity-doc-fetcher.js +7 -2
  71. package/dist/adapters/task-sources/composite-task-source.d.ts +3 -3
  72. package/dist/adapters/task-sources/composite-task-source.js +1 -1
  73. package/dist/adapters/task-sources/content-lake-task-source.d.ts +7 -6
  74. package/dist/adapters/task-sources/content-lake-task-source.js +22 -23
  75. package/dist/adapters/task-sources/index.d.ts +1 -0
  76. package/dist/adapters/task-sources/index.js +1 -0
  77. package/dist/adapters/task-sources/repo-task-source.d.ts +4 -4
  78. package/dist/adapters/task-sources/repo-task-source.js +69 -16
  79. package/dist/adapters/task-sources/task-file-loader.d.ts +64 -0
  80. package/dist/adapters/task-sources/task-file-loader.js +83 -0
  81. package/dist/adapters/task-sources/yaml-task-source.d.ts +6 -6
  82. package/dist/adapters/task-sources/yaml-task-source.js +19 -16
  83. package/dist/cli.js +0 -2
  84. package/dist/commands/baseline.js +4 -1
  85. package/dist/commands/calculate-scores.js +1 -1
  86. package/dist/commands/coverage-audit.js +7 -1
  87. package/dist/commands/explain-handler.js +25 -23
  88. package/dist/commands/fetch-docs.js +3 -2
  89. package/dist/commands/generate-configs.js +1 -1
  90. package/dist/commands/interactive.js +11 -7
  91. package/dist/commands/pipeline-action.d.ts +2 -0
  92. package/dist/commands/pipeline-action.js +16 -6
  93. package/dist/commands/pipeline.d.ts +1 -0
  94. package/dist/commands/pipeline.js +4 -2
  95. package/dist/commands/pr-comment.js +1 -1
  96. package/dist/commands/publish.js +2 -2
  97. package/dist/commands/readiness-report.js +13 -6
  98. package/dist/composition-root.d.ts +1 -1
  99. package/dist/composition-root.js +67 -4
  100. package/dist/orchestration/build-app-context.js +1 -0
  101. package/dist/orchestration/build-step-sequence.js +24 -6
  102. package/dist/orchestration/steps/calculate-scores-step.js +24 -11
  103. package/dist/orchestration/steps/fetch-docs-step.js +6 -4
  104. package/dist/orchestration/steps/gap-analysis-step.js +8 -7
  105. package/dist/orchestration/steps/generate-configs-step.d.ts +16 -3
  106. package/dist/orchestration/steps/generate-configs-step.js +245 -51
  107. package/dist/orchestration/steps/grader-consistency-step.js +7 -4
  108. package/dist/orchestration/steps/mirror-repo-tasks-step.js +1 -1
  109. package/dist/orchestration/steps/readiness-step.js +5 -6
  110. package/dist/orchestration/steps/run-eval-step.d.ts +1 -2
  111. package/dist/orchestration/steps/run-eval-step.js +8 -7
  112. package/dist/pipeline/cache.d.ts +1 -1
  113. package/dist/pipeline/cache.js +36 -8
  114. package/dist/pipeline/calculate-scores.d.ts +2 -4
  115. package/dist/pipeline/calculate-scores.js +43 -113
  116. package/dist/pipeline/checks.js +2 -2
  117. package/dist/pipeline/compare.js +8 -8
  118. package/dist/pipeline/compiler/__tests__/agent-harness-handler.test.d.ts +10 -0
  119. package/dist/pipeline/compiler/__tests__/agent-harness-handler.test.js +288 -0
  120. package/dist/pipeline/compiler/__tests__/assertion-mapper.test.d.ts +9 -0
  121. package/dist/pipeline/compiler/__tests__/assertion-mapper.test.js +145 -0
  122. package/dist/pipeline/compiler/__tests__/knowledge-probe-handler.test.d.ts +10 -0
  123. package/dist/pipeline/compiler/__tests__/knowledge-probe-handler.test.js +314 -0
  124. package/dist/pipeline/compiler/__tests__/literacy-handler.test.d.ts +10 -0
  125. package/dist/pipeline/compiler/__tests__/literacy-handler.test.js +486 -0
  126. package/dist/pipeline/compiler/__tests__/mcp-server-handler.test.d.ts +10 -0
  127. package/dist/pipeline/compiler/__tests__/mcp-server-handler.test.js +355 -0
  128. package/dist/pipeline/compiler/__tests__/promptfoo-compiler.test.d.ts +9 -0
  129. package/dist/pipeline/compiler/__tests__/promptfoo-compiler.test.js +333 -0
  130. package/dist/pipeline/compiler/__tests__/sandbox-and-fixtures.test.d.ts +12 -0
  131. package/dist/pipeline/compiler/__tests__/sandbox-and-fixtures.test.js +210 -0
  132. package/dist/pipeline/compiler/__tests__/scoring-and-presets.test.d.ts +7 -0
  133. package/dist/pipeline/compiler/__tests__/scoring-and-presets.test.js +471 -0
  134. package/dist/pipeline/compiler/__tests__/scoring-bridge.test.d.ts +10 -0
  135. package/dist/pipeline/compiler/__tests__/scoring-bridge.test.js +184 -0
  136. package/dist/pipeline/compiler/__tests__/task-graph-builder.test.d.ts +8 -0
  137. package/dist/pipeline/compiler/__tests__/task-graph-builder.test.js +301 -0
  138. package/dist/pipeline/compiler/__tests__/telemetry.test.d.ts +9 -0
  139. package/dist/pipeline/compiler/__tests__/telemetry.test.js +503 -0
  140. package/dist/pipeline/compiler/assertion-mapper.d.ts +58 -0
  141. package/dist/pipeline/compiler/assertion-mapper.js +175 -0
  142. package/dist/pipeline/compiler/compiler-to-yaml.d.ts +51 -0
  143. package/dist/pipeline/compiler/compiler-to-yaml.js +222 -0
  144. package/dist/pipeline/compiler/config-loader.d.ts +56 -0
  145. package/dist/pipeline/compiler/config-loader.js +111 -0
  146. package/dist/pipeline/compiler/fixture-resolver.d.ts +41 -0
  147. package/dist/pipeline/compiler/fixture-resolver.js +113 -0
  148. package/dist/pipeline/compiler/hash.d.ts +11 -0
  149. package/dist/pipeline/compiler/hash.js +18 -0
  150. package/dist/pipeline/compiler/ignore-fields.d.ts +53 -0
  151. package/dist/pipeline/compiler/ignore-fields.js +113 -0
  152. package/dist/pipeline/compiler/index.d.ts +29 -0
  153. package/dist/pipeline/compiler/index.js +45 -0
  154. package/dist/pipeline/compiler/literacy-bridge.d.ts +102 -0
  155. package/dist/pipeline/compiler/literacy-bridge.js +172 -0
  156. package/dist/pipeline/compiler/mode-handlers/__fixtures__/agent-harness-example-tasks.d.ts +14 -0
  157. package/dist/pipeline/compiler/mode-handlers/__fixtures__/agent-harness-example-tasks.js +152 -0
  158. package/dist/pipeline/compiler/mode-handlers/__fixtures__/knowledge-probe-example-tasks.d.ts +32 -0
  159. package/dist/pipeline/compiler/mode-handlers/__fixtures__/knowledge-probe-example-tasks.js +176 -0
  160. package/dist/pipeline/compiler/mode-handlers/__fixtures__/mcp-example-tasks.d.ts +49 -0
  161. package/dist/pipeline/compiler/mode-handlers/__fixtures__/mcp-example-tasks.js +259 -0
  162. package/dist/pipeline/compiler/mode-handlers/agent-harness-handler.d.ts +70 -0
  163. package/dist/pipeline/compiler/mode-handlers/agent-harness-handler.js +485 -0
  164. package/dist/pipeline/compiler/mode-handlers/index.d.ts +16 -0
  165. package/dist/pipeline/compiler/mode-handlers/index.js +21 -0
  166. package/dist/pipeline/compiler/mode-handlers/knowledge-probe-handler.d.ts +76 -0
  167. package/dist/pipeline/compiler/mode-handlers/knowledge-probe-handler.js +245 -0
  168. package/dist/pipeline/compiler/mode-handlers/literacy-handler.d.ts +89 -0
  169. package/dist/pipeline/compiler/mode-handlers/literacy-handler.js +379 -0
  170. package/dist/pipeline/compiler/mode-handlers/mcp-assertions.d.ts +50 -0
  171. package/dist/pipeline/compiler/mode-handlers/mcp-assertions.js +277 -0
  172. package/dist/pipeline/compiler/mode-handlers/mcp-server-handler.d.ts +67 -0
  173. package/dist/pipeline/compiler/mode-handlers/mcp-server-handler.js +309 -0
  174. package/dist/pipeline/compiler/presets/index.d.ts +9 -0
  175. package/dist/pipeline/compiler/presets/index.js +8 -0
  176. package/dist/pipeline/compiler/presets/sanity-literacy.d.ts +45 -0
  177. package/dist/pipeline/compiler/presets/sanity-literacy.js +354 -0
  178. package/dist/pipeline/compiler/promptfoo-compiler.d.ts +96 -0
  179. package/dist/pipeline/compiler/promptfoo-compiler.js +230 -0
  180. package/dist/pipeline/compiler/provider-assembler.d.ts +39 -0
  181. package/dist/pipeline/compiler/provider-assembler.js +137 -0
  182. package/dist/pipeline/compiler/sandbox/docker-sandbox.d.ts +21 -0
  183. package/dist/pipeline/compiler/sandbox/docker-sandbox.js +136 -0
  184. package/dist/pipeline/compiler/sandbox/fixture-provisioner.d.ts +69 -0
  185. package/dist/pipeline/compiler/sandbox/fixture-provisioner.js +189 -0
  186. package/dist/pipeline/compiler/sandbox/git-worktree-sandbox.d.ts +20 -0
  187. package/dist/pipeline/compiler/sandbox/git-worktree-sandbox.js +114 -0
  188. package/dist/pipeline/compiler/sandbox/index.d.ts +10 -0
  189. package/dist/pipeline/compiler/sandbox/index.js +11 -0
  190. package/dist/pipeline/compiler/sandbox/sandbox-selector.d.ts +35 -0
  191. package/dist/pipeline/compiler/sandbox/sandbox-selector.js +86 -0
  192. package/dist/pipeline/compiler/sandbox/sandbox-strategy.d.ts +81 -0
  193. package/dist/pipeline/compiler/sandbox/sandbox-strategy.js +15 -0
  194. package/dist/pipeline/compiler/sandbox/tempdir-sandbox.d.ts +20 -0
  195. package/dist/pipeline/compiler/sandbox/tempdir-sandbox.js +74 -0
  196. package/dist/pipeline/compiler/scoring-bridge.d.ts +49 -0
  197. package/dist/pipeline/compiler/scoring-bridge.js +114 -0
  198. package/dist/pipeline/compiler/task-graph-builder.d.ts +54 -0
  199. package/dist/pipeline/compiler/task-graph-builder.js +291 -0
  200. package/dist/pipeline/compiler/telemetry/cost-tracker.d.ts +90 -0
  201. package/dist/pipeline/compiler/telemetry/cost-tracker.js +146 -0
  202. package/dist/pipeline/compiler/telemetry/index.d.ts +14 -0
  203. package/dist/pipeline/compiler/telemetry/index.js +19 -0
  204. package/dist/pipeline/compiler/telemetry/redactor.d.ts +58 -0
  205. package/dist/pipeline/compiler/telemetry/redactor.js +222 -0
  206. package/dist/pipeline/compiler/telemetry/tool-classifier.d.ts +32 -0
  207. package/dist/pipeline/compiler/telemetry/tool-classifier.js +120 -0
  208. package/dist/pipeline/compiler/telemetry/trace-collector.d.ts +75 -0
  209. package/dist/pipeline/compiler/telemetry/trace-collector.js +297 -0
  210. package/dist/pipeline/compiler/telemetry/trace-store.d.ts +78 -0
  211. package/dist/pipeline/compiler/telemetry/trace-store.js +85 -0
  212. package/dist/pipeline/compiler/variable-resolver.d.ts +46 -0
  213. package/dist/pipeline/compiler/variable-resolver.js +115 -0
  214. package/dist/pipeline/coverage-audit.d.ts +15 -5
  215. package/dist/pipeline/coverage-audit.js +41 -22
  216. package/dist/pipeline/eval-constants.d.ts +16 -6
  217. package/dist/pipeline/eval-constants.js +25 -4
  218. package/dist/pipeline/eval-fingerprint.d.ts +2 -2
  219. package/dist/pipeline/eval-fingerprint.js +8 -9
  220. package/dist/pipeline/expand-tasks.d.ts +19 -10
  221. package/dist/pipeline/expand-tasks.js +34 -28
  222. package/dist/pipeline/gap-analysis.d.ts +1 -1
  223. package/dist/pipeline/gap-analysis.js +2 -2
  224. package/dist/pipeline/generate-configs.d.ts +22 -4
  225. package/dist/pipeline/generate-configs.js +53 -24
  226. package/dist/pipeline/grader-api.d.ts +3 -3
  227. package/dist/pipeline/grader-api.js +5 -12
  228. package/dist/pipeline/grader-compare-runner.js +20 -27
  229. package/dist/pipeline/grader-comparison.d.ts +4 -8
  230. package/dist/pipeline/grader-comparison.js +11 -17
  231. package/dist/pipeline/grader-consistency-runner.d.ts +2 -3
  232. package/dist/pipeline/grader-consistency-runner.js +16 -20
  233. package/dist/pipeline/grader-consistency.d.ts +6 -10
  234. package/dist/pipeline/grader-consistency.js +13 -32
  235. package/dist/pipeline/grader-sensitivity-runner.js +7 -5
  236. package/dist/pipeline/grader-sensitivity.d.ts +2 -6
  237. package/dist/pipeline/grader-sensitivity.js +10 -10
  238. package/dist/pipeline/grader-validate-runner.js +7 -5
  239. package/dist/pipeline/grader-validation.d.ts +2 -6
  240. package/dist/pipeline/grader-validation.js +14 -22
  241. package/dist/pipeline/map-request-to-config.js +6 -1
  242. package/dist/pipeline/mirror-repo-tasks.d.ts +6 -6
  243. package/dist/pipeline/mirror-repo-tasks.js +16 -15
  244. package/dist/pipeline/normalize-mode.d.ts +49 -0
  245. package/dist/pipeline/normalize-mode.js +64 -0
  246. package/dist/pipeline/plan.d.ts +5 -2
  247. package/dist/pipeline/plan.js +134 -78
  248. package/dist/pipeline/pr-comment.js +2 -0
  249. package/dist/pipeline/profile-resolution.d.ts +22 -14
  250. package/dist/pipeline/profile-resolution.js +41 -19
  251. package/dist/pipeline/provenance.d.ts +2 -2
  252. package/dist/pipeline/provenance.js +12 -17
  253. package/dist/pipeline/release-report.js +4 -4
  254. package/dist/pipeline/repo-threshold-evaluator.d.ts +1 -1
  255. package/dist/pipeline/repo-threshold-evaluator.js +1 -1
  256. package/dist/pipeline/rubric-loader.d.ts +20 -0
  257. package/dist/pipeline/rubric-loader.js +37 -0
  258. package/dist/pipeline/validate.d.ts +4 -4
  259. package/dist/pipeline/validate.js +64 -53
  260. package/dist/schedules/loader.js +18 -8
  261. package/dist/scripts/migrate-task-mode.d.ts +24 -0
  262. package/dist/scripts/migrate-task-mode.js +85 -0
  263. package/dist/scripts/migrate-tasks-to-content-lake.js +11 -10
  264. package/dist/scripts/validate-task-sources.d.ts +1 -1
  265. package/dist/scripts/validate-task-sources.js +15 -15
  266. package/dist/sinks/loader.js +5 -7
  267. package/dist/sources.d.ts +7 -7
  268. package/dist/sources.js +22 -24
  269. package/dist/webhook/dispatch.js +2 -1
  270. package/package.json +6 -3
  271. package/tasks/knowledge-probe/define-type-api.task.ts +55 -0
  272. package/tasks/knowledge-probe/groq-projections.task.ts +59 -0
  273. package/tasks/literacy/frameworks.task.ts +128 -0
  274. package/tasks/literacy/functions.task.ts +69 -0
  275. package/tasks/literacy/groq.task.ts +258 -0
  276. package/tasks/literacy/nextjs-live.task.ts +75 -0
  277. package/tasks/literacy/studio-setup.task.ts +131 -0
  278. package/tasks/literacy/visual-editing.task.ts +146 -0
  279. package/config/features.yaml +0 -116
  280. package/config/models.yaml +0 -116
  281. package/config/prompts.yaml +0 -75
  282. package/config/rubrics.yaml +0 -81
  283. package/config/schedules.yaml +0 -43
  284. package/config/sinks.yaml +0 -54
  285. package/config/sources.yaml +0 -51
  286. package/config/thresholds.yaml +0 -49
  287. package/dist/agent-observer/test-imports.d.ts +0 -7
  288. package/dist/agent-observer/test-imports.js +0 -185
@@ -0,0 +1,113 @@
1
+ /**
2
+ * Fixture resolver — resolves fixture references into content for compilation.
3
+ *
4
+ * Handles document fixtures (fetched from Sanity or local files),
5
+ * file fixtures (read from disk), and inline fixtures (embedded in
6
+ * task definitions). Resolved content is injected into the TaskNode's
7
+ * VariableEnvelope for the compiler to use.
8
+ *
9
+ * Currently supports the existing fixture patterns:
10
+ * - `file://contexts/canonical/<id>.md` → read from local fs
11
+ * - Inline `vars.docs` strings → used as-is
12
+ * - Canonical doc references → resolved by DocFetcher port
13
+ *
14
+ * Future phases will add URI scheme resolution (repo://, sanity://, etc.)
15
+ * as described in the fixtures-artifacts design doc.
16
+ *
17
+ * @see docs/design-docs/architecture-overhaul/fixtures-artifacts.md
18
+ * @see docs/exec-plans/architecture-overhaul/phase-2-config-compiler.md
19
+ */
20
+ import { existsSync, readFileSync } from "fs";
21
+ import { resolve } from "path";
22
+ import { simpleHash } from "./hash.js";
23
/**
 * Resolve the fixtures referenced by a task definition.
 *
 * Every string variable whose value starts with `file://` is read through
 * `resolveFileRef`; on success the variable value is replaced by the file
 * content, a provenance record is written for it, and a fixture entry keyed
 * by the original reference is registered. Failures are collected as
 * warnings and leave the variable untouched.
 *
 * When the task carries a non-empty `context.docs`, a placeholder fixture
 * (`content: null`) is registered under `canonical-docs:<task.id>`.
 *
 * @param task - Task definition; `id` and (optionally) `context.docs` are read.
 * @param currentVars - Variable envelope with `values`, `provenance`, and
 *   `declarations`. It is not mutated; shallow copies are updated instead.
 * @param options - Resolver options; `rootDir` is read when a `file://`
 *   reference is encountered.
 * @returns `{ fixtures, updatedVars, warnings }` where `fixtures` is a Map.
 */
export function resolveTaskFixtures(task, currentVars, options) {
    const warnings = [];
    const fixtures = new Map();
    const values = { ...currentVars.values };
    const provenance = { ...currentVars.provenance };
    // Pass 1: inline the content of every `file://` variable.
    for (const key of Object.keys(values)) {
        const ref = values[key];
        if (typeof ref !== "string" || !ref.startsWith("file://")) {
            continue;
        }
        const outcome = resolveFileRef(ref, options.rootDir);
        if (!outcome.ok) {
            warnings.push(outcome.error);
            continue;
        }
        const { content } = outcome;
        // One digest serves both the provenance record and the fixture entry.
        const digest = simpleHash(content);
        values[key] = content;
        provenance[key] = {
            hash: digest,
            resolvedAt: new Date().toISOString(),
            source: { fixtureId: ref, type: "fixture" },
        };
        fixtures.set(ref, {
            content,
            contentHash: digest,
            id: ref,
            name: ref.replace("file://", ""),
            type: "fetched",
        });
    }
    // Pass 2: canonical doc context, if the task variant declares any.
    const declaredDocs = ("context" in task ? task.context?.docs : undefined) || [];
    if (declaredDocs.length > 0) {
        // Only a placeholder is created here: the content is deferred and
        // stays `null` in the compiled fixture map.
        const placeholderId = `canonical-docs:${task.id}`;
        fixtures.set(placeholderId, {
            content: null,
            id: placeholderId,
            name: `Canonical docs for ${task.id}`,
            type: "fetched",
        });
    }
    const updatedVars = {
        declarations: currentVars.declarations,
        provenance,
        values,
    };
    return { fixtures, updatedVars, warnings };
}
84
/**
 * Resolve a `file://` reference to file content.
 *
 * The part after `file://` is resolved against `rootDir` and must stay
 * inside it; anything that resolves elsewhere is rejected before the file
 * system is touched.
 *
 * @param {string} fileRef - Reference of the form `file://<path>`.
 * @param {string} rootDir - Directory that fixture paths are resolved against.
 * @returns {{ ok: true, content: string } | { ok: false, error: string }}
 *   The UTF-8 content on success, otherwise a human-readable error message.
 *   Traversal, missing-file, and read failures are all reported through the
 *   result object rather than thrown.
 */
function resolveFileRef(fileRef, rootDir) {
    const relativePath = fileRef.replace("file://", "");
    const baseDir = resolve(rootDir);
    const absolutePath = resolve(baseDir, relativePath);
    // Path containment: prevent file://../../etc/passwd from reading outside rootDir.
    // The containment prefix is derived from resolve() itself instead of
    // appending a hard-coded "/": resolving a one-character child and dropping
    // that character yields baseDir with exactly one platform-correct trailing
    // separator. That stays correct when rootDir is the filesystem root
    // ("/" must not become "//") and on platforms whose separator is not "/".
    const containmentPrefix = resolve(baseDir, "x").slice(0, -1);
    const isInsideRoot = absolutePath === baseDir || absolutePath.startsWith(containmentPrefix);
    if (!isInsideRoot) {
        return {
            ok: false,
            error: `Path traversal detected: "${fileRef}" resolves outside rootDir "${rootDir}"`,
        };
    }
    // NOTE(review): the check is purely lexical; a symlink inside rootDir that
    // points elsewhere is still followed by readFileSync — confirm whether
    // that needs to be prevented.
    if (!existsSync(absolutePath)) {
        return {
            ok: false,
            error: `Fixture file not found: ${absolutePath} (referenced as ${fileRef})`,
        };
    }
    try {
        const content = readFileSync(absolutePath, "utf-8");
        return { ok: true, content };
    }
    catch (err) {
        // E.g. the path is a directory or is not readable.
        const msg = err instanceof Error ? err.message : String(err);
        return { ok: false, error: `Failed to read fixture ${fileRef}: ${msg}` };
    }
}
@@ -0,0 +1,11 @@
1
+ /**
2
+ * FNV-1a 32-bit hash — fast, non-cryptographic content hash.
3
+ *
4
+ * Used for cache keys, content fingerprinting, and provenance tracking.
5
+ * Returns an 8-character zero-padded hex string.
6
+ *
7
+ * Shared across fixture-resolver, variable-resolver, and trace-store
8
+ * to ensure consistent hash behavior. Uses unsigned right shift (`>>> 0`)
9
+ * to keep the hash in unsigned 32-bit integer range.
10
+ */
11
+ export declare function simpleHash(content: string): string;
@@ -0,0 +1,18 @@
1
/**
 * FNV-1a 32-bit hash — fast, non-cryptographic content hash.
 *
 * Used for cache keys, content fingerprinting, and provenance tracking.
 * Returns an 8-character zero-padded lowercase hex string.
 *
 * The input is consumed one UTF-16 code unit at a time (`charCodeAt`), so the
 * result equals the standard byte-wise FNV-1a value for strings whose code
 * units are all <= 0xFF; wider code units are XORed in whole.
 *
 * Shared across fixture-resolver, variable-resolver, and trace-store
 * to ensure consistent hash behavior.
 *
 * @param {string} content - Text to hash.
 * @returns {string} 8 hex digits, e.g. `"811c9dc5"` for the empty string.
 */
export function simpleHash(content) {
    let hash = 0x811c9dc5; // FNV-1a offset basis
    for (let i = 0; i < content.length; i++) {
        hash ^= content.charCodeAt(i);
        // Math.imul performs an exact 32-bit multiplication. A plain `*` yields
        // a double whose value can exceed 2^53, silently dropping the low bits
        // of the product before it is truncated back to 32 bits.
        hash = Math.imul(hash, 0x01000193); // FNV prime
    }
    // `>>> 0` reinterprets the signed 32-bit result as unsigned before formatting.
    return (hash >>> 0).toString(16).padStart(8, "0");
}
@@ -0,0 +1,53 @@
1
+ /**
2
+ * ignoreFields — strips non-deterministic fields before assertion comparison.
3
+ *
4
+ * For tool outputs that contain timestamps, UUIDs, temp paths, or other
5
+ * non-deterministic values, assertions need a way to exclude specific
6
+ * fields from comparison. This module provides field stripping using
7
+ * dot-notation paths.
8
+ *
9
+ * Usage in task definitions:
10
+ * ```typescript
11
+ * assertions: [
12
+ * {
13
+ * type: "tool-output-matches",
14
+ * value: { title: "Hello" },
15
+ * ignoreFields: ["metadata.createdAt", "result.id", "_rev"],
16
+ * },
17
+ * ]
18
+ * ```
19
+ *
20
+ * @see docs/exec-plans/architecture-overhaul/phase-4-agent-harness.md
21
+ */
22
+ /**
23
+ * Strip specified fields from an object using dot-notation paths.
24
+ *
25
+ * @param obj - The object to strip fields from (not mutated)
26
+ * @param fields - Dot-notation field paths to remove
27
+ * @returns A new object with the specified fields removed
28
+ *
29
+ * @example
30
+ * ```typescript
31
+ * stripFields(
32
+ * { title: "Hello", metadata: { createdAt: "2024-01-01", author: "Alice" } },
33
+ * ["metadata.createdAt"]
34
+ * )
35
+ * // => { title: "Hello", metadata: { author: "Alice" } }
36
+ * ```
37
+ */
38
+ export declare function stripFields(obj: unknown, fields: string[]): unknown;
39
+ /**
40
+ * Strip specified fields from both actual and expected values,
41
+ * then compare them.
42
+ *
43
+ * @returns true if the objects are equal after stripping
44
+ */
45
+ export declare function compareWithIgnoredFields(actual: unknown, expected: unknown, ignoreFields: string[]): boolean;
46
+ /**
47
+ * Generate a Promptfoo-compatible JavaScript assertion that applies
48
+ * ignoreFields stripping before comparison.
49
+ *
50
+ * This wraps a comparison assertion with field stripping logic,
51
+ * producing a self-contained JS assertion string.
52
+ */
53
+ export declare function buildIgnoreFieldsWrapper(comparisonCode: string, ignoreFields: string[]): string;
@@ -0,0 +1,113 @@
1
+ /**
2
+ * ignoreFields — strips non-deterministic fields before assertion comparison.
3
+ *
4
+ * For tool outputs that contain timestamps, UUIDs, temp paths, or other
5
+ * non-deterministic values, assertions need a way to exclude specific
6
+ * fields from comparison. This module provides field stripping using
7
+ * dot-notation paths.
8
+ *
9
+ * Usage in task definitions:
10
+ * ```typescript
11
+ * assertions: [
12
+ * {
13
+ * type: "tool-output-matches",
14
+ * value: { title: "Hello" },
15
+ * ignoreFields: ["metadata.createdAt", "result.id", "_rev"],
16
+ * },
17
+ * ]
18
+ * ```
19
+ *
20
+ * @see docs/exec-plans/architecture-overhaul/phase-4-agent-harness.md
21
+ */
22
+ // ---------------------------------------------------------------------------
23
+ // Public API
24
+ // ---------------------------------------------------------------------------
25
/**
 * Return a copy of `obj` with the given dot-notation paths removed.
 *
 * The input is never mutated: removal happens on a JSON round-trip clone, so
 * the result contains only JSON-representable data. When there is nothing to
 * strip (no fields) or nothing to strip from (`null`, `undefined`, or a
 * non-object), the input is returned as-is without cloning.
 *
 * @param obj - The value to strip fields from
 * @param fields - Dot-notation field paths to remove, e.g. `"metadata.createdAt"`
 * @returns The stripped clone, or `obj` itself on the early-return paths
 *
 * @example
 * ```typescript
 * stripFields(
 *   { title: "Hello", metadata: { createdAt: "2024-01-01", author: "Alice" } },
 *   ["metadata.createdAt"]
 * )
 * // => { title: "Hello", metadata: { author: "Alice" } }
 * ```
 */
export function stripFields(obj, fields) {
    const hasNoFields = !fields || fields.length === 0;
    const isNotObject = obj === null || obj === undefined || typeof obj !== "object";
    if (hasNoFields || isNotObject) {
        return obj;
    }
    // Work on a deep copy so the caller's object stays untouched.
    const copy = JSON.parse(JSON.stringify(obj));
    for (const dottedPath of fields) {
        const segments = dottedPath.split(".");
        removeFieldByPath(copy, segments);
    }
    return copy;
}
55
/**
 * Strip specified fields from both actual and expected values,
 * then compare them.
 *
 * Both sides are compared by their JSON representation with object keys
 * sorted, so two objects holding the same data compare equal regardless of
 * the order in which their keys were inserted. Array element order remains
 * significant. Values that JSON cannot represent compare the way their JSON
 * serialization does.
 *
 * @param actual - Value produced at runtime
 * @param expected - Value declared in the assertion
 * @param ignoreFields - Dot-notation paths removed from both sides first
 * @returns true if the values are equal after stripping
 */
export function compareWithIgnoredFields(actual, expected, ignoreFields) {
    // Serialize to JSON with object keys in sorted order. The value is first
    // normalized through a plain JSON round trip so everything JSON.stringify
    // would have emitted (toJSON results, dropped undefined members, ...) is
    // preserved exactly; only key order is changed.
    const canonicalJson = (value) => {
        const json = JSON.stringify(value);
        if (json === undefined) {
            // e.g. `undefined` or a function: nothing to canonicalize.
            return undefined;
        }
        return JSON.stringify(JSON.parse(json), (_key, node) => {
            if (node === null || typeof node !== "object" || Array.isArray(node)) {
                return node;
            }
            // Null-prototype target so a literal "__proto__" key is kept as data.
            const ordered = Object.create(null);
            for (const key of Object.keys(node).sort()) {
                ordered[key] = node[key];
            }
            return ordered;
        });
    };
    const strippedActual = stripFields(actual, ignoreFields);
    const strippedExpected = stripFields(expected, ignoreFields);
    return canonicalJson(strippedActual) === canonicalJson(strippedExpected);
}
66
/**
 * Generate a Promptfoo-compatible JavaScript assertion that applies
 * ignoreFields stripping before comparison.
 *
 * The returned source defines a standalone `stripFields(obj, fields)`
 * function and a `__ignoreFields` constant, followed by `comparisonCode`
 * verbatim, so the comparison code can call
 * `stripFields(value, __ignoreFields)`.
 *
 * The generated `stripFields` only walks own properties of the cloned
 * object, so a path such as `__proto__.x` cannot reach (and delete from)
 * `Object.prototype` in the process that evaluates the assertion.
 *
 * @param {string} comparisonCode - JavaScript source of the comparison.
 * @param {string[]} ignoreFields - Dot-notation paths to strip.
 * @returns {string} `comparisonCode` unchanged when `ignoreFields` is empty,
 *   otherwise the self-contained wrapped source.
 */
export function buildIgnoreFieldsWrapper(comparisonCode, ignoreFields) {
    if (ignoreFields.length === 0) {
        return comparisonCode;
    }
    const stripFn = [
        "function stripFields(obj, fields) {",
        "  if (!obj || typeof obj !== 'object') return obj;",
        "  const clone = JSON.parse(JSON.stringify(obj));",
        "  const hasOwn = Object.prototype.hasOwnProperty;",
        "  for (const field of fields) {",
        "    const parts = field.split('.');",
        "    let current = clone;",
        "    for (let i = 0; i < parts.length - 1; i++) {",
        "      if (!current || typeof current !== 'object') break;",
        "      current = hasOwn.call(current, parts[i]) ? current[parts[i]] : undefined;",
        "    }",
        "    if (current && typeof current === 'object') {",
        "      delete current[parts[parts.length - 1]];",
        "    }",
        "  }",
        "  return clone;",
        "}",
        "",
    ].join("\n");
    return (`${stripFn}\n` +
        `const __ignoreFields = ${JSON.stringify(ignoreFields)};\n` +
        `${comparisonCode}`);
}
96
+ // ---------------------------------------------------------------------------
97
+ // Internal helpers
98
+ // ---------------------------------------------------------------------------
99
/**
 * Delete the property addressed by `path` from `obj`, in place.
 *
 * Intermediate segments are followed only when they are own properties that
 * hold an object (arrays included, addressed by index). Anything else — a
 * missing key, a primitive, or an inherited member such as `__proto__` or
 * `constructor` — ends the walk without touching anything, so a path can
 * never reach and modify a shared prototype.
 *
 * @param {object} obj - Object to modify.
 * @param {string[]} path - Path segments; an empty path is a no-op.
 * @returns {void}
 */
function removeFieldByPath(obj, path) {
    if (path.length === 0) {
        return;
    }
    const hasOwn = Object.prototype.hasOwnProperty;
    let current = obj;
    // Walk every segment except the last one.
    for (let i = 0; i < path.length - 1; i++) {
        const segment = path[i];
        if (!hasOwn.call(current, segment)) {
            return;
        }
        const child = current[segment];
        if (child === null || child === undefined || typeof child !== "object") {
            return;
        }
        current = child;
    }
    // `delete` only ever affects own properties; absent keys are a no-op.
    delete current[path[path.length - 1]];
}
@@ -0,0 +1,29 @@
1
+ /**
2
+ * Config compiler — the heart of the new architecture.
3
+ *
4
+ * Converts task definitions from any source into a TaskGraph IR,
5
+ * then compiles the graph into Promptfoo YAML configuration.
6
+ *
7
+ * This module coexists with the existing `generate-configs.ts` path.
8
+ * Phase 7 will migrate callers to use the compiler exclusively.
9
+ *
10
+ * @see docs/exec-plans/architecture-overhaul/phase-2-config-compiler.md
11
+ */
12
+ export { buildTaskGraph, detectCycle, type TaskGraphBuildOptions, type TaskGraphBuildResult, } from "./task-graph-builder.js";
13
+ export { compileToPromptfoo, type CompilationResult, type CompiledPromptfooConfig, type PromptfooCompilerOptions, type PromptfooPrompt, type PromptfooProvider, type PromptfooTestCase, } from "./promptfoo-compiler.js";
14
+ export { isAssertionCompatibleWithMode, isValidAssertionType, mapAssertions, type AssertionMapperOptions, type PromptfooAssertion, } from "./assertion-mapper.js";
15
+ export { resolveTaskFixtures, type FixtureResolutionResult, type FixtureResolverOptions, } from "./fixture-resolver.js";
16
+ export { createEnvelope, resolveVariables, type VariableResolutionResult, type VariableResolverOptions, } from "./variable-resolver.js";
17
+ export { buildMCPAssertions, compileAgentHarnessTask, compileLiteracyTask, compileKnowledgeProbeTask, compileMCPTask, validateAgentHarnessTask, validateLiteracyTask, validateKnowledgeProbeTask, validateMCPTask, type AgentHarnessCompileOptions, type AgentHarnessCompileResult, type AgentHarnessValidationError, type LiteracyCompileOptions, type LiteracyCompileResult, type LiteracyValidationError, type KnowledgeProbeCompileOptions, type KnowledgeProbeCompileResult, type KnowledgeProbeMetadata, type KnowledgeProbeValidationError, type MCPAssertionContext, type MCPCompileOptions, type MCPCompileResult, type MCPValidationError, type PromptfooExtension, type SandboxConfigMeta, } from "./mode-handlers/index.js";
18
+ export { createSandboxStrategy, DockerSandboxStrategy, GitWorktreeSandboxStrategy, selectSandboxStrategy, TempDirSandboxStrategy, type SandboxArtifacts, type SandboxInfo, type SandboxProvisionOptions, type SandboxSelectionResult, type SandboxStrategy, type SandboxType, } from "./sandbox/index.js";
19
+ export { provisionFixtures, type FixtureRef, type FixtureTransform, type ProvisionedFixture, type ProvisioningOptions, type ProvisioningResult, } from "./sandbox/fixture-provisioner.js";
20
+ export { loadModelsAndProviders, type AssembledProviders, type LiteracyVariantProviders, type ModelsAndProviders, } from "./provider-assembler.js";
21
+ export { writeCompiledLiteracyConfigs, type WriteCompiledConfigOptions, } from "./compiler-to-yaml.js";
22
+ export { compileLiteracyTasks, compareCompilerOutputs, type ComparisonDiscrepancy, type ComparisonResult, type LegacyEntry, type LiteracyBridgeOptions, type LiteracyBridgeResult, } from "./literacy-bridge.js";
23
+ export { checkBudget, classifyToolCall, classifyToolCalls, collectTrace, computeCost, createRedactionConfig, DEFAULT_REDACTION_RULES, estimateRunCost, extractTraceSummary, LocalTraceStore, lookupPricing, mergeTraces, redactTrace, type ActualCost, type BudgetCheckResult, type BudgetConfig, type CostEstimate, type ModelPricing, type ProviderResponse, type RawToolCall, type RedactionConfig, type RedactionResult, type RedactionRule, type TraceCollectorOptions, type TraceStore, type TraceStoreResult, type TraceSummary, } from "./telemetry/index.js";
24
+ export { registerSanityLiteracyPreset, sanityLiteracyPreset, } from "./presets/index.js";
25
+ export { buildIgnoreFieldsWrapper, compareWithIgnoredFields, stripFields, } from "./ignore-fields.js";
26
+ export { simpleHash } from "./hash.js";
27
+ export { scoreTestGroup, type BridgedScoreResult } from "./scoring-bridge.js";
28
+ export { ConfigNotFoundError, loadConfigFile, tryLoadConfigFile, } from "./config-loader.js";
29
+ export type { ConfigLoadResult } from "./config-loader.js";
@@ -0,0 +1,45 @@
1
+ /**
2
+ * Config compiler — the heart of the new architecture.
3
+ *
4
+ * Converts task definitions from any source into a TaskGraph IR,
5
+ * then compiles the graph into Promptfoo YAML configuration.
6
+ *
7
+ * This module coexists with the existing `generate-configs.ts` path.
8
+ * Phase 7 will migrate callers to use the compiler exclusively.
9
+ *
10
+ * @see docs/exec-plans/architecture-overhaul/phase-2-config-compiler.md
11
+ */
12
+ // TaskGraph builder
13
+ export { buildTaskGraph, detectCycle, } from "./task-graph-builder.js";
14
+ // Promptfoo compiler
15
+ export { compileToPromptfoo, } from "./promptfoo-compiler.js";
16
+ // Assertion mapper
17
+ export { isAssertionCompatibleWithMode, isValidAssertionType, mapAssertions, } from "./assertion-mapper.js";
18
+ // Fixture resolver
19
+ export { resolveTaskFixtures, } from "./fixture-resolver.js";
20
+ // Variable resolver
21
+ export { createEnvelope, resolveVariables, } from "./variable-resolver.js";
22
+ // Mode handlers
23
+ export { buildMCPAssertions, compileAgentHarnessTask, compileLiteracyTask, compileKnowledgeProbeTask, compileMCPTask, validateAgentHarnessTask, validateLiteracyTask, validateKnowledgeProbeTask, validateMCPTask, } from "./mode-handlers/index.js";
24
+ // Sandbox infrastructure
25
+ export { createSandboxStrategy, DockerSandboxStrategy, GitWorktreeSandboxStrategy, selectSandboxStrategy, TempDirSandboxStrategy, } from "./sandbox/index.js";
26
+ // Fixture provisioning
27
+ export { provisionFixtures, } from "./sandbox/fixture-provisioner.js";
28
+ // Provider assembler — builds per-variant provider arrays from models config
29
+ export { loadModelsAndProviders, } from "./provider-assembler.js";
30
+ // Compiler-to-YAML — serializes compiled config to Promptfoo YAML files
31
+ export { writeCompiledLiteracyConfigs, } from "./compiler-to-yaml.js";
32
+ // Literacy bridge — LiteracyTaskDefinition → new compiler
33
+ export { compileLiteracyTasks, compareCompilerOutputs, } from "./literacy-bridge.js";
34
+ // Telemetry — observability & tracing
35
+ export { checkBudget, classifyToolCall, classifyToolCalls, collectTrace, computeCost, createRedactionConfig, DEFAULT_REDACTION_RULES, estimateRunCost, extractTraceSummary, LocalTraceStore, lookupPricing, mergeTraces, redactTrace, } from "./telemetry/index.js";
36
+ // Presets — bundled evaluation capabilities
37
+ export { registerSanityLiteracyPreset, sanityLiteracyPreset, } from "./presets/index.js";
38
+ // Field stripping for non-deterministic outputs
39
+ export { buildIgnoreFieldsWrapper, compareWithIgnoredFields, stripFields, } from "./ignore-fields.js";
40
+ // Hash utility
41
+ export { simpleHash } from "./hash.js";
42
+ // Scoring bridge — 4-tier engine integration
43
+ export { scoreTestGroup } from "./scoring-bridge.js";
44
+ // Unified config loader
45
+ export { ConfigNotFoundError, loadConfigFile, tryLoadConfigFile, } from "./config-loader.js";
@@ -0,0 +1,102 @@
1
+ /**
2
+ * Literacy bridge — maps LiteracyTaskDefinition to the new compiler pipeline.
3
+ *
4
+ * This module bridges the task loading system (TaskSource →
5
+ * GeneralizedTaskDefinition) and the compiler (TaskGraph →
6
+ * PromptfooCompiler). It allows literacy tasks to run through
7
+ * the compiler.
8
+ *
9
+ * Pipeline: LiteracyTaskDefinition[] → TaskGraphBuilder → topological order →
10
+ * LiteracyModeHandler (per task) → LiteracyBridgeResult
11
+ *
12
+ * Key behaviors:
13
+ * - Tasks without explicit mode get mode: "literacy" (backward compat)
14
+ * - LiteracyTaskDefinition fields map to compiler input fields
15
+ * - Rubric config is loaded from config/rubrics
16
+ * - Prompts from config/prompts are integrated
17
+ * - TaskGraphBuilder validates the DAG, deduplicates, and orders tasks
18
+ *
19
+ * @see docs/exec-plans/architecture-overhaul/phase-7-migrate-literacy.md
20
+ */
21
+ import type { LiteracyTaskDefinition } from "../../_vendor/ailf-core/index.d.ts";
22
+ import { type LiteracyCompileResult } from "./mode-handlers/literacy-handler.js";
23
+ import { type LiteracyEvalSubMode } from "../normalize-mode.js";
24
/**
 * Caller-supplied settings for compiling a batch of literacy tasks through
 * the new compiler.
 */
export interface LiteracyBridgeOptions {
    /**
     * Root directory of the eval package. Used to look up the "rubrics"
     * config and forwarded unchanged to the per-task literacy handler.
     */
    rootDir: string;
    /** Eval sub-mode to compile for; forwarded to the per-task handler. */
    evalMode?: LiteracyEvalSubMode;
    /** Grader provider ID; forwarded to the per-task handler. */
    graderProvider?: string;
    /** Model providers; forwarded to the per-task handler. */
    models?: Array<{
        id: string;
        label: string;
        config?: Record<string, unknown>;
    }>;
}
39
/** Aggregate outcome of compiling a batch of literacy tasks. */
export interface LiteracyBridgeResult {
    /** One entry per compiled task, in the order the tasks were compiled. */
    tasks: Array<{
        taskId: string;
        result: LiteracyCompileResult;
    }>;
    /**
     * Every warning collected during the run: task-graph warnings, the
     * "filtered out by status" notice, and each task's own warnings.
     */
    warnings: string[];
    /** Sum of the number of test cases across all compiled tasks. */
    totalTests: number;
}
51
+ /**
52
+ * Compile an array of LiteracyTaskDefinition through the literacy handler.
53
+ *
54
+ * This is the primary entry point for Phase 7 migration. It takes
55
+ * LiteracyTaskDefinition[] and routes them through the compiler pipeline:
56
+ *
57
+ * LiteracyTaskDefinition[] → TaskGraphBuilder → topological order →
58
+ * LiteracyModeHandler (per task) → LiteracyBridgeResult
59
+ *
60
+ * The TaskGraphBuilder provides:
61
+ * - Duplicate task ID detection (warns on collisions)
62
+ * - Status-based filtering (archived/paused/draft)
63
+ * - Dependency edge discovery and DAG cycle validation
64
+ * - Topological priority assignment (tasks with deps run in order)
65
+ *
66
+ * Note: The incoming tasks are typically pre-filtered by the pipeline
67
+ * step (area/tag/taskId filters + release auto-scope). The graph
68
+ * builder's own filtering is intentionally invoked WITHOUT a filter
69
+ * argument to avoid double-filtering — it still applies status-based
70
+ * rules (e.g., rejecting archived tasks that slipped through).
71
+ */
72
+ export declare function compileLiteracyTasks(tasks: LiteracyTaskDefinition[], options: LiteracyBridgeOptions): LiteracyBridgeResult;
73
+ /**
74
+ * Compare old-style expanded entries with new-style compiled entries.
75
+ *
76
+ * This is the parallel comparison gate (task 7b). For each task, it
77
+ * checks that the new compiler produces structurally equivalent output
78
+ * to the legacy expand-tasks path.
79
+ */
80
+ export declare function compareCompilerOutputs(legacyEntries: LegacyEntry[], newResult: LiteracyBridgeResult): ComparisonResult;
81
/** Minimal shape of a legacy entry (as produced by expand-tasks). */
export interface LegacyEntry {
    /**
     * Free-text label. The comparison gate matches entries by substring on
     * this field: the task ID plus the "(gold)" / "(baseline)" markers.
     */
    description?: string;
    /** Template variables — presumably passed to the prompt; not inspected by the comparison gate. */
    vars?: Record<string, unknown>;
    /** Assertions attached to the entry; the comparison gate only compares how many there are. */
    assert?: Array<{
        type: string;
        value?: unknown;
    }>;
    /** Prompt references — not inspected by the comparison gate. */
    prompts?: string[];
}
91
/** A single structural difference between the legacy and the new compiler output. */
export interface ComparisonDiscrepancy {
    /** ID of the task whose outputs differ. */
    taskId: string;
    /** Name of the compared property (the gate emits "testCount" and "assertionCount"). */
    field: string;
    /** Value observed on the legacy side. */
    legacy: unknown;
    /** Value observed on the new-compiler side (`new` is a reserved word, hence the underscore). */
    new_: unknown;
    /** Human-readable description of the mismatch. */
    message: string;
}
98
/** Verdict of the legacy-vs-new comparison gate. */
export interface ComparisonResult {
    /** True when no discrepancies were recorded. */
    passed: boolean;
    /** Every mismatch that was found; empty when `passed` is true. */
    discrepancies: Array<ComparisonDiscrepancy>;
    /** One-line human-readable summary of the outcome. */
    summary: string;
}
@@ -0,0 +1,172 @@
1
+ /**
2
+ * Literacy bridge — maps LiteracyTaskDefinition to the new compiler pipeline.
3
+ *
4
+ * This module bridges the task loading system (TaskSource →
5
+ * GeneralizedTaskDefinition) and the compiler (TaskGraph →
6
+ * PromptfooCompiler). It allows literacy tasks to run through
7
+ * the compiler.
8
+ *
9
+ * Pipeline: LiteracyTaskDefinition[] → TaskGraphBuilder → topological order →
10
+ * LiteracyModeHandler (per task) → LiteracyBridgeResult
11
+ *
12
+ * Key behaviors:
13
+ * - Tasks without explicit mode get mode: "literacy" (backward compat)
14
+ * - LiteracyTaskDefinition fields map to compiler input fields
15
+ * - Rubric config is loaded from config/rubrics
16
+ * - Prompts from config/prompts are integrated
17
+ * - TaskGraphBuilder validates the DAG, deduplicates, and orders tasks
18
+ *
19
+ * @see docs/exec-plans/architecture-overhaul/phase-7-migrate-literacy.md
20
+ */
21
+ import { compileLiteracyTask, } from "./mode-handlers/literacy-handler.js";
22
+ import { tryLoadConfigFile } from "./config-loader.js";
23
+ import { buildTaskGraph } from "./task-graph-builder.js";
24
+ // ---------------------------------------------------------------------------
25
+ // Public API
26
+ // ---------------------------------------------------------------------------
27
/**
 * Run a batch of literacy task definitions through the compiler pipeline.
 *
 * Flow:
 *
 *   tasks → buildTaskGraph → nodes sorted by ascending priority →
 *   compileLiteracyTask (once per node) → aggregated bridge result
 *
 * The task graph is built WITHOUT a filter argument: callers are expected
 * to have applied area/tag/taskId filtering already, so only the graph
 * builder's own rules (reported through `filteredOut`) apply here.
 * Whatever the builder filters out is surfaced as a warning rather than
 * dropped silently.
 *
 * @param tasks   Literacy task definitions to compile.
 * @param options Bridge options; `rootDir` locates the rubric config and
 *                the remaining fields are forwarded to the task handler.
 * @returns Per-task compile results in priority order, every collected
 *          warning, and the total number of generated tests.
 */
export function compileLiteracyTasks(tasks, options) {
  // Rubric templates are loaded once and shared by every task compile.
  const rubricConfig = loadRubricConfig(options.rootDir);

  // No filter passed: tasks are already pre-filtered by the pipeline step.
  const { graph, warnings: graphWarnings, filteredOut } = buildTaskGraph({ tasks });

  const warnings = [...graphWarnings];
  if (filteredOut.length > 0) {
    const droppedIds = filteredOut.join(", ");
    warnings.push(
      `TaskGraphBuilder filtered out ${filteredOut.length} task(s) by status: ${droppedIds}`,
    );
  }

  // No graph means nothing survived filtering — report an empty result.
  if (!graph) {
    return { tasks: [], warnings, totalTests: 0 };
  }

  // Graph nodes only carry the task ID, so keep a lookup back to the
  // original definitions (for duplicate IDs the last definition wins).
  const definitionsById = new Map();
  for (const definition of tasks) {
    definitionsById.set(definition.id, definition);
  }

  // Lower priority value = compiled earlier.
  const nodesInOrder = Array.from(graph.nodes.values());
  nodesInOrder.sort((left, right) => left.priority - right.priority);

  const handlerOptions = {
    graderProvider: options.graderProvider,
    rootDir: options.rootDir,
    evalMode: options.evalMode,
    models: options.models,
    rubricConfig,
  };

  const compiled = [];
  let totalTests = 0;
  for (const { taskId } of nodesInOrder) {
    const definition = definitionsById.get(taskId);
    if (!definition) {
      warnings.push(
        `TaskGraphBuilder produced node "${taskId}" with no matching LiteracyTaskDefinition — skipped`,
      );
      continue;
    }
    const result = compileLiteracyTask(definition, handlerOptions);
    compiled.push({ taskId: definition.id, result });
    warnings.push(...result.warnings);
    totalTests += result.tests.length;
  }

  return { tasks: compiled, warnings, totalTests };
}
90
/**
 * Compare old-style expanded entries with new-style compiled entries.
 *
 * This is the parallel comparison gate (task 7b). For each compiled task
 * it checks that the new compiler's output is structurally equivalent to
 * the legacy expand-tasks output:
 *
 *   - testCount: the number of compiled tests equals the number of legacy
 *     "(gold)" + "(baseline)" entries for that task;
 *   - assertionCount: every compiled "(gold)" test carries as many
 *     assertions as the task's first legacy "(gold)" entry.
 *
 * Legacy entries are attributed to a task when their description contains
 * the task ID. Tasks without any attributable legacy entry are skipped
 * rather than reported, since there is nothing to compare against.
 *
 * @param legacyEntries Entries produced by the legacy expand-tasks path.
 * @param newResult     Result returned by the new compiler bridge.
 * @returns `passed` (true when nothing differs), the list of
 *          discrepancies, and a one-line summary.
 */
export function compareCompilerOutputs(legacyEntries, newResult) {
  const discrepancies = [];
  const mentions = (entry, marker) => entry.description?.includes(marker) ?? false;

  for (const { taskId, result } of newResult.tasks) {
    // Scope the legacy entries to THIS task. Matching on the "(gold)" /
    // "(baseline)" markers alone would attribute every task's entries to
    // every task and make the per-task checks below meaningless.
    const legacyForTask = legacyEntries.filter((entry) => mentions(entry, taskId));
    const legacyGold = legacyForTask.filter((entry) => mentions(entry, "(gold)"));
    const legacyBaseline = legacyForTask.filter((entry) => mentions(entry, "(baseline)"));

    // Check test count matches (only when there is a legacy side at all).
    const legacyCount = legacyGold.length + legacyBaseline.length;
    const newTestCount = result.tests.length;
    if (legacyCount > 0 && newTestCount !== legacyCount) {
      discrepancies.push({
        taskId,
        field: "testCount",
        legacy: legacyCount,
        new_: newTestCount,
        message: `Test count mismatch: legacy=${legacyCount}, new=${newTestCount}`,
      });
    }

    // Check assertion count on gold entries against the task's first
    // legacy gold entry; without one (or without assertions on it) there
    // is no reference to compare to.
    const referenceGold = legacyGold[0];
    if (!referenceGold?.assert) {
      continue;
    }
    const legacyAssertCount = referenceGold.assert.length;
    for (const test of result.tests) {
      if (!test.description?.includes("(gold)")) {
        continue;
      }
      const assertCount = test.assert?.length ?? 0;
      if (assertCount !== legacyAssertCount) {
        discrepancies.push({
          taskId,
          field: "assertionCount",
          legacy: legacyAssertCount,
          new_: assertCount,
          message: `Gold assertion count mismatch: legacy=${legacyAssertCount}, new=${assertCount}`,
        });
      }
    }
  }

  const passed = discrepancies.length === 0;
  return {
    passed,
    discrepancies,
    summary: passed
      ? "All tasks produce structurally equivalent output"
      : `${discrepancies.length} discrepancy(ies) found`,
  };
}
146
+ // ---------------------------------------------------------------------------
147
+ // Rubric config loading
148
+ // ---------------------------------------------------------------------------
149
/**
 * Best-effort loader for the "rubrics" config under `rootDir`.
 *
 * Each entry of `data.templates` is normalised to
 * `{ header, scale, dimension, criteria_label }`:
 *   - `header` is always a string ("" when absent),
 *   - `scale` defaults to an empty array,
 *   - `dimension` / `criteria_label` are stringified when truthy,
 *     otherwise `undefined`.
 *
 * @param rootDir Directory handed to the config loader.
 * @returns `{ templates }`, or `undefined` when the config is missing, has
 *          no `templates`, or anything throws while it is being read.
 */
function loadRubricConfig(rootDir) {
  const loaded = tryLoadConfigFile("rubrics", rootDir);
  if (!loaded) {
    return undefined;
  }

  const optionalString = (value) => (value ? String(value) : undefined);

  try {
    const rawTemplates = loaded.data?.templates;
    if (!rawTemplates) {
      return undefined;
    }

    const templates = {};
    Object.entries(rawTemplates).forEach(([name, raw]) => {
      templates[name] = {
        header: String(raw.header ?? ""),
        scale: raw.scale ?? [],
        dimension: optionalString(raw.dimension),
        criteria_label: optionalString(raw.criteria_label),
      };
    });
    return { templates };
  } catch {
    // Deliberate best effort: a malformed rubric config is treated the
    // same as a missing one.
    return undefined;
  }
}