@sanity/ailf 0.5.0 → 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (288)
  1. package/config/features.ts +23 -0
  2. package/config/models.ts +83 -0
  3. package/config/prompts.ts +16 -0
  4. package/config/rubrics.ts +225 -0
  5. package/config/schedules.ts +47 -0
  6. package/config/sinks.ts +37 -0
  7. package/config/sources.ts +21 -0
  8. package/config/thresholds.ts +61 -0
  9. package/dist/_vendor/ailf-core/config-helpers.d.ts +174 -0
  10. package/dist/_vendor/ailf-core/config-helpers.js +150 -0
  11. package/dist/_vendor/ailf-core/env-helper.d.ts +35 -0
  12. package/dist/_vendor/ailf-core/env-helper.js +45 -0
  13. package/dist/_vendor/ailf-core/index.d.ts +3 -0
  14. package/dist/_vendor/ailf-core/index.js +5 -0
  15. package/dist/_vendor/ailf-core/ports/context.d.ts +15 -2
  16. package/dist/_vendor/ailf-core/ports/doc-fetcher.d.ts +2 -2
  17. package/dist/_vendor/ailf-core/ports/index.d.ts +2 -1
  18. package/dist/_vendor/ailf-core/ports/mode-handler.d.ts +129 -0
  19. package/dist/_vendor/ailf-core/ports/mode-handler.js +19 -0
  20. package/dist/_vendor/ailf-core/ports/task-source.d.ts +16 -122
  21. package/dist/_vendor/ailf-core/ports/task-source.js +7 -7
  22. package/dist/_vendor/ailf-core/schemas/eval-config.d.ts +7 -2
  23. package/dist/_vendor/ailf-core/schemas/eval-config.js +7 -2
  24. package/dist/_vendor/ailf-core/schemas/pipeline-request.d.ts +8 -3
  25. package/dist/_vendor/ailf-core/schemas/pipeline-request.js +6 -1
  26. package/dist/_vendor/ailf-core/schemas/pipeline.d.ts +14 -29
  27. package/dist/_vendor/ailf-core/schemas/pipeline.js +17 -8
  28. package/dist/_vendor/ailf-core/schemas/schedules.d.ts +14 -4
  29. package/dist/_vendor/ailf-core/schemas/schedules.js +6 -2
  30. package/dist/_vendor/ailf-core/schemas/sinks.d.ts +1 -1
  31. package/dist/_vendor/ailf-core/services/comparison-formatters.js +57 -19
  32. package/dist/_vendor/ailf-core/services/index.d.ts +2 -1
  33. package/dist/_vendor/ailf-core/services/index.js +2 -1
  34. package/dist/_vendor/ailf-core/services/scoring-engine.d.ts +153 -0
  35. package/dist/_vendor/ailf-core/services/scoring-engine.js +237 -0
  36. package/dist/_vendor/ailf-core/services/scoring.d.ts +15 -2
  37. package/dist/_vendor/ailf-core/services/scoring.js +25 -15
  38. package/dist/_vendor/ailf-core/types/branded-ids.d.ts +137 -0
  39. package/dist/_vendor/ailf-core/types/branded-ids.js +136 -0
  40. package/dist/_vendor/ailf-core/types/eval-mode-config.d.ts +150 -0
  41. package/dist/_vendor/ailf-core/types/eval-mode-config.js +24 -0
  42. package/dist/_vendor/ailf-core/types/generalized-task.d.ts +319 -0
  43. package/dist/_vendor/ailf-core/types/generalized-task.js +13 -0
  44. package/dist/_vendor/ailf-core/types/index.d.ts +45 -81
  45. package/dist/_vendor/ailf-core/types/index.js +8 -1
  46. package/dist/_vendor/ailf-core/types/plugin-registry.d.ts +202 -0
  47. package/dist/_vendor/ailf-core/types/plugin-registry.js +132 -0
  48. package/dist/_vendor/ailf-core/types/storage-schema.d.ts +199 -0
  49. package/dist/_vendor/ailf-core/types/storage-schema.js +39 -0
  50. package/dist/_vendor/ailf-core/types/task-graph.d.ts +86 -0
  51. package/dist/_vendor/ailf-core/types/task-graph.js +20 -0
  52. package/dist/_vendor/ailf-core/types/trace.d.ts +118 -0
  53. package/dist/_vendor/ailf-core/types/trace.js +18 -0
  54. package/dist/_vendor/ailf-core/types/variable-envelope.d.ts +80 -0
  55. package/dist/_vendor/ailf-core/types/variable-envelope.js +16 -0
  56. package/dist/_vendor/ailf-shared/dimension-names.d.ts +5 -18
  57. package/dist/_vendor/ailf-shared/dimension-names.js +6 -24
  58. package/dist/_vendor/ailf-shared/eval-modes.d.ts +38 -6
  59. package/dist/_vendor/ailf-shared/eval-modes.js +26 -2
  60. package/dist/_vendor/ailf-shared/index.d.ts +0 -1
  61. package/dist/_vendor/ailf-shared/index.js +0 -1
  62. package/dist/adapters/api-client/build-request.js +14 -13
  63. package/dist/adapters/config-sources/file-config-adapter.d.ts +20 -11
  64. package/dist/adapters/config-sources/file-config-adapter.js +38 -12
  65. package/dist/adapters/config-sources/index.d.ts +2 -0
  66. package/dist/adapters/config-sources/index.js +1 -0
  67. package/dist/adapters/config-sources/ts-config-loader.d.ts +59 -0
  68. package/dist/adapters/config-sources/ts-config-loader.js +133 -0
  69. package/dist/adapters/doc-fetchers/sanity-doc-fetcher.d.ts +3 -2
  70. package/dist/adapters/doc-fetchers/sanity-doc-fetcher.js +7 -2
  71. package/dist/adapters/task-sources/composite-task-source.d.ts +3 -3
  72. package/dist/adapters/task-sources/composite-task-source.js +1 -1
  73. package/dist/adapters/task-sources/content-lake-task-source.d.ts +7 -6
  74. package/dist/adapters/task-sources/content-lake-task-source.js +22 -23
  75. package/dist/adapters/task-sources/index.d.ts +1 -0
  76. package/dist/adapters/task-sources/index.js +1 -0
  77. package/dist/adapters/task-sources/repo-task-source.d.ts +4 -4
  78. package/dist/adapters/task-sources/repo-task-source.js +69 -16
  79. package/dist/adapters/task-sources/task-file-loader.d.ts +64 -0
  80. package/dist/adapters/task-sources/task-file-loader.js +83 -0
  81. package/dist/adapters/task-sources/yaml-task-source.d.ts +6 -6
  82. package/dist/adapters/task-sources/yaml-task-source.js +19 -16
  83. package/dist/cli.js +0 -2
  84. package/dist/commands/baseline.js +4 -1
  85. package/dist/commands/calculate-scores.js +1 -1
  86. package/dist/commands/coverage-audit.js +7 -1
  87. package/dist/commands/explain-handler.js +25 -23
  88. package/dist/commands/fetch-docs.js +3 -2
  89. package/dist/commands/generate-configs.js +1 -1
  90. package/dist/commands/interactive.js +11 -7
  91. package/dist/commands/pipeline-action.d.ts +2 -0
  92. package/dist/commands/pipeline-action.js +16 -6
  93. package/dist/commands/pipeline.d.ts +1 -0
  94. package/dist/commands/pipeline.js +4 -2
  95. package/dist/commands/pr-comment.js +1 -1
  96. package/dist/commands/publish.js +2 -2
  97. package/dist/commands/readiness-report.js +13 -6
  98. package/dist/composition-root.d.ts +1 -1
  99. package/dist/composition-root.js +67 -4
  100. package/dist/orchestration/build-app-context.js +1 -0
  101. package/dist/orchestration/build-step-sequence.js +24 -6
  102. package/dist/orchestration/steps/calculate-scores-step.js +24 -11
  103. package/dist/orchestration/steps/fetch-docs-step.js +6 -4
  104. package/dist/orchestration/steps/gap-analysis-step.js +8 -7
  105. package/dist/orchestration/steps/generate-configs-step.d.ts +16 -3
  106. package/dist/orchestration/steps/generate-configs-step.js +245 -51
  107. package/dist/orchestration/steps/grader-consistency-step.js +7 -4
  108. package/dist/orchestration/steps/mirror-repo-tasks-step.js +1 -1
  109. package/dist/orchestration/steps/readiness-step.js +5 -6
  110. package/dist/orchestration/steps/run-eval-step.d.ts +1 -2
  111. package/dist/orchestration/steps/run-eval-step.js +8 -7
  112. package/dist/pipeline/cache.d.ts +1 -1
  113. package/dist/pipeline/cache.js +36 -8
  114. package/dist/pipeline/calculate-scores.d.ts +2 -4
  115. package/dist/pipeline/calculate-scores.js +43 -113
  116. package/dist/pipeline/checks.js +2 -2
  117. package/dist/pipeline/compare.js +8 -8
  118. package/dist/pipeline/compiler/__tests__/agent-harness-handler.test.d.ts +10 -0
  119. package/dist/pipeline/compiler/__tests__/agent-harness-handler.test.js +288 -0
  120. package/dist/pipeline/compiler/__tests__/assertion-mapper.test.d.ts +9 -0
  121. package/dist/pipeline/compiler/__tests__/assertion-mapper.test.js +145 -0
  122. package/dist/pipeline/compiler/__tests__/knowledge-probe-handler.test.d.ts +10 -0
  123. package/dist/pipeline/compiler/__tests__/knowledge-probe-handler.test.js +314 -0
  124. package/dist/pipeline/compiler/__tests__/literacy-handler.test.d.ts +10 -0
  125. package/dist/pipeline/compiler/__tests__/literacy-handler.test.js +486 -0
  126. package/dist/pipeline/compiler/__tests__/mcp-server-handler.test.d.ts +10 -0
  127. package/dist/pipeline/compiler/__tests__/mcp-server-handler.test.js +355 -0
  128. package/dist/pipeline/compiler/__tests__/promptfoo-compiler.test.d.ts +9 -0
  129. package/dist/pipeline/compiler/__tests__/promptfoo-compiler.test.js +333 -0
  130. package/dist/pipeline/compiler/__tests__/sandbox-and-fixtures.test.d.ts +12 -0
  131. package/dist/pipeline/compiler/__tests__/sandbox-and-fixtures.test.js +210 -0
  132. package/dist/pipeline/compiler/__tests__/scoring-and-presets.test.d.ts +7 -0
  133. package/dist/pipeline/compiler/__tests__/scoring-and-presets.test.js +471 -0
  134. package/dist/pipeline/compiler/__tests__/scoring-bridge.test.d.ts +10 -0
  135. package/dist/pipeline/compiler/__tests__/scoring-bridge.test.js +184 -0
  136. package/dist/pipeline/compiler/__tests__/task-graph-builder.test.d.ts +8 -0
  137. package/dist/pipeline/compiler/__tests__/task-graph-builder.test.js +301 -0
  138. package/dist/pipeline/compiler/__tests__/telemetry.test.d.ts +9 -0
  139. package/dist/pipeline/compiler/__tests__/telemetry.test.js +503 -0
  140. package/dist/pipeline/compiler/assertion-mapper.d.ts +58 -0
  141. package/dist/pipeline/compiler/assertion-mapper.js +175 -0
  142. package/dist/pipeline/compiler/compiler-to-yaml.d.ts +51 -0
  143. package/dist/pipeline/compiler/compiler-to-yaml.js +222 -0
  144. package/dist/pipeline/compiler/config-loader.d.ts +56 -0
  145. package/dist/pipeline/compiler/config-loader.js +111 -0
  146. package/dist/pipeline/compiler/fixture-resolver.d.ts +41 -0
  147. package/dist/pipeline/compiler/fixture-resolver.js +113 -0
  148. package/dist/pipeline/compiler/hash.d.ts +11 -0
  149. package/dist/pipeline/compiler/hash.js +18 -0
  150. package/dist/pipeline/compiler/ignore-fields.d.ts +53 -0
  151. package/dist/pipeline/compiler/ignore-fields.js +113 -0
  152. package/dist/pipeline/compiler/index.d.ts +29 -0
  153. package/dist/pipeline/compiler/index.js +45 -0
  154. package/dist/pipeline/compiler/literacy-bridge.d.ts +102 -0
  155. package/dist/pipeline/compiler/literacy-bridge.js +172 -0
  156. package/dist/pipeline/compiler/mode-handlers/__fixtures__/agent-harness-example-tasks.d.ts +14 -0
  157. package/dist/pipeline/compiler/mode-handlers/__fixtures__/agent-harness-example-tasks.js +152 -0
  158. package/dist/pipeline/compiler/mode-handlers/__fixtures__/knowledge-probe-example-tasks.d.ts +32 -0
  159. package/dist/pipeline/compiler/mode-handlers/__fixtures__/knowledge-probe-example-tasks.js +176 -0
  160. package/dist/pipeline/compiler/mode-handlers/__fixtures__/mcp-example-tasks.d.ts +49 -0
  161. package/dist/pipeline/compiler/mode-handlers/__fixtures__/mcp-example-tasks.js +259 -0
  162. package/dist/pipeline/compiler/mode-handlers/agent-harness-handler.d.ts +70 -0
  163. package/dist/pipeline/compiler/mode-handlers/agent-harness-handler.js +485 -0
  164. package/dist/pipeline/compiler/mode-handlers/index.d.ts +16 -0
  165. package/dist/pipeline/compiler/mode-handlers/index.js +21 -0
  166. package/dist/pipeline/compiler/mode-handlers/knowledge-probe-handler.d.ts +76 -0
  167. package/dist/pipeline/compiler/mode-handlers/knowledge-probe-handler.js +245 -0
  168. package/dist/pipeline/compiler/mode-handlers/literacy-handler.d.ts +89 -0
  169. package/dist/pipeline/compiler/mode-handlers/literacy-handler.js +379 -0
  170. package/dist/pipeline/compiler/mode-handlers/mcp-assertions.d.ts +50 -0
  171. package/dist/pipeline/compiler/mode-handlers/mcp-assertions.js +277 -0
  172. package/dist/pipeline/compiler/mode-handlers/mcp-server-handler.d.ts +67 -0
  173. package/dist/pipeline/compiler/mode-handlers/mcp-server-handler.js +309 -0
  174. package/dist/pipeline/compiler/presets/index.d.ts +9 -0
  175. package/dist/pipeline/compiler/presets/index.js +8 -0
  176. package/dist/pipeline/compiler/presets/sanity-literacy.d.ts +45 -0
  177. package/dist/pipeline/compiler/presets/sanity-literacy.js +354 -0
  178. package/dist/pipeline/compiler/promptfoo-compiler.d.ts +96 -0
  179. package/dist/pipeline/compiler/promptfoo-compiler.js +230 -0
  180. package/dist/pipeline/compiler/provider-assembler.d.ts +39 -0
  181. package/dist/pipeline/compiler/provider-assembler.js +137 -0
  182. package/dist/pipeline/compiler/sandbox/docker-sandbox.d.ts +21 -0
  183. package/dist/pipeline/compiler/sandbox/docker-sandbox.js +136 -0
  184. package/dist/pipeline/compiler/sandbox/fixture-provisioner.d.ts +69 -0
  185. package/dist/pipeline/compiler/sandbox/fixture-provisioner.js +189 -0
  186. package/dist/pipeline/compiler/sandbox/git-worktree-sandbox.d.ts +20 -0
  187. package/dist/pipeline/compiler/sandbox/git-worktree-sandbox.js +114 -0
  188. package/dist/pipeline/compiler/sandbox/index.d.ts +10 -0
  189. package/dist/pipeline/compiler/sandbox/index.js +11 -0
  190. package/dist/pipeline/compiler/sandbox/sandbox-selector.d.ts +35 -0
  191. package/dist/pipeline/compiler/sandbox/sandbox-selector.js +86 -0
  192. package/dist/pipeline/compiler/sandbox/sandbox-strategy.d.ts +81 -0
  193. package/dist/pipeline/compiler/sandbox/sandbox-strategy.js +15 -0
  194. package/dist/pipeline/compiler/sandbox/tempdir-sandbox.d.ts +20 -0
  195. package/dist/pipeline/compiler/sandbox/tempdir-sandbox.js +74 -0
  196. package/dist/pipeline/compiler/scoring-bridge.d.ts +49 -0
  197. package/dist/pipeline/compiler/scoring-bridge.js +114 -0
  198. package/dist/pipeline/compiler/task-graph-builder.d.ts +54 -0
  199. package/dist/pipeline/compiler/task-graph-builder.js +291 -0
  200. package/dist/pipeline/compiler/telemetry/cost-tracker.d.ts +90 -0
  201. package/dist/pipeline/compiler/telemetry/cost-tracker.js +146 -0
  202. package/dist/pipeline/compiler/telemetry/index.d.ts +14 -0
  203. package/dist/pipeline/compiler/telemetry/index.js +19 -0
  204. package/dist/pipeline/compiler/telemetry/redactor.d.ts +58 -0
  205. package/dist/pipeline/compiler/telemetry/redactor.js +222 -0
  206. package/dist/pipeline/compiler/telemetry/tool-classifier.d.ts +32 -0
  207. package/dist/pipeline/compiler/telemetry/tool-classifier.js +120 -0
  208. package/dist/pipeline/compiler/telemetry/trace-collector.d.ts +75 -0
  209. package/dist/pipeline/compiler/telemetry/trace-collector.js +297 -0
  210. package/dist/pipeline/compiler/telemetry/trace-store.d.ts +78 -0
  211. package/dist/pipeline/compiler/telemetry/trace-store.js +85 -0
  212. package/dist/pipeline/compiler/variable-resolver.d.ts +46 -0
  213. package/dist/pipeline/compiler/variable-resolver.js +115 -0
  214. package/dist/pipeline/coverage-audit.d.ts +15 -5
  215. package/dist/pipeline/coverage-audit.js +41 -22
  216. package/dist/pipeline/eval-constants.d.ts +16 -6
  217. package/dist/pipeline/eval-constants.js +25 -4
  218. package/dist/pipeline/eval-fingerprint.d.ts +2 -2
  219. package/dist/pipeline/eval-fingerprint.js +8 -9
  220. package/dist/pipeline/expand-tasks.d.ts +19 -10
  221. package/dist/pipeline/expand-tasks.js +34 -28
  222. package/dist/pipeline/gap-analysis.d.ts +1 -1
  223. package/dist/pipeline/gap-analysis.js +2 -2
  224. package/dist/pipeline/generate-configs.d.ts +22 -4
  225. package/dist/pipeline/generate-configs.js +53 -24
  226. package/dist/pipeline/grader-api.d.ts +3 -3
  227. package/dist/pipeline/grader-api.js +5 -12
  228. package/dist/pipeline/grader-compare-runner.js +20 -27
  229. package/dist/pipeline/grader-comparison.d.ts +4 -8
  230. package/dist/pipeline/grader-comparison.js +11 -17
  231. package/dist/pipeline/grader-consistency-runner.d.ts +2 -3
  232. package/dist/pipeline/grader-consistency-runner.js +16 -20
  233. package/dist/pipeline/grader-consistency.d.ts +6 -10
  234. package/dist/pipeline/grader-consistency.js +13 -32
  235. package/dist/pipeline/grader-sensitivity-runner.js +7 -5
  236. package/dist/pipeline/grader-sensitivity.d.ts +2 -6
  237. package/dist/pipeline/grader-sensitivity.js +10 -10
  238. package/dist/pipeline/grader-validate-runner.js +7 -5
  239. package/dist/pipeline/grader-validation.d.ts +2 -6
  240. package/dist/pipeline/grader-validation.js +14 -22
  241. package/dist/pipeline/map-request-to-config.js +6 -1
  242. package/dist/pipeline/mirror-repo-tasks.d.ts +6 -6
  243. package/dist/pipeline/mirror-repo-tasks.js +16 -15
  244. package/dist/pipeline/normalize-mode.d.ts +49 -0
  245. package/dist/pipeline/normalize-mode.js +64 -0
  246. package/dist/pipeline/plan.d.ts +5 -2
  247. package/dist/pipeline/plan.js +134 -78
  248. package/dist/pipeline/pr-comment.js +2 -0
  249. package/dist/pipeline/profile-resolution.d.ts +22 -14
  250. package/dist/pipeline/profile-resolution.js +41 -19
  251. package/dist/pipeline/provenance.d.ts +2 -2
  252. package/dist/pipeline/provenance.js +12 -17
  253. package/dist/pipeline/release-report.js +4 -4
  254. package/dist/pipeline/repo-threshold-evaluator.d.ts +1 -1
  255. package/dist/pipeline/repo-threshold-evaluator.js +1 -1
  256. package/dist/pipeline/rubric-loader.d.ts +20 -0
  257. package/dist/pipeline/rubric-loader.js +37 -0
  258. package/dist/pipeline/validate.d.ts +4 -4
  259. package/dist/pipeline/validate.js +64 -53
  260. package/dist/schedules/loader.js +18 -8
  261. package/dist/scripts/migrate-task-mode.d.ts +24 -0
  262. package/dist/scripts/migrate-task-mode.js +85 -0
  263. package/dist/scripts/migrate-tasks-to-content-lake.js +11 -10
  264. package/dist/scripts/validate-task-sources.d.ts +1 -1
  265. package/dist/scripts/validate-task-sources.js +15 -15
  266. package/dist/sinks/loader.js +5 -7
  267. package/dist/sources.d.ts +7 -7
  268. package/dist/sources.js +22 -24
  269. package/dist/webhook/dispatch.js +2 -1
  270. package/package.json +6 -3
  271. package/tasks/knowledge-probe/define-type-api.task.ts +55 -0
  272. package/tasks/knowledge-probe/groq-projections.task.ts +59 -0
  273. package/tasks/literacy/frameworks.task.ts +128 -0
  274. package/tasks/literacy/functions.task.ts +69 -0
  275. package/tasks/literacy/groq.task.ts +258 -0
  276. package/tasks/literacy/nextjs-live.task.ts +75 -0
  277. package/tasks/literacy/studio-setup.task.ts +131 -0
  278. package/tasks/literacy/visual-editing.task.ts +146 -0
  279. package/config/features.yaml +0 -116
  280. package/config/models.yaml +0 -116
  281. package/config/prompts.yaml +0 -75
  282. package/config/rubrics.yaml +0 -81
  283. package/config/schedules.yaml +0 -43
  284. package/config/sinks.yaml +0 -54
  285. package/config/sources.yaml +0 -51
  286. package/config/thresholds.yaml +0 -49
  287. package/dist/agent-observer/test-imports.d.ts +0 -7
  288. package/dist/agent-observer/test-imports.js +0 -185
@@ -7,150 +7,44 @@
7
7
  * - RepoTaskSource (tasks-as-content Phase 4) — reads .ailf/tasks/
8
8
  *
9
9
  * The key invariant: the pipeline orchestrator and all downstream steps
10
- * work with TaskDefinition[] regardless of where they came from.
10
+ * work with GeneralizedTaskDefinition[] regardless of where they came from.
11
11
  */
12
+ import type { GeneralizedAssertionDefinition, GeneralizedDocRef, GeneralizedTaskDefinition, GeneralizedTemplatedAssertion, IdDocRef, PathDocRef, PerspectiveDocRef, SlugDocRef } from "../types/generalized-task.js";
12
13
  import type { FilterOptions } from "../types/index.js";
13
- /** A templated assertion referencing a rubric template from config/rubrics.yaml */
14
- export interface TemplatedAssertion {
15
- type: "llm-rubric";
16
- template: string;
17
- criteria: string[];
18
- weight?: number;
19
- }
20
- /** A value-based assertion (contains, javascript, etc.) */
21
- export interface ValueAssertion {
22
- type: string;
23
- value?: unknown;
24
- weight?: number;
25
- [key: string]: unknown;
26
- }
27
- /** Any assertion definition — either templated or value-based */
28
- export type AssertionDefinition = TemplatedAssertion | ValueAssertion;
29
- /** Baseline variant configuration */
30
- export interface BaselineConfig {
31
- /** Whether to generate a baseline variant. Default: true */
32
- enabled?: boolean;
33
- /** Rubric mode for baseline. Default: "full" */
34
- rubric?: "abbreviated" | "full" | "none";
35
- }
36
- /**
37
- * A canonical documentation reference. Each entry resolves docs through
38
- * one of four strategies, discriminated by key presence (no explicit
39
- * `type` field). All strategies carry an optional `reason` for context.
40
- *
41
- * Strategies:
42
- * - `slug` — one article by slug field (legacy, may not be unique)
43
- * - `path` — one article by URL path (unique across sections)
44
- * - `id` — one document by Sanity `_id` (drafts, imports)
45
- * - `perspective` — all articles in a content release (one-to-many)
46
- *
47
- * @see docs/design-docs/canonical-doc-resolution.md
48
- */
49
- export type CanonicalDocRef = SlugDocRef | PathDocRef | IdDocRef | PerspectiveDocRef;
50
- /** Resolve by article slug field. Legacy — prefer `path` for uniqueness. */
51
- export interface SlugDocRef {
52
- slug: string;
53
- reason?: string;
54
- }
55
- /** Resolve by URL path (after /docs/). Unique across sections. */
56
- export interface PathDocRef {
57
- path: string;
58
- reason?: string;
59
- }
60
- /** Resolve by Sanity document `_id`. The primary resolution strategy.
61
- *
62
- * Optional `slug` and `path` provide human-readable context — they are
63
- * NOT used for resolution (the `_id` is authoritative) but help YAML
64
- * authors understand which document is being referenced. The Content Lake
65
- * adapter populates them from the dereferenced article.
66
- */
67
- export interface IdDocRef {
68
- id: string;
69
- reason?: string;
70
- /** Human-readable slug (informational only — not used for resolution) */
71
- slug?: string;
72
- /** Human-readable path (informational only — not used for resolution) */
73
- path?: string;
74
- }
75
- /** Resolve all articles in a content release. One-to-many. */
76
- export interface PerspectiveDocRef {
77
- perspective: string;
78
- reason?: string;
79
- }
80
- /**
81
- * A loaded, validated task definition ready for expansion.
82
- *
83
- * This is the canonical intermediate representation — adapters produce
84
- * this from YAML, Content Lake, or .ailf/ files. Downstream consumers
85
- * (expansion, doc fetching, validation) work exclusively with this type.
86
- *
87
- * Design notes:
88
- * - `taskPrompt` is extracted from `vars.task` in YAML format
89
- * - `docsPath` is NOT included — it's an infrastructure detail derived
90
- * from convention (`file://contexts/canonical/${id}.md`)
91
- * - `featureArea` is derived by the adapter (filename stem, document
92
- * field, directory structure — depends on the source)
93
- */
94
- export interface TaskDefinition {
95
- /** Unique task identifier */
96
- id: string;
97
- /** Human-readable description */
98
- description: string;
99
- /** Feature area this task belongs to */
100
- featureArea: string;
101
- /** The implementation task prompt (the user-facing request) */
102
- taskPrompt: string;
103
- /** Canonical doc references with reasons */
104
- canonicalDocs: CanonicalDocRef[];
105
- /** Path to the reference solution (relative to eval package root) */
106
- referenceSolution: string;
107
- /** Whether doc coverage rubric should be auto-generated */
108
- docCoverage: boolean;
109
- /** Assertion definitions (rubric templates + value assertions) */
110
- assertions: AssertionDefinition[];
111
- /** Baseline variant configuration */
112
- baseline?: BaselineConfig;
113
- /** Additional template variables beyond task (e.g., custom vars) */
114
- extraVars?: Record<string, unknown>;
115
- /** Lifecycle status — controls pipeline inclusion. Absent = "active". */
116
- status?: "active" | "archived" | "draft" | "paused";
117
- /** Freeform labels for filtering and organization */
118
- tags?: string[];
119
- }
120
- /** Check if a canonical doc ref resolves by slug.
14
+ /** Check if a doc ref resolves by slug.
121
15
  *
122
16
  * Excludes IdDocRef (which may carry an optional `slug` for display).
123
17
  * When both `id` and `slug` are present, it's an IdDocRef, not a SlugDocRef.
124
18
  */
125
- export declare function isSlugRef(ref: CanonicalDocRef): ref is SlugDocRef;
126
- /** Check if a canonical doc ref resolves by path.
19
+ export declare function isSlugRef(ref: GeneralizedDocRef): ref is SlugDocRef;
20
+ /** Check if a doc ref resolves by path.
127
21
  *
128
22
  * Excludes IdDocRef (which may carry an optional `path` for display).
129
23
  * When both `id` and `path` are present, it's an IdDocRef, not a PathDocRef.
130
24
  */
131
- export declare function isPathRef(ref: CanonicalDocRef): ref is PathDocRef;
132
- /** Check if a canonical doc ref resolves by document ID.
25
+ export declare function isPathRef(ref: GeneralizedDocRef): ref is PathDocRef;
26
+ /** Check if a doc ref resolves by document ID.
133
27
  *
134
28
  * Uses `"id" in ref` as the primary discriminator. IdDocRef may also carry
135
29
  * optional `slug` and `path` for display purposes, so we cannot exclude
136
30
  * on those keys. When both `id` and `slug` are present, `id` wins.
137
31
  */
138
- export declare function isIdRef(ref: CanonicalDocRef): ref is IdDocRef;
139
- /** Check if a canonical doc ref resolves by content release perspective */
140
- export declare function isPerspectiveRef(ref: CanonicalDocRef): ref is PerspectiveDocRef;
32
+ export declare function isIdRef(ref: GeneralizedDocRef): ref is IdDocRef;
33
+ /** Check if a doc ref resolves by content release perspective */
34
+ export declare function isPerspectiveRef(ref: GeneralizedDocRef): ref is PerspectiveDocRef;
141
35
  /**
142
- * Extract a display identifier from any canonical doc ref.
36
+ * Extract a display identifier from any doc ref.
143
37
  * Useful for logging, error messages, and retrieval metrics.
144
38
  */
145
- export declare function canonicalDocRefLabel(ref: CanonicalDocRef): string;
39
+ export declare function canonicalDocRefLabel(ref: GeneralizedDocRef): string;
146
40
  /** Check if an assertion uses the templated format (template + criteria) */
147
- export declare function isTemplatedAssertion(entry: AssertionDefinition): entry is TemplatedAssertion;
41
+ export declare function isTemplatedAssertion(entry: GeneralizedAssertionDefinition): entry is GeneralizedTemplatedAssertion;
148
42
  /**
149
43
  * Port: Where task definitions come from.
150
44
  *
151
45
  * The pipeline never knows HOW tasks are loaded — it only sees
152
- * TaskDefinition[]. The adapter handles YAML parsing, GROQ queries,
153
- * filesystem scanning, etc.
46
+ * GeneralizedTaskDefinition[]. The adapter handles YAML parsing, GROQ
47
+ * queries, filesystem scanning, etc.
154
48
  */
155
49
  export interface TaskSource {
156
50
  /**
@@ -159,5 +53,5 @@ export interface TaskSource {
159
53
  * @param filter — Area, task ID, or changed-doc filters
160
54
  * @returns Validated task definitions ready for expansion
161
55
  */
162
- loadTasks(filter?: FilterOptions): Promise<TaskDefinition[]>;
56
+ loadTasks(filter?: FilterOptions): Promise<GeneralizedTaskDefinition[]>;
163
57
  }
@@ -7,12 +7,12 @@
7
7
  * - RepoTaskSource (tasks-as-content Phase 4) — reads .ailf/tasks/
8
8
  *
9
9
  * The key invariant: the pipeline orchestrator and all downstream steps
10
- * work with TaskDefinition[] regardless of where they came from.
10
+ * work with GeneralizedTaskDefinition[] regardless of where they came from.
11
11
  */
12
12
  // ---------------------------------------------------------------------------
13
- // Type guards — canonical doc refs
13
+ // Type guards — doc refs
14
14
  // ---------------------------------------------------------------------------
15
- /** Check if a canonical doc ref resolves by slug.
15
+ /** Check if a doc ref resolves by slug.
16
16
  *
17
17
  * Excludes IdDocRef (which may carry an optional `slug` for display).
18
18
  * When both `id` and `slug` are present, it's an IdDocRef, not a SlugDocRef.
@@ -20,7 +20,7 @@
20
20
  export function isSlugRef(ref) {
21
21
  return "slug" in ref && !("id" in ref);
22
22
  }
23
- /** Check if a canonical doc ref resolves by path.
23
+ /** Check if a doc ref resolves by path.
24
24
  *
25
25
  * Excludes IdDocRef (which may carry an optional `path` for display).
26
26
  * When both `id` and `path` are present, it's an IdDocRef, not a PathDocRef.
@@ -28,7 +28,7 @@ export function isSlugRef(ref) {
28
28
  export function isPathRef(ref) {
29
29
  return "path" in ref && !("id" in ref);
30
30
  }
31
- /** Check if a canonical doc ref resolves by document ID.
31
+ /** Check if a doc ref resolves by document ID.
32
32
  *
33
33
  * Uses `"id" in ref` as the primary discriminator. IdDocRef may also carry
34
34
  * optional `slug` and `path` for display purposes, so we cannot exclude
@@ -37,12 +37,12 @@ export function isPathRef(ref) {
37
37
  export function isIdRef(ref) {
38
38
  return "id" in ref;
39
39
  }
40
- /** Check if a canonical doc ref resolves by content release perspective */
40
+ /** Check if a doc ref resolves by content release perspective */
41
41
  export function isPerspectiveRef(ref) {
42
42
  return "perspective" in ref;
43
43
  }
44
44
  /**
45
- * Extract a display identifier from any canonical doc ref.
45
+ * Extract a display identifier from any doc ref.
46
46
  * Useful for logging, error messages, and retrieval metrics.
47
47
  */
48
48
  export function canonicalDocRefLabel(ref) {
@@ -29,10 +29,15 @@ export declare const EvalConfigSchema: z.ZodObject<{
29
29
  graderReplications: z.ZodOptional<z.ZodNumber>;
30
30
  headers: z.ZodOptional<z.ZodRecord<z.ZodString, z.ZodString>>;
31
31
  mode: z.ZodOptional<z.ZodEnum<{
32
- agentic: "agentic";
32
+ custom: "custom";
33
+ literacy: "literacy";
34
+ "mcp-server": "mcp-server";
35
+ "agent-harness": "agent-harness";
36
+ "knowledge-probe": "knowledge-probe";
33
37
  baseline: "baseline";
34
- full: "full";
38
+ agentic: "agentic";
35
39
  observed: "observed";
40
+ full: "full";
36
41
  }>>;
37
42
  noAutoScope: z.ZodOptional<z.ZodBoolean>;
38
43
  noCache: z.ZodOptional<z.ZodBoolean>;
@@ -10,6 +10,7 @@
10
10
  * (studio-eval-config) so Content Lake documents validate identically.
11
11
  */
12
12
  import { z } from "zod";
13
+ import { RAW_EVAL_MODES } from "../../ailf-shared/index.js";
13
14
  export const EvalConfigSchema = z
14
15
  .object({
15
16
  /** Allowed origins for agentic mode */
@@ -46,8 +47,12 @@ export const EvalConfigSchema = z
46
47
  graderReplications: z.number().int().positive().optional(),
47
48
  /** Custom headers for doc fetching */
48
49
  headers: z.record(z.string(), z.string()).optional(),
49
- /** Evaluation mode */
50
- mode: z.enum(["baseline", "agentic", "observed", "full"]).optional(),
50
+ /**
51
+ * Evaluation mode accepts both canonical and legacy names.
52
+ * Legacy names ("baseline", "agentic", "observed", "full") must pass
53
+ * through normalizeMode() before entering typed pipeline code.
54
+ */
55
+ mode: z.enum(RAW_EVAL_MODES).optional(),
51
56
  /** Disable release-aware auto-scoping */
52
57
  noAutoScope: z.boolean().optional(),
53
58
  /** Disable local cache */
@@ -49,10 +49,15 @@ export declare const PipelineRequestSchema: z.ZodObject<{
49
49
  inlineTasks: z.ZodOptional<z.ZodArray<z.ZodRecord<z.ZodString, z.ZodUnknown>>>;
50
50
  jobId: z.ZodOptional<z.ZodString>;
51
51
  mode: z.ZodOptional<z.ZodEnum<{
52
- agentic: "agentic";
52
+ custom: "custom";
53
+ literacy: "literacy";
54
+ "mcp-server": "mcp-server";
55
+ "agent-harness": "agent-harness";
56
+ "knowledge-probe": "knowledge-probe";
53
57
  baseline: "baseline";
54
- full: "full";
58
+ agentic: "agentic";
55
59
  observed: "observed";
60
+ full: "full";
56
61
  }>>;
57
62
  noAutoScope: z.ZodOptional<z.ZodBoolean>;
58
63
  noCache: z.ZodOptional<z.ZodBoolean>;
@@ -70,9 +75,9 @@ export declare const PipelineRequestSchema: z.ZodObject<{
70
75
  source: z.ZodOptional<z.ZodString>;
71
76
  sourceReportId: z.ZodOptional<z.ZodString>;
72
77
  taskMode: z.ZodOptional<z.ZodEnum<{
78
+ inline: "inline";
73
79
  "content-lake": "content-lake";
74
80
  yaml: "yaml";
75
- inline: "inline";
76
81
  }>>;
77
82
  tasks: z.ZodOptional<z.ZodArray<z.ZodString>>;
78
83
  urls: z.ZodOptional<z.ZodArray<z.ZodString>>;
@@ -13,6 +13,7 @@
13
13
  * @see packages/eval/src/pipeline/map-request-to-config.ts — maps to ResolvedConfig
14
14
  */
15
15
  import { z } from "zod";
16
+ import { RAW_EVAL_MODES } from "../../ailf-shared/index.js";
16
17
  // ---------------------------------------------------------------------------
17
18
  // Debug options — boolean shorthand or structured object
18
19
  // ---------------------------------------------------------------------------
@@ -69,7 +70,11 @@ export const PipelineRequestSchema = z.object({
69
70
  headers: z.record(z.string(), z.string()).optional(),
70
71
  inlineTasks: z.array(z.record(z.string(), z.unknown())).optional(),
71
72
  jobId: z.string().optional(),
72
- mode: z.enum(["baseline", "agentic", "observed", "full"]).optional(),
73
+ /**
74
+ * Evaluation mode — accepts both canonical and legacy names.
75
+ * Legacy names must pass through normalizeMode() before entering typed pipeline code.
76
+ */
77
+ mode: z.enum(RAW_EVAL_MODES).optional(),
73
78
  noAutoScope: z.boolean().optional(),
74
79
  noCache: z.boolean().optional(),
75
80
  noRemoteCache: z.boolean().optional(),
@@ -47,7 +47,7 @@ export type WeightProfile = z.infer<typeof WeightProfileSchema>;
47
47
  */
48
48
  export declare const RubricConfigSchema: z.ZodObject<{
49
49
  footer: z.ZodString;
50
- "mode-profiles": z.ZodOptional<z.ZodRecord<z.ZodString, z.ZodRecord<z.ZodString, z.ZodString>>>;
50
+ "mode-profiles": z.ZodOptional<z.ZodRecord<z.ZodString, z.ZodRecord<z.ZodString, z.ZodUnion<readonly [z.ZodString, z.ZodRecord<z.ZodString, z.ZodString>]>>>>;
51
51
  profiles: z.ZodOptional<z.ZodRecord<z.ZodString, z.ZodRecord<z.ZodString, z.ZodNumber>>>;
52
52
  templates: z.ZodRecord<z.ZodString, z.ZodObject<{
53
53
  criteria_label: z.ZodOptional<z.ZodNullable<z.ZodString>>;
@@ -67,17 +67,17 @@ export declare const FeatureSchema: z.ZodObject<{
67
67
  id: z.ZodString;
68
68
  name: z.ZodString;
69
69
  priority: z.ZodEnum<{
70
+ critical: "critical";
70
71
  high: "high";
71
- low: "low";
72
72
  medium: "medium";
73
- critical: "critical";
73
+ low: "low";
74
74
  }>;
75
75
  sections: z.ZodArray<z.ZodString>;
76
76
  status: z.ZodEnum<{
77
77
  covered: "covered";
78
- "out-of-scope": "out-of-scope";
79
- planned: "planned";
80
78
  uncovered: "uncovered";
79
+ planned: "planned";
80
+ "out-of-scope": "out-of-scope";
81
81
  }>;
82
82
  taskCount: z.ZodOptional<z.ZodNumber>;
83
83
  }, z.core.$strip>;
@@ -92,17 +92,17 @@ export declare const FeatureRegistrySchema: z.ZodObject<{
92
92
  id: z.ZodString;
93
93
  name: z.ZodString;
94
94
  priority: z.ZodEnum<{
95
+ critical: "critical";
95
96
  high: "high";
96
- low: "low";
97
97
  medium: "medium";
98
- critical: "critical";
98
+ low: "low";
99
99
  }>;
100
100
  sections: z.ZodArray<z.ZodString>;
101
101
  status: z.ZodEnum<{
102
102
  covered: "covered";
103
- "out-of-scope": "out-of-scope";
104
- planned: "planned";
105
103
  uncovered: "uncovered";
104
+ planned: "planned";
105
+ "out-of-scope": "out-of-scope";
106
106
  }>;
107
107
  taskCount: z.ZodOptional<z.ZodNumber>;
108
108
  }, z.core.$strip>>;
@@ -440,14 +440,11 @@ export declare const TaskFileSchema: z.ZodArray<z.ZodUnion<readonly [z.ZodObject
440
440
  export type TaskFile = z.infer<typeof TaskFileSchema>;
441
441
  /**
442
442
  * Schema for per-dimension threshold values.
443
+ * Uses a dynamic record to support all evaluation modes, not just literacy.
443
444
  * Keys use kebab-case to match YAML convention; the threshold engine
444
445
  * normalizes to camelCase for comparison against FeatureScore fields.
445
446
  */
446
- export declare const ThresholdDimensionsSchema: z.ZodObject<{
447
- "code-correctness": z.ZodOptional<z.ZodNumber>;
448
- "doc-coverage": z.ZodOptional<z.ZodNumber>;
449
- "task-completion": z.ZodOptional<z.ZodNumber>;
450
- }, z.core.$strip>;
447
+ export declare const ThresholdDimensionsSchema: z.ZodRecord<z.ZodString, z.ZodNumber>;
451
448
  /** Inferred TypeScript type for threshold dimension overrides. */
452
449
  export type ThresholdDimensions = z.infer<typeof ThresholdDimensionsSchema>;
453
450
  /**
@@ -457,11 +454,7 @@ export type ThresholdDimensions = z.infer<typeof ThresholdDimensionsSchema>;
457
454
  export declare const ThresholdDefaultsSchema: z.ZodObject<{
458
455
  ceiling: z.ZodOptional<z.ZodNumber>;
459
456
  composite: z.ZodNumber;
460
- dimensions: z.ZodOptional<z.ZodObject<{
461
- "code-correctness": z.ZodOptional<z.ZodNumber>;
462
- "doc-coverage": z.ZodOptional<z.ZodNumber>;
463
- "task-completion": z.ZodOptional<z.ZodNumber>;
464
- }, z.core.$strip>>;
457
+ dimensions: z.ZodOptional<z.ZodRecord<z.ZodString, z.ZodNumber>>;
465
458
  "doc-lift": z.ZodOptional<z.ZodNumber>;
466
459
  }, z.core.$strip>;
467
460
  /** Inferred TypeScript type for threshold defaults. */
@@ -501,21 +494,13 @@ export declare const ThresholdConfigSchema: z.ZodObject<{
501
494
  areas: z.ZodOptional<z.ZodRecord<z.ZodString, z.ZodObject<{
502
495
  ceiling: z.ZodOptional<z.ZodOptional<z.ZodNumber>>;
503
496
  composite: z.ZodOptional<z.ZodNumber>;
504
- dimensions: z.ZodOptional<z.ZodOptional<z.ZodObject<{
505
- "code-correctness": z.ZodOptional<z.ZodNumber>;
506
- "doc-coverage": z.ZodOptional<z.ZodNumber>;
507
- "task-completion": z.ZodOptional<z.ZodNumber>;
508
- }, z.core.$strip>>>;
497
+ dimensions: z.ZodOptional<z.ZodOptional<z.ZodRecord<z.ZodString, z.ZodNumber>>>;
509
498
  "doc-lift": z.ZodOptional<z.ZodOptional<z.ZodNumber>>;
510
499
  }, z.core.$strip>>>;
511
500
  defaults: z.ZodObject<{
512
501
  ceiling: z.ZodOptional<z.ZodNumber>;
513
502
  composite: z.ZodNumber;
514
- dimensions: z.ZodOptional<z.ZodObject<{
515
- "code-correctness": z.ZodOptional<z.ZodNumber>;
516
- "doc-coverage": z.ZodOptional<z.ZodNumber>;
517
- "task-completion": z.ZodOptional<z.ZodNumber>;
518
- }, z.core.$strip>>;
503
+ dimensions: z.ZodOptional<z.ZodRecord<z.ZodString, z.ZodNumber>>;
519
504
  "doc-lift": z.ZodOptional<z.ZodNumber>;
520
505
  }, z.core.$strip>;
521
506
  regression: z.ZodOptional<z.ZodObject<{
@@ -43,10 +43,22 @@ const WeightProfileSchema = z
43
43
  return Math.abs(sum - 1.0) < 0.001;
44
44
  }, { message: "profile weights must sum to 1.0" });
45
45
  /**
46
- * Mode-to-profile bindings — maps (mode, variant) pairs to profile names.
47
- * Example: { baseline: { gold: "default", baseline: "output-only" } }
46
+ * Mode-to-profile bindings — maps (mode, perspective) pairs to profile names.
47
+ *
48
+ * Flat form (most modes):
49
+ * { "mcp-server": { gold: "mcp-behavior" } }
50
+ *
51
+ * Nested form (literacy mode with variant sub-keys):
52
+ * { literacy: { baseline: { gold: "default", baseline: "output-only" }, agentic: { gold: "default" } } }
53
+ *
54
+ * The nested form adds a variant level between mode and perspective,
55
+ * allowing a single canonical mode to host multiple scoring variants.
48
56
  */
49
- const ModeProfilesSchema = z.record(z.string(), z.record(z.string(), z.string()));
57
+ const ModeProfileEntrySchema = z.union([
58
+ z.string(),
59
+ z.record(z.string(), z.string()),
60
+ ]);
61
+ const ModeProfilesSchema = z.record(z.string(), z.record(z.string(), ModeProfileEntrySchema));
50
62
  /**
51
63
  * Schema for the full config/rubrics.yaml config file.
52
64
  *
@@ -277,14 +289,11 @@ export const TaskFileSchema = z
277
289
  // ---------------------------------------------------------------------------
278
290
  /**
279
291
  * Schema for per-dimension threshold values.
292
+ * Uses a dynamic record to support all evaluation modes, not just literacy.
280
293
  * Keys use kebab-case to match YAML convention; the threshold engine
281
294
  * normalizes to camelCase for comparison against FeatureScore fields.
282
295
  */
283
- export const ThresholdDimensionsSchema = z.object({
284
- "code-correctness": z.number().min(0).max(100).optional(),
285
- "doc-coverage": z.number().min(0).max(100).optional(),
286
- "task-completion": z.number().min(0).max(100).optional(),
287
- });
296
+ export const ThresholdDimensionsSchema = z.record(z.string(), z.number().min(0).max(100));
288
297
  /**
289
298
  * Schema for threshold defaults (and per-area overrides).
290
299
  * All fields are optional in per-area overrides; defaults must have composite.
@@ -18,10 +18,15 @@ export declare const ScheduleEntrySchema: z.ZodObject<{
18
18
  cron: z.ZodString;
19
19
  enabled: z.ZodDefault<z.ZodBoolean>;
20
20
  mode: z.ZodDefault<z.ZodEnum<{
21
- agentic: "agentic";
21
+ custom: "custom";
22
+ literacy: "literacy";
23
+ "mcp-server": "mcp-server";
24
+ "agent-harness": "agent-harness";
25
+ "knowledge-probe": "knowledge-probe";
22
26
  baseline: "baseline";
23
- full: "full";
27
+ agentic: "agentic";
24
28
  observed: "observed";
29
+ full: "full";
25
30
  }>>;
26
31
  name: z.ZodString;
27
32
  publish: z.ZodDefault<z.ZodBoolean>;
@@ -53,10 +58,15 @@ export declare const SchedulesFileSchema: z.ZodObject<{
53
58
  cron: z.ZodString;
54
59
  enabled: z.ZodDefault<z.ZodBoolean>;
55
60
  mode: z.ZodDefault<z.ZodEnum<{
56
- agentic: "agentic";
61
+ custom: "custom";
62
+ literacy: "literacy";
63
+ "mcp-server": "mcp-server";
64
+ "agent-harness": "agent-harness";
65
+ "knowledge-probe": "knowledge-probe";
57
66
  baseline: "baseline";
58
- full: "full";
67
+ agentic: "agentic";
59
68
  observed: "observed";
69
+ full: "full";
60
70
  }>>;
61
71
  name: z.ZodString;
62
72
  publish: z.ZodDefault<z.ZodBoolean>;
@@ -11,6 +11,7 @@
11
11
  * @see docs/design-docs/report-store/implementation.md — Phase 5
12
12
  */
13
13
  import { z } from "zod";
14
+ import { RAW_EVAL_MODES } from "../../ailf-shared/index.js";
14
15
  // ---------------------------------------------------------------------------
15
16
  // Cron expression validation
16
17
  // ---------------------------------------------------------------------------
@@ -34,8 +35,11 @@ export const ScheduleEntrySchema = z.object({
34
35
  cron: CronSchema,
35
36
  /** Whether this schedule is active */
36
37
  enabled: z.boolean().default(true),
37
- /** Evaluation mode */
38
- mode: z.enum(["agentic", "baseline", "full", "observed"]).default("baseline"),
38
+ /**
39
+ * Evaluation mode accepts both canonical and legacy names.
40
+ * Legacy names must pass through normalizeMode() before entering typed pipeline code.
41
+ */
42
+ mode: z.enum(RAW_EVAL_MODES).default("baseline"),
39
43
  /** Human-readable schedule name (used as report tag) */
40
44
  name: z
41
45
  .string()
@@ -17,10 +17,10 @@
17
17
  import { z } from "zod";
18
18
  /** All supported sink types as a Zod union. */
19
19
  export declare const SinkTypeSchema: z.ZodEnum<{
20
- webhook: "webhook";
21
20
  bigquery: "bigquery";
22
21
  "github-comment": "github-comment";
23
22
  slack: "slack";
23
+ webhook: "webhook";
24
24
  }>;
25
25
  /** Supported sink type string literal union. */
26
26
  export type SinkType = z.infer<typeof SinkTypeSchema>;
@@ -25,12 +25,21 @@ export function formatComparisonMarkdown(report) {
25
25
  lines.push("");
26
26
  lines.push(`**Overall: ${Math.round(report.baseline.overall.avgScore)} → ${Math.round(report.experiment.overall.avgScore)}** (${overallIcon} ${deltaStr(overall)})`);
27
27
  lines.push("");
28
- // Per-area table
29
- lines.push("| Feature | Baseline | Current | Delta | Task | Code | Docs |");
30
- lines.push("|---------|----------|---------|-------|------|------|------|");
28
+ // Derive dimension columns from the first area's keys (all areas share the
29
+ // same scoring profile, so the key set is uniform).
30
+ const dimKeys = report.areas.length > 0
31
+ ? Object.keys(report.areas[0].dimensions)
32
+ : Object.keys(report.deltas.perDimension);
33
+ // Per-area table — columns are dynamic
34
+ const dimHeaders = dimKeys.map(kebabToTitleCase);
35
+ const headerRow = ["Feature", "Baseline", "Current", "Delta", ...dimHeaders];
36
+ const separatorRow = headerRow.map(() => "------");
37
+ lines.push(`| ${headerRow.join(" | ")} |`);
38
+ lines.push(`|${separatorRow.join("|")}|`);
31
39
  for (const a of report.areas) {
32
40
  const icon = changeIcon(a.change);
33
- lines.push(`| ${a.area} | ${a.baseline} | ${a.experiment} | ${icon} ${deltaStr(a.delta)} | ${deltaStr(a.dimensions.taskCompletion.delta)} | ${deltaStr(a.dimensions.codeCorrectness.delta)} | ${deltaStr(a.dimensions.docCoverage.delta)} |`);
41
+ const dimCells = dimKeys.map((k) => deltaStr(a.dimensions[k]?.delta ?? 0));
42
+ lines.push(`| ${a.area} | ${a.baseline} | ${a.experiment} | ${icon} ${deltaStr(a.delta)} | ${dimCells.join(" | ")} |`);
34
43
  }
35
44
  lines.push("");
36
45
  // Summary
@@ -55,9 +64,9 @@ export function formatComparisonMarkdown(report) {
55
64
  const dim = report.deltas.perDimension;
56
65
  lines.push("| Dimension | Delta |");
57
66
  lines.push("|-----------|-------|");
58
- lines.push(`| Task Completion | ${deltaStr(dim.taskCompletion)} |`);
59
- lines.push(`| Code Correctness | ${deltaStr(dim.codeCorrectness)} |`);
60
- lines.push(`| Doc Coverage | ${deltaStr(dim.docCoverage)} |`);
67
+ for (const k of Object.keys(dim)) {
68
+ lines.push(`| ${kebabToTitleCase(k)} | ${deltaStr(dim[k])} |`);
69
+ }
61
70
  lines.push(`| Doc Lift | ${deltaStr(report.deltas.docLift)} |`);
62
71
  if (report.deltas.cost !== undefined) {
63
72
  const costStr = report.deltas.cost > 0
@@ -91,29 +100,51 @@ export function formatComparisonTable(report) {
91
100
  : "unchanged");
92
101
  lines.push(` Overall: ${Math.round(report.baseline.overall.avgScore)} → ${Math.round(report.experiment.overall.avgScore)} (${overallIcon} ${deltaStr(overall)})`);
93
102
  lines.push("");
94
- // Per-dimension averages
103
+ // Per-dimension averages — derived dynamically from the report
95
104
  const dim = report.deltas.perDimension;
105
+ const dimKeys = report.areas.length > 0
106
+ ? Object.keys(report.areas[0].dimensions)
107
+ : Object.keys(dim);
96
108
  lines.push(" Dimension averages:");
97
- lines.push(` Task Completion: ${deltaStr(dim.taskCompletion)}`);
98
- lines.push(` Code Correctness: ${deltaStr(dim.codeCorrectness)}`);
99
- lines.push(` Doc Coverage: ${deltaStr(dim.docCoverage)}`);
100
- lines.push(` Doc Lift: ${deltaStr(report.deltas.docLift)}`);
109
+ // Pad labels to the longest dimension label for alignment
110
+ const dimLabels = dimKeys.map(kebabToTitleCase);
111
+ // +1 for the colon appended to each label
112
+ const maxLabelLen = Math.max(...dimLabels.map((l) => l.length + 1), "Doc Lift:".length);
113
+ for (let i = 0; i < dimKeys.length; i++) {
114
+ lines.push(` ${(dimLabels[i] + ":").padEnd(maxLabelLen)} ${deltaStr(dim[dimKeys[i]] ?? 0)}`);
115
+ }
116
+ lines.push(` ${"Doc Lift:".padEnd(maxLabelLen)} ${deltaStr(report.deltas.docLift)}`);
101
117
  if (report.deltas.cost !== undefined) {
102
- lines.push(` Cost: ${report.deltas.cost > 0 ? "+" : ""}$${report.deltas.cost.toFixed(4)}`);
118
+ lines.push(` ${"Cost:".padEnd(maxLabelLen)} ${report.deltas.cost > 0 ? "+" : ""}$${report.deltas.cost.toFixed(4)}`);
103
119
  }
104
120
  lines.push("");
105
- // Per-area table
121
+ // Per-area table — columns are dynamic
106
122
  lines.push("-".repeat(80));
107
123
  lines.push("PER-AREA BREAKDOWN");
108
124
  lines.push("-".repeat(80));
109
125
  lines.push("");
110
- const h = "| Feature Area | Baseline | Experiment | Delta | Task | Code | Docs |";
111
- const sep = "|---------------------|----------|------------|-------|------|------|------|";
112
- lines.push(h);
113
- lines.push(sep);
126
+ const dimHeaders = dimKeys.map(kebabToTitleCase);
127
+ const colWidths = dimHeaders.map((h) => Math.max(h.length, 4));
128
+ const hCols = [
129
+ "Feature Area".padEnd(19),
130
+ "Baseline".padStart(8),
131
+ "Experiment".padStart(10),
132
+ "Delta".padStart(5),
133
+ ...dimHeaders.map((h, i) => h.padStart(colWidths[i])),
134
+ ];
135
+ const sepCols = [
136
+ "-".repeat(21),
137
+ "-".repeat(10),
138
+ "-".repeat(12),
139
+ "-".repeat(7),
140
+ ...colWidths.map((w) => "-".repeat(w + 2)),
141
+ ];
142
+ lines.push(`| ${hCols.join(" | ")} |`);
143
+ lines.push(`|${sepCols.join("|")}|`);
114
144
  for (const a of report.areas) {
115
145
  const icon = changeIcon(a.change);
116
- lines.push(`| ${icon} ${a.area.padEnd(17)} | ${String(a.baseline).padStart(8)} | ${String(a.experiment).padStart(10)} | ${deltaStr(a.delta).padStart(5)} | ${deltaStr(a.dimensions.taskCompletion.delta).padStart(4)} | ${deltaStr(a.dimensions.codeCorrectness.delta).padStart(4)} | ${deltaStr(a.dimensions.docCoverage.delta).padStart(4)} |`);
146
+ const dimCells = dimKeys.map((k, i) => deltaStr(a.dimensions[k]?.delta ?? 0).padStart(colWidths[i]));
147
+ lines.push(`| ${icon} ${a.area.padEnd(17)} | ${String(a.baseline).padStart(8)} | ${String(a.experiment).padStart(10)} | ${deltaStr(a.delta).padStart(5)} | ${dimCells.join(" | ")} |`);
117
148
  }
118
149
  lines.push("");
119
150
  // Classification summary
@@ -187,3 +218,10 @@ function deltaStr(d) {
187
218
  return `${Math.round(d)}`;
188
219
  return "0";
189
220
  }
221
+ /** Convert kebab-case dimension name to title case (e.g. 'task-completion' → 'Task Completion') */
222
+ function kebabToTitleCase(name) {
223
+ return name
224
+ .split("-")
225
+ .map((w) => w.charAt(0).toUpperCase() + w.slice(1))
226
+ .join(" ");
227
+ }