@sanity/ailf 4.0.1 → 4.0.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -76,6 +76,13 @@ export declare const PipelineRequestSchema: z.ZodObject<{
76
76
  "content-lake": "content-lake";
77
77
  inline: "inline";
78
78
  }>>;
79
+ taskSource: z.ZodOptional<z.ZodObject<{
80
+ type: z.ZodOptional<z.ZodEnum<{
81
+ "content-lake": "content-lake";
82
+ repo: "repo";
83
+ }>>;
84
+ repoTasksPath: z.ZodOptional<z.ZodString>;
85
+ }, z.core.$strip>>;
79
86
  tasks: z.ZodOptional<z.ZodArray<z.ZodString>>;
80
87
  urls: z.ZodOptional<z.ZodArray<z.ZodString>>;
81
88
  variant: z.ZodOptional<z.ZodEnum<{
@@ -126,6 +126,24 @@ export const PipelineRequestSchema = z.object({
126
126
  source: z.string().optional(),
127
127
  sourceReportId: z.string().optional(),
128
128
  taskMode: z.enum(["content-lake", "inline"]).optional(),
129
+ /**
130
+ * Task-source configuration (W0077 Phase 6h). Mirrors
131
+ * `EvalConfigSchema.taskSource` so consumers can express task-source
132
+ * intent through the universal request payload instead of CLI flags.
133
+ *
134
+ * - `type` — `content-lake` (default) or `repo`
135
+ * - `repoTasksPath` — path interpreted relative to `rootDir` when
136
+ * mapped through `mapRequestToConfig`
137
+ *
138
+ * When both `taskSource.type` and the legacy `taskMode` are present,
139
+ * `taskSource.type` wins.
140
+ */
141
+ taskSource: z
142
+ .object({
143
+ type: z.enum(["content-lake", "repo"]).optional(),
144
+ repoTasksPath: z.string().min(1).optional(),
145
+ })
146
+ .optional(),
129
147
  tasks: z.array(z.string()).optional(),
130
148
  urls: z.array(z.string().url()).optional(),
131
149
  /**
@@ -1224,8 +1224,11 @@ export interface ComparisonReport {
1224
1224
  deltas: {
1225
1225
  /** Overall score delta (experiment.avgScore − baseline.avgScore) */
1226
1226
  overall: number;
1227
- /** Per-area total score deltas */
1228
- perArea: Record<string, number>;
1227
+ /** Per-area total score deltas (array shape — W0137 / D0041) */
1228
+ perArea: {
1229
+ area: string;
1230
+ delta: number;
1231
+ }[];
1229
1232
  /** Per-dimension average deltas (across all areas) */
1230
1233
  perDimension: Record<string, number>;
1231
1234
  /** Doc Lift average delta */
@@ -178,44 +178,17 @@ export function resolveTasksDir(rootDir, explicitPath) {
178
178
  // Helpers
179
179
  // ---------------------------------------------------------------------------
180
180
  /**
181
- * Convert a GeneralizedTaskDefinition to the camelCase inline format expected
182
- * by the API.
181
+ * Convert a `GeneralizedTaskDefinition` to the wire format expected by the
182
+ * pipeline's `inlineTasks` array. The canonical shape *is* the wire format —
183
+ * `CanonicalTaskSchema` (the receiving validator in
184
+ * `packages/eval/src/adapters/task-sources/repo-schemas.ts`) mirrors this
185
+ * type per-variant via `.strict()`, so any reshaping here would either be a
186
+ * no-op or rejected on the receiver. The helper exists as a typed boundary
187
+ * between the typed task array and `PipelineRequestSchema.inlineTasks`'s
188
+ * `Record<string, unknown>[]` shape.
183
189
  */
184
190
  function taskToInlineFormat(task) {
185
- const inline = {
186
- id: task.id,
187
- mode: task.mode,
188
- description: task.title,
189
- featureArea: task.area ?? "",
190
- assert: task.assertions ?? [],
191
- };
192
- if (task.context?.docs?.length) {
193
- inline.canonicalDocs = task.context.docs;
194
- }
195
- const taskPrompt = task.prompt?.text ?? "";
196
- if (taskPrompt) {
197
- inline.vars = {
198
- task: taskPrompt,
199
- docs: "",
200
- ...(task.prompt?.vars ?? {}),
201
- };
202
- }
203
- // Literacy-specific fields
204
- if (task.mode === "literacy") {
205
- if (task.docCoverage) {
206
- inline.docCoverage = true;
207
- }
208
- if (task.referenceSolution) {
209
- inline.referenceSolution = task.referenceSolution;
210
- }
211
- if (task.baseline) {
212
- inline.baseline = task.baseline;
213
- }
214
- }
215
- if (task.tags?.length) {
216
- inline.tags = task.tags;
217
- }
218
- return inline;
191
+ return task;
219
192
  }
220
193
  /**
221
194
  * Build a descriptive error when the task list is empty after filtering.
@@ -16,7 +16,7 @@ import type { JobResponse } from "./types.js";
16
16
  * ❌ Pipeline failed at step 'fetch-docs'
17
17
  * Postcondition failed: Canonical context for task "foo" is empty.
18
18
  *
19
- * 💡 One or more canonicalDocs slugs in your task definitions don't match ...
19
+ * 💡 One or more context.docs entries in your task definitions don't resolve ...
20
20
  * ```
21
21
  */
22
22
  export declare function formatJobError(job: JobResponse): string;
@@ -16,7 +16,7 @@ import { getRemediationHint } from "./remediation.js";
16
16
  * ❌ Pipeline failed at step 'fetch-docs'
17
17
  * Postcondition failed: Canonical context for task "foo" is empty.
18
18
  *
19
- * 💡 One or more canonicalDocs slugs in your task definitions don't match ...
19
+ * 💡 One or more context.docs entries in your task definitions don't resolve ...
20
20
  * ```
21
21
  */
22
22
  export function formatJobError(job) {
@@ -15,9 +15,10 @@ const HINTS = [
15
15
  {
16
16
  match: (e) => /canonical context.*empty/i.test(e.message) ||
17
17
  /no article found for slug/i.test(e.message),
18
- hint: "One or more `canonicalDocs` slugs in your task definitions don't match " +
19
- "any article in the documentation. Check the `slug` values in " +
20
- "`.ailf/tasks/` and ensure they correspond to real articles.\n" +
18
+ hint: "One or more `context.docs` entries in your task definitions don't " +
19
+ "resolve to any article in the documentation. Check the `slug`, " +
20
+ "`path`, or `id` values in `.ailf/tasks/` and ensure they correspond " +
21
+ "to real articles.\n" +
21
22
  " Run `ailf validate` to check your task definitions locally.",
22
23
  },
23
24
  {
@@ -49,9 +50,9 @@ const HINTS = [
49
50
  {
50
51
  match: (e) => e.step === "fetch-docs" && /postcondition/i.test(e.message),
51
52
  hint: "The documentation fetch step completed but one or more tasks had " +
52
- "empty context. This usually means a `canonicalDocs` slug doesn't " +
53
- "match any article.\n" +
54
- " Check the slug values in `.ailf/tasks/`.",
53
+ "empty context. This usually means a `context.docs` entry doesn't " +
54
+ "resolve to any article.\n" +
55
+ " Check the slug/path/id values in `.ailf/tasks/`.",
55
56
  },
56
57
  {
57
58
  match: (e) => e.step === "dispatch" && /dispatch failed/i.test(e.message),
@@ -1,6 +1,6 @@
1
1
  export { CompositeTaskSource } from "./composite-task-source.js";
2
2
  export { ContentLakeTaskSource } from "./content-lake-task-source.js";
3
- export { CanonicalTaskFileSchema, CanonicalTaskSchema, ContentLakeAuthorableTaskSchema, CURATED_ASSERTION_TYPES, detectLegacyFieldNames, parseCanonicalTaskFile, parseRepoConfig, RepoConfigSchema, RUBRIC_TEMPLATE_NAMES, type CanonicalTask, type ContentLakeAuthorableTaskParsed, type CuratedAssertionType, type RepoConfig, type RubricTemplateName, } from "./repo-schemas.js";
3
+ export { AilfEvalWorkflowSchema, CanonicalTaskFileSchema, CanonicalTaskSchema, ContentLakeAuthorableTaskSchema, CURATED_ASSERTION_TYPES, detectLegacyFieldNames, parseAilfEvalWorkflow, parseCanonicalTaskFile, parseRepoConfig, RepoConfigSchema, RUBRIC_TEMPLATE_NAMES, type AilfEvalWorkflow, type CanonicalTask, type ContentLakeAuthorableTaskParsed, type CuratedAssertionType, type RepoConfig, type RubricTemplateName, } from "./repo-schemas.js";
4
4
  export { RepoTaskSource } from "./repo-task-source.js";
5
5
  export { detectTriggerContext, resolveTrigger, type ResolvedTrigger, type TriggerContext, } from "./repo-trigger.js";
6
6
  export { formatValidationResult, validateCanonicalTasks, type ValidationMessage, type ValidationResult, } from "./repo-validation.js";
@@ -1,6 +1,6 @@
1
1
  export { CompositeTaskSource } from "./composite-task-source.js";
2
2
  export { ContentLakeTaskSource } from "./content-lake-task-source.js";
3
- export { CanonicalTaskFileSchema, CanonicalTaskSchema, ContentLakeAuthorableTaskSchema, CURATED_ASSERTION_TYPES, detectLegacyFieldNames, parseCanonicalTaskFile, parseRepoConfig, RepoConfigSchema, RUBRIC_TEMPLATE_NAMES, } from "./repo-schemas.js";
3
+ export { AilfEvalWorkflowSchema, CanonicalTaskFileSchema, CanonicalTaskSchema, ContentLakeAuthorableTaskSchema, CURATED_ASSERTION_TYPES, detectLegacyFieldNames, parseAilfEvalWorkflow, parseCanonicalTaskFile, parseRepoConfig, RepoConfigSchema, RUBRIC_TEMPLATE_NAMES, } from "./repo-schemas.js";
4
4
  export { RepoTaskSource } from "./repo-task-source.js";
5
5
  export { detectTriggerContext, resolveTrigger, } from "./repo-trigger.js";
6
6
  export { formatValidationResult, validateCanonicalTasks, } from "./repo-validation.js";
@@ -1526,3 +1526,34 @@ export type RepoConfig = z.infer<typeof RepoConfigSchema>;
1526
1526
  * Parse and validate .ailf/config.yaml content. Returns typed config or throws.
1527
1527
  */
1528
1528
  export declare function parseRepoConfig(raw: unknown, filename?: string): RepoConfig;
1529
+ /**
1530
+ * Structural schema for the `ailf-eval.yml` workflow template emitted by
1531
+ * `ailf init`. Validates the consumer-visible contract: a workflow named
1532
+ * "AI Literacy Eval" with at least one job containing checkout + eval
1533
+ * steps. The literal YAML body is intentionally not pinned — comments,
1534
+ * step ordering, and option flags can shift without breaking consumers.
1535
+ */
1536
+ export declare const AilfEvalWorkflowSchema: z.ZodObject<{
1537
+ name: z.ZodString;
1538
+ on: z.ZodUnknown;
1539
+ jobs: z.ZodRecord<z.ZodString, z.ZodObject<{
1540
+ name: z.ZodOptional<z.ZodString>;
1541
+ "runs-on": z.ZodUnion<readonly [z.ZodString, z.ZodArray<z.ZodString>]>;
1542
+ permissions: z.ZodOptional<z.ZodRecord<z.ZodString, z.ZodUnknown>>;
1543
+ steps: z.ZodArray<z.ZodObject<{
1544
+ name: z.ZodOptional<z.ZodString>;
1545
+ id: z.ZodOptional<z.ZodString>;
1546
+ if: z.ZodOptional<z.ZodString>;
1547
+ uses: z.ZodOptional<z.ZodString>;
1548
+ run: z.ZodOptional<z.ZodString>;
1549
+ env: z.ZodOptional<z.ZodRecord<z.ZodString, z.ZodUnknown>>;
1550
+ with: z.ZodOptional<z.ZodRecord<z.ZodString, z.ZodUnknown>>;
1551
+ }, z.core.$loose>>;
1552
+ }, z.core.$loose>>;
1553
+ }, z.core.$loose>;
1554
+ export type AilfEvalWorkflow = z.infer<typeof AilfEvalWorkflowSchema>;
1555
+ /**
1556
+ * Parse and validate a `.github/workflows/ailf-eval.yml` payload (already
1557
+ * loaded from YAML). Throws with a Zod-formatted message on failure.
1558
+ */
1559
+ export declare function parseAilfEvalWorkflow(raw: unknown, filename?: string): AilfEvalWorkflow;
@@ -602,3 +602,61 @@ export function parseRepoConfig(raw, filename = ".ailf/config.yaml") {
602
602
  }
603
603
  return result.data;
604
604
  }
605
+ // ---------------------------------------------------------------------------
606
+ // Workflow contract — structural shape of `.github/workflows/ailf-eval.yml`
607
+ //
608
+ // Used by the Tier 1 init contract test (W0139 M2) and the Tier 4 drift
609
+ // check to assert that the template `init` writes still matches what a
610
+ // downstream consumer's PR comment + report-publish path depends on.
611
+ //
612
+ // Loose by design: this is a contract on the consumer-visible parts (job
613
+ // runs `npx @sanity/ailf ... run --remote`, posts a PR comment, names the
614
+ // workflow `AI Literacy Eval`), not the literal byte content of the YAML.
615
+ // ---------------------------------------------------------------------------
616
+ const WorkflowStepSchema = z
617
+ .object({
618
+ name: z.string().optional(),
619
+ id: z.string().optional(),
620
+ if: z.string().optional(),
621
+ uses: z.string().optional(),
622
+ run: z.string().optional(),
623
+ env: z.record(z.string(), z.unknown()).optional(),
624
+ with: z.record(z.string(), z.unknown()).optional(),
625
+ })
626
+ .passthrough();
627
+ const WorkflowJobSchema = z
628
+ .object({
629
+ name: z.string().optional(),
630
+ "runs-on": z.union([z.string(), z.array(z.string())]),
631
+ permissions: z.record(z.string(), z.unknown()).optional(),
632
+ steps: z.array(WorkflowStepSchema).min(1),
633
+ })
634
+ .passthrough();
635
+ /**
636
+ * Structural schema for the `ailf-eval.yml` workflow template emitted by
637
+ * `ailf init`. Validates the consumer-visible contract: a workflow named
638
+ * "AI Literacy Eval" with at least one job containing checkout + eval
639
+ * steps. The literal YAML body is intentionally not pinned — comments,
640
+ * step ordering, and option flags can shift without breaking consumers.
641
+ */
642
+ export const AilfEvalWorkflowSchema = z
643
+ .object({
644
+ name: z.string().min(1),
645
+ on: z.unknown(),
646
+ jobs: z.record(z.string(), WorkflowJobSchema),
647
+ })
648
+ .passthrough();
649
+ /**
650
+ * Parse and validate a `.github/workflows/ailf-eval.yml` payload (already
651
+ * loaded from YAML). Throws with a Zod-formatted message on failure.
652
+ */
653
+ export function parseAilfEvalWorkflow(raw, filename = ".github/workflows/ailf-eval.yml") {
654
+ const result = AilfEvalWorkflowSchema.safeParse(raw);
655
+ if (!result.success) {
656
+ const messages = result.error.issues
657
+ .map((i) => ` [${i.path.join(".")}]: ${i.message}`)
658
+ .join("\n");
659
+ throw new Error(`Invalid workflow "${filename}":\n${messages}`);
660
+ }
661
+ return result.data;
662
+ }
@@ -17,4 +17,11 @@
17
17
  * ailf init --path ./my-dir # target a specific directory
18
18
  */
19
19
  import { Command } from "commander";
20
+ export interface InitOptions {
21
+ format: "ts" | "yaml" | "json";
22
+ force: boolean;
23
+ path: string;
24
+ mode?: string;
25
+ }
20
26
  export declare function createInitCommand(): Command;
27
+ export declare function runInit(opts: InitOptions): Promise<void>;
@@ -61,7 +61,7 @@ function taskStemsForMode(mode) {
61
61
  // ---------------------------------------------------------------------------
62
62
  // Init logic
63
63
  // ---------------------------------------------------------------------------
64
- async function runInit(opts) {
64
+ export async function runInit(opts) {
65
65
  const validFormats = new Set(["ts", "yaml", "json"]);
66
66
  if (!validFormats.has(opts.format)) {
67
67
  console.error(` ✗ Invalid output format "${opts.format}". Valid options: ts, yaml, json`);
@@ -71,11 +71,13 @@ export function compare(baseline, experiment, options) {
71
71
  const notEvaluated = areas
72
72
  .filter((a) => a.change === "not-evaluated")
73
73
  .map((a) => a.area);
74
- // Per-area deltas as a record
75
- const perArea = {};
76
- for (const a of areas) {
77
- perArea[a.area] = a.delta;
78
- }
74
+ // Per-area deltas as an array (W0137 / D0041) — keyed-map shapes are
75
+ // capped by the Sanity attribute limit because each new feature area
76
+ // mints a permanent attribute path.
77
+ const perArea = areas.map((a) => ({
78
+ area: a.area,
79
+ delta: a.delta,
80
+ }));
79
81
  // Per-dimension average deltas (only for areas present in both summaries)
80
82
  const commonAreas = areas.filter((a) => baselineAreas.has(a.area) && experimentAreas.has(a.area));
81
83
  const commonCount = commonAreas.length || 1;
@@ -61,14 +61,16 @@ export function mapRequestToConfig(request, rootDir) {
61
61
  datasetOverride: request.dataset,
62
62
  projectIdOverride: request.projectId,
63
63
  perspectiveOverride: request.perspective,
64
- taskSourceType: mapTaskSourceType(request.taskMode),
64
+ taskSourceType: mapTaskSourceType(request.taskSource?.type, request.taskMode),
65
65
  outputPath: undefined,
66
66
  promptfooUrl: undefined,
67
67
  studioOriginOverride: undefined,
68
68
  sanityDocumentArgs: undefined,
69
69
  sourceReportId: request.sourceReportId,
70
70
  beforeOption: undefined,
71
- repoTasksPath: undefined,
71
+ repoTasksPath: request.taskSource?.repoTasksPath
72
+ ? resolve(rootDir, request.taskSource.repoTasksPath)
73
+ : undefined,
72
74
  callerGit: request.callerGit,
73
75
  callerEnvelope: buildCallerEnvelope(request),
74
76
  callback: request.callback,
@@ -107,14 +109,19 @@ function buildCallerEnvelope(request) {
107
109
  }
108
110
  return { classification, owner, executor, purpose, labels };
109
111
  }
110
- function mapTaskSourceType(taskMode) {
112
+ function mapTaskSourceType(taskSourceType, taskMode) {
113
+ // `taskSource.type` is the canonical field; honor it first when set.
114
+ if (taskSourceType === "repo")
115
+ return "repo";
116
+ if (taskSourceType === "content-lake")
117
+ return "content-lake";
118
+ // Fall back to the legacy `taskMode` field.
111
119
  if (taskMode === "content-lake")
112
- return taskMode;
120
+ return "content-lake";
113
121
  // "inline" means the caller sent inline tasks that will be materialized
114
- // to a temp directory and loaded via --repo-tasks-path. Use "repo" to
115
- // ensure ONLY those tasks are used (no Content Lake merge).
122
+ // to a temp directory. Use "repo" to ensure ONLY those tasks are used
123
+ // (no Content Lake merge).
116
124
  if (taskMode === "inline")
117
125
  return "repo";
118
- // "yaml" was removed — treat it as default (Content Lake)
119
126
  return undefined;
120
127
  }
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@sanity/ailf",
3
- "version": "4.0.1",
3
+ "version": "4.0.3",
4
4
  "private": false,
5
5
  "publishConfig": {
6
6
  "access": "public"
@@ -77,7 +77,8 @@
77
77
  "test": "tsx --test src/__tests__/*.test.ts src/adapters/**/__tests__/*.adapter.test.ts",
78
78
  "test:e2e": "AILF_E2E=1 tsx --test src/__tests__/e2e/*.e2e.test.ts",
79
79
  "test:e2e:adapters": "AILF_E2E=1 tsx --test src/adapters/**/__tests__/*.adapter.test.ts",
80
- "test:e2e:api": "AILF_E2E_API=1 tsx --test src/__tests__/api-tier2-tenant-integration.test.ts",
80
+ "test:e2e:api": "AILF_E2E_API=1 tsx --test src/__tests__/api-tier2-tenant-integration.test.ts src/__tests__/gcs-artifact-writer-roundtrip.test.ts",
81
+ "test:tier3:roundtrip": "AILF_E2E_API=1 AILF_E2E_GITHUB_DISPATCH=1 tsx --test src/__tests__/api-tier3-round-trip.test.ts",
81
82
  "test:all": "AILF_E2E=1 tsx --test src/__tests__/*.test.ts src/pipeline/compiler/__tests__/*.test.ts src/__tests__/e2e/*.e2e.test.ts src/adapters/**/__tests__/*.adapter.test.ts",
82
83
  "pr-comment": "tsx src/cli.ts pr-comment",
83
84
  "coverage-audit": "tsx src/cli.ts report coverage",