npm - @sanity/ailf - Versions diffs - 4.0.1 → 4.0.3 - Mend

@sanity/ailf 4.0.1 → 4.0.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (16)

package/dist/_vendor/ailf-core/schemas/pipeline-request.d.ts +7 -0
package/dist/_vendor/ailf-core/schemas/pipeline-request.js +18 -0
package/dist/_vendor/ailf-core/types/index.d.ts +5 -2
package/dist/adapters/api-client/build-request.js +9 -36
package/dist/adapters/api-client/format-error.d.ts +1 -1
package/dist/adapters/api-client/format-error.js +1 -1
package/dist/adapters/api-client/remediation.js +7 -6
package/dist/adapters/task-sources/index.d.ts +1 -1
package/dist/adapters/task-sources/index.js +1 -1
package/dist/adapters/task-sources/repo-schemas.d.ts +31 -0
package/dist/adapters/task-sources/repo-schemas.js +58 -0
package/dist/commands/init.d.ts +7 -0
package/dist/commands/init.js +1 -1
package/dist/pipeline/compare.js +7 -5
package/dist/pipeline/map-request-to-config.js +14 -7
package/package.json +3 -2

package/dist/_vendor/ailf-core/schemas/pipeline-request.d.ts CHANGED Viewed

@@ -76,6 +76,13 @@ export declare const PipelineRequestSchema: z.ZodObject<{
         "content-lake": "content-lake";
         inline: "inline";
     }>>;
+    taskSource: z.ZodOptional<z.ZodObject<{
+        type: z.ZodOptional<z.ZodEnum<{
+            "content-lake": "content-lake";
+            repo: "repo";
+        }>>;
+        repoTasksPath: z.ZodOptional<z.ZodString>;
+    }, z.core.$strip>>;
     tasks: z.ZodOptional<z.ZodArray<z.ZodString>>;
     urls: z.ZodOptional<z.ZodArray<z.ZodString>>;
     variant: z.ZodOptional<z.ZodEnum<{

package/dist/_vendor/ailf-core/schemas/pipeline-request.js CHANGED Viewed

@@ -126,6 +126,24 @@ export const PipelineRequestSchema = z.object({
     source: z.string().optional(),
     sourceReportId: z.string().optional(),
     taskMode: z.enum(["content-lake", "inline"]).optional(),
+    /**
+     * Task-source configuration (W0077 Phase 6h). Mirrors
+     * `EvalConfigSchema.taskSource` so consumers can express task-source
+     * intent through the universal request payload instead of CLI flags.
+     *
+     * - `type`           — `content-lake` (default) or `repo`
+     * - `repoTasksPath`  — path interpreted relative to `rootDir` when
+     *                      mapped through `mapRequestToConfig`
+     *
+     * When both `taskSource.type` and the legacy `taskMode` are present,
+     * `taskSource.type` wins.
+     */
+    taskSource: z
+        .object({
+        type: z.enum(["content-lake", "repo"]).optional(),
+        repoTasksPath: z.string().min(1).optional(),
+    })
+        .optional(),
     tasks: z.array(z.string()).optional(),
     urls: z.array(z.string().url()).optional(),
     /**

package/dist/_vendor/ailf-core/types/index.d.ts CHANGED Viewed

@@ -1224,8 +1224,11 @@ export interface ComparisonReport {
     deltas: {
         /** Overall score delta (experiment.avgScore − baseline.avgScore) */
         overall: number;
-        /** Per-area total score deltas */
-        perArea: Record<string, number>;
+        /** Per-area total score deltas (array shape — W0137 / D0041) */
+        perArea: {
+            area: string;
+            delta: number;
+        }[];
         /** Per-dimension average deltas (across all areas) */
         perDimension: Record<string, number>;
         /** Doc Lift average delta */

package/dist/adapters/api-client/build-request.js CHANGED Viewed

@@ -178,44 +178,17 @@ export function resolveTasksDir(rootDir, explicitPath) {
 // Helpers
 // ---------------------------------------------------------------------------
 /**
- * Convert a GeneralizedTaskDefinition to the camelCase inline format expected
- * by the API.
+ * Convert a `GeneralizedTaskDefinition` to the wire format expected by the
+ * pipeline's `inlineTasks` array. The canonical shape *is* the wire format —
+ * `CanonicalTaskSchema` (the receiving validator in
+ * `packages/eval/src/adapters/task-sources/repo-schemas.ts`) mirrors this
+ * type per-variant via `.strict()`, so any reshaping here would either be a
+ * no-op or rejected on the receiver. The helper exists as a typed boundary
+ * between the typed task array and `PipelineRequestSchema.inlineTasks`'s
+ * `Record<string, unknown>[]` shape.
  */
 function taskToInlineFormat(task) {
-    const inline = {
-        id: task.id,
-        mode: task.mode,
-        description: task.title,
-        featureArea: task.area ?? "",
-        assert: task.assertions ?? [],
-    };
-    if (task.context?.docs?.length) {
-        inline.canonicalDocs = task.context.docs;
-    }
-    const taskPrompt = task.prompt?.text ?? "";
-    if (taskPrompt) {
-        inline.vars = {
-            task: taskPrompt,
-            docs: "",
-            ...(task.prompt?.vars ?? {}),
-        };
-    }
-    // Literacy-specific fields
-    if (task.mode === "literacy") {
-        if (task.docCoverage) {
-            inline.docCoverage = true;
-        }
-        if (task.referenceSolution) {
-            inline.referenceSolution = task.referenceSolution;
-        }
-        if (task.baseline) {
-            inline.baseline = task.baseline;
-        }
-    }
-    if (task.tags?.length) {
-        inline.tags = task.tags;
-    }
-    return inline;
+    return task;
 }
 /**
  * Build a descriptive error when the task list is empty after filtering.

package/dist/adapters/api-client/format-error.d.ts CHANGED Viewed

@@ -16,7 +16,7 @@ import type { JobResponse } from "./types.js";
  * ❌ Pipeline failed at step 'fetch-docs'
  *    Postcondition failed: Canonical context for task "foo" is empty.
  *
- * 💡 One or more canonicalDocs slugs in your task definitions don't match ...
+ * 💡 One or more context.docs entries in your task definitions don't resolve ...
  * ```
  */
 export declare function formatJobError(job: JobResponse): string;

package/dist/adapters/api-client/format-error.js CHANGED Viewed

@@ -16,7 +16,7 @@ import { getRemediationHint } from "./remediation.js";
  * ❌ Pipeline failed at step 'fetch-docs'
  *    Postcondition failed: Canonical context for task "foo" is empty.
  *
- * 💡 One or more canonicalDocs slugs in your task definitions don't match ...
+ * 💡 One or more context.docs entries in your task definitions don't resolve ...
  * ```
  */
 export function formatJobError(job) {

package/dist/adapters/api-client/remediation.js CHANGED Viewed

@@ -15,9 +15,10 @@ const HINTS = [
     {
         match: (e) => /canonical context.*empty/i.test(e.message) ||
             /no article found for slug/i.test(e.message),
-        hint: "One or more `canonicalDocs` slugs in your task definitions don't match " +
-            "any article in the documentation. Check the `slug` values in " +
-            "`.ailf/tasks/` and ensure they correspond to real articles.\n" +
+        hint: "One or more `context.docs` entries in your task definitions don't " +
+            "resolve to any article in the documentation. Check the `slug`, " +
+            "`path`, or `id` values in `.ailf/tasks/` and ensure they correspond " +
+            "to real articles.\n" +
             "  Run `ailf validate` to check your task definitions locally.",
     },
     {
@@ -49,9 +50,9 @@ const HINTS = [
     {
         match: (e) => e.step === "fetch-docs" && /postcondition/i.test(e.message),
         hint: "The documentation fetch step completed but one or more tasks had " +
-            "empty context. This usually means a `canonicalDocs` slug doesn't " +
-            "match any article.\n" +
-            "  Check the slug values in `.ailf/tasks/`.",
+            "empty context. This usually means a `context.docs` entry doesn't " +
+            "resolve to any article.\n" +
+            "  Check the slug/path/id values in `.ailf/tasks/`.",
     },
     {
         match: (e) => e.step === "dispatch" && /dispatch failed/i.test(e.message),

package/dist/adapters/task-sources/index.d.ts CHANGED Viewed

@@ -1,6 +1,6 @@
 export { CompositeTaskSource } from "./composite-task-source.js";
 export { ContentLakeTaskSource } from "./content-lake-task-source.js";
-export { CanonicalTaskFileSchema, CanonicalTaskSchema, ContentLakeAuthorableTaskSchema, CURATED_ASSERTION_TYPES, detectLegacyFieldNames, parseCanonicalTaskFile, parseRepoConfig, RepoConfigSchema, RUBRIC_TEMPLATE_NAMES, type CanonicalTask, type ContentLakeAuthorableTaskParsed, type CuratedAssertionType, type RepoConfig, type RubricTemplateName, } from "./repo-schemas.js";
+export { AilfEvalWorkflowSchema, CanonicalTaskFileSchema, CanonicalTaskSchema, ContentLakeAuthorableTaskSchema, CURATED_ASSERTION_TYPES, detectLegacyFieldNames, parseAilfEvalWorkflow, parseCanonicalTaskFile, parseRepoConfig, RepoConfigSchema, RUBRIC_TEMPLATE_NAMES, type AilfEvalWorkflow, type CanonicalTask, type ContentLakeAuthorableTaskParsed, type CuratedAssertionType, type RepoConfig, type RubricTemplateName, } from "./repo-schemas.js";
 export { RepoTaskSource } from "./repo-task-source.js";
 export { detectTriggerContext, resolveTrigger, type ResolvedTrigger, type TriggerContext, } from "./repo-trigger.js";
 export { formatValidationResult, validateCanonicalTasks, type ValidationMessage, type ValidationResult, } from "./repo-validation.js";

package/dist/adapters/task-sources/index.js CHANGED Viewed

@@ -1,6 +1,6 @@
 export { CompositeTaskSource } from "./composite-task-source.js";
 export { ContentLakeTaskSource } from "./content-lake-task-source.js";
-export { CanonicalTaskFileSchema, CanonicalTaskSchema, ContentLakeAuthorableTaskSchema, CURATED_ASSERTION_TYPES, detectLegacyFieldNames, parseCanonicalTaskFile, parseRepoConfig, RepoConfigSchema, RUBRIC_TEMPLATE_NAMES, } from "./repo-schemas.js";
+export { AilfEvalWorkflowSchema, CanonicalTaskFileSchema, CanonicalTaskSchema, ContentLakeAuthorableTaskSchema, CURATED_ASSERTION_TYPES, detectLegacyFieldNames, parseAilfEvalWorkflow, parseCanonicalTaskFile, parseRepoConfig, RepoConfigSchema, RUBRIC_TEMPLATE_NAMES, } from "./repo-schemas.js";
 export { RepoTaskSource } from "./repo-task-source.js";
 export { detectTriggerContext, resolveTrigger, } from "./repo-trigger.js";
 export { formatValidationResult, validateCanonicalTasks, } from "./repo-validation.js";

package/dist/adapters/task-sources/repo-schemas.d.ts CHANGED Viewed

@@ -1526,3 +1526,34 @@ export type RepoConfig = z.infer<typeof RepoConfigSchema>;
  * Parse and validate .ailf/config.yaml content. Returns typed config or throws.
  */
 export declare function parseRepoConfig(raw: unknown, filename?: string): RepoConfig;
+/**
+ * Structural schema for the `ailf-eval.yml` workflow template emitted by
+ * `ailf init`. Validates the consumer-visible contract: a workflow named
+ * "AI Literacy Eval" with at least one job containing checkout + eval
+ * steps. The literal YAML body is intentionally not pinned — comments,
+ * step ordering, and option flags can shift without breaking consumers.
+ */
+export declare const AilfEvalWorkflowSchema: z.ZodObject<{
+    name: z.ZodString;
+    on: z.ZodUnknown;
+    jobs: z.ZodRecord<z.ZodString, z.ZodObject<{
+        name: z.ZodOptional<z.ZodString>;
+        "runs-on": z.ZodUnion<readonly [z.ZodString, z.ZodArray<z.ZodString>]>;
+        permissions: z.ZodOptional<z.ZodRecord<z.ZodString, z.ZodUnknown>>;
+        steps: z.ZodArray<z.ZodObject<{
+            name: z.ZodOptional<z.ZodString>;
+            id: z.ZodOptional<z.ZodString>;
+            if: z.ZodOptional<z.ZodString>;
+            uses: z.ZodOptional<z.ZodString>;
+            run: z.ZodOptional<z.ZodString>;
+            env: z.ZodOptional<z.ZodRecord<z.ZodString, z.ZodUnknown>>;
+            with: z.ZodOptional<z.ZodRecord<z.ZodString, z.ZodUnknown>>;
+        }, z.core.$loose>>;
+    }, z.core.$loose>>;
+}, z.core.$loose>;
+export type AilfEvalWorkflow = z.infer<typeof AilfEvalWorkflowSchema>;
+/**
+ * Parse and validate a `.github/workflows/ailf-eval.yml` payload (already
+ * loaded from YAML). Throws with a Zod-formatted message on failure.
+ */
+export declare function parseAilfEvalWorkflow(raw: unknown, filename?: string): AilfEvalWorkflow;

package/dist/adapters/task-sources/repo-schemas.js CHANGED Viewed

@@ -602,3 +602,61 @@ export function parseRepoConfig(raw, filename = ".ailf/config.yaml") {
     }
     return result.data;
 }
+// ---------------------------------------------------------------------------
+// Workflow contract — structural shape of `.github/workflows/ailf-eval.yml`
+//
+// Used by the Tier 1 init contract test (W0139 M2) and the Tier 4 drift
+// check to assert that the template `init` writes still matches what a
+// downstream consumer's PR comment + report-publish path depends on.
+//
+// Loose by design: this is a contract on the consumer-visible parts (job
+// runs `npx @sanity/ailf ... run --remote`, posts a PR comment, names the
+// workflow `AI Literacy Eval`), not the literal byte content of the YAML.
+// ---------------------------------------------------------------------------
+const WorkflowStepSchema = z
+    .object({
+    name: z.string().optional(),
+    id: z.string().optional(),
+    if: z.string().optional(),
+    uses: z.string().optional(),
+    run: z.string().optional(),
+    env: z.record(z.string(), z.unknown()).optional(),
+    with: z.record(z.string(), z.unknown()).optional(),
+})
+    .passthrough();
+const WorkflowJobSchema = z
+    .object({
+    name: z.string().optional(),
+    "runs-on": z.union([z.string(), z.array(z.string())]),
+    permissions: z.record(z.string(), z.unknown()).optional(),
+    steps: z.array(WorkflowStepSchema).min(1),
+})
+    .passthrough();
+/**
+ * Structural schema for the `ailf-eval.yml` workflow template emitted by
+ * `ailf init`. Validates the consumer-visible contract: a workflow named
+ * "AI Literacy Eval" with at least one job containing checkout + eval
+ * steps. The literal YAML body is intentionally not pinned — comments,
+ * step ordering, and option flags can shift without breaking consumers.
+ */
+export const AilfEvalWorkflowSchema = z
+    .object({
+    name: z.string().min(1),
+    on: z.unknown(),
+    jobs: z.record(z.string(), WorkflowJobSchema),
+})
+    .passthrough();
+/**
+ * Parse and validate a `.github/workflows/ailf-eval.yml` payload (already
+ * loaded from YAML). Throws with a Zod-formatted message on failure.
+ */
+export function parseAilfEvalWorkflow(raw, filename = ".github/workflows/ailf-eval.yml") {
+    const result = AilfEvalWorkflowSchema.safeParse(raw);
+    if (!result.success) {
+        const messages = result.error.issues
+            .map((i) => `  [${i.path.join(".")}]: ${i.message}`)
+            .join("\n");
+        throw new Error(`Invalid workflow "${filename}":\n${messages}`);
+    }
+    return result.data;
+}

package/dist/commands/init.d.ts CHANGED Viewed

@@ -17,4 +17,11 @@
  *   ailf init --path ./my-dir        # target a specific directory
  */
 import { Command } from "commander";
+export interface InitOptions {
+    format: "ts" | "yaml" | "json";
+    force: boolean;
+    path: string;
+    mode?: string;
+}
 export declare function createInitCommand(): Command;
+export declare function runInit(opts: InitOptions): Promise<void>;

package/dist/commands/init.js CHANGED Viewed

@@ -61,7 +61,7 @@ function taskStemsForMode(mode) {
 // ---------------------------------------------------------------------------
 // Init logic
 // ---------------------------------------------------------------------------
-async function runInit(opts) {
+export async function runInit(opts) {
     const validFormats = new Set(["ts", "yaml", "json"]);
     if (!validFormats.has(opts.format)) {
         console.error(`  ✗ Invalid output format "${opts.format}". Valid options: ts, yaml, json`);

package/dist/pipeline/compare.js CHANGED Viewed

@@ -71,11 +71,13 @@ export function compare(baseline, experiment, options) {
     const notEvaluated = areas
         .filter((a) => a.change === "not-evaluated")
         .map((a) => a.area);
-    // Per-area deltas as a record
-    const perArea = {};
-    for (const a of areas) {
-        perArea[a.area] = a.delta;
-    }
+    // Per-area deltas as an array (W0137 / D0041) — keyed-map shapes are
+    // capped by the Sanity attribute limit because each new feature area
+    // mints a permanent attribute path.
+    const perArea = areas.map((a) => ({
+        area: a.area,
+        delta: a.delta,
+    }));
     // Per-dimension average deltas (only for areas present in both summaries)
     const commonAreas = areas.filter((a) => baselineAreas.has(a.area) && experimentAreas.has(a.area));
     const commonCount = commonAreas.length || 1;

package/dist/pipeline/map-request-to-config.js CHANGED Viewed

@@ -61,14 +61,16 @@ export function mapRequestToConfig(request, rootDir) {
         datasetOverride: request.dataset,
         projectIdOverride: request.projectId,
         perspectiveOverride: request.perspective,
-        taskSourceType: mapTaskSourceType(request.taskMode),
+        taskSourceType: mapTaskSourceType(request.taskSource?.type, request.taskMode),
         outputPath: undefined,
         promptfooUrl: undefined,
         studioOriginOverride: undefined,
         sanityDocumentArgs: undefined,
         sourceReportId: request.sourceReportId,
         beforeOption: undefined,
-        repoTasksPath: undefined,
+        repoTasksPath: request.taskSource?.repoTasksPath
+            ? resolve(rootDir, request.taskSource.repoTasksPath)
+            : undefined,
         callerGit: request.callerGit,
         callerEnvelope: buildCallerEnvelope(request),
         callback: request.callback,
@@ -107,14 +109,19 @@ function buildCallerEnvelope(request) {
     }
     return { classification, owner, executor, purpose, labels };
 }
-function mapTaskSourceType(taskMode) {
+function mapTaskSourceType(taskSourceType, taskMode) {
+    // `taskSource.type` is the canonical field; honor it first when set.
+    if (taskSourceType === "repo")
+        return "repo";
+    if (taskSourceType === "content-lake")
+        return "content-lake";
+    // Fall back to the legacy `taskMode` field.
     if (taskMode === "content-lake")
-        return taskMode;
+        return "content-lake";
     // "inline" means the caller sent inline tasks that will be materialized
-    // to a temp directory and loaded via --repo-tasks-path. Use "repo" to
-    // ensure ONLY those tasks are used (no Content Lake merge).
+    // to a temp directory. Use "repo" to ensure ONLY those tasks are used
+    // (no Content Lake merge).
     if (taskMode === "inline")
         return "repo";
-    // "yaml" was removed — treat it as default (Content Lake)
     return undefined;
 }

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@sanity/ailf",
-  "version": "4.0.1",
+  "version": "4.0.3",
   "private": false,
   "publishConfig": {
     "access": "public"
@@ -77,7 +77,8 @@
     "test": "tsx --test src/__tests__/*.test.ts src/adapters/**/__tests__/*.adapter.test.ts",
     "test:e2e": "AILF_E2E=1 tsx --test src/__tests__/e2e/*.e2e.test.ts",
     "test:e2e:adapters": "AILF_E2E=1 tsx --test src/adapters/**/__tests__/*.adapter.test.ts",
-    "test:e2e:api": "AILF_E2E_API=1 tsx --test src/__tests__/api-tier2-tenant-integration.test.ts",
+    "test:e2e:api": "AILF_E2E_API=1 tsx --test src/__tests__/api-tier2-tenant-integration.test.ts src/__tests__/gcs-artifact-writer-roundtrip.test.ts",
+    "test:tier3:roundtrip": "AILF_E2E_API=1 AILF_E2E_GITHUB_DISPATCH=1 tsx --test src/__tests__/api-tier3-round-trip.test.ts",
     "test:all": "AILF_E2E=1 tsx --test src/__tests__/*.test.ts src/pipeline/compiler/__tests__/*.test.ts src/__tests__/e2e/*.e2e.test.ts src/adapters/**/__tests__/*.adapter.test.ts",
     "pr-comment": "tsx src/cli.ts pr-comment",
     "coverage-audit": "tsx src/cli.ts report coverage",