npm - @sanity/ailf - Versions diffs - 3.5.0 → 3.6.0 - Mend

@sanity/ailf 3.5.0 → 3.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (16)

package/bin/ailf.js +16 -1
package/config/bigquery/README.md +35 -6
package/dist/_vendor/ailf-core/types/generalized-task.d.ts +23 -0
package/dist/_vendor/ailf-core/types/index.d.ts +1 -1
package/dist/adapters/api-client/build-request.d.ts +11 -0
package/dist/adapters/api-client/build-request.js +106 -9
package/dist/adapters/api-client/index.d.ts +1 -1
package/dist/adapters/api-client/index.js +1 -1
package/dist/adapters/task-sources/content-lake-task-source.d.ts +8 -3
package/dist/adapters/task-sources/content-lake-task-source.js +19 -8
package/dist/adapters/task-sources/repo-schemas.d.ts +1093 -41
package/dist/adapters/task-sources/repo-schemas.js +178 -44
package/dist/commands/pipeline-action.js +8 -1
package/dist/commands/pipeline.js +1 -2
package/dist/commands/remote-pipeline.js +6 -1
package/package.json +1 -1

package/dist/adapters/task-sources/repo-schemas.js CHANGED Viewed

@@ -139,76 +139,209 @@ const AssertionSchema = z.union([
     ValueAssertionSchema,
 ]);
 // ---------------------------------------------------------------------------
-// Nested config schemas
+// Shared field schemas — building blocks reused across mode variants
 // ---------------------------------------------------------------------------
-const BaselineConfigSchema = z
-    .object({
+const TaskPromptSchema = z.object({
+    template: z.string().optional(),
+    text: z.string().optional(),
+    systemMessage: z.string().optional(),
+    vars: z.record(z.string(), z.unknown()).optional(),
+});
+const RubricRefSchema = z.union([
+    z.object({ ref: z.string().min(1) }),
+    z.object({
+        inline: z.string().min(1),
+        dimensions: z
+            .array(z.object({ key: z.string().min(1), weight: z.number() }))
+            .optional(),
+    }),
+]);
+const TaskProviderConfigSchema = z.object({
+    id: z.string().min(1),
+    config: z.record(z.string(), z.unknown()).optional(),
+});
+const TaskOptionsSchema = z.object({
+    timeout: z.number().optional(),
+    cache: z.boolean().optional(),
+    transformOutput: z.string().optional(),
+    promptfooOverrides: z.record(z.string(), z.unknown()).optional(),
+});
+const BaselineConfigSchema = z.object({
     enabled: z.boolean().optional(),
     rubric: z.enum(["abbreviated", "full", "none"]).optional(),
-})
-    .optional();
+});
+const MultiTurnSchema = z.object({
+    turns: z.array(z.object({
+        role: z.enum(["user", "assistant"]),
+        content: z.string(),
+    })),
+});
+// Context variants — shape differs slightly per mode. Keeping them separate
+// is what enforces variant-specific strictness (e.g. only mcp-server carries
+// `context.tools`).
+const ContextLiteracyLikeSchema = z.object({
+    docs: z.array(CanonicalDocRefSchema).optional(),
+    fixtures: z.array(z.string()).optional(),
+});
+const ContextMcpSchema = z.object({
+    docs: z.array(CanonicalDocRefSchema).optional(),
+    fixtures: z.array(z.string()).optional(),
+    tools: z.array(z.string()).optional(),
+});
+// Mode-specific nested shapes
+const McpAuthSchema = z.object({
+    type: z.enum(["bearer", "basic", "api_key", "oauth"]),
+    token: z.string().optional(),
+    username: z.string().optional(),
+    password: z.string().optional(),
+    value: z.string().optional(),
+    keyName: z.string().optional(),
+    placement: z.enum(["header", "query"]).optional(),
+    grantType: z.enum(["client_credentials", "password"]).optional(),
+    tokenUrl: z.string().optional(),
+    clientId: z.string().optional(),
+    clientSecret: z.string().optional(),
+    scopes: z.array(z.string()).optional(),
+});
+const McpServerConfigSchema = z.object({
+    transport: z.enum(["stdio", "sse", "streamable-http"]),
+    command: z.string().optional(),
+    url: z.string().optional(),
+    env: z.record(z.string(), z.string()).optional(),
+    headers: z.record(z.string(), z.string()).optional(),
+    startupTimeoutMs: z.number().optional(),
+    auth: McpAuthSchema.optional(),
+});
+const SandboxSchema = z.object({
+    type: z.enum(["docker", "git-worktree", "none", "nsjail", "tempdir"]),
+    image: z.string().optional(),
+    limits: z
+        .object({
+        cpus: z.number().optional(),
+        memoryBytes: z.number().optional(),
+        diskBytes: z.number().optional(),
+        networkAccess: z.boolean().optional(),
+    })
+        .optional(),
+});
+const KnowledgeBaseSchema = z.object({
+    type: z.enum(["sanity-dataset", "embeddings-index", "file-corpus"]),
+    name: z.string().min(1),
+    config: z.record(z.string(), z.unknown()).optional(),
+});
 // ---------------------------------------------------------------------------
-// CanonicalTaskSchema — the single canonical task shape
+// CanonicalTaskSchema — mode-discriminated union
 //
-// Validates the GeneralizedTaskDefinition shape. Field names match the
-// internal domain model directly — no mapping layer needed.
+// Mirrors `GeneralizedTaskDefinition` from
+// packages/core/src/types/generalized-task.ts. Each variant declares only
+// the fields that belong to its mode — cross-variant field mixing (e.g.
+// `{ mode: "mcp-server", sandbox: {...} }`) is rejected because strict
+// objects do not permit unknown keys.
 //
-// YAML tasks may omit `mode` (defaults to "literacy"). All other fields
-// use the canonical names: `title`, `area`, `prompt.text`, `context.docs`,
-// `assertions`.
+// YAML tasks may omit `mode` (defaults to "literacy"). The default is
+// applied in `parseCanonicalTaskFile` before the discriminated union runs,
+// since Zod cannot default a discriminator inside a discriminated union.
 // ---------------------------------------------------------------------------
-/**
- * Zod schema for a single task definition using canonical field names.
- *
- * Uses .passthrough() to allow mode-specific fields (serverConfig, sandbox,
- * handler, etc.) without listing every possible field. Mode-specific
- * validation is deferred to the pipeline's mode handlers.
- */
-export const CanonicalTaskSchema = z
-    .object({
+// Common fields shared by every variant — used as a base for each per-mode
+// schema via `.extend()`. Kept internal to avoid consumers building tasks
+// from the base alone.
+const TaskCommonFieldsSchema = z.object({
     id: z
         .string()
         .min(1)
         .regex(/^[a-z0-9][a-z0-9-]*$/, "Task ID must be lowercase alphanumeric with hyphens"),
-    mode: z.string().default("literacy"),
     title: z.string().min(1),
     description: z.string().optional(),
     area: z.string().optional(),
     difficulty: z.enum(["basic", "intermediate", "advanced"]).optional(),
+    tags: z.array(z.string()).optional(),
     status: z
         .enum(["active", "draft", "paused", "archived"])
         .optional()
         .default("active"),
-    tags: z.array(z.string()).optional(),
-    prompt: z
-        .object({
-        template: z.string().optional(),
-        text: z.string().optional(),
-        systemMessage: z.string().optional(),
-        vars: z.record(z.string(), z.unknown()).optional(),
-    })
-        .optional(),
-    context: z
-        .object({
-        docs: z.array(CanonicalDocRefSchema).optional(),
-        fixtures: z.array(z.string()).optional(),
-    })
-        .optional(),
     assertions: z.array(AssertionSchema).optional(),
+    rubric: RubricRefSchema.optional(),
+    providers: z.array(TaskProviderConfigSchema).optional(),
+    options: TaskOptionsSchema.optional(),
+    prompt: TaskPromptSchema.optional(),
+    metadata: z.record(z.string(), z.unknown()).optional(),
+});
+// `.strict()` on each variant rejects unknown keys — this is what enforces
+// cross-variant strictness (e.g. a literacy task cannot carry `sandbox`).
+const LiteracyTaskSchema = TaskCommonFieldsSchema.extend({
+    mode: z.literal("literacy"),
+    context: ContextLiteracyLikeSchema.optional(),
     referenceSolution: z.string().optional(),
     docCoverage: z.boolean().optional().default(false),
-    baseline: BaselineConfigSchema,
-    rubric: z.unknown().optional(),
-    providers: z.array(z.unknown()).optional(),
-    options: z.unknown().optional(),
-    metadata: z.record(z.string(), z.unknown()).optional(),
-})
-    .passthrough();
+    baseline: BaselineConfigSchema.optional(),
+}).strict();
+const MCPServerTaskSchema = TaskCommonFieldsSchema.extend({
+    mode: z.literal("mcp-server"),
+    serverConfig: McpServerConfigSchema.optional(),
+    capabilities: z.array(z.string()).optional(),
+    context: ContextMcpSchema.optional(),
+    multiTurn: MultiTurnSchema.optional(),
+    models: z.array(z.string()).optional(),
+    maxToolRounds: z.number().optional(),
+}).strict();
+const AgentHarnessTaskSchema = TaskCommonFieldsSchema.extend({
+    mode: z.literal("agent-harness"),
+    sandbox: SandboxSchema.optional(),
+    tools: z.array(z.string()).optional(),
+    fixtures: z.array(z.string()).optional(),
+    context: ContextLiteracyLikeSchema.optional(),
+    multiTurn: MultiTurnSchema.optional(),
+}).strict();
+const KnowledgeProbeTaskSchema = TaskCommonFieldsSchema.extend({
+    mode: z.literal("knowledge-probe"),
+    probeStrategy: z
+        .enum(["breadth-first", "depth-first", "random-sample", "coverage-guided"])
+        .optional(),
+    knowledgeBase: KnowledgeBaseSchema.optional(),
+    context: ContextLiteracyLikeSchema.optional(),
+}).strict();
+const CustomTaskSchema = TaskCommonFieldsSchema.extend({
+    mode: z.literal("custom"),
+    handler: z.string().min(1),
+    schema: z.record(z.string(), z.unknown()).optional(),
+    context: ContextLiteracyLikeSchema.optional(),
+}).strict();
+/**
+ * Zod schema for a single task definition — a mode-discriminated union
+ * mirroring `GeneralizedTaskDefinition`.
+ *
+ * Unknown keys are rejected on each variant. Invalid modes are rejected
+ * by the discriminator itself.
+ */
+export const CanonicalTaskSchema = z.discriminatedUnion("mode", [
+    LiteracyTaskSchema,
+    MCPServerTaskSchema,
+    AgentHarnessTaskSchema,
+    KnowledgeProbeTaskSchema,
+    CustomTaskSchema,
+]);
 /**
  * Schema for an array of canonical tasks — what a single .ailf/tasks/*.yaml
  * file contains. Each file must define at least one task.
  */
 export const CanonicalTaskFileSchema = z.array(CanonicalTaskSchema).min(1);
+/**
+ * Pre-process raw task entries before discriminated-union parsing: when
+ * `mode` is missing, default it to `"literacy"`. Zod cannot default a
+ * discriminator in-place, so we normalise here.
+ */
+function applyModeDefault(raw) {
+    if (!Array.isArray(raw))
+        return raw;
+    return raw.map((entry) => {
+        if (typeof entry !== "object" || entry === null)
+            return entry;
+        const obj = entry;
+        if (obj.mode === undefined)
+            return { ...obj, mode: "literacy" };
+        return obj;
+    });
+}
 /**
  * Parse and validate a task file's content against the canonical schema.
  * Returns typed tasks or throws with a user-friendly Zod error message.
@@ -216,7 +349,8 @@ export const CanonicalTaskFileSchema = z.array(CanonicalTaskSchema).min(1);
  * Accepts pre-parsed YAML data (unknown), not a raw string.
  */
 export function parseCanonicalTaskFile(raw, filename) {
-    const result = CanonicalTaskFileSchema.safeParse(raw);
+    const prepared = applyModeDefault(raw);
+    const result = CanonicalTaskFileSchema.safeParse(prepared);
     if (!result.success) {
         const messages = result.error.issues
             .map((i) => `  [${i.path.join(".")}]: ${i.message}`)

package/dist/commands/pipeline-action.js CHANGED Viewed

@@ -14,7 +14,7 @@ import { existsSync, mkdirSync, readFileSync, writeFileSync } from "fs";
 import { dirname, resolve } from "path";
 import { fileURLToPath } from "url";
 import { classifyUrls } from "../pipeline/classify-url.js";
-import { normalizeMode } from "../pipeline/normalize-mode.js";
+import { LiteracyVariant, normalizeMode } from "../pipeline/normalize-mode.js";
 import { assessImpact, buildReverseMapping, } from "../pipeline/reverse-mapping.js";
 import { buildAppContext, parseArtifactUploadEnv, } from "../orchestration/build-app-context.js";
 import { buildStepSequence } from "../orchestration/build-step-sequence.js";
@@ -47,6 +47,13 @@ export function computeResolvedOptions(opts) {
         mode = normalized.mode;
         // Explicit --variant flag takes precedence over what normalizeMode inferred
         variant = opts.variant ?? normalized.variant;
+        // Canonical mode "literacy" with no variant defaults to the full variant
+        // (standard + agentic). This preserves the pre-canonical CLI behavior
+        // where `--mode full` was the default, without emitting the legacy alias
+        // deprecation warning for users who pass no flags at all.
+        if (mode === "literacy" && !variant) {
+            variant = LiteracyVariant.FULL;
+        }
     }
     catch (err) {
         console.error(`❌ ${err instanceof Error ? err.message : String(err)}`);

package/dist/commands/pipeline.js CHANGED Viewed

@@ -8,12 +8,11 @@
  * @see docs/cli.md for the full flag reference.
  */
 import { Command } from "commander";
-import { LiteracyVariant } from "../pipeline/normalize-mode.js";
 import { addAgenticOptions, addDebugOptions, addSanitySourceOptions, } from "./shared/options.js";
 export function createPipelineCommand() {
     const cmd = new Command("pipeline")
         .description("Run the full evaluation pipeline")
-        .option("-m, --mode <mode>", "Evaluation mode: literacy (default), mcp-server, agent-harness, knowledge-probe, custom. Legacy aliases (baseline, agentic, observed, full) are accepted and normalized to literacy + variant.", LiteracyVariant.FULL)
+        .option("-m, --mode <mode>", "Evaluation mode: literacy (default), mcp-server, agent-harness, knowledge-probe, custom. Legacy aliases (baseline, agentic, observed, full) are accepted and normalized to literacy + variant.", "literacy")
         .option("--variant <variant>", "Literacy variant: full (default — standard + agentic), baseline (standard only), agentic (agentic only), observed. Only applies to --mode literacy.")
         .option("-s, --source <name>", "Documentation source name (from sources.yaml)")
         .option("-n, --dry-run", "Validate configuration only, no execution", false)

package/dist/commands/remote-pipeline.js CHANGED Viewed

@@ -14,7 +14,7 @@
  * @see docs/design-docs/cli-as-api-client.md — design doc
  */
 import { ZodError } from "zod";
-import { ApiClient, buildRemoteRequest, createProgressDisplay, formatJobError, resolveTasksDir, } from "../adapters/api-client/index.js";
+import { ApiClient, buildRemoteRequest, createProgressDisplay, formatJobError, NoRunnableTasksError, resolveTasksDir, } from "../adapters/api-client/index.js";
 import { writeRemoteResults } from "./remote-results.js";
 // ---------------------------------------------------------------------------
 // Public API
@@ -66,6 +66,10 @@ export async function runRemotePipeline(opts, rootDir) {
             console.error("💡 Fix the issues above in your .ailf/tasks/ YAML files.");
             process.exit(2);
         }
+        if (err instanceof NoRunnableTasksError) {
+            console.error(`❌ ${err.message}`);
+            process.exit(2);
+        }
         throw err;
     }
     console.log(`📦 Found ${taskCount} task(s) in ${tasksDir}`);
@@ -103,6 +107,7 @@ export async function runRemotePipeline(opts, rootDir) {
 function toConfigSlice(opts) {
     return {
         mode: opts.mode,
+        variant: opts.variant,
         debug: opts.debug,
         areas: opts.areaOption
             ?.split(",")

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@sanity/ailf",
-  "version": "3.5.0",
+  "version": "3.6.0",
   "private": false,
   "publishConfig": {
     "access": "public"