npm - @sanity/ailf - Versions diffs - 3.5.0 → 3.6.0 - Mend

@sanity/ailf 3.5.0 → 3.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (16)

package/bin/ailf.js +16 -1
package/config/bigquery/README.md +35 -6
package/dist/_vendor/ailf-core/types/generalized-task.d.ts +23 -0
package/dist/_vendor/ailf-core/types/index.d.ts +1 -1
package/dist/adapters/api-client/build-request.d.ts +11 -0
package/dist/adapters/api-client/build-request.js +106 -9
package/dist/adapters/api-client/index.d.ts +1 -1
package/dist/adapters/api-client/index.js +1 -1
package/dist/adapters/task-sources/content-lake-task-source.d.ts +8 -3
package/dist/adapters/task-sources/content-lake-task-source.js +19 -8
package/dist/adapters/task-sources/repo-schemas.d.ts +1093 -41
package/dist/adapters/task-sources/repo-schemas.js +178 -44
package/dist/commands/pipeline-action.js +8 -1
package/dist/commands/pipeline.js +1 -2
package/dist/commands/remote-pipeline.js +6 -1
package/package.json +1 -1

package/bin/ailf.js CHANGED Viewed

@@ -33,9 +33,24 @@ const callerCwd = process.cwd()
 // ---------------------------------------------------------------------------
 if (existsSync(tsSrc)) {
   try {
+    // Enable the `ailf-source` export condition so @sanity/ailf-shared and
+    // @sanity/ailf-core resolve to their `src/index.ts` entrypoints rather
+    // than whatever happens to be in their `dist/` directories. Without
+    // this, running `ailf …` against a freshly pulled monorepo (or any
+    // workspace with a stale dist) fails at import time whenever the
+    // source introduces a new export that the dist hasn't caught up with.
+    const existingNodeOptions = process.env.NODE_OPTIONS ?? ""
+    const conditionFlag = "--conditions=ailf-source"
+    const nodeOptions = existingNodeOptions.includes(conditionFlag)
+      ? existingNodeOptions
+      : `${existingNodeOptions} ${conditionFlag}`.trim()
     execFileSync("npx", ["tsx", tsSrc, ...args], {
       cwd: ROOT,
-      env: { ...process.env, AILF_CALLER_CWD: callerCwd },
+      env: {
+        ...process.env,
+        AILF_CALLER_CWD: callerCwd,
+        NODE_OPTIONS: nodeOptions,
+      },
       stdio: "inherit",
     })
     process.exit(0)

package/config/bigquery/README.md CHANGED Viewed

@@ -39,26 +39,55 @@ from `docs/design-docs/report-store/bigquery.md`.
 ### 1. Create the raw dataset (Airbyte writes here)
 ```bash
-bq mk --dataset data-platform-302218:ailf_raw
+bq --project_id=data-platform-302218 --location=EU mk --dataset ailf_raw
 ```
 ### 2. Create the analytics dataset (views live here)
 ```bash
-bq mk --dataset data-platform-302218:ailf
+bq --project_id=data-platform-302218 --location=EU mk --dataset ailf
 ```
 ### 3. Create the views
+**Important ordering (learned 2026-04-23):** Airbyte must be redeployed with the
+current manifest **before** you run these view SQLs. Each view binds to specific
+columns on `ailf_raw.reports`; if the raw table is missing columns the Airbyte
+projection expects, the `CREATE VIEW` statement fails with
+`Unrecognized name: <column>`.
+If your Airbyte destination has **schema evolution enabled** ("Propagate column
+changes" in the UI), new columns appear automatically on the next incremental
+sync. If not, flip it on, trigger a resync, and confirm the expected columns
+exist before creating views:
+```bash
+bq --project_id=data-platform-302218 --location=EU query --use_legacy_sql=false \
+  "SELECT column_name FROM ailf_raw.INFORMATION_SCHEMA.COLUMNS WHERE table_name = 'reports' ORDER BY column_name"
+```
+If propagation is disabled and you can't flip it quickly, manually
+`ALTER TABLE ailf_raw.reports ADD COLUMN IF NOT EXISTS …` for each missing
+column as a stop-gap. Values will be `NULL` until Airbyte writes to them on the
+next sync.
+Once the raw table has the expected columns:
 ```bash
-bq query --use_legacy_sql=false < views/reports.sql
-bq query --use_legacy_sql=false < views/area_scores.sql
-bq query --use_legacy_sql=false < views/official_runs.sql
-bq query --use_legacy_sql=false < views/official_area_scores.sql
+cd packages/eval/config/bigquery
+bq --project_id=data-platform-302218 --location=EU query --use_legacy_sql=false < views/reports.sql
+bq --project_id=data-platform-302218 --location=EU query --use_legacy_sql=false < views/area_scores.sql
+bq --project_id=data-platform-302218 --location=EU query --use_legacy_sql=false < views/official_runs.sql
+bq --project_id=data-platform-302218 --location=EU query --use_legacy_sql=false < views/official_area_scores.sql
 # per-team views are optional — copy views/team_runs_template.sql,
 # fill in the slug, and run.
 ```
+> `--project_id` / `--location=EU` are required because `bq` needs an explicit
+> billing project and the `ailf*` datasets live in the EU multi-region. If you
+> run `bq query` from this repo regularly, consider setting the default with
+> `gcloud config set project data-platform-302218`.
 ## Naming conventions
 - **`ailf_raw.*`** — raw Airbyte-loaded tables (nested JSON, Airbyte metadata

package/dist/_vendor/ailf-core/types/generalized-task.d.ts CHANGED Viewed

@@ -341,3 +341,26 @@ export interface CustomTaskDefinition extends TaskCommonFields {
  * when authoring tasks.
  */
 export type GeneralizedTaskDefinition = LiteracyTaskDefinition | MCPServerTaskDefinition | AgentHarnessTaskDefinition | KnowledgeProbeTaskDefinition | CustomTaskDefinition;
+/**
+ * The subset of task modes that can be authored as `ailf.task` documents in
+ * the Content Lake (Sanity Studio). Today exactly `"literacy"`.
+ *
+ * Expanding this set is a deliberate decision: execution-bound fields
+ * (filesystem handles, local commands, sandbox config, module paths) cannot
+ * round-trip through Content Lake, so not every mode belongs here. Adding a
+ * mode requires a new or superseding ADR and a coordinated schema update
+ * across the domain type, Studio schema, and `ContentLakeTaskSource` adapter
+ * per the `ailf-schema-sync` skill.
+ *
+ * @see docs/decisions/D0038-content-lake-authorable-task-modes.md
+ */
+export type ContentLakeAuthorableMode = "literacy";
+/**
+ * The slice of `GeneralizedTaskDefinition` authorable in the Content Lake,
+ * derived mechanically from `ContentLakeAuthorableMode`. Used as the return
+ * type of `ContentLakeTaskSource` so the adapter's mode literal is
+ * type-checked against the boundary rather than a loose cast.
+ */
+export type ContentLakeAuthorableTask = Extract<GeneralizedTaskDefinition, {
+    mode: ContentLakeAuthorableMode;
+}>;

package/dist/_vendor/ailf-core/types/index.d.ts CHANGED Viewed

@@ -25,7 +25,7 @@ export type { VariableDeclaration, VariableEnvelope, VariableProvenance, Variabl
 export type { EvalTrace, ToolCallCategory, ToolCallRecord, TraceEvent, TraceSpan, TraceTokenUsage, } from "./trace.js";
 export type { ArtifactId, AssociationAxis, AssociationValues, Brand, EntryKey, Err, FixtureId, IdValidationError, NewReportId, Ok, ProviderId, PromptId, Result, ResultId, RubricId, RunFingerprint, RunId, SuiteId, TaskId, TaskSlug, TraceId, } from "./branded-ids.js";
 export { err, fixtureId, generateRunId, ok, providerId, resultId, runId, suiteId, taskId, traceId, } from "./branded-ids.js";
-export type { AgentHarnessTaskDefinition, CustomTaskDefinition, GeneralizedAssertionDefinition, GeneralizedDocRef, GeneralizedTaskDefinition, GeneralizedTemplatedAssertion, GeneralizedValueAssertion, IdDocRef, KnowledgeProbeTaskDefinition, LiteracyTaskDefinition, MCPServerTaskDefinition, PathDocRef, PerspectiveDocRef, RubricRef, SlugDocRef, TaskCommonFields, TaskDifficulty, TaskOptions, TaskProviderConfig, TaskStatus, } from "./generalized-task.js";
+export type { AgentHarnessTaskDefinition, ContentLakeAuthorableMode, ContentLakeAuthorableTask, CustomTaskDefinition, GeneralizedAssertionDefinition, GeneralizedDocRef, GeneralizedTaskDefinition, GeneralizedTemplatedAssertion, GeneralizedValueAssertion, IdDocRef, KnowledgeProbeTaskDefinition, LiteracyTaskDefinition, MCPServerTaskDefinition, PathDocRef, PerspectiveDocRef, RubricRef, SlugDocRef, TaskCommonFields, TaskDifficulty, TaskOptions, TaskProviderConfig, TaskStatus, } from "./generalized-task.js";
 type DocumentRef = _DocumentRef;
 /** Aggregated retrieval metrics for a feature area */
 export interface AreaRetrievalMetrics {

package/dist/adapters/api-client/build-request.d.ts CHANGED Viewed

@@ -13,6 +13,16 @@
  * @see packages/eval/src/adapters/task-sources/repo-task-source.ts
  */
 import { type PipelineRequest } from "../../_vendor/ailf-core/index.d.ts";
+/**
+ * Thrown when `buildRemoteRequest` can't find any runnable tasks.
+ *
+ * The CLI catches this separately from ZodError so it can print the
+ * message without an accompanying stack trace — the message is already
+ * the whole story for the user.
+ */
+export declare class NoRunnableTasksError extends Error {
+    readonly name = "NoRunnableTasksError";
+}
 /** Options for building a remote pipeline request. */
 export interface BuildRequestOptions {
     /** Path to .ailf/tasks/ directory. */
@@ -27,6 +37,7 @@ export interface BuildRequestOptions {
  */
 export interface RemoteConfigSlice {
     mode?: string;
+    variant?: string;
     debug?: {
         enabled?: boolean;
         firstN?: number;

package/dist/adapters/api-client/build-request.js CHANGED Viewed

@@ -16,7 +16,6 @@ import { existsSync } from "fs";
 import { resolve } from "path";
 import { PipelineRequestSchema, } from "../../_vendor/ailf-core/index.js";
 import { LEGACY_EVAL_MODE_ALIASES, isRunClassification, } from "../../_vendor/ailf-shared/index.js";
-import { LiteracyVariant } from "../../pipeline/normalize-mode.js";
 import { RepoTaskSource } from "../task-sources/repo-task-source.js";
 const LEGACY_LITERACY_VARIANT_SET = new Set(LEGACY_EVAL_MODE_ALIASES);
 /**
@@ -27,6 +26,16 @@ const LEGACY_LITERACY_VARIANT_SET = new Set(LEGACY_EVAL_MODE_ALIASES);
 function resolveCanonicalTaskMode(configMode) {
     return LEGACY_LITERACY_VARIANT_SET.has(configMode) ? "literacy" : configMode;
 }
+/**
+ * Thrown when `buildRemoteRequest` can't find any runnable tasks.
+ *
+ * The CLI catches this separately from ZodError so it can print the
+ * message without an accompanying stack trace — the message is already
+ * the whole story for the user.
+ */
+export class NoRunnableTasksError extends Error {
+    name = "NoRunnableTasksError";
+}
 // ---------------------------------------------------------------------------
 // Public API
 // ---------------------------------------------------------------------------
@@ -56,11 +65,13 @@ export async function buildRemoteRequest(options) {
         ? allTasks.filter((t) => t.mode === taskModeFilter)
         : allTasks;
     if (tasks.length === 0) {
-        throw new Error("No tasks found after applying filters.\n" +
-            `  Tasks directory: ${tasksDir}\n` +
-            (config.areas ? `  Area filter: ${config.areas.join(", ")}\n` : "") +
-            (config.tasks ? `  Task filter: ${config.tasks.join(", ")}\n` : "") +
-            "  Check that your .ailf/tasks/ YAML files define tasks matching these filters.");
+        throw await emptyTasksError({
+            taskSource,
+            tasksDir,
+            config,
+            filterOptions,
+            taskModeFilter,
+        });
     }
     // 2. Convert tasks to inline format
     const inlineTasks = tasks.map(taskToInlineFormat);
@@ -69,10 +80,14 @@ export async function buildRemoteRequest(options) {
         taskMode: "inline",
         inlineTasks,
     };
-    // Mode
-    if (config.mode && config.mode !== LiteracyVariant.FULL) {
+    // Mode + variant — send both when set so the server sees the caller's
+    // canonical intent. Legacy aliases ("full", "baseline", …) are accepted
+    // by `PipelineRequestSchema.mode` for back-compat but the CLI now emits
+    // the canonical form (`mode: "literacy"` + explicit `variant`).
+    if (config.mode)
         raw.mode = config.mode;
-    }
+    if (config.variant)
+        raw.variant = config.variant;
     // Debug
     if (config.debug?.enabled) {
         raw.debug = config.debug;
@@ -206,6 +221,88 @@ function taskToInlineFormat(task) {
     }
     return inline;
 }
+/**
+ * Build a descriptive error when the task list is empty after filtering.
+ *
+ * Loads the full task list a second time with `includeDrafts: true` so we
+ * can distinguish the two common failure modes:
+ *
+ * 1. Every discovered task is non-active (`status: "draft"` from
+ *    `ailf init` scaffolding, or `status: "paused"`). Tell the user how
+ *    to opt a task in.
+ * 2. The tasks directory is genuinely empty for this filter combination.
+ *    Echo the filters back so the mismatch is obvious.
+ *
+ * The directory-missing and file-missing cases are already surfaced
+ * earlier by `RepoTaskSource.loadTasks()`, so we never reach this helper
+ * for those.
+ */
+async function emptyTasksError(args) {
+    const { taskSource, tasksDir, config, filterOptions, taskModeFilter } = args;
+    // Re-load without the status gate to categorize what got filtered.
+    let relaxed = [];
+    try {
+        relaxed = await taskSource.loadTasks({
+            ...(filterOptions ?? {}),
+            includeDrafts: true,
+        });
+    }
+    catch {
+        // Fall through to the generic message if re-loading fails for any
+        // reason (e.g. directory removed mid-run).
+    }
+    const modeMatched = taskModeFilter
+        ? relaxed.filter((t) => t.mode === taskModeFilter)
+        : relaxed;
+    const drafts = modeMatched.filter((t) => (t.status ?? "active") === "draft");
+    const paused = modeMatched.filter((t) => t.status === "paused");
+    const filtersBlock = (config.areas?.length
+        ? `  Area filter: ${config.areas.join(", ")}\n`
+        : "") +
+        (config.tasks?.length
+            ? `  Task filter: ${config.tasks.join(", ")}\n`
+            : "") +
+        (config.tags?.length ? `  Tag filter: ${config.tags.join(", ")}\n` : "") +
+        (taskModeFilter ? `  Mode filter: ${taskModeFilter}\n` : "");
+    if (modeMatched.length === 0) {
+        return new NoRunnableTasksError("No tasks matched your filters.\n" +
+            `  Tasks directory: ${tasksDir}\n` +
+            filtersBlock +
+            "  Check that your .ailf/tasks/ YAML or .task.ts files define tasks\n" +
+            "  matching these filters.");
+    }
+    // All matched tasks were excluded by the status gate.
+    const draftIds = drafts.map((t) => t.id);
+    const pausedIds = paused.map((t) => t.id);
+    const draftSample = draftIds.slice(0, 3).join(", ");
+    const draftMore = draftIds.length > 3 ? `, +${draftIds.length - 3} more` : "";
+    const pausedSample = pausedIds.slice(0, 3).join(", ");
+    const pausedMore = pausedIds.length > 3 ? `, +${pausedIds.length - 3} more` : "";
+    const lines = [];
+    lines.push("No runnable tasks after applying filters.");
+    lines.push(`  Tasks directory: ${tasksDir}`);
+    if (filtersBlock)
+        lines.push(filtersBlock.trimEnd());
+    if (drafts.length > 0) {
+        lines.push(`  ${drafts.length} task(s) skipped because status: "draft": ${draftSample}${draftMore}`);
+    }
+    if (paused.length > 0) {
+        lines.push(`  ${paused.length} task(s) skipped because status: "paused": ${pausedSample}${pausedMore}`);
+    }
+    lines.push("");
+    lines.push("  To run one of these anyway, either:");
+    if (drafts.length > 0) {
+        lines.push(`    • Change the task's status field from "draft" to "active", or`);
+        lines.push(`    • Target it explicitly: --task ${drafts[0]?.id ?? "<id>"}`);
+    }
+    else if (paused.length > 0) {
+        lines.push(`    • Target it explicitly by id: --task ${paused[0]?.id ?? "<id>"}, or`);
+        lines.push(`    • Flip its status from "paused" to "active"`);
+    }
+    lines.push("  Tasks scaffolded by `ailf init` ship as drafts so you can edit");
+    lines.push("  them before they start contributing to your literacy score.");
+    return new NoRunnableTasksError(lines.join("\n"));
+}
 function buildFilterOptions(config) {
     const areas = config.areas?.length ? config.areas : undefined;
     const taskIds = config.tasks?.length ? config.tasks : undefined;

package/dist/adapters/api-client/index.d.ts CHANGED Viewed

@@ -5,7 +5,7 @@
  *   import { ApiClient, buildRemoteRequest, resolveTasksDir } from "./adapters/api-client/index.js"
  */
 export { ApiClient } from "./api-client.js";
-export { buildCallerEnvelope, buildRemoteRequest, resolveTasksDir, type BuildRequestOptions, type RemoteConfigSlice, } from "./build-request.js";
+export { buildCallerEnvelope, buildRemoteRequest, NoRunnableTasksError, resolveTasksDir, type BuildRequestOptions, type RemoteConfigSlice, } from "./build-request.js";
 export { ApiAuthError, ApiConnectionError, ApiError, ApiTimeoutError, } from "./errors.js";
 export { formatJobError } from "./format-error.js";
 export { createProgressDisplay } from "./progress.js";

package/dist/adapters/api-client/index.js CHANGED Viewed

@@ -5,7 +5,7 @@
  *   import { ApiClient, buildRemoteRequest, resolveTasksDir } from "./adapters/api-client/index.js"
  */
 export { ApiClient } from "./api-client.js";
-export { buildCallerEnvelope, buildRemoteRequest, resolveTasksDir, } from "./build-request.js";
+export { buildCallerEnvelope, buildRemoteRequest, NoRunnableTasksError, resolveTasksDir, } from "./build-request.js";
 export { ApiAuthError, ApiConnectionError, ApiError, ApiTimeoutError, } from "./errors.js";
 export { formatJobError } from "./format-error.js";
 export { createProgressDisplay } from "./progress.js";

package/dist/adapters/task-sources/content-lake-task-source.d.ts CHANGED Viewed

@@ -2,13 +2,18 @@
  * Adapter: Load task definitions from the Sanity Content Lake.
  *
  * Fetches ailf.task documents via GROQ and maps them to
- * GeneralizedTaskDefinition (LiteracyTaskDefinition variant).
- * The pipeline never knows which adapter loaded the tasks.
+ * `ContentLakeAuthorableTask` — the subset of `GeneralizedTaskDefinition`
+ * authorable in Studio per D0038. Today that subset is exactly the
+ * literacy variant.
+ *
+ * The pipeline never knows which adapter loaded the tasks; the
+ * `TaskSource` port widens the return type back to
+ * `GeneralizedTaskDefinition[]`.
  *
  * Wired in the composition root as the default task source.
  *
  * @see packages/core/src/ports/task-source.ts — TaskSource port
- * @see docs/archive/exec-plans/tasks-as-content/phase-2-pipeline-integration.md
+ * @see docs/decisions/D0038-content-lake-authorable-task-modes.md
  */
 import type { SanityClient } from "@sanity/client";
 import type { FilterOptions, GeneralizedTaskDefinition, TaskSource } from "../../_vendor/ailf-core/index.d.ts";

package/dist/adapters/task-sources/content-lake-task-source.js CHANGED Viewed

@@ -2,13 +2,18 @@
  * Adapter: Load task definitions from the Sanity Content Lake.
  *
  * Fetches ailf.task documents via GROQ and maps them to
- * GeneralizedTaskDefinition (LiteracyTaskDefinition variant).
- * The pipeline never knows which adapter loaded the tasks.
+ * `ContentLakeAuthorableTask` — the subset of `GeneralizedTaskDefinition`
+ * authorable in Studio per D0038. Today that subset is exactly the
+ * literacy variant.
+ *
+ * The pipeline never knows which adapter loaded the tasks; the
+ * `TaskSource` port widens the return type back to
+ * `GeneralizedTaskDefinition[]`.
  *
  * Wired in the composition root as the default task source.
  *
  * @see packages/core/src/ports/task-source.ts — TaskSource port
- * @see docs/archive/exec-plans/tasks-as-content/phase-2-pipeline-integration.md
+ * @see docs/decisions/D0038-content-lake-authorable-task-modes.md
  */
 // ---------------------------------------------------------------------------
 // GROQ query — fetches ailf.task documents with resolved references
@@ -47,11 +52,15 @@ const TASKS_QUERY = /* groq */ `
   && (!defined($tags) || count((tags)[@ in $tags]) > 0)
 ] | order(coalesce(area->areaId.current, featureArea->areaId.current) asc, id.current asc) {
   "taskId": id.current,
-  // Coalesce current and legacy field names so documents created before
-  // the schema rename are still readable.
+  // The coalesce on title preserves back-compat: older documents that used
+  // the description field as the task label (before title was required)
+  // still read cleanly. New documents have title and description as
+  // distinct fields.
   "title": coalesce(title, description),
+  description,
   "areaId": coalesce(area->areaId.current, featureArea->areaId.current),
   "promptText": coalesce(promptText, taskPrompt),
+  status,
   docCoverage,
   "contextDocs": coalesce(contextDocs, canonicalDocs)[] {
     refType,
@@ -86,7 +95,7 @@ export class ContentLakeTaskSource {
         }
         const definitions = [];
         for (const entry of raw) {
-            const mapped = mapToLiteracyTask(entry);
+            const mapped = mapToAuthorableTask(entry);
             if (!mapped)
                 continue;
             definitions.push(mapped);
@@ -115,14 +124,14 @@ function buildGroqParams(filter) {
 // Mapping: Content Lake → LiteracyTaskDefinition
 // ---------------------------------------------------------------------------
 /**
- * Map a Content Lake ailf.task document directly to a LiteracyTaskDefinition.
+ * Map a Content Lake ailf.task document to a `ContentLakeAuthorableTask`.
  *
  * Returns null if the document is missing required fields (taskId,
  * title, areaId, promptText). These are required by the
  * Studio schema, but defensive coding handles edge cases (drafts,
  * partially-created documents, etc.).
  */
-function mapToLiteracyTask(raw) {
+function mapToAuthorableTask(raw) {
     // Required fields — skip malformed documents
     if (!raw.taskId || !raw.title || !raw.areaId || !raw.promptText) {
         return null;
@@ -172,6 +181,8 @@ function mapToLiteracyTask(raw) {
         referenceSolution: "",
         ...(baseline ? { baseline } : {}),
         ...(raw.tags?.length ? { tags: raw.tags } : {}),
+        ...(raw.status ? { status: raw.status } : {}),
+        ...(raw.description ? { description: raw.description } : {}),
     };
 }
 /**