@sanity/ailf 3.5.0 → 3.5.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/bin/ailf.js CHANGED
@@ -33,9 +33,24 @@ const callerCwd = process.cwd()
33
33
  // ---------------------------------------------------------------------------
34
34
  if (existsSync(tsSrc)) {
35
35
  try {
36
+ // Enable the `ailf-source` export condition so @sanity/ailf-shared and
37
+ // @sanity/ailf-core resolve to their `src/index.ts` entrypoints rather
38
+ // than whatever happens to be in their `dist/` directories. Without
39
+ // this, running `ailf …` against a freshly pulled monorepo (or any
40
+ // workspace with a stale dist) fails at import time whenever the
41
+ // source introduces a new export that the dist hasn't caught up with.
42
+ const existingNodeOptions = process.env.NODE_OPTIONS ?? ""
43
+ const conditionFlag = "--conditions=ailf-source"
44
+ const nodeOptions = existingNodeOptions.includes(conditionFlag)
45
+ ? existingNodeOptions
46
+ : `${existingNodeOptions} ${conditionFlag}`.trim()
36
47
  execFileSync("npx", ["tsx", tsSrc, ...args], {
37
48
  cwd: ROOT,
38
- env: { ...process.env, AILF_CALLER_CWD: callerCwd },
49
+ env: {
50
+ ...process.env,
51
+ AILF_CALLER_CWD: callerCwd,
52
+ NODE_OPTIONS: nodeOptions,
53
+ },
39
54
  stdio: "inherit",
40
55
  })
41
56
  process.exit(0)
@@ -39,26 +39,55 @@ from `docs/design-docs/report-store/bigquery.md`.
39
39
  ### 1. Create the raw dataset (Airbyte writes here)
40
40
 
41
41
  ```bash
42
- bq mk --dataset data-platform-302218:ailf_raw
42
+ bq --project_id=data-platform-302218 --location=EU mk --dataset ailf_raw
43
43
  ```
44
44
 
45
45
  ### 2. Create the analytics dataset (views live here)
46
46
 
47
47
  ```bash
48
- bq mk --dataset data-platform-302218:ailf
48
+ bq --project_id=data-platform-302218 --location=EU mk --dataset ailf
49
49
  ```
50
50
 
51
51
  ### 3. Create the views
52
52
 
53
+ **Important ordering (learned 2026-04-23):** Airbyte must be redeployed with the
54
+ current manifest **before** you run these view SQLs. Each view binds to specific
55
+ columns on `ailf_raw.reports`; if the raw table is missing columns the Airbyte
56
+ projection expects, the `CREATE VIEW` statement fails with
57
+ `Unrecognized name: <column>`.
58
+
59
+ If your Airbyte destination has **schema evolution enabled** ("Propagate column
60
+ changes" in the UI), new columns appear automatically on the next incremental
61
+ sync. If not, flip it on, trigger a resync, and confirm the expected columns
62
+ exist before creating views:
63
+
64
+ ```bash
65
+ bq --project_id=data-platform-302218 --location=EU query --use_legacy_sql=false \
66
+ "SELECT column_name FROM ailf_raw.INFORMATION_SCHEMA.COLUMNS WHERE table_name = 'reports' ORDER BY column_name"
67
+ ```
68
+
69
+ If propagation is disabled and you can't flip it quickly, manually run
70
+ `ALTER TABLE ailf_raw.reports ADD COLUMN IF NOT EXISTS …` for each missing
71
+ column as a stop-gap. Values will be `NULL` until Airbyte writes to them on the
72
+ next sync.
73
+
74
+ Once the raw table has the expected columns:
75
+
53
76
  ```bash
54
- bq query --use_legacy_sql=false < views/reports.sql
55
- bq query --use_legacy_sql=false < views/area_scores.sql
56
- bq query --use_legacy_sql=false < views/official_runs.sql
57
- bq query --use_legacy_sql=false < views/official_area_scores.sql
77
+ cd packages/eval/config/bigquery
78
+ bq --project_id=data-platform-302218 --location=EU query --use_legacy_sql=false < views/reports.sql
79
+ bq --project_id=data-platform-302218 --location=EU query --use_legacy_sql=false < views/area_scores.sql
80
+ bq --project_id=data-platform-302218 --location=EU query --use_legacy_sql=false < views/official_runs.sql
81
+ bq --project_id=data-platform-302218 --location=EU query --use_legacy_sql=false < views/official_area_scores.sql
58
82
  # per-team views are optional — copy views/team_runs_template.sql,
59
83
  # fill in the slug, and run.
60
84
  ```
61
85
 
86
+ > `--project_id` / `--location=EU` are required because `bq` needs an explicit
87
+ > billing project and the `ailf*` datasets live in the EU multi-region. If you
88
+ > run `bq query` from this repo regularly, consider setting the default project with
89
+ > `gcloud config set project data-platform-302218`.
90
+
62
91
  ## Naming conventions
63
92
 
64
93
  - **`ailf_raw.*`** — raw Airbyte-loaded tables (nested JSON, Airbyte metadata
@@ -13,6 +13,16 @@
13
13
  * @see packages/eval/src/adapters/task-sources/repo-task-source.ts
14
14
  */
15
15
  import { type PipelineRequest } from "../../_vendor/ailf-core/index.d.ts";
16
+ /**
17
+ * Thrown when `buildRemoteRequest` can't find any runnable tasks.
18
+ *
19
+ * The CLI catches this separately from ZodError so it can print the
20
+ * message without an accompanying stack trace — the message is already
21
+ * the whole story for the user.
22
+ */
23
+ export declare class NoRunnableTasksError extends Error {
24
+ readonly name = "NoRunnableTasksError";
25
+ }
16
26
  /** Options for building a remote pipeline request. */
17
27
  export interface BuildRequestOptions {
18
28
  /** Path to .ailf/tasks/ directory. */
@@ -27,6 +37,7 @@ export interface BuildRequestOptions {
27
37
  */
28
38
  export interface RemoteConfigSlice {
29
39
  mode?: string;
40
+ variant?: string;
30
41
  debug?: {
31
42
  enabled?: boolean;
32
43
  firstN?: number;
@@ -16,7 +16,6 @@ import { existsSync } from "fs";
16
16
  import { resolve } from "path";
17
17
  import { PipelineRequestSchema, } from "../../_vendor/ailf-core/index.js";
18
18
  import { LEGACY_EVAL_MODE_ALIASES, isRunClassification, } from "../../_vendor/ailf-shared/index.js";
19
- import { LiteracyVariant } from "../../pipeline/normalize-mode.js";
20
19
  import { RepoTaskSource } from "../task-sources/repo-task-source.js";
21
20
  const LEGACY_LITERACY_VARIANT_SET = new Set(LEGACY_EVAL_MODE_ALIASES);
22
21
  /**
@@ -27,6 +26,16 @@ const LEGACY_LITERACY_VARIANT_SET = new Set(LEGACY_EVAL_MODE_ALIASES);
27
26
  function resolveCanonicalTaskMode(configMode) {
28
27
  return LEGACY_LITERACY_VARIANT_SET.has(configMode) ? "literacy" : configMode;
29
28
  }
29
+ /**
30
+ * Thrown when `buildRemoteRequest` can't find any runnable tasks.
31
+ *
32
+ * The CLI catches this separately from ZodError so it can print the
33
+ * message without an accompanying stack trace — the message is already
34
+ * the whole story for the user.
35
+ */
36
+ export class NoRunnableTasksError extends Error {
37
+ name = "NoRunnableTasksError";
38
+ }
30
39
  // ---------------------------------------------------------------------------
31
40
  // Public API
32
41
  // ---------------------------------------------------------------------------
@@ -56,11 +65,13 @@ export async function buildRemoteRequest(options) {
56
65
  ? allTasks.filter((t) => t.mode === taskModeFilter)
57
66
  : allTasks;
58
67
  if (tasks.length === 0) {
59
- throw new Error("No tasks found after applying filters.\n" +
60
- ` Tasks directory: ${tasksDir}\n` +
61
- (config.areas ? ` Area filter: ${config.areas.join(", ")}\n` : "") +
62
- (config.tasks ? ` Task filter: ${config.tasks.join(", ")}\n` : "") +
63
- " Check that your .ailf/tasks/ YAML files define tasks matching these filters.");
68
+ throw await emptyTasksError({
69
+ taskSource,
70
+ tasksDir,
71
+ config,
72
+ filterOptions,
73
+ taskModeFilter,
74
+ });
64
75
  }
65
76
  // 2. Convert tasks to inline format
66
77
  const inlineTasks = tasks.map(taskToInlineFormat);
@@ -69,10 +80,14 @@ export async function buildRemoteRequest(options) {
69
80
  taskMode: "inline",
70
81
  inlineTasks,
71
82
  };
72
- // Mode
73
- if (config.mode && config.mode !== LiteracyVariant.FULL) {
83
+ // Mode + variant — send both when set so the server sees the caller's
84
+ // canonical intent. Legacy aliases ("full", "baseline", …) are accepted
85
+ // by `PipelineRequestSchema.mode` for back-compat but the CLI now emits
86
+ // the canonical form (`mode: "literacy"` + explicit `variant`).
87
+ if (config.mode)
74
88
  raw.mode = config.mode;
75
- }
89
+ if (config.variant)
90
+ raw.variant = config.variant;
76
91
  // Debug
77
92
  if (config.debug?.enabled) {
78
93
  raw.debug = config.debug;
@@ -206,6 +221,88 @@ function taskToInlineFormat(task) {
206
221
  }
207
222
  return inline;
208
223
  }
224
+ /**
225
+ * Build a descriptive error when the task list is empty after filtering.
226
+ *
227
+ * Loads the full task list a second time with `includeDrafts: true` so we
228
+ * can distinguish the two common failure modes:
229
+ *
230
+ * 1. Every discovered task is non-active (`status: "draft"` from
231
+ * `ailf init` scaffolding, or `status: "paused"`). Tell the user how
232
+ * to opt a task in.
233
+ * 2. The tasks directory is genuinely empty for this filter combination.
234
+ * Echo the filters back so the mismatch is obvious.
235
+ *
236
+ * The directory-missing and file-missing cases are already surfaced
237
+ * earlier by `RepoTaskSource.loadTasks()`, so we never reach this helper
238
+ * for those.
239
+ */
240
+ async function emptyTasksError(args) {
241
+ const { taskSource, tasksDir, config, filterOptions, taskModeFilter } = args;
242
+ // Re-load without the status gate to categorize what got filtered.
243
+ let relaxed = [];
244
+ try {
245
+ relaxed = await taskSource.loadTasks({
246
+ ...(filterOptions ?? {}),
247
+ includeDrafts: true,
248
+ });
249
+ }
250
+ catch {
251
+ // Fall through to the generic message if re-loading fails for any
252
+ // reason (e.g. directory removed mid-run).
253
+ }
254
+ const modeMatched = taskModeFilter
255
+ ? relaxed.filter((t) => t.mode === taskModeFilter)
256
+ : relaxed;
257
+ const drafts = modeMatched.filter((t) => (t.status ?? "active") === "draft");
258
+ const paused = modeMatched.filter((t) => t.status === "paused");
259
+ const filtersBlock = (config.areas?.length
260
+ ? ` Area filter: ${config.areas.join(", ")}\n`
261
+ : "") +
262
+ (config.tasks?.length
263
+ ? ` Task filter: ${config.tasks.join(", ")}\n`
264
+ : "") +
265
+ (config.tags?.length ? ` Tag filter: ${config.tags.join(", ")}\n` : "") +
266
+ (taskModeFilter ? ` Mode filter: ${taskModeFilter}\n` : "");
267
+ if (modeMatched.length === 0) {
268
+ return new NoRunnableTasksError("No tasks matched your filters.\n" +
269
+ ` Tasks directory: ${tasksDir}\n` +
270
+ filtersBlock +
271
+ " Check that your .ailf/tasks/ YAML or .task.ts files define tasks\n" +
272
+ " matching these filters.");
273
+ }
274
+ // All matched tasks were excluded by the status gate.
275
+ const draftIds = drafts.map((t) => t.id);
276
+ const pausedIds = paused.map((t) => t.id);
277
+ const draftSample = draftIds.slice(0, 3).join(", ");
278
+ const draftMore = draftIds.length > 3 ? `, +${draftIds.length - 3} more` : "";
279
+ const pausedSample = pausedIds.slice(0, 3).join(", ");
280
+ const pausedMore = pausedIds.length > 3 ? `, +${pausedIds.length - 3} more` : "";
281
+ const lines = [];
282
+ lines.push("No runnable tasks after applying filters.");
283
+ lines.push(` Tasks directory: ${tasksDir}`);
284
+ if (filtersBlock)
285
+ lines.push(filtersBlock.trimEnd());
286
+ if (drafts.length > 0) {
287
+ lines.push(` ${drafts.length} task(s) skipped because status: "draft": ${draftSample}${draftMore}`);
288
+ }
289
+ if (paused.length > 0) {
290
+ lines.push(` ${paused.length} task(s) skipped because status: "paused": ${pausedSample}${pausedMore}`);
291
+ }
292
+ lines.push("");
293
+ lines.push(" To run one of these anyway, either:");
294
+ if (drafts.length > 0) {
295
+ lines.push(` • Change the task's status field from "draft" to "active", or`);
296
+ lines.push(` • Target it explicitly: --task ${drafts[0]?.id ?? "<id>"}`);
297
+ }
298
+ else if (paused.length > 0) {
299
+ lines.push(` • Target it explicitly by id: --task ${paused[0]?.id ?? "<id>"}, or`);
300
+ lines.push(` • Flip its status from "paused" to "active"`);
301
+ }
302
+ lines.push(" Tasks scaffolded by `ailf init` ship as drafts so you can edit");
303
+ lines.push(" them before they start contributing to your literacy score.");
304
+ return new NoRunnableTasksError(lines.join("\n"));
305
+ }
209
306
  function buildFilterOptions(config) {
210
307
  const areas = config.areas?.length ? config.areas : undefined;
211
308
  const taskIds = config.tasks?.length ? config.tasks : undefined;
@@ -5,7 +5,7 @@
5
5
  * import { ApiClient, buildRemoteRequest, resolveTasksDir } from "./adapters/api-client/index.js"
6
6
  */
7
7
  export { ApiClient } from "./api-client.js";
8
- export { buildCallerEnvelope, buildRemoteRequest, resolveTasksDir, type BuildRequestOptions, type RemoteConfigSlice, } from "./build-request.js";
8
+ export { buildCallerEnvelope, buildRemoteRequest, NoRunnableTasksError, resolveTasksDir, type BuildRequestOptions, type RemoteConfigSlice, } from "./build-request.js";
9
9
  export { ApiAuthError, ApiConnectionError, ApiError, ApiTimeoutError, } from "./errors.js";
10
10
  export { formatJobError } from "./format-error.js";
11
11
  export { createProgressDisplay } from "./progress.js";
@@ -5,7 +5,7 @@
5
5
  * import { ApiClient, buildRemoteRequest, resolveTasksDir } from "./adapters/api-client/index.js"
6
6
  */
7
7
  export { ApiClient } from "./api-client.js";
8
- export { buildCallerEnvelope, buildRemoteRequest, resolveTasksDir, } from "./build-request.js";
8
+ export { buildCallerEnvelope, buildRemoteRequest, NoRunnableTasksError, resolveTasksDir, } from "./build-request.js";
9
9
  export { ApiAuthError, ApiConnectionError, ApiError, ApiTimeoutError, } from "./errors.js";
10
10
  export { formatJobError } from "./format-error.js";
11
11
  export { createProgressDisplay } from "./progress.js";
@@ -14,7 +14,7 @@ import { existsSync, mkdirSync, readFileSync, writeFileSync } from "fs";
14
14
  import { dirname, resolve } from "path";
15
15
  import { fileURLToPath } from "url";
16
16
  import { classifyUrls } from "../pipeline/classify-url.js";
17
- import { normalizeMode } from "../pipeline/normalize-mode.js";
17
+ import { LiteracyVariant, normalizeMode } from "../pipeline/normalize-mode.js";
18
18
  import { assessImpact, buildReverseMapping, } from "../pipeline/reverse-mapping.js";
19
19
  import { buildAppContext, parseArtifactUploadEnv, } from "../orchestration/build-app-context.js";
20
20
  import { buildStepSequence } from "../orchestration/build-step-sequence.js";
@@ -47,6 +47,13 @@ export function computeResolvedOptions(opts) {
47
47
  mode = normalized.mode;
48
48
  // Explicit --variant flag takes precedence over what normalizeMode inferred
49
49
  variant = opts.variant ?? normalized.variant;
50
+ // Canonical mode "literacy" with no variant defaults to the full variant
51
+ // (standard + agentic). This preserves the pre-canonical CLI behavior
52
+ // where `--mode full` was the default, without emitting the legacy alias
53
+ // deprecation warning for users who pass no flags at all.
54
+ if (mode === "literacy" && !variant) {
55
+ variant = LiteracyVariant.FULL;
56
+ }
50
57
  }
51
58
  catch (err) {
52
59
  console.error(`❌ ${err instanceof Error ? err.message : String(err)}`);
@@ -8,12 +8,11 @@
8
8
  * @see docs/cli.md for the full flag reference.
9
9
  */
10
10
  import { Command } from "commander";
11
- import { LiteracyVariant } from "../pipeline/normalize-mode.js";
12
11
  import { addAgenticOptions, addDebugOptions, addSanitySourceOptions, } from "./shared/options.js";
13
12
  export function createPipelineCommand() {
14
13
  const cmd = new Command("pipeline")
15
14
  .description("Run the full evaluation pipeline")
16
- .option("-m, --mode <mode>", "Evaluation mode: literacy (default), mcp-server, agent-harness, knowledge-probe, custom. Legacy aliases (baseline, agentic, observed, full) are accepted and normalized to literacy + variant.", LiteracyVariant.FULL)
15
+ .option("-m, --mode <mode>", "Evaluation mode: literacy (default), mcp-server, agent-harness, knowledge-probe, custom. Legacy aliases (baseline, agentic, observed, full) are accepted and normalized to literacy + variant.", "literacy")
17
16
  .option("--variant <variant>", "Literacy variant: full (default — standard + agentic), baseline (standard only), agentic (agentic only), observed. Only applies to --mode literacy.")
18
17
  .option("-s, --source <name>", "Documentation source name (from sources.yaml)")
19
18
  .option("-n, --dry-run", "Validate configuration only, no execution", false)
@@ -14,7 +14,7 @@
14
14
  * @see docs/design-docs/cli-as-api-client.md — design doc
15
15
  */
16
16
  import { ZodError } from "zod";
17
- import { ApiClient, buildRemoteRequest, createProgressDisplay, formatJobError, resolveTasksDir, } from "../adapters/api-client/index.js";
17
+ import { ApiClient, buildRemoteRequest, createProgressDisplay, formatJobError, NoRunnableTasksError, resolveTasksDir, } from "../adapters/api-client/index.js";
18
18
  import { writeRemoteResults } from "./remote-results.js";
19
19
  // ---------------------------------------------------------------------------
20
20
  // Public API
@@ -66,6 +66,10 @@ export async function runRemotePipeline(opts, rootDir) {
66
66
  console.error("💡 Fix the issues above in your .ailf/tasks/ YAML files.");
67
67
  process.exit(2);
68
68
  }
69
+ if (err instanceof NoRunnableTasksError) {
70
+ console.error(`❌ ${err.message}`);
71
+ process.exit(2);
72
+ }
69
73
  throw err;
70
74
  }
71
75
  console.log(`📦 Found ${taskCount} task(s) in ${tasksDir}`);
@@ -103,6 +107,7 @@ export async function runRemotePipeline(opts, rootDir) {
103
107
  function toConfigSlice(opts) {
104
108
  return {
105
109
  mode: opts.mode,
110
+ variant: opts.variant,
106
111
  debug: opts.debug,
107
112
  areas: opts.areaOption
108
113
  ?.split(",")
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@sanity/ailf",
3
- "version": "3.5.0",
3
+ "version": "3.5.1",
4
4
  "private": false,
5
5
  "publishConfig": {
6
6
  "access": "public"