@sanity/ailf 3.5.0 → 3.5.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/bin/ailf.js +16 -1
- package/config/bigquery/README.md +35 -6
- package/dist/adapters/api-client/build-request.d.ts +11 -0
- package/dist/adapters/api-client/build-request.js +106 -9
- package/dist/adapters/api-client/index.d.ts +1 -1
- package/dist/adapters/api-client/index.js +1 -1
- package/dist/commands/pipeline-action.js +8 -1
- package/dist/commands/pipeline.js +1 -2
- package/dist/commands/remote-pipeline.js +6 -1
- package/package.json +1 -1
package/bin/ailf.js
CHANGED
|
@@ -33,9 +33,24 @@ const callerCwd = process.cwd()
|
|
|
33
33
|
// ---------------------------------------------------------------------------
|
|
34
34
|
if (existsSync(tsSrc)) {
|
|
35
35
|
try {
|
|
36
|
+
// Enable the `ailf-source` export condition so @sanity/ailf-shared and
|
|
37
|
+
// @sanity/ailf-core resolve to their `src/index.ts` entrypoints rather
|
|
38
|
+
// than whatever happens to be in their `dist/` directories. Without
|
|
39
|
+
// this, running `ailf …` against a freshly pulled monorepo (or any
|
|
40
|
+
// workspace with a stale dist) fails at import time whenever the
|
|
41
|
+
// source introduces a new export that the dist hasn't caught up with.
|
|
42
|
+
const existingNodeOptions = process.env.NODE_OPTIONS ?? ""
|
|
43
|
+
const conditionFlag = "--conditions=ailf-source"
|
|
44
|
+
const nodeOptions = existingNodeOptions.includes(conditionFlag)
|
|
45
|
+
? existingNodeOptions
|
|
46
|
+
: `${existingNodeOptions} ${conditionFlag}`.trim()
|
|
36
47
|
execFileSync("npx", ["tsx", tsSrc, ...args], {
|
|
37
48
|
cwd: ROOT,
|
|
38
|
-
env: {
|
|
49
|
+
env: {
|
|
50
|
+
...process.env,
|
|
51
|
+
AILF_CALLER_CWD: callerCwd,
|
|
52
|
+
NODE_OPTIONS: nodeOptions,
|
|
53
|
+
},
|
|
39
54
|
stdio: "inherit",
|
|
40
55
|
})
|
|
41
56
|
process.exit(0)
|
|
@@ -39,26 +39,55 @@ from `docs/design-docs/report-store/bigquery.md`.
|
|
|
39
39
|
### 1. Create the raw dataset (Airbyte writes here)
|
|
40
40
|
|
|
41
41
|
```bash
|
|
42
|
-
bq
|
|
42
|
+
bq --project_id=data-platform-302218 --location=EU mk --dataset ailf_raw
|
|
43
43
|
```
|
|
44
44
|
|
|
45
45
|
### 2. Create the analytics dataset (views live here)
|
|
46
46
|
|
|
47
47
|
```bash
|
|
48
|
-
bq
|
|
48
|
+
bq --project_id=data-platform-302218 --location=EU mk --dataset ailf
|
|
49
49
|
```
|
|
50
50
|
|
|
51
51
|
### 3. Create the views
|
|
52
52
|
|
|
53
|
+
**Important ordering (learned 2026-04-23):** Airbyte must be redeployed with the
|
|
54
|
+
current manifest **before** you run these view SQL files. Each view binds to specific
|
|
55
|
+
columns on `ailf_raw.reports`; if the raw table is missing columns the Airbyte
|
|
56
|
+
projection expects, the `CREATE VIEW` statement fails with
|
|
57
|
+
`Unrecognized name: <column>`.
|
|
58
|
+
|
|
59
|
+
If your Airbyte destination has **schema evolution enabled** ("Propagate column
|
|
60
|
+
changes" in the UI), new columns appear automatically on the next incremental
|
|
61
|
+
sync. If not, flip it on, trigger a resync, and confirm the expected columns
|
|
62
|
+
exist before creating views:
|
|
63
|
+
|
|
64
|
+
```bash
|
|
65
|
+
bq --project_id=data-platform-302218 --location=EU query --use_legacy_sql=false \
|
|
66
|
+
"SELECT column_name FROM ailf_raw.INFORMATION_SCHEMA.COLUMNS WHERE table_name = 'reports' ORDER BY column_name"
|
|
67
|
+
```
|
|
68
|
+
|
|
69
|
+
If propagation is disabled and you can't flip it quickly, manually run
|
|
70
|
+
`ALTER TABLE ailf_raw.reports ADD COLUMN IF NOT EXISTS …` for each missing
|
|
71
|
+
column as a stop-gap. Values will be `NULL` until Airbyte writes to them on the
|
|
72
|
+
next sync.
|
|
73
|
+
|
|
74
|
+
Once the raw table has the expected columns:
|
|
75
|
+
|
|
53
76
|
```bash
|
|
54
|
-
|
|
55
|
-
bq query --use_legacy_sql=false < views/
|
|
56
|
-
bq query --use_legacy_sql=false < views/
|
|
57
|
-
bq query --use_legacy_sql=false < views/
|
|
77
|
+
cd packages/eval/config/bigquery
|
|
78
|
+
bq --project_id=data-platform-302218 --location=EU query --use_legacy_sql=false < views/reports.sql
|
|
79
|
+
bq --project_id=data-platform-302218 --location=EU query --use_legacy_sql=false < views/area_scores.sql
|
|
80
|
+
bq --project_id=data-platform-302218 --location=EU query --use_legacy_sql=false < views/official_runs.sql
|
|
81
|
+
bq --project_id=data-platform-302218 --location=EU query --use_legacy_sql=false < views/official_area_scores.sql
|
|
58
82
|
# per-team views are optional — copy views/team_runs_template.sql,
|
|
59
83
|
# fill in the slug, and run.
|
|
60
84
|
```
|
|
61
85
|
|
|
86
|
+
> `--project_id` / `--location=EU` are required because `bq` needs an explicit
|
|
87
|
+
> billing project and the `ailf*` datasets live in the EU multi-region. If you
|
|
88
|
+
> run `bq query` from this repo regularly, consider setting the default project with
|
|
89
|
+
> `gcloud config set project data-platform-302218`.
|
|
90
|
+
|
|
62
91
|
## Naming conventions
|
|
63
92
|
|
|
64
93
|
- **`ailf_raw.*`** — raw Airbyte-loaded tables (nested JSON, Airbyte metadata
|
|
@@ -13,6 +13,16 @@
|
|
|
13
13
|
* @see packages/eval/src/adapters/task-sources/repo-task-source.ts
|
|
14
14
|
*/
|
|
15
15
|
import { type PipelineRequest } from "../../_vendor/ailf-core/index.d.ts";
|
|
16
|
+
/**
|
|
17
|
+
* Thrown when `buildRemoteRequest` can't find any runnable tasks.
|
|
18
|
+
*
|
|
19
|
+
* The CLI catches this separately from ZodError so it can print the
|
|
20
|
+
* message without an accompanying stack trace — the message is already
|
|
21
|
+
* the whole story for the user.
|
|
22
|
+
*/
|
|
23
|
+
export declare class NoRunnableTasksError extends Error {
|
|
24
|
+
readonly name = "NoRunnableTasksError";
|
|
25
|
+
}
|
|
16
26
|
/** Options for building a remote pipeline request. */
|
|
17
27
|
export interface BuildRequestOptions {
|
|
18
28
|
/** Path to .ailf/tasks/ directory. */
|
|
@@ -27,6 +37,7 @@ export interface BuildRequestOptions {
|
|
|
27
37
|
*/
|
|
28
38
|
export interface RemoteConfigSlice {
|
|
29
39
|
mode?: string;
|
|
40
|
+
variant?: string;
|
|
30
41
|
debug?: {
|
|
31
42
|
enabled?: boolean;
|
|
32
43
|
firstN?: number;
|
|
@@ -16,7 +16,6 @@ import { existsSync } from "fs";
|
|
|
16
16
|
import { resolve } from "path";
|
|
17
17
|
import { PipelineRequestSchema, } from "../../_vendor/ailf-core/index.js";
|
|
18
18
|
import { LEGACY_EVAL_MODE_ALIASES, isRunClassification, } from "../../_vendor/ailf-shared/index.js";
|
|
19
|
-
import { LiteracyVariant } from "../../pipeline/normalize-mode.js";
|
|
20
19
|
import { RepoTaskSource } from "../task-sources/repo-task-source.js";
|
|
21
20
|
const LEGACY_LITERACY_VARIANT_SET = new Set(LEGACY_EVAL_MODE_ALIASES);
|
|
22
21
|
/**
|
|
@@ -27,6 +26,16 @@ const LEGACY_LITERACY_VARIANT_SET = new Set(LEGACY_EVAL_MODE_ALIASES);
|
|
|
27
26
|
function resolveCanonicalTaskMode(configMode) {
    // Any mode listed in LEGACY_LITERACY_VARIANT_SET collapses to "literacy".
    if (LEGACY_LITERACY_VARIANT_SET.has(configMode)) {
        return "literacy";
    }
    // Everything else (including undefined) passes through untouched.
    return configMode;
}
|
|
29
|
+
/**
 * Error raised by `buildRemoteRequest` when it finds no runnable tasks.
 *
 * It is a dedicated type so the CLI can handle it apart from ZodError and
 * print just the message, with no stack trace: the message alone already
 * tells the user everything they need to know.
 */
export class NoRunnableTasksError extends Error {
    constructor(...args) {
        super(...args);
        // Own, writable `name` property so logs show the specific error type.
        this.name = "NoRunnableTasksError";
    }
}
|
|
30
39
|
// ---------------------------------------------------------------------------
|
|
31
40
|
// Public API
|
|
32
41
|
// ---------------------------------------------------------------------------
|
|
@@ -56,11 +65,13 @@ export async function buildRemoteRequest(options) {
|
|
|
56
65
|
? allTasks.filter((t) => t.mode === taskModeFilter)
|
|
57
66
|
: allTasks;
|
|
58
67
|
if (tasks.length === 0) {
|
|
59
|
-
throw
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
68
|
+
throw await emptyTasksError({
|
|
69
|
+
taskSource,
|
|
70
|
+
tasksDir,
|
|
71
|
+
config,
|
|
72
|
+
filterOptions,
|
|
73
|
+
taskModeFilter,
|
|
74
|
+
});
|
|
64
75
|
}
|
|
65
76
|
// 2. Convert tasks to inline format
|
|
66
77
|
const inlineTasks = tasks.map(taskToInlineFormat);
|
|
@@ -69,10 +80,14 @@ export async function buildRemoteRequest(options) {
|
|
|
69
80
|
taskMode: "inline",
|
|
70
81
|
inlineTasks,
|
|
71
82
|
};
|
|
72
|
-
// Mode
|
|
73
|
-
|
|
83
|
+
// Mode + variant — send both when set so the server sees the caller's
|
|
84
|
+
// canonical intent. Legacy aliases ("full", "baseline", …) are accepted
|
|
85
|
+
// by `PipelineRequestSchema.mode` for back-compat but the CLI now emits
|
|
86
|
+
// the canonical form (`mode: "literacy"` + explicit `variant`).
|
|
87
|
+
if (config.mode)
|
|
74
88
|
raw.mode = config.mode;
|
|
75
|
-
|
|
89
|
+
if (config.variant)
|
|
90
|
+
raw.variant = config.variant;
|
|
76
91
|
// Debug
|
|
77
92
|
if (config.debug?.enabled) {
|
|
78
93
|
raw.debug = config.debug;
|
|
@@ -206,6 +221,88 @@ function taskToInlineFormat(task) {
|
|
|
206
221
|
}
|
|
207
222
|
return inline;
|
|
208
223
|
}
|
|
224
|
+
/**
 * Build a descriptive error when the task list is empty after filtering.
 *
 * Loads the task list a second time with `includeDrafts: true` so the
 * message can distinguish the failure modes:
 *
 *   1. Nothing matches the filters at all. Echo the filters back so the
 *      mismatch is obvious.
 *   2. Tasks match but carry `status: "draft"` or `status: "paused"`.
 *      List them and tell the user how to opt a task in.
 *   3. Tasks match but are neither draft nor paused. List them, without
 *      printing opt-in advice that would not apply to them.
 *
 * The error is returned, not thrown; the caller does `throw await …`.
 *
 * @param {object} args
 * @param {object} args.taskSource - Object exposing an async `loadTasks(options)`.
 * @param {string} args.tasksDir - Tasks directory, echoed in the message.
 * @param {object} args.config - Config slice; `areas`, `tasks` and `tags` are echoed.
 * @param {object} [args.filterOptions] - Options forwarded to `loadTasks`.
 * @param {string} [args.taskModeFilter] - When set, only tasks with this `mode` count.
 * @returns {Promise<NoRunnableTasksError>} The error for the caller to throw.
 */
async function emptyTasksError(args) {
    const { taskSource, tasksDir, config, filterOptions, taskModeFilter } = args;
    // Re-load without the status gate to categorize what got filtered.
    let relaxed = [];
    try {
        relaxed = await taskSource.loadTasks({
            ...(filterOptions ?? {}),
            includeDrafts: true,
        });
    }
    catch {
        // Deliberately best-effort: if re-loading fails for any reason
        // (e.g. directory removed mid-run) fall through to the generic
        // "no tasks matched" message below.
    }
    const modeMatched = taskModeFilter
        ? relaxed.filter((t) => t.mode === taskModeFilter)
        : relaxed;
    const filterLines = [];
    if (config.areas?.length) {
        filterLines.push(` Area filter: ${config.areas.join(", ")}`);
    }
    if (config.tasks?.length) {
        filterLines.push(` Task filter: ${config.tasks.join(", ")}`);
    }
    if (config.tags?.length) {
        filterLines.push(` Tag filter: ${config.tags.join(", ")}`);
    }
    if (taskModeFilter) {
        filterLines.push(` Mode filter: ${taskModeFilter}`);
    }
    if (modeMatched.length === 0) {
        return new NoRunnableTasksError([
            "No tasks matched your filters.",
            ` Tasks directory: ${tasksDir}`,
            ...filterLines,
            " Check that your .ailf/tasks/ YAML or .task.ts files define tasks",
            " matching these filters.",
        ].join("\n"));
    }
    // Tasks matched the filters but none of them was runnable.
    const drafts = modeMatched.filter((t) => t.status === "draft");
    const paused = modeMatched.filter((t) => t.status === "paused");
    const others = modeMatched.filter((t) => t.status !== "draft" && t.status !== "paused");
    const lines = [
        "No runnable tasks after applying filters.",
        ` Tasks directory: ${tasksDir}`,
        ...filterLines,
    ];
    if (drafts.length > 0) {
        lines.push(` ${drafts.length} task(s) skipped because status: "draft": ${formatTaskIdSample(drafts)}`);
    }
    if (paused.length > 0) {
        lines.push(` ${paused.length} task(s) skipped because status: "paused": ${formatTaskIdSample(paused)}`);
    }
    if (others.length > 0) {
        lines.push(` ${others.length} task(s) matched but are not runnable: ${formatTaskIdSample(others)}`);
    }
    // Only offer opt-in advice when there is a draft or paused task it
    // applies to; otherwise the "either:" header would have no options.
    if (drafts.length > 0) {
        lines.push("");
        lines.push(" To run one of these anyway, either:");
        lines.push(` • Change the task's status field from "draft" to "active", or`);
        lines.push(` • Target it explicitly: --task ${drafts[0].id ?? "<id>"}`);
        // The scaffolding note is about drafts, so show it only with drafts.
        lines.push(" Tasks scaffolded by `ailf init` ship as drafts so you can edit");
        lines.push(" them before they start contributing to your literacy score.");
    }
    else if (paused.length > 0) {
        lines.push("");
        lines.push(" To run one of these anyway, either:");
        lines.push(` • Target it explicitly by id: --task ${paused[0].id ?? "<id>"}, or`);
        lines.push(` • Flip its status from "paused" to "active"`);
    }
    return new NoRunnableTasksError(lines.join("\n"));
}
/** Render the ids of up to three tasks, plus a "+N more" suffix when truncated. */
function formatTaskIdSample(tasks) {
    const ids = tasks.map((t) => t.id);
    const overflow = ids.length > 3 ? `, +${ids.length - 3} more` : "";
    return `${ids.slice(0, 3).join(", ")}${overflow}`;
}
|
|
209
306
|
function buildFilterOptions(config) {
|
|
210
307
|
const areas = config.areas?.length ? config.areas : undefined;
|
|
211
308
|
const taskIds = config.tasks?.length ? config.tasks : undefined;
|
|
@@ -5,7 +5,7 @@
|
|
|
5
5
|
* import { ApiClient, buildRemoteRequest, resolveTasksDir } from "./adapters/api-client/index.js"
|
|
6
6
|
*/
|
|
7
7
|
export { ApiClient } from "./api-client.js";
|
|
8
|
-
export { buildCallerEnvelope, buildRemoteRequest, resolveTasksDir, type BuildRequestOptions, type RemoteConfigSlice, } from "./build-request.js";
|
|
8
|
+
export { buildCallerEnvelope, buildRemoteRequest, NoRunnableTasksError, resolveTasksDir, type BuildRequestOptions, type RemoteConfigSlice, } from "./build-request.js";
|
|
9
9
|
export { ApiAuthError, ApiConnectionError, ApiError, ApiTimeoutError, } from "./errors.js";
|
|
10
10
|
export { formatJobError } from "./format-error.js";
|
|
11
11
|
export { createProgressDisplay } from "./progress.js";
|
|
@@ -5,7 +5,7 @@
|
|
|
5
5
|
* import { ApiClient, buildRemoteRequest, resolveTasksDir } from "./adapters/api-client/index.js"
|
|
6
6
|
*/
|
|
7
7
|
export { ApiClient } from "./api-client.js";
|
|
8
|
-
export { buildCallerEnvelope, buildRemoteRequest, resolveTasksDir, } from "./build-request.js";
|
|
8
|
+
export { buildCallerEnvelope, buildRemoteRequest, NoRunnableTasksError, resolveTasksDir, } from "./build-request.js";
|
|
9
9
|
export { ApiAuthError, ApiConnectionError, ApiError, ApiTimeoutError, } from "./errors.js";
|
|
10
10
|
export { formatJobError } from "./format-error.js";
|
|
11
11
|
export { createProgressDisplay } from "./progress.js";
|
|
@@ -14,7 +14,7 @@ import { existsSync, mkdirSync, readFileSync, writeFileSync } from "fs";
|
|
|
14
14
|
import { dirname, resolve } from "path";
|
|
15
15
|
import { fileURLToPath } from "url";
|
|
16
16
|
import { classifyUrls } from "../pipeline/classify-url.js";
|
|
17
|
-
import { normalizeMode } from "../pipeline/normalize-mode.js";
|
|
17
|
+
import { LiteracyVariant, normalizeMode } from "../pipeline/normalize-mode.js";
|
|
18
18
|
import { assessImpact, buildReverseMapping, } from "../pipeline/reverse-mapping.js";
|
|
19
19
|
import { buildAppContext, parseArtifactUploadEnv, } from "../orchestration/build-app-context.js";
|
|
20
20
|
import { buildStepSequence } from "../orchestration/build-step-sequence.js";
|
|
@@ -47,6 +47,13 @@ export function computeResolvedOptions(opts) {
|
|
|
47
47
|
mode = normalized.mode;
|
|
48
48
|
// Explicit --variant flag takes precedence over what normalizeMode inferred
|
|
49
49
|
variant = opts.variant ?? normalized.variant;
|
|
50
|
+
// Canonical mode "literacy" with no variant defaults to the full variant
|
|
51
|
+
// (standard + agentic). This preserves the pre-canonical CLI behavior
|
|
52
|
+
// where `--mode full` was the default, without emitting the legacy alias
|
|
53
|
+
// deprecation warning for users who pass no flags at all.
|
|
54
|
+
if (mode === "literacy" && !variant) {
|
|
55
|
+
variant = LiteracyVariant.FULL;
|
|
56
|
+
}
|
|
50
57
|
}
|
|
51
58
|
catch (err) {
|
|
52
59
|
console.error(`❌ ${err instanceof Error ? err.message : String(err)}`);
|
|
@@ -8,12 +8,11 @@
|
|
|
8
8
|
* @see docs/cli.md for the full flag reference.
|
|
9
9
|
*/
|
|
10
10
|
import { Command } from "commander";
|
|
11
|
-
import { LiteracyVariant } from "../pipeline/normalize-mode.js";
|
|
12
11
|
import { addAgenticOptions, addDebugOptions, addSanitySourceOptions, } from "./shared/options.js";
|
|
13
12
|
export function createPipelineCommand() {
|
|
14
13
|
const cmd = new Command("pipeline")
|
|
15
14
|
.description("Run the full evaluation pipeline")
|
|
16
|
-
.option("-m, --mode <mode>", "Evaluation mode: literacy (default), mcp-server, agent-harness, knowledge-probe, custom. Legacy aliases (baseline, agentic, observed, full) are accepted and normalized to literacy + variant.",
|
|
15
|
+
.option("-m, --mode <mode>", "Evaluation mode: literacy (default), mcp-server, agent-harness, knowledge-probe, custom. Legacy aliases (baseline, agentic, observed, full) are accepted and normalized to literacy + variant.", "literacy")
|
|
17
16
|
.option("--variant <variant>", "Literacy variant: full (default — standard + agentic), baseline (standard only), agentic (agentic only), observed. Only applies to --mode literacy.")
|
|
18
17
|
.option("-s, --source <name>", "Documentation source name (from sources.yaml)")
|
|
19
18
|
.option("-n, --dry-run", "Validate configuration only, no execution", false)
|
|
@@ -14,7 +14,7 @@
|
|
|
14
14
|
* @see docs/design-docs/cli-as-api-client.md — design doc
|
|
15
15
|
*/
|
|
16
16
|
import { ZodError } from "zod";
|
|
17
|
-
import { ApiClient, buildRemoteRequest, createProgressDisplay, formatJobError, resolveTasksDir, } from "../adapters/api-client/index.js";
|
|
17
|
+
import { ApiClient, buildRemoteRequest, createProgressDisplay, formatJobError, NoRunnableTasksError, resolveTasksDir, } from "../adapters/api-client/index.js";
|
|
18
18
|
import { writeRemoteResults } from "./remote-results.js";
|
|
19
19
|
// ---------------------------------------------------------------------------
|
|
20
20
|
// Public API
|
|
@@ -66,6 +66,10 @@ export async function runRemotePipeline(opts, rootDir) {
|
|
|
66
66
|
console.error("💡 Fix the issues above in your .ailf/tasks/ YAML files.");
|
|
67
67
|
process.exit(2);
|
|
68
68
|
}
|
|
69
|
+
if (err instanceof NoRunnableTasksError) {
|
|
70
|
+
console.error(`❌ ${err.message}`);
|
|
71
|
+
process.exit(2);
|
|
72
|
+
}
|
|
69
73
|
throw err;
|
|
70
74
|
}
|
|
71
75
|
console.log(`📦 Found ${taskCount} task(s) in ${tasksDir}`);
|
|
@@ -103,6 +107,7 @@ export async function runRemotePipeline(opts, rootDir) {
|
|
|
103
107
|
function toConfigSlice(opts) {
|
|
104
108
|
return {
|
|
105
109
|
mode: opts.mode,
|
|
110
|
+
variant: opts.variant,
|
|
106
111
|
debug: opts.debug,
|
|
107
112
|
areas: opts.areaOption
|
|
108
113
|
?.split(",")
|