@sanity/ailf 3.4.1 → 3.5.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (35) hide show
  1. package/bin/ailf.js +16 -1
  2. package/config/airbyte/ai_literacy_framework.connector.yaml +114 -0
  3. package/config/bigquery/README.md +44 -8
  4. package/config/bigquery/views/official_area_scores.sql +20 -0
  5. package/config/bigquery/views/official_runs.sql +31 -0
  6. package/config/bigquery/views/reports.sql +19 -0
  7. package/config/bigquery/views/team_runs_template.sql +17 -0
  8. package/dist/_vendor/ailf-core/examples/index.d.ts +1 -1
  9. package/dist/_vendor/ailf-core/examples/index.js +1 -1
  10. package/dist/_vendor/ailf-core/ports/context.d.ts +25 -0
  11. package/dist/_vendor/ailf-core/schemas/pipeline-request.d.ts +23 -0
  12. package/dist/_vendor/ailf-core/schemas/pipeline-request.js +59 -1
  13. package/dist/_vendor/ailf-shared/index.d.ts +2 -0
  14. package/dist/_vendor/ailf-shared/index.js +2 -0
  15. package/dist/_vendor/ailf-shared/owner-teams.d.ts +26 -0
  16. package/dist/_vendor/ailf-shared/owner-teams.js +52 -0
  17. package/dist/_vendor/ailf-shared/run-classification.d.ts +100 -0
  18. package/dist/_vendor/ailf-shared/run-classification.js +28 -0
  19. package/dist/_vendor/ailf-shared/run-context.d.ts +23 -0
  20. package/dist/adapters/api-client/build-request.d.ts +42 -0
  21. package/dist/adapters/api-client/build-request.js +188 -10
  22. package/dist/adapters/api-client/index.d.ts +1 -1
  23. package/dist/adapters/api-client/index.js +1 -1
  24. package/dist/commands/explain-handler.js +5 -0
  25. package/dist/commands/pipeline-action.d.ts +6 -0
  26. package/dist/commands/pipeline-action.js +13 -1
  27. package/dist/commands/pipeline.d.ts +5 -0
  28. package/dist/commands/pipeline.js +16 -2
  29. package/dist/commands/remote-pipeline.js +13 -1
  30. package/dist/orchestration/steps/finalize-run-step.js +1 -0
  31. package/dist/orchestration/steps/publish-report-step.js +1 -0
  32. package/dist/pipeline/map-request-to-config.js +18 -0
  33. package/dist/pipeline/run-context.d.ts +63 -0
  34. package/dist/pipeline/run-context.js +166 -0
  35. package/package.json +1 -1
@@ -8,12 +8,11 @@
8
8
  * @see docs/cli.md for the full flag reference.
9
9
  */
10
10
  import { Command } from "commander";
11
- import { LiteracyVariant } from "../pipeline/normalize-mode.js";
12
11
  import { addAgenticOptions, addDebugOptions, addSanitySourceOptions, } from "./shared/options.js";
13
12
  export function createPipelineCommand() {
14
13
  const cmd = new Command("pipeline")
15
14
  .description("Run the full evaluation pipeline")
16
- .option("-m, --mode <mode>", "Evaluation mode: literacy (default), mcp-server, agent-harness, knowledge-probe, custom. Legacy aliases (baseline, agentic, observed, full) are accepted and normalized to literacy + variant.", LiteracyVariant.FULL)
15
+ .option("-m, --mode <mode>", "Evaluation mode: literacy (default), mcp-server, agent-harness, knowledge-probe, custom. Legacy aliases (baseline, agentic, observed, full) are accepted and normalized to literacy + variant.", "literacy")
17
16
  .option("--variant <variant>", "Literacy variant: full (default — standard + agentic), baseline (standard only), agentic (agentic only), observed. Only applies to --mode literacy.")
18
17
  .option("-s, --source <name>", "Documentation source name (from sources.yaml)")
19
18
  .option("-n, --dry-run", "Validate configuration only, no execution", false)
@@ -58,6 +57,21 @@ export function createPipelineCommand() {
58
57
  .option("--artifacts-dir <path>", "Root directory for local artifact output (D0033; default: .ailf/results/captures/)")
59
58
  .option("--artifacts-dry-run", "Run artifact writers in dry-run mode — log intended writes, touch no storage", false)
60
59
  .option("--artifacts-exclude <types>", "Comma-separated artifact types to skip (e.g. traces,graderPrompts)")
60
+ // D0037 caller envelope (W0069) — threads through --remote so the
61
+ // server-side pipeline attributes provenance to the caller, not the
62
+ // API gateway runner. All env-var equivalents are honored too;
63
+ // explicit flags win over env vars.
64
+ .option("--classification <value>", "Run classification for provenance: official | ad-hoc | experimental | test | external. Overrides AILF_CLASSIFICATION. See D0037.")
65
+ .option("--owner-team <slug>", "Team slug this run is attributable to. Overrides AILF_OWNER_TEAM.")
66
+ .option("--owner-individual <slug>", "Individual (GH actor / user ID) this run is attributable to. Overrides AILF_OWNER_INDIVIDUAL.")
67
+ .option("--purpose <text>", 'Free-text "why I ran this" attached to provenance. Overrides AILF_PURPOSE.')
68
+ .option("--label <value>", "Free-form searchable label (repeatable). Appends to any AILF_LABELS env value.", (val, prev) => [
69
+ ...prev,
70
+ ...val
71
+ .split(",")
72
+ .map((s) => s.trim())
73
+ .filter(Boolean),
74
+ ], [])
61
75
  .action(async (opts) => {
62
76
  const { executePipeline } = await import("./pipeline-action.js");
63
77
  await executePipeline(opts);
@@ -14,7 +14,7 @@
14
14
  * @see docs/design-docs/cli-as-api-client.md — design doc
15
15
  */
16
16
  import { ZodError } from "zod";
17
- import { ApiClient, buildRemoteRequest, createProgressDisplay, formatJobError, resolveTasksDir, } from "../adapters/api-client/index.js";
17
+ import { ApiClient, buildRemoteRequest, createProgressDisplay, formatJobError, NoRunnableTasksError, resolveTasksDir, } from "../adapters/api-client/index.js";
18
18
  import { writeRemoteResults } from "./remote-results.js";
19
19
  // ---------------------------------------------------------------------------
20
20
  // Public API
@@ -66,6 +66,10 @@ export async function runRemotePipeline(opts, rootDir) {
66
66
  console.error("💡 Fix the issues above in your .ailf/tasks/ YAML files.");
67
67
  process.exit(2);
68
68
  }
69
+ if (err instanceof NoRunnableTasksError) {
70
+ console.error(`❌ ${err.message}`);
71
+ process.exit(2);
72
+ }
69
73
  throw err;
70
74
  }
71
75
  console.log(`📦 Found ${taskCount} task(s) in ${tasksDir}`);
@@ -103,6 +107,7 @@ export async function runRemotePipeline(opts, rootDir) {
103
107
  function toConfigSlice(opts) {
104
108
  return {
105
109
  mode: opts.mode,
110
+ variant: opts.variant,
106
111
  debug: opts.debug,
107
112
  areas: opts.areaOption
108
113
  ?.split(",")
@@ -133,5 +138,12 @@ function toConfigSlice(opts) {
133
138
  readinessEnabled: opts.readinessEnabled,
134
139
  discoveryReportEnabled: opts.discoveryReportEnabled,
135
140
  noRemoteCache: opts.noRemoteCache,
141
+ // D0037 / W0069 caller envelope overrides — flags override env vars
142
+ // inside buildCallerEnvelope(), which also merges AILF_* defaults.
143
+ classificationOption: opts.classificationOption,
144
+ ownerTeamOption: opts.ownerTeamOption,
145
+ ownerIndividualOption: opts.ownerIndividualOption,
146
+ purposeOption: opts.purposeOption,
147
+ labelOptions: opts.labelOptions,
136
148
  };
137
149
  }
@@ -77,6 +77,7 @@ export class FinalizeRunStep {
77
77
  const runContext = buildRunContext({
78
78
  areas: maybeSummary?.scores?.map((s) => s.feature) ?? ctx.config.areas ?? [],
79
79
  callerGit: ctx.config.callerGit,
80
+ callerEnvelope: ctx.config.callerEnvelope,
80
81
  evalFingerprint: state.evalFingerprint ?? this.options.evalFingerprint,
81
82
  logger: ctx.logger,
82
83
  mode: ctx.config.mode,
@@ -225,6 +225,7 @@ function buildProvenanceInput(summary, ctx, options, autoScope) {
225
225
  areas,
226
226
  autoScope,
227
227
  callerGit: ctx.config.callerGit,
228
+ callerEnvelope: ctx.config.callerEnvelope,
228
229
  evalFingerprint,
229
230
  mode,
230
231
  promptfooUrls: options.promptfooUrls,
@@ -72,6 +72,7 @@ export function mapRequestToConfig(request, rootDir) {
72
72
  beforeOption: undefined,
73
73
  repoTasksPath: undefined,
74
74
  callerGit: request.callerGit,
75
+ callerEnvelope: buildCallerEnvelope(request),
75
76
  callback: request.callback,
76
77
  jobId: request.jobId,
77
78
  remote: false,
@@ -91,6 +92,23 @@ function mapDebug(debug) {
91
92
  sample: debug.sample,
92
93
  };
93
94
  }
95
/**
 * Gather the D0037 caller-envelope fields of a PipelineRequest into one
 * `callerEnvelope` object.
 *
 * Reads `classification`, `owner`, `executor`, `purpose`, and `labels`
 * from `request`. When every one of them is `undefined` the function
 * returns `undefined` instead of an empty shell, so consumers can use
 * optional chaining (`config.callerEnvelope?.classification`) without a
 * separate presence check. A field set to `null` counts as provided.
 */
function buildCallerEnvelope(request) {
    const envelope = {
        classification: request.classification,
        owner: request.owner,
        executor: request.executor,
        purpose: request.purpose,
        labels: request.labels,
    };
    // Only strict `undefined` means "not supplied"; any other value keeps the envelope.
    const hasAnyField = Object.values(envelope).some((value) => value !== undefined);
    return hasAnyField ? envelope : undefined;
}
94
112
  function mapTaskSourceType(taskMode) {
95
113
  if (taskMode === "content-lake")
96
114
  return taskMode;
@@ -13,6 +13,7 @@
13
13
  * @see docs/decisions/D0032-run-anchored-artifact-store.md (§ Move 5 — Drift Prevention)
14
14
  */
15
15
  import type { Logger, RunContext } from "../_vendor/ailf-core/index.d.ts";
16
+ import { type RunClassification, type RunExecutor, type RunExecutorSurface, type RunHost, type RunLineage, type RunOwner, type RunTool } from "../_vendor/ailf-shared/index.d.ts";
16
17
  import type { ResolvedSourceConfig } from "../sources.js";
17
18
  import type { EvalMode } from "./types.js";
18
19
  /**
@@ -34,8 +35,35 @@ export interface RunContextInput {
34
35
  repo: string;
35
36
  sha?: string;
36
37
  };
38
+ /**
39
+ * Caller-provided D0037 envelope from a `--remote` PipelineRequest.
40
+ * When set, overrides the server-env detection so the caller's intent
41
+ * survives the API boundary. Same override pattern as `callerGit`.
42
+ *
43
+ * Only caller-identity fields are carried — `executor.email`, `tool`,
44
+ * and `host` stay server-inferred.
45
+ *
46
+ * @see docs/decisions/D0037-run-classification-and-ownership-taxonomy.md
47
+ */
48
+ callerEnvelope?: {
49
+ classification?: RunClassification;
50
+ owner?: {
51
+ team: string;
52
+ individual?: string;
53
+ };
54
+ executor?: {
55
+ type: "user";
56
+ surface: RunExecutorSurface;
57
+ name?: string;
58
+ githubActor?: string;
59
+ };
60
+ purpose?: string;
61
+ labels?: string[];
62
+ };
37
63
  /** Evaluation fingerprint for cross-environment cache lookup */
38
64
  evalFingerprint?: string;
65
+ /** Caller-supplied run lineage (re-runs, comparison partners, parent job). */
66
+ lineage?: RunLineage;
39
67
  /** Logger instance (defaults to ConsoleLogger) */
40
68
  logger?: Logger;
41
69
  /** Evaluation mode */
@@ -55,3 +83,38 @@ export interface RunContextInput {
55
83
  * former directly, the latter transitively through `buildProvenance`.
56
84
  */
57
85
  export declare function buildRunContext(input: RunContextInput): RunContext;
86
+ /**
87
+ * Resolve `classification` from `AILF_CLASSIFICATION`, validated against
88
+ * the closed enum. Defaults to `"ad-hoc"` so unannotated runs never leak
89
+ * into the canonical `"official"` series.
90
+ */
91
+ export declare function detectClassification(log: Logger): RunClassification;
92
+ /**
93
+ * Resolve `owner` from `AILF_OWNER_TEAM` (+ optional
94
+ * `AILF_OWNER_INDIVIDUAL`). `team` is free-form; default is `"unknown"`.
95
+ */
96
+ export declare function detectOwner(): RunOwner;
97
+ /**
98
+ * Detect who/what invoked the run.
99
+ *
100
+ * Priority:
101
+ * 1. GitHub Actions context → `{ type: "system", name: "github-actions", ... }`
102
+ * 2. CLI context → `{ type: "user", surface: "cli", ... }` with git-config
103
+ * or OS username fallback. Email capture gated by
104
+ * `AILF_CAPTURE_EMAIL` (default on; set `0` to opt out).
105
+ *
106
+ * Every identity field is optional — missing git, containers, or masked
107
+ * env vars must never block a run.
108
+ */
109
+ export declare function detectExecutor(): RunExecutor;
110
+ /**
111
+ * Resolve `tool` — which AILF/Node ran the eval. Captured on every new
112
+ * run so cross-version trend comparisons can isolate framework changes
113
+ * from doc changes.
114
+ */
115
+ export declare function detectTool(log: Logger): RunTool;
116
+ /**
117
+ * Resolve `host` — platform + arch + CI provider. Hostname is
118
+ * intentionally excluded (leaks identity without filtering benefit).
119
+ */
120
+ export declare function detectHost(): RunHost;
@@ -12,8 +12,13 @@
12
12
  *
13
13
  * @see docs/decisions/D0032-run-anchored-artifact-store.md (§ Move 5 — Drift Prevention)
14
14
  */
15
+ import { execSync } from "node:child_process";
16
+ import { createRequire } from "node:module";
17
+ import * as os from "node:os";
18
+ import { isRunClassification, } from "../_vendor/ailf-shared/index.js";
15
19
  import { ConsoleLogger } from "../adapters/loggers/index.js";
16
20
  import { tryLoadConfigFile } from "./compiler/config-loader.js";
21
+ const requireFromHere = createRequire(import.meta.url);
17
22
  /**
18
23
  * Derive `RunContext` from pipeline inputs. The only construction path.
19
24
  *
@@ -35,6 +40,21 @@ export function buildRunContext(input) {
35
40
  }
36
41
  : detectGitMetadata();
37
42
  const trigger = detectTrigger();
43
+ // D0037: caller envelope (from PipelineRequest) takes precedence over
44
+ // server-env detection — same pattern as `callerGit` for identity
45
+ // preservation across the --remote boundary.
46
+ const envelope = input.callerEnvelope;
47
+ const classification = envelope?.classification ?? detectClassification(log);
48
+ const owner = envelope?.owner ?? detectOwner();
49
+ const executor = envelope?.executor ?? detectExecutor();
50
+ // `tool` and `host` are server-environment facts — they always reflect
51
+ // where this pipeline is actually running, never what a caller claimed.
52
+ // Callers cannot override these via the envelope and the wire schema
53
+ // explicitly doesn't carry them (see pipeline-request.ts).
54
+ const tool = detectTool(log);
55
+ const host = detectHost();
56
+ const labels = envelope?.labels ?? detectLabels();
57
+ const purpose = envelope?.purpose ?? (process.env.AILF_PURPOSE?.trim() || undefined);
38
58
  // Non-literacy modes (agent-harness, mcp-server, etc.) don't use the
39
59
  // config/models.ts model matrix — listing those models would be
40
60
  // misleading. Only include them for literacy mode where they're the
@@ -44,11 +64,18 @@ export function buildRunContext(input) {
44
64
  : [];
45
65
  return {
46
66
  areas: input.areas,
67
+ classification,
47
68
  evalFingerprint: input.evalFingerprint,
69
+ executor,
48
70
  git,
49
71
  graderModel: models.grader.id,
72
+ host,
73
+ labels,
74
+ lineage: input.lineage,
50
75
  mode: input.mode,
51
76
  models: evaluatedModels,
77
+ owner,
78
+ purpose,
52
79
  source: {
53
80
  baseUrl: input.source.baseUrl,
54
81
  dataset: input.source.dataset,
@@ -57,6 +84,7 @@ export function buildRunContext(input) {
57
84
  projectId: input.source.projectId,
58
85
  },
59
86
  taskIds: input.taskIds,
87
+ tool,
60
88
  trigger,
61
89
  };
62
90
  }
@@ -137,6 +165,144 @@ function detectTrigger() {
137
165
  return { type: "manual" };
138
166
  }
139
167
  // ---------------------------------------------------------------------------
168
+ // Classification, owner, executor, labels, tool, host detection (D0037)
169
+ // ---------------------------------------------------------------------------
170
/**
 * Determine the run classification from the `AILF_CLASSIFICATION`
 * environment variable.
 *
 * The trimmed value is returned when `isRunClassification` accepts it.
 * An unset or blank variable silently yields `"ad-hoc"`; an unrecognized
 * value also yields `"ad-hoc"` but is reported through `log.warn`. The
 * `"ad-hoc"` default keeps unannotated runs out of the canonical
 * `"official"` series.
 */
export function detectClassification(log) {
    const fallback = "ad-hoc";
    const candidate = process.env.AILF_CLASSIFICATION?.trim();
    if (candidate && isRunClassification(candidate)) {
        return candidate;
    }
    if (candidate) {
        // Present but not in the closed enum: surface the typo, then fall back.
        log.warn(`AILF_CLASSIFICATION="${candidate}" is not a recognized value; defaulting to "ad-hoc"`);
    }
    return fallback;
}
184
/**
 * Build the run `owner` from environment variables.
 *
 * `AILF_OWNER_TEAM` supplies the free-form team slug and falls back to
 * `"unknown"` when unset or blank. `AILF_OWNER_INDIVIDUAL` is optional;
 * the `individual` key is only present when it has a non-blank value.
 */
export function detectOwner() {
    const { AILF_OWNER_TEAM: rawTeam, AILF_OWNER_INDIVIDUAL: rawIndividual } = process.env;
    const team = rawTeam?.trim() || "unknown";
    const individual = rawIndividual?.trim() || undefined;
    if (!individual) {
        return { team };
    }
    return { individual, team };
}
193
/**
 * Work out who or what started the run.
 *
 * Resolution order:
 *   1. `GITHUB_ACTIONS === "true"` → a `"system"` executor named
 *      `"github-actions"`, carrying `GITHUB_RUN_ID` and `GITHUB_WORKFLOW`
 *      when they are non-blank.
 *   2. Otherwise → a `"user"` executor on the surface reported by
 *      `resolveExecutorSurface()`. The name comes from git `user.name`,
 *      then `GITHUB_ACTOR`, then the OS username. The git `user.email`
 *      is only read when `shouldCaptureEmail()` allows it.
 *
 * All identity fields are optional and omitted when unavailable, so a
 * missing git binary or masked environment never blocks a run.
 */
export function detectExecutor() {
    const env = process.env;
    const readEnv = (key) => env[key]?.trim() || undefined;
    if (env.GITHUB_ACTIONS === "true") {
        return {
            name: "github-actions",
            runId: readEnv("GITHUB_RUN_ID"),
            type: "system",
            workflow: readEnv("GITHUB_WORKFLOW"),
        };
    }
    const surface = resolveExecutorSurface();
    const githubActor = readEnv("GITHUB_ACTOR");
    const name = detectGitConfig("user.name") ?? githubActor ?? osUsername();
    const email = shouldCaptureEmail() ? detectGitConfig("user.email") : undefined;
    // Optional keys are spread in only when set, so absent values leave no key behind.
    return {
        surface,
        type: "user",
        ...(name ? { name } : {}),
        ...(email ? { email } : {}),
        ...(githubActor ? { githubActor } : {}),
    };
}
227
/**
 * Pick the executor surface from `AILF_EXECUTOR_SURFACE`.
 *
 * Only the exact (trimmed) values `"cli"`, `"studio"`, and `"api"` are
 * honored; anything else, including an unset variable, resolves to
 * `"cli"`.
 */
function resolveExecutorSurface() {
    const knownSurfaces = ["cli", "studio", "api"];
    const requested = process.env.AILF_EXECUTOR_SURFACE?.trim();
    return knownSurfaces.includes(requested) ? requested : "cli";
}
234
/**
 * Report whether the executor's email may be captured.
 *
 * Capture is on by default. It is switched off only when
 * `AILF_CAPTURE_EMAIL` (trimmed, case-insensitive) is `"0"`, `"false"`,
 * or `"no"`.
 */
function shouldCaptureEmail() {
    const optOutValues = ["0", "false", "no"];
    const flag = process.env.AILF_CAPTURE_EMAIL?.trim().toLowerCase();
    return !optOutValues.includes(flag);
}
240
/**
 * Read a single git configuration value via `git config --get <key>`.
 *
 * Returns the trimmed output, or `undefined` when the output is empty or
 * the command fails for any reason (git missing, key unset, or the
 * 500 ms timeout elapsing). stderr is discarded so failures stay quiet.
 *
 * NOTE(review): `key` is interpolated into a shell command line; the
 * callers visible here only pass fixed literals — keep it that way.
 */
function detectGitConfig(key) {
    const command = `git config --get ${key}`;
    try {
        const stdout = execSync(command, {
            encoding: "utf8",
            stdio: ["ignore", "pipe", "ignore"],
            timeout: 500,
        });
        const trimmed = stdout.trim();
        return trimmed.length > 0 ? trimmed : undefined;
    }
    catch {
        // Best-effort lookup: any failure simply means "no value".
        return undefined;
    }
}
253
/**
 * Return the current OS account name from `os.userInfo()`.
 *
 * Yields `undefined` when the username is empty or when `os.userInfo()`
 * throws, so the lookup never interrupts the caller.
 */
function osUsername() {
    let info;
    try {
        info = os.userInfo();
    }
    catch {
        return undefined;
    }
    return info.username ? info.username : undefined;
}
261
/**
 * Turn the comma-separated `AILF_LABELS` environment variable into a
 * list of labels.
 *
 * Each entry is trimmed, blank entries are dropped, and duplicates are
 * removed while keeping first-seen order. Returns `undefined` (not an
 * empty array) when no label remains, so absent labels stay absent.
 */
function detectLabels() {
    const unique = new Set();
    for (const piece of (process.env.AILF_LABELS ?? "").split(",")) {
        const label = piece.trim();
        if (label) {
            unique.add(label);
        }
    }
    return unique.size > 0 ? [...unique] : undefined;
}
277
/**
 * Describe the tooling that executed the eval: the AILF package version
 * and the Node.js version.
 *
 * The package version is read from `../../package.json` through
 * `requireFromHere`. If that read fails, or the manifest carries no
 * version, `ailfVersion` is reported as `"unknown"`; a failed read is
 * also reported through `log.warn`. Recording this on every run lets
 * cross-version trend comparisons separate framework changes from doc
 * changes.
 */
export function detectTool(log) {
    let ailfVersion = "unknown";
    try {
        const manifest = requireFromHere("../../package.json");
        ailfVersion = manifest.version ?? "unknown";
    }
    catch (err) {
        const reason = err instanceof Error ? err.message : String(err);
        log.warn(`Could not read @sanity/ailf package.json for tool.ailfVersion: ${reason}`);
    }
    return { ailfVersion, nodeVersion: process.version };
}
294
/**
 * Describe the machine the pipeline is running on: CPU architecture,
 * OS platform, and — when `GITHUB_ACTIONS === "true"` — the CI provider
 * (`ci: "github-actions"`).
 *
 * The hostname is deliberately left out: it would leak identity without
 * adding any filtering value.
 */
export function detectHost() {
    const runningInGithubActions = process.env.GITHUB_ACTIONS === "true";
    return {
        arch: os.arch(),
        platform: os.platform(),
        // `ci` is only present under GitHub Actions; no key at all otherwise.
        ...(runningInGithubActions ? { ci: "github-actions" } : {}),
    };
}
305
+ // ---------------------------------------------------------------------------
140
306
  // Model config loading
141
307
  // ---------------------------------------------------------------------------
142
308
  /**
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@sanity/ailf",
3
- "version": "3.4.1",
3
+ "version": "3.5.1",
4
4
  "private": false,
5
5
  "publishConfig": {
6
6
  "access": "public"