@sanity/ailf 3.4.1 → 3.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (34) hide show
  1. package/config/airbyte/ai_literacy_framework.connector.yaml +114 -0
  2. package/config/bigquery/README.md +11 -4
  3. package/config/bigquery/views/official_area_scores.sql +20 -0
  4. package/config/bigquery/views/official_runs.sql +31 -0
  5. package/config/bigquery/views/reports.sql +19 -0
  6. package/config/bigquery/views/team_runs_template.sql +17 -0
  7. package/dist/_vendor/ailf-core/examples/index.d.ts +1 -1
  8. package/dist/_vendor/ailf-core/examples/index.js +1 -1
  9. package/dist/_vendor/ailf-core/ports/context.d.ts +25 -0
  10. package/dist/_vendor/ailf-core/schemas/pipeline-request.d.ts +23 -0
  11. package/dist/_vendor/ailf-core/schemas/pipeline-request.js +59 -1
  12. package/dist/_vendor/ailf-shared/index.d.ts +2 -0
  13. package/dist/_vendor/ailf-shared/index.js +2 -0
  14. package/dist/_vendor/ailf-shared/owner-teams.d.ts +26 -0
  15. package/dist/_vendor/ailf-shared/owner-teams.js +52 -0
  16. package/dist/_vendor/ailf-shared/run-classification.d.ts +100 -0
  17. package/dist/_vendor/ailf-shared/run-classification.js +28 -0
  18. package/dist/_vendor/ailf-shared/run-context.d.ts +23 -0
  19. package/dist/adapters/api-client/build-request.d.ts +31 -0
  20. package/dist/adapters/api-client/build-request.js +82 -1
  21. package/dist/adapters/api-client/index.d.ts +1 -1
  22. package/dist/adapters/api-client/index.js +1 -1
  23. package/dist/commands/explain-handler.js +5 -0
  24. package/dist/commands/pipeline-action.d.ts +6 -0
  25. package/dist/commands/pipeline-action.js +5 -0
  26. package/dist/commands/pipeline.d.ts +5 -0
  27. package/dist/commands/pipeline.js +15 -0
  28. package/dist/commands/remote-pipeline.js +7 -0
  29. package/dist/orchestration/steps/finalize-run-step.js +1 -0
  30. package/dist/orchestration/steps/publish-report-step.js +1 -0
  31. package/dist/pipeline/map-request-to-config.js +18 -0
  32. package/dist/pipeline/run-context.d.ts +63 -0
  33. package/dist/pipeline/run-context.js +166 -0
  34. package/package.json +1 -1
@@ -0,0 +1,100 @@
1
+ /**
2
+ * Run classification, ownership, executor, and environment metadata.
3
+ *
4
+ * These fields extend `RunContext` to capture run *intent*, *attribution*,
5
+ * and *reproducibility* — orthogonal to the *mechanism* captured by
6
+ * `RunTrigger`. A scheduled run can be experimental; a manual run can be
7
+ * official; a PR-triggered run is executed by GH Actions but attributable
8
+ * to the PR author.
9
+ *
10
+ * @see docs/decisions/D0037-run-classification-and-ownership-taxonomy.md
11
+ * @see docs/design-docs/run-classification-and-ownership.md
12
+ */
13
+ /**
14
+ * How a run should be treated for reporting and trend tracking.
15
+ *
16
+ * Orthogonal to `RunTrigger` (mechanism). Defaults to `"ad-hoc"` when
17
+ * unannotated so pre-taxonomy runs never leak into the canonical series.
18
+ */
19
+ export type RunClassification = "official" | "ad-hoc" | "experimental" | "test" | "external";
20
+ export declare const RUN_CLASSIFICATIONS: readonly RunClassification[];
21
+ export declare function isRunClassification(value: unknown): value is RunClassification;
22
+ /**
23
+ * Attribution — which team and (optionally) individual the run *belongs to*.
24
+ *
25
+ * `team` is a free-form slug, not a closed enum: external teams name
26
+ * themselves and internal names drift. A soft-normalization layer under
27
+ * `config/owners.ts` maps aliases to canonical slugs (warn-only).
28
+ */
29
+ export interface RunOwner {
30
+ team: string;
31
+ individual?: string;
32
+ }
33
+ /**
34
+ * Who or what actually invoked the run.
35
+ *
36
+ * Separate from `RunOwner` because they diverge for automated surfaces:
37
+ * a PR gate is *executed by* GH Actions but *attributable to* the PR
38
+ * author. Both variants expose a `name` field so consumers can format
39
+ * them with one template.
40
+ *
41
+ * Every detectable identity field is optional — a misconfigured shell,
42
+ * a container without `git`, or a CI provider that doesn't expose actor
43
+ * metadata can all still produce a valid run with thin provenance.
44
+ */
45
+ export type RunExecutor = RunExecutorUser | RunExecutorSystem;
46
+ export interface RunExecutorUser {
47
+ type: "user";
48
+ /** Detected from `git config user.name`, `os.userInfo().username`, or GH actor. */
49
+ name?: string;
50
+ /** From `git config user.email`. Subject to the `AILF_CAPTURE_EMAIL` opt-out. */
51
+ email?: string;
52
+ /** Where the invocation originated. Always knowable. */
53
+ surface: RunExecutorSurface;
54
+ /** GH actor when the user invoked via a GH surface (PR, manual dispatch). */
55
+ githubActor?: string;
56
+ }
57
+ export interface RunExecutorSystem {
58
+ type: "system";
59
+ /** e.g. `"github-actions"`, `"vercel-cron"`, `"sanity-webhook"`. */
60
+ name: string;
61
+ workflow?: string;
62
+ runId?: string;
63
+ }
64
+ export type RunExecutorSurface = "cli" | "studio" | "api";
65
+ export declare const RUN_EXECUTOR_SURFACES: readonly RunExecutorSurface[];
66
+ /**
67
+ * Links to related runs. Fills the gap where the Studio report schema
68
+ * already carried these fields but `RunContext` did not.
69
+ */
70
+ export interface RunLineage {
71
+ /** Prior `RunId` this run re-executes. */
72
+ rerunOf?: string;
73
+ /** Sibling `RunId` this run is intentionally compared against. */
74
+ comparedAgainst?: string;
75
+ /** API-gateway job ID that dispatched this run. */
76
+ parentJobId?: string;
77
+ }
78
+ /**
79
+ * Reproducibility metadata — which AILF/Node ran the eval.
80
+ *
81
+ * Required on every new run so cross-version trend comparisons can
82
+ * isolate framework changes from doc changes.
83
+ */
84
+ export interface RunTool {
85
+ ailfVersion: string;
86
+ nodeVersion: string;
87
+ }
88
+ /**
89
+ * Platform + CI-provider metadata for debugging flakes. Hostname is
90
+ * intentionally excluded — it leaks machine/user identity without
91
+ * filtering benefit.
92
+ */
93
+ export interface RunHost {
94
+ /** `os.platform()` — `"darwin"` | `"linux"` | `"win32"`. */
95
+ platform: string;
96
+ /** `os.arch()` — `"x64"` | `"arm64"`. */
97
+ arch: string;
98
+ /** CI provider when running under one, e.g. `"github-actions"`. */
99
+ ci?: string;
100
+ }
@@ -0,0 +1,28 @@
1
+ /**
2
+ * Run classification, ownership, executor, and environment metadata.
3
+ *
4
+ * These fields extend `RunContext` to capture run *intent*, *attribution*,
5
+ * and *reproducibility* — orthogonal to the *mechanism* captured by
6
+ * `RunTrigger`. A scheduled run can be experimental; a manual run can be
7
+ * official; a PR-triggered run is executed by GH Actions but attributable
8
+ * to the PR author.
9
+ *
10
+ * @see docs/decisions/D0037-run-classification-and-ownership-taxonomy.md
11
+ * @see docs/design-docs/run-classification-and-ownership.md
12
+ */
13
/** Closed set of run classifications accepted for reporting and trend tracking. */
export const RUN_CLASSIFICATIONS = ["official", "ad-hoc", "experimental", "test", "external"];
/**
 * Runtime guard for the closed classification set.
 *
 * @param value - Any value, typically an unvalidated flag or env string.
 * @returns `true` only when `value` is a string listed in `RUN_CLASSIFICATIONS`.
 */
export function isRunClassification(value) {
    if (typeof value !== "string") {
        return false;
    }
    return RUN_CLASSIFICATIONS.includes(value);
}
/** Surfaces a user-driven invocation can originate from. */
export const RUN_EXECUTOR_SURFACES = ["cli", "studio", "api"];
@@ -15,15 +15,26 @@
15
15
  * @see docs/design-docs/run-artifact-store.md (§ Drift Prevention)
16
16
  */
17
17
  import type { EvalMode } from "./eval-modes.js";
18
+ import type { RunClassification, RunExecutor, RunHost, RunLineage, RunOwner, RunTool } from "./run-classification.js";
18
19
  import type { RunTrigger } from "./run-trigger.js";
19
20
  export interface RunContext {
20
21
  /** Which feature areas were evaluated */
21
22
  areas: string[];
23
+ /**
24
+ * How this run should be treated for reporting and trend tracking.
25
+ * Orthogonal to `trigger` (mechanism). Defaults to `"ad-hoc"` when
26
+ * unannotated — only the scheduled workflow mints `"official"`.
27
+ *
28
+ * @see docs/decisions/D0037-run-classification-and-ownership-taxonomy.md
29
+ */
30
+ classification: RunClassification;
22
31
  /**
23
32
  * Evaluation fingerprint — SHA-256 of all inputs that affect eval output.
24
33
  * Used for cross-environment cache lookup (CI → Content Lake).
25
34
  */
26
35
  evalFingerprint?: string;
36
+ /** Who/what actually invoked the run. May or may not match `owner`. */
37
+ executor: RunExecutor;
27
38
  /** Git metadata (when run from CI) */
28
39
  git?: {
29
40
  branch: string;
@@ -33,6 +44,12 @@ export interface RunContext {
33
44
  };
34
45
  /** Grader model used for scoring */
35
46
  graderModel: string;
47
+ /** Platform/CI metadata for debugging flakes. */
48
+ host?: RunHost;
49
+ /** Free-form searchable tags — release IDs, regression hunts, experiments. */
50
+ labels?: string[];
51
+ /** Links to related runs (re-runs, comparison partners, API parent job). */
52
+ lineage?: RunLineage;
36
53
  /** Evaluation mode */
37
54
  mode: EvalMode;
38
55
  /** Models under evaluation */
@@ -40,6 +57,10 @@ export interface RunContext {
40
57
  id: string;
41
58
  label: string;
42
59
  }[];
60
+ /** Which team (and optionally individual) this run is attributable to. */
61
+ owner: RunOwner;
62
+ /** Human-authored "why I ran this" — useful for Content Lake archaeology. */
63
+ purpose?: string;
43
64
  /** Documentation source configuration */
44
65
  source: {
45
66
  baseUrl: string;
@@ -50,6 +71,8 @@ export interface RunContext {
50
71
  };
51
72
  /** Specific task IDs evaluated when scoped to a subset */
52
73
  taskIds?: string[];
74
+ /** Which AILF/Node ran the eval — for cross-version trend compatibility. */
75
+ tool?: RunTool;
53
76
  /** What initiated this run */
54
77
  trigger: RunTrigger;
55
78
  }
@@ -51,6 +51,18 @@ export interface RemoteConfigSlice {
51
51
  readinessEnabled?: boolean;
52
52
  discoveryReportEnabled?: boolean;
53
53
  noRemoteCache?: boolean;
54
+ /**
55
+ * D0037 / W0069 — CLI-flag overrides for the caller envelope. These
56
+ * take precedence over the equivalent env vars when set. When both a
57
+ * flag and its env var are unset the field is omitted from the
58
+ * request (server applies its own defaults).
59
+ */
60
+ classificationOption?: string;
61
+ ownerTeamOption?: string;
62
+ ownerIndividualOption?: string;
63
+ purposeOption?: string;
64
+ /** Repeatable --label values; appended to AILF_LABELS env values. */
65
+ labelOptions?: string[];
54
66
  }
55
67
  /**
56
68
  * Build a PipelineRequest from local tasks and config.
@@ -75,3 +87,22 @@ export declare function buildRemoteRequest(options: BuildRequestOptions): Promis
75
87
  * Returns the resolved path or throws if not found.
76
88
  */
77
89
  export declare function resolveTasksDir(rootDir: string, explicitPath?: string): string;
90
+ /**
91
+ * Build the D0037 caller envelope payload from CLI flags + env vars.
92
+ *
93
+ * Precedence, highest first:
94
+ * 1. Explicit CLI flag (--classification, --owner-team, --purpose, …)
95
+ * 2. Env var (AILF_CLASSIFICATION, AILF_OWNER_TEAM, AILF_PURPOSE, …)
96
+ * 3. Omit — server applies its own defaults (ad-hoc / unknown).
97
+ *
98
+ * Labels are additive: --label values concatenate with AILF_LABELS.
99
+ *
100
+ * `executor` is always set on remote submissions because we know the
101
+ * invocation is a user-driven CLI call. Surface defaults to `"cli"`
102
+ * unless AILF_EXECUTOR_SURFACE is `"studio"` or `"api"`; name uses
103
+ * GITHUB_ACTOR when available, otherwise the owner individual.
104
+ *
105
+ * Returns partial `PipelineRequest` fields only. Omits any key whose
106
+ * source (flag + env) was unset.
107
+ */
108
+ export declare function buildCallerEnvelope(config: RemoteConfigSlice): Partial<PipelineRequest>;
@@ -15,7 +15,7 @@
15
15
  import { existsSync } from "fs";
16
16
  import { resolve } from "path";
17
17
  import { PipelineRequestSchema, } from "../../_vendor/ailf-core/index.js";
18
- import { LEGACY_EVAL_MODE_ALIASES } from "../../_vendor/ailf-shared/index.js";
18
+ import { LEGACY_EVAL_MODE_ALIASES, isRunClassification, } from "../../_vendor/ailf-shared/index.js";
19
19
  import { LiteracyVariant } from "../../pipeline/normalize-mode.js";
20
20
  import { RepoTaskSource } from "../task-sources/repo-task-source.js";
21
21
  const LEGACY_LITERACY_VARIANT_SET = new Set(LEGACY_EVAL_MODE_ALIASES);
@@ -127,6 +127,10 @@ export async function buildRemoteRequest(options) {
127
127
  const callerGit = detectCallerGit();
128
128
  if (callerGit)
129
129
  raw.callerGit = callerGit;
130
+ // D0037 caller envelope — merge CLI flags + env vars and attach each
131
+ // populated field. Flags override env. Skipped fields are omitted so
132
+ // the server applies its own defaults.
133
+ Object.assign(raw, buildCallerEnvelope(config));
130
134
  // 4. Validate the assembled request
131
135
  const parsed = PipelineRequestSchema.parse(raw);
132
136
  return { request: parsed, taskCount: tasks.length };
@@ -210,6 +214,83 @@ function buildFilterOptions(config) {
210
214
  return undefined;
211
215
  return { areas, taskIds, tags };
212
216
  }
217
/**
 * Build the D0037 caller envelope payload from CLI flags + env vars.
 *
 * Precedence per field, highest first:
 *   1. Explicit CLI flag (--classification, --owner-team, --purpose, …)
 *   2. Env var (AILF_CLASSIFICATION, AILF_OWNER_TEAM, AILF_PURPOSE, …)
 *   3. Omit the key so the server applies its own defaults.
 *
 * Blank (empty or whitespace-only) values count as "unset" at every level,
 * so an env var exported as "" never shadows a lower-priority source.
 *
 * Field rules:
 * - `classification` is forwarded verbatim, even when it is not a known
 *   value; `buildRemoteRequest` validates the merged request with
 *   `PipelineRequestSchema`, which is where an invalid value is rejected.
 * - `owner` is emitted only when a team resolves. `individual` falls back
 *   to GITHUB_ACTOR when neither the flag nor AILF_OWNER_INDIVIDUAL is set.
 * - `labels` are additive: AILF_LABELS (comma-separated) first, then
 *   `config.labelOptions`; each entry is trimmed, blanks are dropped and
 *   duplicates removed (first occurrence wins).
 * - `executor` is always emitted with `type: "user"`. `surface` is `"cli"`
 *   unless AILF_EXECUTOR_SURFACE is `"studio"` or `"api"`. `name` prefers
 *   GITHUB_ACTOR and otherwise uses the explicit owner individual;
 *   `githubActor` is set whenever GITHUB_ACTOR is present.
 *
 * @param config - Remote config slice carrying the optional flag overrides
 *   (`classificationOption`, `ownerTeamOption`, `ownerIndividualOption`,
 *   `purposeOption`, `labelOptions`).
 * @returns Partial request fields; keys without a source are absent.
 */
export function buildCallerEnvelope(config) {
    const env = process.env;
    // First candidate that is a non-blank string, trimmed; undefined if none.
    const firstNonBlank = (...candidates) => {
        for (const candidate of candidates) {
            if (typeof candidate !== "string")
                continue;
            const trimmed = candidate.trim();
            if (trimmed)
                return trimmed;
        }
        return undefined;
    };
    const classification = firstNonBlank(config.classificationOption, env.AILF_CLASSIFICATION);
    const team = firstNonBlank(config.ownerTeamOption, env.AILF_OWNER_TEAM);
    const explicitIndividual = firstNonBlank(config.ownerIndividualOption, env.AILF_OWNER_INDIVIDUAL);
    const githubActor = firstNonBlank(env.GITHUB_ACTOR);
    // Attribution prefers what the caller stated; the GH actor is a fallback.
    const individual = explicitIndividual ?? githubActor;
    const purpose = firstNonBlank(config.purposeOption, env.AILF_PURPOSE);
    // Env labels come first so flag labels append to them; Set keeps first occurrence.
    const labels = [
        ...new Set([...(env.AILF_LABELS ?? "").split(","), ...(config.labelOptions ?? [])]
            .map((label) => label.trim())
            .filter(Boolean)),
    ];
    const surfaceEnv = env.AILF_EXECUTOR_SURFACE?.trim();
    const surface = surfaceEnv === "studio" || surfaceEnv === "api" ? surfaceEnv : "cli";
    // Execution identity prefers the GH actor (who actually ran it) over the
    // stated owner individual, which may name someone else.
    const executorName = githubActor ?? explicitIndividual;
    return {
        ...(classification ? { classification } : {}),
        ...(team ? { owner: individual ? { team, individual } : { team } } : {}),
        ...(purpose ? { purpose } : {}),
        ...(labels.length > 0 ? { labels } : {}),
        executor: {
            type: "user",
            surface,
            ...(executorName ? { name: executorName } : {}),
            ...(githubActor ? { githubActor } : {}),
        },
    };
}
213
294
  /**
214
295
  * Auto-detect caller git metadata from GitHub Actions environment variables.
215
296
  *
@@ -5,7 +5,7 @@
5
5
  * import { ApiClient, buildRemoteRequest, resolveTasksDir } from "./adapters/api-client/index.js"
6
6
  */
7
7
  export { ApiClient } from "./api-client.js";
8
- export { buildRemoteRequest, resolveTasksDir, type BuildRequestOptions, type RemoteConfigSlice, } from "./build-request.js";
8
+ export { buildCallerEnvelope, buildRemoteRequest, resolveTasksDir, type BuildRequestOptions, type RemoteConfigSlice, } from "./build-request.js";
9
9
  export { ApiAuthError, ApiConnectionError, ApiError, ApiTimeoutError, } from "./errors.js";
10
10
  export { formatJobError } from "./format-error.js";
11
11
  export { createProgressDisplay } from "./progress.js";
@@ -5,7 +5,7 @@
5
5
  * import { ApiClient, buildRemoteRequest, resolveTasksDir } from "./adapters/api-client/index.js"
6
6
  */
7
7
  export { ApiClient } from "./api-client.js";
8
- export { buildRemoteRequest, resolveTasksDir, } from "./build-request.js";
8
+ export { buildCallerEnvelope, buildRemoteRequest, resolveTasksDir, } from "./build-request.js";
9
9
  export { ApiAuthError, ApiConnectionError, ApiError, ApiTimeoutError, } from "./errors.js";
10
10
  export { formatJobError } from "./format-error.js";
11
11
  export { createProgressDisplay } from "./progress.js";
@@ -727,6 +727,11 @@ async function buildPipelineExplainPlan(actionCommand, rootDir) {
727
727
  artifactsDir: raw.artifactsDir,
728
728
  artifactsDryRun: raw.artifactsDryRun ?? false,
729
729
  artifactsExclude: raw.artifactsExclude,
730
+ classification: raw.classification,
731
+ ownerTeam: raw.ownerTeam,
732
+ ownerIndividual: raw.ownerIndividual,
733
+ purpose: raw.purpose,
734
+ label: raw.label ?? [],
730
735
  };
731
736
  const resolved = computeResolvedOptions(withDefaults);
732
737
  const planOpts = {
@@ -68,6 +68,12 @@ export interface ResolvedOptions {
68
68
  artifactsDir?: string;
69
69
  artifactsDryRun: boolean;
70
70
  artifactsExclude?: readonly string[];
71
+ /** D0037 / W0069 caller envelope — surfaces only on --remote today. */
72
+ classificationOption?: string;
73
+ ownerTeamOption?: string;
74
+ ownerIndividualOption?: string;
75
+ purposeOption?: string;
76
+ labelOptions: string[];
71
77
  }
72
78
  /**
73
79
  * Pure option resolution — computes ResolvedOptions from CLI flags without
@@ -269,6 +269,11 @@ export function computeResolvedOptions(opts) {
269
269
  artifactsDir: resolveArtifactsDir(opts),
270
270
  artifactsDryRun: opts.artifactsDryRun,
271
271
  artifactsExclude: parseArtifactsExcludeList(opts.artifactsExclude),
272
+ classificationOption: opts.classification?.trim() || undefined,
273
+ ownerTeamOption: opts.ownerTeam?.trim() || undefined,
274
+ ownerIndividualOption: opts.ownerIndividual?.trim() || undefined,
275
+ purposeOption: opts.purpose?.trim() || undefined,
276
+ labelOptions: opts.label ?? [],
272
277
  };
273
278
  }
274
279
  /**
@@ -68,5 +68,10 @@ export interface PipelineCliOptions {
68
68
  artifactsDir?: string;
69
69
  artifactsDryRun: boolean;
70
70
  artifactsExclude?: string;
71
+ classification?: string;
72
+ ownerTeam?: string;
73
+ ownerIndividual?: string;
74
+ purpose?: string;
75
+ label: string[];
71
76
  }
72
77
  export declare function createPipelineCommand(): Command;
@@ -58,6 +58,21 @@ export function createPipelineCommand() {
58
58
  .option("--artifacts-dir <path>", "Root directory for local artifact output (D0033; default: .ailf/results/captures/)")
59
59
  .option("--artifacts-dry-run", "Run artifact writers in dry-run mode — log intended writes, touch no storage", false)
60
60
  .option("--artifacts-exclude <types>", "Comma-separated artifact types to skip (e.g. traces,graderPrompts)")
61
+ // D0037 caller envelope (W0069) — threads through --remote so the
62
+ // server-side pipeline attributes provenance to the caller, not the
63
+ // API gateway runner. All env-var equivalents are honored too;
64
+ // explicit flags win over env vars.
65
+ .option("--classification <value>", "Run classification for provenance: official | ad-hoc | experimental | test | external. Overrides AILF_CLASSIFICATION. See D0037.")
66
+ .option("--owner-team <slug>", "Team slug this run is attributable to. Overrides AILF_OWNER_TEAM.")
67
+ .option("--owner-individual <slug>", "Individual (GH actor / user ID) this run is attributable to. Overrides AILF_OWNER_INDIVIDUAL.")
68
+ .option("--purpose <text>", 'Free-text "why I ran this" attached to provenance. Overrides AILF_PURPOSE.')
69
+ .option("--label <value>", "Free-form searchable label (repeatable). Appends to any AILF_LABELS env value.", (val, prev) => [
70
+ ...prev,
71
+ ...val
72
+ .split(",")
73
+ .map((s) => s.trim())
74
+ .filter(Boolean),
75
+ ], [])
61
76
  .action(async (opts) => {
62
77
  const { executePipeline } = await import("./pipeline-action.js");
63
78
  await executePipeline(opts);
@@ -133,5 +133,12 @@ function toConfigSlice(opts) {
133
133
  readinessEnabled: opts.readinessEnabled,
134
134
  discoveryReportEnabled: opts.discoveryReportEnabled,
135
135
  noRemoteCache: opts.noRemoteCache,
136
+ // D0037 / W0069 caller envelope overrides — flags override env vars
137
+ // inside buildCallerEnvelope(), which also merges AILF_* defaults.
138
+ classificationOption: opts.classificationOption,
139
+ ownerTeamOption: opts.ownerTeamOption,
140
+ ownerIndividualOption: opts.ownerIndividualOption,
141
+ purposeOption: opts.purposeOption,
142
+ labelOptions: opts.labelOptions,
136
143
  };
137
144
  }
@@ -77,6 +77,7 @@ export class FinalizeRunStep {
77
77
  const runContext = buildRunContext({
78
78
  areas: maybeSummary?.scores?.map((s) => s.feature) ?? ctx.config.areas ?? [],
79
79
  callerGit: ctx.config.callerGit,
80
+ callerEnvelope: ctx.config.callerEnvelope,
80
81
  evalFingerprint: state.evalFingerprint ?? this.options.evalFingerprint,
81
82
  logger: ctx.logger,
82
83
  mode: ctx.config.mode,
@@ -225,6 +225,7 @@ function buildProvenanceInput(summary, ctx, options, autoScope) {
225
225
  areas,
226
226
  autoScope,
227
227
  callerGit: ctx.config.callerGit,
228
+ callerEnvelope: ctx.config.callerEnvelope,
228
229
  evalFingerprint,
229
230
  mode,
230
231
  promptfooUrls: options.promptfooUrls,
@@ -72,6 +72,7 @@ export function mapRequestToConfig(request, rootDir) {
72
72
  beforeOption: undefined,
73
73
  repoTasksPath: undefined,
74
74
  callerGit: request.callerGit,
75
+ callerEnvelope: buildCallerEnvelope(request),
75
76
  callback: request.callback,
76
77
  jobId: request.jobId,
77
78
  remote: false,
@@ -91,6 +92,23 @@ function mapDebug(debug) {
91
92
  sample: debug.sample,
92
93
  };
93
94
  }
95
/**
 * Gather the D0037 caller-envelope fields of a PipelineRequest
 * (`classification`, `owner`, `executor`, `purpose`, `labels`) into one
 * object for `config.callerEnvelope`.
 *
 * @param request - Request whose envelope fields are copied as-is.
 * @returns The five-key envelope, or `undefined` when every field is
 *   `undefined`, so consumers can use `config.callerEnvelope?.…`.
 */
function buildCallerEnvelope(request) {
    const envelope = {
        classification: request.classification,
        owner: request.owner,
        executor: request.executor,
        purpose: request.purpose,
        labels: request.labels,
    };
    const hasAnyField = Object.values(envelope).some((field) => field !== undefined);
    return hasAnyField ? envelope : undefined;
}
94
112
  function mapTaskSourceType(taskMode) {
95
113
  if (taskMode === "content-lake")
96
114
  return taskMode;
@@ -13,6 +13,7 @@
13
13
  * @see docs/decisions/D0032-run-anchored-artifact-store.md (§ Move 5 — Drift Prevention)
14
14
  */
15
15
  import type { Logger, RunContext } from "../_vendor/ailf-core/index.d.ts";
16
+ import { type RunClassification, type RunExecutor, type RunExecutorSurface, type RunHost, type RunLineage, type RunOwner, type RunTool } from "../_vendor/ailf-shared/index.d.ts";
16
17
  import type { ResolvedSourceConfig } from "../sources.js";
17
18
  import type { EvalMode } from "./types.js";
18
19
  /**
@@ -34,8 +35,35 @@ export interface RunContextInput {
34
35
  repo: string;
35
36
  sha?: string;
36
37
  };
38
+ /**
39
+ * Caller-provided D0037 envelope from a `--remote` PipelineRequest.
40
+ * When set, overrides the server-env detection so the caller's intent
41
+ * survives the API boundary. Same override pattern as `callerGit`.
42
+ *
43
+ * Only caller-identity fields are carried — `executor.email`, `tool`,
44
+ * and `host` stay server-inferred.
45
+ *
46
+ * @see docs/decisions/D0037-run-classification-and-ownership-taxonomy.md
47
+ */
48
+ callerEnvelope?: {
49
+ classification?: RunClassification;
50
+ owner?: {
51
+ team: string;
52
+ individual?: string;
53
+ };
54
+ executor?: {
55
+ type: "user";
56
+ surface: RunExecutorSurface;
57
+ name?: string;
58
+ githubActor?: string;
59
+ };
60
+ purpose?: string;
61
+ labels?: string[];
62
+ };
37
63
  /** Evaluation fingerprint for cross-environment cache lookup */
38
64
  evalFingerprint?: string;
65
+ /** Caller-supplied run lineage (re-runs, comparison partners, parent job). */
66
+ lineage?: RunLineage;
39
67
  /** Logger instance (defaults to ConsoleLogger) */
40
68
  logger?: Logger;
41
69
  /** Evaluation mode */
@@ -55,3 +83,38 @@ export interface RunContextInput {
55
83
  * former directly, the latter transitively through `buildProvenance`.
56
84
  */
57
85
  export declare function buildRunContext(input: RunContextInput): RunContext;
86
+ /**
87
+ * Resolve `classification` from `AILF_CLASSIFICATION`, validated against
88
+ * the closed enum. Defaults to `"ad-hoc"` so unannotated runs never leak
89
+ * into the canonical `"official"` series.
90
+ */
91
+ export declare function detectClassification(log: Logger): RunClassification;
92
+ /**
93
+ * Resolve `owner` from `AILF_OWNER_TEAM` (+ optional
94
+ * `AILF_OWNER_INDIVIDUAL`). `team` is free-form; default is `"unknown"`.
95
+ */
96
+ export declare function detectOwner(): RunOwner;
97
+ /**
98
+ * Detect who/what invoked the run.
99
+ *
100
+ * Priority:
101
+ * 1. GitHub Actions context → `{ type: "system", name: "github-actions", ... }`
102
+ * 2. CLI context → `{ type: "user", surface: "cli", ... }` with git-config
103
+ * or OS username fallback. Email capture gated by
104
+ * `AILF_CAPTURE_EMAIL` (default on; set `0` to opt out).
105
+ *
106
+ * Every identity field is optional — missing git, containers, or masked
107
+ * env vars must never block a run.
108
+ */
109
+ export declare function detectExecutor(): RunExecutor;
110
+ /**
111
+ * Resolve `tool` — which AILF/Node ran the eval. Captured on every new
112
+ * run so cross-version trend comparisons can isolate framework changes
113
+ * from doc changes.
114
+ */
115
+ export declare function detectTool(log: Logger): RunTool;
116
+ /**
117
+ * Resolve `host` — platform + arch + CI provider. Hostname is
118
+ * intentionally excluded (leaks identity without filtering benefit).
119
+ */
120
+ export declare function detectHost(): RunHost;