@sanity/ailf 3.4.0 → 3.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/config/airbyte/ai_literacy_framework.connector.yaml +114 -0
- package/config/bigquery/README.md +11 -4
- package/config/bigquery/views/official_area_scores.sql +20 -0
- package/config/bigquery/views/official_runs.sql +31 -0
- package/config/bigquery/views/reports.sql +19 -0
- package/config/bigquery/views/team_runs_template.sql +17 -0
- package/dist/_vendor/ailf-core/examples/index.d.ts +1 -1
- package/dist/_vendor/ailf-core/examples/index.js +1 -1
- package/dist/_vendor/ailf-core/ports/context.d.ts +25 -0
- package/dist/_vendor/ailf-core/schemas/pipeline-request.d.ts +23 -0
- package/dist/_vendor/ailf-core/schemas/pipeline-request.js +59 -1
- package/dist/_vendor/ailf-shared/index.d.ts +2 -0
- package/dist/_vendor/ailf-shared/index.js +2 -0
- package/dist/_vendor/ailf-shared/owner-teams.d.ts +26 -0
- package/dist/_vendor/ailf-shared/owner-teams.js +52 -0
- package/dist/_vendor/ailf-shared/run-classification.d.ts +100 -0
- package/dist/_vendor/ailf-shared/run-classification.js +28 -0
- package/dist/_vendor/ailf-shared/run-context.d.ts +23 -0
- package/dist/adapters/api-client/build-request.d.ts +31 -0
- package/dist/adapters/api-client/build-request.js +82 -1
- package/dist/adapters/api-client/index.d.ts +1 -1
- package/dist/adapters/api-client/index.js +1 -1
- package/dist/adapters/task-sources/repo-validation.js +4 -2
- package/dist/commands/explain-handler.js +5 -0
- package/dist/commands/init.js +5 -0
- package/dist/commands/pipeline-action.d.ts +6 -0
- package/dist/commands/pipeline-action.js +5 -0
- package/dist/commands/pipeline.d.ts +5 -0
- package/dist/commands/pipeline.js +15 -0
- package/dist/commands/remote-pipeline.js +7 -0
- package/dist/orchestration/steps/finalize-run-step.js +1 -0
- package/dist/orchestration/steps/publish-report-step.js +1 -0
- package/dist/pipeline/map-request-to-config.js +18 -0
- package/dist/pipeline/run-context.d.ts +63 -0
- package/dist/pipeline/run-context.js +166 -0
- package/package.json +1 -1
|
@@ -0,0 +1,100 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Run classification, ownership, executor, and environment metadata.
|
|
3
|
+
*
|
|
4
|
+
* These fields extend `RunContext` to capture run *intent*, *attribution*,
|
|
5
|
+
* and *reproducibility* — orthogonal to the *mechanism* captured by
|
|
6
|
+
* `RunTrigger`. A scheduled run can be experimental; a manual run can be
|
|
7
|
+
* official; a PR-triggered run is executed by GH Actions but attributable
|
|
8
|
+
* to the PR author.
|
|
9
|
+
*
|
|
10
|
+
* @see docs/decisions/D0037-run-classification-and-ownership-taxonomy.md
|
|
11
|
+
* @see docs/design-docs/run-classification-and-ownership.md
|
|
12
|
+
*/
|
|
13
|
+
/**
 * Reporting treatment of a run: how it participates in reports and trend
 * tracking.
 *
 * Independent of `RunTrigger`, which records the mechanism. Unannotated runs
 * default to `"ad-hoc"` so that pre-taxonomy runs never leak into the
 * canonical series.
 */
export type RunClassification =
    | "official"
    | "ad-hoc"
    | "experimental"
    | "test"
    | "external";
/** Runtime list of the accepted `RunClassification` values. */
export declare const RUN_CLASSIFICATIONS: readonly RunClassification[];
/** Type guard that narrows an arbitrary value to `RunClassification`. */
export declare function isRunClassification(value: unknown): value is RunClassification;
|
|
22
|
+
/**
 * Attribution record: the team and, when known, the individual that a run
 * *belongs to*.
 *
 * `team` is deliberately a free-form slug rather than a closed enum, because
 * external teams name themselves and internal names drift. Aliases are mapped
 * to canonical slugs by a warn-only soft-normalization layer under
 * `config/owners.ts`.
 */
export interface RunOwner {
    /** Free-form team slug. */
    team: string;
    /** Individual the run is attributed to, when one is known. */
    individual?: string;
}
|
|
33
|
+
/**
 * The actor (a person or an automation) that actually invoked the run.
 *
 * Kept apart from `RunOwner` because the two diverge on automated surfaces:
 * a PR gate is *executed by* GH Actions yet *attributable to* the PR author.
 * Each variant carries a `name` field (optional for users, mandatory for
 * systems) so consumers can render both with a single template.
 *
 * All detectable identity fields are optional: a misconfigured shell, a
 * container lacking `git`, or a CI provider that hides actor metadata can
 * still yield a valid run, just with thin provenance.
 */
export type RunExecutor =
    | RunExecutorUser
    | RunExecutorSystem;
/** Executor variant tagged `type: "user"`. */
export interface RunExecutorUser {
    type: "user";
    /** Taken from `git config user.name`, `os.userInfo().username`, or the GH actor. */
    name?: string;
    /** Taken from `git config user.email`; honours the `AILF_CAPTURE_EMAIL` opt-out. */
    email?: string;
    /** Origin of the invocation. Always knowable. */
    surface: RunExecutorSurface;
    /** GH actor, present when the user went through a GH surface (PR, manual dispatch). */
    githubActor?: string;
}
/** Executor variant tagged `type: "system"`. */
export interface RunExecutorSystem {
    type: "system";
    /** For example `"github-actions"`, `"vercel-cron"`, `"sanity-webhook"`. */
    name: string;
    workflow?: string;
    runId?: string;
}
/** Surfaces a user-driven invocation can originate from. */
export type RunExecutorSurface =
    | "cli"
    | "studio"
    | "api";
/** Runtime list of the accepted `RunExecutorSurface` values. */
export declare const RUN_EXECUTOR_SURFACES: readonly RunExecutorSurface[];
|
|
66
|
+
/**
 * Pointers to related runs.
 *
 * Closes a gap: the Studio report schema already carried these fields while
 * `RunContext` did not.
 */
export interface RunLineage {
    /** `RunId` of the earlier run that this run re-executes. */
    rerunOf?: string;
    /** `RunId` of the sibling run this one is deliberately compared against. */
    comparedAgainst?: string;
    /** ID of the API-gateway job that dispatched this run. */
    parentJobId?: string;
}
|
|
78
|
+
/**
 * Reproducibility metadata: the AILF and Node versions that ran the eval.
 *
 * Recorded so that cross-version trend comparisons can tell framework
 * changes apart from doc changes. Both fields are mandatory whenever the
 * record is present.
 */
export interface RunTool {
    ailfVersion: string;
    nodeVersion: string;
}
|
|
88
|
+
/**
 * Platform and CI-provider metadata, kept for debugging flakes.
 *
 * The hostname is left out on purpose: it leaks machine/user identity and
 * adds no filtering benefit.
 */
export interface RunHost {
    /** Value of `os.platform()`, e.g. `"darwin"`, `"linux"`, `"win32"`. */
    platform: string;
    /** Value of `os.arch()`, e.g. `"x64"`, `"arm64"`. */
    arch: string;
    /** Name of the CI provider when running under one, e.g. `"github-actions"`. */
    ci?: string;
}
|
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Run classification, ownership, executor, and environment metadata.
|
|
3
|
+
*
|
|
4
|
+
* These fields extend `RunContext` to capture run *intent*, *attribution*,
|
|
5
|
+
* and *reproducibility* — orthogonal to the *mechanism* captured by
|
|
6
|
+
* `RunTrigger`. A scheduled run can be experimental; a manual run can be
|
|
7
|
+
* official; a PR-triggered run is executed by GH Actions but attributable
|
|
8
|
+
* to the PR author.
|
|
9
|
+
*
|
|
10
|
+
* @see docs/decisions/D0037-run-classification-and-ownership-taxonomy.md
|
|
11
|
+
* @see docs/design-docs/run-classification-and-ownership.md
|
|
12
|
+
*/
|
|
13
|
+
export const RUN_CLASSIFICATIONS = [
|
|
14
|
+
"official",
|
|
15
|
+
"ad-hoc",
|
|
16
|
+
"experimental",
|
|
17
|
+
"test",
|
|
18
|
+
"external",
|
|
19
|
+
];
|
|
20
|
+
export function isRunClassification(value) {
|
|
21
|
+
return (typeof value === "string" &&
|
|
22
|
+
RUN_CLASSIFICATIONS.includes(value));
|
|
23
|
+
}
|
|
24
|
+
export const RUN_EXECUTOR_SURFACES = [
|
|
25
|
+
"cli",
|
|
26
|
+
"studio",
|
|
27
|
+
"api",
|
|
28
|
+
];
|
|
@@ -15,15 +15,26 @@
|
|
|
15
15
|
* @see docs/design-docs/run-artifact-store.md (§ Drift Prevention)
|
|
16
16
|
*/
|
|
17
17
|
import type { EvalMode } from "./eval-modes.js";
|
|
18
|
+
import type { RunClassification, RunExecutor, RunHost, RunLineage, RunOwner, RunTool } from "./run-classification.js";
|
|
18
19
|
import type { RunTrigger } from "./run-trigger.js";
|
|
19
20
|
export interface RunContext {
|
|
20
21
|
/** Which feature areas were evaluated */
|
|
21
22
|
areas: string[];
|
|
23
|
+
/**
|
|
24
|
+
* How this run should be treated for reporting and trend tracking.
|
|
25
|
+
* Orthogonal to `trigger` (mechanism). Defaults to `"ad-hoc"` when
|
|
26
|
+
* unannotated — only the scheduled workflow mints `"official"`.
|
|
27
|
+
*
|
|
28
|
+
* @see docs/decisions/D0037-run-classification-and-ownership-taxonomy.md
|
|
29
|
+
*/
|
|
30
|
+
classification: RunClassification;
|
|
22
31
|
/**
|
|
23
32
|
* Evaluation fingerprint — SHA-256 of all inputs that affect eval output.
|
|
24
33
|
* Used for cross-environment cache lookup (CI → Content Lake).
|
|
25
34
|
*/
|
|
26
35
|
evalFingerprint?: string;
|
|
36
|
+
/** Who/what actually invoked the run. May or may not match `owner`. */
|
|
37
|
+
executor: RunExecutor;
|
|
27
38
|
/** Git metadata (when run from CI) */
|
|
28
39
|
git?: {
|
|
29
40
|
branch: string;
|
|
@@ -33,6 +44,12 @@ export interface RunContext {
|
|
|
33
44
|
};
|
|
34
45
|
/** Grader model used for scoring */
|
|
35
46
|
graderModel: string;
|
|
47
|
+
/** Platform/CI metadata for debugging flakes. */
|
|
48
|
+
host?: RunHost;
|
|
49
|
+
/** Free-form searchable tags — release IDs, regression hunts, experiments. */
|
|
50
|
+
labels?: string[];
|
|
51
|
+
/** Links to related runs (re-runs, comparison partners, API parent job). */
|
|
52
|
+
lineage?: RunLineage;
|
|
36
53
|
/** Evaluation mode */
|
|
37
54
|
mode: EvalMode;
|
|
38
55
|
/** Models under evaluation */
|
|
@@ -40,6 +57,10 @@ export interface RunContext {
|
|
|
40
57
|
id: string;
|
|
41
58
|
label: string;
|
|
42
59
|
}[];
|
|
60
|
+
/** Which team (and optionally individual) this run is attributable to. */
|
|
61
|
+
owner: RunOwner;
|
|
62
|
+
/** Human-authored "why I ran this" — useful for Content Lake archaeology. */
|
|
63
|
+
purpose?: string;
|
|
43
64
|
/** Documentation source configuration */
|
|
44
65
|
source: {
|
|
45
66
|
baseUrl: string;
|
|
@@ -50,6 +71,8 @@ export interface RunContext {
|
|
|
50
71
|
};
|
|
51
72
|
/** Specific task IDs evaluated when scoped to a subset */
|
|
52
73
|
taskIds?: string[];
|
|
74
|
+
/** Which AILF/Node ran the eval — for cross-version trend compatibility. */
|
|
75
|
+
tool?: RunTool;
|
|
53
76
|
/** What initiated this run */
|
|
54
77
|
trigger: RunTrigger;
|
|
55
78
|
}
|
|
@@ -51,6 +51,18 @@ export interface RemoteConfigSlice {
|
|
|
51
51
|
readinessEnabled?: boolean;
|
|
52
52
|
discoveryReportEnabled?: boolean;
|
|
53
53
|
noRemoteCache?: boolean;
|
|
54
|
+
/**
|
|
55
|
+
* D0037 / W0069 — CLI-flag overrides for the caller envelope. These
|
|
56
|
+
* take precedence over the equivalent env vars when set. When both a
|
|
57
|
+
* flag and its env var are unset the field is omitted from the
|
|
58
|
+
* request (server applies its own defaults).
|
|
59
|
+
*/
|
|
60
|
+
classificationOption?: string;
|
|
61
|
+
ownerTeamOption?: string;
|
|
62
|
+
ownerIndividualOption?: string;
|
|
63
|
+
purposeOption?: string;
|
|
64
|
+
/** Repeatable --label values; appended to AILF_LABELS env values. */
|
|
65
|
+
labelOptions?: string[];
|
|
54
66
|
}
|
|
55
67
|
/**
|
|
56
68
|
* Build a PipelineRequest from local tasks and config.
|
|
@@ -75,3 +87,22 @@ export declare function buildRemoteRequest(options: BuildRequestOptions): Promis
|
|
|
75
87
|
* Returns the resolved path or throws if not found.
|
|
76
88
|
*/
|
|
77
89
|
export declare function resolveTasksDir(rootDir: string, explicitPath?: string): string;
|
|
90
|
+
/**
|
|
91
|
+
* Build the D0037 caller envelope payload from CLI flags + env vars.
|
|
92
|
+
*
|
|
93
|
+
* Precedence, highest first:
|
|
94
|
+
* 1. Explicit CLI flag (--classification, --owner-team, --purpose, …)
|
|
95
|
+
* 2. Env var (AILF_CLASSIFICATION, AILF_OWNER_TEAM, AILF_PURPOSE, …)
|
|
96
|
+
* 3. Omit — server applies its own defaults (ad-hoc / unknown).
|
|
97
|
+
*
|
|
98
|
+
* Labels are additive: --label values concatenate with AILF_LABELS.
|
|
99
|
+
*
|
|
100
|
+
* `executor` is always set on remote submissions because we know the
|
|
101
|
+
* invocation is a user-driven CLI call. Surface defaults to `"cli"`
|
|
102
|
+
* unless AILF_EXECUTOR_SURFACE explicitly overrides; name falls back to
|
|
103
|
+
* GITHUB_ACTOR when available, otherwise to the owner individual.
|
|
104
|
+
*
|
|
105
|
+
* Returns partial `PipelineRequest` fields only. Omits any key whose
|
|
106
|
+
* source (flag + env) was unset.
|
|
107
|
+
*/
|
|
108
|
+
export declare function buildCallerEnvelope(config: RemoteConfigSlice): Partial<PipelineRequest>;
|
|
@@ -15,7 +15,7 @@
|
|
|
15
15
|
import { existsSync } from "fs";
|
|
16
16
|
import { resolve } from "path";
|
|
17
17
|
import { PipelineRequestSchema, } from "../../_vendor/ailf-core/index.js";
|
|
18
|
-
import { LEGACY_EVAL_MODE_ALIASES } from "../../_vendor/ailf-shared/index.js";
|
|
18
|
+
import { LEGACY_EVAL_MODE_ALIASES, isRunClassification, } from "../../_vendor/ailf-shared/index.js";
|
|
19
19
|
import { LiteracyVariant } from "../../pipeline/normalize-mode.js";
|
|
20
20
|
import { RepoTaskSource } from "../task-sources/repo-task-source.js";
|
|
21
21
|
const LEGACY_LITERACY_VARIANT_SET = new Set(LEGACY_EVAL_MODE_ALIASES);
|
|
@@ -127,6 +127,10 @@ export async function buildRemoteRequest(options) {
|
|
|
127
127
|
const callerGit = detectCallerGit();
|
|
128
128
|
if (callerGit)
|
|
129
129
|
raw.callerGit = callerGit;
|
|
130
|
+
// D0037 caller envelope — merge CLI flags + env vars and attach each
|
|
131
|
+
// populated field. Flags override env. Skipped fields are omitted so
|
|
132
|
+
// the server applies its own defaults.
|
|
133
|
+
Object.assign(raw, buildCallerEnvelope(config));
|
|
130
134
|
// 4. Validate the assembled request
|
|
131
135
|
const parsed = PipelineRequestSchema.parse(raw);
|
|
132
136
|
return { request: parsed, taskCount: tasks.length };
|
|
@@ -210,6 +214,83 @@ function buildFilterOptions(config) {
|
|
|
210
214
|
return undefined;
|
|
211
215
|
return { areas, taskIds, tags };
|
|
212
216
|
}
|
|
217
|
+
/**
|
|
218
|
+
* Build the D0037 caller envelope payload from CLI flags + env vars.
|
|
219
|
+
*
|
|
220
|
+
* Precedence, highest first:
|
|
221
|
+
* 1. Explicit CLI flag (--classification, --owner-team, --purpose, …)
|
|
222
|
+
* 2. Env var (AILF_CLASSIFICATION, AILF_OWNER_TEAM, AILF_PURPOSE, …)
|
|
223
|
+
* 3. Omit — server applies its own defaults (ad-hoc / unknown).
|
|
224
|
+
*
|
|
225
|
+
* Labels are additive: --label values concatenate with AILF_LABELS.
|
|
226
|
+
*
|
|
227
|
+
* `executor` is always set on remote submissions because we know the
|
|
228
|
+
* invocation is a user-driven CLI call. Surface defaults to `"cli"`
|
|
229
|
+
* unless AILF_EXECUTOR_SURFACE explicitly overrides; name falls back to
|
|
230
|
+
* GITHUB_ACTOR when available.
|
|
231
|
+
*
|
|
232
|
+
* Returns partial `PipelineRequest` fields only. Omits any key whose
|
|
233
|
+
* source (flag + env) was unset.
|
|
234
|
+
*/
|
|
235
|
+
export function buildCallerEnvelope(config) {
|
|
236
|
+
const out = {};
|
|
237
|
+
// Classification: flag > env. Validated against the closed enum.
|
|
238
|
+
const rawClassification = config.classificationOption ??
|
|
239
|
+
process.env.AILF_CLASSIFICATION?.trim() ??
|
|
240
|
+
undefined;
|
|
241
|
+
if (rawClassification) {
|
|
242
|
+
if (isRunClassification(rawClassification)) {
|
|
243
|
+
out.classification = rawClassification;
|
|
244
|
+
}
|
|
245
|
+
else {
|
|
246
|
+
// Surface the invalid value so downstream Zod validation gives a
|
|
247
|
+
// clear error message pointing at the flag, not the inner enum.
|
|
248
|
+
out.classification =
|
|
249
|
+
rawClassification;
|
|
250
|
+
}
|
|
251
|
+
}
|
|
252
|
+
// Owner: flag > env. Team required, individual optional.
|
|
253
|
+
const team = config.ownerTeamOption ?? process.env.AILF_OWNER_TEAM?.trim() ?? undefined;
|
|
254
|
+
const individual = config.ownerIndividualOption ??
|
|
255
|
+
process.env.AILF_OWNER_INDIVIDUAL?.trim() ??
|
|
256
|
+
process.env.GITHUB_ACTOR?.trim() ??
|
|
257
|
+
undefined;
|
|
258
|
+
if (team) {
|
|
259
|
+
out.owner = individual ? { team, individual } : { team };
|
|
260
|
+
}
|
|
261
|
+
// Purpose: flag > env.
|
|
262
|
+
const purpose = config.purposeOption ?? process.env.AILF_PURPOSE?.trim() ?? undefined;
|
|
263
|
+
if (purpose)
|
|
264
|
+
out.purpose = purpose;
|
|
265
|
+
// Labels: flag AND env are additive (dedup + trim).
|
|
266
|
+
const flagLabels = config.labelOptions ?? [];
|
|
267
|
+
const envLabels = (process.env.AILF_LABELS ?? "")
|
|
268
|
+
.split(",")
|
|
269
|
+
.map((s) => s.trim())
|
|
270
|
+
.filter(Boolean);
|
|
271
|
+
const mergedLabels = Array.from(new Set([...envLabels, ...flagLabels]));
|
|
272
|
+
if (mergedLabels.length > 0)
|
|
273
|
+
out.labels = mergedLabels;
|
|
274
|
+
// Executor: always set on remote submissions — we know this is a CLI
|
|
275
|
+
// user. Only omit when absolutely nothing identifying is available.
|
|
276
|
+
const surfaceEnv = process.env.AILF_EXECUTOR_SURFACE?.trim();
|
|
277
|
+
const surface = surfaceEnv === "studio" || surfaceEnv === "api" ? surfaceEnv : "cli";
|
|
278
|
+
const githubActor = process.env.GITHUB_ACTOR?.trim() || undefined;
|
|
279
|
+
const nameFromIndividual = config.ownerIndividualOption ??
|
|
280
|
+
process.env.AILF_OWNER_INDIVIDUAL?.trim() ??
|
|
281
|
+
undefined;
|
|
282
|
+
const executorName = githubActor ?? nameFromIndividual;
|
|
283
|
+
const executor = {
|
|
284
|
+
type: "user",
|
|
285
|
+
surface,
|
|
286
|
+
};
|
|
287
|
+
if (executorName)
|
|
288
|
+
executor.name = executorName;
|
|
289
|
+
if (githubActor)
|
|
290
|
+
executor.githubActor = githubActor;
|
|
291
|
+
out.executor = executor;
|
|
292
|
+
return out;
|
|
293
|
+
}
|
|
213
294
|
/**
|
|
214
295
|
* Auto-detect caller git metadata from GitHub Actions environment variables.
|
|
215
296
|
*
|
|
@@ -5,7 +5,7 @@
|
|
|
5
5
|
* import { ApiClient, buildRemoteRequest, resolveTasksDir } from "./adapters/api-client/index.js"
|
|
6
6
|
*/
|
|
7
7
|
export { ApiClient } from "./api-client.js";
|
|
8
|
-
export { buildRemoteRequest, resolveTasksDir, type BuildRequestOptions, type RemoteConfigSlice, } from "./build-request.js";
|
|
8
|
+
export { buildCallerEnvelope, buildRemoteRequest, resolveTasksDir, type BuildRequestOptions, type RemoteConfigSlice, } from "./build-request.js";
|
|
9
9
|
export { ApiAuthError, ApiConnectionError, ApiError, ApiTimeoutError, } from "./errors.js";
|
|
10
10
|
export { formatJobError } from "./format-error.js";
|
|
11
11
|
export { createProgressDisplay } from "./progress.js";
|
|
@@ -5,7 +5,7 @@
|
|
|
5
5
|
* import { ApiClient, buildRemoteRequest, resolveTasksDir } from "./adapters/api-client/index.js"
|
|
6
6
|
*/
|
|
7
7
|
export { ApiClient } from "./api-client.js";
|
|
8
|
-
export { buildRemoteRequest, resolveTasksDir, } from "./build-request.js";
|
|
8
|
+
export { buildCallerEnvelope, buildRemoteRequest, resolveTasksDir, } from "./build-request.js";
|
|
9
9
|
export { ApiAuthError, ApiConnectionError, ApiError, ApiTimeoutError, } from "./errors.js";
|
|
10
10
|
export { formatJobError } from "./format-error.js";
|
|
11
11
|
export { createProgressDisplay } from "./progress.js";
|
|
@@ -81,9 +81,11 @@ export function validateCanonicalTasks(tasks) {
|
|
|
81
81
|
}
|
|
82
82
|
}
|
|
83
83
|
}
|
|
84
|
-
// Check task has at least one llm-rubric assertion (recommended but not required)
|
|
84
|
+
// Check task has at least one llm-rubric assertion (recommended but not required).
|
|
85
|
+
// agent-harness tasks grade side-effects (file-exists, command-succeeds, etc.),
|
|
86
|
+
// not text output, so an llm-rubric is not expected.
|
|
85
87
|
const hasLlmRubric = assertions.some((a) => a.type === "llm-rubric");
|
|
86
|
-
if (!hasLlmRubric) {
|
|
88
|
+
if (!hasLlmRubric && task.mode !== "agent-harness") {
|
|
87
89
|
warnings.push({
|
|
88
90
|
taskId: task.id,
|
|
89
91
|
field: "assertions",
|
|
@@ -727,6 +727,11 @@ async function buildPipelineExplainPlan(actionCommand, rootDir) {
|
|
|
727
727
|
artifactsDir: raw.artifactsDir,
|
|
728
728
|
artifactsDryRun: raw.artifactsDryRun ?? false,
|
|
729
729
|
artifactsExclude: raw.artifactsExclude,
|
|
730
|
+
classification: raw.classification,
|
|
731
|
+
ownerTeam: raw.ownerTeam,
|
|
732
|
+
ownerIndividual: raw.ownerIndividual,
|
|
733
|
+
purpose: raw.purpose,
|
|
734
|
+
label: raw.label ?? [],
|
|
730
735
|
};
|
|
731
736
|
const resolved = computeResolvedOptions(withDefaults);
|
|
732
737
|
const planOpts = {
|
package/dist/commands/init.js
CHANGED
|
@@ -258,6 +258,11 @@ async function runInit(opts) {
|
|
|
258
258
|
console.log(` 1. Edit the example tasks in ${rel(targetDir, tasksDir)}/ — update`);
|
|
259
259
|
console.log(" slugs and prompts for your documentation");
|
|
260
260
|
console.log(` 2. Validate locally: npx @sanity/ailf@latest validate-tasks .ailf/tasks/`);
|
|
261
|
+
console.log();
|
|
262
|
+
console.log(' Note: tasks with status: "draft" are skipped on normal runs.');
|
|
263
|
+
console.log(" To run one anyway, target it explicitly with --task <id>, e.g.:");
|
|
264
|
+
console.log(" npx @sanity/ailf@latest pipeline --task example-agent-add-schema");
|
|
265
|
+
console.log();
|
|
261
266
|
console.log(" 3. Add a GitHub Actions secret");
|
|
262
267
|
console.log(" (Settings → Secrets and variables → Actions):");
|
|
263
268
|
console.log(" • AILF_API_KEY — your API key");
|
|
@@ -68,6 +68,12 @@ export interface ResolvedOptions {
|
|
|
68
68
|
artifactsDir?: string;
|
|
69
69
|
artifactsDryRun: boolean;
|
|
70
70
|
artifactsExclude?: readonly string[];
|
|
71
|
+
/** D0037 / W0069 caller envelope — surfaces only on --remote today. */
|
|
72
|
+
classificationOption?: string;
|
|
73
|
+
ownerTeamOption?: string;
|
|
74
|
+
ownerIndividualOption?: string;
|
|
75
|
+
purposeOption?: string;
|
|
76
|
+
labelOptions: string[];
|
|
71
77
|
}
|
|
72
78
|
/**
|
|
73
79
|
* Pure option resolution — computes ResolvedOptions from CLI flags without
|
|
@@ -269,6 +269,11 @@ export function computeResolvedOptions(opts) {
|
|
|
269
269
|
artifactsDir: resolveArtifactsDir(opts),
|
|
270
270
|
artifactsDryRun: opts.artifactsDryRun,
|
|
271
271
|
artifactsExclude: parseArtifactsExcludeList(opts.artifactsExclude),
|
|
272
|
+
classificationOption: opts.classification?.trim() || undefined,
|
|
273
|
+
ownerTeamOption: opts.ownerTeam?.trim() || undefined,
|
|
274
|
+
ownerIndividualOption: opts.ownerIndividual?.trim() || undefined,
|
|
275
|
+
purposeOption: opts.purpose?.trim() || undefined,
|
|
276
|
+
labelOptions: opts.label ?? [],
|
|
272
277
|
};
|
|
273
278
|
}
|
|
274
279
|
/**
|
|
@@ -68,5 +68,10 @@ export interface PipelineCliOptions {
|
|
|
68
68
|
artifactsDir?: string;
|
|
69
69
|
artifactsDryRun: boolean;
|
|
70
70
|
artifactsExclude?: string;
|
|
71
|
+
classification?: string;
|
|
72
|
+
ownerTeam?: string;
|
|
73
|
+
ownerIndividual?: string;
|
|
74
|
+
purpose?: string;
|
|
75
|
+
label: string[];
|
|
71
76
|
}
|
|
72
77
|
export declare function createPipelineCommand(): Command;
|
|
@@ -58,6 +58,21 @@ export function createPipelineCommand() {
|
|
|
58
58
|
.option("--artifacts-dir <path>", "Root directory for local artifact output (D0033; default: .ailf/results/captures/)")
|
|
59
59
|
.option("--artifacts-dry-run", "Run artifact writers in dry-run mode — log intended writes, touch no storage", false)
|
|
60
60
|
.option("--artifacts-exclude <types>", "Comma-separated artifact types to skip (e.g. traces,graderPrompts)")
|
|
61
|
+
// D0037 caller envelope (W0069) — threads through --remote so the
|
|
62
|
+
// server-side pipeline attributes provenance to the caller, not the
|
|
63
|
+
// API gateway runner. All env-var equivalents are honored too;
|
|
64
|
+
// explicit flags win over env vars.
|
|
65
|
+
.option("--classification <value>", "Run classification for provenance: official | ad-hoc | experimental | test | external. Overrides AILF_CLASSIFICATION. See D0037.")
|
|
66
|
+
.option("--owner-team <slug>", "Team slug this run is attributable to. Overrides AILF_OWNER_TEAM.")
|
|
67
|
+
.option("--owner-individual <slug>", "Individual (GH actor / user ID) this run is attributable to. Overrides AILF_OWNER_INDIVIDUAL.")
|
|
68
|
+
.option("--purpose <text>", 'Free-text "why I ran this" attached to provenance. Overrides AILF_PURPOSE.')
|
|
69
|
+
.option("--label <value>", "Free-form searchable label (repeatable). Appends to any AILF_LABELS env value.", (val, prev) => [
|
|
70
|
+
...prev,
|
|
71
|
+
...val
|
|
72
|
+
.split(",")
|
|
73
|
+
.map((s) => s.trim())
|
|
74
|
+
.filter(Boolean),
|
|
75
|
+
], [])
|
|
61
76
|
.action(async (opts) => {
|
|
62
77
|
const { executePipeline } = await import("./pipeline-action.js");
|
|
63
78
|
await executePipeline(opts);
|
|
@@ -133,5 +133,12 @@ function toConfigSlice(opts) {
|
|
|
133
133
|
readinessEnabled: opts.readinessEnabled,
|
|
134
134
|
discoveryReportEnabled: opts.discoveryReportEnabled,
|
|
135
135
|
noRemoteCache: opts.noRemoteCache,
|
|
136
|
+
// D0037 / W0069 caller envelope overrides — flags override env vars
|
|
137
|
+
// inside buildCallerEnvelope(), which also merges AILF_* defaults.
|
|
138
|
+
classificationOption: opts.classificationOption,
|
|
139
|
+
ownerTeamOption: opts.ownerTeamOption,
|
|
140
|
+
ownerIndividualOption: opts.ownerIndividualOption,
|
|
141
|
+
purposeOption: opts.purposeOption,
|
|
142
|
+
labelOptions: opts.labelOptions,
|
|
136
143
|
};
|
|
137
144
|
}
|
|
@@ -77,6 +77,7 @@ export class FinalizeRunStep {
|
|
|
77
77
|
const runContext = buildRunContext({
|
|
78
78
|
areas: maybeSummary?.scores?.map((s) => s.feature) ?? ctx.config.areas ?? [],
|
|
79
79
|
callerGit: ctx.config.callerGit,
|
|
80
|
+
callerEnvelope: ctx.config.callerEnvelope,
|
|
80
81
|
evalFingerprint: state.evalFingerprint ?? this.options.evalFingerprint,
|
|
81
82
|
logger: ctx.logger,
|
|
82
83
|
mode: ctx.config.mode,
|
|
@@ -72,6 +72,7 @@ export function mapRequestToConfig(request, rootDir) {
|
|
|
72
72
|
beforeOption: undefined,
|
|
73
73
|
repoTasksPath: undefined,
|
|
74
74
|
callerGit: request.callerGit,
|
|
75
|
+
callerEnvelope: buildCallerEnvelope(request),
|
|
75
76
|
callback: request.callback,
|
|
76
77
|
jobId: request.jobId,
|
|
77
78
|
remote: false,
|
|
@@ -91,6 +92,23 @@ function mapDebug(debug) {
|
|
|
91
92
|
sample: debug.sample,
|
|
92
93
|
};
|
|
93
94
|
}
|
|
95
|
+
/**
|
|
96
|
+
* Collect the D0037 caller envelope fields from a PipelineRequest into a
|
|
97
|
+
* single `callerEnvelope` object. Returns undefined when no envelope
|
|
98
|
+
* fields were provided, so downstream consumers can short-circuit with
|
|
99
|
+
* `config.callerEnvelope?.classification` etc.
|
|
100
|
+
*/
|
|
101
|
+
function buildCallerEnvelope(request) {
|
|
102
|
+
const { classification, owner, executor, purpose, labels } = request;
|
|
103
|
+
if (classification === undefined &&
|
|
104
|
+
owner === undefined &&
|
|
105
|
+
executor === undefined &&
|
|
106
|
+
purpose === undefined &&
|
|
107
|
+
labels === undefined) {
|
|
108
|
+
return undefined;
|
|
109
|
+
}
|
|
110
|
+
return { classification, owner, executor, purpose, labels };
|
|
111
|
+
}
|
|
94
112
|
function mapTaskSourceType(taskMode) {
|
|
95
113
|
if (taskMode === "content-lake")
|
|
96
114
|
return taskMode;
|
|
@@ -13,6 +13,7 @@
|
|
|
13
13
|
* @see docs/decisions/D0032-run-anchored-artifact-store.md (§ Move 5 — Drift Prevention)
|
|
14
14
|
*/
|
|
15
15
|
import type { Logger, RunContext } from "../_vendor/ailf-core/index.d.ts";
|
|
16
|
+
import { type RunClassification, type RunExecutor, type RunExecutorSurface, type RunHost, type RunLineage, type RunOwner, type RunTool } from "../_vendor/ailf-shared/index.d.ts";
|
|
16
17
|
import type { ResolvedSourceConfig } from "../sources.js";
|
|
17
18
|
import type { EvalMode } from "./types.js";
|
|
18
19
|
/**
|
|
@@ -34,8 +35,35 @@ export interface RunContextInput {
|
|
|
34
35
|
repo: string;
|
|
35
36
|
sha?: string;
|
|
36
37
|
};
|
|
38
|
+
/**
|
|
39
|
+
* Caller-provided D0037 envelope from a `--remote` PipelineRequest.
|
|
40
|
+
* When set, overrides the server-env detection so the caller's intent
|
|
41
|
+
* survives the API boundary. Same override pattern as `callerGit`.
|
|
42
|
+
*
|
|
43
|
+
* Only caller-identity fields are carried — `executor.email`, `tool`,
|
|
44
|
+
* and `host` stay server-inferred.
|
|
45
|
+
*
|
|
46
|
+
* @see docs/decisions/D0037-run-classification-and-ownership-taxonomy.md
|
|
47
|
+
*/
|
|
48
|
+
callerEnvelope?: {
|
|
49
|
+
classification?: RunClassification;
|
|
50
|
+
owner?: {
|
|
51
|
+
team: string;
|
|
52
|
+
individual?: string;
|
|
53
|
+
};
|
|
54
|
+
executor?: {
|
|
55
|
+
type: "user";
|
|
56
|
+
surface: RunExecutorSurface;
|
|
57
|
+
name?: string;
|
|
58
|
+
githubActor?: string;
|
|
59
|
+
};
|
|
60
|
+
purpose?: string;
|
|
61
|
+
labels?: string[];
|
|
62
|
+
};
|
|
37
63
|
/** Evaluation fingerprint for cross-environment cache lookup */
|
|
38
64
|
evalFingerprint?: string;
|
|
65
|
+
/** Caller-supplied run lineage (re-runs, comparison partners, parent job). */
|
|
66
|
+
lineage?: RunLineage;
|
|
39
67
|
/** Logger instance (defaults to ConsoleLogger) */
|
|
40
68
|
logger?: Logger;
|
|
41
69
|
/** Evaluation mode */
|
|
@@ -55,3 +83,38 @@ export interface RunContextInput {
|
|
|
55
83
|
* former directly, the latter transitively through `buildProvenance`.
|
|
56
84
|
*/
|
|
57
85
|
export declare function buildRunContext(input: RunContextInput): RunContext;
|
|
86
|
+
/**
|
|
87
|
+
* Resolve `classification` from `AILF_CLASSIFICATION`, validated against
|
|
88
|
+
* the closed enum. Defaults to `"ad-hoc"` so unannotated runs never leak
|
|
89
|
+
* into the canonical `"official"` series.
|
|
90
|
+
*/
|
|
91
|
+
export declare function detectClassification(log: Logger): RunClassification;
|
|
92
|
+
/**
|
|
93
|
+
* Resolve `owner` from `AILF_OWNER_TEAM` (+ optional
|
|
94
|
+
* `AILF_OWNER_INDIVIDUAL`). `team` is free-form; default is `"unknown"`.
|
|
95
|
+
*/
|
|
96
|
+
export declare function detectOwner(): RunOwner;
|
|
97
|
+
/**
|
|
98
|
+
* Detect who/what invoked the run.
|
|
99
|
+
*
|
|
100
|
+
* Priority:
|
|
101
|
+
* 1. GitHub Actions context → `{ type: "system", name: "github-actions", ... }`
|
|
102
|
+
* 2. CLI context → `{ type: "user", surface: "cli", ... }` with git-config
|
|
103
|
+
* or OS username fallback. Email capture gated by
|
|
104
|
+
* `AILF_CAPTURE_EMAIL` (default on; set `0` to opt out).
|
|
105
|
+
*
|
|
106
|
+
* Every identity field is optional — missing git, containers, or masked
|
|
107
|
+
* env vars must never block a run.
|
|
108
|
+
*/
|
|
109
|
+
export declare function detectExecutor(): RunExecutor;
|
|
110
|
+
/**
|
|
111
|
+
* Resolve `tool` — which AILF/Node ran the eval. Captured on every new
|
|
112
|
+
* run so cross-version trend comparisons can isolate framework changes
|
|
113
|
+
* from doc changes.
|
|
114
|
+
*/
|
|
115
|
+
export declare function detectTool(log: Logger): RunTool;
|
|
116
|
+
/**
|
|
117
|
+
* Resolve `host` — platform + arch + CI provider. Hostname is
|
|
118
|
+
* intentionally excluded (leaks identity without filtering benefit).
|
|
119
|
+
*/
|
|
120
|
+
export declare function detectHost(): RunHost;
|