@sanity/ailf 2.7.1 → 2.8.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (48) hide show
  1. package/dist/_vendor/ailf-core/artifact-registry.d.ts +72 -0
  2. package/dist/_vendor/ailf-core/artifact-registry.js +150 -0
  3. package/dist/_vendor/ailf-core/index.d.ts +2 -1
  4. package/dist/_vendor/ailf-core/index.js +2 -1
  5. package/dist/_vendor/ailf-core/ports/artifact-collector.d.ts +3 -3
  6. package/dist/_vendor/ailf-core/ports/artifact-writer.d.ts +56 -0
  7. package/dist/_vendor/ailf-core/ports/artifact-writer.js +28 -0
  8. package/dist/_vendor/ailf-core/ports/context.d.ts +13 -3
  9. package/dist/_vendor/ailf-core/ports/index.d.ts +3 -3
  10. package/dist/_vendor/ailf-core/ports/index.js +1 -1
  11. package/dist/_vendor/ailf-core/types/branded-ids.d.ts +9 -0
  12. package/dist/_vendor/ailf-core/types/branded-ids.js +21 -0
  13. package/dist/_vendor/ailf-core/types/index.d.ts +110 -68
  14. package/dist/_vendor/ailf-core/types/index.js +1 -1
  15. package/dist/_vendor/ailf-shared/index.d.ts +2 -0
  16. package/dist/_vendor/ailf-shared/index.js +2 -0
  17. package/dist/_vendor/ailf-shared/run-context.d.ts +55 -0
  18. package/dist/_vendor/ailf-shared/run-context.js +17 -0
  19. package/dist/_vendor/ailf-shared/run-trigger.d.ts +30 -0
  20. package/dist/_vendor/ailf-shared/run-trigger.js +13 -0
  21. package/dist/artifact-capture/api-gateway-artifact-writer.d.ts +39 -0
  22. package/dist/artifact-capture/api-gateway-artifact-writer.js +148 -0
  23. package/dist/artifact-capture/gcs-artifact-writer.d.ts +30 -0
  24. package/dist/artifact-capture/gcs-artifact-writer.js +119 -0
  25. package/dist/commands/publish.js +3 -2
  26. package/dist/composition-root.d.ts +3 -3
  27. package/dist/composition-root.js +20 -15
  28. package/dist/orchestration/build-step-sequence.js +6 -1
  29. package/dist/orchestration/steps/calculate-scores-step.js +42 -2
  30. package/dist/orchestration/steps/finalize-run-step.d.ts +29 -0
  31. package/dist/orchestration/steps/finalize-run-step.js +103 -0
  32. package/dist/orchestration/steps/publish-report-step.js +19 -39
  33. package/dist/pipeline/calculate-scores.js +13 -2
  34. package/dist/pipeline/provenance.d.ts +24 -44
  35. package/dist/pipeline/provenance.js +17 -165
  36. package/dist/pipeline/report-title.d.ts +2 -2
  37. package/dist/pipeline/run-context.d.ts +57 -0
  38. package/dist/pipeline/run-context.js +156 -0
  39. package/dist/pipeline/upload-test-outputs.d.ts +26 -0
  40. package/dist/pipeline/upload-test-outputs.js +34 -0
  41. package/dist/report-store.js +4 -2
  42. package/package.json +3 -3
  43. package/dist/_vendor/ailf-core/ports/artifact-uploader.d.ts +0 -35
  44. package/dist/_vendor/ailf-core/ports/artifact-uploader.js +0 -18
  45. package/dist/artifact-capture/api-gateway-artifact-uploader.d.ts +0 -41
  46. package/dist/artifact-capture/api-gateway-artifact-uploader.js +0 -123
  47. package/dist/artifact-capture/gcs-report-artifact-uploader.d.ts +0 -31
  48. package/dist/artifact-capture/gcs-report-artifact-uploader.js +0 -66
@@ -1,65 +1,45 @@
1
1
  /**
2
2
  * pipeline/provenance.ts
3
3
  *
4
- * Builds ReportProvenance from data available during a pipeline run.
4
+ * Builds `ReportProvenance` from data available during a pipeline run.
5
5
  *
6
- * Provenance captures what produced an evaluation report: which models,
7
- * which source, which mode, what triggered it, git metadata, etc.
8
- * Most of this data already flows through the pipeline — this module
9
- * just captures what would otherwise be ephemeral.
6
+ * `ReportProvenance extends RunContext` (D0032). This module derives
7
+ * RunContext via `buildRunContext()` and attaches report-specific extras
8
+ * (lineage, autoScope, promptfoo URLs, targetDocuments, runId). A single
9
+ * derivation path for RunContext forecloses drift between the run
10
+ * manifest (GCS) and the report provenance (Content Lake).
10
11
  *
11
- * @see docs/design-docs/report-store/domain-model.md
12
- * @see docs/design-docs/report-store/architecture.md Provenance collection
12
+ * @see packages/eval/src/pipeline/run-context.ts — the shared derivation path
13
+ * @see docs/decisions/D0032-run-anchored-artifact-store.md (§ Move 5 — Drift Prevention)
13
14
  */
14
- import type { Logger } from "../_vendor/ailf-core/index.d.ts";
15
- import type { ResolvedSourceConfig } from "../sources.js";
16
- import type { EvalMode, PromptfooUrlEntry, ReportAutoScope, ReportProvenance } from "./types.js";
17
- export interface ProvenanceInput {
18
- /** Feature areas that were evaluated */
19
- areas: string[];
20
- /** Logger instance (defaults to ConsoleLogger) */
21
- logger?: Logger;
15
+ import type { PromptfooUrlEntry, ReportAutoScope, ReportProvenance, RunId } from "./types.js";
16
+ import { type RunContextInput } from "./run-context.js";
17
+ /**
18
+ * Inputs needed to build a ReportProvenance. Extends `RunContextInput` so
19
+ * the RunContext derivation path is shared.
20
+ */
21
+ export interface ProvenanceInput extends RunContextInput {
22
22
  /** Release auto-scope metadata (when perspective evaluation was scoped) */
23
23
  autoScope?: ReportAutoScope;
24
- /**
25
- * Git metadata from the *calling* repository (cross-repo evaluations).
26
- * When provided, overrides CI env var detection so provenance attributes
27
- * to the caller — not the AILF core repo where the workflow executes.
28
- */
29
- callerGit?: {
30
- branch?: string;
31
- prNumber?: number;
32
- repo: string;
33
- sha?: string;
34
- };
35
24
  /** SHA-256 hash of the doc context files (from cache system) */
36
25
  contextHash?: string;
37
- /** Evaluation fingerprint for cross-environment cache lookup */
38
- evalFingerprint?: string;
39
- /** Evaluation mode */
40
- mode: EvalMode;
41
26
  /** @deprecated Use `promptfooUrls` — kept for backward compatibility */
42
27
  promptfooUrl?: string;
43
28
  /** Per-mode Promptfoo share URLs */
44
29
  promptfooUrls?: PromptfooUrlEntry[];
45
- /** Path to the package root (for reading config/models) */
46
- rootDir: string;
47
- /** Report ID that triggered this re-run (becomes lineage.rerunOf) */
48
- sourceReportId?: string;
30
+ /** Identity of the pipeline run that produced this report (D0032) */
31
+ runId: RunId;
49
32
  /** Sanity document IDs targeted */
50
33
  sanityDocumentIds?: string[];
51
- /** Resolved documentation source */
52
- source: ResolvedSourceConfig;
53
- /** Specific task IDs evaluated (if scoped) */
54
- taskIds?: string[];
34
+ /** Report ID that triggered this re-run (becomes lineage.rerunOf) */
35
+ sourceReportId?: string;
55
36
  }
56
37
  /**
57
- * Build a ReportProvenance object from pipeline context.
38
+ * Build a ReportProvenance from pipeline context.
58
39
  *
59
- * Assembles provenance from:
60
- * - Pipeline options (mode, source, areas, tasks)
61
- * - config/models.ts (model list, grader)
62
- * - Environment variables (CI metadata, trigger detection)
63
- * - Optional metadata (context hash, Promptfoo URL)
40
+ * RunContext fields (mode, areas, taskIds, models, graderModel, source,
41
+ * evalFingerprint, trigger, git) come from `buildRunContext`. Report-
42
+ * specific fields (autoScope, contextHash, lineage, promptfoo*, runId,
43
+ * targetDocuments) are attached here.
64
44
  */
65
45
  export declare function buildProvenance(input: ProvenanceInput): ReportProvenance;
@@ -1,188 +1,40 @@
1
1
  /**
2
2
  * pipeline/provenance.ts
3
3
  *
4
- * Builds ReportProvenance from data available during a pipeline run.
4
+ * Builds `ReportProvenance` from data available during a pipeline run.
5
5
  *
6
- * Provenance captures what produced an evaluation report: which models,
7
- * which source, which mode, what triggered it, git metadata, etc.
8
- * Most of this data already flows through the pipeline — this module
9
- * just captures what would otherwise be ephemeral.
6
+ * `ReportProvenance extends RunContext` (D0032). This module derives
7
+ * RunContext via `buildRunContext()` and attaches report-specific extras
8
+ * (lineage, autoScope, promptfoo URLs, targetDocuments, runId). A single
9
+ * derivation path for RunContext forecloses drift between the run
10
+ * manifest (GCS) and the report provenance (Content Lake).
10
11
  *
11
- * @see docs/design-docs/report-store/domain-model.md
12
- * @see docs/design-docs/report-store/architecture.md Provenance collection
12
+ * @see packages/eval/src/pipeline/run-context.ts — the shared derivation path
13
+ * @see docs/decisions/D0032-run-anchored-artifact-store.md (§ Move 5 — Drift Prevention)
13
14
  */
14
- import { ConsoleLogger } from "../adapters/loggers/index.js";
15
- import { tryLoadConfigFile } from "./compiler/config-loader.js";
15
+ import { buildRunContext } from "./run-context.js";
16
16
  /**
17
- * Build a ReportProvenance object from pipeline context.
17
+ * Build a ReportProvenance from pipeline context.
18
18
  *
19
- * Assembles provenance from:
20
- * - Pipeline options (mode, source, areas, tasks)
21
- * - config/models.ts (model list, grader)
22
- * - Environment variables (CI metadata, trigger detection)
23
- * - Optional metadata (context hash, Promptfoo URL)
19
+ * RunContext fields (mode, areas, taskIds, models, graderModel, source,
20
+ * evalFingerprint, trigger, git) come from `buildRunContext`. Report-
21
+ * specific fields (autoScope, contextHash, lineage, promptfoo*, runId,
22
+ * targetDocuments) are attached here.
24
23
  */
25
24
  export function buildProvenance(input) {
26
- const log = input.logger ?? new ConsoleLogger();
27
- const models = loadModelsConfig(input.rootDir, log);
28
- log.debug("Assembling provenance input", {
29
- mode: input.mode,
30
- sourceName: input.source.name,
31
- sourceBaseUrl: input.source.baseUrl,
32
- areas: input.areas,
33
- taskIds: input.taskIds,
34
- hasContextHash: Boolean(input.contextHash),
35
- hasEvalFingerprint: Boolean(input.evalFingerprint),
36
- hasCallerGit: Boolean(input.callerGit),
37
- hasSourceReportId: Boolean(input.sourceReportId),
38
- modelCount: models.models.length,
39
- });
40
- // Cross-repo evaluations: prefer explicit caller git metadata over
41
- // CI env vars (which always reflect the AILF core repo).
42
- const git = input.callerGit
43
- ? {
44
- branch: input.callerGit.branch ?? "unknown",
45
- prNumber: input.callerGit.prNumber,
46
- repo: input.callerGit.repo,
47
- sha: input.callerGit.sha ?? "unknown",
48
- }
49
- : detectGitMetadata();
25
+ const runContext = buildRunContext(input);
50
26
  // Build lineage from explicit relationships
51
27
  const lineage = input.sourceReportId
52
28
  ? { rerunOf: input.sourceReportId }
53
29
  : undefined;
54
- const trigger = detectTrigger();
55
- log.debug("Provenance computed", {
56
- triggerType: trigger.type,
57
- gitRepo: git?.repo,
58
- gitBranch: git?.branch,
59
- evalFingerprint: input.evalFingerprint,
60
- hasLineage: Boolean(lineage),
61
- });
62
- // Non-literacy modes (agent-harness, mcp-server, etc.) don't use the
63
- // config/models.ts model matrix — listing those models would be misleading.
64
- // Only include them for literacy mode where they're the actual eval targets.
65
- const evaluatedModels = input.mode === "literacy"
66
- ? models.models.map((m) => ({ id: m.id, label: m.label }))
67
- : [];
68
30
  return {
69
- areas: input.areas,
31
+ ...runContext,
70
32
  autoScope: input.autoScope,
71
33
  contextHash: input.contextHash,
72
- evalFingerprint: input.evalFingerprint,
73
- git,
74
- graderModel: models.grader.id,
75
34
  lineage,
76
- mode: input.mode,
77
- models: evaluatedModels,
78
35
  promptfooUrl: input.promptfooUrl,
79
36
  promptfooUrls: input.promptfooUrls,
80
- source: {
81
- baseUrl: input.source.baseUrl,
82
- dataset: input.source.dataset,
83
- name: input.source.name,
84
- perspective: input.source.perspective,
85
- projectId: input.source.projectId,
86
- },
37
+ runId: input.runId,
87
38
  targetDocuments: input.sanityDocumentIds,
88
- taskIds: input.taskIds,
89
- trigger: detectTrigger(),
90
- };
91
- }
92
- // ---------------------------------------------------------------------------
93
- // Trigger detection
94
- // ---------------------------------------------------------------------------
95
- /**
96
- * Extract git metadata from GitHub Actions environment variables.
97
- * Returns undefined when not running in CI.
98
- */
99
- function detectGitMetadata() {
100
- const repo = process.env.GITHUB_REPOSITORY;
101
- if (!repo)
102
- return undefined;
103
- const sha = process.env.GITHUB_SHA ?? "unknown";
104
- const ref = process.env.GITHUB_REF ?? "";
105
- // Extract branch name from ref (refs/heads/main → main)
106
- const branch = ref.startsWith("refs/heads/")
107
- ? ref.slice("refs/heads/".length)
108
- : ref.startsWith("refs/pull/")
109
- ? `pr-${ref.split("/")[2]}`
110
- : ref;
111
- // Extract PR number from GITHUB_REF (refs/pull/123/merge)
112
- const prMatch = ref.match(/^refs\/pull\/(\d+)\//);
113
- const prNumber = prMatch ? parseInt(prMatch[1], 10) : undefined;
114
- return { branch, prNumber, repo, sha };
115
- }
116
- // ---------------------------------------------------------------------------
117
- // Git metadata
118
- // ---------------------------------------------------------------------------
119
- /**
120
- * Infer what triggered this evaluation from environment variables.
121
- *
122
- * Detection order:
123
- * 1. AILF_TRIGGER_TYPE — explicit override (for custom integrations)
124
- * 2. GITHUB_EVENT_NAME === "schedule" — cron-triggered
125
- * 3. GITHUB_EVENT_NAME === "repository_dispatch" — cross-repo trigger
126
- * 4. GITHUB_ACTIONS === "true" — CI-triggered
127
- * 5. Default: manual
128
- */
129
- function detectTrigger() {
130
- const explicit = process.env.AILF_TRIGGER_TYPE;
131
- if (explicit === "scheduled") {
132
- return {
133
- schedule: process.env.AILF_SCHEDULE ?? "unknown",
134
- type: "scheduled",
135
- };
136
- }
137
- if (explicit === "webhook") {
138
- return {
139
- documentId: process.env.AILF_WEBHOOK_DOCUMENT_ID,
140
- source: process.env.AILF_WEBHOOK_SOURCE ?? "unknown",
141
- type: "webhook",
142
- };
143
- }
144
- // GitHub Actions context
145
- const eventName = process.env.GITHUB_EVENT_NAME;
146
- if (eventName === "schedule") {
147
- return {
148
- schedule: process.env.GITHUB_SCHEDULE ?? "unknown",
149
- type: "scheduled",
150
- };
151
- }
152
- if (eventName === "repository_dispatch") {
153
- return {
154
- callerRef: process.env.GITHUB_REF,
155
- // Note: callerRepo here is a fallback. The accurate caller repo
156
- // comes from callerGit (injected into the PipelineRequest payload).
157
- // GITHUB_REPOSITORY_OWNER_ID is just the org ID, not owner/repo.
158
- callerRepo: process.env.GITHUB_REPOSITORY_OWNER_ID ?? "unknown",
159
- type: "cross-repo",
160
- };
161
- }
162
- if (process.env.GITHUB_ACTIONS === "true") {
163
- return {
164
- runId: process.env.GITHUB_RUN_ID ?? "unknown",
165
- type: "ci",
166
- workflow: process.env.GITHUB_WORKFLOW ?? "unknown",
167
- };
168
- }
169
- return { type: "manual" };
170
- }
171
- // ---------------------------------------------------------------------------
172
- // Model config loading
173
- // ---------------------------------------------------------------------------
174
- /**
175
- * Load config/models to extract model list and grader info.
176
- * Falls back to a minimal config if the file can't be read.
177
- */
178
- function loadModelsConfig(rootDir, log) {
179
- const result = tryLoadConfigFile("models", rootDir);
180
- if (result)
181
- return result.data;
182
- log.warn("Could not read config/models for provenance");
183
- return {
184
- defaults: {},
185
- grader: { id: "unknown" },
186
- models: [],
187
39
  };
188
40
  }
@@ -15,7 +15,7 @@
15
15
  * @see docs/design-docs/report-store/domain-model.md
16
16
  * @see packages/eval/src/pipeline/provenance.ts — builds the provenance input
17
17
  */
18
- import type { EvalMode, ReportTrigger } from "./types.js";
18
+ import type { EvalMode, RunTrigger } from "./types.js";
19
19
  /** Input required to generate a human-readable report title. */
20
20
  export interface ReportTitleInput {
21
21
  provenance: {
@@ -31,7 +31,7 @@ export interface ReportTitleInput {
31
31
  /** Sanity document IDs targeted (when scoped to specific documents) */
32
32
  targetDocuments?: string[];
33
33
  /** What triggered the evaluation */
34
- trigger: ReportTrigger;
34
+ trigger: RunTrigger;
35
35
  };
36
36
  /**
37
37
  * Total number of known feature areas in the system.
@@ -0,0 +1,57 @@
1
+ /**
2
+ * buildRunContext — the single code path that derives `RunContext` from
3
+ * pipeline inputs.
4
+ *
5
+ * `RunContext` is the 9-field shape shared between `RunManifest.context`
6
+ * (in GCS) and `ReportProvenance` (in Content Lake, which `extends
7
+ * RunContext`). Routing every consumer through this function makes it
8
+ * structurally impossible for the two to disagree: there is no second
9
+ * code path to drift against.
10
+ *
11
+ * Contract test: `packages/eval/src/__tests__/run-context-parity.test.ts`
12
+ *
13
+ * @see docs/decisions/D0032-run-anchored-artifact-store.md (§ Move 5 — Drift Prevention)
14
+ */
15
+ import type { Logger, RunContext } from "../_vendor/ailf-core/index.d.ts";
16
+ import type { ResolvedSourceConfig } from "../sources.js";
17
+ import type { EvalMode } from "./types.js";
18
+ /**
19
+ * Inputs required to derive a `RunContext`. `ProvenanceInput` extends this
20
+ * so every caller of `buildProvenance` is automatically a valid input to
21
+ * `buildRunContext`.
22
+ */
23
+ export interface RunContextInput {
24
+ /** Feature areas that were evaluated */
25
+ areas: string[];
26
+ /**
27
+ * Git metadata from the *calling* repository (cross-repo evaluations).
28
+ * When provided, overrides CI env var detection so context attributes
29
+ * to the caller — not the AILF core repo where the workflow executes.
30
+ */
31
+ callerGit?: {
32
+ branch?: string;
33
+ prNumber?: number;
34
+ repo: string;
35
+ sha?: string;
36
+ };
37
+ /** Evaluation fingerprint for cross-environment cache lookup */
38
+ evalFingerprint?: string;
39
+ /** Logger instance (defaults to ConsoleLogger) */
40
+ logger?: Logger;
41
+ /** Evaluation mode */
42
+ mode: EvalMode;
43
+ /** Path to the package root (for reading config/models) */
44
+ rootDir: string;
45
+ /** Resolved documentation source */
46
+ source: ResolvedSourceConfig;
47
+ /** Specific task IDs evaluated (if scoped) */
48
+ taskIds?: string[];
49
+ }
50
+ /**
51
+ * Derive `RunContext` from pipeline inputs. The only construction path.
52
+ *
53
+ * Both `FinalizeRunStep` (via `RunManifest.context`) and
54
+ * `PublishReportStep` (via `ReportProvenance`) call this function — the
55
+ * former directly, the latter transitively through `buildProvenance`.
56
+ */
57
+ export declare function buildRunContext(input: RunContextInput): RunContext;
@@ -0,0 +1,156 @@
1
+ /**
2
+ * buildRunContext — the single code path that derives `RunContext` from
3
+ * pipeline inputs.
4
+ *
5
+ * `RunContext` is the 9-field shape shared between `RunManifest.context`
6
+ * (in GCS) and `ReportProvenance` (in Content Lake, which `extends
7
+ * RunContext`). Routing every consumer through this function makes it
8
+ * structurally impossible for the two to disagree: there is no second
9
+ * code path to drift against.
10
+ *
11
+ * Contract test: `packages/eval/src/__tests__/run-context-parity.test.ts`
12
+ *
13
+ * @see docs/decisions/D0032-run-anchored-artifact-store.md (§ Move 5 — Drift Prevention)
14
+ */
15
+ import { ConsoleLogger } from "../adapters/loggers/index.js";
16
+ import { tryLoadConfigFile } from "./compiler/config-loader.js";
17
+ /**
18
+ * Derive `RunContext` from pipeline inputs. The only construction path.
19
+ *
20
+ * Both `FinalizeRunStep` (via `RunManifest.context`) and
21
+ * `PublishReportStep` (via `ReportProvenance`) call this function — the
22
+ * former directly, the latter transitively through `buildProvenance`.
23
+ */
24
+ export function buildRunContext(input) {
25
+ const log = input.logger ?? new ConsoleLogger();
26
+ const models = loadModelsConfig(input.rootDir, log);
27
+ // Cross-repo evaluations: prefer explicit caller git metadata over
28
+ // CI env vars (which always reflect the AILF core repo).
29
+ const git = input.callerGit
30
+ ? {
31
+ branch: input.callerGit.branch ?? "unknown",
32
+ prNumber: input.callerGit.prNumber,
33
+ repo: input.callerGit.repo,
34
+ sha: input.callerGit.sha ?? "unknown",
35
+ }
36
+ : detectGitMetadata();
37
+ const trigger = detectTrigger();
38
+ // Non-literacy modes (agent-harness, mcp-server, etc.) don't use the
39
+ // config/models.ts model matrix — listing those models would be
40
+ // misleading. Only include them for literacy mode where they're the
41
+ // actual eval targets.
42
+ const evaluatedModels = input.mode === "literacy"
43
+ ? models.models.map((m) => ({ id: m.id, label: m.label }))
44
+ : [];
45
+ return {
46
+ areas: input.areas,
47
+ evalFingerprint: input.evalFingerprint,
48
+ git,
49
+ graderModel: models.grader.id,
50
+ mode: input.mode,
51
+ models: evaluatedModels,
52
+ source: {
53
+ baseUrl: input.source.baseUrl,
54
+ dataset: input.source.dataset,
55
+ name: input.source.name,
56
+ perspective: input.source.perspective,
57
+ projectId: input.source.projectId,
58
+ },
59
+ taskIds: input.taskIds,
60
+ trigger,
61
+ };
62
+ }
63
+ // ---------------------------------------------------------------------------
64
+ // Environment-derived context
65
+ // ---------------------------------------------------------------------------
66
+ /**
67
+ * Extract git metadata from GitHub Actions environment variables.
68
+ * Returns undefined when not running in CI.
69
+ */
70
+ function detectGitMetadata() {
71
+ const repo = process.env.GITHUB_REPOSITORY;
72
+ if (!repo)
73
+ return undefined;
74
+ const sha = process.env.GITHUB_SHA ?? "unknown";
75
+ const ref = process.env.GITHUB_REF ?? "";
76
+ // Extract branch name from ref (refs/heads/main → main)
77
+ const branch = ref.startsWith("refs/heads/")
78
+ ? ref.slice("refs/heads/".length)
79
+ : ref.startsWith("refs/pull/")
80
+ ? `pr-${ref.split("/")[2]}`
81
+ : ref;
82
+ // Extract PR number from GITHUB_REF (refs/pull/123/merge)
83
+ const prMatch = ref.match(/^refs\/pull\/(\d+)\//);
84
+ const prNumber = prMatch ? parseInt(prMatch[1], 10) : undefined;
85
+ return { branch, prNumber, repo, sha };
86
+ }
87
+ /**
88
+ * Infer what triggered this evaluation from environment variables.
89
+ *
90
+ * Detection order:
91
+ * 1. AILF_TRIGGER_TYPE — explicit override (for custom integrations)
92
+ * 2. GITHUB_EVENT_NAME === "schedule" — cron-triggered
93
+ * 3. GITHUB_EVENT_NAME === "repository_dispatch" — cross-repo trigger
94
+ * 4. GITHUB_ACTIONS === "true" — CI-triggered
95
+ * 5. Default: manual
96
+ */
97
+ function detectTrigger() {
98
+ const explicit = process.env.AILF_TRIGGER_TYPE;
99
+ if (explicit === "scheduled") {
100
+ return {
101
+ schedule: process.env.AILF_SCHEDULE ?? "unknown",
102
+ type: "scheduled",
103
+ };
104
+ }
105
+ if (explicit === "webhook") {
106
+ return {
107
+ documentId: process.env.AILF_WEBHOOK_DOCUMENT_ID,
108
+ source: process.env.AILF_WEBHOOK_SOURCE ?? "unknown",
109
+ type: "webhook",
110
+ };
111
+ }
112
+ // GitHub Actions context
113
+ const eventName = process.env.GITHUB_EVENT_NAME;
114
+ if (eventName === "schedule") {
115
+ return {
116
+ schedule: process.env.GITHUB_SCHEDULE ?? "unknown",
117
+ type: "scheduled",
118
+ };
119
+ }
120
+ if (eventName === "repository_dispatch") {
121
+ return {
122
+ callerRef: process.env.GITHUB_REF,
123
+ // Note: callerRepo here is a fallback. The accurate caller repo
124
+ // comes from callerGit (injected into the PipelineRequest payload).
125
+ // GITHUB_REPOSITORY_OWNER_ID is just the org ID, not owner/repo.
126
+ callerRepo: process.env.GITHUB_REPOSITORY_OWNER_ID ?? "unknown",
127
+ type: "cross-repo",
128
+ };
129
+ }
130
+ if (process.env.GITHUB_ACTIONS === "true") {
131
+ return {
132
+ runId: process.env.GITHUB_RUN_ID ?? "unknown",
133
+ type: "ci",
134
+ workflow: process.env.GITHUB_WORKFLOW ?? "unknown",
135
+ };
136
+ }
137
+ return { type: "manual" };
138
+ }
139
+ // ---------------------------------------------------------------------------
140
+ // Model config loading
141
+ // ---------------------------------------------------------------------------
142
+ /**
143
+ * Load config/models to extract model list and grader info.
144
+ * Falls back to a minimal config if the file can't be read.
145
+ */
146
+ function loadModelsConfig(rootDir, log) {
147
+ const result = tryLoadConfigFile("models", rootDir);
148
+ if (result)
149
+ return result.data;
150
+ log.warn("Could not read config/models for run context");
151
+ return {
152
+ defaults: {},
153
+ grader: { id: "unknown" },
154
+ models: [],
155
+ };
156
+ }
@@ -0,0 +1,26 @@
1
+ /**
2
+ * upload-test-outputs.ts — shared helper for the testOutputs artifact upload.
3
+ *
4
+ * CalculateScoresStep calls this once its score-summary.json is complete.
5
+ * Each {taskId, modelId} pair becomes one GCS object under
6
+ * `runs/{runId}/test-outputs/{taskId}--{modelId}.json` carrying the full
7
+ * response output and truncation flag. The returned ArtifactRef's
8
+ * `entries[]` catalog lists every uploaded entry so Studio can render
9
+ * drill-down state without a second listing call.
10
+ *
11
+ * PublishReportStep later strips responseOutput from the inline
12
+ * testResults[] when this upload succeeds, so the Content Lake document
13
+ * stays slim — the full output lives in GCS and is fetched per-entry
14
+ * on click.
15
+ *
16
+ * @see docs/decisions/D0032-run-anchored-artifact-store.md
17
+ */
18
+ import type { ArtifactRef, ArtifactWriter, RunId, StoredTestResult } from "../_vendor/ailf-core/index.d.ts";
19
+ /**
20
+ * Upload testOutputs as per-entry GCS objects under
21
+ * `runs/{runId}/test-outputs/`, one per `{taskId}::{modelId}` pair.
22
+ *
23
+ * Returns the `ArtifactRef` on success, or `null` when upload is skipped or
24
+ * fails (P5: non-blocking).
25
+ */
26
+ export declare function uploadTestOutputs(writer: ArtifactWriter, runId: RunId, testResults: StoredTestResult[]): Promise<ArtifactRef | null>;
@@ -0,0 +1,34 @@
1
+ /**
2
+ * upload-test-outputs.ts — shared helper for the testOutputs artifact upload.
3
+ *
4
+ * CalculateScoresStep calls this once its score-summary.json is complete.
5
+ * Each {taskId, modelId} pair becomes one GCS object under
6
+ * `runs/{runId}/test-outputs/{taskId}--{modelId}.json` carrying the full
7
+ * response output and truncation flag. The returned ArtifactRef's
8
+ * `entries[]` catalog lists every uploaded entry so Studio can render
9
+ * drill-down state without a second listing call.
10
+ *
11
+ * PublishReportStep later strips responseOutput from the inline
12
+ * testResults[] when this upload succeeds, so the Content Lake document
13
+ * stays slim — the full output lives in GCS and is fetched per-entry
14
+ * on click.
15
+ *
16
+ * @see docs/decisions/D0032-run-anchored-artifact-store.md
17
+ */
18
+ /**
19
+ * Upload testOutputs as per-entry GCS objects under
20
+ * `runs/{runId}/test-outputs/`, one per `{taskId}::{modelId}` pair.
21
+ *
22
+ * Returns the `ArtifactRef` on success, or `null` when upload is skipped or
23
+ * fails (P5: non-blocking).
24
+ */
25
+ export async function uploadTestOutputs(writer, runId, testResults) {
26
+ const entries = testResults.map((tr) => ({
27
+ key: `${tr.taskId}::${tr.modelId}`,
28
+ data: {
29
+ responseOutput: tr.responseOutput ?? "",
30
+ responseOutputTruncated: tr.responseOutputTruncated ?? false,
31
+ },
32
+ }));
33
+ return writer.writePerEntry("testOutputs", runId, entries);
34
+ }
@@ -211,8 +211,10 @@ export class ReportStore {
211
211
  summary: {
212
212
  ...report.summary,
213
213
  // Artifact references live inside summary in Sanity so they're
214
- // projected automatically by the reportDetailQuery (D0030)
215
- ...(report.artifacts ? { artifacts: report.artifacts } : {}),
214
+ // projected automatically by the reportDetailQuery (D0032)
215
+ ...(report.artifactManifest
216
+ ? { artifactManifest: report.artifactManifest }
217
+ : {}),
216
218
  },
217
219
  tag: report.tag ?? null,
218
220
  title: report.title ?? null,
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@sanity/ailf",
3
- "version": "2.7.1",
3
+ "version": "2.8.0",
4
4
  "private": false,
5
5
  "publishConfig": {
6
6
  "access": "public"
@@ -52,8 +52,8 @@
52
52
  "@types/node": "^22.13.1",
53
53
  "tsx": "^4.19.2",
54
54
  "typescript": "^5.7.3",
55
- "@sanity/ailf-shared": "0.1.0",
56
- "@sanity/ailf-core": "0.1.0"
55
+ "@sanity/ailf-core": "0.1.0",
56
+ "@sanity/ailf-shared": "0.1.0"
57
57
  },
58
58
  "scripts": {
59
59
  "build": "tsc && tsx scripts/bundle-workspace-deps.ts",
@@ -1,35 +0,0 @@
1
- /**
2
- * Port: ArtifactUploader — uploads report artifacts to external object storage.
3
- *
4
- * Separate from ArtifactCollector (which captures forensic archives).
5
- * This port puts structured files at known paths so Studio can fetch
6
- * them on demand via signed URLs.
7
- *
8
- * @see docs/design-docs/external-artifact-store.md
9
- * @see docs/decisions/D0030-external-artifact-store.md
10
- */
11
- import type { ArtifactRef } from "../types/index.js";
12
- /**
13
- * Uploads report artifacts to external storage.
14
- *
15
- * Implementations:
16
- * - GcsReportArtifactUploader (packages/eval) — uploads to GCS
17
- * - NoOpArtifactUploader (below) — returns null (no-op when GCS is not configured)
18
- */
19
- export interface ArtifactUploader {
20
- /**
21
- * Upload a JSON artifact for a report.
22
- *
23
- * @param reportId - Report identifier (used as the GCS path prefix)
24
- * @param fileName - File name within the report prefix (e.g., "test-outputs.json")
25
- * @param data - Serializable data (will be JSON.stringify'd)
26
- * @returns ArtifactRef on success, null if upload is skipped or fails
27
- */
28
- upload(reportId: string, fileName: string, data: unknown): Promise<ArtifactRef | null>;
29
- }
30
- /**
31
- * No-op uploader — always returns null. Used when GCS is not configured.
32
- */
33
- export declare class NoOpArtifactUploader implements ArtifactUploader {
34
- upload(): Promise<null>;
35
- }