@sanity/ailf 2.8.0 → 3.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (91)
  1. package/dist/_vendor/ailf-core/artifact-capture/association.d.ts +35 -0
  2. package/dist/_vendor/ailf-core/artifact-capture/association.js +28 -0
  3. package/dist/_vendor/ailf-core/artifact-registry.d.ts +124 -23
  4. package/dist/_vendor/ailf-core/artifact-registry.js +708 -64
  5. package/dist/_vendor/ailf-core/batch-signing.d.ts +64 -0
  6. package/dist/_vendor/ailf-core/batch-signing.js +23 -0
  7. package/dist/_vendor/ailf-core/index.d.ts +3 -2
  8. package/dist/_vendor/ailf-core/index.js +3 -2
  9. package/dist/_vendor/ailf-core/ports/artifact-writer.d.ts +59 -20
  10. package/dist/_vendor/ailf-core/ports/artifact-writer.js +33 -10
  11. package/dist/_vendor/ailf-core/ports/context.d.ts +20 -17
  12. package/dist/_vendor/ailf-core/ports/index.d.ts +0 -2
  13. package/dist/_vendor/ailf-core/schemas/pipeline.d.ts +6 -6
  14. package/dist/_vendor/ailf-core/services/index.d.ts +1 -0
  15. package/dist/_vendor/ailf-core/services/index.js +1 -0
  16. package/dist/_vendor/ailf-core/services/slim-report-summary.d.ts +31 -0
  17. package/dist/_vendor/ailf-core/services/slim-report-summary.js +217 -0
  18. package/dist/_vendor/ailf-core/types/branded-ids.d.ts +33 -0
  19. package/dist/_vendor/ailf-core/types/index.d.ts +202 -23
  20. package/dist/adapters/config-sources/file-config-adapter.js +0 -4
  21. package/dist/artifact-capture/accumulating-artifact-writer.d.ts +50 -0
  22. package/dist/artifact-capture/accumulating-artifact-writer.js +111 -0
  23. package/dist/artifact-capture/api-gateway-artifact-writer.d.ts +17 -4
  24. package/dist/artifact-capture/api-gateway-artifact-writer.js +58 -7
  25. package/dist/artifact-capture/emit-file.d.ts +28 -0
  26. package/dist/artifact-capture/emit-file.js +56 -0
  27. package/dist/artifact-capture/fanout-artifact-writer.d.ts +39 -0
  28. package/dist/artifact-capture/fanout-artifact-writer.js +76 -0
  29. package/dist/artifact-capture/gcs-artifact-writer.d.ts +40 -3
  30. package/dist/artifact-capture/gcs-artifact-writer.js +238 -14
  31. package/dist/artifact-capture/local-fs-artifact-writer.d.ts +71 -0
  32. package/dist/artifact-capture/local-fs-artifact-writer.js +273 -0
  33. package/dist/artifact-capture/redact-artifact.d.ts +3 -5
  34. package/dist/artifact-capture/redact-artifact.js +3 -5
  35. package/dist/cli.js +56 -2
  36. package/dist/commands/explain-handler.js +4 -4
  37. package/dist/commands/pipeline-action.d.ts +5 -4
  38. package/dist/commands/pipeline-action.js +33 -16
  39. package/dist/commands/pipeline.d.ts +4 -4
  40. package/dist/commands/pipeline.js +4 -4
  41. package/dist/commands/publish.js +4 -1
  42. package/dist/commands/runs.d.ts +18 -0
  43. package/dist/commands/runs.js +71 -0
  44. package/dist/composition-root.d.ts +13 -10
  45. package/dist/composition-root.js +74 -46
  46. package/dist/orchestration/build-app-context.js +4 -7
  47. package/dist/orchestration/pipeline-orchestrator.d.ts +1 -1
  48. package/dist/orchestration/pipeline-orchestrator.js +37 -46
  49. package/dist/orchestration/steps/calculate-scores-step.d.ts +1 -1
  50. package/dist/orchestration/steps/calculate-scores-step.js +19 -19
  51. package/dist/orchestration/steps/callback-step.d.ts +1 -1
  52. package/dist/orchestration/steps/callback-step.js +6 -4
  53. package/dist/orchestration/steps/compare-step.d.ts +1 -1
  54. package/dist/orchestration/steps/compare-step.js +4 -2
  55. package/dist/orchestration/steps/discovery-report-step.d.ts +1 -1
  56. package/dist/orchestration/steps/discovery-report-step.js +4 -1
  57. package/dist/orchestration/steps/fetch-docs-step.js +9 -15
  58. package/dist/orchestration/steps/finalize-run-step.js +21 -7
  59. package/dist/orchestration/steps/gap-analysis-step.js +34 -6
  60. package/dist/orchestration/steps/generate-configs-step.d.ts +1 -1
  61. package/dist/orchestration/steps/generate-configs-step.js +11 -11
  62. package/dist/orchestration/steps/publish-report-step.d.ts +1 -1
  63. package/dist/orchestration/steps/publish-report-step.js +24 -19
  64. package/dist/orchestration/steps/readiness-step.d.ts +1 -1
  65. package/dist/orchestration/steps/readiness-step.js +4 -1
  66. package/dist/orchestration/steps/report-step.d.ts +1 -1
  67. package/dist/orchestration/steps/report-step.js +6 -3
  68. package/dist/orchestration/steps/run-eval-step.js +14 -9
  69. package/dist/pipeline/compare.d.ts +2 -2
  70. package/dist/pipeline/emit-eval-results.d.ts +38 -0
  71. package/dist/pipeline/emit-eval-results.js +100 -0
  72. package/dist/pipeline/map-request-to-config.js +0 -4
  73. package/package.json +1 -1
  74. package/dist/_vendor/ailf-core/artifact-capture/noop-collector.d.ts +0 -14
  75. package/dist/_vendor/ailf-core/artifact-capture/noop-collector.js +0 -25
  76. package/dist/_vendor/ailf-core/ports/artifact-collector.d.ts +0 -94
  77. package/dist/_vendor/ailf-core/ports/artifact-collector.js +0 -13
  78. package/dist/_vendor/ailf-core/ports/capture-comparator.d.ts +0 -138
  79. package/dist/_vendor/ailf-core/ports/capture-comparator.js +0 -10
  80. package/dist/artifact-capture/comparator.d.ts +0 -22
  81. package/dist/artifact-capture/comparator.js +0 -493
  82. package/dist/artifact-capture/filesystem-collector.d.ts +0 -42
  83. package/dist/artifact-capture/filesystem-collector.js +0 -237
  84. package/dist/artifact-capture/gcs-collector.d.ts +0 -55
  85. package/dist/artifact-capture/gcs-collector.js +0 -117
  86. package/dist/commands/capture-compare.d.ts +0 -15
  87. package/dist/commands/capture-compare.js +0 -253
  88. package/dist/commands/capture-list.d.ts +0 -12
  89. package/dist/commands/capture-list.js +0 -150
  90. package/dist/commands/capture.d.ts +0 -9
  91. package/dist/commands/capture.js +0 -16
@@ -0,0 +1,35 @@
1
+ /**
2
+ * assoc() — collapses the `{ run: ctx.runId, …axisValues }` boilerplate
3
+ * every producer would otherwise write at each `ctx.artifactWriter.emit()`
4
+ * call site.
5
+ *
6
+ * Instead of:
7
+ * ctx.artifactWriter.emit("testOutputs", { run: ctx.runId, mode, task, model }, payload)
8
+ *
9
+ * Producers write:
10
+ * ctx.artifactWriter.emit("testOutputs", assoc(ctx, { mode, task, model }), payload)
11
+ *
12
+ * The `run` axis is always present because it's derivable from context;
13
+ * every other axis is whatever the producer's local loop variables are.
14
+ * See `docs/work-items/W0050-...json` (Q1 decision) and `tasks/plan.md § Q1`.
15
+ */
16
+ import type { AssociationValues, RunId } from "../types/branded-ids.js";
17
+ /**
18
+ * Structural subset of `AppContext` needed by `assoc()`. Producers pass
19
+ * their full `ctx` here; tests (and agents writing toy fixtures) can pass
20
+ * a plain `{ runId }` object.
21
+ */
22
+ export interface AssocContext {
23
+ readonly runId: RunId;
24
+ }
25
+ /**
26
+ * Build an `AssociationValues` object with `run` pinned from context and
27
+ * the rest of the axes supplied by the caller.
28
+ *
29
+ * Intentionally does NOT validate that the caller supplied all axes the
30
+ * descriptor requires — that would couple the helper to a specific type
31
+ * and defeat the point. Invalid associations surface at `descriptor.
32
+ * formatEntryKey()` time with a clear error, which is where the contract
33
+ * boundary actually lives.
34
+ */
35
+ export declare function assoc(ctx: AssocContext, partial?: Omit<AssociationValues, "run">): AssociationValues;
@@ -0,0 +1,28 @@
1
+ /**
2
+ * assoc() — collapses the `{ run: ctx.runId, …axisValues }` boilerplate
3
+ * every producer would otherwise write at each `ctx.artifactWriter.emit()`
4
+ * call site.
5
+ *
6
+ * Instead of:
7
+ * ctx.artifactWriter.emit("testOutputs", { run: ctx.runId, mode, task, model }, payload)
8
+ *
9
+ * Producers write:
10
+ * ctx.artifactWriter.emit("testOutputs", assoc(ctx, { mode, task, model }), payload)
11
+ *
12
+ * The `run` axis is always present because it's derivable from context;
13
+ * every other axis is whatever the producer's local loop variables are.
14
+ * See `docs/work-items/W0050-...json` (Q1 decision) and `tasks/plan.md § Q1`.
15
+ */
16
+ /**
17
+ * Build an `AssociationValues` object with `run` pinned from context and
18
+ * the rest of the axes supplied by the caller.
19
+ *
20
+ * Intentionally does NOT validate that the caller supplied all axes the
21
+ * descriptor requires — that would couple the helper to a specific type
22
+ * and defeat the point. Invalid associations surface at `descriptor.
23
+ * formatEntryKey()` time with a clear error, which is where the contract
24
+ * boundary actually lives.
25
+ */
26
+ export function assoc(ctx, partial = {}) {
27
+ return { run: ctx.runId, ...partial };
28
+ }
@@ -1,68 +1,147 @@
1
1
  /**
2
2
  * Artifact registry — single source of truth for AILF's external artifact types.
3
3
  *
4
- * Every artifact that lives in GCS declares itself here exactly once:
5
- * layout, path builder, entry schema, and (for per-entry layouts) key parser.
4
+ * Every artifact that lives in GCS (or on the local filesystem after W0050)
5
+ * declares itself here exactly once: association axes, layout, path builder,
6
+ * entry schema, mime, cap, and (for per-entry layouts) format/parse helpers.
6
7
  * Eval writers, the API Gateway's signing endpoint, and the Studio hook all
7
8
  * consume this same record.
8
9
  *
9
10
  * Adding a new artifact type = one entry here. No call-site changes needed in
10
11
  * the generic writer / signer / hook — they all iterate the registry.
11
12
  *
13
+ * ## Association axes (D0033 / W0049)
14
+ *
15
+ * Each descriptor declares the pipeline dimensions it is evidence about. At
16
+ * module load a structural invariant rejects descriptors that declare an
17
+ * unbounded axis (`task`, `model`, `trial`) but a `"bulk"` layout — such a
18
+ * shape would serialize as a single JSON array that blows past the object-
19
+ * size cap at scale. The invariant converts that class of mistake into a
20
+ * process-won't-start error rather than a silent data bug.
21
+ *
12
22
  * @see docs/decisions/D0032-run-anchored-artifact-store.md
13
- * @see docs/design-docs/run-artifact-store.md (§ Move 4 — Artifact Registry)
23
+ * @see docs/decisions/D0033-unified-run-anchored-artifact-capture.md
24
+ * @see docs/design-docs/unified-run-artifacts.md (§ M1, § M5)
14
25
  */
15
26
  import { z } from "zod";
16
- import type { RunId } from "./types/branded-ids.js";
27
+ import type { AssociationAxis, AssociationValues, EntryKey, RunId } from "./types/branded-ids.js";
17
28
  /** Layouts supported by the artifact store. */
18
29
  export type ArtifactLayout = "bulk" | "per-entry";
30
+ /** MIME types the registry knows how to place on disk. */
31
+ export type ArtifactMime = "application/json" | "application/x-ndjson" | "text/markdown" | "application/yaml";
32
+ /**
33
+ * Behavior when a payload exceeds a descriptor's `capBytes`:
34
+ * - `"reject"` — drop the write and log a warning (default for bounded entries).
35
+ * - `"trailing-truncate"` — serialize up to the cap, mark the entry truncated.
36
+ * - `"fielded-truncate"` — a descriptor-specific truncator trims the largest
37
+ * fields first (used by richly-nested payloads whose trailing bytes aren't
38
+ * the right thing to drop).
39
+ * - `"trial-oversize"` (traces only) — drop overflow rows for the current
40
+ * trial, keep the rest of the trial's artifacts intact.
41
+ */
42
+ export type ArtifactTruncationPolicy = "reject" | "trailing-truncate" | "fielded-truncate" | "trial-oversize";
19
43
  /** The union of every artifact type known to AILF. */
20
- export type ArtifactType = "testOutputs" | "renderedPrompts" | "rawResults" | "graderPrompts" | "taskDefinitions" | "evalResults" | "traces";
44
+ export type ArtifactType = "runManifest" | "scoreSummary" | "pipelineResult" | "pipelineContext" | "documentManifest" | "prComment" | "readinessReport" | "reportSnapshot" | "autoComparison" | "gapReport" | "sinkResults" | "callbackRequest" | "callbackResponse" | "configSnapshot" | "evalConfigGenerated" | "comparisonReport" | "discoveryReport" | "failureModes" | "taskDefinitions" | "renderedPrompts" | "rawResults" | "testOutputs" | "graderPrompts" | "graderJudgments" | "traces";
21
45
  /**
22
46
  * Result of parsing a per-entry key into a sanitized filename component.
23
47
  * Success carries the sanitized value; failure carries a reason for 4xx responses.
24
48
  */
25
49
  export type ParsedEntryKey = {
26
50
  ok: true;
27
- sanitized: string;
51
+ sanitized: EntryKey;
28
52
  } | {
29
53
  ok: false;
30
54
  reason: string;
31
55
  };
56
+ /**
57
+ * Declaration of which pipeline axes an artifact is evidence about. Axis order
58
+ * drives both the entry-key layout (join order) and the invariant check
59
+ * (unbounded axis ⇒ per-entry).
60
+ */
61
+ export interface AssociationDeclaration {
62
+ readonly axes: readonly AssociationAxis[];
63
+ }
64
+ /**
65
+ * Optional inline preview declaration. When present, the writer extracts a
66
+ * small summary shape at write time and attaches it to the manifest entry so
67
+ * Studio list views can render without fetching the external payload.
68
+ *
69
+ * Schema-only in W0049 — the write-time `extract()` wiring lands in W0051.
70
+ * `extract` takes `unknown` here so the registry can store heterogeneously
71
+ * typed descriptors without variance issues; W0051 tightens the parameter
72
+ * type to the owning descriptor's `TEntry` at the concrete-descriptor level.
73
+ */
74
+ export interface ManifestPreviewDeclaration<TPreview = unknown> {
75
+ readonly schema: z.ZodType<TPreview>;
76
+ readonly extract: (entry: unknown) => TPreview;
77
+ readonly capBytes: number;
78
+ }
32
79
  /**
33
80
  * Per-type declaration consumed by writers, signers, and readers.
34
81
  *
35
- * @typeParam TEntry - The shape of a single entry. For bulk layouts this is
36
- * the shape of each value in the bulk object's index; for
37
- * per-entry layouts it's the shape of a single GCS object.
82
+ * @typeParam TEntry - Shape of a single entry payload.
83
+ * @typeParam TPreview - Shape of the optional manifest preview (W0051).
38
84
  */
39
- export interface ArtifactDescriptor<TEntry = unknown> {
85
+ export interface ArtifactDescriptor<TEntry = unknown, TPreview = unknown> {
40
86
  /** The artifact type identifier (matches the key in ARTIFACT_REGISTRY). */
41
- type: ArtifactType;
87
+ readonly type: ArtifactType;
42
88
  /** Bulk (one object per run) or per-entry (one object per entryKey). */
43
- layout: ArtifactLayout;
89
+ readonly layout: ArtifactLayout;
44
90
  /** Kebab-case filename stem. Used by both bulk paths and per-entry dir names. */
45
- slug: string;
46
- /** Zod schema for validating a single entry. */
47
- entrySchema: z.ZodType<TEntry>;
91
+ readonly slug: string;
92
+ /** Pipeline axes this artifact is evidence about (D0033). */
93
+ readonly association: AssociationDeclaration;
94
+ /** Zod schema for validating a single entry payload. */
95
+ readonly entrySchema: z.ZodType<TEntry>;
96
+ /** Wire-level content type (drives filename extension + upload Content-Type). */
97
+ readonly mime: ArtifactMime;
98
+ /** Upper bound on serialized entry bytes. Enforced by the writer. */
99
+ readonly capBytes: number;
100
+ /** Behavior when a payload exceeds `capBytes`. Defaults to `"reject"`. */
101
+ readonly truncation?: ArtifactTruncationPolicy;
102
+ /**
103
+ * Some artifacts are expected not to fire every run (e.g. `prComment` only
104
+ * fires on PR-context runs). Marking them `optional` keeps the manifest
105
+ * catalog honest about what is absent vs. failed.
106
+ */
107
+ readonly optional?: boolean;
48
108
  /**
49
109
  * Build the GCS object path for this artifact.
50
- * - bulk: returns `runs/{runId}/{slug}.json`; `entryKey` is ignored.
51
- * - per-entry: requires `entryKey`; returns `runs/{runId}/{slug}/{sanitized}.json`.
110
+ * - bulk: returns `runs/{runId}/{slug}.{ext}`; `entryKey` is ignored.
111
+ * - per-entry: requires `entryKey`; returns `runs/{runId}/{slug}/{sanitized}.{ext}`.
52
112
  */
53
- objectPath: (runId: RunId, entryKey?: string) => string;
113
+ readonly objectPath: (runId: RunId, entryKey?: string) => string;
54
114
  /**
55
- * Validate a per-entry key and return its sanitized filename component.
56
- * Only meaningful for `layout === "per-entry"` — unused when layout is bulk,
57
- * but may be pre-declared so a future layout flip is a one-line change.
115
+ * Build a filename-safe entry key from association values. Only meaningful
116
+ * for `layout === "per-entry"` — bulk descriptors omit it.
58
117
  */
59
- parseEntryKey?: (key: string) => ParsedEntryKey;
118
+ readonly formatEntryKey?: (assoc: AssociationValues) => EntryKey;
119
+ /**
120
+ * Validate a per-entry key received on the wire (URL param, etc.) and
121
+ * return its sanitized filename component. Only meaningful for
122
+ * `layout === "per-entry"`; bulk descriptors omit it.
123
+ */
124
+ readonly parseEntryKey?: (key: string) => ParsedEntryKey;
125
+ /**
126
+ * Optional inline preview for triage-friendly list views. Populated at
127
+ * write time in W0051 — in this phase the field is schema-only and is
128
+ * carried on the descriptor so downstream tooling can see what the
129
+ * preview shape will be.
130
+ */
131
+ readonly manifestPreview?: ManifestPreviewDeclaration<TPreview>;
60
132
  }
133
+ /** Test-only reset for the legacy-key warning flag. Not exported publicly. */
134
+ export declare function __resetLegacyTestOutputsWarning(): void;
61
135
  /**
62
136
  * The canonical artifact descriptor for every artifact type. Iterate with
63
137
  * `Object.values(ARTIFACT_REGISTRY)` or look up by `ARTIFACT_REGISTRY[type]`.
138
+ *
139
+ * Axes, layout, and caps come from docs/design-docs/unified-run-artifacts.md
140
+ * § M5. The mapping is verified by the L1 contract tests.
64
141
  */
65
- export declare const ARTIFACT_REGISTRY: Record<ArtifactType, ArtifactDescriptor>;
142
+ export declare const ARTIFACT_REGISTRY: {
143
+ readonly [T in ArtifactType]: ArtifactDescriptor;
144
+ };
66
145
  /** All artifact types in declaration order. */
67
146
  export declare const ARTIFACT_TYPES: readonly ArtifactType[];
68
147
  /**
@@ -70,3 +149,25 @@ export declare const ARTIFACT_TYPES: readonly ArtifactType[];
70
149
  * Useful at API Gateway boundaries where the type comes from a URL parameter.
71
150
  */
72
151
  export declare function isArtifactType(value: string): value is ArtifactType;
152
+ /**
153
+ * Structural check run against a single descriptor. Exported so L1 contract
154
+ * tests can construct an invalid descriptor inline and assert the throw.
155
+ */
156
+ export declare function assertValidArtifactDescriptor(desc: ArtifactDescriptor): void;
157
+ /**
158
+ * Build the inline preview for a manifest entry at write time. Returns
159
+ * `undefined` when the descriptor has no `manifestPreview` declaration,
160
+ * when extraction throws, when the schema rejects the extracted shape, or
161
+ * when cap-enforcement cannot bring the serialized preview under the
162
+ * descriptor's `capBytes` budget.
163
+ *
164
+ * Failure is non-fatal: preview is triage metadata, never critical-path data.
165
+ * The full payload still lands in the external artifact regardless.
166
+ *
167
+ * Cap enforcement (hard truncation) iteratively shortens the longest string
168
+ * field on the preview object by ~10% per pass until the JSON-serialized
169
+ * form fits under `capBytes` or no string remains to trim. Nested objects
170
+ * are not recursed — previews are intentionally shallow (a handful of
171
+ * top-level fields).
172
+ */
173
+ export declare function buildManifestPreview<TPreview = unknown>(descriptor: ArtifactDescriptor<unknown, TPreview>, payload: unknown): TPreview | undefined;