@sanity/ailf 4.4.0 → 4.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (34)
  1. package/dist/_vendor/ailf-core/artifact-registry.d.ts +138 -1
  2. package/dist/_vendor/ailf-core/artifact-registry.js +137 -4
  3. package/dist/_vendor/ailf-core/ports/context.d.ts +18 -0
  4. package/dist/_vendor/ailf-core/ports/index.d.ts +2 -0
  5. package/dist/_vendor/ailf-core/ports/index.js +1 -0
  6. package/dist/_vendor/ailf-core/ports/llm-client.d.ts +112 -0
  7. package/dist/_vendor/ailf-core/ports/llm-client.js +68 -0
  8. package/dist/_vendor/ailf-core/types/confidence.d.ts +68 -0
  9. package/dist/_vendor/ailf-core/types/confidence.js +49 -0
  10. package/dist/_vendor/ailf-core/types/index.d.ts +2 -0
  11. package/dist/_vendor/ailf-core/types/index.js +1 -0
  12. package/dist/adapters/llm/anthropic-llm-client.d.ts +48 -0
  13. package/dist/adapters/llm/anthropic-llm-client.js +205 -0
  14. package/dist/adapters/llm/fake-llm-client.d.ts +49 -0
  15. package/dist/adapters/llm/fake-llm-client.js +63 -0
  16. package/dist/adapters/llm/index.d.ts +9 -0
  17. package/dist/adapters/llm/index.js +4 -0
  18. package/dist/adapters/llm/openai-llm-client.d.ts +44 -0
  19. package/dist/adapters/llm/openai-llm-client.js +168 -0
  20. package/dist/adapters/llm/pricing.d.ts +12 -0
  21. package/dist/adapters/llm/pricing.js +8 -0
  22. package/dist/adapters/llm/retry.d.ts +56 -0
  23. package/dist/adapters/llm/retry.js +66 -0
  24. package/dist/adapters/task-sources/repo-schemas.d.ts +11 -11
  25. package/dist/artifact-capture/api-gateway-artifact-writer.js +2 -1
  26. package/dist/artifact-capture/batching-api-gateway-artifact-writer.js +2 -1
  27. package/dist/artifact-capture/gcs-artifact-writer.js +3 -1
  28. package/dist/artifact-capture/local-fs-artifact-writer.js +3 -1
  29. package/dist/commands/pipeline-action.js +7 -1
  30. package/dist/commands/run.d.ts +1 -0
  31. package/dist/commands/run.js +1 -0
  32. package/dist/composition-root.d.ts +23 -1
  33. package/dist/composition-root.js +47 -0
  34. package/package.json +3 -3
@@ -29,6 +29,44 @@ import type { AssociationAxis, AssociationValues, EntryKey, RunId } from "./type
29
29
  export type ArtifactLayout = "bulk" | "per-entry";
30
30
  /** MIME types the registry knows how to place on disk. */
31
31
  export type ArtifactMime = "application/json" | "application/x-ndjson" | "text/markdown" | "application/yaml";
32
+ /**
33
+ * Who is permitted to write the artifact (D0050).
34
+ *
35
+ * `"pipeline"` artifacts are written during a pipeline run by a pipeline
36
+ * step (the legacy default — every pre-D0050 descriptor is implicitly
37
+ * `"pipeline"`).
38
+ *
39
+ * `"post-hoc"` artifacts are written **after** the run is finalized, by
40
+ * a separate command/action (e.g. `ailf interpret`, a Studio action, an
41
+ * API endpoint). Post-hoc artifacts may accumulate multiple versions
42
+ * within a single run prefix; the writer-side guard
43
+ * (`assertWritePolicyMatches`) prevents pipeline writers from emitting
44
+ * post-hoc descriptors and vice versa.
45
+ */
46
+ export type WritePolicy = "pipeline" | "post-hoc";
47
+ /**
48
+ * Source-of-version for descriptors that opt into the versioning axis
49
+ * (D0050). The path builder consumes the version segment to produce
50
+ * `runs/{runId}/{slug}-{version}.{ext}`.
51
+ *
52
+ * - `"schemaVersion"` — version tracks the writer's serialized-shape
53
+ * schema (e.g. attribution payload schema).
54
+ * - `"promptVersion"` — version tracks the prompt the artifact was
55
+ * produced under (e.g. a regenerated grader prompt).
56
+ * - `"diagnosisVersion"` — version tracks the diagnosis run that produced
57
+ * the artifact (e.g. `ailf interpret`'s versioned output).
58
+ *
59
+ * The union is intentionally narrow; D0050 leaves a function-shaped
60
+ * versioner as a future extension if version semantics get richer.
61
+ */
62
+ export type VersionedBy = "schemaVersion" | "promptVersion" | "diagnosisVersion";
63
+ /**
64
+ * Identity of the calling context when invoking the artifact writer.
65
+ * Mirrors `WritePolicy` but at the writer-instance level (a writer is
66
+ * either a pipeline writer or a post-hoc writer; descriptors declare
67
+ * which kind of writer is permitted to emit them).
68
+ */
69
+ export type WriteSource = WritePolicy;
32
70
  /**
33
71
  * Behavior when a payload exceeds a descriptor's `capBytes`:
34
72
  * - `"reject"` — drop the write and log a warning (default for bounded entries).
@@ -105,12 +143,29 @@ export interface ArtifactDescriptor<TEntry = unknown, TPreview = unknown> {
105
143
  * catalog honest about what is absent vs. failed.
106
144
  */
107
145
  readonly optional?: boolean;
146
+ /**
147
+ * Who writes this artifact (D0050). When unset, defaults to `"pipeline"`
148
+ * — matching every pre-D0050 descriptor's behavior. `"post-hoc"`
149
+ * artifacts are emitted by a separate command after the run finalizes
150
+ * and may accumulate multiple versions per run prefix.
151
+ */
152
+ readonly writePolicy?: WritePolicy;
153
+ /**
154
+ * Source of the version segment in the path (D0050). When set, the
155
+ * descriptor's `objectPath` produces a versioned path
156
+ * (`runs/{runId}/{slug}-{version}.{ext}`) and `version` is required.
157
+ * When unset, the path is unversioned (legacy behavior).
158
+ */
159
+ readonly versionedBy?: VersionedBy;
108
160
  /**
109
161
  * Build the GCS object path for this artifact.
110
162
  * - bulk: returns `runs/{runId}/{slug}.{ext}`; `entryKey` is ignored.
111
163
  * - per-entry: requires `entryKey`; returns `runs/{runId}/{slug}/{sanitized}.{ext}`.
164
+ * - versioned (`versionedBy` set): requires `version`; returns
165
+ * `runs/{runId}/{slug}-{version}.{ext}` for bulk-shaped versioned
166
+ * descriptors. `entryKey` is ignored on versioned bulk paths.
112
167
  */
113
- readonly objectPath: (runId: RunId, entryKey?: string) => string;
168
+ readonly objectPath: (runId: RunId, entryKey?: string, version?: string) => string;
114
169
  /**
115
170
  * Build a filename-safe entry key from association values. Only meaningful
116
171
  * for `layout === "per-entry"` — bulk descriptors omit it.
@@ -130,6 +185,22 @@ export interface ArtifactDescriptor<TEntry = unknown, TPreview = unknown> {
130
185
  */
131
186
  readonly manifestPreview?: ManifestPreviewDeclaration<TPreview>;
132
187
  }
188
+ /**
189
+ * Bulk-shaped path builder with a `{version}` segment appended to the
190
+ * filename stem (D0050). Used by descriptors that opt into the versioning
191
+ * axis via `versionedBy`. Multiple versions coexist under the same run
192
+ * prefix as siblings:
193
+ *
194
+ * `runs/{runId}/{slug}-v1.{ext}`
195
+ * `runs/{runId}/{slug}-v2.{ext}`
196
+ *
197
+ * The version segment is sanitized via `sanitizeEntryKey` so versions
198
+ * containing `/` or wire separators don't accidentally nest into
199
+ * subdirectories. Empty / whitespace-only versions are rejected — a
200
+ * versioned descriptor with no version is a programmer error, not silently
201
+ * collapsed to an unversioned path.
202
+ */
203
+ export declare function versionedPathBuilder(slug: string, mime: ArtifactMime): (runId: RunId, _entryKey?: string, version?: string) => string;
133
204
  /** Test-only reset for the legacy-key warning flag. Not exported publicly. */
134
205
  export declare function __resetLegacyTestOutputsWarning(): void;
135
206
  /**
@@ -154,6 +225,72 @@ export declare function isArtifactType(value: string): value is ArtifactType;
154
225
  * tests can construct an invalid descriptor inline and assert the throw.
155
226
  */
156
227
  export declare function assertValidArtifactDescriptor(desc: ArtifactDescriptor): void;
228
+ /**
229
+ * Thrown when a writer's identity (`writerSource`) doesn't match a
230
+ * descriptor's `writePolicy`. Pipeline writers can't emit `"post-hoc"`
231
+ * descriptors (the post-hoc artifact would land mid-run, before the run
232
+ * finalizes); post-hoc writers can't emit `"pipeline"` descriptors
233
+ * (those should have been written by the pipeline itself).
234
+ *
235
+ * The error type is intentionally distinct from `Error` so CI can match
236
+ * on the class and surface mismatches clearly in failure logs.
237
+ */
238
+ export declare class WritePolicyMismatchError extends Error {
239
+ readonly code: "WRITE_POLICY_MISMATCH";
240
+ readonly artifactType: ArtifactType;
241
+ readonly descriptorPolicy: WritePolicy;
242
+ readonly writerSource: WriteSource;
243
+ constructor(opts: {
244
+ artifactType: ArtifactType;
245
+ descriptorPolicy: WritePolicy;
246
+ writerSource: WriteSource;
247
+ });
248
+ }
249
+ /**
250
+ * Resolve a descriptor's effective write policy. Defaults to `"pipeline"`
251
+ * when unset — preserves backward compatibility with every pre-D0050
252
+ * descriptor that doesn't declare the field.
253
+ */
254
+ export declare function resolveWritePolicy(desc: ArtifactDescriptor): WritePolicy;
255
+ /**
256
+ * Writer-side guard. Call at the top of `emit()` / `appendNdjson()` in
257
+ * every artifact writer that physically writes bytes (the in-memory test
258
+ * doubles don't need it). Throws `WritePolicyMismatchError` on
259
+ * mismatch; returns silently on a match. Pure function — no I/O, safe
260
+ * to invoke from any layer.
261
+ */
262
+ export declare function assertWritePolicyMatches(writerSource: WriteSource, descriptor: ArtifactDescriptor): void;
263
+ /**
264
+ * Slim-shape preview for `"post-hoc"` descriptors. Replaces the
265
+ * fixed-path semantics that pipeline-written artifacts use (a single
266
+ * known path per descriptor) with `present: boolean` plus an optional
267
+ * `latestVersion: string` — necessary because:
268
+ *
269
+ * - A post-hoc artifact may have zero versions (never written) or
270
+ * multiple versions (regenerated). A fixed `path` cannot encode
271
+ * either.
272
+ * - Slim-shape consumers (Studio rollups) want to know "is there a
273
+ * diagnosis for this run?" without enumerating versioned siblings.
274
+ *
275
+ * Post-hoc writers populate `latestVersion` after a successful write
276
+ * (last-write-wins per-version semantics, per D0050 open-question
277
+ * resolution). The full versioned payload is fetched via the
278
+ * descriptor's `objectPath(runId, undefined, latestVersion)`.
279
+ */
280
+ export declare const postHocSlimPreviewSchema: z.ZodObject<{
281
+ present: z.ZodBoolean;
282
+ latestVersion: z.ZodOptional<z.ZodString>;
283
+ }, z.core.$strip>;
284
+ export type PostHocSlimPreview = z.infer<typeof postHocSlimPreviewSchema>;
285
+ /**
286
+ * Build a post-hoc slim-shape preview. `latestVersion` is omitted when
287
+ * absent rather than emitted as `undefined`, matching the optional-field
288
+ * convention used elsewhere in the registry.
289
+ */
290
+ export declare function buildPostHocSlimPreview(opts: {
291
+ present: boolean;
292
+ latestVersion?: string;
293
+ }): PostHocSlimPreview;
157
294
  /**
158
295
  * Build the inline preview for a manifest entry at write time. Returns
159
296
  * `undefined` when the descriptor has no `manifestPreview` declaration,
@@ -59,6 +59,34 @@ function perEntryPathBuilder(slug, mime) {
59
59
  return `runs/${runId}/${slug}/${sanitized}.${ext}`;
60
60
  };
61
61
  }
62
/**
 * Create the `objectPath` implementation for a versioned, bulk-shaped
 * artifact (D0050). Descriptors that set `versionedBy` use it so several
 * versions can sit next to each other under one run prefix:
 *
 *   `runs/{runId}/{slug}-v1.{ext}`
 *   `runs/{runId}/{slug}-v2.{ext}`
 *
 * The file extension is resolved from `mime` once, when the builder is
 * created. The returned function ignores its second argument (the entry
 * key) and requires the third (`version`).
 *
 * The version is passed through `sanitizeEntryKey` before it is placed in
 * the filename, so a version containing `/` or wire separators is not meant
 * to nest into a subdirectory.
 *
 * The returned function throws `Error` when `version` is `undefined`, empty
 * or whitespace-only, or when `hasControlChars(version)` reports control
 * characters. A versioned descriptor without a usable version is treated as
 * a programmer error rather than silently collapsing to an unversioned path.
 */
export function versionedPathBuilder(slug, mime) {
    const extension = mimeExtension(mime);
    return (runId, _entryKey, version) => {
        const versionMissing = version === undefined || version.trim() === "";
        if (versionMissing) {
            throw new Error(`Artifact "${slug}" uses versioned layout; a non-empty version is required`);
        }
        if (hasControlChars(version)) {
            throw new Error(`Artifact "${slug}" version must not contain control characters`);
        }
        // Version becomes a suffix of the filename stem, never a path segment.
        const fileStem = `${slug}-${sanitizeEntryKey(version)}`;
        return `runs/${runId}/${fileStem}.${extension}`;
    };
}
62
90
  /**
63
91
  * Convert an entry key (wire format, e.g. `{taskId}::{modelId}`) to a
64
92
  * filename-safe component.
@@ -415,9 +443,11 @@ function titleCaseCategory(id) {
415
443
  .join(" ");
416
444
  }
417
445
  function buildDescriptor(input) {
418
- const objectPath = input.layout === "bulk"
419
- ? bulkPathBuilder(input.slug, input.mime)
420
- : perEntryPathBuilder(input.slug, input.mime);
446
+ const objectPath = input.versionedBy
447
+ ? versionedPathBuilder(input.slug, input.mime)
448
+ : input.layout === "bulk"
449
+ ? bulkPathBuilder(input.slug, input.mime)
450
+ : perEntryPathBuilder(input.slug, input.mime);
421
451
  const formatEntryKey = input.layout === "per-entry" ? formatKeyFromAxes(input.axes) : undefined;
422
452
  const parseEntryKey = input.layout === "per-entry"
423
453
  ? (input.parseEntryKey ?? parseKeyByAxes(input.type, input.axes))
@@ -432,6 +462,8 @@ function buildDescriptor(input) {
432
462
  capBytes: input.capBytes,
433
463
  truncation: input.truncation,
434
464
  optional: input.optional,
465
+ writePolicy: input.writePolicy,
466
+ versionedBy: input.versionedBy,
435
467
  objectPath,
436
468
  formatEntryKey,
437
469
  parseEntryKey,
@@ -943,12 +975,21 @@ export function isArtifactType(value) {
943
975
  return value in ARTIFACT_REGISTRY;
944
976
  }
945
977
  // ---------------------------------------------------------------------------
946
- // Module-load invariant (D0033 / W0049)
978
+ // Module-load invariant (D0033 / W0049 / D0050)
947
979
  // ---------------------------------------------------------------------------
948
980
  /**
949
981
  * Unbounded axes — dimensions whose cardinality grows with a run. A bulk
950
982
  * artifact fanning across these cannot bound its payload; the registry
951
983
  * forbids that shape at import time.
984
+ *
985
+ * **Layout rule (D0050).** Bulk descriptors must declare *only* bounded
986
+ * axes — fanning a single JSON across an unbounded axis (`task`, `model`,
987
+ * `trial`) violates the size cap at scale. Per-entry descriptors *may*
988
+ * declare unbounded axes; the per-entry layout naturally produces one
989
+ * object per axis tuple, so unboundedness becomes the file count, not
990
+ * the file size. The existing `testOutputs` per-entry descriptor has
991
+ * carried unbounded `task`+`model` axes since W0048 and is the precedent
992
+ * D0050 formalizes for attribution.
952
993
  */
953
994
  const UNBOUNDED_AXES = [
954
995
  "task",
@@ -972,6 +1013,12 @@ export function assertValidArtifactDescriptor(desc) {
972
1013
  if (desc.layout === "per-entry" && !desc.formatEntryKey) {
973
1014
  throw new Error(`Artifact ${desc.type}: per-entry descriptors must declare formatEntryKey`);
974
1015
  }
1016
+ // D0050 — versioned descriptors are bulk-shaped only in v0; per-entry +
1017
+ // versioned is a future extension and rejected at module load so a
1018
+ // half-wired descriptor doesn't ship by accident.
1019
+ if (desc.versionedBy && desc.layout !== "bulk") {
1020
+ throw new Error(`Artifact ${desc.type}: versionedBy is only supported on bulk descriptors (got layout "${desc.layout}")`);
1021
+ }
975
1022
  }
976
1023
  // Fire the invariant at import time — a bad descriptor kills the process
977
1024
  // before any producer can silently serialize an oversized JSON array.
@@ -979,6 +1026,92 @@ for (const desc of Object.values(ARTIFACT_REGISTRY)) {
979
1026
  assertValidArtifactDescriptor(desc);
980
1027
  }
981
1028
  // ---------------------------------------------------------------------------
1029
+ // Write-policy guard (D0050)
1030
+ // ---------------------------------------------------------------------------
1031
/**
 * Error raised when the identity of the calling writer (`writerSource`)
 * differs from the `writePolicy` of the descriptor it is trying to emit.
 * A pipeline writer must not emit a `"post-hoc"` descriptor (it would land
 * mid-run, before the run is finalized), and a post-hoc writer must not
 * emit a `"pipeline"` descriptor (the pipeline should have written it).
 *
 * It is a dedicated subclass so callers and CI can match on the class, on
 * `name`, or on the stable `code` instead of parsing the message.
 *
 * Fields copied from the constructor options:
 * - `artifactType` — the descriptor type that was rejected.
 * - `descriptorPolicy` — the descriptor's effective write policy.
 * - `writerSource` — the identity of the writer that attempted the write.
 */
export class WritePolicyMismatchError extends Error {
    code = "WRITE_POLICY_MISMATCH";
    artifactType;
    descriptorPolicy;
    writerSource;
    constructor(opts) {
        super([
            `Artifact "${opts.artifactType}" has writePolicy="${opts.descriptorPolicy}" but writer is "${opts.writerSource}". `,
            `Pipeline writers cannot emit post-hoc descriptors and post-hoc writers cannot emit pipeline descriptors.`,
        ].join(""));
        const { artifactType, descriptorPolicy, writerSource } = opts;
        this.name = "WritePolicyMismatchError";
        Object.assign(this, { artifactType, descriptorPolicy, writerSource });
    }
}
1055
/**
 * Return the write policy that applies to a descriptor.
 *
 * A descriptor whose `writePolicy` is `undefined` or `null` is treated as
 * `"pipeline"`, so descriptors written before D0050 (which never declare
 * the field) keep their existing behavior. Any other value is returned
 * unchanged.
 */
export function resolveWritePolicy(desc) {
    const { writePolicy } = desc;
    const isUnset = writePolicy === undefined || writePolicy === null;
    return isUnset ? "pipeline" : writePolicy;
}
1063
/**
 * Writer-side guard for the write policy (D0050). Intended to run at the
 * top of `emit()` / `appendNdjson()` in every artifact writer that
 * physically writes bytes; in-memory test doubles do not need it.
 *
 * Resolves the descriptor's effective policy via `resolveWritePolicy` and
 * compares it with `writerSource`. Returns silently when they are equal and
 * throws `WritePolicyMismatchError` (carrying the descriptor type, the
 * resolved policy and the writer source) otherwise. Performs no I/O itself.
 */
export function assertWritePolicyMatches(writerSource, descriptor) {
    const effectivePolicy = resolveWritePolicy(descriptor);
    if (effectivePolicy === writerSource) {
        return;
    }
    throw new WritePolicyMismatchError({
        artifactType: descriptor.type,
        descriptorPolicy: effectivePolicy,
        writerSource,
    });
}
1080
+ // ---------------------------------------------------------------------------
1081
+ // Slim-shape preview for post-hoc artifacts (D0050)
1082
+ // ---------------------------------------------------------------------------
1083
+ /**
1084
+ * Slim-shape preview for `"post-hoc"` descriptors. Replaces the
1085
+ * fixed-path semantics that pipeline-written artifacts use (a single
1086
+ * known path per descriptor) with `present: boolean` plus an optional
1087
+ * `latestVersion: string` — necessary because:
1088
+ *
1089
+ * - A post-hoc artifact may have zero versions (never written) or
1090
+ * multiple versions (regenerated). A fixed `path` cannot encode
1091
+ * either.
1092
+ * - Slim-shape consumers (Studio rollups) want to know "is there a
1093
+ * diagnosis for this run?" without enumerating versioned siblings.
1094
+ *
1095
+ * Post-hoc writers populate `latestVersion` after a successful write
1096
+ * (last-write-wins per-version semantics, per D0050 open-question
1097
+ * resolution). The full versioned payload is fetched via the
1098
+ * descriptor's `objectPath(runId, undefined, latestVersion)`.
1099
+ */
1100
+ export const postHocSlimPreviewSchema = z.object({
1101
+ present: z.boolean(),
1102
+ latestVersion: z.string().optional(),
1103
+ });
1104
/**
 * Assemble a slim-shape preview object for a post-hoc artifact.
 *
 * When `opts.latestVersion` is `undefined` the returned object has no
 * `latestVersion` key at all (rather than a key holding `undefined`),
 * which follows the optional-field convention used in the registry.
 * Otherwise both `present` and `latestVersion` are copied through.
 */
export function buildPostHocSlimPreview(opts) {
    const { present, latestVersion } = opts;
    if (latestVersion === undefined) {
        return { present };
    }
    return { present, latestVersion };
}
1114
+ // ---------------------------------------------------------------------------
982
1115
  // Manifest preview helper (W0051 / D0033 M7)
983
1116
  // ---------------------------------------------------------------------------
984
1117
  /**
@@ -18,6 +18,7 @@ import type { ArtifactWriter } from "./artifact-writer.js";
18
18
  import type { CacheStore } from "./cache-store.js";
19
19
  import type { DocFetcher } from "./doc-fetcher.js";
20
20
  import type { EvalRunner } from "./eval-runner.js";
21
+ import type { LLMClient } from "./llm-client.js";
21
22
  import type { Logger } from "./logger.js";
22
23
  import type { PackageSurfaceResolver } from "./package-surface-resolver.js";
23
24
  import type { ProgressReporter } from "./progress-reporter.js";
@@ -207,6 +208,16 @@ export interface ResolvedConfig {
207
208
  * reconfiguring the gateway as well (D0030).
208
209
  */
209
210
  artifactGcsBucket?: string;
211
+ /**
212
+ * Selects the `LLMClient` adapter wired by the composition root (D0051).
213
+ *
214
+ * - `undefined` (default): auto — use Anthropic when `ANTHROPIC_API_KEY` is
215
+ * present, otherwise OpenAI when `OPENAI_API_KEY` is present, otherwise
216
+ * leave `llmClient` unset.
217
+ * - `"anthropic" | "openai"`: explicit selection. The composition root still
218
+ * reads the corresponding env var; if it's missing, `llmClient` is unset.
219
+ */
220
+ llmProvider?: "anthropic" | "openai";
210
221
  /**
211
222
  * Controls whether the ArtifactUploader is constructed.
212
223
  *
@@ -247,6 +258,13 @@ export interface AppContext {
247
258
  readonly docFetcher?: DocFetcher;
248
259
  /** LLM evaluation runner (Promptfoo adapter) */
249
260
  readonly evalRunner: EvalRunner;
261
+ /**
262
+ * LLM access for non-grader features (D0051). Optional during rollout —
263
+ * the composition root wires it when an OpenAI or Anthropic API key is
264
+ * available. Consumers (diagnosis cards, meta-eval) assert presence at
265
+ * their own call sites.
266
+ */
267
+ readonly llmClient?: LLMClient;
250
268
  /** Structured logger */
251
269
  readonly logger: Logger;
252
270
  /**
@@ -11,6 +11,8 @@ export type { ConfigSource } from "./config-source.js";
11
11
  export type { AppContext, ReportSinkPort, ReportStorePort, ResolvedConfig, } from "./context.js";
12
12
  export type { DocContext, DocFetcher, DocSourceConfig, DocumentManifestEntry, DocumentOverlaySummary, FetchMetadata, FetchResult, ReleaseImpact, SymbolIndexManifestEntry, UrlFetchEntry, UrlFetchSummary, } from "./doc-fetcher.js";
13
13
  export type { EvalRunConfig, EvalRunner } from "./eval-runner.js";
14
+ export type { LLMCallContext, LLMClient, LLMCompleteArgs, LLMCompleteStructuredArgs, LLMCompletion, LLMStructuredCompletion, LLMUsage, ModelId, ModelProvider, ParsedModelId, } from "./llm-client.js";
15
+ export { modelId, parseModelId, splitModelId } from "./llm-client.js";
14
16
  export type { CompilationContext, CompileResultAssertion, CompileResultPrompt, CompileResultProvider, CompileResultTestCase, ModeCompileResult, ModeHandler, ModeProviderEntry, ModeRubricConfig, PromptTemplate, } from "./mode-handler.js";
15
17
  export type { Logger } from "./logger.js";
16
18
  export type { PackageSurface, PackageSurfaceResolver, PackageSurfaceSymbol, PackageSurfaceUnresolvedReason, } from "./package-surface-resolver.js";
@@ -5,6 +5,7 @@
5
5
  * Adapters (in packages/eval) implement these interfaces.
6
6
  */
7
7
  export { NoOpArtifactWriter } from "./artifact-writer.js";
8
+ export { modelId, parseModelId, splitModelId } from "./llm-client.js";
8
9
  export { PackageSurfaceResolverError } from "./package-surface-resolver.js";
9
10
  export { ARTIFACT_EXPORT_PHASE_ID, NoOpProgressReporter, } from "./progress-reporter.js";
10
11
  export { canonicalDocRefLabel, isIdRef, isPathRef, isPerspectiveRef, isSlugRef, isTemplatedAssertion, } from "./task-source.js";
@@ -0,0 +1,112 @@
1
+ /**
2
+ * Port: LLM access for non-grader features.
3
+ *
4
+ * Adapters wrap a vendor REST API and centralize retry, rate-limit handling,
5
+ * cost calculation, and observability. Features call this port instead of
6
+ * importing the grader's internals or a vendor SDK directly.
7
+ *
8
+ * The grader path (`packages/eval/src/pipeline/grader-api.ts`) is intentionally
9
+ * NOT migrated here — D0051 defers grader migration as a follow-up.
10
+ *
11
+ * @see docs/decisions/D0051-llm-client-port.md
12
+ */
13
+ import type { ZodType } from "zod";
14
+ import { type Brand, type IdValidationError, type Result } from "../types/branded-ids.js";
15
+ /**
16
+ * A canonical LLM model identifier.
17
+ *
18
+ * Grammar: `<provider>:<segment>...:<modelName>` (e.g.
19
+ * `"openai:chat:gpt-5.2"`, `"anthropic:messages:claude-opus-4-6"`,
20
+ * `"anthropic:claude-sonnet-4-6"`). Branded so adapters can trust the
21
+ * grammar and consumers can't accidentally pass an arbitrary string.
22
+ */
23
+ export type ModelId = Brand<string, "ModelId">;
24
+ /** The supported provider prefixes for `ModelId`. */
25
+ export type ModelProvider = "anthropic" | "openai";
26
+ /** Result of parsing a `ModelId` — provider plus the bare model name. */
27
+ export interface ParsedModelId {
28
+ readonly id: ModelId;
29
+ readonly provider: ModelProvider;
30
+ /** Bare vendor model name with provider segments stripped. */
31
+ readonly modelName: string;
32
+ }
33
+ /**
34
+ * Parse a raw string into a `ParsedModelId`. Returns `Result` — never throws.
35
+ *
36
+ * Recognized prefixes:
37
+ * - `openai:<modelName>` or `openai:<sub>:<modelName>` (e.g. `"openai:chat:gpt-5"`)
38
+ * - `anthropic:<modelName>` or `anthropic:messages:<modelName>`
39
+ */
40
+ export declare function parseModelId(raw: string): Result<ParsedModelId, IdValidationError>;
41
+ /**
42
+ * Throwing constructor — convenient for known-good inputs (config files,
43
+ * tests). Throws if the id is malformed; use `parseModelId` for untrusted
44
+ * input.
45
+ */
46
+ export declare function modelId(raw: string): ModelId;
47
+ /**
48
+ * Extract `provider` + `modelName` from an already-branded `ModelId`. Assumes
49
+ * the id was produced by `parseModelId` / `modelId` and is therefore valid;
50
+ * if it isn't, the caller's bug surfaces as a thrown `Error`.
51
+ */
52
+ export declare function splitModelId(id: ModelId): ParsedModelId;
53
+ /**
54
+ * Per-call telemetry tag. Carried through usage / cost records so billing can
55
+ * roll up by feature, run, or card.
56
+ */
57
+ export interface LLMCallContext {
58
+ /** Logical feature name (e.g. "diagnosis", "meta-eval"). */
59
+ feature: string;
60
+ /** Optional pipeline run id when the call happens inside a run. */
61
+ runId?: string;
62
+ /** Optional originating card id (for diagnosis-style features). */
63
+ cardId?: string;
64
+ }
65
+ /** Token usage reported by the vendor for a single call. */
66
+ export interface LLMUsage {
67
+ promptTokens: number;
68
+ completionTokens: number;
69
+ }
70
+ /** Result of a free-text completion. */
71
+ export interface LLMCompletion {
72
+ text: string;
73
+ usage: LLMUsage;
74
+ /** End-to-end USD cost for the call. */
75
+ cost: number;
76
+ /** Echo of the canonical model id used. */
77
+ model: ModelId;
78
+ }
79
+ /** Result of a structured-output completion. `value` is parsed via the supplied schema. */
80
+ export interface LLMStructuredCompletion<T> {
81
+ value: T;
82
+ usage: LLMUsage;
83
+ cost: number;
84
+ model: ModelId;
85
+ }
86
+ export interface LLMCompleteArgs {
87
+ /** Canonical model id — produced by `modelId` / `parseModelId`. */
88
+ model: ModelId;
89
+ /** Raw prompt text — adapters wrap it in the vendor message envelope. */
90
+ prompt: string;
91
+ temperature?: number;
92
+ maxTokens?: number;
93
+ stop?: string[];
94
+ context?: LLMCallContext;
95
+ }
96
+ export interface LLMCompleteStructuredArgs<T> {
97
+ model: ModelId;
98
+ prompt: string;
99
+ /** Runtime contract — the adapter parses the model's response through this. */
100
+ schema: ZodType<T>;
101
+ temperature?: number;
102
+ maxTokens?: number;
103
+ context?: LLMCallContext;
104
+ }
105
+ /**
106
+ * Synthesis-side LLM port. v0 is single-call only — streaming and batching are
107
+ * deferred per D0051 until a consumer needs them.
108
+ */
109
+ export interface LLMClient {
110
+ complete(args: LLMCompleteArgs): Promise<LLMCompletion>;
111
+ completeStructured<T>(args: LLMCompleteStructuredArgs<T>): Promise<LLMStructuredCompletion<T>>;
112
+ }
@@ -0,0 +1,68 @@
1
+ /**
2
+ * Port: LLM access for non-grader features.
3
+ *
4
+ * Adapters wrap a vendor REST API and centralize retry, rate-limit handling,
5
+ * cost calculation, and observability. Features call this port instead of
6
+ * importing the grader's internals or a vendor SDK directly.
7
+ *
8
+ * The grader path (`packages/eval/src/pipeline/grader-api.ts`) is intentionally
9
+ * NOT migrated here — D0051 defers grader migration as a follow-up.
10
+ *
11
+ * @see docs/decisions/D0051-llm-client-port.md
12
+ */
13
+ import { err, ok, } from "../types/branded-ids.js";
14
/**
 * Parse a raw string into a `ParsedModelId`, reporting failure through the
 * returned `Result` (`err(...)`) instead of throwing.
 *
 * The input is split on `:`. It is rejected with code `INVALID_MODEL_ID`
 * when it has fewer than two segments, when any segment is empty, or when
 * the first segment is not a supported provider.
 *
 * How the model name is derived per provider:
 * - `openai`: with three or more segments the second segment is dropped
 *   whatever its value (e.g. `"openai:chat:gpt-5"` → `"gpt-5"`); with two
 *   segments the second segment is the model name.
 * - `anthropic`: the second segment is dropped only when it is exactly
 *   `"messages"` and at least three segments exist; otherwise everything
 *   after the provider is the model name.
 *
 * Remaining segments are re-joined with `:`. On success `id` is the raw
 * input string, unchanged.
 */
export function parseModelId(raw) {
    const segments = raw.split(":");
    const [provider] = segments;
    const wellFormed = segments.length >= 2 && segments.every((segment) => segment !== "");
    if (!wellFormed) {
        return err({
            code: "INVALID_MODEL_ID",
            raw,
            message: `Invalid ModelId "${raw}": expected "<provider>:<modelName>" with non-empty segments`,
        });
    }
    // Number of leading segments that are not part of the model name.
    let skipped;
    switch (provider) {
        case "openai":
            skipped = segments.length >= 3 ? 2 : 1;
            break;
        case "anthropic":
            skipped = segments.length >= 3 && segments[1] === "messages" ? 2 : 1;
            break;
        default:
            return err({
                code: "INVALID_MODEL_ID",
                raw,
                message: `Invalid ModelId "${raw}": unknown provider "${provider}". Supported: openai, anthropic.`,
            });
    }
    return ok({ id: raw, provider, modelName: segments.slice(skipped).join(":") });
}
47
/**
 * Throwing counterpart of `parseModelId`, meant for inputs already known
 * to be good (config files, tests). Returns the `id` of the parsed result;
 * when parsing fails it throws an `Error` carrying the parse error's
 * message. Use `parseModelId` directly for untrusted input.
 */
export function modelId(raw) {
    const parsed = parseModelId(raw);
    if (parsed.ok) {
        return parsed.value.id;
    }
    throw new Error(parsed.error.message);
}
58
/**
 * Recover `provider` and `modelName` from an already-branded `ModelId` by
 * running it through `parseModelId` again and returning the full parsed
 * value. The id is expected to have come from `parseModelId` / `modelId`;
 * if it does not parse, that caller bug surfaces as a thrown `Error`
 * carrying the parse error's message.
 */
export function splitModelId(id) {
    const parsed = parseModelId(id);
    if (parsed.ok) {
        return parsed.value;
    }
    throw new Error(parsed.error.message);
}
@@ -0,0 +1,68 @@
1
+ /**
2
+ * Shared confidence contract for actionability-ladder emitters (D0049).
3
+ *
4
+ * Every confidence-emitting site in the actionability-ladder design set
5
+ * (per-document attribution ensemble, structured grader judgments,
6
+ * diagnosis cards, regression detection) emits the same abstract triple
7
+ * so consumers can reason about confidence uniformly across emitters.
8
+ *
9
+ * Bucket thresholds and the formula behind `level` are emitter-specific;
10
+ * the externally comparable behavior is the `level` enum. Consumers that
11
+ * need the underlying mechanic read `derivation` and can branch.
12
+ */
13
+ /**
14
+ * Conventional `derivation` identifiers for the seed set of emitters
15
+ * named in D0049. Re-exported as a typed tuple so consumers and tests can
16
+ * reference one source of truth instead of redeclaring the literals.
17
+ *
18
+ * Adding a new emitter does not require editing this list — `derivation`
19
+ * is an open tag (see `ConfidenceDerivation`). The list is the
20
+ * recommended starting set, not the universe.
21
+ */
22
+ export declare const CONVENTIONAL_DERIVATIONS: readonly ["ensemble-stdev", "ceiling-cross-check", "regression-gate", "card-type-specific"];
23
+ /**
24
+ * Tag identifying the formula used to derive `Confidence.level`.
25
+ *
26
+ * Members of `CONVENTIONAL_DERIVATIONS` are surfaced as literal variants
27
+ * so IDEs autocomplete the recommended set, while the trailing
28
+ * `(string & {})` keeps the type open — emitters that need a new
29
+ * identifier (per-card-type tags, future mechanics) can mint their own
30
+ * without editing `@sanity/ailf-core`. D0049 picked the open shape so
31
+ * feature work isn't coupled to core's release cycle.
32
+ */
33
+ export type ConfidenceDerivation = (typeof CONVENTIONAL_DERIVATIONS)[number] | (string & {});
34
+ /**
35
+ * The shared confidence triple. Every emitter populates all three fields.
36
+ *
37
+ * - `level` is bucketed (not numeric) — chosen over a 0..1 score so every
38
+ * consumer doesn't have to pick its own UI buckets. Emitters may keep a
39
+ * numeric internal representation and bucket at the edge.
40
+ * - `signalsPresent` lets consumers distinguish "1 of 1 signal said high"
41
+ * from "5 of 6 signals said high" without re-deriving the underlying
42
+ * mechanic.
43
+ * - `derivation` is a short identifier for the formula used to derive
44
+ * `level`, so consumers can interpret the mechanic without
45
+ * re-implementing it. Conventional values: `"ensemble-stdev"`,
46
+ * `"ceiling-cross-check"`, `"regression-gate"`, `"card-type-specific"`.
47
+ * Emitters may emit any non-empty string; new conventional identifiers
48
+ * land as new emitters arrive.
49
+ */
50
+ export type Confidence = {
51
+ /** Bucketed level. Comparable across emitters at this granularity. */
52
+ level: "high" | "medium" | "low";
53
+ /** Number of signals contributing to the score. Lets consumers
54
+ * distinguish "1 of 1 signal said high" from "5 of 6 signals said high." */
55
+ signalsPresent: number;
56
+ /** Short identifier for the formula used to derive `level`. Lets
57
+ * consumers interpret the mechanic without re-implementing it.
58
+ * Conventional values: "ensemble-stdev", "ceiling-cross-check",
59
+ * "regression-gate", "card-type-specific". */
60
+ derivation: ConfidenceDerivation;
61
+ };
62
+ /**
63
+ * Structural type guard for `Confidence`. Verifies the runtime shape
64
+ * matches the contract — useful at trust boundaries that can't depend on
65
+ * a Zod schema (the schema lives at the consuming site since each emitter
66
+ * picks its own `level` thresholds, but the shape is shared).
67
+ */
68
+ export declare function isConfidence(value: unknown): value is Confidence;