@sanity/ailf 7.1.0 → 7.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (38)
  1. package/dist/_vendor/ailf-core/ports/doc-fetcher.d.ts +10 -0
  2. package/dist/_vendor/ailf-core/schemas/index.d.ts +1 -0
  3. package/dist/_vendor/ailf-core/schemas/index.js +4 -0
  4. package/dist/_vendor/ailf-core/schemas/report.d.ts +11 -0
  5. package/dist/_vendor/ailf-core/schemas/report.js +14 -0
  6. package/dist/_vendor/ailf-core/schemas/user.d.ts +22 -0
  7. package/dist/_vendor/ailf-core/schemas/user.js +23 -0
  8. package/dist/_vendor/ailf-core/types/index.d.ts +29 -0
  9. package/dist/_vendor/ailf-core/types/index.js +13 -0
  10. package/dist/_vendor/ailf-core/types/user.d.ts +49 -0
  11. package/dist/_vendor/ailf-core/types/user.js +1 -0
  12. package/dist/_vendor/ailf-shared/document-ref.d.ts +29 -1
  13. package/dist/_vendor/ailf-shared/document-ref.js +23 -1
  14. package/dist/_vendor/ailf-shared/generated/help-content.js +26 -14
  15. package/dist/_vendor/ailf-shared/index.d.ts +1 -1
  16. package/dist/_vendor/ailf-shared/index.js +1 -0
  17. package/dist/_vendor/ailf-shared/owner-teams.js +19 -6
  18. package/dist/adapters/doc-fetchers/sanity-doc-fetcher.js +15 -1
  19. package/dist/adapters/grader-outputs/promptfoo-grader-output.d.ts +2 -2
  20. package/dist/adapters/task-sources/content-lake-task-source.js +12 -7
  21. package/dist/orchestration/steps/compute-attribution-step.d.ts +2 -2
  22. package/dist/orchestration/steps/compute-attribution-step.js +17 -2
  23. package/dist/orchestration/steps/gap-analysis-step.d.ts +2 -2
  24. package/dist/orchestration/steps/gap-analysis-step.js +29 -10
  25. package/dist/orchestration/steps/publish-report-step.d.ts +15 -1
  26. package/dist/orchestration/steps/publish-report-step.js +63 -6
  27. package/dist/pipeline/calculate-scores.d.ts +13 -1
  28. package/dist/pipeline/calculate-scores.js +125 -22
  29. package/dist/pipeline/enrichment-preconditions.d.ts +52 -0
  30. package/dist/pipeline/enrichment-preconditions.js +84 -0
  31. package/dist/pipeline/extract-grader-judgments-resilient.d.ts +88 -0
  32. package/dist/pipeline/extract-grader-judgments-resilient.js +122 -0
  33. package/dist/report-store.d.ts +1 -0
  34. package/dist/report-store.js +2 -0
  35. package/dist/sanity/queries.d.ts +1 -1
  36. package/dist/sanity/queries.js +1 -0
  37. package/dist/sources.js +40 -2
  38. package/package.json +1 -1
@@ -0,0 +1,88 @@
1
+ /**
2
+ * pipeline/extract-grader-judgments-resilient.ts
3
+ *
4
+ * Resilient grader-judgment extraction for the `calculate-scores` persist
5
+ * junction.
6
+ *
7
+ * Background: `calculateAndWriteScores` extracts grader judgments from the
8
+ * eval results file(s), then writes `grader-judgments.json` only when the
9
+ * array is non-empty (the `judgments.length > 0` guard). A runtime anomaly was
10
+ * observed where `extractGraderJudgments` returned 0 judgments for a results
11
+ * file that demonstrably contained classifiable llm-rubric components — the
12
+ * same file, read tens of milliseconds later by `extractStoredTestResults`,
13
+ * yielded the full set (entries with populated dimensions). The empty array
14
+ * silently skipped the write, so gap-analysis and compute-attribution were skipped
15
+ * and the report shipped with a score but no tests.
16
+ *
17
+ * The committed code reads the file via a pure `readFileSync` with identical
18
+ * classification on both sides, so the divergence is not reproducible from the
19
+ * source + captured artifacts — it is a transient read anomaly at the live
20
+ * junction. This wrapper does not pretend to know the mechanism; it makes the
21
+ * junction observable and recovers from the transient:
22
+ *
23
+ * 1. **Instruments** — logs the resolved path(s), file size/mtime, parsed
24
+ * result count, and judgment count on every run (never silent on 0), so a
25
+ * future empty-judgments persist is diagnosable from the run log alone.
26
+ * 2. **Self-heals** — when extraction yields 0 judgments but a results file
27
+ * exists, it re-extracts with bounded retries. A later read that yields
28
+ * judgments proves the initial 0 was transient; the recovered judgments
29
+ * are returned. If every attempt yields 0, severity is decided by an
30
+ * independent classifiable-component count: a genuinely judgment-free run
31
+ * (all api-errors / no llm-rubric) logs a warning, while 0 judgments
32
+ * against present classifiable components logs an error (the downstream
33
+ * gap-analysis fail-loud guard is the backstop).
34
+ *
35
+ * The extractor and fs helpers are injected so the wrapper is unit-testable
36
+ * without importing the ~3000-line scoring module (which would be circular)
37
+ * or touching the real filesystem.
38
+ */
39
+ import type { GraderJudgment, GraderReliability, Logger } from "../_vendor/ailf-core/index.d.ts";
40
+ /** Telemetry sink threaded into each extraction (shared reliability counters). */
41
+ export interface ExtractionTelemetry {
42
+ reliability: GraderReliability;
43
+ runId?: string;
44
+ }
45
+ /** Cheap on-disk stat used for diagnostics and the retry gate. */
46
+ export interface FileStat {
47
+ exists: boolean;
48
+ mtimeMs: number;
49
+ size: number;
50
+ }
51
+ /** Injectable seams — defaults wire the real fs; tests substitute fakes. */
52
+ export interface ResilientExtractionDeps {
53
+ /**
54
+ * The real `extractGraderJudgments`. Injected (rather than imported) to
55
+ * avoid a circular dependency with `calculate-scores.ts`.
56
+ */
57
+ extract: (path: string, telemetry?: ExtractionTelemetry) => GraderJudgment[];
58
+ /** Parsed result-entry count for a path — diagnostics only. */
59
+ countResults?: (path: string) => number;
60
+ /**
61
+ * Count of classifiable llm-rubric components for a path. Used only to set
62
+ * the severity of a persistent-empty extraction: a file with classifiable
63
+ * components but 0 extracted judgments is an error; a file with none (all
64
+ * api-errors / no llm-rubric) is a benign empty.
65
+ */
66
+ countClassifiable?: (path: string) => number;
67
+ /** On-disk stat (existence + size + mtime). */
68
+ statFile?: (path: string) => FileStat;
69
+ /** Backoff between self-heal attempts. */
70
+ sleep?: (ms: number) => Promise<void>;
71
+ }
72
+ export interface ResilientExtractionOptions {
73
+ /** Total extraction attempts when the first yields 0 (default 3, min 1). */
74
+ maxAttempts?: number;
75
+ /** Delay before each retry, in ms (default 200). */
76
+ delayMs?: number;
77
+ deps: ResilientExtractionDeps;
78
+ }
79
+ /**
80
+ * Extract grader judgments across one or more results files, instrumented and
81
+ * self-healing. See the module header for the rationale.
82
+ *
83
+ * @param resultsPaths One or more results files (e.g. baseline + agentic in
84
+ * literacy full mode). Missing paths are skipped.
85
+ * @param telemetry Shared reliability sink threaded into every extraction.
86
+ * @param log Pipeline logger — the junction is logged here on every run.
87
+ */
88
+ export declare function extractGraderJudgmentsResilient(resultsPaths: readonly string[], telemetry: ExtractionTelemetry | undefined, log: Logger, options: ResilientExtractionOptions): Promise<GraderJudgment[]>;
@@ -0,0 +1,122 @@
1
+ /**
2
+ * pipeline/extract-grader-judgments-resilient.ts
3
+ *
4
+ * Resilient grader-judgment extraction for the `calculate-scores` persist
5
+ * junction.
6
+ *
7
+ * Background: `calculateAndWriteScores` extracts grader judgments from the
8
+ * eval results file(s), then writes `grader-judgments.json` only when the
9
+ * array is non-empty (the `judgments.length > 0` guard). A runtime anomaly was
10
+ * observed where `extractGraderJudgments` returned 0 judgments for a results
11
+ * file that demonstrably contained classifiable llm-rubric components — the
12
+ * same file, read tens of milliseconds later by `extractStoredTestResults`,
13
+ * yielded the full set (entries with populated dimensions). The empty array
14
+ * silently skipped the write, so gap-analysis and compute-attribution were skipped
15
+ * and the report shipped with a score but no tests.
16
+ *
17
+ * The committed code reads the file via a pure `readFileSync` with identical
18
+ * classification on both sides, so the divergence is not reproducible from the
19
+ * source + captured artifacts — it is a transient read anomaly at the live
20
+ * junction. This wrapper does not pretend to know the mechanism; it makes the
21
+ * junction observable and recovers from the transient:
22
+ *
23
+ * 1. **Instruments** — logs the resolved path(s), file size/mtime, parsed
24
+ * result count, and judgment count on every run (never silent on 0), so a
25
+ * future empty-judgments persist is diagnosable from the run log alone.
26
+ * 2. **Self-heals** — when extraction yields 0 judgments but a results file
27
+ * exists, it re-extracts with bounded retries. A later read that yields
28
+ * judgments proves the initial 0 was transient; the recovered judgments
29
+ * are returned. If every attempt yields 0, severity is decided by an
30
+ * independent classifiable-component count: a genuinely judgment-free run
31
+ * (all api-errors / no llm-rubric) logs a warning, while 0 judgments
32
+ * against present classifiable components logs an error (the downstream
33
+ * gap-analysis fail-loud guard is the backstop).
34
+ *
35
+ * The extractor and fs helpers are injected so the wrapper is unit-testable
36
+ * without importing the ~3000-line scoring module (which would be circular)
37
+ * or touching the real filesystem.
38
+ */
39
+ import { existsSync, statSync } from "node:fs";
40
+ const defaultStat = (path) => {
41
+ if (!existsSync(path))
42
+ return { exists: false, mtimeMs: 0, size: 0 };
43
+ const s = statSync(path);
44
+ return { exists: true, mtimeMs: s.mtimeMs, size: s.size };
45
+ };
46
+ const defaultSleep = (ms) => new Promise((resolve) => setTimeout(resolve, ms));
47
+ /**
48
+ * Extract grader judgments across one or more results files, instrumented and
49
+ * self-healing. See the module header for the rationale.
50
+ *
51
+ * @param resultsPaths One or more results files (e.g. baseline + agentic in
52
+ * literacy full mode). Missing paths are skipped.
53
+ * @param telemetry Shared reliability sink threaded into every extraction.
54
+ * @param log Pipeline logger — the junction is logged here on every run.
55
+ */
56
+ export async function extractGraderJudgmentsResilient(resultsPaths, telemetry, log, options) {
57
+ const { extract } = options.deps;
58
+ const statFile = options.deps.statFile ?? defaultStat;
59
+ const sleep = options.deps.sleep ?? defaultSleep;
60
+ const { countResults, countClassifiable } = options.deps;
61
+ const maxAttempts = Math.max(1, options.maxAttempts ?? 3);
62
+ const delayMs = options.delayMs ?? 200;
63
+ const present = resultsPaths.filter((p) => statFile(p).exists);
64
+ const extractAll = () => {
65
+ const all = [];
66
+ for (const p of present) {
67
+ all.push(...extract(p, telemetry));
68
+ }
69
+ return all;
70
+ };
71
+ const diag = (path) => {
72
+ const st = statFile(path);
73
+ return {
74
+ mtimeMs: st.mtimeMs,
75
+ path,
76
+ sizeBytes: st.size,
77
+ ...(countResults ? { resultCount: countResults(path) } : {}),
78
+ };
79
+ };
80
+ // Attempt 1 — always instrument the junction so 0 is never silent.
81
+ let judgments = extractAll();
82
+ for (const p of present) {
83
+ log.info("Grader judgments — persist junction read", diag(p));
84
+ }
85
+ log.info(`Grader judgments extracted: ${judgments.length} total`, {
86
+ judgmentCount: judgments.length,
87
+ paths: present,
88
+ });
89
+ if (judgments.length > 0)
90
+ return judgments;
91
+ // 0 judgments and no results file present → genuinely nothing to grade.
92
+ // A missing file cannot become non-empty within the retry window.
93
+ if (present.length === 0) {
94
+ log.info("No grader judgments — no results file present (nothing to grade)");
95
+ return judgments;
96
+ }
97
+ // Results file(s) exist but extraction yielded 0 judgments — a suspected
98
+ // transient read anomaly. Loud diagnostic, then bounded self-heal retries;
99
+ // the same file read tens of ms later has been observed to yield the full set.
100
+ log.warn("Grader extraction returned 0 judgments despite present results file(s) — suspected transient read anomaly; attempting self-heal", { paths: present.map(diag) });
101
+ for (let attempt = 2; attempt <= maxAttempts; attempt++) {
102
+ await sleep(delayMs);
103
+ judgments = extractAll();
104
+ log.warn(`Grader self-heal attempt ${attempt}/${maxAttempts}: ${judgments.length} judgment(s)`);
105
+ if (judgments.length > 0) {
106
+ log.warn(`Grader self-heal recovered ${judgments.length} grader judgment(s) on attempt ${attempt} — the initial empty extraction was a transient read anomaly`);
107
+ return judgments;
108
+ }
109
+ }
110
+ // Still empty after every attempt. Severity depends on whether the files
111
+ // actually contain classifiable components.
112
+ const classifiable = countClassifiable
113
+ ? present.reduce((n, p) => n + countClassifiable(p), 0)
114
+ : undefined;
115
+ if (classifiable === 0) {
116
+ log.warn(`No grader judgments after ${maxAttempts} attempt(s) — results contain no classifiable llm-rubric components (e.g. all api-errors); nothing to persist`);
117
+ }
118
+ else {
119
+ log.error(`Grader judgments empty after ${maxAttempts} attempt(s) but ${classifiable ?? "an unknown number of"} classifiable component(s) present in the results file(s) — persisting none; downstream gap-analysis/attribution will fail loud`, { paths: present.map(diag) });
120
+ }
121
+ return judgments;
122
+ }
@@ -216,6 +216,7 @@ export interface SanityReportDoc {
216
216
  _type: string;
217
217
  comparison: null | Omit<ComparisonReport, "baseline" | "experiment">;
218
218
  completedAt: string;
219
+ degraded?: Report["degraded"];
219
220
  durationMs: number;
220
221
  provenance: Report["provenance"];
221
222
  reportId: ReportId;
@@ -477,6 +477,7 @@ export function toSanityReportDoc(report) {
477
477
  _type: REPORT_TYPE,
478
478
  comparison,
479
479
  completedAt: report.completedAt,
480
+ ...(report.degraded ? { degraded: report.degraded } : {}),
480
481
  durationMs: report.durationMs,
481
482
  provenance: report.provenance,
482
483
  reportId: report.id,
@@ -526,6 +527,7 @@ export function toReport(doc) {
526
527
  artifactManifest,
527
528
  comparison: doc.comparison,
528
529
  completedAt: doc.completedAt,
530
+ degraded: doc.degraded,
529
531
  durationMs: doc.durationMs,
530
532
  id: doc.reportId,
531
533
  provenance: doc.provenance,
@@ -69,7 +69,7 @@ export declare const ALL_ARTICLES_QUERY = "\n *[_type == \"article\"\n && !(
69
69
  *
70
70
  * Usage: client.fetch(ARTICLES_METADATA_BY_SLUGS_QUERY, { slugs: ["slug-a", "slug-b"] })
71
71
  */
72
- export declare const ARTICLES_METADATA_BY_SLUGS_QUERY = "\n *[_type == \"article\"\n && slug.current in $slugs\n && !(_id in path(\"drafts.**\"))\n ] {\n \"slug\": slug.current,\n _id,\n _rev,\n title\n }\n";
72
+ export declare const ARTICLES_METADATA_BY_SLUGS_QUERY = "\n *[_type == \"article\"\n && slug.current in $slugs\n && !(_id in path(\"drafts.**\"))\n ] {\n \"slug\": slug.current,\n \"sectionSlug\": primarySection->slug.current,\n _id,\n _rev,\n title\n }\n";
73
73
  /**
74
74
  * Fetch a single article by its slug — identical to ARTICLE_BY_SLUG_QUERY
75
75
  * but designed to be called with a perspective-enabled client.
@@ -203,6 +203,7 @@ export const ARTICLES_METADATA_BY_SLUGS_QUERY = `
203
203
  && !(_id in path("drafts.**"))
204
204
  ] {
205
205
  "slug": slug.current,
206
+ "sectionSlug": primarySection->slug.current,
206
207
  _id,
207
208
  _rev,
208
209
  title
package/dist/sources.js CHANGED
@@ -37,6 +37,44 @@ const DEFAULT_SOURCE = {
37
37
  studioOrigin: "https://admin.sanity.io",
38
38
  urls: [],
39
39
  };
40
+ /**
41
+ * Apply `SourceOverrides` + env-var fallbacks to `DEFAULT_SOURCE`.
42
+ *
43
+ * The DEFAULT_SOURCE early-return branches are taken when `config/sources`
44
+ * is missing or empty — the production state, since the named source
45
+ * definitions actually live in the `sanity-literacy` preset's `sourceDefs`
46
+ * (which `loadSource` doesn't consult). Returning `DEFAULT_SOURCE`
47
+ * verbatim drops every override the caller passed in, including
48
+ * `perspective` — observed live as production-source release evals
49
+ * fetching the published doc revision (W0295).
50
+ *
51
+ * The merge order mirrors the priority-1 (env-baseUrl) branch. The two
52
+ * paths diverge in two ways, both intentional: this branch (a) pins
53
+ * `baseUrl` / `llmsTxt` / `name` / `priorityDomain` to `DEFAULT_SOURCE`,
54
+ * (b) returns `documentIds: []` (the prior `DEFAULT_SOURCE` shape) where
55
+ * priority-1 would return `undefined` — both fall through the same
56
+ * `length > 0` consumer check, so behaviorally equivalent.
57
+ */
58
+ function applyOverridesToDefault(overrides) {
59
+ const allowedOrigins = overrides?.allowedOrigins ?? parseAllowedOriginsEnv();
60
+ const headers = overrides?.headers ?? parseHeadersEnv();
61
+ return {
62
+ ...DEFAULT_SOURCE,
63
+ ...(allowedOrigins ? { allowedOrigins } : {}),
64
+ // oxlint-disable-next-line @typescript-eslint/prefer-nullish-coalescing -- empty string env var should fall back
65
+ dataset: overrides?.dataset ?? (process.env.SANITY_DATASET || "next"),
66
+ documentIds: overrides?.documentIds ?? parseDocumentIdsEnv() ?? [],
67
+ ...(headers ? { headers } : {}),
68
+ // oxlint-disable-next-line @typescript-eslint/prefer-nullish-coalescing -- empty string env var should fall back
69
+ perspective: overrides?.perspective ?? (process.env.SANITY_PERSPECTIVE || undefined),
70
+ // oxlint-disable-next-line @typescript-eslint/prefer-nullish-coalescing -- empty string env var should fall back
71
+ projectId: overrides?.projectId ?? (process.env.SANITY_PROJECT_ID || "3do82whm"),
72
+ // oxlint-disable-next-line @typescript-eslint/prefer-nullish-coalescing -- empty string env var should fall back
73
+ studioOrigin: overrides?.studioOrigin ??
74
+ (process.env.SANITY_STUDIO_ORIGIN || "https://admin.sanity.io"),
75
+ urls: overrides?.directUrls ?? parseDirectUrlsEnv(),
76
+ };
77
+ }
40
78
  // ---------------------------------------------------------------------------
41
79
  // Validation
42
80
  // ---------------------------------------------------------------------------
@@ -117,12 +155,12 @@ export function loadSource(name, overrides, logger) {
117
155
  defaultBaseUrl: DEFAULT_SOURCE.baseUrl,
118
156
  });
119
157
  console.log(" No config/sources found, using built-in default (sanity.io production)");
120
- return DEFAULT_SOURCE;
158
+ return applyOverridesToDefault(overrides);
121
159
  }
122
160
  if (!rawFile?.sources || Object.keys(rawFile.sources).length === 0) {
123
161
  log.debug("config/sources is empty, falling back to built-in default");
124
162
  console.log(" config/sources is empty, using built-in default");
125
- return DEFAULT_SOURCE;
163
+ return applyOverridesToDefault(overrides);
126
164
  }
127
165
  // Resolve which source to use
128
166
  const sourceName =
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@sanity/ailf",
3
- "version": "7.1.0",
3
+ "version": "7.2.0",
4
4
  "private": false,
5
5
  "publishConfig": {
6
6
  "access": "public"