@sanity/ailf 7.1.0 → 7.1.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/_vendor/ailf-core/ports/doc-fetcher.d.ts +10 -0
- package/dist/_vendor/ailf-shared/document-ref.d.ts +29 -1
- package/dist/_vendor/ailf-shared/document-ref.js +23 -1
- package/dist/_vendor/ailf-shared/index.d.ts +1 -1
- package/dist/_vendor/ailf-shared/index.js +1 -0
- package/dist/_vendor/ailf-shared/owner-teams.js +19 -6
- package/dist/adapters/doc-fetchers/sanity-doc-fetcher.js +15 -1
- package/dist/adapters/task-sources/content-lake-task-source.js +12 -7
- package/dist/orchestration/steps/gap-analysis-step.js +9 -8
- package/dist/orchestration/steps/publish-report-step.js +18 -6
- package/dist/pipeline/calculate-scores.d.ts +13 -1
- package/dist/pipeline/calculate-scores.js +66 -8
- package/dist/sanity/queries.d.ts +1 -1
- package/dist/sanity/queries.js +1 -0
- package/dist/sources.js +40 -2
- package/package.json +1 -1
|
@@ -53,6 +53,16 @@ export interface DocumentManifestEntry {
|
|
|
53
53
|
_id: string;
|
|
54
54
|
_rev: string;
|
|
55
55
|
slug: string;
|
|
56
|
+
/** Parent section slug (`primarySection->slug.current`), when resolvable. */
|
|
57
|
+
sectionSlug?: string;
|
|
58
|
+
/**
|
|
59
|
+
* Full URL path under `/docs/` (e.g. `content-lake/groq-introduction`)
|
|
60
|
+
* composed via `buildContextDocPath` from `sectionSlug + "/" + slug`.
|
|
61
|
+
* Optional — historical manifests written before W0287 only carry
|
|
62
|
+
* `slug`; downstream `DocumentRef` builders fall back to slug-only
|
|
63
|
+
* display when this is absent.
|
|
64
|
+
*/
|
|
65
|
+
path?: string;
|
|
56
66
|
title: string;
|
|
57
67
|
}
|
|
58
68
|
/** Impact of a content release on canonical documents */
|
|
@@ -22,8 +22,36 @@ export interface DocumentRef {
|
|
|
22
22
|
* Named `revision` (not `_rev`) for the same Sanity reserved-name reason.
|
|
23
23
|
*/
|
|
24
24
|
revision?: string;
|
|
25
|
-
/** URL-path identifier (e.g., "groq-introduction") */
|
|
25
|
+
/** URL-path identifier (e.g., "groq-introduction") — leaf segment only. */
|
|
26
26
|
slug: string;
|
|
27
|
+
/**
|
|
28
|
+
* Full URL path under `/docs/` (e.g., `content-lake/groq-introduction`).
|
|
29
|
+
* Composed from the article's `primarySection->slug.current` and
|
|
30
|
+
* `slug.current` via {@link buildContextDocPath}. Optional — historical
|
|
31
|
+
* reports written before W0287 carry only `slug`; consumers must fall
|
|
32
|
+
* back to `slug` for display when `path` is absent.
|
|
33
|
+
*/
|
|
34
|
+
path?: string;
|
|
27
35
|
/** Human-readable document title */
|
|
28
36
|
title: string;
|
|
29
37
|
}
|
|
38
|
+
/**
|
|
39
|
+
* Compose the canonical `/docs/`-relative path for a context-doc reference.
|
|
40
|
+
*
|
|
41
|
+
* Single source of truth across producers (eval doc fetcher, repo-task
|
|
42
|
+
* mirroring) and consumers (dashboard projections). Resolution order:
|
|
43
|
+
*
|
|
44
|
+
* 1. An explicit `path` (e.g. authored on a YAML/repo-mirrored task) wins.
|
|
45
|
+
* 2. Otherwise compose `sectionSlug + "/" + slug` when both are present.
|
|
46
|
+
* 3. Otherwise `null` — neither caller can build a working docs URL, so
|
|
47
|
+
* consumers should disable the link rather than emit a 404.
|
|
48
|
+
*
|
|
49
|
+
* The leaf `slug` alone is never returned as the path because
|
|
50
|
+
* `article.slug.current` is leaf-only on sanity.io/docs; the hierarchy
|
|
51
|
+
* lives on `primarySection->slug.current`.
|
|
52
|
+
*/
|
|
53
|
+
export declare function buildContextDocPath(input: {
|
|
54
|
+
path?: string | null;
|
|
55
|
+
sectionSlug?: string | null;
|
|
56
|
+
slug?: string | null;
|
|
57
|
+
}): string | null;
|
|
@@ -1 +1,23 @@
|
|
|
1
|
-
|
|
1
|
+
/**
|
|
2
|
+
* Compose the canonical `/docs/`-relative path for a context-doc reference.
|
|
3
|
+
*
|
|
4
|
+
* Single source of truth across producers (eval doc fetcher, repo-task
|
|
5
|
+
* mirroring) and consumers (dashboard projections). Resolution order:
|
|
6
|
+
*
|
|
7
|
+
* 1. An explicit `path` (e.g. authored on a YAML/repo-mirrored task) wins.
|
|
8
|
+
* 2. Otherwise compose `sectionSlug + "/" + slug` when both are present.
|
|
9
|
+
* 3. Otherwise `null` — neither caller can build a working docs URL, so
|
|
10
|
+
* consumers should disable the link rather than emit a 404.
|
|
11
|
+
*
|
|
12
|
+
* The leaf `slug` alone is never returned as the path because
|
|
13
|
+
* `article.slug.current` is leaf-only on sanity.io/docs; the hierarchy
|
|
14
|
+
* lives on `primarySection->slug.current`.
|
|
15
|
+
*/
|
|
16
|
+
export function buildContextDocPath(input) {
|
|
17
|
+
if (input.path)
|
|
18
|
+
return input.path;
|
|
19
|
+
if (input.sectionSlug && input.slug) {
|
|
20
|
+
return `${input.sectionSlug}/${input.slug}`;
|
|
21
|
+
}
|
|
22
|
+
return null;
|
|
23
|
+
}
|
|
@@ -18,7 +18,7 @@
|
|
|
18
18
|
* surface against future regressions.
|
|
19
19
|
*/
|
|
20
20
|
export { computeCanaryDrift, type CanaryDriftReport, type CanaryReportSlim, type DriftEntry, type DriftThresholds, type DriftVerdict, } from "./canary-drift.js";
|
|
21
|
-
export { type DocumentRef } from "./document-ref.js";
|
|
21
|
+
export { buildContextDocPath, type DocumentRef } from "./document-ref.js";
|
|
22
22
|
export { makeEditorialReference, type EditorialReference, type MakeEditorialReferenceArgs, } from "./editorial-reference.js";
|
|
23
23
|
export { isKnownEventType, KNOWN_EVENT_TYPES, type EventType, type KnownEventType, } from "./event-types.js";
|
|
24
24
|
export { FEATURE_FLAGS, type FeatureFlag, type FeatureFlagKey, } from "./feature-flags.js";
|
|
@@ -18,6 +18,7 @@
|
|
|
18
18
|
* surface against future regressions.
|
|
19
19
|
*/
|
|
20
20
|
export { computeCanaryDrift, } from "./canary-drift.js";
|
|
21
|
+
export { buildContextDocPath } from "./document-ref.js";
|
|
21
22
|
export { makeEditorialReference, } from "./editorial-reference.js";
|
|
22
23
|
export { isKnownEventType, KNOWN_EVENT_TYPES, } from "./event-types.js";
|
|
23
24
|
export { FEATURE_FLAGS, } from "./feature-flags.js";
|
|
@@ -14,11 +14,21 @@
|
|
|
14
14
|
* @see docs/decisions/D0037-run-classification-and-ownership-taxonomy.md
|
|
15
15
|
*/
|
|
16
16
|
export const KNOWN_OWNER_TEAMS = [
|
|
17
|
+
"ai-growth",
|
|
18
|
+
"billing-and-integrations",
|
|
19
|
+
"content-agent",
|
|
17
20
|
"content-lake",
|
|
18
|
-
"
|
|
19
|
-
"
|
|
20
|
-
"
|
|
21
|
-
"
|
|
21
|
+
"data",
|
|
22
|
+
"design-and-research",
|
|
23
|
+
"docs",
|
|
24
|
+
"editorial-experience",
|
|
25
|
+
"engineering",
|
|
26
|
+
"identity",
|
|
27
|
+
"media-library",
|
|
28
|
+
"product",
|
|
29
|
+
"runtime",
|
|
30
|
+
"sdk",
|
|
31
|
+
"ssi",
|
|
22
32
|
"studio",
|
|
23
33
|
];
|
|
24
34
|
/**
|
|
@@ -26,8 +36,11 @@ export const KNOWN_OWNER_TEAMS = [
|
|
|
26
36
|
* drift has been observed belong here. Unknown values pass through.
|
|
27
37
|
*/
|
|
28
38
|
const OWNER_TEAM_ALIASES = {
|
|
29
|
-
|
|
30
|
-
|
|
39
|
+
"core-docs": "docs",
|
|
40
|
+
coredocs: "docs",
|
|
41
|
+
documentation: "docs",
|
|
42
|
+
growth: "ai-growth",
|
|
43
|
+
media: "media-library",
|
|
31
44
|
studio_team: "studio",
|
|
32
45
|
"studio-team": "studio",
|
|
33
46
|
};
|
|
@@ -16,6 +16,7 @@
|
|
|
16
16
|
import { mkdirSync, writeFileSync } from "fs";
|
|
17
17
|
import { join } from "path";
|
|
18
18
|
import { canonicalDocRefLabel, isIdRef, isPathRef, isPerspectiveRef, isSlugRef, } from "../../_vendor/ailf-core/index.js";
|
|
19
|
+
import { buildContextDocPath } from "../../_vendor/ailf-shared/index.js";
|
|
19
20
|
import { fetchUrlContent, } from "../../pipeline/fetch-url-content.js";
|
|
20
21
|
import { createPerspectiveClient, createPublishedClient, getSanityClient, } from "../../sanity/client.js";
|
|
21
22
|
import { extractSymbolsForDoc, renderDocument, } from "../../sanity/document-renderers.js";
|
|
@@ -376,7 +377,20 @@ export class SanityDocFetcher {
|
|
|
376
377
|
: getSanityClient(toSanityOverrides(source));
|
|
377
378
|
const allMetadata = await client.fetch(ARTICLES_METADATA_BY_SLUGS_QUERY, { slugs: [...allSlugs] });
|
|
378
379
|
return allMetadata
|
|
379
|
-
.map((m) =>
|
|
380
|
+
.map((m) => {
|
|
381
|
+
const path = buildContextDocPath({
|
|
382
|
+
sectionSlug: m.sectionSlug,
|
|
383
|
+
slug: m.slug,
|
|
384
|
+
});
|
|
385
|
+
return {
|
|
386
|
+
_id: m._id,
|
|
387
|
+
_rev: m._rev,
|
|
388
|
+
slug: m.slug,
|
|
389
|
+
...(m.sectionSlug ? { sectionSlug: m.sectionSlug } : {}),
|
|
390
|
+
...(path ? { path } : {}),
|
|
391
|
+
title: m.title,
|
|
392
|
+
};
|
|
393
|
+
})
|
|
380
394
|
.sort((a, b) => a.slug.localeCompare(b.slug));
|
|
381
395
|
}
|
|
382
396
|
// -----------------------------------------------------------------------
|
|
@@ -15,6 +15,7 @@
|
|
|
15
15
|
* @see packages/core/src/ports/task-source.ts — TaskSource port
|
|
16
16
|
* @see docs/decisions/D0038-content-lake-authorable-task-modes.md
|
|
17
17
|
*/
|
|
18
|
+
import { buildContextDocPath } from "../../_vendor/ailf-shared/index.js";
|
|
18
19
|
import { filterByChangedDocs } from "./changed-docs-filter.js";
|
|
19
20
|
import { ContentLakeAuthorableTaskSchema } from "./repo-schemas.js";
|
|
20
21
|
// ---------------------------------------------------------------------------
|
|
@@ -223,9 +224,11 @@ function mapCanonicalDocRef(raw) {
|
|
|
223
224
|
case "slug":
|
|
224
225
|
return raw.slug ? { slug: raw.slug, reason } : null;
|
|
225
226
|
case "path": {
|
|
226
|
-
|
|
227
|
-
|
|
228
|
-
|
|
227
|
+
const path = buildContextDocPath({
|
|
228
|
+
path: raw.path,
|
|
229
|
+
sectionSlug: raw.sectionSlug,
|
|
230
|
+
slug: raw.slug,
|
|
231
|
+
});
|
|
229
232
|
return path ? { path, reason } : null;
|
|
230
233
|
}
|
|
231
234
|
case "id": {
|
|
@@ -233,10 +236,12 @@ function mapCanonicalDocRef(raw) {
|
|
|
233
236
|
const id = raw.docId || raw.docRefId || null;
|
|
234
237
|
if (!id)
|
|
235
238
|
return null;
|
|
236
|
-
// Carry slug and derived path as optional DX annotations
|
|
237
|
-
|
|
238
|
-
|
|
239
|
-
:
|
|
239
|
+
// Carry slug and derived path as optional DX annotations — single
|
|
240
|
+
// source of truth in `buildContextDocPath` (@sanity/ailf-shared).
|
|
241
|
+
const derivedPath = buildContextDocPath({
|
|
242
|
+
sectionSlug: raw.sectionSlug,
|
|
243
|
+
slug: raw.slug,
|
|
244
|
+
});
|
|
240
245
|
return {
|
|
241
246
|
id,
|
|
242
247
|
reason,
|
|
@@ -82,14 +82,15 @@ export class GapAnalysisStep {
|
|
|
82
82
|
const resolveRefs = (slugs) => slugs
|
|
83
83
|
.map((slug) => {
|
|
84
84
|
const m = refBySlug.get(slug);
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
}
|
|
92
|
-
:
|
|
85
|
+
if (!m)
|
|
86
|
+
return { documentId: "", slug, title: slug };
|
|
87
|
+
return {
|
|
88
|
+
documentId: m._id,
|
|
89
|
+
revision: m._rev,
|
|
90
|
+
slug: m.slug,
|
|
91
|
+
...(m.path ? { path: m.path } : {}),
|
|
92
|
+
title: m.title,
|
|
93
|
+
};
|
|
93
94
|
})
|
|
94
95
|
.filter((r) => r.documentId !== "");
|
|
95
96
|
// ── Build description→docs mapping from TaskSource ─────────
|
|
@@ -214,20 +214,32 @@ export function buildProvenanceInput(summary, ctx, options, autoScope) {
|
|
|
214
214
|
// summary.source undefined). Without this fallback, the report
|
|
215
215
|
// reads "production" regardless of what the dashboard sent.
|
|
216
216
|
// 3. "production" — last-resort built-in default.
|
|
217
|
-
|
|
217
|
+
//
|
|
218
|
+
// Per-field fallbacks (dataset/projectId/perspective) only fire when
|
|
219
|
+
// `summary.source` is absent or carries no `name` — i.e. the loadSource throw was
|
|
220
|
+
// swallowed. When summary.source is present, trust what the fetch
|
|
221
|
+
// actually used; papering over a missing `perspective` from
|
|
222
|
+
// `ctx.config.perspectiveOverride` makes provenance claim a release
|
|
223
|
+
// was used when it wasn't (W0295).
|
|
224
|
+
const sourceResolved = summary.source?.name !== undefined;
|
|
225
|
+
if (!sourceResolved && ctx.config.source) {
|
|
218
226
|
ctx.logger.warn(`[publish-report] summary.source is missing; falling back to ctx.config.source="${ctx.config.source}" for provenance.source.name`);
|
|
219
227
|
}
|
|
220
228
|
const source = {
|
|
221
229
|
baseUrl: summary.source?.baseUrl ?? "https://www.sanity.io/docs",
|
|
222
|
-
dataset:
|
|
230
|
+
dataset: sourceResolved
|
|
231
|
+
? (summary.source.dataset ?? "next")
|
|
232
|
+
: (ctx.config.datasetOverride ?? "next"),
|
|
223
233
|
documentIds: [],
|
|
224
234
|
llmsTxt: (summary.source?.baseUrl ?? "https://www.sanity.io/docs") + "/llms.txt",
|
|
225
235
|
name: summary.source?.name ?? ctx.config.source ?? "production",
|
|
226
|
-
perspective:
|
|
227
|
-
|
|
228
|
-
undefined,
|
|
236
|
+
perspective: sourceResolved
|
|
237
|
+
? summary.source.perspective
|
|
238
|
+
: (ctx.config.perspectiveOverride ?? undefined),
|
|
229
239
|
priorityDomain: "sanity.io",
|
|
230
|
-
projectId:
|
|
240
|
+
projectId: sourceResolved
|
|
241
|
+
? summary.source.projectId
|
|
242
|
+
: (ctx.config.projectIdOverride ?? "3do82whm"),
|
|
231
243
|
studioOrigin: "https://admin.sanity.io",
|
|
232
244
|
urls: [],
|
|
233
245
|
};
|
|
@@ -187,6 +187,13 @@ export declare function validateGraderJudgmentsCalibration(judgments: GraderJudg
|
|
|
187
187
|
* @param manifestSlugs - All slugs in the run's document manifest.
|
|
188
188
|
*/
|
|
189
189
|
export declare function populateHallucinationFields(judgments: GraderJudgment[], taskDocSlugs: Map<string, string[]>, manifestSlugs: Iterable<string>): void;
|
|
190
|
+
/**
|
|
191
|
+
* Per-variant scoring profiles passed to {@link extractStoredTestResults}.
|
|
192
|
+
* Each profile maps dimension id → weight. Variants whose dimensions don't
|
|
193
|
+
* intersect the supplied keys yield `compositeScore: undefined` rather than
|
|
194
|
+
* a misleading 0.
|
|
195
|
+
*/
|
|
196
|
+
export type StoredTestResultProfiles = Partial<Record<"gold" | "baseline", Record<string, number>>>;
|
|
190
197
|
/**
|
|
191
198
|
* Extract per-test results with model output from evaluation results.
|
|
192
199
|
*
|
|
@@ -194,9 +201,14 @@ export declare function populateHallucinationFields(judgments: GraderJudgment[],
|
|
|
194
201
|
* shape including response.output (truncated), latency, and cost.
|
|
195
202
|
* One StoredTestResult per test × model combination.
|
|
196
203
|
*
|
|
204
|
+
* When `profiles` is provided, each entry's `compositeScore` is computed as
|
|
205
|
+
* the weighted mean of its dimension scores using the profile matching its
|
|
206
|
+
* detected `variant`. Without profiles, `compositeScore` is omitted — legacy
|
|
207
|
+
* behavior preserved.
|
|
208
|
+
*
|
|
197
209
|
* See D0029 and docs/design-docs/score-drill-down.md (Phase 1).
|
|
198
210
|
*/
|
|
199
|
-
export declare function extractStoredTestResults(resultsPath: string): StoredTestResult[];
|
|
211
|
+
export declare function extractStoredTestResults(resultsPath: string, profiles?: StoredTestResultProfiles): StoredTestResult[];
|
|
200
212
|
/**
|
|
201
213
|
* W0198 — aggregate every per-test `SymbolPreflightReport` into a single
|
|
202
214
|
* resolver-health summary. Returns `undefined` when the run had no
|
|
@@ -469,6 +469,26 @@ export function populateHallucinationFields(judgments, taskDocSlugs, manifestSlu
|
|
|
469
469
|
* `responseOutputTruncated` still flips for the extreme tail.
|
|
470
470
|
*/
|
|
471
471
|
const MAX_RESPONSE_OUTPUT_LENGTH = 1_000_000;
|
|
472
|
+
/**
|
|
473
|
+
* Weighted mean of dimension scores. Mirrors the dashboard's read-side
|
|
474
|
+
* fallback in `apps/dashboard/src/data/projections/test-entries.ts` so writer
|
|
475
|
+
* and reader stay aligned. Returns `undefined` when no dimension matches the
|
|
476
|
+
* profile (caller decides whether that signals misconfiguration).
|
|
477
|
+
*/
|
|
478
|
+
function computeStoredCompositeScore(dimensions, weights) {
|
|
479
|
+
let weighted = 0;
|
|
480
|
+
let totalWeight = 0;
|
|
481
|
+
for (const dim of dimensions) {
|
|
482
|
+
const w = weights[dim.dimension];
|
|
483
|
+
if (w === undefined)
|
|
484
|
+
continue;
|
|
485
|
+
weighted += dim.score * w;
|
|
486
|
+
totalWeight += w;
|
|
487
|
+
}
|
|
488
|
+
if (totalWeight === 0)
|
|
489
|
+
return undefined;
|
|
490
|
+
return Math.round(weighted / totalWeight);
|
|
491
|
+
}
|
|
472
492
|
/**
|
|
473
493
|
* Extract per-test results with model output from evaluation results.
|
|
474
494
|
*
|
|
@@ -476,9 +496,14 @@ const MAX_RESPONSE_OUTPUT_LENGTH = 1_000_000;
|
|
|
476
496
|
* shape including response.output (truncated), latency, and cost.
|
|
477
497
|
* One StoredTestResult per test × model combination.
|
|
478
498
|
*
|
|
499
|
+
* When `profiles` is provided, each entry's `compositeScore` is computed as
|
|
500
|
+
* the weighted mean of its dimension scores using the profile matching its
|
|
501
|
+
* detected `variant`. Without profiles, `compositeScore` is omitted — legacy
|
|
502
|
+
* behavior preserved.
|
|
503
|
+
*
|
|
479
504
|
* See D0029 and docs/design-docs/score-drill-down.md (Phase 1).
|
|
480
505
|
*/
|
|
481
|
-
export function extractStoredTestResults(resultsPath) {
|
|
506
|
+
export function extractStoredTestResults(resultsPath, profiles) {
|
|
482
507
|
const results = readAndNormalizeResults(resultsPath);
|
|
483
508
|
const testResults = [];
|
|
484
509
|
for (const result of results) {
|
|
@@ -523,8 +548,13 @@ export function extractStoredTestResults(resultsPath) {
|
|
|
523
548
|
dimensions.push({ dimension, reason, score });
|
|
524
549
|
}
|
|
525
550
|
const tokenUsage = result.response?.tokenUsage;
|
|
551
|
+
const profileForVariant = profiles?.[variant];
|
|
552
|
+
const compositeScore = profileForVariant
|
|
553
|
+
? computeStoredCompositeScore(dimensions, profileForVariant)
|
|
554
|
+
: undefined;
|
|
526
555
|
testResults.push({
|
|
527
556
|
area,
|
|
557
|
+
...(compositeScore !== undefined && { compositeScore }),
|
|
528
558
|
cost: result.cost || undefined,
|
|
529
559
|
dimensions,
|
|
530
560
|
latencyMs: result.latencyMs,
|
|
@@ -1477,7 +1507,12 @@ export async function calculateAndWriteScores(options) {
|
|
|
1477
1507
|
log.info(`Grader judgments written to results/latest/grader-judgments.json (${judgments.length} judgments)`);
|
|
1478
1508
|
}
|
|
1479
1509
|
// Extract and persist per-test results (D0029: model output + metadata)
|
|
1480
|
-
|
|
1510
|
+
// Agent-harness produces a single profile shared across detected variants
|
|
1511
|
+
// (the docs/no-docs split doesn't apply — there is no gold/baseline pair).
|
|
1512
|
+
const testResults = extractStoredTestResults(baselineResultsPath, {
|
|
1513
|
+
gold: agentProfile,
|
|
1514
|
+
baseline: agentProfile,
|
|
1515
|
+
});
|
|
1481
1516
|
if (testResults.length > 0) {
|
|
1482
1517
|
writeFileSync(join(outDir, "test-results.json"), JSON.stringify(testResults, null, 2));
|
|
1483
1518
|
log.info(`Test results written to results/latest/test-results.json (${testResults.length} results)`);
|
|
@@ -1534,7 +1569,13 @@ export async function calculateAndWriteScores(options) {
|
|
|
1534
1569
|
writeFileSync(join(outDir, "grader-judgments.json"), JSON.stringify(judgments, null, 2));
|
|
1535
1570
|
log.info(`Grader judgments written to results/latest/grader-judgments.json (${judgments.length} judgments)`);
|
|
1536
1571
|
}
|
|
1537
|
-
|
|
1572
|
+
// Knowledge-probe deletes vars.docs in the compiler, so every entry's
|
|
1573
|
+
// detected variant is "baseline" — supply the probe profile under both
|
|
1574
|
+
// keys so the composite is populated regardless of detection.
|
|
1575
|
+
const testResults = extractStoredTestResults(baselineResultsPath, {
|
|
1576
|
+
gold: probeProfile,
|
|
1577
|
+
baseline: probeProfile,
|
|
1578
|
+
});
|
|
1538
1579
|
if (testResults.length > 0) {
|
|
1539
1580
|
writeFileSync(join(outDir, "test-results.json"), JSON.stringify(testResults, null, 2));
|
|
1540
1581
|
log.info(`Test results written to results/latest/test-results.json (${testResults.length} results)`);
|
|
@@ -1548,9 +1589,15 @@ export async function calculateAndWriteScores(options) {
|
|
|
1548
1589
|
// doc-coverage excluded). See docs/design-docs/named-scoring-profiles.md.
|
|
1549
1590
|
const goldProfile = resolveProfile("literacy", "gold", rubricConfig, LiteracyVariant.STANDARD);
|
|
1550
1591
|
const baselineProfileWeights = resolveProfile("literacy", LiteracyVariant.STANDARD, rubricConfig, LiteracyVariant.STANDARD);
|
|
1592
|
+
// Hoisted so the post-scoring extractStoredTestResults call against the
|
|
1593
|
+
// agentic results file can attach the matching profile (W0291).
|
|
1594
|
+
const agenticProfile = mode === LiteracyVariant.FULL && existsSync(agenticResultsPath)
|
|
1595
|
+
? resolveProfile("literacy", "gold", rubricConfig, LiteracyVariant.AGENTIC)
|
|
1596
|
+
: undefined;
|
|
1551
1597
|
log.debug("Loaded scoring profiles", {
|
|
1552
1598
|
gold: goldProfile,
|
|
1553
1599
|
baseline: baselineProfileWeights,
|
|
1600
|
+
...(agenticProfile && { agentic: agenticProfile }),
|
|
1554
1601
|
});
|
|
1555
1602
|
const baselineScores = calculateScores(baselineResultsPath, goldProfile, baselineProfileWeights, preflightOptions);
|
|
1556
1603
|
log.debug("Baseline scores calculated", {
|
|
@@ -1577,7 +1624,8 @@ export async function calculateAndWriteScores(options) {
|
|
|
1577
1624
|
let evaluationMode;
|
|
1578
1625
|
if (mode === LiteracyVariant.FULL && existsSync(agenticResultsPath)) {
|
|
1579
1626
|
log.info(`\nReading agentic results from: ${agenticResultsPath}`);
|
|
1580
|
-
|
|
1627
|
+
// Non-null assertion safe — the outer guard hoisting agenticProfile uses
|
|
1628
|
+
// the same condition; if we entered this block, the profile was resolved.
|
|
1581
1629
|
const agenticScores = scoreAgenticResults(agenticResultsPath, agenticProfile, preflightOptions);
|
|
1582
1630
|
log.debug("Agentic scores calculated", {
|
|
1583
1631
|
featureCount: Object.keys(agenticScores).length,
|
|
@@ -1681,11 +1729,21 @@ export async function calculateAndWriteScores(options) {
|
|
|
1681
1729
|
});
|
|
1682
1730
|
}
|
|
1683
1731
|
}
|
|
1684
|
-
// Extract and persist per-test results (D0029: model output + metadata)
|
|
1685
|
-
|
|
1686
|
-
//
|
|
1732
|
+
// Extract and persist per-test results (D0029: model output + metadata).
|
|
1733
|
+
// Literacy gold (with-docs) entries score against the default profile;
|
|
1734
|
+
// baseline (without-docs) entries score against the output-only profile.
|
|
1735
|
+
const testResults = extractStoredTestResults(baselineResultsPath, {
|
|
1736
|
+
gold: goldProfile,
|
|
1737
|
+
baseline: baselineProfileWeights,
|
|
1738
|
+
});
|
|
1739
|
+
// In full mode, also extract test results from agentic results — the
|
|
1740
|
+
// agentic file's gold entries score against the agentic profile while
|
|
1741
|
+
// baseline entries (if any leak through) still use the literacy baseline.
|
|
1687
1742
|
if (mode === LiteracyVariant.FULL && existsSync(agenticResultsPath)) {
|
|
1688
|
-
const agenticTestResults = extractStoredTestResults(agenticResultsPath
|
|
1743
|
+
const agenticTestResults = extractStoredTestResults(agenticResultsPath, {
|
|
1744
|
+
gold: agenticProfile,
|
|
1745
|
+
baseline: baselineProfileWeights,
|
|
1746
|
+
});
|
|
1689
1747
|
testResults.push(...agenticTestResults);
|
|
1690
1748
|
}
|
|
1691
1749
|
if (testResults.length > 0) {
|
package/dist/sanity/queries.d.ts
CHANGED
|
@@ -69,7 +69,7 @@ export declare const ALL_ARTICLES_QUERY = "\n *[_type == \"article\"\n && !(
|
|
|
69
69
|
*
|
|
70
70
|
* Usage: client.fetch(ARTICLES_METADATA_BY_SLUGS_QUERY, { slugs: ["slug-a", "slug-b"] })
|
|
71
71
|
*/
|
|
72
|
-
export declare const ARTICLES_METADATA_BY_SLUGS_QUERY = "\n *[_type == \"article\"\n && slug.current in $slugs\n && !(_id in path(\"drafts.**\"))\n ] {\n \"slug\": slug.current,\n _id,\n _rev,\n title\n }\n";
|
|
72
|
+
export declare const ARTICLES_METADATA_BY_SLUGS_QUERY = "\n *[_type == \"article\"\n && slug.current in $slugs\n && !(_id in path(\"drafts.**\"))\n ] {\n \"slug\": slug.current,\n \"sectionSlug\": primarySection->slug.current,\n _id,\n _rev,\n title\n }\n";
|
|
73
73
|
/**
|
|
74
74
|
* Fetch a single article by its slug — identical to ARTICLE_BY_SLUG_QUERY
|
|
75
75
|
* but designed to be called with a perspective-enabled client.
|
package/dist/sanity/queries.js
CHANGED
package/dist/sources.js
CHANGED
|
@@ -37,6 +37,44 @@ const DEFAULT_SOURCE = {
|
|
|
37
37
|
studioOrigin: "https://admin.sanity.io",
|
|
38
38
|
urls: [],
|
|
39
39
|
};
|
|
40
|
+
/**
|
|
41
|
+
* Apply `SourceOverrides` + env-var fallbacks to `DEFAULT_SOURCE`.
|
|
42
|
+
*
|
|
43
|
+
* The DEFAULT_SOURCE early-return branches are taken when `config/sources`
|
|
44
|
+
* is missing or empty — the production state, since the named source
|
|
45
|
+
* definitions actually live in the `sanity-literacy` preset's `sourceDefs`
|
|
46
|
+
* (which `loadSource` doesn't consult). Returning `DEFAULT_SOURCE`
|
|
47
|
+
* verbatim drops every override the caller passed in, including
|
|
48
|
+
* `perspective` — observed live as production-source release evals
|
|
49
|
+
* fetching the published doc revision (W0295).
|
|
50
|
+
*
|
|
51
|
+
* The merge order mirrors the priority-1 (env-baseUrl) branch. The two
|
|
52
|
+
* paths diverge in two ways, both intentional: this branch (a) pins
|
|
53
|
+
* `baseUrl` / `llmsTxt` / `name` / `priorityDomain` to `DEFAULT_SOURCE`,
|
|
54
|
+
* (b) returns `documentIds: []` (the prior `DEFAULT_SOURCE` shape) where
|
|
55
|
+
* priority-1 would return `undefined` — both fall through the same
|
|
56
|
+
* `length > 0` consumer check, so behaviorally equivalent.
|
|
57
|
+
*/
|
|
58
|
+
function applyOverridesToDefault(overrides) {
|
|
59
|
+
const allowedOrigins = overrides?.allowedOrigins ?? parseAllowedOriginsEnv();
|
|
60
|
+
const headers = overrides?.headers ?? parseHeadersEnv();
|
|
61
|
+
return {
|
|
62
|
+
...DEFAULT_SOURCE,
|
|
63
|
+
...(allowedOrigins ? { allowedOrigins } : {}),
|
|
64
|
+
// oxlint-disable-next-line @typescript-eslint/prefer-nullish-coalescing -- empty string env var should fall back
|
|
65
|
+
dataset: overrides?.dataset ?? (process.env.SANITY_DATASET || "next"),
|
|
66
|
+
documentIds: overrides?.documentIds ?? parseDocumentIdsEnv() ?? [],
|
|
67
|
+
...(headers ? { headers } : {}),
|
|
68
|
+
// oxlint-disable-next-line @typescript-eslint/prefer-nullish-coalescing -- empty string env var should fall back
|
|
69
|
+
perspective: overrides?.perspective ?? (process.env.SANITY_PERSPECTIVE || undefined),
|
|
70
|
+
// oxlint-disable-next-line @typescript-eslint/prefer-nullish-coalescing -- empty string env var should fall back
|
|
71
|
+
projectId: overrides?.projectId ?? (process.env.SANITY_PROJECT_ID || "3do82whm"),
|
|
72
|
+
// oxlint-disable-next-line @typescript-eslint/prefer-nullish-coalescing -- empty string env var should fall back
|
|
73
|
+
studioOrigin: overrides?.studioOrigin ??
|
|
74
|
+
(process.env.SANITY_STUDIO_ORIGIN || "https://admin.sanity.io"),
|
|
75
|
+
urls: overrides?.directUrls ?? parseDirectUrlsEnv(),
|
|
76
|
+
};
|
|
77
|
+
}
|
|
40
78
|
// ---------------------------------------------------------------------------
|
|
41
79
|
// Validation
|
|
42
80
|
// ---------------------------------------------------------------------------
|
|
@@ -117,12 +155,12 @@ export function loadSource(name, overrides, logger) {
|
|
|
117
155
|
defaultBaseUrl: DEFAULT_SOURCE.baseUrl,
|
|
118
156
|
});
|
|
119
157
|
console.log(" No config/sources found, using built-in default (sanity.io production)");
|
|
120
|
-
return
|
|
158
|
+
return applyOverridesToDefault(overrides);
|
|
121
159
|
}
|
|
122
160
|
if (!rawFile?.sources || Object.keys(rawFile.sources).length === 0) {
|
|
123
161
|
log.debug("config/sources is empty, falling back to built-in default");
|
|
124
162
|
console.log(" config/sources is empty, using built-in default");
|
|
125
|
-
return
|
|
163
|
+
return applyOverridesToDefault(overrides);
|
|
126
164
|
}
|
|
127
165
|
// Resolve which source to use
|
|
128
166
|
const sourceName =
|