@sanity/ailf 4.1.0 → 4.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (126) hide show
  1. package/config/package-surface.ts +37 -0
  2. package/config/preflight-scoring.ts +26 -0
  3. package/dist/_vendor/ailf-core/artifact-registry.d.ts +1 -1
  4. package/dist/_vendor/ailf-core/artifact-registry.js +47 -0
  5. package/dist/_vendor/ailf-core/config-helpers.d.ts +35 -0
  6. package/dist/_vendor/ailf-core/config-helpers.js +67 -0
  7. package/dist/_vendor/ailf-core/index.d.ts +1 -1
  8. package/dist/_vendor/ailf-core/index.js +1 -1
  9. package/dist/_vendor/ailf-core/ports/context.d.ts +18 -0
  10. package/dist/_vendor/ailf-core/ports/doc-fetcher.d.ts +30 -0
  11. package/dist/_vendor/ailf-core/ports/index.d.ts +3 -1
  12. package/dist/_vendor/ailf-core/ports/index.js +1 -0
  13. package/dist/_vendor/ailf-core/ports/mode-handler.d.ts +23 -0
  14. package/dist/_vendor/ailf-core/ports/package-surface-resolver.d.ts +71 -0
  15. package/dist/_vendor/ailf-core/ports/package-surface-resolver.js +36 -0
  16. package/dist/_vendor/ailf-core/schemas/eval-config.d.ts +6 -0
  17. package/dist/_vendor/ailf-core/schemas/eval-config.js +14 -0
  18. package/dist/_vendor/ailf-core/schemas/index.d.ts +1 -0
  19. package/dist/_vendor/ailf-core/schemas/index.js +1 -0
  20. package/dist/_vendor/ailf-core/schemas/symbol-preflight-report.d.ts +51 -0
  21. package/dist/_vendor/ailf-core/schemas/symbol-preflight-report.js +57 -0
  22. package/dist/_vendor/ailf-core/types/generalized-task.d.ts +20 -3
  23. package/dist/_vendor/ailf-core/types/index.d.ts +13 -1
  24. package/dist/_vendor/ailf-core/types/index.js +1 -0
  25. package/dist/_vendor/ailf-core/types/package-surface.d.ts +36 -0
  26. package/dist/_vendor/ailf-core/types/package-surface.js +13 -0
  27. package/dist/_vendor/ailf-core/types/preflight-scoring.d.ts +52 -0
  28. package/dist/_vendor/ailf-core/types/preflight-scoring.js +18 -0
  29. package/dist/_vendor/ailf-core/types/repo-config.d.ts +14 -0
  30. package/dist/_vendor/ailf-core/types/symbol-preflight-report.d.ts +66 -0
  31. package/dist/_vendor/ailf-core/types/symbol-preflight-report.js +25 -0
  32. package/dist/adapters/config-sources/file-config-adapter.js +1 -0
  33. package/dist/adapters/doc-fetchers/sanity-doc-fetcher.d.ts +25 -5
  34. package/dist/adapters/doc-fetchers/sanity-doc-fetcher.js +276 -95
  35. package/dist/adapters/index.d.ts +1 -0
  36. package/dist/adapters/index.js +1 -0
  37. package/dist/adapters/package-surface/dts-package-surface.d.ts +46 -0
  38. package/dist/adapters/package-surface/dts-package-surface.js +173 -0
  39. package/dist/adapters/package-surface/in-memory-package-surface.d.ts +15 -0
  40. package/dist/adapters/package-surface/in-memory-package-surface.js +28 -0
  41. package/dist/adapters/package-surface/index.d.ts +9 -0
  42. package/dist/adapters/package-surface/index.js +8 -0
  43. package/dist/adapters/package-surface/parse-dts-exports.d.ts +31 -0
  44. package/dist/adapters/package-surface/parse-dts-exports.js +54 -0
  45. package/dist/adapters/task-sources/repo-schemas.d.ts +22 -0
  46. package/dist/adapters/task-sources/repo-schemas.js +93 -1
  47. package/dist/adapters/task-sources/repo-task-source.js +11 -2
  48. package/dist/commands/pipeline-action.d.ts +2 -0
  49. package/dist/commands/pipeline-action.js +12 -0
  50. package/dist/commands/remote-pipeline.js +9 -2
  51. package/dist/commands/remote-results.d.ts +12 -1
  52. package/dist/commands/remote-results.js +25 -5
  53. package/dist/commands/validate-tasks.js +8 -2
  54. package/dist/composition-root.js +9 -0
  55. package/dist/config/package-surface.ts +37 -0
  56. package/dist/config/preflight-scoring.ts +26 -0
  57. package/dist/index.d.ts +2 -2
  58. package/dist/index.js +1 -1
  59. package/dist/orchestration/build-app-context.js +1 -0
  60. package/dist/orchestration/pipeline-orchestrator.d.ts +19 -1
  61. package/dist/orchestration/pipeline-orchestrator.js +38 -0
  62. package/dist/orchestration/steps/calculate-scores-step.js +11 -0
  63. package/dist/orchestration/steps/generate-configs-step.js +16 -1
  64. package/dist/orchestration/steps/run-eval-step.js +27 -0
  65. package/dist/pipeline/calculate-scores.d.ts +66 -5
  66. package/dist/pipeline/calculate-scores.js +141 -27
  67. package/dist/pipeline/compiler/index.d.ts +1 -1
  68. package/dist/pipeline/compiler/index.js +1 -1
  69. package/dist/pipeline/compiler/literacy-bridge.d.ts +9 -0
  70. package/dist/pipeline/compiler/literacy-bridge.js +2 -0
  71. package/dist/pipeline/compiler/mode-handlers/__fixtures__/agent-harness-example-tasks.js +0 -12
  72. package/dist/pipeline/compiler/mode-handlers/__fixtures__/knowledge-probe-example-tasks.js +0 -12
  73. package/dist/pipeline/compiler/mode-handlers/literacy/assertions.d.ts +1 -1
  74. package/dist/pipeline/compiler/mode-handlers/literacy/assertions.js +31 -4
  75. package/dist/pipeline/compiler/mode-handlers/literacy/compiler.js +190 -6
  76. package/dist/pipeline/compiler/mode-handlers/literacy/index.js +2 -0
  77. package/dist/pipeline/compiler/mode-handlers/literacy/types.d.ts +17 -2
  78. package/dist/pipeline/compiler/rubric-resolution.d.ts +17 -1
  79. package/dist/pipeline/compiler/rubric-resolution.js +78 -2
  80. package/dist/pipeline/compiler/scoring-bridge.d.ts +49 -2
  81. package/dist/pipeline/compiler/scoring-bridge.js +104 -10
  82. package/dist/pipeline/eval-fingerprint.d.ts +9 -0
  83. package/dist/pipeline/eval-fingerprint.js +7 -1
  84. package/dist/pipeline/preflight/compute-preflight.d.ts +67 -0
  85. package/dist/pipeline/preflight/compute-preflight.js +118 -0
  86. package/dist/pipeline/preflight/emit-symbol-preflight.d.ts +51 -0
  87. package/dist/pipeline/preflight/emit-symbol-preflight.js +102 -0
  88. package/dist/pipeline/preflight/load-package-surface.d.ts +14 -0
  89. package/dist/pipeline/preflight/load-package-surface.js +19 -0
  90. package/dist/pipeline/preflight/load-preflight-context.d.ts +13 -0
  91. package/dist/pipeline/preflight/load-preflight-context.js +25 -0
  92. package/dist/pipeline/preflight/load-preflight-scoring.d.ts +12 -0
  93. package/dist/pipeline/preflight/load-preflight-scoring.js +17 -0
  94. package/dist/pipeline/preflight/parse-imports.d.ts +62 -0
  95. package/dist/pipeline/preflight/parse-imports.js +125 -0
  96. package/dist/report-store.d.ts +8 -0
  97. package/dist/report-store.js +55 -6
  98. package/dist/sanity/document-renderers.d.ts +106 -0
  99. package/dist/sanity/document-renderers.js +307 -0
  100. package/dist/sanity/queries.d.ts +32 -11
  101. package/dist/sanity/queries.js +78 -0
  102. package/dist/sanity/symbol-index.d.ts +98 -0
  103. package/dist/sanity/symbol-index.js +615 -0
  104. package/dist/tasks/knowledge-probe/define-type-api.task.ts +2 -6
  105. package/dist/tasks/knowledge-probe/groq-projections.task.ts +0 -5
  106. package/dist/tasks/literacy/content-lake.task.ts +4 -10
  107. package/dist/tasks/literacy/frameworks.task.ts +2 -8
  108. package/dist/tasks/literacy/functions.task.ts +1 -4
  109. package/dist/tasks/literacy/groq.task.ts +3 -12
  110. package/dist/tasks/literacy/image-handling.task.ts +1 -4
  111. package/dist/tasks/literacy/nextjs-live.task.ts +1 -4
  112. package/dist/tasks/literacy/portable-text.task.ts +2 -8
  113. package/dist/tasks/literacy/studio-setup.task.ts +2 -8
  114. package/dist/tasks/literacy/visual-editing.task.ts +2 -8
  115. package/package.json +2 -1
  116. package/tasks/knowledge-probe/define-type-api.task.ts +2 -6
  117. package/tasks/knowledge-probe/groq-projections.task.ts +0 -5
  118. package/tasks/literacy/content-lake.task.ts +4 -10
  119. package/tasks/literacy/frameworks.task.ts +2 -8
  120. package/tasks/literacy/functions.task.ts +1 -4
  121. package/tasks/literacy/groq.task.ts +3 -12
  122. package/tasks/literacy/image-handling.task.ts +1 -4
  123. package/tasks/literacy/nextjs-live.task.ts +1 -4
  124. package/tasks/literacy/portable-text.task.ts +2 -8
  125. package/tasks/literacy/studio-setup.task.ts +2 -8
  126. package/tasks/literacy/visual-editing.task.ts +2 -8
@@ -0,0 +1,173 @@
1
+ /**
2
+ * DtsPackageSurface — `PackageSurfaceResolver` adapter that reads installed
3
+ * package `.d.ts` files from `node_modules`.
4
+ *
5
+ * Resolution flow:
6
+ * 1. Find the package's root directory via the configured resolver
7
+ * (default uses `createRequire` against the working directory).
8
+ * 2. Read its `package.json` to capture the resolved `version` and the
9
+ * `.d.ts` entry path (`types`, `typings`, or `exports["."].types`).
10
+ * 3. Parse the entry `.d.ts` for top-level export declarations.
11
+ * 4. Follow ONE hop of `export * from "./relative"` re-exports — direct
12
+ * siblings only, no transitive walking.
13
+ * 5. Cache the result by package name for the resolver's lifetime.
14
+ *
15
+ * Throws `PackageSurfaceResolverError` with a typed `reason` when the
16
+ * package isn't installed or its types entry can't be resolved. Callers
17
+ * (the W0198 preflight) catch the error and convert it into per-binding
18
+ * `unresolved` findings rather than `missing` deductions.
19
+ */
20
+ import { existsSync, readFileSync } from "node:fs";
21
+ import { createRequire } from "node:module";
22
+ import { dirname, isAbsolute, join, resolve } from "node:path";
23
+ import { pathToFileURL } from "node:url";
24
+ import { PackageSurfaceResolverError, } from "../../_vendor/ailf-core/index.js";
25
+ import { parseDtsExports } from "./parse-dts-exports.js";
26
/**
 * Resolver that derives a package's exported symbol names from the `.d.ts`
 * entry of the copy installed on disk.
 *
 * Successful lookups are memoised per package name for the lifetime of the
 * instance. Failed lookups are never stored, so a later call retries.
 */
export class DtsPackageSurface {
    resolveRoot;
    cache = new Map();
    /**
     * @param opts Optional settings. `resolvePackageRoot` replaces the default
     *   root lookup entirely; otherwise `resolveFromDir` (falling back to the
     *   current working directory) anchors the default lookup.
     */
    constructor(opts = {}) {
        const injected = opts.resolvePackageRoot;
        if (injected !== undefined && injected !== null) {
            this.resolveRoot = injected;
        }
        else {
            const anchorDir = opts.resolveFromDir ?? process.cwd();
            this.resolveRoot = makeDefaultPackageRootResolver(anchorDir);
        }
    }
    /**
     * Resolve the export surface of `pkg`.
     *
     * @param pkg Package name to look up.
     * @returns `{ pkg, version, symbols }` for the installed package.
     * @throws PackageSurfaceResolverError with reason `package-not-installed`
     *   when no root or no `package.json` is found, and `types-entry-missing`
     *   when no declared types entry exists on disk.
     */
    async resolveExports(pkg) {
        const hit = this.cache.get(pkg);
        if (hit) {
            return hit;
        }
        const packageDir = this.resolveRoot(pkg);
        if (!packageDir) {
            throw new PackageSurfaceResolverError("package-not-installed", pkg, `Package "${pkg}" was not resolvable from the configured lookup path.`);
        }
        const manifestPath = join(packageDir, "package.json");
        if (!existsSync(manifestPath)) {
            throw new PackageSurfaceResolverError("package-not-installed", pkg, `Package "${pkg}" has no package.json at "${manifestPath}".`);
        }
        const manifest = readPackageJson(manifestPath, pkg);
        // A manifest without a version still yields a surface; "0.0.0" is the placeholder.
        const version = manifest.version ?? "0.0.0";
        const entryFile = resolveTypesEntry(manifest, packageDir);
        if (!entryFile) {
            throw new PackageSurfaceResolverError("types-entry-missing", pkg, `Package "${pkg}@${version}" does not declare a \`types\` entry ` +
                `(checked package.json \`types\`, \`typings\`, and \`exports["."].types\`).`);
        }
        const resolved = {
            pkg,
            version,
            symbols: readSurface(pkg, version, entryFile),
        };
        this.cache.set(pkg, resolved);
        return resolved;
    }
}
63
/**
 * Read and parse a package manifest from disk.
 *
 * @param path Absolute path to the `package.json` file.
 * @param pkg Package name, used only to label the thrown error.
 * @returns The parsed manifest object.
 * @throws PackageSurfaceResolverError with reason `parse-failed` when the
 *   file cannot be read, is not valid JSON, or does not hold a JSON object
 *   at the top level.
 */
function readPackageJson(path, pkg) {
    let manifest;
    try {
        // `JSON.parse` rejects a leading UTF-8 BOM, so strip it before parsing.
        const text = readFileSync(path, "utf-8").replace(/^\uFEFF/, "");
        manifest = JSON.parse(text);
    }
    catch (err) {
        throw new PackageSurfaceResolverError("parse-failed", pkg, `Failed to parse "${path}": ${err instanceof Error ? err.message : String(err)}`);
    }
    // Valid JSON that is not an object (`null`, an array, a primitive) would
    // otherwise crash or be misreported by callers that read manifest fields.
    if (manifest === null || typeof manifest !== "object" || Array.isArray(manifest)) {
        throw new PackageSurfaceResolverError("parse-failed", pkg, `Failed to parse "${path}": expected a JSON object at the top level.`);
    }
    return manifest;
}
71
/**
 * Pick the declaration entry file the resolver should parse.
 *
 * Candidates are collected in this order and the first one that exists on
 * disk wins:
 *   1. `package.json#types`
 *   2. `package.json#typings`
 *   3. The root export target: `exports["."]`, or `exports` itself when it
 *      uses the sugar form (a string, or a condition object with no keys
 *      starting with `.`). A string target counts only if it ends in `.d.ts`.
 *   4. For an object target, its `types` condition, then `types` conditions
 *      nested under other conditions (`import`, `require`, `default`, ...).
 *
 * @param pkgJson Parsed package manifest.
 * @param root Absolute path of the package root; relative candidates are
 *   joined onto it.
 * @returns Absolute path of the first existing candidate, or `null`.
 */
function resolveTypesEntry(pkgJson, root) {
    const candidates = [];
    if (typeof pkgJson.types === "string")
        candidates.push(pkgJson.types);
    if (typeof pkgJson.typings === "string")
        candidates.push(pkgJson.typings);
    const rootTarget = pickRootExportTarget(pkgJson.exports);
    if (typeof rootTarget === "string") {
        if (rootTarget.endsWith(".d.ts"))
            candidates.push(rootTarget);
    }
    else {
        collectConditionalTypes(rootTarget, candidates, 0);
    }
    for (const candidate of candidates) {
        const abs = isAbsolute(candidate) ? candidate : join(root, candidate);
        if (existsSync(abs))
            return abs;
    }
    return null;
}
/**
 * Return the export target for the package root (`"."`), honouring the
 * `exports` sugar form where the field itself is the root target.
 */
function pickRootExportTarget(exportsField) {
    if (typeof exportsField === "string")
        return exportsField;
    if (!exportsField || typeof exportsField !== "object" || Array.isArray(exportsField)) {
        return undefined;
    }
    // Subpath maps have keys starting with "."; anything else is a bare
    // condition object that applies to the root entry.
    const hasSubpathKeys = Object.keys(exportsField).some((key) => key.startsWith("."));
    return hasSubpathKeys ? exportsField["."] : exportsField;
}
/**
 * Append `types` entries found in a conditional export target to `out`:
 * the target's own `types` first, then those nested under other conditions.
 * Recursion is depth-limited so malformed manifests cannot recurse unbounded.
 */
function collectConditionalTypes(target, out, depth) {
    const maxDepth = 4;
    if (!target || typeof target !== "object" || Array.isArray(target) || depth > maxDepth) {
        return;
    }
    const typesField = target.types;
    if (typeof typesField === "string") {
        out.push(typesField);
    }
    else if (typesField && typeof typesField === "object") {
        // Conditional `types` entry (rare) — take the first string leaf.
        const leaf = Object.values(typesField).find((value) => typeof value === "string");
        if (leaf !== undefined)
            out.push(leaf);
    }
    for (const [condition, nested] of Object.entries(target)) {
        if (condition !== "types")
            collectConditionalTypes(nested, out, depth + 1);
    }
}
111
/**
 * Collect the exported names reachable from a declaration entry file.
 *
 * The entry file is parsed, then each relative `export * from` target it
 * names is parsed once. Targets of those targets are not followed, and
 * bare (non-relative) specifiers are skipped.
 *
 * @param pkg Package name, used to label errors.
 * @param version Package version, used to label errors.
 * @param entryPath Absolute path of the declaration entry file.
 * @returns Name-sorted list of `{ name, source: "types" }` records.
 * @throws PackageSurfaceResolverError with reason `parse-failed` when a
 *   file cannot be read.
 */
function readSurface(pkg, version, entryPath) {
    const exported = new Set();
    const seen = new Set();
    const load = (file) => {
        try {
            return readFileSync(file, "utf-8");
        }
        catch (err) {
            throw new PackageSurfaceResolverError("parse-failed", pkg, `Failed to read "${file}" for "${pkg}@${version}": ${err instanceof Error ? err.message : String(err)}`);
        }
    };
    const visit = (file, remainingHops) => {
        if (seen.has(file)) {
            return;
        }
        seen.add(file);
        const parsed = parseDtsExports(load(file));
        for (const exportedName of parsed.names) {
            exported.add(exportedName);
        }
        if (remainingHops <= 0) {
            return;
        }
        const here = dirname(file);
        for (const specifier of parsed.reExports) {
            // Bare specifiers point at other packages — out of scope.
            if (specifier.startsWith(".")) {
                const target = resolveRelativeDts(here, specifier);
                if (target) {
                    visit(target, remainingHops - 1);
                }
            }
        }
    };
    // A budget of one hop: the entry file plus its direct re-export targets.
    visit(entryPath, 1);
    const ordered = [...exported];
    ordered.sort();
    return ordered.map((name) => ({ name, source: "types" }));
}
142
/**
 * Resolve a relative re-export specifier to an existing declaration file.
 *
 * Probes, in order:
 *   1. the specifier literally, when it already names a declaration file
 *      (`.d.ts`, `.d.mts`, `.d.cts`);
 *   2. the declaration twin of a runtime extension — `./x.js` → `x.d.ts`,
 *      `./x.mjs` → `x.d.mts`, `./x.cjs` → `x.d.cts` — because declaration
 *      files emitted for ESM packages re-export with the runtime extension;
 *   3. `<spec>.d.ts`;
 *   4. `<spec>/index.d.ts`.
 *
 * @param baseDir Directory of the file containing the re-export.
 * @param spec Relative module specifier as written in the source.
 * @returns Absolute path of the first probe that exists, or `null`.
 */
function resolveRelativeDts(baseDir, spec) {
    const base = resolve(baseDir, spec);
    const candidates = [];
    if (/\.d\.[cm]?ts$/.test(base)) {
        candidates.push(base);
    }
    const runtimeExt = /\.([cm]?)js$/.exec(base);
    if (runtimeExt) {
        const stem = base.slice(0, -runtimeExt[0].length);
        candidates.push(`${stem}.d.${runtimeExt[1]}ts`);
    }
    candidates.push(`${base}.d.ts`, join(base, "index.d.ts"));
    for (const path of candidates) {
        if (existsSync(path))
            return path;
    }
    return null;
}
159
/**
 * Build the default package-root lookup, anchored at `fromDir`.
 *
 * The returned function first asks Node to resolve `<pkg>/package.json`.
 * If that fails — which also happens for installed packages whose `exports`
 * map does not expose `./package.json` — it probes each directory Node
 * would search for `<dir>/<pkg>/package.json` directly on disk.
 *
 * @param fromDir Directory that module resolution should start from.
 * @returns A function mapping a package name to its root directory, or to
 *   `null` when the package cannot be found.
 */
function makeDefaultPackageRootResolver(fromDir) {
    // `createRequire` needs a file URL or path that ends with a slash so
    // it knows it's a directory, not a file.
    const anchor = fromDir.endsWith("/") ? fromDir : `${fromDir}/`;
    const req = createRequire(pathToFileURL(anchor));
    return (pkg) => {
        try {
            const pkgJsonPath = req.resolve(`${pkg}/package.json`);
            return dirname(pkgJsonPath);
        }
        catch {
            // `resolve.paths` returns null for core modules; treat as no dirs.
            const lookupDirs = req.resolve.paths(pkg) ?? [];
            for (const dir of lookupDirs) {
                const candidate = join(dir, pkg);
                if (existsSync(join(candidate, "package.json")))
                    return candidate;
            }
            return null;
        }
    };
}
@@ -0,0 +1,15 @@
1
+ /**
2
+ * InMemoryPackageSurface — `PackageSurfaceResolver` test double.
3
+ *
4
+ * Backed by a plain `Map<string, PackageSurface>`; calls for unknown
5
+ * packages throw the same `package-not-installed` error the
6
+ * `DtsPackageSurface` adapter throws, so test scenarios for the
7
+ * `unresolved` path need no special handling.
8
+ */
9
+ import { type PackageSurface, type PackageSurfaceResolver } from "../../_vendor/ailf-core/index.d.ts";
10
+ export declare class InMemoryPackageSurface implements PackageSurfaceResolver {
11
+ private readonly surfaces;
12
+ constructor(surfaces?: Iterable<PackageSurface>);
13
+ set(surface: PackageSurface): void;
14
+ resolveExports(pkg: string): Promise<PackageSurface>;
15
+ }
@@ -0,0 +1,28 @@
1
+ /**
2
+ * InMemoryPackageSurface — `PackageSurfaceResolver` test double.
3
+ *
4
+ * Backed by a plain `Map<string, PackageSurface>`; calls for unknown
5
+ * packages throw the same `package-not-installed` error the
6
+ * `DtsPackageSurface` adapter throws, so test scenarios for the
7
+ * `unresolved` path need no special handling.
8
+ */
9
+ import { PackageSurfaceResolverError, } from "../../_vendor/ailf-core/index.js";
10
/**
 * Map-backed resolver for tests. Surfaces are keyed by their `pkg` field;
 * registering a second surface for the same package replaces the first.
 */
export class InMemoryPackageSurface {
    surfaces;
    /**
     * @param surfaces Initial surfaces to register, in iteration order.
     */
    constructor(surfaces = []) {
        this.surfaces = new Map(Array.from(surfaces, (entry) => [entry.pkg, entry]));
    }
    /** Register `surface`, or replace the one stored under the same `pkg`. */
    set(surface) {
        const { pkg } = surface;
        this.surfaces.set(pkg, surface);
    }
    /**
     * Look up a registered surface.
     *
     * @param pkg Package name.
     * @returns The stored surface object itself.
     * @throws PackageSurfaceResolverError with reason `package-not-installed`
     *   when nothing is registered for `pkg`.
     */
    async resolveExports(pkg) {
        const found = this.surfaces.get(pkg);
        if (found) {
            return found;
        }
        throw new PackageSurfaceResolverError("package-not-installed", pkg, `InMemoryPackageSurface has no entry for "${pkg}".`);
    }
}
@@ -0,0 +1,9 @@
1
+ /**
2
+ * Package-surface resolver adapters.
3
+ *
4
+ * @see packages/core/src/ports/package-surface-resolver.ts
5
+ */
6
+ export { DtsPackageSurface, type DtsPackageSurfaceOptions, type PackageRootResolver, } from "./dts-package-surface.js";
7
+ export { InMemoryPackageSurface } from "./in-memory-package-surface.js";
8
+ export { parseDtsExports } from "./parse-dts-exports.js";
9
+ export type { ParsedDtsExports } from "./parse-dts-exports.js";
@@ -0,0 +1,8 @@
1
+ /**
2
+ * Package-surface resolver adapters.
3
+ *
4
+ * @see packages/core/src/ports/package-surface-resolver.ts
5
+ */
6
+ export { DtsPackageSurface, } from "./dts-package-surface.js";
7
+ export { InMemoryPackageSurface } from "./in-memory-package-surface.js";
8
+ export { parseDtsExports } from "./parse-dts-exports.js";
@@ -0,0 +1,31 @@
1
+ /**
2
+ * parse-dts-exports — pure function that extracts the public surface of a
3
+ * single `.d.ts` file as a list of top-level exported binding names plus
4
+ * any `export * from "./relative"` re-export specifiers.
5
+ *
6
+ * Implementation: delegates to `oxc-parser`'s `staticExports` view, which
7
+ * already decomposes each export statement into entries with `importName` /
8
+ * `exportName` / `moduleRequest` discriminators. We translate that view
9
+ * into the two outputs the W0198 preflight cares about — bare names and
10
+ * wildcard re-export specifiers — and drop default exports per the
11
+ * design's "named-bindings only" rule.
12
+ *
13
+ * Why oxc-parser instead of regex: top-level `.d.ts` syntax has enough TS
14
+ * surface area (declaration merging, conditional `exports` map types,
15
+ * ambient namespace augmentation) that a real AST is cheaper to maintain
16
+ * than a regex with the same coverage. Why oxc-parser instead of
17
+ * `typescript`: typescript isn't in `@sanity/ailf`'s runtime install graph
18
+ * and adding it adds ~50MB; oxc-parser is a few-MB native binary aligned
19
+ * with our existing `oxlint` / `oxfmt` toolchain.
20
+ */
21
+ export interface ParsedDtsExports {
22
+ /** Bare exported identifier names found in this file. */
23
+ readonly names: readonly string[];
24
+ /**
25
+ * Specifiers from `export * from "<spec>"` declarations. Only relative
26
+ * specifiers (starting with `.`) are useful for one-hop following; the
27
+ * caller decides which to resolve.
28
+ */
29
+ readonly reExports: readonly string[];
30
+ }
31
+ export declare function parseDtsExports(src: string): ParsedDtsExports;
@@ -0,0 +1,54 @@
1
+ /**
2
+ * parse-dts-exports — pure function that extracts the public surface of a
3
+ * single `.d.ts` file as a list of top-level exported binding names plus
4
+ * any `export * from "./relative"` re-export specifiers.
5
+ *
6
+ * Implementation: delegates to `oxc-parser`'s `staticExports` view, which
7
+ * already decomposes each export statement into entries with `importName` /
8
+ * `exportName` / `moduleRequest` discriminators. We translate that view
9
+ * into the two outputs the W0198 preflight cares about — bare names and
10
+ * wildcard re-export specifiers — and drop default exports per the
11
+ * design's "named-bindings only" rule.
12
+ *
13
+ * Why oxc-parser instead of regex: top-level `.d.ts` syntax has enough TS
14
+ * surface area (declaration merging, conditional `exports` map types,
15
+ * ambient namespace augmentation) that a real AST is cheaper to maintain
16
+ * than a regex with the same coverage. Why oxc-parser instead of
17
+ * `typescript`: typescript isn't in `@sanity/ailf`'s runtime install graph
18
+ * and adding it adds ~50MB; oxc-parser is a few-MB native binary aligned
19
+ * with our existing `oxlint` / `oxfmt` toolchain.
20
+ */
21
+ import { parseSync } from "oxc-parser";
22
/**
 * Extract the named export bindings and wildcard re-export specifiers from
 * the text of a single declaration file.
 *
 * @param src Source text of the declaration file.
 * @returns `names` — de-duplicated, sorted exported binding names;
 *   `reExports` — module specifiers of `export * from "<spec>"` statements,
 *   in source order.
 */
export function parseDtsExports(src) {
    // The filename is only a hint to the parser; `lang: "dts"` selects the
    // declaration-file grammar.
    const parsed = parseSync("input.d.ts", src, { lang: "dts" });
    const bindingNames = new Set();
    const wildcardSpecifiers = [];
    for (const statement of parsed.module.staticExports) {
        for (const entry of statement.entries) {
            const { importName, exportName, moduleRequest } = entry;
            // `export * from "./other"` exposes no binding of its own; record
            // the specifier so the caller can decide whether to follow it.
            const isWildcard = importName.kind === "AllButDefault" && moduleRequest !== null;
            if (isWildcard) {
                wildcardSpecifiers.push(moduleRequest.value);
            }
            else if (exportName.kind === "Name" && exportName.name) {
                // Everything that surfaces a named binding lands here,
                // including `export * as ns from "./other"`. Entries whose
                // export name is not a plain name (e.g. default exports) are
                // intentionally ignored.
                bindingNames.add(exportName.name);
            }
        }
    }
    const names = Array.from(bindingNames);
    names.sort();
    return { names, reExports: wildcardSpecifiers };
}
@@ -1425,6 +1425,22 @@ export declare function parseCanonicalTaskFile(raw: unknown, filename: string):
1425
1425
  * GeneralizedTaskDefinition shape.
1426
1426
  */
1427
1427
  export declare function detectLegacyFieldNames(raw: unknown, filename: string): string[];
1428
+ interface MigrationResult {
1429
+ migrated: unknown;
1430
+ warnings: string[];
1431
+ }
1432
+ /**
1433
+ * Pre-process legacy `prompt.vars.{task,docs,__featureArea}` into the
1434
+ * canonical shape. Backwards-compatible: legacy-shape tasks continue to
1435
+ * load, but a deprecation warning is emitted per affected task.
1436
+ *
1437
+ * Legacy: prompt: { vars: { task: "...", docs: "file://..." } }
1438
+ * Canonical: prompt: { text: "..." }
1439
+ *
1440
+ * Applies to every task regardless of mode. Per-task dedup: at most one
1441
+ * warning per task per call, listing every reserved key that was present.
1442
+ */
1443
+ export declare function migratePromptShape(raw: unknown, filename: string): MigrationResult;
1428
1444
  /**
1429
1445
  * Zod schema for .ailf/config.yaml — controls documentation source,
1430
1446
  * report destination, and trigger behavior for evaluations from an
@@ -1455,6 +1471,12 @@ export declare const RepoConfigSchema: z.ZodObject<{
1455
1471
  gapAnalysis: z.ZodOptional<z.ZodBoolean>;
1456
1472
  apiUrl: z.ZodOptional<z.ZodString>;
1457
1473
  }, z.core.$strip>>;
1474
+ grader: z.ZodOptional<z.ZodObject<{
1475
+ context: z.ZodOptional<z.ZodEnum<{
1476
+ "rubric-only": "rubric-only";
1477
+ "with-docs": "with-docs";
1478
+ }>>;
1479
+ }, z.core.$strip>>;
1458
1480
  output: z.ZodOptional<z.ZodObject<{
1459
1481
  dir: z.ZodOptional<z.ZodString>;
1460
1482
  }, z.core.$strip>>;
@@ -141,11 +141,30 @@ const AssertionSchema = z.union([
141
141
  // ---------------------------------------------------------------------------
142
142
  // Shared field schemas — building blocks reused across mode variants
143
143
  // ---------------------------------------------------------------------------
144
+ /**
145
+ * Variable keys reserved by the AILF compilers — populated automatically
146
+ * from canonical task fields (`prompt.text`, `context.docs`, `area`).
147
+ * Mirrors `ReservedPromptVarKey` in `@sanity/ailf-core`; the `satisfies`
148
+ * clause (present in the TypeScript source; erased from this compiled output) makes drift a build error.
149
+ */
150
+ const RESERVED_PROMPT_VAR_KEYS = [
151
+ "task",
152
+ "docs",
153
+ "__featureArea",
154
+ ];
144
155
  const TaskPromptSchema = z.object({
145
156
  template: z.string().optional(),
146
157
  text: z.string().optional(),
147
158
  systemMessage: z.string().optional(),
148
- vars: z.record(z.string(), z.unknown()).optional(),
159
+ vars: z
160
+ .record(z.string(), z.unknown())
161
+ .refine((vars) => !RESERVED_PROMPT_VAR_KEYS.some((key) => key in vars), {
162
+ message: `prompt.vars contains a reserved key. Reserved keys: ` +
163
+ RESERVED_PROMPT_VAR_KEYS.join(", ") +
164
+ `. Use prompt.text for the prompt body and context.docs for ` +
165
+ `documentation references.`,
166
+ })
167
+ .optional(),
149
168
  });
150
169
  const RubricRefSchema = z.union([
151
170
  z.object({ ref: z.string().min(1) }),
@@ -416,6 +435,64 @@ export function detectLegacyFieldNames(raw, filename) {
416
435
  }
417
436
  return warnings;
418
437
  }
438
/**
 * Rewrite tasks that still carry reserved keys (`task`, `docs`,
 * `__featureArea`) under `prompt.vars` so they pass the canonical schema.
 *
 *   Before: prompt: { vars: { task: "...", docs: "file://..." } }
 *   After:  prompt: { text: "..." }
 *
 * For each affected task:
 *   - `vars.task` becomes `prompt.text` unless `prompt.text` is already set;
 *   - every reserved key is removed from `vars`;
 *   - `vars` itself is removed when nothing else remains in it;
 *   - exactly one warning is recorded, naming all reserved keys found.
 *
 * Input that is not an array is returned untouched. Tasks without reserved
 * keys are returned as the same object; affected tasks are shallow copies.
 *
 * @param raw Parsed task file content.
 * @param filename File name used to prefix warnings.
 * @returns `{ migrated, warnings }`.
 */
export function migratePromptShape(raw, filename) {
    const warnings = [];
    if (!Array.isArray(raw)) {
        return { migrated: raw, warnings };
    }
    const isObjectLike = (value) => typeof value === "object" && value !== null;
    const migrateTask = (task, index) => {
        if (!isObjectLike(task)) {
            return task;
        }
        const prompt = task.prompt;
        if (!isObjectLike(prompt)) {
            return task;
        }
        const vars = prompt.vars;
        if (!isObjectLike(vars)) {
            return task;
        }
        const legacyKeys = RESERVED_PROMPT_VAR_KEYS.filter((key) => key in vars);
        if (legacyKeys.length === 0) {
            return task;
        }
        const label = typeof task.id === "string" ? task.id : `task[${index}]`;
        const nextPrompt = { ...prompt };
        const remainingVars = { ...vars };
        // An explicit prompt.text always wins over the legacy vars.task body.
        if (legacyKeys.includes("task") && nextPrompt.text === undefined) {
            nextPrompt.text = remainingVars.task;
        }
        for (const key of legacyKeys) {
            delete remainingVars[key];
        }
        // Keep the migrated shape minimal: no empty `vars` object.
        if (Object.keys(remainingVars).length > 0) {
            nextPrompt.vars = remainingVars;
        }
        else {
            delete nextPrompt.vars;
        }
        warnings.push(`[${filename}] ${label}: deprecated prompt.vars keys ` +
            `(${legacyKeys.join(", ")}) — migrated to canonical shape ` +
            `(prompt.text + context.docs). Update the task source to silence ` +
            `this warning.`);
        return { ...task, prompt: nextPrompt };
    };
    const migrated = raw.map(migrateTask);
    return { migrated, warnings };
}
419
496
  // ---------------------------------------------------------------------------
420
497
  // Config schemas — specific to the eval pipeline
421
498
  // ---------------------------------------------------------------------------
@@ -489,6 +566,20 @@ const ExecutionConfigSchema = z
489
566
  apiUrl: z.string().url().optional(),
490
567
  })
491
568
  .optional();
569
+ /**
570
+ * Grader configuration.
571
+ *
572
+ * - `context: "rubric-only"` — grader sees only the rubric template +
573
+ * criteria + candidate response.
574
+ * - `context: "with-docs"` — canonical reference content is injected into
575
+ * the assertion's `rubricPrompt` so the grader has authoritative ground
576
+ * truth.
577
+ */
578
+ const GraderConfigSchema = z
579
+ .object({
580
+ context: z.enum(["rubric-only", "with-docs"]).optional(),
581
+ })
582
+ .optional();
492
583
  /**
493
584
  * Task-source configuration (W0077 Phase 6h). Replaces the retired
494
585
  * `--task-source` and `--repo-tasks-path` CLI flags on `ailf run`.
@@ -581,6 +672,7 @@ export const RepoConfigSchema = z.object({
581
672
  reportStore: ReportStoreConfigSchema,
582
673
  publish: PublishConfigSchema,
583
674
  execution: ExecutionConfigSchema,
675
+ grader: GraderConfigSchema,
584
676
  output: OutputConfigSchema,
585
677
  owner: OwnerConfigSchema,
586
678
  agentic: AgenticConfigSchema,
@@ -22,7 +22,7 @@ import { existsSync, readdirSync, readFileSync } from "fs";
22
22
  import { resolve } from "path";
23
23
  import { load } from "js-yaml";
24
24
  import { CANONICAL_EVAL_MODES } from "../../_vendor/ailf-shared/index.js";
25
- import { detectLegacyFieldNames, parseCanonicalTaskFile, } from "./repo-schemas.js";
25
+ import { detectLegacyFieldNames, migratePromptShape, parseCanonicalTaskFile, } from "./repo-schemas.js";
26
26
  import { discoverTsTaskFiles, loadTsTaskFile } from "./task-file-loader.js";
27
27
  /** Set of canonical mode names for O(1) lookup */
28
28
  const KNOWN_MODES = new Set(CANONICAL_EVAL_MODES);
@@ -69,10 +69,19 @@ export class RepoTaskSource {
69
69
  legacyWarnings.join("\n") +
70
70
  "\n\nSee contributing-tasks.md for the canonical task format.");
71
71
  }
72
+ // W0193: pre-migrate legacy prompt.vars.{task,docs,__featureArea}
73
+ // to the canonical prompt.text + context.docs shape. Mode-agnostic —
74
+ // every mode's TaskPromptSchema rejects reserved keys, so the shim
75
+ // unblocks legacy tasks regardless of mode. Per-task deprecation
76
+ // warning fires on stderr.
77
+ const { migrated, warnings: deprecationWarnings } = migratePromptShape(parsed, file);
78
+ for (const warning of deprecationWarnings) {
79
+ console.warn(warning);
80
+ }
72
81
  // Validate through canonical Zod schema
73
82
  let validated;
74
83
  try {
75
- validated = parseCanonicalTaskFile(parsed, file);
84
+ validated = parseCanonicalTaskFile(migrated, file);
76
85
  }
77
86
  catch (err) {
78
87
  const msg = err instanceof Error ? err.message : String(err);
@@ -27,6 +27,8 @@ export interface ResolvedOptions {
27
27
  dryRun: boolean;
28
28
  gapAnalysisEnabled: boolean;
29
29
  graderReplications?: number;
30
+ /** Grader context policy from `.ailf/config.yaml` `grader.context` */
31
+ graderContext?: "rubric-only" | "with-docs";
30
32
  headerArgs: string[];
31
33
  impactSummary?: ImpactSummary;
32
34
  mode: EvalMode;
@@ -249,6 +249,17 @@ export function computeResolvedOptions(opts) {
249
249
  const concurrency = repoConfig?.execution?.concurrency;
250
250
  const graderReplications = repoConfig?.execution?.graderReplications;
251
251
  const gapAnalysisEnabled = repoConfig?.execution?.gapAnalysis ?? true;
252
+ // Grader context policy. Cascade: env var > .ailf/config.yaml > unset
253
+ // (defaults to rubric-only at the EvalConfig boundary). The env var is the
254
+ // operational lever for one-shot comparison runs without editing the config file.
255
+ const rawGraderContext = process.env.AILF_GRADER_CONTEXT ?? repoConfig?.grader?.context;
256
+ const graderContext = rawGraderContext === "with-docs" || rawGraderContext === "rubric-only"
257
+ ? rawGraderContext
258
+ : undefined;
259
+ if (rawGraderContext && graderContext === undefined) {
260
+ console.error(`❌ Invalid grader.context "${rawGraderContext}". Must be "rubric-only" or "with-docs".`);
261
+ process.exit(1);
262
+ }
252
263
  // Remote mode
253
264
  const remote = opts.remote || process.env.AILF_REMOTE === "1";
254
265
  const apiUrl = process.env.AILF_API_URL ??
@@ -274,6 +285,7 @@ export function computeResolvedOptions(opts) {
274
285
  dryRun: opts.dryRun,
275
286
  gapAnalysisEnabled,
276
287
  graderReplications,
288
+ graderContext,
277
289
  headerArgs,
278
290
  impactSummary,
279
291
  mode,
@@ -90,12 +90,19 @@ export async function runRemotePipeline(opts, rootDir) {
90
90
  console.error(formatJobError(job));
91
91
  process.exit(1);
92
92
  }
93
- // 7. Fetch and write output artifacts
94
- await writeRemoteResults(client, job, {
93
+ // 7. Fetch and write output artifacts. A `completed` job that carries
94
+ // `job.error` is a degraded completion (DOC-2121 RC-3): a configured
95
+ // optional step failed end-to-end. Artifacts still write so the caller
96
+ // keeps useful local state, but the CLI exits non-zero so external
97
+ // `--remote` consumers don't mistake the placeholder for success.
98
+ const outcome = await writeRemoteResults(client, job, {
95
99
  outputDir: opts.outputDir,
96
100
  outputPath: opts.outputPath,
97
101
  apiUrl: opts.apiUrl,
98
102
  });
103
+ if (outcome.degraded) {
104
+ process.exit(1);
105
+ }
99
106
  }
100
107
  // ---------------------------------------------------------------------------
101
108
  // Helpers
@@ -21,6 +21,11 @@ export interface WriteResultsOptions {
21
21
  /** API base URL (for metadata). */
22
22
  apiUrl: string;
23
23
  }
24
+ /** Outcome flags so the caller can decide the process exit code. */
25
+ export interface WriteResultsOutcome {
26
+ /** True when `job.error` was set on a completed job (DOC-2121 RC-3). */
27
+ degraded: boolean;
28
+ }
24
29
  /**
25
30
  * Fetch report artifacts from the API and write them to disk.
26
31
  *
@@ -29,5 +34,11 @@ export interface WriteResultsOptions {
29
34
  * - `<outputDir>/report.md` — full markdown report (if reportId present)
30
35
  * - `<outputDir>/job-metadata.json` — job tracking info
31
36
  * - `--output` path — markdown report (if specified)
37
+ *
38
+ * Returns an outcome the caller uses to choose an exit code: a `completed`
39
+ * job that carries `job.error` is treated as a *degraded* completion (a
40
+ * configured optional step failed end-to-end; see DOC-2121 RC-3) and the
41
+ * caller should exit non-zero so external `--remote` consumers don't mistake
42
+ * a degraded completion for success.
32
43
  */
33
- export declare function writeRemoteResults(client: ApiClient, job: JobResponse, options: WriteResultsOptions): Promise<void>;
44
+ export declare function writeRemoteResults(client: ApiClient, job: JobResponse, options: WriteResultsOptions): Promise<WriteResultsOutcome>;
@@ -12,9 +12,6 @@
12
12
  */
13
13
  import { mkdirSync, writeFileSync } from "fs";
14
14
  import { resolve } from "path";
15
- // ---------------------------------------------------------------------------
16
- // Public API
17
- // ---------------------------------------------------------------------------
18
15
  /**
19
16
  * Fetch report artifacts from the API and write them to disk.
20
17
  *
@@ -23,6 +20,12 @@ import { resolve } from "path";
23
20
  * - `<outputDir>/report.md` — full markdown report (if reportId present)
24
21
  * - `<outputDir>/job-metadata.json` — job tracking info
25
22
  * - `--output` path — markdown report (if specified)
23
+ *
24
+ * Returns an outcome the caller uses to choose an exit code: a `completed`
25
+ * job that carries `job.error` is treated as a *degraded* completion (a
26
+ * configured optional step failed end-to-end; see DOC-2121 RC-3) and the
27
+ * caller should exit non-zero so external `--remote` consumers don't mistake
28
+ * a degraded completion for success.
26
29
  */
27
30
  export async function writeRemoteResults(client, job, options) {
28
31
  const resultsDir = options.outputDir;
@@ -55,11 +58,20 @@ export async function writeRemoteResults(client, job, options) {
55
58
  reportId: job.reportId ?? null,
56
59
  reportUrl: job.reportUrl ?? null,
57
60
  execution: job.execution ?? null,
61
+ error: job.error ?? null,
58
62
  apiUrl: options.apiUrl,
59
63
  }, null, 2));
60
- // 4. Print summary
64
+ // 4. Print summary. A completed job with `job.error` set means a
65
+ // configured optional step failed end-to-end — print the diagnostic
66
+ // and signal the caller to exit non-zero.
67
+ const degraded = Boolean(job.error);
61
68
  console.log("");
62
- console.log(`✅ Evaluation completed`);
69
+ if (degraded) {
70
+ console.log(`⚠️ Evaluation completed with errors`);
71
+ }
72
+ else {
73
+ console.log(`✅ Evaluation completed`);
74
+ }
63
75
  console.log(` 📊 Results: ${resolve(resultsDir, "score-summary.json")}`);
64
76
  if (reportWritten) {
65
77
  console.log(` 📝 Report: ${resolve(resultsDir, "report.md")}`);
@@ -71,6 +83,14 @@ export async function writeRemoteResults(client, job, options) {
71
83
  console.log(` 🔗 Studio: ${job.reportUrl}`);
72
84
  }
73
85
  console.log(` 🏷️ Job ID: ${job.jobId}`);
86
+ if (job.error) {
87
+ console.error("");
88
+ console.error(` ❌ Step "${job.error.step ?? "<unknown>"}" failed: ${job.error.message}`);
89
+ if (!job.reportId) {
90
+ console.error(" No report was published. See the API gateway run page for details.");
91
+ }
92
+ }
93
+ return { degraded };
74
94
  }
75
95
  // ---------------------------------------------------------------------------
76
96
  // Helpers