@sanity/ailf 4.1.0 → 4.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (126) hide show
  1. package/config/package-surface.ts +37 -0
  2. package/config/preflight-scoring.ts +26 -0
  3. package/dist/_vendor/ailf-core/artifact-registry.d.ts +1 -1
  4. package/dist/_vendor/ailf-core/artifact-registry.js +47 -0
  5. package/dist/_vendor/ailf-core/config-helpers.d.ts +35 -0
  6. package/dist/_vendor/ailf-core/config-helpers.js +67 -0
  7. package/dist/_vendor/ailf-core/index.d.ts +1 -1
  8. package/dist/_vendor/ailf-core/index.js +1 -1
  9. package/dist/_vendor/ailf-core/ports/context.d.ts +18 -0
  10. package/dist/_vendor/ailf-core/ports/doc-fetcher.d.ts +30 -0
  11. package/dist/_vendor/ailf-core/ports/index.d.ts +3 -1
  12. package/dist/_vendor/ailf-core/ports/index.js +1 -0
  13. package/dist/_vendor/ailf-core/ports/mode-handler.d.ts +23 -0
  14. package/dist/_vendor/ailf-core/ports/package-surface-resolver.d.ts +71 -0
  15. package/dist/_vendor/ailf-core/ports/package-surface-resolver.js +36 -0
  16. package/dist/_vendor/ailf-core/schemas/eval-config.d.ts +6 -0
  17. package/dist/_vendor/ailf-core/schemas/eval-config.js +14 -0
  18. package/dist/_vendor/ailf-core/schemas/index.d.ts +1 -0
  19. package/dist/_vendor/ailf-core/schemas/index.js +1 -0
  20. package/dist/_vendor/ailf-core/schemas/symbol-preflight-report.d.ts +51 -0
  21. package/dist/_vendor/ailf-core/schemas/symbol-preflight-report.js +57 -0
  22. package/dist/_vendor/ailf-core/types/generalized-task.d.ts +20 -3
  23. package/dist/_vendor/ailf-core/types/index.d.ts +13 -1
  24. package/dist/_vendor/ailf-core/types/index.js +1 -0
  25. package/dist/_vendor/ailf-core/types/package-surface.d.ts +36 -0
  26. package/dist/_vendor/ailf-core/types/package-surface.js +13 -0
  27. package/dist/_vendor/ailf-core/types/preflight-scoring.d.ts +52 -0
  28. package/dist/_vendor/ailf-core/types/preflight-scoring.js +18 -0
  29. package/dist/_vendor/ailf-core/types/repo-config.d.ts +14 -0
  30. package/dist/_vendor/ailf-core/types/symbol-preflight-report.d.ts +66 -0
  31. package/dist/_vendor/ailf-core/types/symbol-preflight-report.js +25 -0
  32. package/dist/adapters/config-sources/file-config-adapter.js +1 -0
  33. package/dist/adapters/doc-fetchers/sanity-doc-fetcher.d.ts +25 -5
  34. package/dist/adapters/doc-fetchers/sanity-doc-fetcher.js +276 -95
  35. package/dist/adapters/index.d.ts +1 -0
  36. package/dist/adapters/index.js +1 -0
  37. package/dist/adapters/package-surface/dts-package-surface.d.ts +46 -0
  38. package/dist/adapters/package-surface/dts-package-surface.js +173 -0
  39. package/dist/adapters/package-surface/in-memory-package-surface.d.ts +15 -0
  40. package/dist/adapters/package-surface/in-memory-package-surface.js +28 -0
  41. package/dist/adapters/package-surface/index.d.ts +9 -0
  42. package/dist/adapters/package-surface/index.js +8 -0
  43. package/dist/adapters/package-surface/parse-dts-exports.d.ts +31 -0
  44. package/dist/adapters/package-surface/parse-dts-exports.js +54 -0
  45. package/dist/adapters/task-sources/repo-schemas.d.ts +22 -0
  46. package/dist/adapters/task-sources/repo-schemas.js +93 -1
  47. package/dist/adapters/task-sources/repo-task-source.js +11 -2
  48. package/dist/commands/pipeline-action.d.ts +2 -0
  49. package/dist/commands/pipeline-action.js +12 -0
  50. package/dist/commands/remote-pipeline.js +9 -2
  51. package/dist/commands/remote-results.d.ts +12 -1
  52. package/dist/commands/remote-results.js +25 -5
  53. package/dist/commands/validate-tasks.js +8 -2
  54. package/dist/composition-root.js +9 -0
  55. package/dist/config/package-surface.ts +37 -0
  56. package/dist/config/preflight-scoring.ts +26 -0
  57. package/dist/index.d.ts +2 -2
  58. package/dist/index.js +1 -1
  59. package/dist/orchestration/build-app-context.js +1 -0
  60. package/dist/orchestration/pipeline-orchestrator.d.ts +19 -1
  61. package/dist/orchestration/pipeline-orchestrator.js +38 -0
  62. package/dist/orchestration/steps/calculate-scores-step.js +11 -0
  63. package/dist/orchestration/steps/generate-configs-step.js +16 -1
  64. package/dist/orchestration/steps/run-eval-step.js +27 -0
  65. package/dist/pipeline/calculate-scores.d.ts +66 -5
  66. package/dist/pipeline/calculate-scores.js +141 -27
  67. package/dist/pipeline/compiler/index.d.ts +1 -1
  68. package/dist/pipeline/compiler/index.js +1 -1
  69. package/dist/pipeline/compiler/literacy-bridge.d.ts +9 -0
  70. package/dist/pipeline/compiler/literacy-bridge.js +2 -0
  71. package/dist/pipeline/compiler/mode-handlers/__fixtures__/agent-harness-example-tasks.js +0 -12
  72. package/dist/pipeline/compiler/mode-handlers/__fixtures__/knowledge-probe-example-tasks.js +0 -12
  73. package/dist/pipeline/compiler/mode-handlers/literacy/assertions.d.ts +1 -1
  74. package/dist/pipeline/compiler/mode-handlers/literacy/assertions.js +31 -4
  75. package/dist/pipeline/compiler/mode-handlers/literacy/compiler.js +190 -6
  76. package/dist/pipeline/compiler/mode-handlers/literacy/index.js +2 -0
  77. package/dist/pipeline/compiler/mode-handlers/literacy/types.d.ts +17 -2
  78. package/dist/pipeline/compiler/rubric-resolution.d.ts +17 -1
  79. package/dist/pipeline/compiler/rubric-resolution.js +78 -2
  80. package/dist/pipeline/compiler/scoring-bridge.d.ts +49 -2
  81. package/dist/pipeline/compiler/scoring-bridge.js +104 -10
  82. package/dist/pipeline/eval-fingerprint.d.ts +9 -0
  83. package/dist/pipeline/eval-fingerprint.js +7 -1
  84. package/dist/pipeline/preflight/compute-preflight.d.ts +67 -0
  85. package/dist/pipeline/preflight/compute-preflight.js +118 -0
  86. package/dist/pipeline/preflight/emit-symbol-preflight.d.ts +51 -0
  87. package/dist/pipeline/preflight/emit-symbol-preflight.js +102 -0
  88. package/dist/pipeline/preflight/load-package-surface.d.ts +14 -0
  89. package/dist/pipeline/preflight/load-package-surface.js +19 -0
  90. package/dist/pipeline/preflight/load-preflight-context.d.ts +13 -0
  91. package/dist/pipeline/preflight/load-preflight-context.js +25 -0
  92. package/dist/pipeline/preflight/load-preflight-scoring.d.ts +12 -0
  93. package/dist/pipeline/preflight/load-preflight-scoring.js +17 -0
  94. package/dist/pipeline/preflight/parse-imports.d.ts +62 -0
  95. package/dist/pipeline/preflight/parse-imports.js +125 -0
  96. package/dist/report-store.d.ts +8 -0
  97. package/dist/report-store.js +55 -6
  98. package/dist/sanity/document-renderers.d.ts +106 -0
  99. package/dist/sanity/document-renderers.js +307 -0
  100. package/dist/sanity/queries.d.ts +32 -11
  101. package/dist/sanity/queries.js +78 -0
  102. package/dist/sanity/symbol-index.d.ts +98 -0
  103. package/dist/sanity/symbol-index.js +615 -0
  104. package/dist/tasks/knowledge-probe/define-type-api.task.ts +2 -6
  105. package/dist/tasks/knowledge-probe/groq-projections.task.ts +0 -5
  106. package/dist/tasks/literacy/content-lake.task.ts +4 -10
  107. package/dist/tasks/literacy/frameworks.task.ts +2 -8
  108. package/dist/tasks/literacy/functions.task.ts +1 -4
  109. package/dist/tasks/literacy/groq.task.ts +3 -12
  110. package/dist/tasks/literacy/image-handling.task.ts +1 -4
  111. package/dist/tasks/literacy/nextjs-live.task.ts +1 -4
  112. package/dist/tasks/literacy/portable-text.task.ts +2 -8
  113. package/dist/tasks/literacy/studio-setup.task.ts +2 -8
  114. package/dist/tasks/literacy/visual-editing.task.ts +2 -8
  115. package/package.json +2 -1
  116. package/tasks/knowledge-probe/define-type-api.task.ts +2 -6
  117. package/tasks/knowledge-probe/groq-projections.task.ts +0 -5
  118. package/tasks/literacy/content-lake.task.ts +4 -10
  119. package/tasks/literacy/frameworks.task.ts +2 -8
  120. package/tasks/literacy/functions.task.ts +1 -4
  121. package/tasks/literacy/groq.task.ts +3 -12
  122. package/tasks/literacy/image-handling.task.ts +1 -4
  123. package/tasks/literacy/nextjs-live.task.ts +1 -4
  124. package/tasks/literacy/portable-text.task.ts +2 -8
  125. package/tasks/literacy/studio-setup.task.ts +2 -8
  126. package/tasks/literacy/visual-editing.task.ts +2 -8
@@ -0,0 +1,37 @@
1
+ /**
2
+ * package-surface.ts — Framework-level package-surface manifest for the
3
+ * W0198 symbol-resolution preflight.
4
+ *
5
+ * The manifest pins each in-scope package to a single semver-major range.
6
+ * The preflight resolver answers "does symbol X export from package Y" by
7
+ * reading the installed package's `.d.ts` against this pin. Tasks
8
+ * reference packages by name; they do not carry per-package version
9
+ * metadata (per-task overrides remain a future extension point).
10
+ *
11
+ * Bumping a major is an editorial event — one PR that updates the pin,
12
+ * regenerates cached surfaces, and re-runs the historical comparison set.
13
+ * Patch and minor releases within a pinned major flow silently because
14
+ * semver disallows the export removals that would change a deduction
15
+ * outcome.
16
+ *
17
+ * @see docs/design-docs/two-stage-grader-symbol-preflight.md
18
+ */
19
+
20
+ import { definePackageSurface } from "@sanity/ailf-core"
21
+
22
+ export default definePackageSurface({
23
+ packages: [
24
+ // Sanity App SDK — drives App SDK literacy tasks (e.g. DOC-2117).
25
+ { pkg: "@sanity/sdk-react", semverPin: "^2.0.0" },
26
+
27
+ // Sanity Studio runtime — drives Studio-side literacy tasks.
28
+ // Pinned to the installed major in this repo (^5.x). The earlier
29
+ // major (v3) is an explicit per-task override candidate when a task
30
+ // exercises legacy Studio behavior.
31
+ { pkg: "sanity", semverPin: "^5.0.0" },
32
+
33
+ // @sanity/client — drives direct-client literacy tasks. Pinned to
34
+ // the installed major in this repo (^7.x).
35
+ { pkg: "@sanity/client", semverPin: "^7.0.0" },
36
+ ],
37
+ })
@@ -0,0 +1,26 @@
1
+ /**
2
+ * preflight-scoring.ts — How heavily the W0198 deterministic preflight
3
+ * contributes to the `code-correctness` dimension.
4
+ *
5
+ * The preflight (`SymbolPreflightReport`) and the LLM rubric both feed
6
+ * into `code-correctness` per D0010's weighted dimension aggregation.
7
+ * `codeCorrectnessWeight` sets the relative share between them — `0.4`
8
+ * means preflight is 40% of the dimension, rubric 60%.
9
+ *
10
+ * Bumping this is an editorial decision: a higher weight ties more of the
11
+ * `code-correctness` score to the deterministic existence-check (less
12
+ * grader noise on the symbol-existence question, but also less elasticity
13
+ * for the rubric to penalize stylistic/correctness issues the preflight
14
+ * cannot see). A lower weight cedes more authority back to the rubric.
15
+ *
16
+ * The default of `0.4` is a starting balance; revisit once the
17
+ * `unresolved` rate stabilizes in CI.
18
+ *
19
+ * @see docs/design-docs/two-stage-grader-symbol-preflight.md
20
+ */
21
+
22
+ import { definePreflightScoring } from "@sanity/ailf-core"
23
+
24
+ export default definePreflightScoring({
25
+ codeCorrectnessWeight: 0.4,
26
+ })
@@ -41,7 +41,7 @@ export type ArtifactMime = "application/json" | "application/x-ndjson" | "text/m
41
41
  */
42
42
  export type ArtifactTruncationPolicy = "reject" | "trailing-truncate" | "fielded-truncate" | "trial-oversize";
43
43
  /** The union of every artifact type known to AILF. */
44
- export type ArtifactType = "runManifest" | "scoreSummary" | "pipelineResult" | "pipelineContext" | "documentManifest" | "prComment" | "readinessReport" | "reportSnapshot" | "autoComparison" | "gapReport" | "sinkResults" | "callbackRequest" | "callbackResponse" | "configSnapshot" | "evalConfigGenerated" | "comparisonReport" | "discoveryReport" | "failureModes" | "renderedPrompts" | "rawResults" | "testOutputs" | "graderPrompts" | "graderJudgments" | "traces";
44
+ export type ArtifactType = "runManifest" | "scoreSummary" | "pipelineResult" | "pipelineContext" | "documentManifest" | "prComment" | "readinessReport" | "reportSnapshot" | "autoComparison" | "gapReport" | "sinkResults" | "callbackRequest" | "callbackResponse" | "configSnapshot" | "evalConfigGenerated" | "comparisonReport" | "discoveryReport" | "failureModes" | "renderedPrompts" | "rawResults" | "testOutputs" | "graderPrompts" | "graderJudgments" | "symbolPreflight" | "traces";
45
45
  /**
46
46
  * Result of parsing a per-entry key into a sanitized filename component.
47
47
  * Success carries the sanitized value; failure carries a reason for 4xx responses.
@@ -241,6 +241,21 @@ const failureModePreviewSchema = z.object({
241
241
  severity: z.enum(["low", "medium", "high", "critical"]),
242
242
  titlePreview: z.string().max(120),
243
243
  });
244
+ /**
245
+ * Preview shape for `symbolPreflight` manifest entries (W0198 Phase 4).
246
+ *
247
+ * The deterministic-lane report can be tens of KB on candidates that
248
+ * import many bindings. The Studio rollup row only needs the three lane
249
+ * counts and the deduction total to flag whether a deduction was applied
250
+ * and how much — drill-down hydrates the full per-binding finding list
251
+ * from the external artifact.
252
+ */
253
+ const symbolPreflightPreviewSchema = z.object({
254
+ exists: z.number().int().nonnegative(),
255
+ missing: z.number().int().nonnegative(),
256
+ unresolved: z.number().int().nonnegative(),
257
+ deductionTotal: z.number().nonnegative(),
258
+ });
244
259
  /**
245
260
  * Preview shape for `graderJudgments` manifest entries (W0051 / D0033 M7).
246
261
  * List views render score + a short reason excerpt; drill-down hydrates
@@ -875,6 +890,38 @@ export const ARTIFACT_REGISTRY = {
875
890
  capBytes: 512,
876
891
  },
877
892
  }),
893
+ symbolPreflight: buildDescriptor({
894
+ type: "symbolPreflight",
895
+ slug: "symbol-preflight",
896
+ layout: "per-entry",
897
+ axes: ["run", "mode", "task", "model"],
898
+ entrySchema: unknownEntry,
899
+ mime: "application/json",
900
+ capBytes: 256_000,
901
+ manifestPreview: {
902
+ schema: symbolPreflightPreviewSchema,
903
+ extract: (entry) => {
904
+ const e = entry;
905
+ const findings = Array.isArray(e.findings) ? e.findings : [];
906
+ let exists = 0;
907
+ let missing = 0;
908
+ let unresolved = 0;
909
+ for (const f of findings) {
910
+ if (f && typeof f === "object" && "result" in f) {
911
+ if (f.result === "exists")
912
+ exists++;
913
+ else if (f.result === "missing")
914
+ missing++;
915
+ else if (f.result === "unresolved")
916
+ unresolved++;
917
+ }
918
+ }
919
+ const deductionTotal = typeof e.deduction?.total === "number" ? e.deduction.total : 0;
920
+ return { exists, missing, unresolved, deductionTotal };
921
+ },
922
+ capBytes: 96,
923
+ },
924
+ }),
878
925
  traces: buildDescriptor({
879
926
  type: "traces",
880
927
  slug: "traces",
@@ -33,6 +33,8 @@ import type { SinksFile } from "./schemas/sinks.js";
33
33
  import type { TestBudgetConfig } from "./schemas/test-budgets.js";
34
34
  import type { ModelsConfig } from "./types/index.js";
35
35
  import type { GeneralizedTaskDefinition } from "./types/generalized-task.js";
36
+ import type { PackageSurfaceConfig } from "./types/package-surface.js";
37
+ import type { PreflightScoringConfig } from "./types/preflight-scoring.js";
36
38
  import type { ModeBase, PresetDefinition } from "./types/plugin-registry.js";
37
39
  /**
38
40
  * Define an AILF evaluation configuration.
@@ -67,6 +69,39 @@ export declare function defineTask(task: GeneralizedTaskDefinition): Generalized
67
69
  * misconfigured OpenAI Responses-API fields.
68
70
  */
69
71
  export declare function defineModels(models: ModelsConfig): ModelsConfig;
72
+ /**
73
+ * Define the framework-level package-surface manifest consumed by the
74
+ * W0198 symbol-resolution preflight.
75
+ *
76
+ * Validates the obvious author-time errors:
77
+ * - Each entry has a non-empty `pkg` and `semverPin`.
78
+ * - `semverPin` is a single-major caret range (`^MAJOR.MINOR.PATCH`).
79
+ * Other range syntaxes (`>=`, `~`, `*`) are rejected because the
80
+ * resolver's stability commitment is "pinned to one major"; non-caret
81
+ * ranges either widen past a major (reintroducing churn) or are
82
+ * stricter than needed for an editorial pin.
83
+ * - No duplicate `pkg` entries.
84
+ *
85
+ * @throws {Error} On invalid entries or duplicates.
86
+ *
87
+ * @see docs/design-docs/two-stage-grader-symbol-preflight.md
88
+ */
89
+ export declare function definePackageSurface(config: PackageSurfaceConfig): PackageSurfaceConfig;
90
+ /**
91
+ * Define the preflight-scoring config consumed by the W0198 two-stage
92
+ * grader's score merge.
93
+ *
94
+ * The single knob is `codeCorrectnessWeight` — preflight's share of the
95
+ * `code-correctness` dimension (`[0, 1]`). The complementary share belongs
96
+ * to the LLM rubric. Validates the obvious author-time errors:
97
+ *
98
+ * - `codeCorrectnessWeight` is a finite number in `[0, 1]`. The endpoints
99
+ * `0` and `1` disable one lane entirely; values outside that range
100
+ * (negative, > 1) are meaningless and are rejected.
101
+ *
102
+ * @throws {Error} On out-of-range weights.
103
+ */
104
+ export declare function definePreflightScoring(config: PreflightScoringConfig): PreflightScoringConfig;
70
105
  /**
71
106
  * Define rubric templates and scoring profiles.
72
107
  *
@@ -144,6 +144,73 @@ export function defineModels(models) {
144
144
  return models;
145
145
  }
146
146
  // ---------------------------------------------------------------------------
147
+ // Package-surface helpers
148
+ // ---------------------------------------------------------------------------
/**
 * Define the framework-level package-surface manifest consumed by the
 * W0198 symbol-resolution preflight.
 *
 * Validates the obvious author-time errors, in this order per entry:
 *  - `pkg` is non-empty.
 *  - `semverPin` is non-empty.
 *  - `semverPin` is a single-major caret range (`^MAJOR.MINOR.PATCH`),
 *    optionally followed by a semver pre-release (`-beta.1`) and/or build
 *    (`+build.5`) suffix. Other range syntaxes (`>=`, `~`, `*`, `||`
 *    unions, space-separated comparator sets) are rejected because the
 *    resolver's stability commitment is "pinned to one major".
 *  - `pkg` has not already appeared in the manifest.
 *
 * @param config - Manifest with a `packages` array of `{ pkg, semverPin }`.
 * @returns The same `config` object, unchanged, once every entry passes.
 * @throws {Error} On invalid entries or duplicates.
 *
 * @see docs/design-docs/two-stage-grader-symbol-preflight.md
 */
export function definePackageSurface(config) {
    // The suffix is restricted to dot-separated semver identifiers. A looser
    // "anything after - or +" pattern would let compound ranges such as
    // "^2.0.0-beta || >=3.0.0" slip through the single-major check.
    const singleMajorCaretPin = /^\^\d+\.\d+\.\d+(?:-[0-9A-Za-z-]+(?:\.[0-9A-Za-z-]+)*)?(?:\+[0-9A-Za-z-]+(?:\.[0-9A-Za-z-]+)*)?$/;
    const seen = new Set();
    for (const entry of config.packages) {
        if (!entry.pkg) {
            throw new Error(`definePackageSurface: every entry must declare a non-empty \`pkg\`.`);
        }
        if (!entry.semverPin) {
            throw new Error(`definePackageSurface: entry "${entry.pkg}" is missing \`semverPin\`.`);
        }
        if (!singleMajorCaretPin.test(entry.semverPin)) {
            throw new Error(`definePackageSurface: entry "${entry.pkg}" has \`semverPin\` ` +
                `"${entry.semverPin}". Pins must be single-major caret ranges ` +
                `(e.g. "^2.0.0"); other range syntaxes are not allowed.`);
        }
        if (seen.has(entry.pkg)) {
            throw new Error(`definePackageSurface: duplicate entry for "${entry.pkg}". ` +
                `Each package may appear at most once in the manifest.`);
        }
        seen.add(entry.pkg);
    }
    return config;
}
188
+ // ---------------------------------------------------------------------------
189
+ // Preflight-scoring helpers
190
+ // ---------------------------------------------------------------------------
191
+ /**
192
+ * Define the preflight-scoring config consumed by the W0198 two-stage
193
+ * grader's score merge.
194
+ *
195
+ * The single knob is `codeCorrectnessWeight` — preflight's share of the
196
+ * `code-correctness` dimension (`[0, 1]`). The complementary share belongs
197
+ * to the LLM rubric. Validates the obviously author-time errors:
198
+ *
199
+ * - `codeCorrectnessWeight` is a finite number in `[0, 1]`. Weights outside
200
+ * that range either disable one lane entirely (use `0` / `1`) or land in
201
+ * nonsense territory (negative, > 1).
202
+ *
203
+ * @throws {Error} On out-of-range weights.
204
+ */
205
+ export function definePreflightScoring(config) {
206
+ const w = config.codeCorrectnessWeight;
207
+ if (!Number.isFinite(w) || w < 0 || w > 1) {
208
+ throw new Error(`definePreflightScoring: \`codeCorrectnessWeight\` must be a finite ` +
209
+ `number in [0, 1] — got ${String(w)}.`);
210
+ }
211
+ return config;
212
+ }
213
+ // ---------------------------------------------------------------------------
147
214
  // Rubric helpers
148
215
  // ---------------------------------------------------------------------------
149
216
  /**
@@ -18,7 +18,7 @@ export * from "./examples/index.js";
18
18
  export * from "./artifact-registry.js";
19
19
  export * from "./batch-signing.js";
20
20
  export * from "./constants.js";
21
- export { defineCanaryTasks, defineConfig, defineFeatures, defineModeBase, defineModels, definePricingTable, definePreset, definePrompts, defineRubrics, defineSchedules, defineSinks, defineSources, defineTask, defineTestBudgets, defineThresholds, } from "./config-helpers.js";
21
+ export { defineCanaryTasks, defineConfig, defineFeatures, defineModeBase, defineModels, definePackageSurface, definePreflightScoring, definePricingTable, definePreset, definePrompts, defineRubrics, defineSchedules, defineSinks, defineSources, defineTask, defineTestBudgets, defineThresholds, } from "./config-helpers.js";
22
22
  export type { PricingEntry, PromptEntry, SourceEntry, } from "./config-helpers.js";
23
23
  export { env } from "./env-helper.js";
24
24
  export { NoOpArtifactWriter, NotImplementedError, } from "./ports/artifact-writer.js";
@@ -21,7 +21,7 @@ export * from "./constants.js";
21
21
  // ---------------------------------------------------------------------------
22
22
  // Architecture overhaul — Phase 0 helpers
23
23
  // ---------------------------------------------------------------------------
24
- export { defineCanaryTasks, defineConfig, defineFeatures, defineModeBase, defineModels, definePricingTable, definePreset, definePrompts, defineRubrics, defineSchedules, defineSinks, defineSources, defineTask, defineTestBudgets, defineThresholds, } from "./config-helpers.js";
24
+ export { defineCanaryTasks, defineConfig, defineFeatures, defineModeBase, defineModels, definePackageSurface, definePreflightScoring, definePricingTable, definePreset, definePrompts, defineRubrics, defineSchedules, defineSinks, defineSources, defineTask, defineTestBudgets, defineThresholds, } from "./config-helpers.js";
25
25
  export { env } from "./env-helper.js";
26
26
  export { NoOpArtifactWriter, NotImplementedError, } from "./ports/artifact-writer.js";
27
27
  export { assoc, resolveVariantMode, splitTaskVariant, } from "./artifact-capture/association.js";
@@ -19,6 +19,7 @@ import type { CacheStore } from "./cache-store.js";
19
19
  import type { DocFetcher } from "./doc-fetcher.js";
20
20
  import type { EvalRunner } from "./eval-runner.js";
21
21
  import type { Logger } from "./logger.js";
22
+ import type { PackageSurfaceResolver } from "./package-surface-resolver.js";
22
23
  import type { ProgressReporter } from "./progress-reporter.js";
23
24
  import type { TaskSource } from "./task-source.js";
24
25
  /**
@@ -78,6 +79,16 @@ export interface ResolvedConfig {
78
79
  noRemoteCache: boolean;
79
80
  /** Grader replications for consistency measurement */
80
81
  graderReplications?: number;
82
+ /**
83
+ * Grader context policy. Controls whether canonical reference docs are
84
+ * injected into the grader's assertion `rubricPrompt`. `"rubric-only"`
85
+ * sends only the rubric template + criteria; `"with-docs"` adds the
86
+ * canonical doc content as authoritative ground truth.
87
+ *
88
+ * Sourced from EvalConfig `grader.context` or the equivalent CLI/env
89
+ * surface. Defaults to `"rubric-only"` at the EvalConfig boundary.
90
+ */
91
+ graderContext?: "rubric-only" | "with-docs";
81
92
  /** Base directory for user-facing pipeline output artifacts. */
82
93
  outputDir: string;
83
94
  /** Output path override */
@@ -237,6 +248,13 @@ export interface AppContext {
237
248
  readonly evalRunner: EvalRunner;
238
249
  /** Structured logger */
239
250
  readonly logger: Logger;
251
+ /**
252
+ * Package-surface resolver for the W0198 symbol-resolution preflight.
253
+ * Optional during the staged rollout — Phase 1 wires the adapter, later
254
+ * phases consume it. When unset, the preflight step (Phase 4+) treats
255
+ * every binding as `unresolved` so candidates are not penalized.
256
+ */
257
+ readonly packageSurfaceResolver?: PackageSurfaceResolver;
240
258
  /**
241
259
  * Progress reporter — carries `phase-start / phase-progress / phase-complete`
242
260
  * events for long-running pipeline spans (W0053). The composition root always
@@ -86,6 +86,30 @@ export interface UrlFetchSummary {
86
86
  totalFailed: number;
87
87
  totalFetched: number;
88
88
  }
/**
 * Manifest record for one task's symbol-reference index (W0197).
 *
 * The fetcher emits one record for every task it processed, even when
 * extraction yielded no symbols. The grader-context consumer checks
 * `symbolCount > 0` to choose between symbol-index and full-doc injection,
 * which keeps two fallback situations distinguishable:
 *
 * - record present with a zero count — extraction ran and produced nothing
 *   (observable fallback);
 * - no record at all — the fetcher had not run with W0197 (silent fallback).
 */
export interface SymbolIndexManifestEntry {
    /** Task this record belongs to. */
    taskId: string;
    /** Path to the per-task index artifact, relative to the repo root. */
    path: string;
    /** Number of symbols in the index after merging and deduplication. */
    symbolCount: number;
    /** Symbol counts split by tier, kept for observability. */
    tierBreakdown: Record<"typeDef" | "heading" | "inlineCode" | "codeBlock", number>;
}
89
113
  /** Metadata about the fetch operation, for downstream pipeline consumption */
90
114
  export interface FetchMetadata {
91
115
  /** Document manifest for traceability (slug, _id, _rev, title) */
@@ -96,6 +120,12 @@ export interface FetchMetadata {
96
120
  documentOverlay?: DocumentOverlaySummary;
97
121
  /** URL fetch operations summary */
98
122
  urlFetch?: UrlFetchSummary;
123
+ /**
124
+ * Per-task symbol-index manifest (W0197). Always populated when the
125
+ * fetcher emits per-task index artifacts; the in-memory copy mirrors
126
+ * the on-disk `contexts/canonical-symbols/manifest.json`.
127
+ */
128
+ symbolIndexes?: SymbolIndexManifestEntry[];
99
129
  }
100
130
  /** Complete result of a doc fetch operation */
101
131
  export interface FetchResult {
@@ -9,10 +9,12 @@ export { NoOpArtifactWriter } from "./artifact-writer.js";
9
9
  export type { CacheEntryMetadata, CacheKey, CacheLookupResult, CacheRecordInput, CacheStore, } from "./cache-store.js";
10
10
  export type { ConfigSource } from "./config-source.js";
11
11
  export type { AppContext, ReportSinkPort, ReportStorePort, ResolvedConfig, } from "./context.js";
12
- export type { DocContext, DocFetcher, DocSourceConfig, DocumentManifestEntry, DocumentOverlaySummary, FetchMetadata, FetchResult, ReleaseImpact, UrlFetchEntry, UrlFetchSummary, } from "./doc-fetcher.js";
12
+ export type { DocContext, DocFetcher, DocSourceConfig, DocumentManifestEntry, DocumentOverlaySummary, FetchMetadata, FetchResult, ReleaseImpact, SymbolIndexManifestEntry, UrlFetchEntry, UrlFetchSummary, } from "./doc-fetcher.js";
13
13
  export type { EvalRunConfig, EvalRunner } from "./eval-runner.js";
14
14
  export type { CompilationContext, CompileResultAssertion, CompileResultPrompt, CompileResultProvider, CompileResultTestCase, ModeCompileResult, ModeHandler, ModeProviderEntry, ModeRubricConfig, PromptTemplate, } from "./mode-handler.js";
15
15
  export type { Logger } from "./logger.js";
16
+ export type { PackageSurface, PackageSurfaceResolver, PackageSurfaceSymbol, PackageSurfaceUnresolvedReason, } from "./package-surface-resolver.js";
17
+ export { PackageSurfaceResolverError } from "./package-surface-resolver.js";
16
18
  export type { PipelineStep } from "./pipeline-step.js";
17
19
  export type { ArtifactWriterProgressOptions, PhaseCompleteEvent, PhaseProgressEvent, PhaseStartEvent, ProgressReporter, } from "./progress-reporter.js";
18
20
  export { ARTIFACT_EXPORT_PHASE_ID, NoOpProgressReporter, } from "./progress-reporter.js";
@@ -5,5 +5,6 @@
5
5
  * Adapters (in packages/eval) implement these interfaces.
6
6
  */
7
7
  export { NoOpArtifactWriter } from "./artifact-writer.js";
8
+ export { PackageSurfaceResolverError } from "./package-surface-resolver.js";
8
9
  export { ARTIFACT_EXPORT_PHASE_ID, NoOpProgressReporter, } from "./progress-reporter.js";
9
10
  export { canonicalDocRefLabel, isIdRef, isPathRef, isPerspectiveRef, isSlugRef, isTemplatedAssertion, } from "./task-source.js";
@@ -17,6 +17,7 @@
17
17
  * @see packages/eval/src/pipeline/compiler/mode-handlers/
18
18
  */
19
19
  import type { GeneralizedTaskDefinition } from "../types/generalized-task.js";
20
+ import type { PreflightRubricContext } from "../types/preflight-scoring.js";
20
21
  /**
21
22
  * A prompt template owned by a mode handler.
22
23
  *
@@ -44,6 +45,28 @@ export interface CompilationContext {
44
45
  models?: ModeProviderEntry[];
45
46
  /** Rubric config (templates, weights) — loaded from config/rubrics */
46
47
  rubricConfig?: ModeRubricConfig;
48
+ /**
49
+ * Grader context policy.
50
+ *
51
+ * - `"rubric-only"`: the grader sees only the rubric template + criteria +
52
+ * the candidate's response.
53
+ * - `"with-docs"`: the canonical reference content for the task is injected
54
+ * into the assertion's `rubricPrompt` so the grader has authoritative
55
+ * ground truth alongside the rubric.
56
+ *
57
+ * Mode handlers without canonical doc context (agent-harness, knowledge-probe)
58
+ * ignore this field — the `canonicalReference` parameter on
59
+ * `resolveTemplatedAssertion` is optional, so unset means current behavior.
60
+ */
61
+ graderContext?: "rubric-only" | "with-docs";
62
+ /**
63
+ * W0198 Phase 6 — when supplied, mode handlers prefix the
64
+ * `code-correctness` rubric with a "DETERMINISTIC PREFLIGHT" system
65
+ * instruction telling the grader to treat the preflight's existence
66
+ * findings as ground truth. Sourced upstream from the package-surface
67
+ * manifest; absence collapses cleanly to the pre-W0198 rubric.
68
+ */
69
+ preflightContext?: PreflightRubricContext;
47
70
  }
48
71
  /** A model provider entry for compilation */
49
72
  export interface ModeProviderEntry {
@@ -0,0 +1,71 @@
1
+ /**
2
+ * Port: Reads the public symbol surface of an installed package.
3
+ *
4
+ * The W0198 symbol-resolution preflight resolves candidate imports against
5
+ * the actual package surface (its `.d.ts`) to answer "is symbol X exported
6
+ * from package Y" deterministically. This port lets the preflight do that
7
+ * without depending on a specific resolution mechanism — adapters decide
8
+ * whether to read installed `node_modules`, an in-memory map (tests), or a
9
+ * future cached snapshot.
10
+ *
11
+ * Adapters:
12
+ * - `DtsPackageSurface` (`packages/eval/src/adapters/package-surface/`) —
13
+ * reads installed `.d.ts` from `node_modules/<pkg>`, follows one hop of
14
+ * `export * from "./other"`.
15
+ * - `InMemoryPackageSurface` — test double; maps package names to a fixed
16
+ * surface for deterministic unit tests.
17
+ *
18
+ * @see docs/design-docs/two-stage-grader-symbol-preflight.md
19
+ */
20
+ /**
21
+ * One exported binding from a package's public surface. Both fields are `readonly`.
22
+ *
23
+ * - `name` — the bare exported identifier (e.g. `"useEditDocument"`).
24
+ * - `source` — provenance of the binding within the package:
25
+ *   - `"types"` — declared in a `.d.ts` (the only source the
26
+ *     `DtsPackageSurface` adapter currently emits).
27
+ *   - `"runtime"` — reserved for future adapters that introspect the
28
+ *     loaded module's runtime exports.
29
+ */
30
+ export interface PackageSurfaceSymbol {
31
+ readonly name: string;
32
+ readonly source: "types" | "runtime";
33
+ }
34
+ /**
35
+ * Resolved public surface of a single package at a single resolved version.
36
+ *
37
+ * - `pkg` — npm package name, exactly as the caller requested it.
38
+ * - `version` — version actually resolved on disk (from the package's
39
+ *   `package.json`). May differ from a manifest pin's literal range; the
40
+ *   pin says "any major-N", and this is the concrete version present.
41
+ * - `symbols` — every top-level exported binding the adapter discovered,
42
+ *   exposed as a `readonly` array. Order is implementation-defined (callers should not rely on it).
43
+ */
44
+ export interface PackageSurface {
45
+ readonly pkg: string;
46
+ readonly version: string;
47
+ readonly symbols: readonly PackageSurfaceSymbol[];
48
+ }
49
+ /**
50
+ * Adapters throw a `PackageSurfaceResolverError` when they cannot answer
51
+ * a `resolveExports` call. The `reason` field (a `PackageSurfaceUnresolvedReason`) maps directly to the
52
+ * `unresolved` finding kinds the W0198 preflight surfaces — callers
53
+ * catch this error and convert it into a per-binding `unresolved`
54
+ * finding rather than a `missing` deduction. Constructor argument order is `reason`, then `pkg`, then the `Error` message.
55
+ */
56
+ export declare class PackageSurfaceResolverError extends Error {
57
+ readonly pkg: string;
58
+ readonly reason: PackageSurfaceUnresolvedReason;
59
+ constructor(reason: PackageSurfaceUnresolvedReason, pkg: string, message: string);
60
+ }
61
+ export type PackageSurfaceUnresolvedReason = "package-not-installed" | "types-entry-missing" | "parse-failed"; // the three reasons a resolver error can carry
62
+ /**
63
+ * Reads the public symbol surface of an installed package. `resolveExports` is asynchronous (returns a `Promise`).
64
+ *
65
+ * Adapters MAY cache results within a single resolver instance — same
66
+ * `(pkg, version)` should yield the same `PackageSurface` for the
67
+ * lifetime of the resolver. Note that `resolveExports` takes only the package name; the resolved version is reported on the returned `PackageSurface`.
68
+ */
69
+ export interface PackageSurfaceResolver {
70
+ resolveExports(pkg: string): Promise<PackageSurface>;
71
+ }
@@ -0,0 +1,36 @@
1
+ /**
2
+ * Port: Reads the public symbol surface of an installed package.
3
+ *
4
+ * The W0198 symbol-resolution preflight resolves candidate imports against
5
+ * the actual package surface (its `.d.ts`) to answer "is symbol X exported
6
+ * from package Y" deterministically. This port lets the preflight do that
7
+ * without depending on a specific resolution mechanism — adapters decide
8
+ * whether to read installed `node_modules`, an in-memory map (tests), or a
9
+ * future cached snapshot.
10
+ *
11
+ * Adapters:
12
+ * - `DtsPackageSurface` (`packages/eval/src/adapters/package-surface/`) —
13
+ * reads installed `.d.ts` from `node_modules/<pkg>`, follows one hop of
14
+ * `export * from "./other"`.
15
+ * - `InMemoryPackageSurface` — test double; maps package names to a fixed
16
+ * surface for deterministic unit tests.
17
+ *
18
+ * @see docs/design-docs/two-stage-grader-symbol-preflight.md
19
+ */
20
+ /**
21
+ * Adapters throw a `PackageSurfaceResolverError` when they cannot answer
22
+ * a `resolveExports` call. The `reason` field maps directly to the
23
+ * `unresolved` finding kinds the W0198 preflight surfaces — callers
24
+ * catch this error and convert it into a per-binding `unresolved`
25
+ * finding rather than a `missing` deduction. The constructor takes `(reason, pkg, message)`: `message` is passed to `Error`, while `pkg` and `reason` are stored on the instance.
26
+ */
27
+ export class PackageSurfaceResolverError extends Error {
28
+ pkg;
29
+ reason;
30
+ constructor(reason, pkg, message) {
31
+ super(message);
32
+ this.name = "PackageSurfaceResolverError"; // override the inherited "Error" name so the subclass is identifiable
33
+ this.pkg = pkg;
34
+ this.reason = reason;
35
+ }
36
+ }
@@ -44,6 +44,12 @@ export declare const EvalConfigSchema: z.ZodObject<{
44
44
  gapAnalysis: z.ZodOptional<z.ZodBoolean>;
45
45
  apiUrl: z.ZodOptional<z.ZodString>;
46
46
  }, z.core.$strip>>;
47
+ grader: z.ZodOptional<z.ZodObject<{
48
+ context: z.ZodOptional<z.ZodEnum<{
49
+ "rubric-only": "rubric-only";
50
+ "with-docs": "with-docs";
51
+ }>>;
52
+ }, z.core.$strip>>;
47
53
  output: z.ZodOptional<z.ZodObject<{
48
54
  dir: z.ZodOptional<z.ZodString>;
49
55
  }, z.core.$strip>>;
@@ -98,6 +98,20 @@ export const EvalConfigSchema = z
98
98
  apiUrl: z.string().url().optional(),
99
99
  })
100
100
  .optional(),
101
+ /**
102
+ * Grader configuration.
103
+ *
104
+ * - `context: "rubric-only"` — the grader sees only the rubric template
105
+ * + criteria + candidate response.
106
+ * - `context: "with-docs"` — canonical reference content for each task
107
+ * is injected into the assertion's `rubricPrompt` so the grader has
108
+ * authoritative ground truth.
109
+ */
110
+ grader: z
111
+ .object({
112
+ context: z.enum(["rubric-only", "with-docs"]).optional(),
113
+ })
114
+ .optional(),
101
115
  /**
102
116
  * Output configuration (W0077 Phase 6c). Replaces the retired
103
117
  * `--output-dir` CLI flag. Path is resolved relative to the caller's
@@ -17,4 +17,5 @@ export * from "./pipeline.js";
17
17
  export * from "./report.js";
18
18
  export * from "./schedules.js";
19
19
  export * from "./sinks.js";
20
+ export * from "./symbol-preflight-report.js";
20
21
  export * from "./test-budgets.js";
@@ -17,4 +17,5 @@ export * from "./pipeline.js";
17
17
  export * from "./report.js";
18
18
  export * from "./schedules.js";
19
19
  export * from "./sinks.js";
20
+ export * from "./symbol-preflight-report.js";
20
21
  export * from "./test-budgets.js";
@@ -0,0 +1,51 @@
1
+ /**
2
+ * @sanity/ailf-core — SymbolPreflightReport schema
3
+ *
4
+ * The trust-boundary parser for the W0198 deterministic-lane artifact.
5
+ * Used when reading a previously-emitted preflight report back from
6
+ * disk / GCS so a downstream step (re-grading, comparison, dashboard
7
+ * rendering) can trust its shape.
8
+ *
9
+ * The schema asserts `satisfies z.ZodType<SymbolPreflightReport>`
10
+ * against the domain type in
11
+ * `packages/core/src/types/symbol-preflight-report.ts` (D0045 / W0187),
12
+ * so any drift between the two is a build error.
13
+ */
14
+ import { z } from "zod";
15
+ export declare const SymbolPreflightReportSchema: z.ZodObject<{ // top-level keys: candidate, findings, deduction
16
+ candidate: z.ZodObject<{
17
+ taskId: z.ZodString;
18
+ testIndex: z.ZodNumber;
19
+ }, z.core.$strip>;
20
+ findings: z.ZodArray<z.ZodDiscriminatedUnion<[z.ZodObject<{ // array of findings; three object variants follow
21
+ result: z.ZodLiteral<"exists">; // variant "exists": pkg, version, binding, source
22
+ pkg: z.ZodString;
23
+ version: z.ZodString;
24
+ binding: z.ZodString;
25
+ source: z.ZodEnum<{
26
+ types: "types";
27
+ runtime: "runtime";
28
+ }>;
29
+ }, z.core.$strip>, z.ZodObject<{
30
+ result: z.ZodLiteral<"missing">; // variant "missing": pkg, version, binding (no `source`)
31
+ pkg: z.ZodString;
32
+ version: z.ZodString;
33
+ binding: z.ZodString;
34
+ }, z.core.$strip>, z.ZodObject<{
35
+ result: z.ZodLiteral<"unresolved">; // variant "unresolved": pkg, binding, reason (no `version`)
36
+ pkg: z.ZodString;
37
+ binding: z.ZodString;
38
+ reason: z.ZodEnum<{
39
+ "package-not-installed": "package-not-installed";
40
+ "types-entry-missing": "types-entry-missing";
41
+ "parse-failed": "parse-failed";
42
+ "reexport-hop-unfollowed": "reexport-hop-unfollowed";
43
+ }>;
44
+ }, z.core.$strip>], "result">>; // the union is discriminated on the `result` field
45
+ deduction: z.ZodObject<{ // three numeric fields; their meaning is defined by the domain type, not visible here
46
+ perMissing: z.ZodNumber;
47
+ cap: z.ZodNumber;
48
+ total: z.ZodNumber;
49
+ }, z.core.$strip>;
50
+ }, z.core.$strip>;
51
+ export type { SymbolPreflightReport } from "../types/symbol-preflight-report.js";