@sanity/ailf 7.3.0 → 7.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (31) hide show
  1. package/dist/_vendor/ailf-core/artifact-registry.d.ts +18 -0
  2. package/dist/_vendor/ailf-core/artifact-registry.js +1 -1
  3. package/dist/_vendor/ailf-core/config-helpers.d.ts +24 -2
  4. package/dist/_vendor/ailf-core/config-helpers.js +25 -2
  5. package/dist/_vendor/ailf-core/examples/index.d.ts +1 -1
  6. package/dist/_vendor/ailf-core/examples/index.js +1 -1
  7. package/dist/_vendor/ailf-core/index.d.ts +1 -1
  8. package/dist/_vendor/ailf-core/index.js +1 -1
  9. package/dist/_vendor/ailf-core/ports/context.d.ts +6 -0
  10. package/dist/_vendor/ailf-core/schemas/pipeline-request.d.ts +1 -0
  11. package/dist/_vendor/ailf-core/schemas/pipeline-request.js +1 -0
  12. package/dist/_vendor/ailf-core/types/pipeline-request.d.ts +2 -0
  13. package/dist/adapters/api-client/build-request.d.ts +2 -0
  14. package/dist/adapters/api-client/build-request.js +9 -0
  15. package/dist/adapters/config-sources/cli-config-adapter.d.ts +1 -1
  16. package/dist/adapters/config-sources/cli-config-adapter.js +6 -3
  17. package/dist/commands/explain-handler.js +2 -2
  18. package/dist/commands/init.js +9 -2
  19. package/dist/commands/interpret.js +1 -31
  20. package/dist/commands/pipeline-action.d.ts +29 -1
  21. package/dist/commands/pipeline-action.js +149 -41
  22. package/dist/commands/remote-pipeline.js +2 -0
  23. package/dist/commands/shared/versions-from-report.d.ts +29 -0
  24. package/dist/commands/shared/versions-from-report.js +47 -0
  25. package/dist/index.d.ts +1 -1
  26. package/dist/index.js +1 -1
  27. package/dist/orchestration/build-app-context.js +1 -0
  28. package/dist/orchestration/config-to-source-overrides.js +5 -1
  29. package/dist/pipeline/calculate-scores.js +8 -2
  30. package/dist/pipeline/map-request-to-config.js +1 -1
  31. package/package.json +1 -1
@@ -276,6 +276,24 @@ export declare function diagnosisPathBuilder(): ArtifactObjectPath;
276
276
  * `diagnosisVersion` MUST NOT contain `|` (the function rejects that case).
277
277
  */
278
278
  export declare function encodeDiagnosisPathVersion(diagnosisVersion: string, cardVersion: string): string;
279
+ /**
280
+ * Convert an entry key (wire format, e.g. `{taskId}::{modelId}`) to a
281
+ * filename-safe component.
282
+ *
283
+ * - `::` → `--` so the wire separator doesn't show up in the filename.
284
+ * - `/` → `_` so task names like "Content Lake with @sanity/client" don't
285
+ * create unintended GCS subdirectories and so `ls` against the per-entry
286
+ * directory shows one row per entry.
287
+ *
288
+ * Single colons (`:`) are preserved — modelIds like
289
+ * `anthropic:messages:claude-opus-4-6` are valid GCS object names.
290
+ *
291
+ * NOTE: this mapping is not bijective. A taskId containing literal `--`
292
+ * combined with a modelId could in theory collide with one whose taskId
293
+ * contains `::`. In practice, production taskIds don't exercise these
294
+ * combinations.
295
+ */
296
+ export declare function sanitizeEntryKey(key: string): string;
279
297
  /** Test-only reset for the legacy-key warning flag. Not exported publicly. */
280
298
  export declare function __resetLegacyTestOutputsWarning(): void;
281
299
  /**
@@ -311,7 +311,7 @@ export function encodeDiagnosisPathVersion(diagnosisVersion, cardVersion) {
311
311
  * contains `::`. In practice, production taskIds don't exercise these
312
312
  * combinations.
313
313
  */
314
- function sanitizeEntryKey(key) {
314
+ export function sanitizeEntryKey(key) {
315
315
  return key.replace(/::/g, "--").replace(/\//g, "_");
316
316
  }
317
317
  /**
@@ -33,15 +33,37 @@ import type { SinksFile } from "./schemas/sinks.js";
33
33
  import type { TestBudgetConfig } from "./schemas/test-budgets.js";
34
34
  import type { ModelsConfig } from "./types/index.js";
35
35
  import type { GeneralizedTaskDefinition } from "./types/generalized-task.js";
36
+ import type { RepoConfig } from "./types/repo-config.js";
36
37
  import type { PackageSurfaceConfig } from "./types/package-surface.js";
37
38
  import type { PreflightScoringConfig } from "./types/preflight-scoring.js";
38
39
  import type { ModeBase, PresetDefinition } from "./types/plugin-registry.js";
39
40
  /**
40
- * Define an AILF evaluation configuration.
41
+ * Define a full AILF evaluation configuration (`EvalConfig`).
41
42
  *
42
- * Used in `ailf.config.ts` files for typed configuration authoring.
43
+ * This is the advanced, standalone config passed via `ailf run --config
44
+ * <path>`. For the repo-level `.ailf/ailf.config.ts` that `ailf init`
45
+ * scaffolds and `ailf run` auto-loads, use {@link defineRepoConfig} instead —
46
+ * its shape is `RepoConfig` (with a `source` object), not `EvalConfig`.
43
47
  */
44
48
  export declare function defineConfig(config: EvalConfig): EvalConfig;
49
+ /**
50
+ * Define an AILF repo configuration — the `.ailf/ailf.config.ts` file that
51
+ * `ailf init` scaffolds and `ailf run` auto-loads.
52
+ *
53
+ * Narrows the parameter to `RepoConfig` so authors get full IDE autocomplete
54
+ * for `source`, `triggers`, `execution`, `owner`, etc. Like the other
55
+ * `define*` helpers this is a pure identity function — runtime validation
56
+ * happens later via `RepoConfigSchema` when the pipeline loads the file.
57
+ *
58
+ * ```typescript
59
+ * import { defineRepoConfig } from "@sanity/ailf"
60
+ *
61
+ * export default defineRepoConfig({
62
+ * source: { projectId: "abc123", dataset: "production" },
63
+ * })
64
+ * ```
65
+ */
66
+ export declare function defineRepoConfig(config: RepoConfig): RepoConfig;
45
67
  /**
46
68
  * Define an evaluation task with full type narrowing by mode.
47
69
  *
@@ -30,13 +30,36 @@ import { CANONICAL_EVAL_MODES } from "../ailf-shared/index.js";
30
30
  // Config-level helpers
31
31
  // ---------------------------------------------------------------------------
32
32
  /**
33
- * Define an AILF evaluation configuration.
33
+ * Define a full AILF evaluation configuration (`EvalConfig`).
34
34
  *
35
- * Used in `ailf.config.ts` files for typed configuration authoring.
35
+ * This is the advanced, standalone config passed via `ailf run --config
36
+ * <path>`. For the repo-level `.ailf/ailf.config.ts` that `ailf init`
37
+ * scaffolds and `ailf run` auto-loads, use {@link defineRepoConfig} instead —
38
+ * its shape is `RepoConfig` (with a `source` object), not `EvalConfig`.
36
39
  */
37
40
  export function defineConfig(config) {
38
41
  return config;
39
42
  }
43
+ /**
44
+ * Define an AILF repo configuration — the `.ailf/ailf.config.ts` file that
45
+ * `ailf init` scaffolds and `ailf run` auto-loads.
46
+ *
47
+ * Narrows the parameter to `RepoConfig` so authors get full IDE autocomplete
48
+ * for `source`, `triggers`, `execution`, `owner`, etc. Like the other
49
+ * `define*` helpers this is a pure identity function — runtime validation
50
+ * happens later via `RepoConfigSchema` when the pipeline loads the file.
51
+ *
52
+ * ```typescript
53
+ * import { defineRepoConfig } from "@sanity/ailf"
54
+ *
55
+ * export default defineRepoConfig({
56
+ * source: { projectId: "abc123", dataset: "production" },
57
+ * })
58
+ * ```
59
+ */
60
+ export function defineRepoConfig(config) {
61
+ return config;
62
+ }
40
63
  // ---------------------------------------------------------------------------
41
64
  // Task-level helpers
42
65
  // ---------------------------------------------------------------------------
@@ -534,4 +534,4 @@ export declare const EXAMPLES: Record<ExampleType, ExampleRecord>;
534
534
  /** GitHub Actions workflow template for AI Literacy evaluation */
535
535
  export declare const workflowYaml = "# \u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n# AI Literacy Evaluation \u2014 GitHub Actions workflow\n# \u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n#\n# Evaluates your documentation quality on every pull request.\n# The AILF CLI reads your .ailf/tasks/ definitions, submits them\n# to the AILF API for evaluation, and writes a score report.\n#\n# Prerequisites:\n# Add one secret to your repository (Settings \u2192 Secrets \u2192 Actions):\n# AILF_API_KEY \u2014 your API key (starts with ailf_live_sk_)\n#\n# @sanity/ailf is published with public npm access, so no npm token is\n# needed to install the CLI.\n#\n# Customization:\n# - Narrow the trigger paths to reduce cost (see comment below)\n# - Check debug_mode for faster iteration (fewer tests)\n# - See: https://github.com/sanity-labs/ai-literacy-framework\n# \u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n\nname: AI Literacy Eval\n\non:\n pull_request:\n branches: [main]\n # Runs on every PR to main by default. To reduce cost:\n # paths: [\".ailf/**\", \"docs/**\"]\n\n workflow_dispatch:\n inputs:\n debug_mode:\n description: \"Run in debug mode (fewer tests, faster iteration)\"\n type: boolean\n default: false\n\nconcurrency:\n group: ailf-eval-${{ github.event.pull_request.number || github.ref }}\n cancel-in-progress: true\n\njobs:\n evaluate:\n name: AI Literacy Evaluation\n runs-on: ubuntu-latest\n permissions:\n contents: read\n pull-requests: write\n steps:\n - uses: actions/checkout@v4\n\n - name: Run evaluation\n id: eval\n env:\n AILF_API_KEY: ${{ secrets.AILF_API_KEY }}\n # \u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n # D0037 run provenance envelope \u2014 REPLACE THE OWNER TEAM SLUG\n # below. Unedited templates produce runs tagged with the literal\n # placeholder so you can spot them in Studio / BigQuery and fix.\n #\n # AILF_CLASSIFICATION values: official | adhoc | experimental |\n # test | external. External teams should use `adhoc` by default;\n # `official` is reserved for the core-docs scheduled series.\n # \u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n AILF_CLASSIFICATION: adhoc\n AILF_OWNER_TEAM: \"<REPLACE-WITH-YOUR-TEAM-SLUG>\"\n AILF_OWNER_INDIVIDUAL: ${{ github.actor }}\n run: |\n npx @sanity/ailf@latest run --remote \\\n --output /tmp/ailf-report.md \\\n ${{ inputs.debug_mode && '--debug' || '' }}\n\n - name: Post PR comment\n if: always() && github.event_name == 'pull_request'\n uses: actions/github-script@v7\n with:\n script: |\n const fs = require('fs');\n\n // --- Constants ---\n const MARKER = '<!-- ailf-score-report -->';\n const HISTORY_START = '<!-- ailf-score-history -->';\n const HISTORY_END = '<!-- /ailf-score-history -->';\n const MAX_HISTORY = 3; // keep at most 3 prior runs\n\n // --- Read new report ---\n let newReport;\n try {\n newReport = fs.readFileSync('/tmp/ailf-report.md', 'utf-8');\n } catch {\n newReport = `## \u26A0\uFE0F AI Literacy Evaluation\\n\\nNo report generated. Check the [workflow logs](${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}).`;\n }\n\n const prNumber = context.issue?.number || context.payload?.pull_request?.number;\n if (!prNumber) {\n console.log('No PR number found, skipping comment');\n return;\n }\n\n // --- Find existing comment ---\n const { data: comments } = await github.rest.issues.listComments({\n owner: context.repo.owner, repo: context.repo.repo,\n issue_number: prNumber,\n });\n const existing = comments.find(c => c.body?.includes(MARKER));\n\n // --- Build history from previous comment ---\n let historyEntries = [];\n if (existing) {\n const oldBody = existing.body || '';\n\n // Collect existing collapsed history entries\n const histStart = oldBody.indexOf(HISTORY_START);\n const histEnd = oldBody.indexOf(HISTORY_END);\n if (histStart !== -1 && histEnd !== -1) {\n const historyContent = oldBody.slice(histStart + HISTORY_START.length, histEnd).trim();\n // Split on </details> boundaries to get individual entries\n if (historyContent) {\n historyEntries = historyContent\n .split(/<\\/details>\\s*/)\n .map(s => s.trim())\n .filter(s => s.startsWith('<details>'))\n .map(s => s + '\\n</details>');\n }\n }\n\n // Extract the current report (will become the newest history entry)\n let previousReport = '';\n if (histStart !== -1) {\n // Report is between MARKER and the \"Previous runs\" heading (or history section)\n const markerIdx = oldBody.indexOf(MARKER);\n // Find the --- separator before history\n const separatorIdx = oldBody.lastIndexOf('---', histStart);\n const endIdx = separatorIdx > markerIdx ? separatorIdx : histStart;\n previousReport = oldBody.slice(markerIdx + MARKER.length, endIdx).trim();\n } else {\n // No history yet \u2014 everything after MARKER is the report\n const markerIdx = oldBody.indexOf(MARKER);\n if (markerIdx !== -1) {\n previousReport = oldBody.slice(markerIdx + MARKER.length).trim();\n }\n }\n\n // Collapse the previous report into a <details> entry\n if (previousReport) {\n const scoreMatch = previousReport.match(/Overall:\\s*(\\d+)\\/100/);\n const score = scoreMatch ? scoreMatch[1] : '?';\n const dateMatch = previousReport.match(/Generated by.*?\u00B7\\s*([^\u00B7<\\n*]+)/);\n const date = dateMatch\n ? dateMatch[1].trim()\n : new Date().toISOString().slice(0, 16).replace('T', ' ') + ' UTC';\n const entry = `<details>\\n<summary>\uD83D\uDCDC ${date} \u2014 ${score}/100</summary>\\n\\n${previousReport}\\n\\n</details>`;\n historyEntries.unshift(entry); // newest first\n }\n\n // Enforce max history limit\n historyEntries = historyEntries.slice(0, MAX_HISTORY);\n }\n\n // --- Assemble final comment ---\n const historySection = historyEntries.length > 0\n ? `\\n\\n---\\n\\n### \uD83D\uDCDC Previous runs\\n\\n${HISTORY_START}\\n${historyEntries.join('\\n\\n')}\\n${HISTORY_END}`\n : '';\n const finalBody = `${MARKER}\\n${newReport}${historySection}`;\n\n if (existing) {\n await github.rest.issues.updateComment({\n owner: context.repo.owner, repo: context.repo.repo,\n comment_id: existing.id, body: finalBody,\n });\n console.log(`Updated comment (${historyEntries.length} history entries)`);\n } else {\n await github.rest.issues.createComment({\n owner: context.repo.owner, repo: context.repo.repo,\n issue_number: prNumber, body: finalBody,\n });\n console.log('Created new PR comment');\n }\n\n - name: Summary\n if: always()\n run: |\n if [ -f /tmp/ailf-report.md ]; then\n cat /tmp/ailf-report.md >> \"$GITHUB_STEP_SUMMARY\"\n else\n echo \"## \u26A0\uFE0F AI Literacy Evaluation\" >> \"$GITHUB_STEP_SUMMARY\"\n echo \"\" >> \"$GITHUB_STEP_SUMMARY\"\n echo \"No report generated. Check the workflow logs.\" >> \"$GITHUB_STEP_SUMMARY\"\n fi\n";
536
536
  /** TypeScript project configuration template (ailf.config.ts) */
537
- export declare const ailfConfigTs = "/**\n * .ailf/ailf.config.ts \u2014 AI Literacy Framework project configuration.\n *\n * This file configures how the AILF evaluation pipeline runs in this\n * repository. Place it at .ailf/ailf.config.ts in your project root.\n *\n * Evaluations are submitted to the AILF API (ailf-api.sanity.build).\n * The API handles LLM calls, doc fetching, grading, and report\n * publishing. Your repo only needs one secret: AILF_API_KEY.\n *\n * Docs: https://github.com/sanity-labs/ai-literacy-framework\n */\n\nexport default {\n /**\n * Documentation source \u2014 which docs are being evaluated.\n *\n * This tells the pipeline which Sanity project and dataset contain\n * the documentation under test. For most users, this is Sanity's own\n * docs project.\n */\n source: {\n /** Sanity project ID (find yours at sanity.io/manage) */\n projectId: \"3do82whm\",\n /** The dataset to query (e.g., \"production\", \"next\") */\n dataset: \"next\",\n /**\n * The public URL of your documentation site.\n * Used by agentic mode to test agent discoverability.\n */\n baseUrl: \"https://www.sanity.io/docs\",\n },\n\n /**\n * Trigger configuration \u2014 when evaluations run automatically.\n *\n * Each key is a trigger context. The pipeline checks which trigger\n * matches the current execution context (PR, merge, schedule, etc.)\n * and applies its settings.\n *\n * Mode options:\n * \"validate-only\" \u2014 check that task files parse correctly (fast, no LLM calls)\n * \"eval\" \u2014 run the full evaluation pipeline\n */\n triggers: {\n /** On pull requests: just validate task files parse correctly. */\n pr: {\n mode: \"validate-only\",\n },\n\n /** When .ailf/ files change in a PR: run a real evaluation. */\n \"pr-task-change\": {\n mode: \"eval\",\n paths: [\".ailf/**\"],\n },\n\n /** On merge to main: run evaluation (non-blocking). */\n main: {\n mode: \"eval\",\n blocking: false,\n notify: true,\n },\n },\n}\n";
537
+ export declare const ailfConfigTs = "/**\n * .ailf/ailf.config.ts \u2014 AI Literacy Framework project configuration.\n *\n * This file configures how the AILF evaluation pipeline runs in this\n * repository. Place it at .ailf/ailf.config.ts in your project root.\n *\n * Evaluations are submitted to the AILF API (ailf-api.sanity.build).\n * The API handles LLM calls, doc fetching, grading, and report\n * publishing. Your repo only needs one secret: AILF_API_KEY.\n *\n * Docs: https://github.com/sanity-labs/ai-literacy-framework\n */\n\nimport { defineRepoConfig } from \"@sanity/ailf\"\n\nexport default defineRepoConfig({\n /**\n * Documentation source \u2014 which docs are being evaluated.\n *\n * This tells the pipeline which Sanity project and dataset contain\n * the documentation under test. For most users, this is Sanity's own\n * docs project.\n */\n source: {\n /** Sanity project ID (find yours at sanity.io/manage) */\n projectId: \"3do82whm\",\n /** The dataset to query (e.g., \"production\", \"next\") */\n dataset: \"next\",\n /**\n * The public URL of your documentation site.\n * Used by agentic mode to test agent discoverability.\n */\n baseUrl: \"https://www.sanity.io/docs\",\n },\n\n /**\n * Trigger configuration \u2014 when evaluations run automatically.\n *\n * Each key is a trigger context. The pipeline checks which trigger\n * matches the current execution context (PR, merge, schedule, etc.)\n * and applies its settings.\n *\n * Mode options:\n * \"validate-only\" \u2014 check that task files parse correctly (fast, no LLM calls)\n * \"eval\" \u2014 run the full evaluation pipeline\n */\n triggers: {\n /** On pull requests: just validate task files parse correctly. */\n pr: {\n mode: \"validate-only\",\n },\n\n /** When .ailf/ files change in a PR: run a real evaluation. */\n \"pr-task-change\": {\n mode: \"eval\",\n paths: [\".ailf/**\"],\n },\n\n /** On merge to main: run evaluation (non-blocking). */\n main: {\n mode: \"eval\",\n blocking: false,\n notify: true,\n },\n },\n})\n";
@@ -734,4 +734,4 @@ export const workflowYaml = "# ────────────────
734
734
  // TypeScript template exports (for ailf init --output-format ts)
735
735
  // ---------------------------------------------------------------------------
736
736
  /** TypeScript project configuration template (ailf.config.ts) */
737
- export const ailfConfigTs = "/**\n * .ailf/ailf.config.ts — AI Literacy Framework project configuration.\n *\n * This file configures how the AILF evaluation pipeline runs in this\n * repository. Place it at .ailf/ailf.config.ts in your project root.\n *\n * Evaluations are submitted to the AILF API (ailf-api.sanity.build).\n * The API handles LLM calls, doc fetching, grading, and report\n * publishing. Your repo only needs one secret: AILF_API_KEY.\n *\n * Docs: https://github.com/sanity-labs/ai-literacy-framework\n */\n\nexport default {\n /**\n * Documentation source — which docs are being evaluated.\n *\n * This tells the pipeline which Sanity project and dataset contain\n * the documentation under test. For most users, this is Sanity's own\n * docs project.\n */\n source: {\n /** Sanity project ID (find yours at sanity.io/manage) */\n projectId: \"3do82whm\",\n /** The dataset to query (e.g., \"production\", \"next\") */\n dataset: \"next\",\n /**\n * The public URL of your documentation site.\n * Used by agentic mode to test agent discoverability.\n */\n baseUrl: \"https://www.sanity.io/docs\",\n },\n\n /**\n * Trigger configuration — when evaluations run automatically.\n *\n * Each key is a trigger context. The pipeline checks which trigger\n * matches the current execution context (PR, merge, schedule, etc.)\n * and applies its settings.\n *\n * Mode options:\n * \"validate-only\" — check that task files parse correctly (fast, no LLM calls)\n * \"eval\" — run the full evaluation pipeline\n */\n triggers: {\n /** On pull requests: just validate task files parse correctly. */\n pr: {\n mode: \"validate-only\",\n },\n\n /** When .ailf/ files change in a PR: run a real evaluation. */\n \"pr-task-change\": {\n mode: \"eval\",\n paths: [\".ailf/**\"],\n },\n\n /** On merge to main: run evaluation (non-blocking). */\n main: {\n mode: \"eval\",\n blocking: false,\n notify: true,\n },\n },\n}\n";
737
+ export const ailfConfigTs = "/**\n * .ailf/ailf.config.ts — AI Literacy Framework project configuration.\n *\n * This file configures how the AILF evaluation pipeline runs in this\n * repository. Place it at .ailf/ailf.config.ts in your project root.\n *\n * Evaluations are submitted to the AILF API (ailf-api.sanity.build).\n * The API handles LLM calls, doc fetching, grading, and report\n * publishing. Your repo only needs one secret: AILF_API_KEY.\n *\n * Docs: https://github.com/sanity-labs/ai-literacy-framework\n */\n\nimport { defineRepoConfig } from \"@sanity/ailf\"\n\nexport default defineRepoConfig({\n /**\n * Documentation source — which docs are being evaluated.\n *\n * This tells the pipeline which Sanity project and dataset contain\n * the documentation under test. For most users, this is Sanity's own\n * docs project.\n */\n source: {\n /** Sanity project ID (find yours at sanity.io/manage) */\n projectId: \"3do82whm\",\n /** The dataset to query (e.g., \"production\", \"next\") */\n dataset: \"next\",\n /**\n * The public URL of your documentation site.\n * Used by agentic mode to test agent discoverability.\n */\n baseUrl: \"https://www.sanity.io/docs\",\n },\n\n /**\n * Trigger configuration — when evaluations run automatically.\n *\n * Each key is a trigger context. The pipeline checks which trigger\n * matches the current execution context (PR, merge, schedule, etc.)\n * and applies its settings.\n *\n * Mode options:\n * \"validate-only\" — check that task files parse correctly (fast, no LLM calls)\n * \"eval\" — run the full evaluation pipeline\n */\n triggers: {\n /** On pull requests: just validate task files parse correctly. */\n pr: {\n mode: \"validate-only\",\n },\n\n /** When .ailf/ files change in a PR: run a real evaluation. */\n \"pr-task-change\": {\n mode: \"eval\",\n paths: [\".ailf/**\"],\n },\n\n /** On merge to main: run evaluation (non-blocking). */\n main: {\n mode: \"eval\",\n blocking: false,\n notify: true,\n },\n },\n})\n";
@@ -18,7 +18,7 @@ export * from "./examples/index.js";
18
18
  export * from "./artifact-registry.js";
19
19
  export * from "./batch-signing.js";
20
20
  export * from "./constants.js";
21
- export { defineCanaryTasks, defineConfig, defineFeatures, defineModeBase, defineModels, definePackageSurface, definePreflightScoring, definePricingTable, definePreset, definePrompts, defineRubrics, defineSchedules, defineSinks, defineSources, defineTask, defineTestBudgets, defineThresholds, } from "./config-helpers.js";
21
+ export { defineCanaryTasks, defineConfig, defineFeatures, defineModeBase, defineModels, definePackageSurface, definePreflightScoring, definePricingTable, definePreset, definePrompts, defineRepoConfig, defineRubrics, defineSchedules, defineSinks, defineSources, defineTask, defineTestBudgets, defineThresholds, } from "./config-helpers.js";
22
22
  export type { PricingEntry, PromptEntry, SourceEntry, } from "./config-helpers.js";
23
23
  export { env } from "./env-helper.js";
24
24
  export { NoOpArtifactWriter, NotImplementedError, } from "./ports/artifact-writer.js";
@@ -21,7 +21,7 @@ export * from "./constants.js";
21
21
  // ---------------------------------------------------------------------------
22
22
  // Architecture overhaul — Phase 0 helpers
23
23
  // ---------------------------------------------------------------------------
24
- export { defineCanaryTasks, defineConfig, defineFeatures, defineModeBase, defineModels, definePackageSurface, definePreflightScoring, definePricingTable, definePreset, definePrompts, defineRubrics, defineSchedules, defineSinks, defineSources, defineTask, defineTestBudgets, defineThresholds, } from "./config-helpers.js";
24
+ export { defineCanaryTasks, defineConfig, defineFeatures, defineModeBase, defineModels, definePackageSurface, definePreflightScoring, definePricingTable, definePreset, definePrompts, defineRepoConfig, defineRubrics, defineSchedules, defineSinks, defineSources, defineTask, defineTestBudgets, defineThresholds, } from "./config-helpers.js";
25
25
  export { env } from "./env-helper.js";
26
26
  export { NoOpArtifactWriter, NotImplementedError, } from "./ports/artifact-writer.js";
27
27
  export { assoc, resolveVariantMode, splitTaskVariant, } from "./artifact-capture/association.js";
@@ -143,6 +143,12 @@ export interface ResolvedConfig {
143
143
  perspectiveOverride?: string;
144
144
  /** Sanity studio origin override */
145
145
  studioOriginOverride?: string;
146
+ /**
147
+ * Documentation base-URL override, sourced from the repo config
148
+ * `source.baseUrl` (or the `DOC_BASE_URL` env var). Distinct from the
149
+ * `--url` flag captured in `urls`; `urls[0]` still wins when both are set.
150
+ */
151
+ baseUrlOverride?: string;
146
152
  /** Sanity document filter args */
147
153
  sanityDocumentArgs?: string[];
148
154
  /** Report ID that triggered this re-run (flows to provenance.lineage.rerunOf) */
@@ -79,6 +79,7 @@ export declare const PipelineRequestSchema: z.ZodObject<{
79
79
  }>>;
80
80
  source: z.ZodOptional<z.ZodString>;
81
81
  sourceReportId: z.ZodOptional<z.ZodString>;
82
+ studioOrigin: z.ZodOptional<z.ZodString>;
82
83
  taskMode: z.ZodOptional<z.ZodEnum<{
83
84
  "content-lake": "content-lake";
84
85
  inline: "inline";
@@ -140,6 +140,7 @@ export const PipelineRequestSchema = z.object({
140
140
  searchMode: z.enum(["off", "open", "origin-only"]).optional(),
141
141
  source: z.string().optional(),
142
142
  sourceReportId: z.string().optional(),
143
+ studioOrigin: z.string().url().optional(),
143
144
  taskMode: z.enum(["content-lake", "inline"]).optional(),
144
145
  /**
145
146
  * Task-source configuration (W0077 Phase 6h). Mirrors
@@ -123,6 +123,8 @@ export interface PipelineRequest {
123
123
  searchMode?: "off" | "open" | "origin-only";
124
124
  source?: string;
125
125
  sourceReportId?: string;
126
+ /** Studio origin override — maps to `ResolvedConfig.studioOriginOverride`. */
127
+ studioOrigin?: string;
126
128
  taskMode?: "content-lake" | "inline";
127
129
  taskSource?: PipelineRequestTaskSource;
128
130
  tasks?: string[];
@@ -54,9 +54,11 @@ export interface RemoteConfigSlice {
54
54
  publishEnabled?: boolean;
55
55
  publishTag?: string;
56
56
  concurrency?: number;
57
+ baseUrlOverride?: string;
57
58
  datasetOverride?: string;
58
59
  projectIdOverride?: string;
59
60
  perspectiveOverride?: string;
61
+ studioOriginOverride?: string;
60
62
  graderContext?: "rubric-only" | "with-docs";
61
63
  graderReplications?: number;
62
64
  borderlineReplications?: number;
@@ -123,6 +123,15 @@ export async function buildRemoteRequest(options) {
123
123
  raw.projectId = config.projectIdOverride;
124
124
  if (config.perspectiveOverride)
125
125
  raw.perspective = config.perspectiveOverride;
126
+ if (config.studioOriginOverride) {
127
+ raw.studioOrigin = config.studioOriginOverride;
128
+ }
129
+ // A repo-config `source.baseUrl` rides the existing `urls` channel: the
130
+ // server maps request.urls -> ResolvedConfig.urls -> source baseUrl
131
+ // (configToSourceOverrides), mirroring the local path.
132
+ if (config.baseUrlOverride && !raw.urls) {
133
+ raw.urls = [config.baseUrlOverride];
134
+ }
126
135
  // Advanced
127
136
  if (config.graderContext) {
128
137
  raw.graderContext = config.graderContext;
@@ -2,7 +2,7 @@
2
2
  * CliConfigAdapter — resolves pipeline config from Commander CLI flags.
3
3
  *
4
4
  * This is the default adapter — it wraps the existing option resolution
5
- * pipeline: PipelineCliOptions → computeResolvedOptions → mapToResolvedConfig.
5
+ * pipeline: PipelineCliOptions → resolveOptions → mapToResolvedConfig.
6
6
  *
7
7
  * @see packages/eval/src/commands/pipeline-action.ts — underlying implementation
8
8
  */
@@ -2,11 +2,11 @@
2
2
  * CliConfigAdapter — resolves pipeline config from Commander CLI flags.
3
3
  *
4
4
  * This is the default adapter — it wraps the existing option resolution
5
- * pipeline: PipelineCliOptions → computeResolvedOptions → mapToResolvedConfig.
5
+ * pipeline: PipelineCliOptions → resolveOptions → mapToResolvedConfig.
6
6
  *
7
7
  * @see packages/eval/src/commands/pipeline-action.ts — underlying implementation
8
8
  */
9
- import { computeResolvedOptions } from "../../commands/pipeline-action.js";
9
+ import { resolveOptions } from "../../commands/pipeline-action.js";
10
10
  import { mapToResolvedConfig } from "../../orchestration/build-app-context.js";
11
11
  export class CliConfigAdapter {
12
12
  cliOpts;
@@ -17,7 +17,10 @@ export class CliConfigAdapter {
17
17
  this.rootDir = rootDir;
18
18
  }
19
19
  async resolve() {
20
- const resolved = computeResolvedOptions(this.cliOpts);
20
+ // resolveOptions loads the repo config (`.ailf/ailf.config.ts` etc.)
21
+ // before mapping CLI flags, so file-sourced source/owner/execution
22
+ // values reach the resolved config.
23
+ const resolved = await resolveOptions(this.cliOpts);
21
24
  return mapToResolvedConfig(resolved, this.rootDir);
22
25
  }
23
26
  }
@@ -22,7 +22,7 @@
22
22
  import { TASK_FILE_NAMES } from "../_vendor/ailf-core/index.js";
23
23
  import { buildPipelinePlan, buildSimpleCommandPlan, } from "../pipeline/plan.js";
24
24
  import { formatPlanConsole, formatPlanJson } from "../pipeline/plan-format.js";
25
- import { computeResolvedOptions } from "./pipeline-action.js";
25
+ import { resolveOptions } from "./pipeline-action.js";
26
26
  import { getCallerCwd } from "./shared/resolve-output-dir.js";
27
27
  import { LiteracyVariant } from "../pipeline/normalize-mode.js";
28
28
  // ---------------------------------------------------------------------------
@@ -704,7 +704,7 @@ async function buildPipelineExplainPlan(actionCommand, rootDir) {
704
704
  purpose: raw.purpose,
705
705
  label: raw.label ?? [],
706
706
  };
707
- const resolved = computeResolvedOptions(withDefaults);
707
+ const resolved = await resolveOptions(withDefaults);
708
708
  const planOpts = {
709
709
  areaOption: resolved.areaOption,
710
710
  beforeOption: resolved.beforeOption,
@@ -98,7 +98,7 @@ export async function runInit(opts) {
98
98
  const skipped = [];
99
99
  // 2. Write project config
100
100
  if (format === "ts") {
101
- // TypeScript: ailf.config.ts with defineConfig helper
101
+ // TypeScript: ailf.config.ts wrapped in the defineRepoConfig helper
102
102
  const configPath = resolve(ailfDir, "ailf.config.ts");
103
103
  if (writeIfNew(configPath, ailfConfigTs, force)) {
104
104
  written.push(rel(targetDir, configPath));
@@ -252,6 +252,13 @@ export async function runInit(opts) {
252
252
  }
253
253
  }
254
254
  const taskExt = format === "ts" ? ".task.ts" : format === "yaml" ? ".yaml" : ".json";
255
+ // Reference the config file we actually wrote (not a hard-coded
256
+ // config.yaml) so the "Next steps" hints point at a real file.
257
+ const configFile = format === "ts"
258
+ ? ".ailf/ailf.config.ts"
259
+ : format === "yaml"
260
+ ? ".ailf/config.yaml"
261
+ : ".ailf/config.json";
255
262
  console.log();
256
263
  console.log(" Next steps:");
257
264
  console.log();
@@ -285,7 +292,7 @@ export async function runInit(opts) {
285
292
  console.log(" AILF_API_KEY=... npx @sanity/ailf@latest pipeline --remote --debug");
286
293
  console.log();
287
294
  console.log(" 💡 Or test a remote run against your repo tasks:");
288
- console.log(" # First, set `taskSource: { type: repo }` in .ailf/config.yaml");
295
+ console.log(` # First, set \`taskSource: { type: repo }\` in ${configFile}`);
289
296
  console.log(" AILF_API_KEY=... npx @sanity/ailf@latest run --remote --debug");
290
297
  console.log();
291
298
  console.log(" 💡 Or run locally against your repo tasks:");
@@ -18,9 +18,9 @@
18
18
  import { dirname, resolve } from "path";
19
19
  import { fileURLToPath } from "url";
20
20
  import { Command } from "commander";
21
- import { CARD_REGISTRY_VERSION, diagnosisVersion, } from "../_vendor/ailf-core/index.js";
22
21
  import { addOutputDirOption } from "./shared/options.js";
23
22
  import { resolveOutputDir } from "./shared/resolve-output-dir.js";
23
+ import { defaultVersionsFromReport } from "./shared/versions-from-report.js";
24
24
  // ---------------------------------------------------------------------------
25
25
  // Module-level root constant (same pattern as compare.ts)
26
26
  // ---------------------------------------------------------------------------
@@ -68,36 +68,6 @@ export function formatCardSummaryLine(card) {
68
68
  return `${icon} ${card.cardType}: ${text}`;
69
69
  }
70
70
  // ---------------------------------------------------------------------------
71
- // Default versions resolver
72
- // ---------------------------------------------------------------------------
73
- /**
74
- * Derive VersionedInputs from a stored report record.
75
- *
76
- * The four-version chain is carried in `report.summary.versions` per the
77
- * Phase 5 schema, with `diagnosisVersion` sourced from the runner's const.
78
- * Falls back to hard-coded "unknown" values when the fields are not present
79
- * (legacy reports without version metadata).
80
- */
81
- function defaultVersionsFromReport(report) {
82
- const rec = report;
83
- const summary = rec.summary;
84
- const versions = summary?.versions;
85
- return {
86
- graderJudgmentsVersion: typeof versions?.graderJudgmentsVersion === "string"
87
- ? versions.graderJudgmentsVersion
88
- : "unknown",
89
- ensembleVersion: typeof versions?.ensembleVersion === "string"
90
- ? versions.ensembleVersion
91
- : "unknown",
92
- diagnosisVersion: typeof versions?.diagnosisVersion === "string"
93
- ? versions.diagnosisVersion
94
- : diagnosisVersion,
95
- cardVersion: typeof versions?.cardVersion === "string"
96
- ? versions.cardVersion
97
- : CARD_REGISTRY_VERSION,
98
- };
99
- }
100
- // ---------------------------------------------------------------------------
101
71
  // Command factory
102
72
  // ---------------------------------------------------------------------------
103
73
  /**
@@ -13,6 +13,7 @@
13
13
  import { type ImpactSummary } from "../pipeline/reverse-mapping.js";
14
14
  import type { DebugOptions, EvalMode } from "../pipeline/types.js";
15
15
  import { type Diagnosis, type ReportStorePort, type SynthesisCostTelemetry } from "../_vendor/ailf-core/index.d.ts";
16
+ import { type RepoConfig } from "../adapters/task-sources/repo-schemas.js";
16
17
  import type { PipelineCliOptions } from "./run.js";
17
18
  export interface ResolvedOptions {
18
19
  allowedOriginArgs: string[];
@@ -23,6 +24,7 @@ export interface ResolvedOptions {
23
24
  compareEnabled: boolean;
24
25
  compareThreshold?: number;
25
26
  concurrency?: number;
27
+ baseUrlOverride?: string;
26
28
  datasetOverride?: string;
27
29
  debug?: DebugOptions;
28
30
  dryRun: boolean;
@@ -94,7 +96,7 @@ export interface ResolvedOptions {
94
96
  *
95
97
  * Exported so the plan builder can call it independently.
96
98
  */
97
- export declare function computeResolvedOptions(opts: PipelineCliOptions): ResolvedOptions;
99
+ export declare function computeResolvedOptions(opts: PipelineCliOptions, repoConfig?: RepoConfig | null): ResolvedOptions;
98
100
  /**
99
101
  * Determine whether the post-run diagnosis summary hook should fire.
100
102
  *
@@ -133,6 +135,13 @@ export declare function runPostPipelineHooks(ctx: {
133
135
  run(opts: unknown): Promise<Diagnosis>;
134
136
  };
135
137
  }): Promise<void>;
138
+ /**
139
+ * Render a failed `--config` load as a clean CLI diagnostic instead of an
140
+ * uncaught ZodError stack trace. Mirrors the first-5-issues style of the
141
+ * Content Lake gates, and appends a cross-schema hint when the file smells
142
+ * like a `.ailf/ailf.config.ts` (`RepoConfig`) rather than an `EvalConfig`.
143
+ */
144
+ export declare function formatConfigFileError(err: unknown, filePath: string): string;
136
145
  /**
137
146
  * Execute the evaluation pipeline.
138
147
  *
@@ -142,3 +151,22 @@ export declare function runPostPipelineHooks(ctx: {
142
151
  * 4. Delegate to the PipelineOrchestrator
143
152
  */
144
153
  export declare function executePipeline(cliOpts: PipelineCliOptions): Promise<void>;
154
+ /**
155
+ * Resolve CLI options into typed ResolvedOptions, loading the repo config
156
+ * from `<cwd>/.ailf/` first. This is the single async entry point; the pure
157
+ * `computeResolvedOptions` does the option mapping once the config is loaded.
158
+ */
159
+ export declare function resolveOptions(opts: PipelineCliOptions): Promise<ResolvedOptions>;
160
+ /**
161
+ * Load the repo config from `<cwd>/.ailf/`. Probes TS/JS/YAML/JSON in a
162
+ * fixed precedence order (see `REPO_CONFIG_CANDIDATES`) and returns the
163
+ * first match, validated against `RepoConfigSchema`. Returns null when no
164
+ * config file is present, or when the matched file fails to load/parse (a
165
+ * warning is emitted and the run falls back to defaults + env vars).
166
+ *
167
+ * Auto-loads regardless of `--task-source`: the same config file is the
168
+ * per-environment configuration home for every run (W0077 Phase 6a).
169
+ * Environment variables still win over file values — that cascade lives in
170
+ * `computeResolvedOptions`, which receives the parsed config from here.
171
+ */
172
+ export declare function loadRepoConfig(cwd?: string): Promise<RepoConfig | null>;
@@ -20,13 +20,16 @@ import { buildAppContext, parseArtifactUploadEnv, } from "../orchestration/build
20
20
  import { buildStepSequence } from "../orchestration/build-step-sequence.js";
21
21
  import { orchestratePipeline } from "../orchestration/pipeline-orchestrator.js";
22
22
  import { load } from "js-yaml";
23
+ import { ZodError } from "zod";
23
24
  import { PLACEHOLDER_OWNER_TEAM, } from "../_vendor/ailf-core/index.js";
24
25
  import { parseRepoConfig, } from "../adapters/task-sources/repo-schemas.js";
26
+ import { loadTsConfig } from "../adapters/config-sources/ts-config-loader.js";
25
27
  import { getCallerCwd, resolveOutputDir } from "./shared/resolve-output-dir.js";
26
28
  // Phase 6 / DIAG-06 — single formatter, single visual contract (D6-04).
27
29
  // Import statically so bundlers and type-checkers can verify the export
28
30
  // exists at build time rather than deferring to runtime dynamic import.
29
31
  import { formatCardSummaryLine } from "./interpret.js";
32
+ import { defaultVersionsFromReport } from "./shared/versions-from-report.js";
30
33
  const __dirname = dirname(fileURLToPath(import.meta.url));
31
34
  const ROOT = resolve(__dirname, "..", "..");
32
35
  // ---------------------------------------------------------------------------
@@ -39,13 +42,14 @@ const VALID_SEARCH_MODES = ["open", "origin-only", "off"];
39
42
  *
40
43
  * Exported so the plan builder can call it independently.
41
44
  */
42
- export function computeResolvedOptions(opts) {
45
+ export function computeResolvedOptions(opts, repoConfig = null) {
43
46
  // Resolve paths relative to the caller's cwd, not the eval package root
44
47
  const callerCwd = getCallerCwd();
45
- // `.ailf/config.yaml` is the per-environment config home for `ailf run`
46
- // (W0077 Phase 6a). Load early so downstream cascades (source, agentic,
47
- // owner, output, etc.) can read from it.
48
- const repoConfig = loadRepoConfigIfPresent(callerCwd);
48
+ // The repo config (`.ailf/ailf.config.ts` or `.ailf/config.yaml`, etc.) is
49
+ // loaded asynchronously by `loadRepoConfig` and injected here so this
50
+ // function stays pure + synchronous (safe for `--explain`). Downstream
51
+ // cascades (source, agentic, owner, output, etc.) read from it; env vars
52
+ // still win at each cascade below.
49
53
  // Validate + normalize mode via the single boundary function.
50
54
  // normalizeMode() maps legacy variant names (baseline, agentic, etc.)
51
55
  // to canonical mode "literacy" + variant, and throws on invalid input.
@@ -198,6 +202,12 @@ export function computeResolvedOptions(opts) {
198
202
  const datasetOverride = process.env.SANITY_DATASET ?? repoConfig?.source?.dataset;
199
203
  const projectIdOverride = process.env.SANITY_PROJECT_ID ?? repoConfig?.source?.projectId;
200
204
  const studioOriginOverride = process.env.SANITY_STUDIO_ORIGIN ?? repoConfig?.source?.studioOrigin;
205
+ // `source.baseUrl` was parsed by the repo-config schema but never mapped
206
+ // into the source overrides — it took effect only via the `DOC_BASE_URL`
207
+ // env var or a named `config/sources.ts` entry (D0022). Map it here with
208
+ // the same env-wins cascade as the trio above; `configToSourceOverrides`
209
+ // keeps the explicit `--url` flag (`urls[0]`) ahead of it.
210
+ const baseUrlOverride = process.env.DOC_BASE_URL ?? repoConfig?.source?.baseUrl;
201
211
  // Report store overrides (W0077 Phase 6e — `--report-dataset` and
202
212
  // `--report-project` retired). Resolution order:
203
213
  // 1. Environment variables (AILF_REPORT_DATASET, AILF_REPORT_PROJECT_ID)
@@ -295,6 +305,7 @@ export function computeResolvedOptions(opts) {
295
305
  compareEnabled,
296
306
  compareThreshold: opts.threshold,
297
307
  concurrency,
308
+ baseUrlOverride,
298
309
  datasetOverride,
299
310
  debug,
300
311
  dryRun: opts.dryRun,
@@ -512,24 +523,12 @@ export async function runPostPipelineHooks(ctx, result, args) {
512
523
  process.stderr.write(`ℹ️ Report not found: ${reportId} — skipping post-summary.\n`);
513
524
  return;
514
525
  }
515
- // Derive version metadata from the stored report (same approach as interpret.ts)
516
- const rec = report;
517
- const summary = rec.summary;
518
- const versions = summary?.versions;
519
- const versionedInputs = {
520
- graderJudgmentsVersion: typeof versions?.graderJudgmentsVersion === "string"
521
- ? versions.graderJudgmentsVersion
522
- : "unknown",
523
- ensembleVersion: typeof versions?.ensembleVersion === "string"
524
- ? versions.ensembleVersion
525
- : "unknown",
526
- diagnosisVersion: typeof versions?.diagnosisVersion === "string"
527
- ? versions.diagnosisVersion
528
- : "unknown",
529
- cardVersion: typeof versions?.cardVersion === "string"
530
- ? versions.cardVersion
531
- : "unknown",
532
- };
526
+ // Derive version metadata from the stored report. Shares the single
527
+ // `defaultVersionsFromReport` helper with `ailf interpret` so the
528
+ // path-relevant axes fall back to the canonical engine versions instead
529
+ // of the literal "unknown" that produced `diagnosis-unknown-…` paths
530
+ // (W0286).
531
+ const versionedInputs = defaultVersionsFromReport(report);
533
532
  // Run the diagnosis
534
533
  // eslint-disable-next-line @typescript-eslint/no-explicit-any
535
534
  const diagnosis = await runner.run({
@@ -600,6 +599,61 @@ function resolveRepoTasksPath(callerCwd, explicitPath, taskSourceType) {
600
599
  return undefined;
601
600
  }
602
601
  // ---------------------------------------------------------------------------
602
+ // --config file error formatting
603
+ // ---------------------------------------------------------------------------
604
+ /**
605
+ * Fields that exist on `RepoConfig` (`.ailf/ailf.config.ts`) but NOT on the
606
+ * `EvalConfig` accepted by `--config`. Their presence — or an object-shaped
607
+ * `source` where `--config` expects a named-source string — is a strong
608
+ * signal the user pasted their auto-loaded repo config into a `--config`
609
+ * file. Everything else (`execution`, `grader`, `publish`, `reportStore`,
610
+ * `artifacts`, `agentic`, `summary`, `taskSource`, `output`) overlaps between
611
+ * the two shapes.
612
+ */
613
+ const REPO_ONLY_CONFIG_KEYS = ["triggers", "owner"];
614
+ /**
615
+ * Detect whether a `--config` validation error looks like a `RepoConfig`
616
+ * (`.ailf/ailf.config.ts`) pasted into the wrong place. Two tells:
617
+ * - an `unrecognized_keys` issue naming `triggers` or `owner`, or
618
+ * - an `invalid_type` issue on `source` (RepoConfig's object `source` vs
619
+ * EvalConfig's named-source string).
620
+ */
621
+ function looksLikeRepoConfig(error) {
622
+ return error.issues.some((issue) => {
623
+ if (issue.code === "unrecognized_keys") {
624
+ const keys = issue.keys ?? [];
625
+ return keys.some((k) => REPO_ONLY_CONFIG_KEYS.includes(k));
626
+ }
627
+ return (issue.code === "invalid_type" &&
628
+ issue.path.length === 1 &&
629
+ issue.path[0] === "source");
630
+ });
631
+ }
632
+ /**
633
+ * Render a failed `--config` load as a clean CLI diagnostic instead of an
634
+ * uncaught ZodError stack trace. Mirrors the first-5-issues style of the
635
+ * Content Lake gates, and appends a cross-schema hint when the file smells
636
+ * like a `.ailf/ailf.config.ts` (`RepoConfig`) rather than an `EvalConfig`.
637
+ */
638
+ export function formatConfigFileError(err, filePath) {
639
+ if (!(err instanceof ZodError)) {
640
+ const msg = err instanceof Error ? err.message : String(err);
641
+ return `❌ Failed to load --config file: ${filePath}\n ${msg}`;
642
+ }
643
+ const issues = err.issues
644
+ .slice(0, 5)
645
+ .map((i) => ` [${i.path.join(".")}]: ${i.message}`)
646
+ .join("\n");
647
+ const more = err.issues.length > 5
648
+ ? `\n …and ${err.issues.length - 5} more issue(s)`
649
+ : "";
650
+ const lines = [`❌ Invalid --config file: ${filePath}`, `${issues}${more}`];
651
+ if (looksLikeRepoConfig(err)) {
652
+ lines.push("", "💡 This looks like a .ailf/ailf.config.ts (RepoConfig), which is a", " different shape from the EvalConfig that --config expects:", " • --config (EvalConfig): `source` is the NAME of a source declared", " in config/sources.ts (a string), plus per-run fields like `areas`,", " `tasks`, `mode`, and `compare`.", " • .ailf/ailf.config.ts (RepoConfig): `source` is an object", " ({ projectId, dataset, baseUrl }) plus repo-only `triggers` and", " `owner`. It is auto-loaded by every `ailf run` — you don't pass it", " via --config.", " If you meant to set repo defaults, place this file at", " .ailf/ailf.config.ts and drop the --config flag.");
653
+ }
654
+ return lines.join("\n");
655
+ }
656
+ // ---------------------------------------------------------------------------
603
657
  // Pipeline entry point
604
658
  // ---------------------------------------------------------------------------
605
659
  /**
@@ -623,7 +677,14 @@ export async function executePipeline(cliOpts) {
623
677
  const { createAppContext } = await import("../composition-root.js");
624
678
  const callerCwd = getCallerCwd();
625
679
  const adapter = new FileConfigAdapter(cliOpts.config, ROOT);
626
- const config = await adapter.resolve();
680
+ let config;
681
+ try {
682
+ config = await adapter.resolve();
683
+ }
684
+ catch (err) {
685
+ console.error(formatConfigFileError(err, cliOpts.config));
686
+ process.exit(1);
687
+ }
627
688
  // When `taskSource.type` is `repo` and no `repoTasksPath` was set in
628
689
  // the config file, fall back to `<callerCwd>/.ailf/tasks/` (the
629
690
  // location `ailf init` scaffolds). Silent fallback — composition-root
@@ -662,7 +723,7 @@ export async function executePipeline(cliOpts) {
662
723
  });
663
724
  process.exit(result.success ? 0 : 1);
664
725
  }
665
- const o = resolveOptions(cliOpts);
726
+ const o = await resolveOptions(cliOpts);
666
727
  console.log(` 📂 Output directory: ${o.outputDir}`);
667
728
  // Remote mode — submit to AILF API instead of running locally.
668
729
  // Use the caller's working directory (not the package root) because
@@ -724,10 +785,13 @@ function warnIfPlaceholderOwnerTeam() {
724
785
  `AILF_OWNER_TEAM) to attribute this run.`);
725
786
  }
726
787
  /**
727
- * Resolve CLI options into typed ResolvedOptions.
788
+ * Resolve CLI options into typed ResolvedOptions, loading the repo config
789
+ * from `<cwd>/.ailf/` first. This is the single async entry point; the pure
790
+ * `computeResolvedOptions` does the option mapping once the config is loaded.
728
791
  */
729
- function resolveOptions(opts) {
730
- return computeResolvedOptions(opts);
792
+ export async function resolveOptions(opts) {
793
+ const repoConfig = await loadRepoConfig();
794
+ return computeResolvedOptions(opts, repoConfig);
731
795
  }
732
796
  function writePipelineResult(result, outputDir) {
733
797
  mkdirSync(outputDir, { recursive: true });
@@ -736,25 +800,69 @@ function writePipelineResult(result, outputDir) {
736
800
  console.log(` 📄 Pipeline result: ${resultFile}\n`);
737
801
  }
738
802
  /**
739
- * Load `<cwd>/.ailf/config.yaml` if it exists. Returns null when the file
740
- * is absent or unparseable.
803
+ * Repo-config filenames probed under `<cwd>/.ailf/`, in resolution
804
+ * precedence order (highest first). `ailf init` writes `ailf.config.ts`
805
+ * (default `--format ts`), `config.yaml`, or `config.json` depending on
806
+ * `--format`; all are honored here.
807
+ *
808
+ * TypeScript/JavaScript files load via the same jiti mechanism `.task.ts`
809
+ * files use (`loadTsConfig`) — there is no second TS-loading path. YAML and
810
+ * JSON load via `js-yaml` (which also parses JSON). When more than one file
811
+ * is present the first match wins and the rest are ignored with a warning.
812
+ */
813
+ const REPO_CONFIG_CANDIDATES = [
814
+ "ailf.config.ts",
815
+ "ailf.config.js",
816
+ "config.ts",
817
+ "config.js",
818
+ "config.yaml",
819
+ "config.yml",
820
+ "config.json",
821
+ ];
822
+ function isTsConfigFile(filename) {
823
+ return filename.endsWith(".ts") || filename.endsWith(".js");
824
+ }
825
+ /**
826
+ * Load the repo config from `<cwd>/.ailf/`. Probes TS/JS/YAML/JSON in a
827
+ * fixed precedence order (see `REPO_CONFIG_CANDIDATES`) and returns the
828
+ * first match, validated against `RepoConfigSchema`. Returns null when no
829
+ * config file is present, or when the matched file fails to load/parse (a
830
+ * warning is emitted and the run falls back to defaults + env vars).
741
831
  *
742
- * Auto-loads regardless of `--task-source`: the same `.ailf/config.yaml` is
743
- * the per-environment configuration home for every run (W0077 Phase 6a).
744
- * Subsequent flag-family migrations (6b–6h) read additional fields from
745
- * this same file via the same loader.
832
+ * Auto-loads regardless of `--task-source`: the same config file is the
833
+ * per-environment configuration home for every run (W0077 Phase 6a).
834
+ * Environment variables still win over file values — that cascade lives in
835
+ * `computeResolvedOptions`, which receives the parsed config from here.
746
836
  */
747
- function loadRepoConfigIfPresent(cwd) {
748
- const configPath = resolve(cwd, ".ailf", "config.yaml");
749
- if (!existsSync(configPath))
837
+ export async function loadRepoConfig(cwd = getCallerCwd()) {
838
+ const ailfDir = resolve(cwd, ".ailf");
839
+ const present = REPO_CONFIG_CANDIDATES.filter((name) => existsSync(resolve(ailfDir, name)));
840
+ if (present.length === 0)
750
841
  return null;
842
+ const [chosen, ...shadowed] = present;
843
+ if (shadowed.length > 0) {
844
+ console.warn(` ⚠️ Multiple .ailf config files found; using ${chosen}, ignoring ` +
845
+ `${shadowed.join(", ")}.`);
846
+ }
847
+ const configPath = resolve(ailfDir, chosen);
848
+ const relPath = `.ailf/${chosen}`;
751
849
  try {
752
- const raw = readFileSync(configPath, "utf-8");
753
- const parsed = load(raw);
754
- return parseRepoConfig(parsed);
850
+ let raw;
851
+ if (isTsConfigFile(chosen)) {
852
+ const result = await loadTsConfig(configPath);
853
+ if (!result.ok) {
854
+ console.warn(` ⚠️ Failed to load ${relPath}: ${result.error}`);
855
+ return null;
856
+ }
857
+ raw = result.value;
858
+ }
859
+ else {
860
+ raw = load(readFileSync(configPath, "utf-8"));
861
+ }
862
+ return parseRepoConfig(raw, relPath);
755
863
  }
756
864
  catch (err) {
757
- console.warn(` ⚠️ Failed to parse ${configPath}: ${err instanceof Error ? err.message : String(err)}`);
865
+ console.warn(` ⚠️ Failed to parse ${relPath}: ${err instanceof Error ? err.message : String(err)}`);
758
866
  return null;
759
867
  }
760
868
  }
@@ -137,9 +137,11 @@ function toConfigSlice(opts) {
137
137
  publishEnabled: opts.publishExplicit ? opts.publishEnabled : undefined,
138
138
  publishTag: opts.publishTag,
139
139
  concurrency: opts.concurrency,
140
+ baseUrlOverride: opts.baseUrlOverride,
140
141
  datasetOverride: opts.datasetOverride,
141
142
  projectIdOverride: opts.projectIdOverride,
142
143
  perspectiveOverride: opts.perspectiveOverride,
144
+ studioOriginOverride: opts.studioOriginOverride,
143
145
  graderContext: opts.graderContext,
144
146
  graderReplications: opts.graderReplications,
145
147
  borderlineReplications: opts.borderlineReplications,
@@ -0,0 +1,29 @@
1
+ /**
2
+ * Derive `VersionedInputs` from a stored report record.
3
+ *
4
+ * Shared by both diagnosis emission paths — the `ailf interpret` command and
5
+ * the post-pipeline summary hook (`runPostPipelineHooks`) — so they build
6
+ * identical version metadata. Co-locating the logic here is the durable fix
7
+ * for W0286: the two paths previously inlined separate copies that drifted,
8
+ * and the hook copy coalesced every axis to the literal `"unknown"`.
9
+ *
10
+ * The path-relevant axes fall back to the canonical engine constants, NEVER
11
+ * `"unknown"`. `diagnosisVersion` is the visible provenance slug in the
12
+ * diagnosis artifact path (`diagnosis-{diagnosisVersion}-{hash}.json`); a
13
+ * `"unknown"` slug erases the signal that path is meant to carry. `cardVersion`
14
+ * feeds the path's content hash and the cache key, so it must also be the real
15
+ * registry version for cache identity to be correct.
16
+ *
17
+ * The two non-path axes (`graderJudgmentsVersion`, `ensembleVersion`) have no
18
+ * canonical version source today and fall back to `"unknown"`; they affect only
19
+ * the cache key, not the artifact path. Wiring them to real sources is out of
20
+ * scope for W0286.
21
+ */
22
+ import { type VersionedInputs } from "../../_vendor/ailf-core/index.d.ts";
23
+ /**
24
+ * The four-version chain is carried in `report.summary.versions` per the
25
+ * Phase 5 schema. When a field is absent (legacy reports, or any report
26
+ * produced before version metadata was populated), the path-relevant axes
27
+ * resolve to the canonical constants and the rest to `"unknown"`.
28
+ */
29
+ export declare function defaultVersionsFromReport(report: unknown): VersionedInputs;
@@ -0,0 +1,47 @@
1
+ /**
2
+ * Derive `VersionedInputs` from a stored report record.
3
+ *
4
+ * Shared by both diagnosis emission paths — the `ailf interpret` command and
5
+ * the post-pipeline summary hook (`runPostPipelineHooks`) — so they build
6
+ * identical version metadata. Co-locating the logic here is the durable fix
7
+ * for W0286: the two paths previously inlined separate copies that drifted,
8
+ * and the hook copy coalesced every axis to the literal `"unknown"`.
9
+ *
10
+ * The path-relevant axes fall back to the canonical engine constants, NEVER
11
+ * `"unknown"`. `diagnosisVersion` is the visible provenance slug in the
12
+ * diagnosis artifact path (`diagnosis-{diagnosisVersion}-{hash}.json`); a
13
+ * `"unknown"` slug erases the signal that path is meant to carry. `cardVersion`
14
+ * feeds the path's content hash and the cache key, so it must also be the real
15
+ * registry version for cache identity to be correct.
16
+ *
17
+ * The two non-path axes (`graderJudgmentsVersion`, `ensembleVersion`) have no
18
+ * canonical version source today and fall back to `"unknown"`; they affect only
19
+ * the cache key, not the artifact path. Wiring them to real sources is out of
20
+ * scope for W0286.
21
+ */
22
+ import { CARD_REGISTRY_VERSION, diagnosisVersion, } from "../../_vendor/ailf-core/index.js";
23
+ /**
24
+ * The four-version chain is carried in `report.summary.versions` per the
25
+ * Phase 5 schema. When a field is absent (legacy reports, or any report
26
+ * produced before version metadata was populated), the path-relevant axes
27
+ * resolve to the canonical constants and the rest to `"unknown"`.
28
+ */
29
+ export function defaultVersionsFromReport(report) {
30
+ const rec = report;
31
+ const summary = rec.summary;
32
+ const versions = summary?.versions;
33
+ return {
34
+ graderJudgmentsVersion: typeof versions?.graderJudgmentsVersion === "string"
35
+ ? versions.graderJudgmentsVersion
36
+ : "unknown",
37
+ ensembleVersion: typeof versions?.ensembleVersion === "string"
38
+ ? versions.ensembleVersion
39
+ : "unknown",
40
+ diagnosisVersion: typeof versions?.diagnosisVersion === "string"
41
+ ? versions.diagnosisVersion
42
+ : diagnosisVersion,
43
+ cardVersion: typeof versions?.cardVersion === "string"
44
+ ? versions.cardVersion
45
+ : CARD_REGISTRY_VERSION,
46
+ };
47
+ }
package/dist/index.d.ts CHANGED
@@ -37,7 +37,7 @@
37
37
  * })
38
38
  * ```
39
39
  */
40
- export { defineConfig, defineFeatures, defineModels, definePackageSurface, definePreflightScoring, definePricingTable, definePreset, definePrompts, defineRubrics, defineSchedules, defineSinks, defineSources, defineTask, defineThresholds, } from "./_vendor/ailf-core/index.d.ts";
40
+ export { defineConfig, defineFeatures, defineModels, definePackageSurface, definePreflightScoring, definePricingTable, definePreset, definePrompts, defineRepoConfig, defineRubrics, defineSchedules, defineSinks, defineSources, defineTask, defineThresholds, } from "./_vendor/ailf-core/index.d.ts";
41
41
  export type { PackageSurfaceConfig, PackageSurfaceEntry, PreflightScoringConfig, PricingEntry, PromptEntry, SourceEntry, } from "./_vendor/ailf-core/index.d.ts";
42
42
  export { env } from "./_vendor/ailf-core/index.d.ts";
43
43
  export type { AgentHarnessTaskDefinition, CustomTaskDefinition, GeneralizedAssertionDefinition, GeneralizedDocRef, GeneralizedTaskDefinition, GeneralizedTemplatedAssertion, GeneralizedValueAssertion, IdDocRef, KnowledgeProbeTaskDefinition, LiteracyTaskDefinition, MCPServerTaskDefinition, PathDocRef, PerspectiveDocRef, RubricRef, SlugDocRef, TaskCommonFields, TaskDifficulty, TaskOptions, TaskProviderConfig, TaskStatus, } from "./_vendor/ailf-core/index.d.ts";
package/dist/index.js CHANGED
@@ -40,7 +40,7 @@
40
40
  // ---------------------------------------------------------------------------
41
41
  // Configuration helpers (define* identity functions for typed authoring)
42
42
  // ---------------------------------------------------------------------------
43
- export { defineConfig, defineFeatures, defineModels, definePackageSurface, definePreflightScoring, definePricingTable, definePreset, definePrompts, defineRubrics, defineSchedules, defineSinks, defineSources, defineTask, defineThresholds, } from "./_vendor/ailf-core/index.js";
43
+ export { defineConfig, defineFeatures, defineModels, definePackageSurface, definePreflightScoring, definePricingTable, definePreset, definePrompts, defineRepoConfig, defineRubrics, defineSchedules, defineSinks, defineSources, defineTask, defineThresholds, } from "./_vendor/ailf-core/index.js";
44
44
  // ---------------------------------------------------------------------------
45
45
  // Environment helper
46
46
  // ---------------------------------------------------------------------------
@@ -71,6 +71,7 @@ export function mapToResolvedConfig(opts, rootDir) {
71
71
  searchMode: opts.searchMode ?? "open",
72
72
  concurrency: opts.concurrency,
73
73
  promptfooUrl: opts.promptfooUrl,
74
+ baseUrlOverride: opts.baseUrlOverride,
74
75
  datasetOverride: opts.datasetOverride,
75
76
  projectIdOverride: opts.projectIdOverride,
76
77
  perspectiveOverride: opts.perspectiveOverride,
@@ -5,11 +5,15 @@
5
5
  * with typed overrides instead of relying on process.env.
6
6
  */
7
7
  export function configToSourceOverrides(config) {
8
+ // The explicit `--url` flag (captured in `urls`) wins over a repo-config
9
+ // `source.baseUrl` (captured in `baseUrlOverride`); both feed the same
10
+ // `SourceOverrides.baseUrl` the doc fetcher reads.
11
+ const baseUrl = config.urls?.[0] ?? config.baseUrlOverride;
8
12
  return {
9
13
  ...(config.allowedOrigins?.length
10
14
  ? { allowedOrigins: config.allowedOrigins }
11
15
  : {}),
12
- ...(config.urls?.[0] ? { baseUrl: config.urls[0] } : {}),
16
+ ...(baseUrl ? { baseUrl } : {}),
13
17
  ...(config.datasetOverride ? { dataset: config.datasetOverride } : {}),
14
18
  ...(config.sanityDocumentArgs?.length
15
19
  ? { documentIds: config.sanityDocumentArgs }
@@ -1479,9 +1479,15 @@ export async function calculateAndWriteScores(options) {
1479
1479
  logger: log,
1480
1480
  });
1481
1481
  // Mutate-in-place so subsequent steps (validateGraderJudgmentsCalibration,
1482
- // persist) see the consensus-merged scores.
1482
+ // persist) see the consensus-merged scores. Snapshot first: the runner's
1483
+ // no-borderline fast path returns the SAME array reference it received,
1484
+ // so `regraded` may alias `judgments`. Truncating `judgments` would then
1485
+ // empty `regraded` before the spread reads it, silently wiping every
1486
+ // judgment (extract N, persist 0) — the divergence the post-persist guard
1487
+ // aborts on. Copying breaks the alias regardless of what the runner returns.
1488
+ const merged = [...regraded];
1483
1489
  judgments.length = 0;
1484
- judgments.push(...regraded);
1490
+ judgments.push(...merged);
1485
1491
  if (consistencyByJudgment.size > 0) {
1486
1492
  log.info(`Borderline consensus merged ${consistencyByJudgment.size} judgment(s)`);
1487
1493
  }
@@ -68,7 +68,7 @@ export function mapRequestToConfig(request, rootDir) {
68
68
  taskSourceType: mapTaskSourceType(request.taskSource?.type, request.taskMode),
69
69
  outputPath: undefined,
70
70
  promptfooUrl: undefined,
71
- studioOriginOverride: undefined,
71
+ studioOriginOverride: request.studioOrigin,
72
72
  sanityDocumentArgs: undefined,
73
73
  sourceReportId: request.sourceReportId,
74
74
  beforeOption: undefined,
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@sanity/ailf",
3
- "version": "7.3.0",
3
+ "version": "7.4.0",
4
4
  "private": false,
5
5
  "publishConfig": {
6
6
  "access": "public"