@sanity/ailf 7.3.0 → 7.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/_vendor/ailf-core/artifact-registry.d.ts +18 -0
- package/dist/_vendor/ailf-core/artifact-registry.js +1 -1
- package/dist/_vendor/ailf-core/config-helpers.d.ts +24 -2
- package/dist/_vendor/ailf-core/config-helpers.js +25 -2
- package/dist/_vendor/ailf-core/examples/index.d.ts +1 -1
- package/dist/_vendor/ailf-core/examples/index.js +1 -1
- package/dist/_vendor/ailf-core/index.d.ts +1 -1
- package/dist/_vendor/ailf-core/index.js +1 -1
- package/dist/_vendor/ailf-core/ports/context.d.ts +6 -0
- package/dist/_vendor/ailf-core/schemas/pipeline-request.d.ts +1 -0
- package/dist/_vendor/ailf-core/schemas/pipeline-request.js +1 -0
- package/dist/_vendor/ailf-core/types/pipeline-request.d.ts +2 -0
- package/dist/adapters/api-client/build-request.d.ts +2 -0
- package/dist/adapters/api-client/build-request.js +9 -0
- package/dist/adapters/config-sources/cli-config-adapter.d.ts +1 -1
- package/dist/adapters/config-sources/cli-config-adapter.js +6 -3
- package/dist/commands/explain-handler.js +2 -2
- package/dist/commands/init.js +9 -2
- package/dist/commands/interpret.js +1 -31
- package/dist/commands/pipeline-action.d.ts +29 -1
- package/dist/commands/pipeline-action.js +149 -41
- package/dist/commands/remote-pipeline.js +2 -0
- package/dist/commands/shared/versions-from-report.d.ts +29 -0
- package/dist/commands/shared/versions-from-report.js +47 -0
- package/dist/index.d.ts +1 -1
- package/dist/index.js +1 -1
- package/dist/orchestration/build-app-context.js +1 -0
- package/dist/orchestration/config-to-source-overrides.js +5 -1
- package/dist/pipeline/calculate-scores.js +8 -2
- package/dist/pipeline/map-request-to-config.js +1 -1
- package/package.json +1 -1
|
@@ -276,6 +276,24 @@ export declare function diagnosisPathBuilder(): ArtifactObjectPath;
|
|
|
276
276
|
* `diagnosisVersion` MUST NOT contain `|` (the function rejects that case).
|
|
277
277
|
*/
|
|
278
278
|
export declare function encodeDiagnosisPathVersion(diagnosisVersion: string, cardVersion: string): string;
|
|
279
|
+
/**
|
|
280
|
+
* Convert an entry key (wire format, e.g. `{taskId}::{modelId}`) to a
|
|
281
|
+
* filename-safe component.
|
|
282
|
+
*
|
|
283
|
+
* - `::` → `--` so the wire separator doesn't show up in the filename.
|
|
284
|
+
* - `/` → `_` so task names like "Content Lake with @sanity/client" don't
|
|
285
|
+
* create unintended GCS subdirectories and so `ls` against the per-entry
|
|
286
|
+
* directory shows one row per entry.
|
|
287
|
+
*
|
|
288
|
+
* Single colons (`:`) are preserved — modelIds like
|
|
289
|
+
* `anthropic:messages:claude-opus-4-6` are valid GCS object names.
|
|
290
|
+
*
|
|
291
|
+
* NOTE: this mapping is not bijective. A taskId containing literal `--`
|
|
292
|
+
* combined with a modelId could in theory collide with one whose taskId
|
|
293
|
+
* contains `::`. In practice, production taskIds don't exercise these
|
|
294
|
+
* combinations.
|
|
295
|
+
*/
|
|
296
|
+
export declare function sanitizeEntryKey(key: string): string;
|
|
279
297
|
/** Test-only reset for the legacy-key warning flag. Not exported publicly. */
|
|
280
298
|
export declare function __resetLegacyTestOutputsWarning(): void;
|
|
281
299
|
/**
|
|
@@ -311,7 +311,7 @@ export function encodeDiagnosisPathVersion(diagnosisVersion, cardVersion) {
|
|
|
311
311
|
* contains `::`. In practice, production taskIds don't exercise these
|
|
312
312
|
* combinations.
|
|
313
313
|
*/
|
|
314
|
-
function sanitizeEntryKey(key) {
|
|
314
|
+
export function sanitizeEntryKey(key) {
|
|
315
315
|
return key.replace(/::/g, "--").replace(/\//g, "_");
|
|
316
316
|
}
|
|
317
317
|
/**
|
|
@@ -33,15 +33,37 @@ import type { SinksFile } from "./schemas/sinks.js";
|
|
|
33
33
|
import type { TestBudgetConfig } from "./schemas/test-budgets.js";
|
|
34
34
|
import type { ModelsConfig } from "./types/index.js";
|
|
35
35
|
import type { GeneralizedTaskDefinition } from "./types/generalized-task.js";
|
|
36
|
+
import type { RepoConfig } from "./types/repo-config.js";
|
|
36
37
|
import type { PackageSurfaceConfig } from "./types/package-surface.js";
|
|
37
38
|
import type { PreflightScoringConfig } from "./types/preflight-scoring.js";
|
|
38
39
|
import type { ModeBase, PresetDefinition } from "./types/plugin-registry.js";
|
|
39
40
|
/**
|
|
40
|
-
* Define
|
|
41
|
+
* Define a full AILF evaluation configuration (`EvalConfig`).
|
|
41
42
|
*
|
|
42
|
-
*
|
|
43
|
+
* This is the advanced, standalone config passed via `ailf run --config
|
|
44
|
+
* <path>`. For the repo-level `.ailf/ailf.config.ts` that `ailf init`
|
|
45
|
+
* scaffolds and `ailf run` auto-loads, use {@link defineRepoConfig} instead —
|
|
46
|
+
* its shape is `RepoConfig` (with a `source` object), not `EvalConfig`.
|
|
43
47
|
*/
|
|
44
48
|
export declare function defineConfig(config: EvalConfig): EvalConfig;
|
|
49
|
+
/**
|
|
50
|
+
* Define an AILF repo configuration — the `.ailf/ailf.config.ts` file that
|
|
51
|
+
* `ailf init` scaffolds and `ailf run` auto-loads.
|
|
52
|
+
*
|
|
53
|
+
* Narrows the parameter to `RepoConfig` so authors get full IDE autocomplete
|
|
54
|
+
* for `source`, `triggers`, `execution`, `owner`, etc. Like the other
|
|
55
|
+
* `define*` helpers this is a pure identity function — runtime validation
|
|
56
|
+
* happens later via `RepoConfigSchema` when the pipeline loads the file.
|
|
57
|
+
*
|
|
58
|
+
* ```typescript
|
|
59
|
+
* import { defineRepoConfig } from "@sanity/ailf"
|
|
60
|
+
*
|
|
61
|
+
* export default defineRepoConfig({
|
|
62
|
+
* source: { projectId: "abc123", dataset: "production" },
|
|
63
|
+
* })
|
|
64
|
+
* ```
|
|
65
|
+
*/
|
|
66
|
+
export declare function defineRepoConfig(config: RepoConfig): RepoConfig;
|
|
45
67
|
/**
|
|
46
68
|
* Define an evaluation task with full type narrowing by mode.
|
|
47
69
|
*
|
|
@@ -30,13 +30,36 @@ import { CANONICAL_EVAL_MODES } from "../ailf-shared/index.js";
|
|
|
30
30
|
// Config-level helpers
|
|
31
31
|
// ---------------------------------------------------------------------------
|
|
32
32
|
/**
|
|
33
|
-
* Define
|
|
33
|
+
* Define a full AILF evaluation configuration (`EvalConfig`).
|
|
34
34
|
*
|
|
35
|
-
*
|
|
35
|
+
* This is the advanced, standalone config passed via `ailf run --config
|
|
36
|
+
* <path>`. For the repo-level `.ailf/ailf.config.ts` that `ailf init`
|
|
37
|
+
* scaffolds and `ailf run` auto-loads, use {@link defineRepoConfig} instead —
|
|
38
|
+
* its shape is `RepoConfig` (with a `source` object), not `EvalConfig`.
|
|
36
39
|
*/
|
|
37
40
|
export function defineConfig(config) {
|
|
38
41
|
return config;
|
|
39
42
|
}
|
|
43
|
+
/**
|
|
44
|
+
* Define an AILF repo configuration — the `.ailf/ailf.config.ts` file that
|
|
45
|
+
* `ailf init` scaffolds and `ailf run` auto-loads.
|
|
46
|
+
*
|
|
47
|
+
* Narrows the parameter to `RepoConfig` so authors get full IDE autocomplete
|
|
48
|
+
* for `source`, `triggers`, `execution`, `owner`, etc. Like the other
|
|
49
|
+
* `define*` helpers this is a pure identity function — runtime validation
|
|
50
|
+
* happens later via `RepoConfigSchema` when the pipeline loads the file.
|
|
51
|
+
*
|
|
52
|
+
* ```typescript
|
|
53
|
+
* import { defineRepoConfig } from "@sanity/ailf"
|
|
54
|
+
*
|
|
55
|
+
* export default defineRepoConfig({
|
|
56
|
+
* source: { projectId: "abc123", dataset: "production" },
|
|
57
|
+
* })
|
|
58
|
+
* ```
|
|
59
|
+
*/
|
|
60
|
+
export function defineRepoConfig(config) {
|
|
61
|
+
return config;
|
|
62
|
+
}
|
|
40
63
|
// ---------------------------------------------------------------------------
|
|
41
64
|
// Task-level helpers
|
|
42
65
|
// ---------------------------------------------------------------------------
|
|
@@ -534,4 +534,4 @@ export declare const EXAMPLES: Record<ExampleType, ExampleRecord>;
|
|
|
534
534
|
/** GitHub Actions workflow template for AI Literacy evaluation */
|
|
535
535
|
export declare const workflowYaml = "# \u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n# AI Literacy Evaluation \u2014 GitHub Actions workflow\n# \u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n#\n# Evaluates your documentation quality on every pull request.\n# The AILF CLI reads your .ailf/tasks/ definitions, submits them\n# to the AILF API for evaluation, and writes a score report.\n#\n# Prerequisites:\n# Add one secret to your repository (Settings \u2192 Secrets \u2192 Actions):\n# AILF_API_KEY \u2014 your API key (starts with ailf_live_sk_)\n#\n# @sanity/ailf is published with public npm access, so no npm token is\n# needed to install the CLI.\n#\n# Customization:\n# - Narrow the trigger paths to reduce cost (see comment below)\n# - Check debug_mode for faster iteration (fewer tests)\n# - See: https://github.com/sanity-labs/ai-literacy-framework\n# 
\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n\nname: AI Literacy Eval\n\non:\n pull_request:\n branches: [main]\n # Runs on every PR to main by default. To reduce cost:\n # paths: [\".ailf/**\", \"docs/**\"]\n\n workflow_dispatch:\n inputs:\n debug_mode:\n description: \"Run in debug mode (fewer tests, faster iteration)\"\n type: boolean\n default: false\n\nconcurrency:\n group: ailf-eval-${{ github.event.pull_request.number || github.ref }}\n cancel-in-progress: true\n\njobs:\n evaluate:\n name: AI Literacy Evaluation\n runs-on: ubuntu-latest\n permissions:\n contents: read\n pull-requests: write\n steps:\n - uses: actions/checkout@v4\n\n - name: Run evaluation\n id: eval\n env:\n AILF_API_KEY: ${{ secrets.AILF_API_KEY }}\n # \u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n # D0037 run provenance envelope \u2014 REPLACE THE OWNER TEAM SLUG\n # below. Unedited templates produce runs tagged with the literal\n # placeholder so you can spot them in Studio / BigQuery and fix.\n #\n # AILF_CLASSIFICATION values: official | adhoc | experimental |\n # test | external. 
External teams should use `adhoc` by default;\n # `official` is reserved for the core-docs scheduled series.\n # \u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n AILF_CLASSIFICATION: adhoc\n AILF_OWNER_TEAM: \"<REPLACE-WITH-YOUR-TEAM-SLUG>\"\n AILF_OWNER_INDIVIDUAL: ${{ github.actor }}\n run: |\n npx @sanity/ailf@latest run --remote \\\n --output /tmp/ailf-report.md \\\n ${{ inputs.debug_mode && '--debug' || '' }}\n\n - name: Post PR comment\n if: always() && github.event_name == 'pull_request'\n uses: actions/github-script@v7\n with:\n script: |\n const fs = require('fs');\n\n // --- Constants ---\n const MARKER = '<!-- ailf-score-report -->';\n const HISTORY_START = '<!-- ailf-score-history -->';\n const HISTORY_END = '<!-- /ailf-score-history -->';\n const MAX_HISTORY = 3; // keep at most 3 prior runs\n\n // --- Read new report ---\n let newReport;\n try {\n newReport = fs.readFileSync('/tmp/ailf-report.md', 'utf-8');\n } catch {\n newReport = `## \u26A0\uFE0F AI Literacy Evaluation\\n\\nNo report generated. 
Check the [workflow logs](${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}).`;\n }\n\n const prNumber = context.issue?.number || context.payload?.pull_request?.number;\n if (!prNumber) {\n console.log('No PR number found, skipping comment');\n return;\n }\n\n // --- Find existing comment ---\n const { data: comments } = await github.rest.issues.listComments({\n owner: context.repo.owner, repo: context.repo.repo,\n issue_number: prNumber,\n });\n const existing = comments.find(c => c.body?.includes(MARKER));\n\n // --- Build history from previous comment ---\n let historyEntries = [];\n if (existing) {\n const oldBody = existing.body || '';\n\n // Collect existing collapsed history entries\n const histStart = oldBody.indexOf(HISTORY_START);\n const histEnd = oldBody.indexOf(HISTORY_END);\n if (histStart !== -1 && histEnd !== -1) {\n const historyContent = oldBody.slice(histStart + HISTORY_START.length, histEnd).trim();\n // Split on </details> boundaries to get individual entries\n if (historyContent) {\n historyEntries = historyContent\n .split(/<\\/details>\\s*/)\n .map(s => s.trim())\n .filter(s => s.startsWith('<details>'))\n .map(s => s + '\\n</details>');\n }\n }\n\n // Extract the current report (will become the newest history entry)\n let previousReport = '';\n if (histStart !== -1) {\n // Report is between MARKER and the \"Previous runs\" heading (or history section)\n const markerIdx = oldBody.indexOf(MARKER);\n // Find the --- separator before history\n const separatorIdx = oldBody.lastIndexOf('---', histStart);\n const endIdx = separatorIdx > markerIdx ? 
separatorIdx : histStart;\n previousReport = oldBody.slice(markerIdx + MARKER.length, endIdx).trim();\n } else {\n // No history yet \u2014 everything after MARKER is the report\n const markerIdx = oldBody.indexOf(MARKER);\n if (markerIdx !== -1) {\n previousReport = oldBody.slice(markerIdx + MARKER.length).trim();\n }\n }\n\n // Collapse the previous report into a <details> entry\n if (previousReport) {\n const scoreMatch = previousReport.match(/Overall:\\s*(\\d+)\\/100/);\n const score = scoreMatch ? scoreMatch[1] : '?';\n const dateMatch = previousReport.match(/Generated by.*?\u00B7\\s*([^\u00B7<\\n*]+)/);\n const date = dateMatch\n ? dateMatch[1].trim()\n : new Date().toISOString().slice(0, 16).replace('T', ' ') + ' UTC';\n const entry = `<details>\\n<summary>\uD83D\uDCDC ${date} \u2014 ${score}/100</summary>\\n\\n${previousReport}\\n\\n</details>`;\n historyEntries.unshift(entry); // newest first\n }\n\n // Enforce max history limit\n historyEntries = historyEntries.slice(0, MAX_HISTORY);\n }\n\n // --- Assemble final comment ---\n const historySection = historyEntries.length > 0\n ? 
`\\n\\n---\\n\\n### \uD83D\uDCDC Previous runs\\n\\n${HISTORY_START}\\n${historyEntries.join('\\n\\n')}\\n${HISTORY_END}`\n : '';\n const finalBody = `${MARKER}\\n${newReport}${historySection}`;\n\n if (existing) {\n await github.rest.issues.updateComment({\n owner: context.repo.owner, repo: context.repo.repo,\n comment_id: existing.id, body: finalBody,\n });\n console.log(`Updated comment (${historyEntries.length} history entries)`);\n } else {\n await github.rest.issues.createComment({\n owner: context.repo.owner, repo: context.repo.repo,\n issue_number: prNumber, body: finalBody,\n });\n console.log('Created new PR comment');\n }\n\n - name: Summary\n if: always()\n run: |\n if [ -f /tmp/ailf-report.md ]; then\n cat /tmp/ailf-report.md >> \"$GITHUB_STEP_SUMMARY\"\n else\n echo \"## \u26A0\uFE0F AI Literacy Evaluation\" >> \"$GITHUB_STEP_SUMMARY\"\n echo \"\" >> \"$GITHUB_STEP_SUMMARY\"\n echo \"No report generated. Check the workflow logs.\" >> \"$GITHUB_STEP_SUMMARY\"\n fi\n";
|
|
536
536
|
/** TypeScript project configuration template (ailf.config.ts) */
|
|
537
|
-
export declare const ailfConfigTs = "/**\n * .ailf/ailf.config.ts \u2014 AI Literacy Framework project configuration.\n *\n * This file configures how the AILF evaluation pipeline runs in this\n * repository. Place it at .ailf/ailf.config.ts in your project root.\n *\n * Evaluations are submitted to the AILF API (ailf-api.sanity.build).\n * The API handles LLM calls, doc fetching, grading, and report\n * publishing. Your repo only needs one secret: AILF_API_KEY.\n *\n * Docs: https://github.com/sanity-labs/ai-literacy-framework\n */\n\nexport default {\n /**\n * Documentation source \u2014 which docs are being evaluated.\n *\n * This tells the pipeline which Sanity project and dataset contain\n * the documentation under test. For most users, this is Sanity's own\n * docs project.\n */\n source: {\n /** Sanity project ID (find yours at sanity.io/manage) */\n projectId: \"3do82whm\",\n /** The dataset to query (e.g., \"production\", \"next\") */\n dataset: \"next\",\n /**\n * The public URL of your documentation site.\n * Used by agentic mode to test agent discoverability.\n */\n baseUrl: \"https://www.sanity.io/docs\",\n },\n\n /**\n * Trigger configuration \u2014 when evaluations run automatically.\n *\n * Each key is a trigger context. The pipeline checks which trigger\n * matches the current execution context (PR, merge, schedule, etc.)\n * and applies its settings.\n *\n * Mode options:\n * \"validate-only\" \u2014 check that task files parse correctly (fast, no LLM calls)\n * \"eval\" \u2014 run the full evaluation pipeline\n */\n triggers: {\n /** On pull requests: just validate task files parse correctly. */\n pr: {\n mode: \"validate-only\",\n },\n\n /** When .ailf/ files change in a PR: run a real evaluation. */\n \"pr-task-change\": {\n mode: \"eval\",\n paths: [\".ailf/**\"],\n },\n\n /** On merge to main: run evaluation (non-blocking). */\n main: {\n mode: \"eval\",\n blocking: false,\n notify: true,\n },\n },\n}\n";
|
|
537
|
+
export declare const ailfConfigTs = "/**\n * .ailf/ailf.config.ts \u2014 AI Literacy Framework project configuration.\n *\n * This file configures how the AILF evaluation pipeline runs in this\n * repository. Place it at .ailf/ailf.config.ts in your project root.\n *\n * Evaluations are submitted to the AILF API (ailf-api.sanity.build).\n * The API handles LLM calls, doc fetching, grading, and report\n * publishing. Your repo only needs one secret: AILF_API_KEY.\n *\n * Docs: https://github.com/sanity-labs/ai-literacy-framework\n */\n\nimport { defineRepoConfig } from \"@sanity/ailf\"\n\nexport default defineRepoConfig({\n /**\n * Documentation source \u2014 which docs are being evaluated.\n *\n * This tells the pipeline which Sanity project and dataset contain\n * the documentation under test. For most users, this is Sanity's own\n * docs project.\n */\n source: {\n /** Sanity project ID (find yours at sanity.io/manage) */\n projectId: \"3do82whm\",\n /** The dataset to query (e.g., \"production\", \"next\") */\n dataset: \"next\",\n /**\n * The public URL of your documentation site.\n * Used by agentic mode to test agent discoverability.\n */\n baseUrl: \"https://www.sanity.io/docs\",\n },\n\n /**\n * Trigger configuration \u2014 when evaluations run automatically.\n *\n * Each key is a trigger context. The pipeline checks which trigger\n * matches the current execution context (PR, merge, schedule, etc.)\n * and applies its settings.\n *\n * Mode options:\n * \"validate-only\" \u2014 check that task files parse correctly (fast, no LLM calls)\n * \"eval\" \u2014 run the full evaluation pipeline\n */\n triggers: {\n /** On pull requests: just validate task files parse correctly. */\n pr: {\n mode: \"validate-only\",\n },\n\n /** When .ailf/ files change in a PR: run a real evaluation. */\n \"pr-task-change\": {\n mode: \"eval\",\n paths: [\".ailf/**\"],\n },\n\n /** On merge to main: run evaluation (non-blocking). 
*/\n main: {\n mode: \"eval\",\n blocking: false,\n notify: true,\n },\n },\n})\n";
|
|
@@ -734,4 +734,4 @@ export const workflowYaml = "# ────────────────
|
|
|
734
734
|
// TypeScript template exports (for ailf init --output-format ts)
|
|
735
735
|
// ---------------------------------------------------------------------------
|
|
736
736
|
/** TypeScript project configuration template (ailf.config.ts) */
|
|
737
|
-
export const ailfConfigTs = "/**\n * .ailf/ailf.config.ts — AI Literacy Framework project configuration.\n *\n * This file configures how the AILF evaluation pipeline runs in this\n * repository. Place it at .ailf/ailf.config.ts in your project root.\n *\n * Evaluations are submitted to the AILF API (ailf-api.sanity.build).\n * The API handles LLM calls, doc fetching, grading, and report\n * publishing. Your repo only needs one secret: AILF_API_KEY.\n *\n * Docs: https://github.com/sanity-labs/ai-literacy-framework\n */\n\nexport default {\n /**\n * Documentation source — which docs are being evaluated.\n *\n * This tells the pipeline which Sanity project and dataset contain\n * the documentation under test. For most users, this is Sanity's own\n * docs project.\n */\n source: {\n /** Sanity project ID (find yours at sanity.io/manage) */\n projectId: \"3do82whm\",\n /** The dataset to query (e.g., \"production\", \"next\") */\n dataset: \"next\",\n /**\n * The public URL of your documentation site.\n * Used by agentic mode to test agent discoverability.\n */\n baseUrl: \"https://www.sanity.io/docs\",\n },\n\n /**\n * Trigger configuration — when evaluations run automatically.\n *\n * Each key is a trigger context. The pipeline checks which trigger\n * matches the current execution context (PR, merge, schedule, etc.)\n * and applies its settings.\n *\n * Mode options:\n * \"validate-only\" — check that task files parse correctly (fast, no LLM calls)\n * \"eval\" — run the full evaluation pipeline\n */\n triggers: {\n /** On pull requests: just validate task files parse correctly. */\n pr: {\n mode: \"validate-only\",\n },\n\n /** When .ailf/ files change in a PR: run a real evaluation. */\n \"pr-task-change\": {\n mode: \"eval\",\n paths: [\".ailf/**\"],\n },\n\n /** On merge to main: run evaluation (non-blocking). */\n main: {\n mode: \"eval\",\n blocking: false,\n notify: true,\n },\n },\n}\n";
|
|
737
|
+
export const ailfConfigTs = "/**\n * .ailf/ailf.config.ts — AI Literacy Framework project configuration.\n *\n * This file configures how the AILF evaluation pipeline runs in this\n * repository. Place it at .ailf/ailf.config.ts in your project root.\n *\n * Evaluations are submitted to the AILF API (ailf-api.sanity.build).\n * The API handles LLM calls, doc fetching, grading, and report\n * publishing. Your repo only needs one secret: AILF_API_KEY.\n *\n * Docs: https://github.com/sanity-labs/ai-literacy-framework\n */\n\nimport { defineRepoConfig } from \"@sanity/ailf\"\n\nexport default defineRepoConfig({\n /**\n * Documentation source — which docs are being evaluated.\n *\n * This tells the pipeline which Sanity project and dataset contain\n * the documentation under test. For most users, this is Sanity's own\n * docs project.\n */\n source: {\n /** Sanity project ID (find yours at sanity.io/manage) */\n projectId: \"3do82whm\",\n /** The dataset to query (e.g., \"production\", \"next\") */\n dataset: \"next\",\n /**\n * The public URL of your documentation site.\n * Used by agentic mode to test agent discoverability.\n */\n baseUrl: \"https://www.sanity.io/docs\",\n },\n\n /**\n * Trigger configuration — when evaluations run automatically.\n *\n * Each key is a trigger context. The pipeline checks which trigger\n * matches the current execution context (PR, merge, schedule, etc.)\n * and applies its settings.\n *\n * Mode options:\n * \"validate-only\" — check that task files parse correctly (fast, no LLM calls)\n * \"eval\" — run the full evaluation pipeline\n */\n triggers: {\n /** On pull requests: just validate task files parse correctly. */\n pr: {\n mode: \"validate-only\",\n },\n\n /** When .ailf/ files change in a PR: run a real evaluation. */\n \"pr-task-change\": {\n mode: \"eval\",\n paths: [\".ailf/**\"],\n },\n\n /** On merge to main: run evaluation (non-blocking). */\n main: {\n mode: \"eval\",\n blocking: false,\n notify: true,\n },\n },\n})\n";
|
|
@@ -18,7 +18,7 @@ export * from "./examples/index.js";
|
|
|
18
18
|
export * from "./artifact-registry.js";
|
|
19
19
|
export * from "./batch-signing.js";
|
|
20
20
|
export * from "./constants.js";
|
|
21
|
-
export { defineCanaryTasks, defineConfig, defineFeatures, defineModeBase, defineModels, definePackageSurface, definePreflightScoring, definePricingTable, definePreset, definePrompts, defineRubrics, defineSchedules, defineSinks, defineSources, defineTask, defineTestBudgets, defineThresholds, } from "./config-helpers.js";
|
|
21
|
+
export { defineCanaryTasks, defineConfig, defineFeatures, defineModeBase, defineModels, definePackageSurface, definePreflightScoring, definePricingTable, definePreset, definePrompts, defineRepoConfig, defineRubrics, defineSchedules, defineSinks, defineSources, defineTask, defineTestBudgets, defineThresholds, } from "./config-helpers.js";
|
|
22
22
|
export type { PricingEntry, PromptEntry, SourceEntry, } from "./config-helpers.js";
|
|
23
23
|
export { env } from "./env-helper.js";
|
|
24
24
|
export { NoOpArtifactWriter, NotImplementedError, } from "./ports/artifact-writer.js";
|
|
@@ -21,7 +21,7 @@ export * from "./constants.js";
|
|
|
21
21
|
// ---------------------------------------------------------------------------
|
|
22
22
|
// Architecture overhaul — Phase 0 helpers
|
|
23
23
|
// ---------------------------------------------------------------------------
|
|
24
|
-
export { defineCanaryTasks, defineConfig, defineFeatures, defineModeBase, defineModels, definePackageSurface, definePreflightScoring, definePricingTable, definePreset, definePrompts, defineRubrics, defineSchedules, defineSinks, defineSources, defineTask, defineTestBudgets, defineThresholds, } from "./config-helpers.js";
|
|
24
|
+
export { defineCanaryTasks, defineConfig, defineFeatures, defineModeBase, defineModels, definePackageSurface, definePreflightScoring, definePricingTable, definePreset, definePrompts, defineRepoConfig, defineRubrics, defineSchedules, defineSinks, defineSources, defineTask, defineTestBudgets, defineThresholds, } from "./config-helpers.js";
|
|
25
25
|
export { env } from "./env-helper.js";
|
|
26
26
|
export { NoOpArtifactWriter, NotImplementedError, } from "./ports/artifact-writer.js";
|
|
27
27
|
export { assoc, resolveVariantMode, splitTaskVariant, } from "./artifact-capture/association.js";
|
|
@@ -143,6 +143,12 @@ export interface ResolvedConfig {
|
|
|
143
143
|
perspectiveOverride?: string;
|
|
144
144
|
/** Sanity studio origin override */
|
|
145
145
|
studioOriginOverride?: string;
|
|
146
|
+
/**
|
|
147
|
+
* Documentation base-URL override, sourced from the repo config
|
|
148
|
+
* `source.baseUrl` (or the `DOC_BASE_URL` env var). Distinct from the
|
|
149
|
+
* `--url` flag captured in `urls`; `urls[0]` still wins when both are set.
|
|
150
|
+
*/
|
|
151
|
+
baseUrlOverride?: string;
|
|
146
152
|
/** Sanity document filter args */
|
|
147
153
|
sanityDocumentArgs?: string[];
|
|
148
154
|
/** Report ID that triggered this re-run (flows to provenance.lineage.rerunOf) */
|
|
@@ -79,6 +79,7 @@ export declare const PipelineRequestSchema: z.ZodObject<{
|
|
|
79
79
|
}>>;
|
|
80
80
|
source: z.ZodOptional<z.ZodString>;
|
|
81
81
|
sourceReportId: z.ZodOptional<z.ZodString>;
|
|
82
|
+
studioOrigin: z.ZodOptional<z.ZodString>;
|
|
82
83
|
taskMode: z.ZodOptional<z.ZodEnum<{
|
|
83
84
|
"content-lake": "content-lake";
|
|
84
85
|
inline: "inline";
|
|
@@ -140,6 +140,7 @@ export const PipelineRequestSchema = z.object({
|
|
|
140
140
|
searchMode: z.enum(["off", "open", "origin-only"]).optional(),
|
|
141
141
|
source: z.string().optional(),
|
|
142
142
|
sourceReportId: z.string().optional(),
|
|
143
|
+
studioOrigin: z.string().url().optional(),
|
|
143
144
|
taskMode: z.enum(["content-lake", "inline"]).optional(),
|
|
144
145
|
/**
|
|
145
146
|
* Task-source configuration (W0077 Phase 6h). Mirrors
|
|
@@ -123,6 +123,8 @@ export interface PipelineRequest {
|
|
|
123
123
|
searchMode?: "off" | "open" | "origin-only";
|
|
124
124
|
source?: string;
|
|
125
125
|
sourceReportId?: string;
|
|
126
|
+
/** Studio origin override — maps to `ResolvedConfig.studioOriginOverride`. */
|
|
127
|
+
studioOrigin?: string;
|
|
126
128
|
taskMode?: "content-lake" | "inline";
|
|
127
129
|
taskSource?: PipelineRequestTaskSource;
|
|
128
130
|
tasks?: string[];
|
|
@@ -54,9 +54,11 @@ export interface RemoteConfigSlice {
|
|
|
54
54
|
publishEnabled?: boolean;
|
|
55
55
|
publishTag?: string;
|
|
56
56
|
concurrency?: number;
|
|
57
|
+
baseUrlOverride?: string;
|
|
57
58
|
datasetOverride?: string;
|
|
58
59
|
projectIdOverride?: string;
|
|
59
60
|
perspectiveOverride?: string;
|
|
61
|
+
studioOriginOverride?: string;
|
|
60
62
|
graderContext?: "rubric-only" | "with-docs";
|
|
61
63
|
graderReplications?: number;
|
|
62
64
|
borderlineReplications?: number;
|
|
@@ -123,6 +123,15 @@ export async function buildRemoteRequest(options) {
|
|
|
123
123
|
raw.projectId = config.projectIdOverride;
|
|
124
124
|
if (config.perspectiveOverride)
|
|
125
125
|
raw.perspective = config.perspectiveOverride;
|
|
126
|
+
if (config.studioOriginOverride) {
|
|
127
|
+
raw.studioOrigin = config.studioOriginOverride;
|
|
128
|
+
}
|
|
129
|
+
// A repo-config `source.baseUrl` rides the existing `urls` channel: the
|
|
130
|
+
// server maps request.urls -> ResolvedConfig.urls -> source baseUrl
|
|
131
|
+
// (configToSourceOverrides), mirroring the local path.
|
|
132
|
+
if (config.baseUrlOverride && !raw.urls) {
|
|
133
|
+
raw.urls = [config.baseUrlOverride];
|
|
134
|
+
}
|
|
126
135
|
// Advanced
|
|
127
136
|
if (config.graderContext) {
|
|
128
137
|
raw.graderContext = config.graderContext;
|
|
@@ -2,7 +2,7 @@
|
|
|
2
2
|
* CliConfigAdapter — resolves pipeline config from Commander CLI flags.
|
|
3
3
|
*
|
|
4
4
|
* This is the default adapter — it wraps the existing option resolution
|
|
5
|
-
* pipeline: PipelineCliOptions →
|
|
5
|
+
* pipeline: PipelineCliOptions → resolveOptions → mapToResolvedConfig.
|
|
6
6
|
*
|
|
7
7
|
* @see packages/eval/src/commands/pipeline-action.ts — underlying implementation
|
|
8
8
|
*/
|
|
@@ -2,11 +2,11 @@
|
|
|
2
2
|
* CliConfigAdapter — resolves pipeline config from Commander CLI flags.
|
|
3
3
|
*
|
|
4
4
|
* This is the default adapter — it wraps the existing option resolution
|
|
5
|
-
* pipeline: PipelineCliOptions →
|
|
5
|
+
* pipeline: PipelineCliOptions → resolveOptions → mapToResolvedConfig.
|
|
6
6
|
*
|
|
7
7
|
* @see packages/eval/src/commands/pipeline-action.ts — underlying implementation
|
|
8
8
|
*/
|
|
9
|
-
import {
|
|
9
|
+
import { resolveOptions } from "../../commands/pipeline-action.js";
|
|
10
10
|
import { mapToResolvedConfig } from "../../orchestration/build-app-context.js";
|
|
11
11
|
export class CliConfigAdapter {
|
|
12
12
|
cliOpts;
|
|
@@ -17,7 +17,10 @@ export class CliConfigAdapter {
|
|
|
17
17
|
this.rootDir = rootDir;
|
|
18
18
|
}
|
|
19
19
|
async resolve() {
|
|
20
|
-
|
|
20
|
+
// resolveOptions loads the repo config (`.ailf/ailf.config.ts` etc.)
|
|
21
|
+
// before mapping CLI flags, so file-sourced source/owner/execution
|
|
22
|
+
// values reach the resolved config.
|
|
23
|
+
const resolved = await resolveOptions(this.cliOpts);
|
|
21
24
|
return mapToResolvedConfig(resolved, this.rootDir);
|
|
22
25
|
}
|
|
23
26
|
}
|
|
@@ -22,7 +22,7 @@
|
|
|
22
22
|
import { TASK_FILE_NAMES } from "../_vendor/ailf-core/index.js";
|
|
23
23
|
import { buildPipelinePlan, buildSimpleCommandPlan, } from "../pipeline/plan.js";
|
|
24
24
|
import { formatPlanConsole, formatPlanJson } from "../pipeline/plan-format.js";
|
|
25
|
-
import {
|
|
25
|
+
import { resolveOptions } from "./pipeline-action.js";
|
|
26
26
|
import { getCallerCwd } from "./shared/resolve-output-dir.js";
|
|
27
27
|
import { LiteracyVariant } from "../pipeline/normalize-mode.js";
|
|
28
28
|
// ---------------------------------------------------------------------------
|
|
@@ -704,7 +704,7 @@ async function buildPipelineExplainPlan(actionCommand, rootDir) {
|
|
|
704
704
|
purpose: raw.purpose,
|
|
705
705
|
label: raw.label ?? [],
|
|
706
706
|
};
|
|
707
|
-
const resolved =
|
|
707
|
+
const resolved = await resolveOptions(withDefaults);
|
|
708
708
|
const planOpts = {
|
|
709
709
|
areaOption: resolved.areaOption,
|
|
710
710
|
beforeOption: resolved.beforeOption,
|
package/dist/commands/init.js
CHANGED
|
@@ -98,7 +98,7 @@ export async function runInit(opts) {
|
|
|
98
98
|
const skipped = [];
|
|
99
99
|
// 2. Write project config
|
|
100
100
|
if (format === "ts") {
|
|
101
|
-
// TypeScript: ailf.config.ts
|
|
101
|
+
// TypeScript: ailf.config.ts wrapped in the defineRepoConfig helper
|
|
102
102
|
const configPath = resolve(ailfDir, "ailf.config.ts");
|
|
103
103
|
if (writeIfNew(configPath, ailfConfigTs, force)) {
|
|
104
104
|
written.push(rel(targetDir, configPath));
|
|
@@ -252,6 +252,13 @@ export async function runInit(opts) {
|
|
|
252
252
|
}
|
|
253
253
|
}
|
|
254
254
|
const taskExt = format === "ts" ? ".task.ts" : format === "yaml" ? ".yaml" : ".json";
|
|
255
|
+
// Reference the config file we actually wrote (not a hard-coded
|
|
256
|
+
// config.yaml) so the "Next steps" hints point at a real file.
|
|
257
|
+
const configFile = format === "ts"
|
|
258
|
+
? ".ailf/ailf.config.ts"
|
|
259
|
+
: format === "yaml"
|
|
260
|
+
? ".ailf/config.yaml"
|
|
261
|
+
: ".ailf/config.json";
|
|
255
262
|
console.log();
|
|
256
263
|
console.log(" Next steps:");
|
|
257
264
|
console.log();
|
|
@@ -285,7 +292,7 @@ export async function runInit(opts) {
|
|
|
285
292
|
console.log(" AILF_API_KEY=... npx @sanity/ailf@latest pipeline --remote --debug");
|
|
286
293
|
console.log();
|
|
287
294
|
console.log(" 💡 Or test a remote run against your repo tasks:");
|
|
288
|
-
console.log(
|
|
295
|
+
console.log(` # First, set \`taskSource: { type: repo }\` in ${configFile}`);
|
|
289
296
|
console.log(" AILF_API_KEY=... npx @sanity/ailf@latest run --remote --debug");
|
|
290
297
|
console.log();
|
|
291
298
|
console.log(" 💡 Or run locally against your repo tasks:");
|
|
@@ -18,9 +18,9 @@
|
|
|
18
18
|
import { dirname, resolve } from "path";
|
|
19
19
|
import { fileURLToPath } from "url";
|
|
20
20
|
import { Command } from "commander";
|
|
21
|
-
import { CARD_REGISTRY_VERSION, diagnosisVersion, } from "../_vendor/ailf-core/index.js";
|
|
22
21
|
import { addOutputDirOption } from "./shared/options.js";
|
|
23
22
|
import { resolveOutputDir } from "./shared/resolve-output-dir.js";
|
|
23
|
+
import { defaultVersionsFromReport } from "./shared/versions-from-report.js";
|
|
24
24
|
// ---------------------------------------------------------------------------
|
|
25
25
|
// Module-level root constant (same pattern as compare.ts)
|
|
26
26
|
// ---------------------------------------------------------------------------
|
|
@@ -68,36 +68,6 @@ export function formatCardSummaryLine(card) {
|
|
|
68
68
|
return `${icon} ${card.cardType}: ${text}`;
|
|
69
69
|
}
|
|
70
70
|
// ---------------------------------------------------------------------------
|
|
71
|
-
// Default versions resolver
|
|
72
|
-
// ---------------------------------------------------------------------------
|
|
73
|
-
/**
|
|
74
|
-
* Derive VersionedInputs from a stored report record.
|
|
75
|
-
*
|
|
76
|
-
* The four-version chain is carried in `report.summary.versions` per the
|
|
77
|
-
* Phase 5 schema, with `diagnosisVersion` sourced from the runner's const.
|
|
78
|
-
* Falls back to hard-coded "unknown" values when the fields are not present
|
|
79
|
-
* (legacy reports without version metadata).
|
|
80
|
-
*/
|
|
81
|
-
function defaultVersionsFromReport(report) {
|
|
82
|
-
const rec = report;
|
|
83
|
-
const summary = rec.summary;
|
|
84
|
-
const versions = summary?.versions;
|
|
85
|
-
return {
|
|
86
|
-
graderJudgmentsVersion: typeof versions?.graderJudgmentsVersion === "string"
|
|
87
|
-
? versions.graderJudgmentsVersion
|
|
88
|
-
: "unknown",
|
|
89
|
-
ensembleVersion: typeof versions?.ensembleVersion === "string"
|
|
90
|
-
? versions.ensembleVersion
|
|
91
|
-
: "unknown",
|
|
92
|
-
diagnosisVersion: typeof versions?.diagnosisVersion === "string"
|
|
93
|
-
? versions.diagnosisVersion
|
|
94
|
-
: diagnosisVersion,
|
|
95
|
-
cardVersion: typeof versions?.cardVersion === "string"
|
|
96
|
-
? versions.cardVersion
|
|
97
|
-
: CARD_REGISTRY_VERSION,
|
|
98
|
-
};
|
|
99
|
-
}
|
|
100
|
-
// ---------------------------------------------------------------------------
|
|
101
71
|
// Command factory
|
|
102
72
|
// ---------------------------------------------------------------------------
|
|
103
73
|
/**
|
|
@@ -13,6 +13,7 @@
|
|
|
13
13
|
import { type ImpactSummary } from "../pipeline/reverse-mapping.js";
|
|
14
14
|
import type { DebugOptions, EvalMode } from "../pipeline/types.js";
|
|
15
15
|
import { type Diagnosis, type ReportStorePort, type SynthesisCostTelemetry } from "../_vendor/ailf-core/index.d.ts";
|
|
16
|
+
import { type RepoConfig } from "../adapters/task-sources/repo-schemas.js";
|
|
16
17
|
import type { PipelineCliOptions } from "./run.js";
|
|
17
18
|
export interface ResolvedOptions {
|
|
18
19
|
allowedOriginArgs: string[];
|
|
@@ -23,6 +24,7 @@ export interface ResolvedOptions {
|
|
|
23
24
|
compareEnabled: boolean;
|
|
24
25
|
compareThreshold?: number;
|
|
25
26
|
concurrency?: number;
|
|
27
|
+
baseUrlOverride?: string;
|
|
26
28
|
datasetOverride?: string;
|
|
27
29
|
debug?: DebugOptions;
|
|
28
30
|
dryRun: boolean;
|
|
@@ -94,7 +96,7 @@ export interface ResolvedOptions {
|
|
|
94
96
|
*
|
|
95
97
|
* Exported so the plan builder can call it independently.
|
|
96
98
|
*/
|
|
97
|
-
export declare function computeResolvedOptions(opts: PipelineCliOptions): ResolvedOptions;
|
|
99
|
+
export declare function computeResolvedOptions(opts: PipelineCliOptions, repoConfig?: RepoConfig | null): ResolvedOptions;
|
|
98
100
|
/**
|
|
99
101
|
* Determine whether the post-run diagnosis summary hook should fire.
|
|
100
102
|
*
|
|
@@ -133,6 +135,13 @@ export declare function runPostPipelineHooks(ctx: {
|
|
|
133
135
|
run(opts: unknown): Promise<Diagnosis>;
|
|
134
136
|
};
|
|
135
137
|
}): Promise<void>;
|
|
138
|
+
/**
|
|
139
|
+
* Render a failed `--config` load as a clean CLI diagnostic instead of an
|
|
140
|
+
* uncaught ZodError stack trace. Mirrors the first-5-issues style of the
|
|
141
|
+
* Content Lake gates, and appends a cross-schema hint when the file smells
|
|
142
|
+
* like a `.ailf/ailf.config.ts` (`RepoConfig`) rather than an `EvalConfig`.
|
|
143
|
+
*/
|
|
144
|
+
export declare function formatConfigFileError(err: unknown, filePath: string): string;
|
|
136
145
|
/**
|
|
137
146
|
* Execute the evaluation pipeline.
|
|
138
147
|
*
|
|
@@ -142,3 +151,22 @@ export declare function runPostPipelineHooks(ctx: {
|
|
|
142
151
|
* 4. Delegate to the PipelineOrchestrator
|
|
143
152
|
*/
|
|
144
153
|
export declare function executePipeline(cliOpts: PipelineCliOptions): Promise<void>;
|
|
154
|
+
/**
|
|
155
|
+
* Resolve CLI options into typed ResolvedOptions, loading the repo config
|
|
156
|
+
* from `<cwd>/.ailf/` first. This is the single async entry point; the pure
|
|
157
|
+
* `computeResolvedOptions` does the option mapping once the config is loaded.
|
|
158
|
+
*/
|
|
159
|
+
export declare function resolveOptions(opts: PipelineCliOptions): Promise<ResolvedOptions>;
|
|
160
|
+
/**
|
|
161
|
+
* Load the repo config from `<cwd>/.ailf/`. Probes TS/JS/YAML/JSON in a
|
|
162
|
+
* fixed precedence order (see `REPO_CONFIG_CANDIDATES`) and returns the
|
|
163
|
+
* first match, validated against `RepoConfigSchema`. Returns null when no
|
|
164
|
+
* config file is present, or when the matched file fails to load/parse (a
|
|
165
|
+
* warning is emitted and the run falls back to defaults + env vars).
|
|
166
|
+
*
|
|
167
|
+
* Auto-loads regardless of `--task-source`: the same config file is the
|
|
168
|
+
* per-environment configuration home for every run (W0077 Phase 6a).
|
|
169
|
+
* Environment variables still win over file values — that cascade lives in
|
|
170
|
+
* `computeResolvedOptions`, which receives the parsed config from here.
|
|
171
|
+
*/
|
|
172
|
+
export declare function loadRepoConfig(cwd?: string): Promise<RepoConfig | null>;
|
|
@@ -20,13 +20,16 @@ import { buildAppContext, parseArtifactUploadEnv, } from "../orchestration/build
|
|
|
20
20
|
import { buildStepSequence } from "../orchestration/build-step-sequence.js";
|
|
21
21
|
import { orchestratePipeline } from "../orchestration/pipeline-orchestrator.js";
|
|
22
22
|
import { load } from "js-yaml";
|
|
23
|
+
import { ZodError } from "zod";
|
|
23
24
|
import { PLACEHOLDER_OWNER_TEAM, } from "../_vendor/ailf-core/index.js";
|
|
24
25
|
import { parseRepoConfig, } from "../adapters/task-sources/repo-schemas.js";
|
|
26
|
+
import { loadTsConfig } from "../adapters/config-sources/ts-config-loader.js";
|
|
25
27
|
import { getCallerCwd, resolveOutputDir } from "./shared/resolve-output-dir.js";
|
|
26
28
|
// Phase 6 / DIAG-06 — single formatter, single visual contract (D6-04).
|
|
27
29
|
// Import statically so bundlers and type-checkers can verify the export
|
|
28
30
|
// exists at build time rather than deferring to runtime dynamic import.
|
|
29
31
|
import { formatCardSummaryLine } from "./interpret.js";
|
|
32
|
+
import { defaultVersionsFromReport } from "./shared/versions-from-report.js";
|
|
30
33
|
const __dirname = dirname(fileURLToPath(import.meta.url));
|
|
31
34
|
const ROOT = resolve(__dirname, "..", "..");
|
|
32
35
|
// ---------------------------------------------------------------------------
|
|
@@ -39,13 +42,14 @@ const VALID_SEARCH_MODES = ["open", "origin-only", "off"];
|
|
|
39
42
|
*
|
|
40
43
|
* Exported so the plan builder can call it independently.
|
|
41
44
|
*/
|
|
42
|
-
export function computeResolvedOptions(opts) {
|
|
45
|
+
export function computeResolvedOptions(opts, repoConfig = null) {
|
|
43
46
|
// Resolve paths relative to the caller's cwd, not the eval package root
|
|
44
47
|
const callerCwd = getCallerCwd();
|
|
45
|
-
// `.ailf/config.
|
|
46
|
-
//
|
|
47
|
-
//
|
|
48
|
-
|
|
48
|
+
// The repo config (`.ailf/ailf.config.ts` or `.ailf/config.yaml`, etc.) is
|
|
49
|
+
// loaded asynchronously by `loadRepoConfig` and injected here so this
|
|
50
|
+
// function stays pure + synchronous (safe for `--explain`). Downstream
|
|
51
|
+
// cascades (source, agentic, owner, output, etc.) read from it; env vars
|
|
52
|
+
// still win at each cascade below.
|
|
49
53
|
// Validate + normalize mode via the single boundary function.
|
|
50
54
|
// normalizeMode() maps legacy variant names (baseline, agentic, etc.)
|
|
51
55
|
// to canonical mode "literacy" + variant, and throws on invalid input.
|
|
@@ -198,6 +202,12 @@ export function computeResolvedOptions(opts) {
|
|
|
198
202
|
const datasetOverride = process.env.SANITY_DATASET ?? repoConfig?.source?.dataset;
|
|
199
203
|
const projectIdOverride = process.env.SANITY_PROJECT_ID ?? repoConfig?.source?.projectId;
|
|
200
204
|
const studioOriginOverride = process.env.SANITY_STUDIO_ORIGIN ?? repoConfig?.source?.studioOrigin;
|
|
205
|
+
// `source.baseUrl` was parsed by the repo-config schema but never mapped
|
|
206
|
+
// into the source overrides — it took effect only via the `DOC_BASE_URL`
|
|
207
|
+
// env var or a named `config/sources.ts` entry (D0022). Map it here with
|
|
208
|
+
// the same env-wins cascade as the trio above; `configToSourceOverrides`
|
|
209
|
+
// keeps the explicit `--url` flag (`urls[0]`) ahead of it.
|
|
210
|
+
const baseUrlOverride = process.env.DOC_BASE_URL ?? repoConfig?.source?.baseUrl;
|
|
201
211
|
// Report store overrides (W0077 Phase 6e — `--report-dataset` and
|
|
202
212
|
// `--report-project` retired). Resolution order:
|
|
203
213
|
// 1. Environment variables (AILF_REPORT_DATASET, AILF_REPORT_PROJECT_ID)
|
|
@@ -295,6 +305,7 @@ export function computeResolvedOptions(opts) {
|
|
|
295
305
|
compareEnabled,
|
|
296
306
|
compareThreshold: opts.threshold,
|
|
297
307
|
concurrency,
|
|
308
|
+
baseUrlOverride,
|
|
298
309
|
datasetOverride,
|
|
299
310
|
debug,
|
|
300
311
|
dryRun: opts.dryRun,
|
|
@@ -512,24 +523,12 @@ export async function runPostPipelineHooks(ctx, result, args) {
|
|
|
512
523
|
process.stderr.write(`ℹ️ Report not found: ${reportId} — skipping post-summary.\n`);
|
|
513
524
|
return;
|
|
514
525
|
}
|
|
515
|
-
// Derive version metadata from the stored report
|
|
516
|
-
|
|
517
|
-
|
|
518
|
-
|
|
519
|
-
|
|
520
|
-
|
|
521
|
-
? versions.graderJudgmentsVersion
|
|
522
|
-
: "unknown",
|
|
523
|
-
ensembleVersion: typeof versions?.ensembleVersion === "string"
|
|
524
|
-
? versions.ensembleVersion
|
|
525
|
-
: "unknown",
|
|
526
|
-
diagnosisVersion: typeof versions?.diagnosisVersion === "string"
|
|
527
|
-
? versions.diagnosisVersion
|
|
528
|
-
: "unknown",
|
|
529
|
-
cardVersion: typeof versions?.cardVersion === "string"
|
|
530
|
-
? versions.cardVersion
|
|
531
|
-
: "unknown",
|
|
532
|
-
};
|
|
526
|
+
// Derive version metadata from the stored report. Shares the single
|
|
527
|
+
// `defaultVersionsFromReport` helper with `ailf interpret` so the
|
|
528
|
+
// path-relevant axes fall back to the canonical engine versions instead
|
|
529
|
+
// of the literal "unknown" that produced `diagnosis-unknown-…` paths
|
|
530
|
+
// (W0286).
|
|
531
|
+
const versionedInputs = defaultVersionsFromReport(report);
|
|
533
532
|
// Run the diagnosis
|
|
534
533
|
// eslint-disable-next-line @typescript-eslint/no-explicit-any
|
|
535
534
|
const diagnosis = await runner.run({
|
|
@@ -600,6 +599,61 @@ function resolveRepoTasksPath(callerCwd, explicitPath, taskSourceType) {
|
|
|
600
599
|
return undefined;
|
|
601
600
|
}
|
|
602
601
|
// ---------------------------------------------------------------------------
|
|
602
|
+
// --config file error formatting
|
|
603
|
+
// ---------------------------------------------------------------------------
|
|
604
|
+
/**
|
|
605
|
+
* Fields that exist on `RepoConfig` (`.ailf/ailf.config.ts`) but NOT on the
|
|
606
|
+
* `EvalConfig` accepted by `--config`. Their presence — or an object-shaped
|
|
607
|
+
* `source` where `--config` expects a named-source string — is a strong
|
|
608
|
+
* signal the user pasted their auto-loaded repo config into a `--config`
|
|
609
|
+
* file. Everything else (`execution`, `grader`, `publish`, `reportStore`,
|
|
610
|
+
* `artifacts`, `agentic`, `summary`, `taskSource`, `output`) overlaps between
|
|
611
|
+
* the two shapes.
|
|
612
|
+
*/
|
|
613
|
+
const REPO_ONLY_CONFIG_KEYS = ["triggers", "owner"];
|
|
614
|
+
/**
 * Decide whether a failed `--config` validation resembles a `RepoConfig`
 * (`.ailf/ailf.config.ts`) handed to a flag that expects an `EvalConfig`.
 *
 * An issue counts as a tell when it is either:
 *   - `unrecognized_keys` naming at least one entry of `REPO_ONLY_CONFIG_KEYS`, or
 *   - `invalid_type` whose path is exactly `["source"]` (RepoConfig's object
 *     `source` vs EvalConfig's named-source string).
 *
 * @param error - Validation error exposing an `issues` array.
 * @returns `true` as soon as one issue matches, otherwise `false`.
 */
function looksLikeRepoConfig(error) {
    for (const issue of error.issues) {
        if (issue.code === "unrecognized_keys") {
            // A missing `keys` list is treated as "no unknown keys".
            const unknownKeys = issue.keys ?? [];
            if (unknownKeys.some((key) => REPO_ONLY_CONFIG_KEYS.includes(key))) {
                return true;
            }
            continue;
        }
        if (issue.code !== "invalid_type") {
            continue;
        }
        const { path } = issue;
        if (path.length === 1 && path[0] === "source") {
            return true;
        }
    }
    return false;
}
|
|
632
|
+
/**
 * Render a failed `--config` load as a clean CLI diagnostic instead of an
 * uncaught ZodError stack trace.
 *
 * Non-Zod failures are reported with their message only. Zod failures list
 * the first five issues as `[path]: message`, followed by a count of any
 * remaining issues. Issues with an empty path (for example an
 * `unrecognized_keys` issue on the top-level object) are labelled `<root>`
 * rather than printing an empty `[]`. When `looksLikeRepoConfig` matches, a
 * hint explaining the RepoConfig / EvalConfig difference is appended.
 *
 * @param err - The value thrown while loading or validating the file.
 * @param filePath - The path passed to `--config`, echoed in the message.
 * @returns The multi-line diagnostic text (nothing is printed here).
 */
export function formatConfigFileError(err, filePath) {
    if (!(err instanceof ZodError)) {
        const msg = err instanceof Error ? err.message : String(err);
        return `❌ Failed to load --config file: ${filePath}\n ${msg}`;
    }
    const maxShown = 5;
    // Root-level issues carry an empty path; give them a readable label.
    const describePath = (path) => path.length > 0 ? path.map((segment) => String(segment)).join(".") : "<root>";
    const issues = err.issues
        .slice(0, maxShown)
        .map((i) => ` [${describePath(i.path)}]: ${i.message}`)
        .join("\n");
    const more = err.issues.length > maxShown
        ? `\n …and ${err.issues.length - maxShown} more issue(s)`
        : "";
    const lines = [`❌ Invalid --config file: ${filePath}`, `${issues}${more}`];
    if (looksLikeRepoConfig(err)) {
        lines.push("", "💡 This looks like a .ailf/ailf.config.ts (RepoConfig), which is a", " different shape from the EvalConfig that --config expects:", " • --config (EvalConfig): `source` is the NAME of a source declared", " in config/sources.ts (a string), plus per-run fields like `areas`,", " `tasks`, `mode`, and `compare`.", " • .ailf/ailf.config.ts (RepoConfig): `source` is an object", " ({ projectId, dataset, baseUrl }) plus repo-only `triggers` and", " `owner`. It is auto-loaded by every `ailf run` — you don't pass it", " via --config.", " If you meant to set repo defaults, place this file at", " .ailf/ailf.config.ts and drop the --config flag.");
    }
    return lines.join("\n");
}
|
|
656
|
+
// ---------------------------------------------------------------------------
|
|
603
657
|
// Pipeline entry point
|
|
604
658
|
// ---------------------------------------------------------------------------
|
|
605
659
|
/**
|
|
@@ -623,7 +677,14 @@ export async function executePipeline(cliOpts) {
|
|
|
623
677
|
const { createAppContext } = await import("../composition-root.js");
|
|
624
678
|
const callerCwd = getCallerCwd();
|
|
625
679
|
const adapter = new FileConfigAdapter(cliOpts.config, ROOT);
|
|
626
|
-
|
|
680
|
+
let config;
|
|
681
|
+
try {
|
|
682
|
+
config = await adapter.resolve();
|
|
683
|
+
}
|
|
684
|
+
catch (err) {
|
|
685
|
+
console.error(formatConfigFileError(err, cliOpts.config));
|
|
686
|
+
process.exit(1);
|
|
687
|
+
}
|
|
627
688
|
// When `taskSource.type` is `repo` and no `repoTasksPath` was set in
|
|
628
689
|
// the config file, fall back to `<callerCwd>/.ailf/tasks/` (the
|
|
629
690
|
// location `ailf init` scaffolds). Silent fallback — composition-root
|
|
@@ -662,7 +723,7 @@ export async function executePipeline(cliOpts) {
|
|
|
662
723
|
});
|
|
663
724
|
process.exit(result.success ? 0 : 1);
|
|
664
725
|
}
|
|
665
|
-
const o = resolveOptions(cliOpts);
|
|
726
|
+
const o = await resolveOptions(cliOpts);
|
|
666
727
|
console.log(` 📂 Output directory: ${o.outputDir}`);
|
|
667
728
|
// Remote mode — submit to AILF API instead of running locally.
|
|
668
729
|
// Use the caller's working directory (not the package root) because
|
|
@@ -724,10 +785,13 @@ function warnIfPlaceholderOwnerTeam() {
|
|
|
724
785
|
`AILF_OWNER_TEAM) to attribute this run.`);
|
|
725
786
|
}
|
|
726
787
|
/**
 * Async entry point for option resolution.
 *
 * Loads the repo config via `loadRepoConfig()` (called with no arguments, so
 * it uses that function's default working directory) and hands the result,
 * together with the raw CLI options, to the synchronous
 * `computeResolvedOptions`.
 *
 * @param opts - Raw CLI options.
 * @returns The value produced by `computeResolvedOptions`.
 */
export async function resolveOptions(opts) {
    return computeResolvedOptions(opts, await loadRepoConfig());
}
|
|
732
796
|
function writePipelineResult(result, outputDir) {
|
|
733
797
|
mkdirSync(outputDir, { recursive: true });
|
|
@@ -736,25 +800,69 @@ function writePipelineResult(result, outputDir) {
|
|
|
736
800
|
console.log(` 📄 Pipeline result: ${resultFile}\n`);
|
|
737
801
|
}
|
|
738
802
|
/**
|
|
739
|
-
*
|
|
740
|
-
*
|
|
803
|
+
* Repo-config filenames probed under `<cwd>/.ailf/`, in resolution
|
|
804
|
+
* precedence order (highest first). `ailf init` writes `ailf.config.ts`
|
|
805
|
+
* (default `--format ts`), `config.yaml`, or `config.json` depending on
|
|
806
|
+
* `--format`; all are honored here.
|
|
807
|
+
*
|
|
808
|
+
* TypeScript/JavaScript files load via the same jiti mechanism `.task.ts`
|
|
809
|
+
* files use (`loadTsConfig`) — there is no second TS-loading path. YAML and
|
|
810
|
+
* JSON load via `js-yaml` (which also parses JSON). When more than one file
|
|
811
|
+
* is present the first match wins and the rest are ignored with a warning.
|
|
812
|
+
*/
|
|
813
|
+
const REPO_CONFIG_CANDIDATES = [
|
|
814
|
+
"ailf.config.ts",
|
|
815
|
+
"ailf.config.js",
|
|
816
|
+
"config.ts",
|
|
817
|
+
"config.js",
|
|
818
|
+
"config.yaml",
|
|
819
|
+
"config.yml",
|
|
820
|
+
"config.json",
|
|
821
|
+
];
|
|
822
|
+
/**
 * Report whether a config filename ends in `.ts` or `.js`.
 *
 * @param filename - Bare filename (or path) to classify.
 * @returns `true` for a `.ts` / `.js` suffix, `false` otherwise.
 */
function isTsConfigFile(filename) {
    return [".ts", ".js"].some((suffix) => filename.endsWith(suffix));
}
|
|
825
|
+
/**
 * Locate and load the repo config under `<cwd>/.ailf/`.
 *
 * Every name in `REPO_CONFIG_CANDIDATES` is probed in array order; the first
 * one that exists is used and any later matches are reported in a warning and
 * ignored. Files classified by `isTsConfigFile` are loaded with
 * `loadTsConfig`; everything else is read as UTF-8 text and parsed with
 * js-yaml's `load`. The raw value is then passed through `parseRepoConfig`.
 *
 * Failure is non-fatal: a `loadTsConfig` result with `ok === false`, or any
 * exception thrown while reading/parsing/validating, emits a warning and
 * yields `null` so the caller can fall back to defaults and env vars. The
 * env-over-file cascade itself lives in `computeResolvedOptions`.
 *
 * @param cwd - Directory containing `.ailf/`; defaults to `getCallerCwd()`.
 * @returns The parsed repo config, or `null` when no candidate exists or the
 *   chosen file could not be loaded.
 */
export async function loadRepoConfig(cwd = getCallerCwd()) {
    const ailfDir = resolve(cwd, ".ailf");
    const found = [];
    for (const candidate of REPO_CONFIG_CANDIDATES) {
        if (existsSync(resolve(ailfDir, candidate))) {
            found.push(candidate);
        }
    }
    if (found.length === 0) {
        return null;
    }
    // Candidates are listed highest-precedence first, so the head wins.
    const chosen = found[0];
    const shadowed = found.slice(1);
    if (shadowed.length > 0) {
        console.warn(` ⚠️ Multiple .ailf config files found; using ${chosen}, ignoring ` +
            `${shadowed.join(", ")}.`);
    }
    const configPath = resolve(ailfDir, chosen);
    const relPath = `.ailf/${chosen}`;
    try {
        if (!isTsConfigFile(chosen)) {
            // YAML and JSON share the js-yaml loader.
            return parseRepoConfig(load(readFileSync(configPath, "utf-8")), relPath);
        }
        const result = await loadTsConfig(configPath);
        if (result.ok) {
            return parseRepoConfig(result.value, relPath);
        }
        console.warn(` ⚠️ Failed to load ${relPath}: ${result.error}`);
        return null;
    }
    catch (err) {
        const reason = err instanceof Error ? err.message : String(err);
        console.warn(` ⚠️ Failed to parse ${relPath}: ${reason}`);
        return null;
    }
}
|
|
@@ -137,9 +137,11 @@ function toConfigSlice(opts) {
|
|
|
137
137
|
publishEnabled: opts.publishExplicit ? opts.publishEnabled : undefined,
|
|
138
138
|
publishTag: opts.publishTag,
|
|
139
139
|
concurrency: opts.concurrency,
|
|
140
|
+
baseUrlOverride: opts.baseUrlOverride,
|
|
140
141
|
datasetOverride: opts.datasetOverride,
|
|
141
142
|
projectIdOverride: opts.projectIdOverride,
|
|
142
143
|
perspectiveOverride: opts.perspectiveOverride,
|
|
144
|
+
studioOriginOverride: opts.studioOriginOverride,
|
|
143
145
|
graderContext: opts.graderContext,
|
|
144
146
|
graderReplications: opts.graderReplications,
|
|
145
147
|
borderlineReplications: opts.borderlineReplications,
|
|
@@ -0,0 +1,29 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Derive `VersionedInputs` from a stored report record.
|
|
3
|
+
*
|
|
4
|
+
* Shared by both diagnosis emission paths — the `ailf interpret` command and
|
|
5
|
+
* the post-pipeline summary hook (`runPostPipelineHooks`) — so they build
|
|
6
|
+
* identical version metadata. Co-locating the logic here is the durable fix
|
|
7
|
+
* for W0286: the two paths previously inlined separate copies that drifted,
|
|
8
|
+
* and the hook copy coalesced every axis to the literal `"unknown"`.
|
|
9
|
+
*
|
|
10
|
+
* The path-relevant axes fall back to the canonical engine constants, NEVER
|
|
11
|
+
* `"unknown"`. `diagnosisVersion` is the visible provenance slug in the
|
|
12
|
+
* diagnosis artifact path (`diagnosis-{diagnosisVersion}-{hash}.json`); a
|
|
13
|
+
* `"unknown"` slug erases the signal that path is meant to carry. `cardVersion`
|
|
14
|
+
* feeds the path's content hash and the cache key, so it must also be the real
|
|
15
|
+
* registry version for cache identity to be correct.
|
|
16
|
+
*
|
|
17
|
+
* The two non-path axes (`graderJudgmentsVersion`, `ensembleVersion`) have no
|
|
18
|
+
* canonical version source today and fall back to `"unknown"`; they affect only
|
|
19
|
+
* the cache key, not the artifact path. Wiring them to real sources is out of
|
|
20
|
+
* scope for W0286.
|
|
21
|
+
*/
|
|
22
|
+
import { type VersionedInputs } from "../../_vendor/ailf-core/index.d.ts";
|
|
23
|
+
/**
|
|
24
|
+
* The four-version chain is carried in `report.summary.versions` per the
|
|
25
|
+
* Phase 5 schema. When a field is absent (legacy reports, or any report
|
|
26
|
+
* produced before version metadata was populated), the path-relevant axes
|
|
27
|
+
* resolve to the canonical constants and the rest to `"unknown"`.
|
|
28
|
+
*/
|
|
29
|
+
export declare function defaultVersionsFromReport(report: unknown): VersionedInputs;
|
|
@@ -0,0 +1,47 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Derive `VersionedInputs` from a stored report record.
|
|
3
|
+
*
|
|
4
|
+
* Shared by both diagnosis emission paths — the `ailf interpret` command and
|
|
5
|
+
* the post-pipeline summary hook (`runPostPipelineHooks`) — so they build
|
|
6
|
+
* identical version metadata. Co-locating the logic here is the durable fix
|
|
7
|
+
* for W0286: the two paths previously inlined separate copies that drifted,
|
|
8
|
+
* and the hook copy coalesced every axis to the literal `"unknown"`.
|
|
9
|
+
*
|
|
10
|
+
* The path-relevant axes fall back to the canonical engine constants, NEVER
|
|
11
|
+
* `"unknown"`. `diagnosisVersion` is the visible provenance slug in the
|
|
12
|
+
* diagnosis artifact path (`diagnosis-{diagnosisVersion}-{hash}.json`); a
|
|
13
|
+
* `"unknown"` slug erases the signal that path is meant to carry. `cardVersion`
|
|
14
|
+
* feeds the path's content hash and the cache key, so it must also be the real
|
|
15
|
+
* registry version for cache identity to be correct.
|
|
16
|
+
*
|
|
17
|
+
* The two non-path axes (`graderJudgmentsVersion`, `ensembleVersion`) have no
|
|
18
|
+
* canonical version source today and fall back to `"unknown"`; they affect only
|
|
19
|
+
* the cache key, not the artifact path. Wiring them to real sources is out of
|
|
20
|
+
* scope for W0286.
|
|
21
|
+
*/
|
|
22
|
+
import { CARD_REGISTRY_VERSION, diagnosisVersion, } from "../../_vendor/ailf-core/index.js";
|
|
23
|
+
/**
 * Build the four-axis version record from a stored report.
 *
 * Versions are read from `report.summary.versions`. Each axis is taken from
 * the report only when it is a string; otherwise it falls back:
 *   - `graderJudgmentsVersion`, `ensembleVersion` → the literal `"unknown"`
 *   - `diagnosisVersion` → the imported `diagnosisVersion` constant
 *   - `cardVersion` → the imported `CARD_REGISTRY_VERSION` constant
 *
 * The parameter is declared as `unknown`, so a `null`/`undefined` report (or
 * one without `summary`/`versions`) is tolerated and resolves every axis to
 * its fallback instead of throwing on property access.
 *
 * @param report - Stored report record; any shape is accepted.
 * @returns An object with the four version fields, all strings when the
 *   fallback constants are strings.
 */
export function defaultVersionsFromReport(report) {
    // Optional chaining guards a null/undefined report as well as missing
    // `summary` / `versions` levels.
    const versions = report?.summary?.versions;
    const stringOr = (candidate, fallback) => typeof candidate === "string" ? candidate : fallback;
    return {
        graderJudgmentsVersion: stringOr(versions?.graderJudgmentsVersion, "unknown"),
        ensembleVersion: stringOr(versions?.ensembleVersion, "unknown"),
        diagnosisVersion: stringOr(versions?.diagnosisVersion, diagnosisVersion),
        cardVersion: stringOr(versions?.cardVersion, CARD_REGISTRY_VERSION),
    };
}
|
package/dist/index.d.ts
CHANGED
|
@@ -37,7 +37,7 @@
|
|
|
37
37
|
* })
|
|
38
38
|
* ```
|
|
39
39
|
*/
|
|
40
|
-
export { defineConfig, defineFeatures, defineModels, definePackageSurface, definePreflightScoring, definePricingTable, definePreset, definePrompts, defineRubrics, defineSchedules, defineSinks, defineSources, defineTask, defineThresholds, } from "./_vendor/ailf-core/index.d.ts";
|
|
40
|
+
export { defineConfig, defineFeatures, defineModels, definePackageSurface, definePreflightScoring, definePricingTable, definePreset, definePrompts, defineRepoConfig, defineRubrics, defineSchedules, defineSinks, defineSources, defineTask, defineThresholds, } from "./_vendor/ailf-core/index.d.ts";
|
|
41
41
|
export type { PackageSurfaceConfig, PackageSurfaceEntry, PreflightScoringConfig, PricingEntry, PromptEntry, SourceEntry, } from "./_vendor/ailf-core/index.d.ts";
|
|
42
42
|
export { env } from "./_vendor/ailf-core/index.d.ts";
|
|
43
43
|
export type { AgentHarnessTaskDefinition, CustomTaskDefinition, GeneralizedAssertionDefinition, GeneralizedDocRef, GeneralizedTaskDefinition, GeneralizedTemplatedAssertion, GeneralizedValueAssertion, IdDocRef, KnowledgeProbeTaskDefinition, LiteracyTaskDefinition, MCPServerTaskDefinition, PathDocRef, PerspectiveDocRef, RubricRef, SlugDocRef, TaskCommonFields, TaskDifficulty, TaskOptions, TaskProviderConfig, TaskStatus, } from "./_vendor/ailf-core/index.d.ts";
|
package/dist/index.js
CHANGED
|
@@ -40,7 +40,7 @@
|
|
|
40
40
|
// ---------------------------------------------------------------------------
|
|
41
41
|
// Configuration helpers (define* identity functions for typed authoring)
|
|
42
42
|
// ---------------------------------------------------------------------------
|
|
43
|
-
export { defineConfig, defineFeatures, defineModels, definePackageSurface, definePreflightScoring, definePricingTable, definePreset, definePrompts, defineRubrics, defineSchedules, defineSinks, defineSources, defineTask, defineThresholds, } from "./_vendor/ailf-core/index.js";
|
|
43
|
+
export { defineConfig, defineFeatures, defineModels, definePackageSurface, definePreflightScoring, definePricingTable, definePreset, definePrompts, defineRepoConfig, defineRubrics, defineSchedules, defineSinks, defineSources, defineTask, defineThresholds, } from "./_vendor/ailf-core/index.js";
|
|
44
44
|
// ---------------------------------------------------------------------------
|
|
45
45
|
// Environment helper
|
|
46
46
|
// ---------------------------------------------------------------------------
|
|
@@ -71,6 +71,7 @@ export function mapToResolvedConfig(opts, rootDir) {
|
|
|
71
71
|
searchMode: opts.searchMode ?? "open",
|
|
72
72
|
concurrency: opts.concurrency,
|
|
73
73
|
promptfooUrl: opts.promptfooUrl,
|
|
74
|
+
baseUrlOverride: opts.baseUrlOverride,
|
|
74
75
|
datasetOverride: opts.datasetOverride,
|
|
75
76
|
projectIdOverride: opts.projectIdOverride,
|
|
76
77
|
perspectiveOverride: opts.perspectiveOverride,
|
|
@@ -5,11 +5,15 @@
|
|
|
5
5
|
* with typed overrides instead of relying on process.env.
|
|
6
6
|
*/
|
|
7
7
|
export function configToSourceOverrides(config) {
|
|
8
|
+
// The explicit `--url` flag (captured in `urls`) wins over a repo-config
|
|
9
|
+
// `source.baseUrl` (captured in `baseUrlOverride`); both feed the same
|
|
10
|
+
// `SourceOverrides.baseUrl` the doc fetcher reads.
|
|
11
|
+
const baseUrl = config.urls?.[0] ?? config.baseUrlOverride;
|
|
8
12
|
return {
|
|
9
13
|
...(config.allowedOrigins?.length
|
|
10
14
|
? { allowedOrigins: config.allowedOrigins }
|
|
11
15
|
: {}),
|
|
12
|
-
...(
|
|
16
|
+
...(baseUrl ? { baseUrl } : {}),
|
|
13
17
|
...(config.datasetOverride ? { dataset: config.datasetOverride } : {}),
|
|
14
18
|
...(config.sanityDocumentArgs?.length
|
|
15
19
|
? { documentIds: config.sanityDocumentArgs }
|
|
@@ -1479,9 +1479,15 @@ export async function calculateAndWriteScores(options) {
|
|
|
1479
1479
|
logger: log,
|
|
1480
1480
|
});
|
|
1481
1481
|
// Mutate-in-place so subsequent steps (validateGraderJudgmentsCalibration,
|
|
1482
|
-
// persist) see the consensus-merged scores.
|
|
1482
|
+
// persist) see the consensus-merged scores. Snapshot first: the runner's
|
|
1483
|
+
// no-borderline fast path returns the SAME array reference it received,
|
|
1484
|
+
// so `regraded` may alias `judgments`. Truncating `judgments` would then
|
|
1485
|
+
// empty `regraded` before the spread reads it, silently wiping every
|
|
1486
|
+
// judgment (extract N, persist 0) — the divergence the post-persist guard
|
|
1487
|
+
// aborts on. Copying breaks the alias regardless of what the runner returns.
|
|
1488
|
+
const merged = [...regraded];
|
|
1483
1489
|
judgments.length = 0;
|
|
1484
|
-
judgments.push(...regraded);
|
|
1490
|
+
judgments.push(...merged);
|
|
1485
1491
|
if (consistencyByJudgment.size > 0) {
|
|
1486
1492
|
log.info(`Borderline consensus merged ${consistencyByJudgment.size} judgment(s)`);
|
|
1487
1493
|
}
|
|
@@ -68,7 +68,7 @@ export function mapRequestToConfig(request, rootDir) {
|
|
|
68
68
|
taskSourceType: mapTaskSourceType(request.taskSource?.type, request.taskMode),
|
|
69
69
|
outputPath: undefined,
|
|
70
70
|
promptfooUrl: undefined,
|
|
71
|
-
studioOriginOverride:
|
|
71
|
+
studioOriginOverride: request.studioOrigin,
|
|
72
72
|
sanityDocumentArgs: undefined,
|
|
73
73
|
sourceReportId: request.sourceReportId,
|
|
74
74
|
beforeOption: undefined,
|