@sanity/ailf 3.7.0 → 3.8.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/config/airbyte/ai_literacy_framework.connector.yaml +1 -1
- package/config/thresholds.ts +3 -3
- package/dist/_vendor/ailf-core/examples/index.d.ts +2 -2
- package/dist/_vendor/ailf-core/examples/index.js +2 -2
- package/dist/_vendor/ailf-core/ports/context.d.ts +0 -4
- package/dist/_vendor/ailf-core/schemas/eval-config.d.ts +38 -12
- package/dist/_vendor/ailf-core/schemas/eval-config.js +102 -22
- package/dist/_vendor/ailf-core/schemas/pipeline-request.d.ts +4 -6
- package/dist/_vendor/ailf-core/schemas/pipeline-request.js +1 -3
- package/dist/_vendor/ailf-core/schemas/schedules.d.ts +2 -2
- package/dist/_vendor/ailf-shared/run-classification.d.ts +2 -2
- package/dist/_vendor/ailf-shared/run-classification.js +1 -1
- package/dist/_vendor/ailf-shared/run-context.d.ts +1 -1
- package/dist/adapters/api-client/build-request.d.ts +0 -2
- package/dist/adapters/api-client/build-request.js +2 -6
- package/dist/adapters/config-sources/cli-config-adapter.d.ts +1 -1
- package/dist/adapters/config-sources/file-config-adapter.d.ts +1 -1
- package/dist/adapters/config-sources/file-config-adapter.js +42 -17
- package/dist/adapters/task-sources/repo-schemas.d.ts +41 -3
- package/dist/adapters/task-sources/repo-schemas.js +127 -0
- package/dist/cli-program.d.ts +39 -0
- package/dist/cli-program.js +137 -0
- package/dist/cli.d.ts +8 -2
- package/dist/cli.js +128 -142
- package/dist/commands/agent-report.js +1 -1
- package/dist/commands/calculate-scores.js +0 -2
- package/dist/commands/check-staleness.js +1 -1
- package/dist/commands/chronic-failures.js +4 -4
- package/dist/commands/coverage-audit.js +6 -7
- package/dist/commands/discovery-report.js +16 -4
- package/dist/commands/eval.d.ts +1 -1
- package/dist/commands/eval.js +1 -1
- package/dist/commands/explain-handler.d.ts +1 -1
- package/dist/commands/explain-handler.js +13 -44
- package/dist/commands/fetch-docs.js +0 -2
- package/dist/commands/generate-configs.js +0 -2
- package/dist/commands/grader/index.js +3 -3
- package/dist/commands/init.d.ts +2 -2
- package/dist/commands/init.js +10 -9
- package/dist/commands/interactive.d.ts +1 -1
- package/dist/commands/interactive.js +8 -8
- package/dist/commands/pipeline-action.d.ts +1 -3
- package/dist/commands/pipeline-action.js +174 -140
- package/dist/commands/pr-comment.js +1 -3
- package/dist/commands/publish.d.ts +1 -1
- package/dist/commands/publish.js +2 -4
- package/dist/commands/readiness-report.js +17 -8
- package/dist/commands/remote-pipeline.d.ts +1 -1
- package/dist/commands/remote-pipeline.js +1 -3
- package/dist/commands/run.d.ts +64 -0
- package/dist/commands/{pipeline.js → run.js} +19 -30
- package/dist/commands/shared/help.js +4 -4
- package/dist/commands/shared/options.d.ts +29 -3
- package/dist/commands/shared/options.js +37 -13
- package/dist/commands/validate-tasks.js +1 -1
- package/dist/commands/validate.d.ts +1 -1
- package/dist/commands/validate.js +2 -2
- package/dist/commands/weekly-digest.js +3 -3
- package/dist/config/thresholds.ts +3 -3
- package/dist/orchestration/build-app-context.js +0 -2
- package/dist/orchestration/build-step-sequence.js +1 -11
- package/dist/orchestration/steps/fetch-docs-step.js +1 -1
- package/dist/orchestration/steps/index.d.ts +0 -2
- package/dist/orchestration/steps/index.js +0 -2
- package/dist/orchestration/steps/run-eval-step.js +1 -1
- package/dist/pipeline/cache.d.ts +1 -1
- package/dist/pipeline/map-request-to-config.js +0 -2
- package/dist/pipeline/mirror-repo-tasks.d.ts +1 -1
- package/dist/pipeline/plan.d.ts +2 -4
- package/dist/pipeline/plan.js +4 -32
- package/dist/pipeline/run-context.d.ts +1 -1
- package/dist/pipeline/run-context.js +4 -4
- package/dist/pipeline/validate.d.ts +1 -1
- package/dist/pipeline/validate.js +1 -1
- package/package.json +11 -9
- package/dist/commands/pipeline.d.ts +0 -77
- package/dist/orchestration/steps/discovery-report-step.d.ts +0 -13
- package/dist/orchestration/steps/discovery-report-step.js +0 -62
- package/dist/orchestration/steps/readiness-step.d.ts +0 -13
- package/dist/orchestration/steps/readiness-step.js +0 -98
- package/dist/pipeline/compiler/__tests__/agent-harness-handler.test.d.ts +0 -10
- package/dist/pipeline/compiler/__tests__/agent-harness-handler.test.js +0 -366
- package/dist/pipeline/compiler/__tests__/assertion-mapper.test.d.ts +0 -9
- package/dist/pipeline/compiler/__tests__/assertion-mapper.test.js +0 -145
- package/dist/pipeline/compiler/__tests__/knowledge-probe-handler.test.d.ts +0 -10
- package/dist/pipeline/compiler/__tests__/knowledge-probe-handler.test.js +0 -314
- package/dist/pipeline/compiler/__tests__/literacy-handler.test.d.ts +0 -10
- package/dist/pipeline/compiler/__tests__/literacy-handler.test.js +0 -486
- package/dist/pipeline/compiler/__tests__/mcp-server-handler.test.d.ts +0 -10
- package/dist/pipeline/compiler/__tests__/mcp-server-handler.test.js +0 -425
- package/dist/pipeline/compiler/__tests__/promptfoo-compiler.test.d.ts +0 -9
- package/dist/pipeline/compiler/__tests__/promptfoo-compiler.test.js +0 -332
- package/dist/pipeline/compiler/__tests__/sandbox-and-fixtures.test.d.ts +0 -12
- package/dist/pipeline/compiler/__tests__/sandbox-and-fixtures.test.js +0 -210
- package/dist/pipeline/compiler/__tests__/scoring-and-presets.test.d.ts +0 -7
- package/dist/pipeline/compiler/__tests__/scoring-and-presets.test.js +0 -404
- package/dist/pipeline/compiler/__tests__/scoring-bridge.test.d.ts +0 -10
- package/dist/pipeline/compiler/__tests__/scoring-bridge.test.js +0 -184
- package/dist/pipeline/compiler/__tests__/task-graph-builder.test.d.ts +0 -8
- package/dist/pipeline/compiler/__tests__/task-graph-builder.test.js +0 -301
- package/dist/pipeline/compiler/__tests__/telemetry.test.d.ts +0 -9
- package/dist/pipeline/compiler/__tests__/telemetry.test.js +0 -503
- package/dist/pipeline/compiler/__tests__/tool-loop-openai.test.d.ts +0 -10
- package/dist/pipeline/compiler/__tests__/tool-loop-openai.test.js +0 -509
|
@@ -489,7 +489,7 @@ schemas:
|
|
|
489
489
|
- string
|
|
490
490
|
- "null"
|
|
491
491
|
description:
|
|
492
|
-
"Run classification (D0037): official | ad-hoc | experimental | test |
|
|
492
|
+
"Run classification (D0037): official | adhoc | experimental | test |
|
|
493
493
|
external. Orthogonal to trigger_type."
|
|
494
494
|
owner_team:
|
|
495
495
|
type:
|
package/config/thresholds.ts
CHANGED
|
@@ -2,9 +2,9 @@
|
|
|
2
2
|
* thresholds.ts — Quality thresholds for readiness gates and regression alerts.
|
|
3
3
|
*
|
|
4
4
|
* Used by:
|
|
5
|
-
* - `npx @sanity/ailf
|
|
6
|
-
* - `npx @sanity/ailf
|
|
7
|
-
* - `npx @sanity/ailf
|
|
5
|
+
* - `npx @sanity/ailf report readiness` (launch readiness checklist)
|
|
6
|
+
* - `npx @sanity/ailf run --publish` (severity-aware sink routing)
|
|
7
|
+
* - `npx @sanity/ailf run --compare` (regression alerting)
|
|
8
8
|
*
|
|
9
9
|
* @see docs/archive/exec-plans/scenario-matrix-implementation/phase-5-readiness-thresholds.md
|
|
10
10
|
*/
|
|
@@ -87,7 +87,7 @@ export declare const thresholdData: {
|
|
|
87
87
|
};
|
|
88
88
|
};
|
|
89
89
|
/** Raw YAML string for threshold example (preserves comments) */
|
|
90
|
-
export declare const thresholdYaml = "# Example quality threshold configuration.\n#\n# Thresholds define the minimum scores for readiness gates.\n# The
|
|
90
|
+
export declare const thresholdYaml = "# Example quality threshold configuration.\n#\n# Thresholds define the minimum scores for readiness gates.\n# The `ailf report readiness` command evaluates scores against these\n# thresholds and produces a go/no-go checklist.\n#\n# Global thresholds apply to all areas unless overridden per-area.\n\nglobal:\n composite: 60\n dimensions:\n task_completion: 55\n code_correctness: 50\n doc_coverage: 50\n ceiling: 70\n docLift: 10\n\nareas:\n groq:\n composite: 65\n ceiling: 75\n";
|
|
91
91
|
/** Parsed ailf-config example data (JSON-safe) */
|
|
92
92
|
export declare const ailfConfigData: {
|
|
93
93
|
readonly source: {
|
|
@@ -433,6 +433,6 @@ export interface ExampleRecord {
|
|
|
433
433
|
}
|
|
434
434
|
export declare const EXAMPLES: Record<ExampleType, ExampleRecord>;
|
|
435
435
|
/** GitHub Actions workflow template for AI Literacy evaluation */
|
|
436
|
-
export declare const workflowYaml = "# \u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n# AI Literacy Evaluation \u2014 GitHub Actions workflow\n# \u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n#\n# Evaluates your documentation quality on every pull request.\n# The AILF CLI reads your .ailf/tasks/ definitions, submits them\n# to the AILF API for evaluation, and writes a score report.\n#\n# Prerequisites:\n# Add one secret to your repository (Settings \u2192 Secrets \u2192 Actions):\n# AILF_API_KEY \u2014 your API key (starts with ailf_live_sk_)\n#\n# @sanity/ailf is published with public npm access, so no npm token is\n# needed to install the CLI.\n#\n# Customization:\n# - Narrow the trigger paths to reduce cost (see comment below)\n# - Check debug_mode for faster iteration (fewer tests)\n# - See: https://github.com/sanity-labs/ai-literacy-framework\n# 
\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n\nname: AI Literacy Eval\n\non:\n pull_request:\n branches: [main]\n # Runs on every PR to main by default. To reduce cost:\n # paths: [\".ailf/**\", \"docs/**\"]\n\n workflow_dispatch:\n inputs:\n debug_mode:\n description: \"Run in debug mode (fewer tests, faster iteration)\"\n type: boolean\n default: false\n\nconcurrency:\n group: ailf-eval-${{ github.event.pull_request.number || github.ref }}\n cancel-in-progress: true\n\njobs:\n evaluate:\n name: AI Literacy Evaluation\n runs-on: ubuntu-latest\n permissions:\n contents: read\n pull-requests: write\n steps:\n - uses: actions/checkout@v4\n\n - name: Run evaluation\n id: eval\n env:\n AILF_API_KEY: ${{ secrets.AILF_API_KEY }}\n # \u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n # D0037 run provenance envelope \u2014 REPLACE THE OWNER TEAM SLUG\n # below. Unedited templates produce runs tagged with the literal\n # placeholder so you can spot them in Studio / BigQuery and fix.\n #\n # AILF_CLASSIFICATION values: official | ad-hoc | experimental |\n # test | external. 
External teams should use `ad-hoc` by default;\n # `official` is reserved for the core-docs scheduled series.\n # \u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n AILF_CLASSIFICATION: ad-hoc\n AILF_OWNER_TEAM: \"<REPLACE-WITH-YOUR-TEAM-SLUG>\"\n AILF_OWNER_INDIVIDUAL: ${{ github.actor }}\n run: |\n npx @sanity/ailf@latest pipeline --remote \\\n --output /tmp/ailf-report.md \\\n ${{ inputs.debug_mode && '--debug' || '' }}\n\n - name: Post PR comment\n if: always() && github.event_name == 'pull_request'\n uses: actions/github-script@v7\n with:\n script: |\n const fs = require('fs');\n\n // --- Constants ---\n const MARKER = '<!-- ailf-score-report -->';\n const HISTORY_START = '<!-- ailf-score-history -->';\n const HISTORY_END = '<!-- /ailf-score-history -->';\n const MAX_HISTORY = 3; // keep at most 3 prior runs\n\n // --- Read new report ---\n let newReport;\n try {\n newReport = fs.readFileSync('/tmp/ailf-report.md', 'utf-8');\n } catch {\n newReport = `## \u26A0\uFE0F AI Literacy Evaluation\\n\\nNo report generated. 
Check the [workflow logs](${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}).`;\n }\n\n const prNumber = context.issue?.number || context.payload?.pull_request?.number;\n if (!prNumber) {\n console.log('No PR number found, skipping comment');\n return;\n }\n\n // --- Find existing comment ---\n const { data: comments } = await github.rest.issues.listComments({\n owner: context.repo.owner, repo: context.repo.repo,\n issue_number: prNumber,\n });\n const existing = comments.find(c => c.body?.includes(MARKER));\n\n // --- Build history from previous comment ---\n let historyEntries = [];\n if (existing) {\n const oldBody = existing.body || '';\n\n // Collect existing collapsed history entries\n const histStart = oldBody.indexOf(HISTORY_START);\n const histEnd = oldBody.indexOf(HISTORY_END);\n if (histStart !== -1 && histEnd !== -1) {\n const historyContent = oldBody.slice(histStart + HISTORY_START.length, histEnd).trim();\n // Split on </details> boundaries to get individual entries\n if (historyContent) {\n historyEntries = historyContent\n .split(/<\\/details>\\s*/)\n .map(s => s.trim())\n .filter(s => s.startsWith('<details>'))\n .map(s => s + '\\n</details>');\n }\n }\n\n // Extract the current report (will become the newest history entry)\n let previousReport = '';\n if (histStart !== -1) {\n // Report is between MARKER and the \"Previous runs\" heading (or history section)\n const markerIdx = oldBody.indexOf(MARKER);\n // Find the --- separator before history\n const separatorIdx = oldBody.lastIndexOf('---', histStart);\n const endIdx = separatorIdx > markerIdx ? 
separatorIdx : histStart;\n previousReport = oldBody.slice(markerIdx + MARKER.length, endIdx).trim();\n } else {\n // No history yet \u2014 everything after MARKER is the report\n const markerIdx = oldBody.indexOf(MARKER);\n if (markerIdx !== -1) {\n previousReport = oldBody.slice(markerIdx + MARKER.length).trim();\n }\n }\n\n // Collapse the previous report into a <details> entry\n if (previousReport) {\n const scoreMatch = previousReport.match(/Overall:\\s*(\\d+)\\/100/);\n const score = scoreMatch ? scoreMatch[1] : '?';\n const dateMatch = previousReport.match(/Generated by.*?\u00B7\\s*([^\u00B7<\\n*]+)/);\n const date = dateMatch\n ? dateMatch[1].trim()\n : new Date().toISOString().slice(0, 16).replace('T', ' ') + ' UTC';\n const entry = `<details>\\n<summary>\uD83D\uDCDC ${date} \u2014 ${score}/100</summary>\\n\\n${previousReport}\\n\\n</details>`;\n historyEntries.unshift(entry); // newest first\n }\n\n // Enforce max history limit\n historyEntries = historyEntries.slice(0, MAX_HISTORY);\n }\n\n // --- Assemble final comment ---\n const historySection = historyEntries.length > 0\n ? 
`\\n\\n---\\n\\n### \uD83D\uDCDC Previous runs\\n\\n${HISTORY_START}\\n${historyEntries.join('\\n\\n')}\\n${HISTORY_END}`\n : '';\n const finalBody = `${MARKER}\\n${newReport}${historySection}`;\n\n if (existing) {\n await github.rest.issues.updateComment({\n owner: context.repo.owner, repo: context.repo.repo,\n comment_id: existing.id, body: finalBody,\n });\n console.log(`Updated comment (${historyEntries.length} history entries)`);\n } else {\n await github.rest.issues.createComment({\n owner: context.repo.owner, repo: context.repo.repo,\n issue_number: prNumber, body: finalBody,\n });\n console.log('Created new PR comment');\n }\n\n - name: Summary\n if: always()\n run: |\n if [ -f /tmp/ailf-report.md ]; then\n cat /tmp/ailf-report.md >> \"$GITHUB_STEP_SUMMARY\"\n else\n echo \"## \u26A0\uFE0F AI Literacy Evaluation\" >> \"$GITHUB_STEP_SUMMARY\"\n echo \"\" >> \"$GITHUB_STEP_SUMMARY\"\n echo \"No report generated. Check the workflow logs.\" >> \"$GITHUB_STEP_SUMMARY\"\n fi\n";
|
|
436
|
+
export declare const workflowYaml = "# \u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n# AI Literacy Evaluation \u2014 GitHub Actions workflow\n# \u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n#\n# Evaluates your documentation quality on every pull request.\n# The AILF CLI reads your .ailf/tasks/ definitions, submits them\n# to the AILF API for evaluation, and writes a score report.\n#\n# Prerequisites:\n# Add one secret to your repository (Settings \u2192 Secrets \u2192 Actions):\n# AILF_API_KEY \u2014 your API key (starts with ailf_live_sk_)\n#\n# @sanity/ailf is published with public npm access, so no npm token is\n# needed to install the CLI.\n#\n# Customization:\n# - Narrow the trigger paths to reduce cost (see comment below)\n# - Check debug_mode for faster iteration (fewer tests)\n# - See: https://github.com/sanity-labs/ai-literacy-framework\n# 
\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n\nname: AI Literacy Eval\n\non:\n pull_request:\n branches: [main]\n # Runs on every PR to main by default. To reduce cost:\n # paths: [\".ailf/**\", \"docs/**\"]\n\n workflow_dispatch:\n inputs:\n debug_mode:\n description: \"Run in debug mode (fewer tests, faster iteration)\"\n type: boolean\n default: false\n\nconcurrency:\n group: ailf-eval-${{ github.event.pull_request.number || github.ref }}\n cancel-in-progress: true\n\njobs:\n evaluate:\n name: AI Literacy Evaluation\n runs-on: ubuntu-latest\n permissions:\n contents: read\n pull-requests: write\n steps:\n - uses: actions/checkout@v4\n\n - name: Run evaluation\n id: eval\n env:\n AILF_API_KEY: ${{ secrets.AILF_API_KEY }}\n # \u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n # D0037 run provenance envelope \u2014 REPLACE THE OWNER TEAM SLUG\n # below. Unedited templates produce runs tagged with the literal\n # placeholder so you can spot them in Studio / BigQuery and fix.\n #\n # AILF_CLASSIFICATION values: official | adhoc | experimental |\n # test | external. 
External teams should use `adhoc` by default;\n # `official` is reserved for the core-docs scheduled series.\n # \u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n AILF_CLASSIFICATION: adhoc\n AILF_OWNER_TEAM: \"<REPLACE-WITH-YOUR-TEAM-SLUG>\"\n AILF_OWNER_INDIVIDUAL: ${{ github.actor }}\n run: |\n npx @sanity/ailf@latest run --remote \\\n --output /tmp/ailf-report.md \\\n ${{ inputs.debug_mode && '--debug' || '' }}\n\n - name: Post PR comment\n if: always() && github.event_name == 'pull_request'\n uses: actions/github-script@v7\n with:\n script: |\n const fs = require('fs');\n\n // --- Constants ---\n const MARKER = '<!-- ailf-score-report -->';\n const HISTORY_START = '<!-- ailf-score-history -->';\n const HISTORY_END = '<!-- /ailf-score-history -->';\n const MAX_HISTORY = 3; // keep at most 3 prior runs\n\n // --- Read new report ---\n let newReport;\n try {\n newReport = fs.readFileSync('/tmp/ailf-report.md', 'utf-8');\n } catch {\n newReport = `## \u26A0\uFE0F AI Literacy Evaluation\\n\\nNo report generated. 
Check the [workflow logs](${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}).`;\n }\n\n const prNumber = context.issue?.number || context.payload?.pull_request?.number;\n if (!prNumber) {\n console.log('No PR number found, skipping comment');\n return;\n }\n\n // --- Find existing comment ---\n const { data: comments } = await github.rest.issues.listComments({\n owner: context.repo.owner, repo: context.repo.repo,\n issue_number: prNumber,\n });\n const existing = comments.find(c => c.body?.includes(MARKER));\n\n // --- Build history from previous comment ---\n let historyEntries = [];\n if (existing) {\n const oldBody = existing.body || '';\n\n // Collect existing collapsed history entries\n const histStart = oldBody.indexOf(HISTORY_START);\n const histEnd = oldBody.indexOf(HISTORY_END);\n if (histStart !== -1 && histEnd !== -1) {\n const historyContent = oldBody.slice(histStart + HISTORY_START.length, histEnd).trim();\n // Split on </details> boundaries to get individual entries\n if (historyContent) {\n historyEntries = historyContent\n .split(/<\\/details>\\s*/)\n .map(s => s.trim())\n .filter(s => s.startsWith('<details>'))\n .map(s => s + '\\n</details>');\n }\n }\n\n // Extract the current report (will become the newest history entry)\n let previousReport = '';\n if (histStart !== -1) {\n // Report is between MARKER and the \"Previous runs\" heading (or history section)\n const markerIdx = oldBody.indexOf(MARKER);\n // Find the --- separator before history\n const separatorIdx = oldBody.lastIndexOf('---', histStart);\n const endIdx = separatorIdx > markerIdx ? 
separatorIdx : histStart;\n previousReport = oldBody.slice(markerIdx + MARKER.length, endIdx).trim();\n } else {\n // No history yet \u2014 everything after MARKER is the report\n const markerIdx = oldBody.indexOf(MARKER);\n if (markerIdx !== -1) {\n previousReport = oldBody.slice(markerIdx + MARKER.length).trim();\n }\n }\n\n // Collapse the previous report into a <details> entry\n if (previousReport) {\n const scoreMatch = previousReport.match(/Overall:\\s*(\\d+)\\/100/);\n const score = scoreMatch ? scoreMatch[1] : '?';\n const dateMatch = previousReport.match(/Generated by.*?\u00B7\\s*([^\u00B7<\\n*]+)/);\n const date = dateMatch\n ? dateMatch[1].trim()\n : new Date().toISOString().slice(0, 16).replace('T', ' ') + ' UTC';\n const entry = `<details>\\n<summary>\uD83D\uDCDC ${date} \u2014 ${score}/100</summary>\\n\\n${previousReport}\\n\\n</details>`;\n historyEntries.unshift(entry); // newest first\n }\n\n // Enforce max history limit\n historyEntries = historyEntries.slice(0, MAX_HISTORY);\n }\n\n // --- Assemble final comment ---\n const historySection = historyEntries.length > 0\n ? 
`\\n\\n---\\n\\n### \uD83D\uDCDC Previous runs\\n\\n${HISTORY_START}\\n${historyEntries.join('\\n\\n')}\\n${HISTORY_END}`\n : '';\n const finalBody = `${MARKER}\\n${newReport}${historySection}`;\n\n if (existing) {\n await github.rest.issues.updateComment({\n owner: context.repo.owner, repo: context.repo.repo,\n comment_id: existing.id, body: finalBody,\n });\n console.log(`Updated comment (${historyEntries.length} history entries)`);\n } else {\n await github.rest.issues.createComment({\n owner: context.repo.owner, repo: context.repo.repo,\n issue_number: prNumber, body: finalBody,\n });\n console.log('Created new PR comment');\n }\n\n - name: Summary\n if: always()\n run: |\n if [ -f /tmp/ailf-report.md ]; then\n cat /tmp/ailf-report.md >> \"$GITHUB_STEP_SUMMARY\"\n else\n echo \"## \u26A0\uFE0F AI Literacy Evaluation\" >> \"$GITHUB_STEP_SUMMARY\"\n echo \"\" >> \"$GITHUB_STEP_SUMMARY\"\n echo \"No report generated. Check the workflow logs.\" >> \"$GITHUB_STEP_SUMMARY\"\n fi\n";
|
|
437
437
|
/** TypeScript project configuration template (ailf.config.ts) */
|
|
438
438
|
export declare const ailfConfigTs = "/**\n * .ailf/ailf.config.ts \u2014 AI Literacy Framework project configuration.\n *\n * This file configures how the AILF evaluation pipeline runs in this\n * repository. Place it at .ailf/ailf.config.ts in your project root.\n *\n * Evaluations are submitted to the AILF API (ailf-api.sanity.build).\n * The API handles LLM calls, doc fetching, grading, and report\n * publishing. Your repo only needs one secret: AILF_API_KEY.\n *\n * Docs: https://github.com/sanity-labs/ai-literacy-framework\n */\n\nexport default {\n /**\n * Documentation source \u2014 which docs are being evaluated.\n *\n * This tells the pipeline which Sanity project and dataset contain\n * the documentation under test. For most users, this is Sanity's own\n * docs project.\n */\n source: {\n /** Sanity project ID (find yours at sanity.io/manage) */\n projectId: \"3do82whm\",\n /** The dataset to query (e.g., \"production\", \"next\") */\n dataset: \"next\",\n /**\n * The public URL of your documentation site.\n * Used by agentic mode to test agent discoverability.\n */\n baseUrl: \"https://www.sanity.io/docs\",\n },\n\n /**\n * Trigger configuration \u2014 when evaluations run automatically.\n *\n * Each key is a trigger context. The pipeline checks which trigger\n * matches the current execution context (PR, merge, schedule, etc.)\n * and applies its settings.\n *\n * Mode options:\n * \"validate-only\" \u2014 check that task files parse correctly (fast, no LLM calls)\n * \"eval\" \u2014 run the full evaluation pipeline\n */\n triggers: {\n /** On pull requests: just validate task files parse correctly. */\n pr: {\n mode: \"validate-only\",\n },\n\n /** When .ailf/ files change in a PR: run a real evaluation. */\n \"pr-task-change\": {\n mode: \"eval\",\n paths: [\".ailf/**\"],\n },\n\n /** On merge to main: run evaluation (non-blocking). */\n main: {\n mode: \"eval\",\n blocking: false,\n notify: true,\n },\n },\n}\n";
|
|
@@ -113,7 +113,7 @@ export const thresholdData = {
|
|
|
113
113
|
}
|
|
114
114
|
};
|
|
115
115
|
/** Raw YAML string for threshold example (preserves comments) */
|
|
116
|
-
export const thresholdYaml = "# Example quality threshold configuration.\n#\n# Thresholds define the minimum scores for readiness gates.\n# The
|
|
116
|
+
export const thresholdYaml = "# Example quality threshold configuration.\n#\n# Thresholds define the minimum scores for readiness gates.\n# The `ailf report readiness` command evaluates scores against these\n# thresholds and produces a go/no-go checklist.\n#\n# Global thresholds apply to all areas unless overridden per-area.\n\nglobal:\n composite: 60\n dimensions:\n task_completion: 55\n code_correctness: 50\n doc_coverage: 50\n ceiling: 70\n docLift: 10\n\nareas:\n groq:\n composite: 65\n ceiling: 75\n";
|
|
117
117
|
// ---------------------------------------------------------------------------
|
|
118
118
|
// Project configuration for .ailf/config.yaml
|
|
119
119
|
// ---------------------------------------------------------------------------
|
|
@@ -630,7 +630,7 @@ export const EXAMPLES = {
|
|
|
630
630
|
// Raw file exports (non-data files, exported as raw strings)
|
|
631
631
|
// ---------------------------------------------------------------------------
|
|
632
632
|
/** GitHub Actions workflow template for AI Literacy evaluation */
|
|
633
|
-
export const workflowYaml = "# ──────────────────────────────────────────────────────────────────────\n# AI Literacy Evaluation — GitHub Actions workflow\n# ──────────────────────────────────────────────────────────────────────\n#\n# Evaluates your documentation quality on every pull request.\n# The AILF CLI reads your .ailf/tasks/ definitions, submits them\n# to the AILF API for evaluation, and writes a score report.\n#\n# Prerequisites:\n# Add one secret to your repository (Settings → Secrets → Actions):\n# AILF_API_KEY — your API key (starts with ailf_live_sk_)\n#\n# @sanity/ailf is published with public npm access, so no npm token is\n# needed to install the CLI.\n#\n# Customization:\n# - Narrow the trigger paths to reduce cost (see comment below)\n# - Check debug_mode for faster iteration (fewer tests)\n# - See: https://github.com/sanity-labs/ai-literacy-framework\n# ──────────────────────────────────────────────────────────────────────\n\nname: AI Literacy Eval\n\non:\n pull_request:\n branches: [main]\n # Runs on every PR to main by default. To reduce cost:\n # paths: [\".ailf/**\", \"docs/**\"]\n\n workflow_dispatch:\n inputs:\n debug_mode:\n description: \"Run in debug mode (fewer tests, faster iteration)\"\n type: boolean\n default: false\n\nconcurrency:\n group: ailf-eval-${{ github.event.pull_request.number || github.ref }}\n cancel-in-progress: true\n\njobs:\n evaluate:\n name: AI Literacy Evaluation\n runs-on: ubuntu-latest\n permissions:\n contents: read\n pull-requests: write\n steps:\n - uses: actions/checkout@v4\n\n - name: Run evaluation\n id: eval\n env:\n AILF_API_KEY: ${{ secrets.AILF_API_KEY }}\n # ────────────────────────────────────────────────────────────\n # D0037 run provenance envelope — REPLACE THE OWNER TEAM SLUG\n # below. Unedited templates produce runs tagged with the literal\n # placeholder so you can spot them in Studio / BigQuery and fix.\n #\n # AILF_CLASSIFICATION values: official |
|
|
633
|
+
export const workflowYaml = "# ──────────────────────────────────────────────────────────────────────\n# AI Literacy Evaluation — GitHub Actions workflow\n# ──────────────────────────────────────────────────────────────────────\n#\n# Evaluates your documentation quality on every pull request.\n# The AILF CLI reads your .ailf/tasks/ definitions, submits them\n# to the AILF API for evaluation, and writes a score report.\n#\n# Prerequisites:\n# Add one secret to your repository (Settings → Secrets → Actions):\n# AILF_API_KEY — your API key (starts with ailf_live_sk_)\n#\n# @sanity/ailf is published with public npm access, so no npm token is\n# needed to install the CLI.\n#\n# Customization:\n# - Narrow the trigger paths to reduce cost (see comment below)\n# - Check debug_mode for faster iteration (fewer tests)\n# - See: https://github.com/sanity-labs/ai-literacy-framework\n# ──────────────────────────────────────────────────────────────────────\n\nname: AI Literacy Eval\n\non:\n pull_request:\n branches: [main]\n # Runs on every PR to main by default. To reduce cost:\n # paths: [\".ailf/**\", \"docs/**\"]\n\n workflow_dispatch:\n inputs:\n debug_mode:\n description: \"Run in debug mode (fewer tests, faster iteration)\"\n type: boolean\n default: false\n\nconcurrency:\n group: ailf-eval-${{ github.event.pull_request.number || github.ref }}\n cancel-in-progress: true\n\njobs:\n evaluate:\n name: AI Literacy Evaluation\n runs-on: ubuntu-latest\n permissions:\n contents: read\n pull-requests: write\n steps:\n - uses: actions/checkout@v4\n\n - name: Run evaluation\n id: eval\n env:\n AILF_API_KEY: ${{ secrets.AILF_API_KEY }}\n # ────────────────────────────────────────────────────────────\n # D0037 run provenance envelope — REPLACE THE OWNER TEAM SLUG\n # below. Unedited templates produce runs tagged with the literal\n # placeholder so you can spot them in Studio / BigQuery and fix.\n #\n # AILF_CLASSIFICATION values: official | adhoc | experimental |\n # test | external. 
External teams should use `adhoc` by default;\n # `official` is reserved for the core-docs scheduled series.\n # ────────────────────────────────────────────────────────────\n AILF_CLASSIFICATION: adhoc\n AILF_OWNER_TEAM: \"<REPLACE-WITH-YOUR-TEAM-SLUG>\"\n AILF_OWNER_INDIVIDUAL: ${{ github.actor }}\n run: |\n npx @sanity/ailf@latest run --remote \\\n --output /tmp/ailf-report.md \\\n ${{ inputs.debug_mode && '--debug' || '' }}\n\n - name: Post PR comment\n if: always() && github.event_name == 'pull_request'\n uses: actions/github-script@v7\n with:\n script: |\n const fs = require('fs');\n\n // --- Constants ---\n const MARKER = '<!-- ailf-score-report -->';\n const HISTORY_START = '<!-- ailf-score-history -->';\n const HISTORY_END = '<!-- /ailf-score-history -->';\n const MAX_HISTORY = 3; // keep at most 3 prior runs\n\n // --- Read new report ---\n let newReport;\n try {\n newReport = fs.readFileSync('/tmp/ailf-report.md', 'utf-8');\n } catch {\n newReport = `## ⚠️ AI Literacy Evaluation\\n\\nNo report generated. 
Check the [workflow logs](${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}).`;\n }\n\n const prNumber = context.issue?.number || context.payload?.pull_request?.number;\n if (!prNumber) {\n console.log('No PR number found, skipping comment');\n return;\n }\n\n // --- Find existing comment ---\n const { data: comments } = await github.rest.issues.listComments({\n owner: context.repo.owner, repo: context.repo.repo,\n issue_number: prNumber,\n });\n const existing = comments.find(c => c.body?.includes(MARKER));\n\n // --- Build history from previous comment ---\n let historyEntries = [];\n if (existing) {\n const oldBody = existing.body || '';\n\n // Collect existing collapsed history entries\n const histStart = oldBody.indexOf(HISTORY_START);\n const histEnd = oldBody.indexOf(HISTORY_END);\n if (histStart !== -1 && histEnd !== -1) {\n const historyContent = oldBody.slice(histStart + HISTORY_START.length, histEnd).trim();\n // Split on </details> boundaries to get individual entries\n if (historyContent) {\n historyEntries = historyContent\n .split(/<\\/details>\\s*/)\n .map(s => s.trim())\n .filter(s => s.startsWith('<details>'))\n .map(s => s + '\\n</details>');\n }\n }\n\n // Extract the current report (will become the newest history entry)\n let previousReport = '';\n if (histStart !== -1) {\n // Report is between MARKER and the \"Previous runs\" heading (or history section)\n const markerIdx = oldBody.indexOf(MARKER);\n // Find the --- separator before history\n const separatorIdx = oldBody.lastIndexOf('---', histStart);\n const endIdx = separatorIdx > markerIdx ? 
separatorIdx : histStart;\n previousReport = oldBody.slice(markerIdx + MARKER.length, endIdx).trim();\n } else {\n // No history yet — everything after MARKER is the report\n const markerIdx = oldBody.indexOf(MARKER);\n if (markerIdx !== -1) {\n previousReport = oldBody.slice(markerIdx + MARKER.length).trim();\n }\n }\n\n // Collapse the previous report into a <details> entry\n if (previousReport) {\n const scoreMatch = previousReport.match(/Overall:\\s*(\\d+)\\/100/);\n const score = scoreMatch ? scoreMatch[1] : '?';\n const dateMatch = previousReport.match(/Generated by.*?·\\s*([^·<\\n*]+)/);\n const date = dateMatch\n ? dateMatch[1].trim()\n : new Date().toISOString().slice(0, 16).replace('T', ' ') + ' UTC';\n const entry = `<details>\\n<summary>📜 ${date} — ${score}/100</summary>\\n\\n${previousReport}\\n\\n</details>`;\n historyEntries.unshift(entry); // newest first\n }\n\n // Enforce max history limit\n historyEntries = historyEntries.slice(0, MAX_HISTORY);\n }\n\n // --- Assemble final comment ---\n const historySection = historyEntries.length > 0\n ? `\\n\\n---\\n\\n### 📜 Previous runs\\n\\n${HISTORY_START}\\n${historyEntries.join('\\n\\n')}\\n${HISTORY_END}`\n : '';\n const finalBody = `${MARKER}\\n${newReport}${historySection}`;\n\n if (existing) {\n await github.rest.issues.updateComment({\n owner: context.repo.owner, repo: context.repo.repo,\n comment_id: existing.id, body: finalBody,\n });\n console.log(`Updated comment (${historyEntries.length} history entries)`);\n } else {\n await github.rest.issues.createComment({\n owner: context.repo.owner, repo: context.repo.repo,\n issue_number: prNumber, body: finalBody,\n });\n console.log('Created new PR comment');\n }\n\n - name: Summary\n if: always()\n run: |\n if [ -f /tmp/ailf-report.md ]; then\n cat /tmp/ailf-report.md >> \"$GITHUB_STEP_SUMMARY\"\n else\n echo \"## ⚠️ AI Literacy Evaluation\" >> \"$GITHUB_STEP_SUMMARY\"\n echo \"\" >> \"$GITHUB_STEP_SUMMARY\"\n echo \"No report generated. 
Check the workflow logs.\" >> \"$GITHUB_STEP_SUMMARY\"\n fi\n";
|
|
634
634
|
// ---------------------------------------------------------------------------
|
|
635
635
|
// TypeScript template exports (for ailf init --output-format ts)
|
|
636
636
|
// ---------------------------------------------------------------------------
|
|
@@ -68,10 +68,6 @@ export interface ResolvedConfig {
|
|
|
68
68
|
compareBaseline?: string;
|
|
69
69
|
/** Whether gap analysis is enabled */
|
|
70
70
|
gapAnalysisEnabled: boolean;
|
|
71
|
-
/** Whether readiness report is enabled */
|
|
72
|
-
readinessEnabled: boolean;
|
|
73
|
-
/** Whether discovery report is enabled */
|
|
74
|
-
discoveryReportEnabled: boolean;
|
|
75
71
|
/** Whether publishing is enabled */
|
|
76
72
|
publishEnabled: boolean;
|
|
77
73
|
/** Publish tag */
|
|
@@ -11,42 +11,68 @@
|
|
|
11
11
|
*/
|
|
12
12
|
import { z } from "zod";
|
|
13
13
|
export declare const EvalConfigSchema: z.ZodObject<{
|
|
14
|
-
|
|
14
|
+
agentic: z.ZodOptional<z.ZodObject<{
|
|
15
|
+
headers: z.ZodOptional<z.ZodRecord<z.ZodString, z.ZodString>>;
|
|
16
|
+
allowedOrigins: z.ZodOptional<z.ZodArray<z.ZodString>>;
|
|
17
|
+
}, z.core.$strip>>;
|
|
18
|
+
artifacts: z.ZodOptional<z.ZodObject<{
|
|
19
|
+
enabled: z.ZodOptional<z.ZodBoolean>;
|
|
20
|
+
dir: z.ZodOptional<z.ZodString>;
|
|
21
|
+
exclude: z.ZodOptional<z.ZodArray<z.ZodString>>;
|
|
22
|
+
}, z.core.$strip>>;
|
|
23
|
+
taskSource: z.ZodOptional<z.ZodObject<{
|
|
24
|
+
type: z.ZodOptional<z.ZodEnum<{
|
|
25
|
+
"content-lake": "content-lake";
|
|
26
|
+
repo: "repo";
|
|
27
|
+
}>>;
|
|
28
|
+
repoTasksPath: z.ZodOptional<z.ZodString>;
|
|
29
|
+
}, z.core.$strip>>;
|
|
15
30
|
areas: z.ZodOptional<z.ZodArray<z.ZodString>>;
|
|
16
31
|
changedDocs: z.ZodOptional<z.ZodArray<z.ZodString>>;
|
|
17
32
|
compare: z.ZodOptional<z.ZodBoolean>;
|
|
18
33
|
compareBaseline: z.ZodOptional<z.ZodString>;
|
|
19
34
|
compareThreshold: z.ZodOptional<z.ZodNumber>;
|
|
20
|
-
concurrency: z.ZodOptional<z.ZodNumber>;
|
|
21
35
|
debug: z.ZodOptional<z.ZodUnion<readonly [z.ZodBoolean, z.ZodObject<{
|
|
22
36
|
enabled: z.ZodOptional<z.ZodBoolean>;
|
|
23
37
|
firstN: z.ZodOptional<z.ZodNumber>;
|
|
24
38
|
pattern: z.ZodOptional<z.ZodString>;
|
|
25
39
|
sample: z.ZodOptional<z.ZodNumber>;
|
|
26
40
|
}, z.core.$strip>]>>;
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
41
|
+
execution: z.ZodOptional<z.ZodObject<{
|
|
42
|
+
concurrency: z.ZodOptional<z.ZodNumber>;
|
|
43
|
+
graderReplications: z.ZodOptional<z.ZodNumber>;
|
|
44
|
+
gapAnalysis: z.ZodOptional<z.ZodBoolean>;
|
|
45
|
+
apiUrl: z.ZodOptional<z.ZodString>;
|
|
46
|
+
}, z.core.$strip>>;
|
|
47
|
+
output: z.ZodOptional<z.ZodObject<{
|
|
48
|
+
dir: z.ZodOptional<z.ZodString>;
|
|
49
|
+
}, z.core.$strip>>;
|
|
31
50
|
mode: z.ZodOptional<z.ZodEnum<{
|
|
32
51
|
custom: "custom";
|
|
52
|
+
agentic: "agentic";
|
|
33
53
|
literacy: "literacy";
|
|
34
54
|
"mcp-server": "mcp-server";
|
|
35
55
|
"agent-harness": "agent-harness";
|
|
36
56
|
"knowledge-probe": "knowledge-probe";
|
|
37
57
|
baseline: "baseline";
|
|
38
|
-
agentic: "agentic";
|
|
39
58
|
observed: "observed";
|
|
40
59
|
full: "full";
|
|
41
60
|
}>>;
|
|
42
61
|
noAutoScope: z.ZodOptional<z.ZodBoolean>;
|
|
43
62
|
noCache: z.ZodOptional<z.ZodBoolean>;
|
|
44
63
|
noRemoteCache: z.ZodOptional<z.ZodBoolean>;
|
|
45
|
-
publish: z.ZodOptional<z.
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
64
|
+
publish: z.ZodOptional<z.ZodObject<{
|
|
65
|
+
auto: z.ZodOptional<z.ZodEnum<{
|
|
66
|
+
never: "never";
|
|
67
|
+
always: "always";
|
|
68
|
+
"full-runs": "full-runs";
|
|
69
|
+
}>>;
|
|
70
|
+
tag: z.ZodOptional<z.ZodString>;
|
|
71
|
+
}, z.core.$strip>>;
|
|
72
|
+
reportStore: z.ZodOptional<z.ZodObject<{
|
|
73
|
+
projectId: z.ZodOptional<z.ZodString>;
|
|
74
|
+
dataset: z.ZodOptional<z.ZodString>;
|
|
75
|
+
}, z.core.$strip>>;
|
|
50
76
|
searchMode: z.ZodOptional<z.ZodEnum<{
|
|
51
77
|
off: "off";
|
|
52
78
|
open: "open";
|
|
@@ -13,8 +13,51 @@ import { z } from "zod";
|
|
|
13
13
|
import { RAW_EVAL_MODES } from "../../ailf-shared/index.js";
|
|
14
14
|
export const EvalConfigSchema = z
|
|
15
15
|
.object({
|
|
16
|
-
/**
|
|
17
|
-
|
|
16
|
+
/**
|
|
17
|
+
* Agentic-mode configuration (W0077 Phase 6f). Replaces the top-level
|
|
18
|
+
* `headers` and `allowedOrigins` fields. Mirrors `RepoConfigSchema`'s
|
|
19
|
+
* `agentic` block for the auto-loaded `.ailf/config.yaml` path.
|
|
20
|
+
*
|
|
21
|
+
* - `headers` — custom HTTP headers for doc fetching (key/value object)
|
|
22
|
+
* - `allowedOrigins` — origin globs for agentic-mode sandbox
|
|
23
|
+
*/
|
|
24
|
+
agentic: z
|
|
25
|
+
.object({
|
|
26
|
+
headers: z.record(z.string(), z.string()).optional(),
|
|
27
|
+
allowedOrigins: z.array(z.string()).optional(),
|
|
28
|
+
})
|
|
29
|
+
.optional(),
|
|
30
|
+
/**
|
|
31
|
+
* Artifact-writer configuration (W0077 Phase 6g). Replaces the retired
|
|
32
|
+
* `--no-artifacts`, `--artifacts-dir`, and `--artifacts-exclude` CLI
|
|
33
|
+
* flags. Mirrors `RepoConfigSchema.artifacts`.
|
|
34
|
+
*
|
|
35
|
+
* - `enabled` — false to disable all writers (legacy --no-artifacts)
|
|
36
|
+
* - `dir` — root for local writer (default .ailf/results/captures/)
|
|
37
|
+
* - `exclude` — list of artifact-type names to skip
|
|
38
|
+
*/
|
|
39
|
+
artifacts: z
|
|
40
|
+
.object({
|
|
41
|
+
enabled: z.boolean().optional(),
|
|
42
|
+
dir: z.string().min(1).optional(),
|
|
43
|
+
exclude: z.array(z.string().min(1)).optional(),
|
|
44
|
+
})
|
|
45
|
+
.optional(),
|
|
46
|
+
/**
|
|
47
|
+
* Task-source configuration (W0077 Phase 6h). Replaces the retired
|
|
48
|
+
* `--task-source` and `--repo-tasks-path` CLI flags. Mirrors
|
|
49
|
+
* `RepoConfigSchema.taskSource`.
|
|
50
|
+
*
|
|
51
|
+
* - `type` — `content-lake` (default) or `repo`
|
|
52
|
+
* - `repoTasksPath` — optional path; default `<cwd>/.ailf/tasks/` when
|
|
53
|
+
* type is `repo`
|
|
54
|
+
*/
|
|
55
|
+
taskSource: z
|
|
56
|
+
.object({
|
|
57
|
+
type: z.enum(["content-lake", "repo"]).optional(),
|
|
58
|
+
repoTasksPath: z.string().min(1).optional(),
|
|
59
|
+
})
|
|
60
|
+
.optional(),
|
|
18
61
|
/** Feature area filter (comma-separated or array) */
|
|
19
62
|
areas: z.array(z.string()).optional(),
|
|
20
63
|
/** Changed doc slugs for impact scoping */
|
|
@@ -25,8 +68,6 @@ export const EvalConfigSchema = z
|
|
|
25
68
|
compareBaseline: z.string().optional(),
|
|
26
69
|
/** Comparison noise threshold */
|
|
27
70
|
compareThreshold: z.number().min(0).optional(),
|
|
28
|
-
/** Max parallel API calls */
|
|
29
|
-
concurrency: z.number().int().positive().optional(),
|
|
30
71
|
/** Debug mode */
|
|
31
72
|
debug: z
|
|
32
73
|
.union([
|
|
@@ -39,14 +80,34 @@ export const EvalConfigSchema = z
|
|
|
39
80
|
}),
|
|
40
81
|
])
|
|
41
82
|
.optional(),
|
|
42
|
-
/**
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
83
|
+
/**
|
|
84
|
+
* Execution-tier configuration (W0077 Phase 6b). Replaces the top-level
|
|
85
|
+
* `concurrency`, `gapAnalysis`, and `graderReplications` fields and adds
|
|
86
|
+
* `apiUrl` to the same group.
|
|
87
|
+
*
|
|
88
|
+
* - `concurrency` — max parallel API calls
|
|
89
|
+
* - `graderReplications` — grader consistency replications
|
|
90
|
+
* - `gapAnalysis` — enable failure-mode + impact analysis (default true)
|
|
91
|
+
* - `apiUrl` — AILF API base URL (default https://ailf-api.sanity.build)
|
|
92
|
+
*/
|
|
93
|
+
execution: z
|
|
94
|
+
.object({
|
|
95
|
+
concurrency: z.number().int().positive().optional(),
|
|
96
|
+
graderReplications: z.number().int().positive().optional(),
|
|
97
|
+
gapAnalysis: z.boolean().optional(),
|
|
98
|
+
apiUrl: z.string().url().optional(),
|
|
99
|
+
})
|
|
100
|
+
.optional(),
|
|
101
|
+
/**
|
|
102
|
+
* Output configuration (W0077 Phase 6c). Replaces the retired
|
|
103
|
+
* `--output-dir` CLI flag. Path is resolved relative to the caller's
|
|
104
|
+
* cwd. When unset, defaults to `<cwd>/.ailf/results/latest/`.
|
|
105
|
+
*/
|
|
106
|
+
output: z
|
|
107
|
+
.object({
|
|
108
|
+
dir: z.string().min(1).optional(),
|
|
109
|
+
})
|
|
110
|
+
.optional(),
|
|
50
111
|
/**
|
|
51
112
|
* Evaluation mode — accepts both canonical and legacy names.
|
|
52
113
|
* Legacy names ("baseline", "agentic", "observed", "full") must pass
|
|
@@ -59,16 +120,35 @@ export const EvalConfigSchema = z
|
|
|
59
120
|
noCache: z.boolean().optional(),
|
|
60
121
|
/** Disable remote cache */
|
|
61
122
|
noRemoteCache: z.boolean().optional(),
|
|
62
|
-
/**
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
123
|
+
/**
|
|
124
|
+
* Publish policy. The CLI `--publish` and `--no-publish` flags override
|
|
125
|
+
* the policy at runtime; this field controls behavior when no explicit
|
|
126
|
+
* flag is passed.
|
|
127
|
+
*
|
|
128
|
+
* - `auto: "always"` — publish whenever a report store is configured
|
|
129
|
+
* - `auto: "full-runs"` — publish non-debug runs (default; preserves
|
|
130
|
+
* the historical smart default)
|
|
131
|
+
* - `auto: "never"` — never auto-publish (must opt in via --publish)
|
|
132
|
+
*
|
|
133
|
+
* `tag` is a default value for `--publish-tag` when not passed at the CLI.
|
|
134
|
+
*/
|
|
135
|
+
publish: z
|
|
136
|
+
.object({
|
|
137
|
+
auto: z.enum(["always", "full-runs", "never"]).optional(),
|
|
138
|
+
tag: z.string().optional(),
|
|
139
|
+
})
|
|
140
|
+
.optional(),
|
|
141
|
+
/**
|
|
142
|
+
* Report store configuration (W0077 Phase 6e). Replaces the top-level
|
|
143
|
+
* `reportDataset` and `reportProjectId` fields. Mirrors `RepoConfigSchema`'s
|
|
144
|
+
* `reportStore` block for the auto-loaded `.ailf/config.yaml` path.
|
|
145
|
+
*/
|
|
146
|
+
reportStore: z
|
|
147
|
+
.object({
|
|
148
|
+
projectId: z.string().optional(),
|
|
149
|
+
dataset: z.string().optional(),
|
|
150
|
+
})
|
|
151
|
+
.optional(),
|
|
72
152
|
/** Search mode for agentic mode */
|
|
73
153
|
searchMode: z.enum(["off", "open", "origin-only"]).optional(),
|
|
74
154
|
/** Skip eval step */
|
|
@@ -42,7 +42,6 @@ export declare const PipelineRequestSchema: z.ZodObject<{
|
|
|
42
42
|
pattern: z.ZodOptional<z.ZodString>;
|
|
43
43
|
sample: z.ZodOptional<z.ZodNumber>;
|
|
44
44
|
}, z.core.$strip>]>>;
|
|
45
|
-
discoveryReport: z.ZodOptional<z.ZodBoolean>;
|
|
46
45
|
gapAnalysis: z.ZodOptional<z.ZodBoolean>;
|
|
47
46
|
graderReplications: z.ZodOptional<z.ZodNumber>;
|
|
48
47
|
headers: z.ZodOptional<z.ZodRecord<z.ZodString, z.ZodString>>;
|
|
@@ -50,12 +49,12 @@ export declare const PipelineRequestSchema: z.ZodObject<{
|
|
|
50
49
|
jobId: z.ZodOptional<z.ZodString>;
|
|
51
50
|
mode: z.ZodOptional<z.ZodEnum<{
|
|
52
51
|
custom: "custom";
|
|
52
|
+
agentic: "agentic";
|
|
53
53
|
literacy: "literacy";
|
|
54
54
|
"mcp-server": "mcp-server";
|
|
55
55
|
"agent-harness": "agent-harness";
|
|
56
56
|
"knowledge-probe": "knowledge-probe";
|
|
57
57
|
baseline: "baseline";
|
|
58
|
-
agentic: "agentic";
|
|
59
58
|
observed: "observed";
|
|
60
59
|
full: "full";
|
|
61
60
|
}>>;
|
|
@@ -66,7 +65,6 @@ export declare const PipelineRequestSchema: z.ZodObject<{
|
|
|
66
65
|
projectId: z.ZodOptional<z.ZodString>;
|
|
67
66
|
publish: z.ZodOptional<z.ZodBoolean>;
|
|
68
67
|
publishTag: z.ZodOptional<z.ZodString>;
|
|
69
|
-
readiness: z.ZodOptional<z.ZodBoolean>;
|
|
70
68
|
searchMode: z.ZodOptional<z.ZodEnum<{
|
|
71
69
|
off: "off";
|
|
72
70
|
open: "open";
|
|
@@ -75,14 +73,14 @@ export declare const PipelineRequestSchema: z.ZodObject<{
|
|
|
75
73
|
source: z.ZodOptional<z.ZodString>;
|
|
76
74
|
sourceReportId: z.ZodOptional<z.ZodString>;
|
|
77
75
|
taskMode: z.ZodOptional<z.ZodEnum<{
|
|
78
|
-
inline: "inline";
|
|
79
76
|
"content-lake": "content-lake";
|
|
77
|
+
inline: "inline";
|
|
80
78
|
}>>;
|
|
81
79
|
tasks: z.ZodOptional<z.ZodArray<z.ZodString>>;
|
|
82
80
|
urls: z.ZodOptional<z.ZodArray<z.ZodString>>;
|
|
83
81
|
variant: z.ZodOptional<z.ZodEnum<{
|
|
84
|
-
baseline: "baseline";
|
|
85
82
|
agentic: "agentic";
|
|
83
|
+
baseline: "baseline";
|
|
86
84
|
observed: "observed";
|
|
87
85
|
full: "full";
|
|
88
86
|
}>>;
|
|
@@ -90,7 +88,7 @@ export declare const PipelineRequestSchema: z.ZodObject<{
|
|
|
90
88
|
classification: z.ZodOptional<z.ZodEnum<{
|
|
91
89
|
external: "external";
|
|
92
90
|
official: "official";
|
|
93
|
-
|
|
91
|
+
adhoc: "adhoc";
|
|
94
92
|
experimental: "experimental";
|
|
95
93
|
test: "test";
|
|
96
94
|
}>>;
|
|
@@ -105,7 +105,6 @@ export const PipelineRequestSchema = z.object({
|
|
|
105
105
|
concurrency: z.number().int().positive().optional(),
|
|
106
106
|
dataset: z.string().optional(),
|
|
107
107
|
debug: z.union([z.boolean(), DebugOptionsSchema]).optional(),
|
|
108
|
-
discoveryReport: z.boolean().optional(),
|
|
109
108
|
gapAnalysis: z.boolean().optional(),
|
|
110
109
|
graderReplications: z.number().int().positive().optional(),
|
|
111
110
|
headers: z.record(z.string(), z.string()).optional(),
|
|
@@ -123,7 +122,6 @@ export const PipelineRequestSchema = z.object({
|
|
|
123
122
|
projectId: z.string().optional(),
|
|
124
123
|
publish: z.boolean().optional(),
|
|
125
124
|
publishTag: z.string().optional(),
|
|
126
|
-
readiness: z.boolean().optional(),
|
|
127
125
|
searchMode: z.enum(["off", "open", "origin-only"]).optional(),
|
|
128
126
|
source: z.string().optional(),
|
|
129
127
|
sourceReportId: z.string().optional(),
|
|
@@ -150,7 +148,7 @@ export const PipelineRequestSchema = z.object({
|
|
|
150
148
|
/**
|
|
151
149
|
* How this run should be treated for reporting and trend tracking.
|
|
152
150
|
* Orthogonal to `trigger.type` (captured server-side). When omitted,
|
|
153
|
-
* the server defaults to `"
|
|
151
|
+
* the server defaults to `"adhoc"`.
|
|
154
152
|
*/
|
|
155
153
|
classification: z.enum(RUN_CLASSIFICATIONS).optional(),
|
|
156
154
|
/** Team and (optionally) individual this run is attributable to. */
|
|
@@ -19,12 +19,12 @@ export declare const ScheduleEntrySchema: z.ZodObject<{
|
|
|
19
19
|
enabled: z.ZodDefault<z.ZodBoolean>;
|
|
20
20
|
mode: z.ZodDefault<z.ZodEnum<{
|
|
21
21
|
custom: "custom";
|
|
22
|
+
agentic: "agentic";
|
|
22
23
|
literacy: "literacy";
|
|
23
24
|
"mcp-server": "mcp-server";
|
|
24
25
|
"agent-harness": "agent-harness";
|
|
25
26
|
"knowledge-probe": "knowledge-probe";
|
|
26
27
|
baseline: "baseline";
|
|
27
|
-
agentic: "agentic";
|
|
28
28
|
observed: "observed";
|
|
29
29
|
full: "full";
|
|
30
30
|
}>>;
|
|
@@ -59,12 +59,12 @@ export declare const SchedulesFileSchema: z.ZodObject<{
|
|
|
59
59
|
enabled: z.ZodDefault<z.ZodBoolean>;
|
|
60
60
|
mode: z.ZodDefault<z.ZodEnum<{
|
|
61
61
|
custom: "custom";
|
|
62
|
+
agentic: "agentic";
|
|
62
63
|
literacy: "literacy";
|
|
63
64
|
"mcp-server": "mcp-server";
|
|
64
65
|
"agent-harness": "agent-harness";
|
|
65
66
|
"knowledge-probe": "knowledge-probe";
|
|
66
67
|
baseline: "baseline";
|
|
67
|
-
agentic: "agentic";
|
|
68
68
|
observed: "observed";
|
|
69
69
|
full: "full";
|
|
70
70
|
}>>;
|
|
@@ -13,10 +13,10 @@
|
|
|
13
13
|
/**
|
|
14
14
|
* How a run should be treated for reporting and trend tracking.
|
|
15
15
|
*
|
|
16
|
-
* Orthogonal to `RunTrigger` (mechanism). Defaults to `"
|
|
16
|
+
* Orthogonal to `RunTrigger` (mechanism). Defaults to `"adhoc"` when
|
|
17
17
|
* unannotated so pre-taxonomy runs never leak into the canonical series.
|
|
18
18
|
*/
|
|
19
|
-
export type RunClassification = "official" | "
|
|
19
|
+
export type RunClassification = "official" | "adhoc" | "experimental" | "test" | "external";
|
|
20
20
|
export declare const RUN_CLASSIFICATIONS: readonly RunClassification[];
|
|
21
21
|
export declare function isRunClassification(value: unknown): value is RunClassification;
|
|
22
22
|
/**
|
|
@@ -22,7 +22,7 @@ export interface RunContext {
|
|
|
22
22
|
areas: string[];
|
|
23
23
|
/**
|
|
24
24
|
* How this run should be treated for reporting and trend tracking.
|
|
25
|
-
* Orthogonal to `trigger` (mechanism). Defaults to `"
|
|
25
|
+
* Orthogonal to `trigger` (mechanism). Defaults to `"adhoc"` when
|
|
26
26
|
* unannotated — only the scheduled workflow mints `"official"`.
|
|
27
27
|
*
|
|
28
28
|
* @see docs/decisions/D0037-run-classification-and-ownership-taxonomy.md
|
|
@@ -59,8 +59,6 @@ export interface RemoteConfigSlice {
|
|
|
59
59
|
perspectiveOverride?: string;
|
|
60
60
|
graderReplications?: number;
|
|
61
61
|
gapAnalysisEnabled?: boolean;
|
|
62
|
-
readinessEnabled?: boolean;
|
|
63
|
-
discoveryReportEnabled?: boolean;
|
|
64
62
|
noRemoteCache?: boolean;
|
|
65
63
|
/**
|
|
66
64
|
* D0037 / W0069 — CLI-flag overrides for the caller envelope. These
|
|
@@ -129,14 +129,10 @@ export async function buildRemoteRequest(options) {
|
|
|
129
129
|
}
|
|
130
130
|
if (config.gapAnalysisEnabled)
|
|
131
131
|
raw.gapAnalysis = true;
|
|
132
|
-
if (config.readinessEnabled)
|
|
133
|
-
raw.readiness = true;
|
|
134
|
-
if (config.discoveryReportEnabled)
|
|
135
|
-
raw.discoveryReport = true;
|
|
136
132
|
if (config.noRemoteCache)
|
|
137
133
|
raw.noRemoteCache = true;
|
|
138
134
|
// Caller git metadata — auto-detect from CI environment variables.
|
|
139
|
-
// When running via `ailf
|
|
135
|
+
// When running via `ailf run --remote` in a GitHub Actions workflow,
|
|
140
136
|
// the GITHUB_* env vars identify the *calling* repo (not the AILF core
|
|
141
137
|
// repo). This ensures report provenance attributes to the right repo.
|
|
142
138
|
const callerGit = detectCallerGit();
|
|
@@ -391,7 +387,7 @@ export function buildCallerEnvelope(config) {
|
|
|
391
387
|
/**
|
|
392
388
|
* Auto-detect caller git metadata from GitHub Actions environment variables.
|
|
393
389
|
*
|
|
394
|
-
* When the CLI runs in a calling repo's CI (via `npx @sanity/ailf
|
|
390
|
+
* When the CLI runs in a calling repo's CI (via `npx @sanity/ailf run
|
|
395
391
|
* --remote`), the GITHUB_* env vars reflect that repo — not the AILF core
|
|
396
392
|
* repo. We capture them here so the API can carry them through to report
|
|
397
393
|
* provenance.
|
|
@@ -7,7 +7,7 @@
|
|
|
7
7
|
* @see packages/eval/src/commands/pipeline-action.ts — underlying implementation
|
|
8
8
|
*/
|
|
9
9
|
import type { ConfigSource, ResolvedConfig } from "../../_vendor/ailf-core/index.d.ts";
|
|
10
|
-
import type { PipelineCliOptions } from "../../commands/
|
|
10
|
+
import type { PipelineCliOptions } from "../../commands/run.js";
|
|
11
11
|
export declare class CliConfigAdapter implements ConfigSource {
|
|
12
12
|
private readonly cliOpts;
|
|
13
13
|
private readonly rootDir;
|
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
/**
|
|
2
2
|
* FileConfigAdapter — resolves pipeline config from a local config file.
|
|
3
3
|
*
|
|
4
|
-
* Enables `ailf
|
|
4
|
+
* Enables `ailf run --config <path>` to load all pipeline options
|
|
5
5
|
* from a file instead of CLI flags. Supports multiple formats in
|
|
6
6
|
* priority order:
|
|
7
7
|
*
|