@sanity/ailf 0.1.6 → 0.1.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/_vendor/ailf-core/examples/index.d.ts +1 -1
- package/dist/_vendor/ailf-core/examples/index.js +1 -1
- package/dist/_vendor/ailf-core/ports/context.d.ts +6 -0
- package/dist/_vendor/ailf-tasks/cli.d.ts +8 -0
- package/dist/_vendor/ailf-tasks/cli.js +61 -0
- package/dist/_vendor/ailf-tasks/index.d.ts +13 -0
- package/dist/_vendor/ailf-tasks/index.js +16 -0
- package/dist/_vendor/ailf-tasks/parser.d.ts +27 -0
- package/dist/_vendor/ailf-tasks/parser.js +73 -0
- package/dist/_vendor/ailf-tasks/schemas.d.ts +186 -0
- package/dist/_vendor/ailf-tasks/schemas.js +176 -0
- package/dist/_vendor/ailf-tasks/validation.d.ts +47 -0
- package/dist/_vendor/ailf-tasks/validation.js +162 -0
- package/dist/adapters/api-client/api-client.d.ts +75 -0
- package/dist/adapters/api-client/api-client.js +201 -0
- package/dist/adapters/api-client/build-request.d.ts +75 -0
- package/dist/adapters/api-client/build-request.js +176 -0
- package/dist/adapters/api-client/errors.d.ts +43 -0
- package/dist/adapters/api-client/errors.js +68 -0
- package/dist/adapters/api-client/format-error.d.ts +22 -0
- package/dist/adapters/api-client/format-error.js +48 -0
- package/dist/adapters/api-client/index.d.ts +13 -0
- package/dist/adapters/api-client/index.js +12 -0
- package/dist/adapters/api-client/progress.d.ts +26 -0
- package/dist/adapters/api-client/progress.js +69 -0
- package/dist/adapters/api-client/remediation.d.ts +19 -0
- package/dist/adapters/api-client/remediation.js +76 -0
- package/dist/adapters/api-client/types.d.ts +98 -0
- package/dist/adapters/api-client/types.js +14 -0
- package/dist/adapters/config-sources/file-config-adapter.js +2 -0
- package/dist/adapters/task-sources/repo-schemas.d.ts +16 -181
- package/dist/adapters/task-sources/repo-schemas.js +27 -184
- package/dist/adapters/task-sources/repo-validation.d.ts +5 -46
- package/dist/adapters/task-sources/repo-validation.js +5 -161
- package/dist/commands/calculate-scores.js +2 -0
- package/dist/commands/explain-handler.js +6 -0
- package/dist/commands/fetch-docs.js +2 -0
- package/dist/commands/generate-configs.js +2 -0
- package/dist/commands/init.js +9 -9
- package/dist/commands/pipeline-action.d.ts +3 -0
- package/dist/commands/pipeline-action.js +13 -0
- package/dist/commands/pipeline.d.ts +2 -0
- package/dist/commands/pipeline.js +2 -0
- package/dist/commands/pr-comment.js +2 -0
- package/dist/commands/publish.js +2 -0
- package/dist/commands/remote-pipeline.d.ts +27 -0
- package/dist/commands/remote-pipeline.js +133 -0
- package/dist/commands/remote-results.d.ts +33 -0
- package/dist/commands/remote-results.js +97 -0
- package/dist/orchestration/build-app-context.js +3 -0
- package/dist/pipeline/map-request-to-config.js +2 -0
- package/package.json +2 -1
|
@@ -191,4 +191,4 @@ export interface ExampleRecord {
|
|
|
191
191
|
}
|
|
192
192
|
export declare const EXAMPLES: Record<ExampleType, ExampleRecord>;
|
|
193
193
|
/** GitHub Actions workflow template for AI Literacy evaluation */
|
|
194
|
-
export declare const workflowYaml = "# \u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n# AI Literacy Evaluation \u2014 GitHub Actions workflow\n# \u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n#\n# This workflow evaluates your documentation quality by submitting your\n# .ailf/tasks/ definitions to the AILF API on every pull request. The\n# API handles all heavy lifting (LLM calls, doc fetching, grading,\n# report publishing). Results are posted as a PR comment.\n#\n# How it works:\n# 1. Reads your .ailf/tasks/*.yaml files\n# 2. Submits them to the AILF API as inline tasks\n# 3. Polls for results (the API runs the full eval pipeline)\n# 4. 
Posts a score report as a PR comment\n#\n# Prerequisites:\n# Add one secret to your repository (Settings \u2192 Secrets \u2192 Actions):\n# AILF_API_KEY \u2014 your API key (starts with ailf_live_sk_)\n#\n# Customization:\n# - Narrow the paths trigger to reduce cost (see comment below)\n# - Set full_eval to true for comprehensive (slower) evaluation\n# - See: https://github.com/sanity-labs/ai-literacy-framework\n# \u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n\nname: AI Literacy Eval\n\non:\n pull_request:\n branches: [main]\n # Runs on every PR to main by default so documentation quality is\n # continuously monitored. 
To reduce cost, narrow the trigger:\n # paths: [\".ailf/**\", \"docs/**\"]\n\n # Manual trigger from the Actions tab\n workflow_dispatch:\n inputs:\n full_eval:\n description: \"Run full evaluation (all tests, slower)\"\n type: boolean\n default: false\n\nconcurrency:\n group: ailf-eval-${{ github.event.pull_request.number || github.ref }}\n cancel-in-progress: true\n\njobs:\n evaluate:\n name: AI Literacy Evaluation\n runs-on: ubuntu-latest\n permissions:\n pull-requests: write\n steps:\n # \u2500\u2500\u2500 Checkout repo to read .ailf/tasks/ \u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n - uses: actions/checkout@v4\n\n # \u2500\u2500\u2500 Read local tasks and submit to AILF API \u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n - name: Submit evaluation\n id: submit\n env:\n AILF_API_KEY: ${{ secrets.AILF_API_KEY }}\n FULL_EVAL: ${{ inputs.full_eval || 'false' }}\n run: |\n # Parse .ailf/tasks/*.yaml into a JSON array\n # (python3 + PyYAML are pre-installed on ubuntu-latest)\n INLINE_TASKS=$(python3 << 'PYEOF'\n import yaml, json, glob, sys\n\n tasks = []\n for f in sorted(glob.glob(\".ailf/tasks/*.yaml\") + glob.glob(\".ailf/tasks/*.yml\")):\n with open(f) as fh:\n data = yaml.safe_load(fh)\n if isinstance(data, list):\n tasks.extend(data)\n elif data is not None:\n tasks.append(data)\n\n if not tasks:\n print(\"::error::No tasks found in .ailf/tasks/ \u2014 add task YAML files first.\", file=sys.stderr)\n sys.exit(1)\n\n print(json.dumps(tasks))\n PYEOF\n )\n\n TASK_COUNT=$(echo \"$INLINE_TASKS\" | jq 'length')\n echo \"\uD83D\uDCE6 Found $TASK_COUNT task(s) in .ailf/tasks/\"\n\n # Build API payload \u2014 includes the local tasks as inlineTasks\n if [ \"$FULL_EVAL\" = \"true\" ]; then\n DEBUG_JSON='null'\n else\n DEBUG_JSON='{\"enabled\":true,\"firstN\":2}'\n fi\n\n PAYLOAD=$(jq -n \\\n --argjson tasks \"$INLINE_TASKS\" \\\n --argjson debug 
\"$DEBUG_JSON\" \\\n '{\n mode: \"baseline\",\n taskMode: \"inline\",\n inlineTasks: $tasks,\n publish: true,\n compare: true\n } + (if $debug != null then {debug: $debug} else {} end)')\n\n echo \"::group::Request payload (tasks omitted)\"\n echo \"$PAYLOAD\" | jq 'del(.inlineTasks)'\n echo \"::endgroup::\"\n\n RESPONSE=$(curl -sf -X POST \\\n -H \"Authorization: Bearer $AILF_API_KEY\" \\\n -H \"Content-Type: application/json\" \\\n https://ailf-api.sanity.build/v1/pipeline \\\n -d \"$PAYLOAD\")\n\n JOB_ID=$(echo \"$RESPONSE\" | jq -r '.jobId')\n echo \"job_id=$JOB_ID\" >> $GITHUB_OUTPUT\n echo \"\uD83D\uDCCB Submitted job: $JOB_ID\"\n\n # \u2500\u2500\u2500 Poll for results (long-polling) \u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n - name: Wait for results\n id: results\n env:\n AILF_API_KEY: ${{ secrets.AILF_API_KEY }}\n JOB_ID: ${{ steps.submit.outputs.job_id }}\n run: |\n for i in $(seq 1 40); do\n RESPONSE=$(curl -s \\\n -H \"Authorization: Bearer $AILF_API_KEY\" \\\n -H \"Prefer: wait=25\" \\\n \"https://ailf-api.sanity.build/v1/jobs/$JOB_ID\")\n\n STATUS=$(echo \"$RESPONSE\" | jq -r '.status')\n\n case \"$STATUS\" in\n completed)\n echo \"status=completed\" >> $GITHUB_OUTPUT\n echo \"report_id=$(echo $RESPONSE | jq -r '.reportId // empty')\" >> $GITHUB_OUTPUT\n echo \"score=$(echo $RESPONSE | jq -r '.score // empty')\" >> $GITHUB_OUTPUT\n echo \"\u2705 Evaluation completed\"\n exit 0\n ;;\n failed|timed-out)\n echo \"status=$STATUS\" >> $GITHUB_OUTPUT\n echo \"::error::Evaluation $STATUS\"\n exit 1\n ;;\n *)\n echo \"\u23F3 [$i/40] $STATUS\"\n ;;\n esac\n done\n\n echo \"::error::Timed out waiting for evaluation\"\n exit 1\n\n # \u2500\u2500\u2500 Fetch the markdown report \u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n - name: Fetch 
markdown report\n id: markdown\n if: steps.results.outputs.report_id != ''\n env:\n AILF_API_KEY: ${{ secrets.AILF_API_KEY }}\n REPORT_ID: ${{ steps.results.outputs.report_id }}\n run: |\n REPORT_MD=$(curl -sf \\\n -H \"Authorization: Bearer $AILF_API_KEY\" \\\n \"https://ailf-api.sanity.build/v1/reports/$REPORT_ID/markdown\")\n\n if [ $? -eq 0 ] && [ -n \"$REPORT_MD\" ]; then\n echo \"$REPORT_MD\" > /tmp/ailf-report.md\n echo \"fetched=true\" >> $GITHUB_OUTPUT\n else\n echo \"fetched=false\" >> $GITHUB_OUTPUT\n echo \"::warning::Could not fetch markdown report\"\n fi\n\n # \u2500\u2500\u2500 Post results to PR \u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n - name: Post PR comment\n if: >-\n always() && github.event_name == 'pull_request' &&\n steps.submit.outputs.job_id != ''\n uses: actions/github-script@v7\n env:\n JOB_STATUS: ${{ steps.results.outputs.status || 'unknown' }}\n REPORT_ID: ${{ steps.results.outputs.report_id || '' }}\n JOB_ID: ${{ steps.submit.outputs.job_id }}\n SCORE: ${{ steps.results.outputs.score || '' }}\n MARKDOWN_FETCHED: ${{ steps.markdown.outputs.fetched || 'false' }}\n with:\n script: |\n const fs = require('fs');\n const marker = '<!-- ailf-score-report -->';\n const status = process.env.JOB_STATUS;\n const reportId = process.env.REPORT_ID;\n const jobId = process.env.JOB_ID;\n const score = process.env.SCORE;\n const mdFetched = process.env.MARKDOWN_FETCHED === 'true';\n\n let body;\n\n if (status === 'completed' && mdFetched) {\n const reportMd = fs.readFileSync('/tmp/ailf-report.md', 'utf-8');\n body = `${marker}\\n${reportMd}`;\n if (reportId) {\n body += `\\n\uD83D\uDD17 [View in Studio](https://admin.sanity.io/ailf/report/${reportId})`;\n }\n body += `\\n\\n<sub>Job: \\`${jobId}\\`</sub>\\n`;\n } else {\n let icon, message;\n if (status === 'completed') {\n 
icon = '\u2705';\n message = score\n ? `Evaluation completed \u2014 score: **${score}/100**`\n : 'Evaluation completed successfully.';\n } else if (status === 'failed' || status === 'timed-out') {\n icon = '\u26A0\uFE0F';\n message = `Evaluation ${status}.`;\n } else {\n icon = '\u23F3';\n message = 'Evaluation status unknown (may still be running).';\n }\n\n body = `${marker}\\n## ${icon} AI Literacy Evaluation\\n\\n${message}\\n`;\n if (reportId) {\n body += `\\n\uD83D\uDD17 [View in Studio](https://admin.sanity.io/ailf/report/${reportId})\\n`;\n }\n body += `\\n<sub>Job: \\`${jobId}\\`</sub>\\n`;\n }\n\n const { data: comments } = await github.rest.issues.listComments({\n owner: context.repo.owner,\n repo: context.repo.repo,\n issue_number: context.issue.number,\n });\n const existing = comments.find(c => c.body?.includes(marker));\n\n if (existing) {\n await github.rest.issues.updateComment({\n owner: context.repo.owner,\n repo: context.repo.repo,\n comment_id: existing.id,\n body,\n });\n } else {\n await github.rest.issues.createComment({\n owner: context.repo.owner,\n repo: context.repo.repo,\n issue_number: context.issue.number,\n body,\n });\n }\n\n # \u2500\u2500\u2500 Job summary \u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n - name: Summary\n if: always()\n env:\n JOB_STATUS: ${{ steps.results.outputs.status || 'unknown' }}\n REPORT_ID: ${{ steps.results.outputs.report_id || '' }}\n JOB_ID: ${{ steps.submit.outputs.job_id }}\n SCORE: ${{ steps.results.outputs.score || '' }}\n MARKDOWN_FETCHED: ${{ steps.markdown.outputs.fetched || 'false' }}\n run: |\n if [ \"$JOB_STATUS\" = \"completed\" ] && [ \"$MARKDOWN_FETCHED\" = \"true\" ] && [ -f /tmp/ailf-report.md ]; then\n cat /tmp/ailf-report.md >> \"$GITHUB_STEP_SUMMARY\"\n echo \"\" >> 
\"$GITHUB_STEP_SUMMARY\"\n [ -n \"$REPORT_ID\" ] && echo \"\uD83D\uDD17 [View in Studio](https://admin.sanity.io/ailf/report/$REPORT_ID)\" >> \"$GITHUB_STEP_SUMMARY\"\n echo \"\" >> \"$GITHUB_STEP_SUMMARY\"\n echo \"<sub>Job: \\`$JOB_ID\\`</sub>\" >> \"$GITHUB_STEP_SUMMARY\"\n else\n {\n echo \"## \uD83D\uDCCA AI Literacy Evaluation\"\n echo \"\"\n echo \"| Field | Value |\"\n echo \"|-------|-------|\"\n echo \"| Job | \\`$JOB_ID\\` |\"\n echo \"| Status | $JOB_STATUS |\"\n [ -n \"$SCORE\" ] && echo \"| Score | $SCORE/100 |\"\n [ -n \"$REPORT_ID\" ] && echo \"| Report | [$REPORT_ID](https://admin.sanity.io/ailf/report/$REPORT_ID) |\"\n } >> \"$GITHUB_STEP_SUMMARY\"\n fi\n";
|
|
194
|
+
export declare const workflowYaml = "# \u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n# AI Literacy Evaluation \u2014 GitHub Actions workflow\n# \u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n#\n# Evaluates your documentation quality on every pull request.\n# The AILF CLI reads your .ailf/tasks/ definitions, submits them\n# to the AILF API for evaluation, and writes a score report.\n#\n# Prerequisites:\n# Add one secret to your repository (Settings \u2192 Secrets \u2192 Actions):\n# AILF_API_KEY \u2014 your API key (starts with ailf_live_sk_)\n#\n# Customization:\n# - Narrow the trigger paths to reduce cost (see comment below)\n# - Set full_eval to true for comprehensive evaluation\n# - See: https://github.com/sanity-io/ai-literacy-framework\n# \u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n\nname: AI Literacy Eval\n\non:\n pull_request:\n branches: [main]\n # 
Runs on every PR to main by default. To reduce cost:\n # paths: [\".ailf/**\", \"docs/**\"]\n\n workflow_dispatch:\n inputs:\n full_eval:\n description: \"Run full evaluation (all tests, slower)\"\n type: boolean\n default: false\n\nconcurrency:\n group: ailf-eval-${{ github.event.pull_request.number || github.ref }}\n cancel-in-progress: true\n\njobs:\n evaluate:\n name: AI Literacy Evaluation\n runs-on: ubuntu-latest\n permissions:\n contents: read\n pull-requests: write\n steps:\n - uses: actions/checkout@v4\n\n - name: Run evaluation\n id: eval\n env:\n AILF_API_KEY: ${{ secrets.AILF_API_KEY }}\n run: |\n npx @sanity/ailf@latest pipeline --remote \\\n --output /tmp/ailf-report.md \\\n ${{ inputs.full_eval != 'true' && '--debug' || '' }}\n\n - name: Post PR comment\n if: always() && github.event_name == 'pull_request'\n uses: actions/github-script@v7\n with:\n script: |\n const fs = require('fs');\n const marker = '<!-- ailf-score-report -->';\n let body;\n try {\n const report = fs.readFileSync('/tmp/ailf-report.md', 'utf-8');\n body = `${marker}\\n${report}`;\n } catch {\n body = `${marker}\\n## \u26A0\uFE0F AI Literacy Evaluation\\n\\nNo report generated. 
Check the [workflow logs](${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}).`;\n }\n const { data: comments } = await github.rest.issues.listComments({\n owner: context.repo.owner, repo: context.repo.repo,\n issue_number: context.issue.number,\n });\n const existing = comments.find(c => c.body?.includes(marker));\n const params = {\n owner: context.repo.owner, repo: context.repo.repo,\n body,\n };\n if (existing) {\n await github.rest.issues.updateComment({ ...params, comment_id: existing.id });\n } else {\n await github.rest.issues.createComment({ ...params, issue_number: context.issue.number });\n }\n\n - name: Summary\n if: always()\n run: |\n if [ -f /tmp/ailf-report.md ]; then\n cat /tmp/ailf-report.md >> \"$GITHUB_STEP_SUMMARY\"\n else\n echo \"## \u26A0\uFE0F AI Literacy Evaluation\" >> \"$GITHUB_STEP_SUMMARY\"\n echo \"\" >> \"$GITHUB_STEP_SUMMARY\"\n echo \"No report generated. Check the workflow logs.\" >> \"$GITHUB_STEP_SUMMARY\"\n fi\n";
|
|
@@ -289,4 +289,4 @@ export const EXAMPLES = {
|
|
|
289
289
|
// Raw file exports (non-data files, exported as raw strings)
|
|
290
290
|
// ---------------------------------------------------------------------------
|
|
291
291
|
/** GitHub Actions workflow template for AI Literacy evaluation */
|
|
292
|
-
export const workflowYaml = "# ──────────────────────────────────────────────────────────────────────\n# AI Literacy Evaluation — GitHub Actions workflow\n# ──────────────────────────────────────────────────────────────────────\n#\n# This workflow evaluates your documentation quality by submitting your\n# .ailf/tasks/ definitions to the AILF API on every pull request. The\n# API handles all heavy lifting (LLM calls, doc fetching, grading,\n# report publishing). Results are posted as a PR comment.\n#\n# How it works:\n# 1. Reads your .ailf/tasks/*.yaml files\n# 2. Submits them to the AILF API as inline tasks\n# 3. Polls for results (the API runs the full eval pipeline)\n# 4. Posts a score report as a PR comment\n#\n# Prerequisites:\n# Add one secret to your repository (Settings → Secrets → Actions):\n# AILF_API_KEY — your API key (starts with ailf_live_sk_)\n#\n# Customization:\n# - Narrow the paths trigger to reduce cost (see comment below)\n# - Set full_eval to true for comprehensive (slower) evaluation\n# - See: https://github.com/sanity-labs/ai-literacy-framework\n# ──────────────────────────────────────────────────────────────────────\n\nname: AI Literacy Eval\n\non:\n pull_request:\n branches: [main]\n # Runs on every PR to main by default so documentation quality is\n # continuously monitored. 
To reduce cost, narrow the trigger:\n # paths: [\".ailf/**\", \"docs/**\"]\n\n # Manual trigger from the Actions tab\n workflow_dispatch:\n inputs:\n full_eval:\n description: \"Run full evaluation (all tests, slower)\"\n type: boolean\n default: false\n\nconcurrency:\n group: ailf-eval-${{ github.event.pull_request.number || github.ref }}\n cancel-in-progress: true\n\njobs:\n evaluate:\n name: AI Literacy Evaluation\n runs-on: ubuntu-latest\n permissions:\n pull-requests: write\n steps:\n # ─── Checkout repo to read .ailf/tasks/ ────────────────────\n - uses: actions/checkout@v4\n\n # ─── Read local tasks and submit to AILF API ───────────────\n - name: Submit evaluation\n id: submit\n env:\n AILF_API_KEY: ${{ secrets.AILF_API_KEY }}\n FULL_EVAL: ${{ inputs.full_eval || 'false' }}\n run: |\n # Parse .ailf/tasks/*.yaml into a JSON array\n # (python3 + PyYAML are pre-installed on ubuntu-latest)\n INLINE_TASKS=$(python3 << 'PYEOF'\n import yaml, json, glob, sys\n\n tasks = []\n for f in sorted(glob.glob(\".ailf/tasks/*.yaml\") + glob.glob(\".ailf/tasks/*.yml\")):\n with open(f) as fh:\n data = yaml.safe_load(fh)\n if isinstance(data, list):\n tasks.extend(data)\n elif data is not None:\n tasks.append(data)\n\n if not tasks:\n print(\"::error::No tasks found in .ailf/tasks/ — add task YAML files first.\", file=sys.stderr)\n sys.exit(1)\n\n print(json.dumps(tasks))\n PYEOF\n )\n\n TASK_COUNT=$(echo \"$INLINE_TASKS\" | jq 'length')\n echo \"📦 Found $TASK_COUNT task(s) in .ailf/tasks/\"\n\n # Build API payload — includes the local tasks as inlineTasks\n if [ \"$FULL_EVAL\" = \"true\" ]; then\n DEBUG_JSON='null'\n else\n DEBUG_JSON='{\"enabled\":true,\"firstN\":2}'\n fi\n\n PAYLOAD=$(jq -n \\\n --argjson tasks \"$INLINE_TASKS\" \\\n --argjson debug \"$DEBUG_JSON\" \\\n '{\n mode: \"baseline\",\n taskMode: \"inline\",\n inlineTasks: $tasks,\n publish: true,\n compare: true\n } + (if $debug != null then {debug: $debug} else {} end)')\n\n echo \"::group::Request payload 
(tasks omitted)\"\n echo \"$PAYLOAD\" | jq 'del(.inlineTasks)'\n echo \"::endgroup::\"\n\n RESPONSE=$(curl -sf -X POST \\\n -H \"Authorization: Bearer $AILF_API_KEY\" \\\n -H \"Content-Type: application/json\" \\\n https://ailf-api.sanity.build/v1/pipeline \\\n -d \"$PAYLOAD\")\n\n JOB_ID=$(echo \"$RESPONSE\" | jq -r '.jobId')\n echo \"job_id=$JOB_ID\" >> $GITHUB_OUTPUT\n echo \"📋 Submitted job: $JOB_ID\"\n\n # ─── Poll for results (long-polling) ───────────────────────\n - name: Wait for results\n id: results\n env:\n AILF_API_KEY: ${{ secrets.AILF_API_KEY }}\n JOB_ID: ${{ steps.submit.outputs.job_id }}\n run: |\n for i in $(seq 1 40); do\n RESPONSE=$(curl -s \\\n -H \"Authorization: Bearer $AILF_API_KEY\" \\\n -H \"Prefer: wait=25\" \\\n \"https://ailf-api.sanity.build/v1/jobs/$JOB_ID\")\n\n STATUS=$(echo \"$RESPONSE\" | jq -r '.status')\n\n case \"$STATUS\" in\n completed)\n echo \"status=completed\" >> $GITHUB_OUTPUT\n echo \"report_id=$(echo $RESPONSE | jq -r '.reportId // empty')\" >> $GITHUB_OUTPUT\n echo \"score=$(echo $RESPONSE | jq -r '.score // empty')\" >> $GITHUB_OUTPUT\n echo \"✅ Evaluation completed\"\n exit 0\n ;;\n failed|timed-out)\n echo \"status=$STATUS\" >> $GITHUB_OUTPUT\n echo \"::error::Evaluation $STATUS\"\n exit 1\n ;;\n *)\n echo \"⏳ [$i/40] $STATUS\"\n ;;\n esac\n done\n\n echo \"::error::Timed out waiting for evaluation\"\n exit 1\n\n # ─── Fetch the markdown report ─────────────────────────────\n - name: Fetch markdown report\n id: markdown\n if: steps.results.outputs.report_id != ''\n env:\n AILF_API_KEY: ${{ secrets.AILF_API_KEY }}\n REPORT_ID: ${{ steps.results.outputs.report_id }}\n run: |\n REPORT_MD=$(curl -sf \\\n -H \"Authorization: Bearer $AILF_API_KEY\" \\\n \"https://ailf-api.sanity.build/v1/reports/$REPORT_ID/markdown\")\n\n if [ $? 
-eq 0 ] && [ -n \"$REPORT_MD\" ]; then\n echo \"$REPORT_MD\" > /tmp/ailf-report.md\n echo \"fetched=true\" >> $GITHUB_OUTPUT\n else\n echo \"fetched=false\" >> $GITHUB_OUTPUT\n echo \"::warning::Could not fetch markdown report\"\n fi\n\n # ─── Post results to PR ────────────────────────────────────\n - name: Post PR comment\n if: >-\n always() && github.event_name == 'pull_request' &&\n steps.submit.outputs.job_id != ''\n uses: actions/github-script@v7\n env:\n JOB_STATUS: ${{ steps.results.outputs.status || 'unknown' }}\n REPORT_ID: ${{ steps.results.outputs.report_id || '' }}\n JOB_ID: ${{ steps.submit.outputs.job_id }}\n SCORE: ${{ steps.results.outputs.score || '' }}\n MARKDOWN_FETCHED: ${{ steps.markdown.outputs.fetched || 'false' }}\n with:\n script: |\n const fs = require('fs');\n const marker = '<!-- ailf-score-report -->';\n const status = process.env.JOB_STATUS;\n const reportId = process.env.REPORT_ID;\n const jobId = process.env.JOB_ID;\n const score = process.env.SCORE;\n const mdFetched = process.env.MARKDOWN_FETCHED === 'true';\n\n let body;\n\n if (status === 'completed' && mdFetched) {\n const reportMd = fs.readFileSync('/tmp/ailf-report.md', 'utf-8');\n body = `${marker}\\n${reportMd}`;\n if (reportId) {\n body += `\\n🔗 [View in Studio](https://admin.sanity.io/ailf/report/${reportId})`;\n }\n body += `\\n\\n<sub>Job: \\`${jobId}\\`</sub>\\n`;\n } else {\n let icon, message;\n if (status === 'completed') {\n icon = '✅';\n message = score\n ? 
`Evaluation completed — score: **${score}/100**`\n : 'Evaluation completed successfully.';\n } else if (status === 'failed' || status === 'timed-out') {\n icon = '⚠️';\n message = `Evaluation ${status}.`;\n } else {\n icon = '⏳';\n message = 'Evaluation status unknown (may still be running).';\n }\n\n body = `${marker}\\n## ${icon} AI Literacy Evaluation\\n\\n${message}\\n`;\n if (reportId) {\n body += `\\n🔗 [View in Studio](https://admin.sanity.io/ailf/report/${reportId})\\n`;\n }\n body += `\\n<sub>Job: \\`${jobId}\\`</sub>\\n`;\n }\n\n const { data: comments } = await github.rest.issues.listComments({\n owner: context.repo.owner,\n repo: context.repo.repo,\n issue_number: context.issue.number,\n });\n const existing = comments.find(c => c.body?.includes(marker));\n\n if (existing) {\n await github.rest.issues.updateComment({\n owner: context.repo.owner,\n repo: context.repo.repo,\n comment_id: existing.id,\n body,\n });\n } else {\n await github.rest.issues.createComment({\n owner: context.repo.owner,\n repo: context.repo.repo,\n issue_number: context.issue.number,\n body,\n });\n }\n\n # ─── Job summary ───────────────────────────────────────────\n - name: Summary\n if: always()\n env:\n JOB_STATUS: ${{ steps.results.outputs.status || 'unknown' }}\n REPORT_ID: ${{ steps.results.outputs.report_id || '' }}\n JOB_ID: ${{ steps.submit.outputs.job_id }}\n SCORE: ${{ steps.results.outputs.score || '' }}\n MARKDOWN_FETCHED: ${{ steps.markdown.outputs.fetched || 'false' }}\n run: |\n if [ \"$JOB_STATUS\" = \"completed\" ] && [ \"$MARKDOWN_FETCHED\" = \"true\" ] && [ -f /tmp/ailf-report.md ]; then\n cat /tmp/ailf-report.md >> \"$GITHUB_STEP_SUMMARY\"\n echo \"\" >> \"$GITHUB_STEP_SUMMARY\"\n [ -n \"$REPORT_ID\" ] && echo \"🔗 [View in Studio](https://admin.sanity.io/ailf/report/$REPORT_ID)\" >> \"$GITHUB_STEP_SUMMARY\"\n echo \"\" >> \"$GITHUB_STEP_SUMMARY\"\n echo \"<sub>Job: \\`$JOB_ID\\`</sub>\" >> \"$GITHUB_STEP_SUMMARY\"\n else\n {\n echo \"## 📊 AI Literacy 
Evaluation\"\n echo \"\"\n echo \"| Field | Value |\"\n echo \"|-------|-------|\"\n echo \"| Job | \\`$JOB_ID\\` |\"\n echo \"| Status | $JOB_STATUS |\"\n [ -n \"$SCORE\" ] && echo \"| Score | $SCORE/100 |\"\n [ -n \"$REPORT_ID\" ] && echo \"| Report | [$REPORT_ID](https://admin.sanity.io/ailf/report/$REPORT_ID) |\"\n } >> \"$GITHUB_STEP_SUMMARY\"\n fi\n";
|
|
292
|
+
// Workflow template emitted for users' repositories. The `--debug` toggle compares
// `inputs.full_eval` against the boolean `true`: the `inputs` context keeps booleans as
// booleans, and comparing a boolean with the string 'true' is always unequal under
// GitHub's expression coercion, which would force `--debug` even for full evaluations.
// On pull_request events the input is absent, so the comparison still selects `--debug`.
export const workflowYaml = "# ──────────────────────────────────────────────────────────────────────\n# AI Literacy Evaluation — GitHub Actions workflow\n# ──────────────────────────────────────────────────────────────────────\n#\n# Evaluates your documentation quality on every pull request.\n# The AILF CLI reads your .ailf/tasks/ definitions, submits them\n# to the AILF API for evaluation, and writes a score report.\n#\n# Prerequisites:\n# Add one secret to your repository (Settings → Secrets → Actions):\n# AILF_API_KEY — your API key (starts with ailf_live_sk_)\n#\n# Customization:\n# - Narrow the trigger paths to reduce cost (see comment below)\n# - Set full_eval to true for comprehensive evaluation\n# - See: https://github.com/sanity-io/ai-literacy-framework\n# ──────────────────────────────────────────────────────────────────────\n\nname: AI Literacy Eval\n\non:\n pull_request:\n branches: [main]\n # Runs on every PR to main by default. To reduce cost:\n # paths: [\".ailf/**\", \"docs/**\"]\n\n workflow_dispatch:\n inputs:\n full_eval:\n description: \"Run full evaluation (all tests, slower)\"\n type: boolean\n default: false\n\nconcurrency:\n group: ailf-eval-${{ github.event.pull_request.number || github.ref }}\n cancel-in-progress: true\n\njobs:\n evaluate:\n name: AI Literacy Evaluation\n runs-on: ubuntu-latest\n permissions:\n contents: read\n pull-requests: write\n steps:\n - uses: actions/checkout@v4\n\n - name: Run evaluation\n id: eval\n env:\n AILF_API_KEY: ${{ secrets.AILF_API_KEY }}\n run: |\n npx @sanity/ailf@latest pipeline --remote \\\n --output /tmp/ailf-report.md \\\n ${{ inputs.full_eval != true && '--debug' || '' }}\n\n - name: Post PR comment\n if: always() && github.event_name == 'pull_request'\n uses: actions/github-script@v7\n with:\n script: |\n const fs = require('fs');\n const marker = '<!-- ailf-score-report -->';\n let body;\n try {\n const report = fs.readFileSync('/tmp/ailf-report.md', 'utf-8');\n body = `${marker}\\n${report}`;\n } catch {\n body = `${marker}\\n## ⚠️ AI Literacy Evaluation\\n\\nNo report generated. Check the [workflow logs](${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}).`;\n }\n const { data: comments } = await github.rest.issues.listComments({\n owner: context.repo.owner, repo: context.repo.repo,\n issue_number: context.issue.number,\n });\n const existing = comments.find(c => c.body?.includes(marker));\n const params = {\n owner: context.repo.owner, repo: context.repo.repo,\n body,\n };\n if (existing) {\n await github.rest.issues.updateComment({ ...params, comment_id: existing.id });\n } else {\n await github.rest.issues.createComment({ ...params, issue_number: context.issue.number });\n }\n\n - name: Summary\n if: always()\n run: |\n if [ -f /tmp/ailf-report.md ]; then\n cat /tmp/ailf-report.md >> \"$GITHUB_STEP_SUMMARY\"\n else\n echo \"## ⚠️ AI Literacy Evaluation\" >> \"$GITHUB_STEP_SUMMARY\"\n echo \"\" >> \"$GITHUB_STEP_SUMMARY\"\n echo \"No report generated. Check the workflow logs.\" >> \"$GITHUB_STEP_SUMMARY\"\n fi\n";
|
|
@@ -106,6 +106,12 @@ export interface ResolvedConfig {
|
|
|
106
106
|
};
|
|
107
107
|
/** Job ID for tracking API-triggered evaluations in the Content Lake */
|
|
108
108
|
jobId?: string;
|
|
109
|
+
/** Whether to run in remote mode (submit to AILF API) */
|
|
110
|
+
remote: boolean;
|
|
111
|
+
/** AILF API base URL */
|
|
112
|
+
apiUrl: string;
|
|
113
|
+
/** AILF API key (from AILF_API_KEY env var) */
|
|
114
|
+
apiKey?: string;
|
|
109
115
|
}
|
|
110
116
|
/**
|
|
111
117
|
* Application context — the complete dependency carrier.
|
|
@@ -0,0 +1,61 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* cli.ts — Minimal CLI for standalone task validation.
|
|
3
|
+
*
|
|
4
|
+
* Usage:
|
|
5
|
+
* npx @sanity/ailf-tasks validate .ailf/tasks/
|
|
6
|
+
* npx @sanity/ailf-tasks validate # defaults to .ailf/tasks/
|
|
7
|
+
*/
|
|
8
|
+
import { loadTaskDir } from "./parser.js";
|
|
9
|
+
import { formatValidationResult, validateRepoTasks } from "./validation.js";
|
|
10
|
+
/**
 * CLI entry point: dispatches on the first command-line argument.
 *
 * - `validate [dir]` validates the given directory (default `.ailf/tasks`).
 * - `--help`, `-h`, or no argument prints the usage text.
 * - Anything else is reported on stderr, followed by the usage text and
 *   a call to `process.exit(1)`.
 */
export function run() {
    // argv[0] is the node binary and argv[1] the script; user args follow.
    const [command, target] = process.argv.slice(2);
    switch (command) {
        case "validate":
            validateCommand(target ?? ".ailf/tasks");
            return;
        case "--help":
        case "-h":
        case undefined:
            printUsage();
            return;
        default:
            console.error(`Unknown command: ${command}`);
            printUsage();
            process.exit(1);
    }
}
|
|
28
|
+
/**
 * Load every task file in `dir`, run semantic validation, and print a report.
 *
 * The headline reflects the actual outcome: the success marker is printed
 * only when `result.valid` is true. When validation fails, a failure
 * headline is printed instead, followed by the formatted report, and
 * `process.exit(1)` is called.
 *
 * @param dir - Directory passed to `loadTaskDir`.
 */
function validateCommand(dir) {
    try {
        const tasks = loadTaskDir(dir);
        // Run semantic validation
        const result = validateRepoTasks(tasks);
        // Never show a green check mark for a run that is about to exit 1.
        const headline = result.valid
            ? `✅ ${tasks.length} task(s) validated from ${dir}`
            : `❌ ${tasks.length} task(s) loaded from ${dir}, but validation failed`;
        console.log(headline);
        for (const task of tasks) {
            console.log(` ${task.id} — ${task.description}`);
        }
        if (result.warnings.length > 0 || result.errors.length > 0) {
            // Format the report only when there is something to show.
            console.log("");
            console.log(formatValidationResult(result));
        }
        if (!result.valid) {
            process.exit(1);
        }
    }
    catch (err) {
        // Anything thrown while loading or validating is reported as one line.
        console.error(`❌ ${err instanceof Error ? err.message : String(err)}`);
        process.exit(1);
    }
}
|
|
51
|
+
/**
 * Print the CLI usage text to stdout, one `console.log` call per line.
 */
function printUsage() {
    const usageLines = [
        "Usage: ailf-tasks <command> [options]",
        "",
        "Commands:",
        " validate [dir] Validate task YAML files (default: .ailf/tasks/)",
        "",
        "Examples:",
        " ailf-tasks validate",
        " ailf-tasks validate .ailf/tasks/",
        " ailf-tasks validate /path/to/tasks/",
    ];
    for (const line of usageLines) {
        console.log(line);
    }
}
|
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* @sanity/ailf-tasks — Task definition schemas and YAML parser.
|
|
3
|
+
*
|
|
4
|
+
* Lightweight package for parsing and validating .ailf/tasks/*.yaml files
|
|
5
|
+
* without depending on the full AILF CLI or its heavyweight dependencies
|
|
6
|
+
* (Promptfoo, LLM SDKs, Sanity client).
|
|
7
|
+
*
|
|
8
|
+
* Usage:
|
|
9
|
+
* import { parseTaskFile, loadTaskDir, RepoTaskSchema } from '@sanity/ailf-tasks'
|
|
10
|
+
*/
|
|
11
|
+
export { CURATED_ASSERTION_TYPES, RepoTaskFileSchema, RepoTaskSchema, RUBRIC_TEMPLATE_NAMES, type CuratedAssertionType, type RepoTask, type RubricTemplateName, } from "./schemas.js";
|
|
12
|
+
export { loadTaskDir, parseTaskFile } from "./parser.js";
|
|
13
|
+
export { detectSnakeCaseFields, formatValidationResult, validateRepoTasks, type ValidationMessage, type ValidationResult, } from "./validation.js";
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* @sanity/ailf-tasks — Task definition schemas and YAML parser.
|
|
3
|
+
*
|
|
4
|
+
* Lightweight package for parsing and validating .ailf/tasks/*.yaml files
|
|
5
|
+
* without depending on the full AILF CLI or its heavyweight dependencies
|
|
6
|
+
* (Promptfoo, LLM SDKs, Sanity client).
|
|
7
|
+
*
|
|
8
|
+
* Usage:
|
|
9
|
+
* import { parseTaskFile, loadTaskDir, RepoTaskSchema } from '@sanity/ailf-tasks'
|
|
10
|
+
*/
|
|
11
|
+
// Schemas and types
|
|
12
|
+
export { CURATED_ASSERTION_TYPES, RepoTaskFileSchema, RepoTaskSchema, RUBRIC_TEMPLATE_NAMES, } from "./schemas.js";
|
|
13
|
+
// Parsing
|
|
14
|
+
export { loadTaskDir, parseTaskFile } from "./parser.js";
|
|
15
|
+
// Validation
|
|
16
|
+
export { detectSnakeCaseFields, formatValidationResult, validateRepoTasks, } from "./validation.js";
|
|
@@ -0,0 +1,27 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* parser.ts — Standalone task file and directory parsing.
|
|
3
|
+
*
|
|
4
|
+
* High-level functions for loading and validating .ailf/tasks/ YAML
|
|
5
|
+
* files without any dependency on the eval pipeline.
|
|
6
|
+
*
|
|
7
|
+
* Usage:
|
|
8
|
+
* import { parseTaskFile, loadTaskDir } from '@sanity/ailf-tasks'
|
|
9
|
+
*/
|
|
10
|
+
import { type RepoTask } from "./schemas.js";
|
|
11
|
+
/**
|
|
12
|
+
* Parse a single task YAML string and return validated tasks.
|
|
13
|
+
*
|
|
14
|
+
* @param content - Raw YAML string content
|
|
15
|
+
* @param filename - Source filename (for error messages)
|
|
16
|
+
* @returns Validated array of RepoTask objects
|
|
17
|
+
* @throws Error if YAML parsing or Zod validation fails
|
|
18
|
+
*/
|
|
19
|
+
export declare function parseTaskFile(content: string, filename?: string): RepoTask[];
|
|
20
|
+
/**
|
|
21
|
+
* Load and parse all task YAML files from a directory.
|
|
22
|
+
*
|
|
23
|
+
* @param dirPath - Path to directory containing .yaml/.yml files
|
|
24
|
+
* @returns All validated tasks, sorted by filename
|
|
25
|
+
* @throws Error if directory not found, no YAML files, or validation fails
|
|
26
|
+
*/
|
|
27
|
+
export declare function loadTaskDir(dirPath: string): RepoTask[];
|
|
@@ -0,0 +1,73 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* parser.ts — Standalone task file and directory parsing.
|
|
3
|
+
*
|
|
4
|
+
* High-level functions for loading and validating .ailf/tasks/ YAML
|
|
5
|
+
* files without any dependency on the eval pipeline.
|
|
6
|
+
*
|
|
7
|
+
* Usage:
|
|
8
|
+
* import { parseTaskFile, loadTaskDir } from '@sanity/ailf-tasks'
|
|
9
|
+
*/
|
|
10
|
+
import { existsSync, readdirSync, readFileSync } from "fs";
|
|
11
|
+
import { resolve } from "path";
|
|
12
|
+
import { load } from "js-yaml";
|
|
13
|
+
import { RepoTaskFileSchema } from "./schemas.js";
|
|
14
|
+
// ---------------------------------------------------------------------------
|
|
15
|
+
// Public API
|
|
16
|
+
// ---------------------------------------------------------------------------
|
|
17
|
+
/**
 * Turn the text of one task YAML file into validated task objects.
 *
 * The text is parsed with js-yaml's `load`; the result must be an array,
 * which is then checked against `RepoTaskFileSchema`.
 *
 * @param content - Raw YAML text.
 * @param filename - Name used in error messages (defaults to "<string>").
 * @returns The schema's parsed output (`result.data`).
 * @throws Error if the document is not an array, or if schema validation
 *   fails (one line per issue, prefixed with the issue path). Errors thrown
 *   by `load` itself propagate unchanged.
 */
export function parseTaskFile(content, filename = "<string>") {
    const document = load(content);
    if (Array.isArray(document)) {
        const outcome = RepoTaskFileSchema.safeParse(document);
        if (outcome.success) {
            return outcome.data;
        }
        // Collect one "[path]: message" line per reported issue.
        const details = [];
        for (const issue of outcome.error.issues) {
            details.push(` [${issue.path.join(".")}]: ${issue.message}`);
        }
        throw new Error(`Invalid task file "${filename}":\n${details.join("\n")}`);
    }
    throw new Error(`${filename} did not parse to an array of tasks. ` +
        "Task files must contain a YAML array of task definitions.");
}
|
|
40
|
+
/**
 * Load and parse all task YAML files from a directory.
 *
 * Only direct children of `dirPath` are considered. An entry is loaded when
 * its name ends in `.yaml` or `.yml`, does not start with a dot, and it is
 * not a directory. Files are processed in sorted filename order and their
 * tasks are concatenated in that order.
 *
 * @param dirPath - Path to directory containing .yaml/.yml files
 * @returns All validated tasks, ordered by source filename
 * @throws Error if the directory does not exist, contains no matching YAML
 *   files, or any file fails to read or parse (the failing filename is in
 *   the message and the underlying error is attached as `cause`)
 */
export function loadTaskDir(dirPath) {
    if (!existsSync(dirPath)) {
        throw new Error(`Tasks directory not found: ${dirPath}\n` +
            " Expected a directory containing .ailf/tasks/*.yaml files.");
    }
    const yamlFiles = readdirSync(dirPath, { withFileTypes: true })
        // A sub-directory called "foo.yaml" is not a task file; reading it
        // would fail with EISDIR, so skip directories up front.
        .filter((entry) => !entry.isDirectory())
        .map((entry) => entry.name)
        .filter((name) => (name.endsWith(".yaml") || name.endsWith(".yml")) && !name.startsWith("."))
        .sort();
    if (yamlFiles.length === 0) {
        throw new Error(`No YAML files found in ${dirPath}\n` +
            " Expected .ailf/tasks/*.yaml files with task definitions.");
    }
    const allTasks = [];
    for (const file of yamlFiles) {
        try {
            // Read inside the try so I/O failures also name the offending file.
            const content = readFileSync(resolve(dirPath, file), "utf-8");
            allTasks.push(...parseTaskFile(content, file));
        }
        catch (err) {
            const msg = err instanceof Error ? err.message : String(err);
            throw new Error(`Failed to load ${file}:\n${msg}`, { cause: err });
        }
    }
    return allTasks;
}
|
|
@@ -0,0 +1,186 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* schemas.ts — Zod schemas for repo-based task definitions.
|
|
3
|
+
*
|
|
4
|
+
* Validates .ailf/tasks/*.yaml task files from external repositories.
|
|
5
|
+
* These schemas are the contract between external repos and the AILF eval
|
|
6
|
+
* pipeline — they define exactly what fields are accepted, with friendly
|
|
7
|
+
* error messages for authors writing task YAML by hand.
|
|
8
|
+
*
|
|
9
|
+
* This module is the single source of truth for task schemas. The eval
|
|
10
|
+
* package re-exports from here to avoid duplication.
|
|
11
|
+
*
|
|
12
|
+
* @see docs/exec-plans/completed/tasks-as-content/phase-4-repo-based-tasks.md
|
|
13
|
+
*/
|
|
14
|
+
import { z } from "zod";
|
|
15
|
+
/**
|
|
16
|
+
* The set of assertion types allowed in repo-based task files.
|
|
17
|
+
*
|
|
18
|
+
* This is a curated subset of Promptfoo assertion types — we expose only the
|
|
19
|
+
* types that are stable, well-documented, and useful for external authors.
|
|
20
|
+
*/
|
|
21
|
+
export declare const CURATED_ASSERTION_TYPES: readonly ["llm-rubric", "contains", "contains-any", "contains-all", "not-contains", "icontains", "icontains-any", "regex", "javascript", "similar", "cost", "latency"];
|
|
22
|
+
export type CuratedAssertionType = (typeof CURATED_ASSERTION_TYPES)[number];
|
|
23
|
+
/**
|
|
24
|
+
* Valid rubric template names — must match keys in config/rubrics.yaml.
|
|
25
|
+
*/
|
|
26
|
+
export declare const RUBRIC_TEMPLATE_NAMES: readonly ["task-completion", "code-correctness", "doc-coverage"];
|
|
27
|
+
export type RubricTemplateName = (typeof RUBRIC_TEMPLATE_NAMES)[number];
|
|
28
|
+
/**
|
|
29
|
+
* Zod schema for a single repo-based task definition.
|
|
30
|
+
*
|
|
31
|
+
* This is the external-author-facing contract. Field names are camelCase
|
|
32
|
+
* to match the Content Lake document schema (ailf.task).
|
|
33
|
+
*/
|
|
34
|
+
export declare const RepoTaskSchema: z.ZodObject<{
|
|
35
|
+
id: z.ZodString;
|
|
36
|
+
description: z.ZodString;
|
|
37
|
+
featureArea: z.ZodString;
|
|
38
|
+
tags: z.ZodOptional<z.ZodArray<z.ZodString>>;
|
|
39
|
+
canonicalDocs: z.ZodDefault<z.ZodOptional<z.ZodArray<z.ZodUnion<readonly [z.ZodObject<{
|
|
40
|
+
id: z.ZodString;
|
|
41
|
+
reason: z.ZodDefault<z.ZodOptional<z.ZodString>>;
|
|
42
|
+
slug: z.ZodOptional<z.ZodString>;
|
|
43
|
+
path: z.ZodOptional<z.ZodString>;
|
|
44
|
+
}, z.core.$strip>, z.ZodObject<{
|
|
45
|
+
slug: z.ZodString;
|
|
46
|
+
reason: z.ZodDefault<z.ZodOptional<z.ZodString>>;
|
|
47
|
+
}, z.core.$strip>, z.ZodObject<{
|
|
48
|
+
path: z.ZodString;
|
|
49
|
+
reason: z.ZodDefault<z.ZodOptional<z.ZodString>>;
|
|
50
|
+
}, z.core.$strip>, z.ZodObject<{
|
|
51
|
+
perspective: z.ZodString;
|
|
52
|
+
reason: z.ZodDefault<z.ZodOptional<z.ZodString>>;
|
|
53
|
+
}, z.core.$strip>]>>>>;
|
|
54
|
+
vars: z.ZodOptional<z.ZodObject<{
|
|
55
|
+
task: z.ZodString;
|
|
56
|
+
}, z.core.$loose>>;
|
|
57
|
+
assert: z.ZodArray<z.ZodUnion<readonly [z.ZodObject<{
|
|
58
|
+
type: z.ZodLiteral<"llm-rubric">;
|
|
59
|
+
template: z.ZodEnum<{
|
|
60
|
+
"task-completion": "task-completion";
|
|
61
|
+
"code-correctness": "code-correctness";
|
|
62
|
+
"doc-coverage": "doc-coverage";
|
|
63
|
+
}>;
|
|
64
|
+
criteria: z.ZodArray<z.ZodString>;
|
|
65
|
+
weight: z.ZodOptional<z.ZodNumber>;
|
|
66
|
+
}, z.core.$strip>, z.ZodObject<{
|
|
67
|
+
type: z.ZodEnum<{
|
|
68
|
+
"llm-rubric": "llm-rubric";
|
|
69
|
+
contains: "contains";
|
|
70
|
+
"contains-any": "contains-any";
|
|
71
|
+
"contains-all": "contains-all";
|
|
72
|
+
"not-contains": "not-contains";
|
|
73
|
+
icontains: "icontains";
|
|
74
|
+
"icontains-any": "icontains-any";
|
|
75
|
+
regex: "regex";
|
|
76
|
+
javascript: "javascript";
|
|
77
|
+
similar: "similar";
|
|
78
|
+
cost: "cost";
|
|
79
|
+
latency: "latency";
|
|
80
|
+
}>;
|
|
81
|
+
value: z.ZodOptional<z.ZodUnknown>;
|
|
82
|
+
threshold: z.ZodOptional<z.ZodNumber>;
|
|
83
|
+
weight: z.ZodOptional<z.ZodNumber>;
|
|
84
|
+
}, z.core.$loose>]>>;
|
|
85
|
+
baseline: z.ZodOptional<z.ZodObject<{
|
|
86
|
+
enabled: z.ZodOptional<z.ZodBoolean>;
|
|
87
|
+
rubric: z.ZodOptional<z.ZodEnum<{
|
|
88
|
+
abbreviated: "abbreviated";
|
|
89
|
+
full: "full";
|
|
90
|
+
none: "none";
|
|
91
|
+
}>>;
|
|
92
|
+
}, z.core.$strip>>;
|
|
93
|
+
docCoverage: z.ZodDefault<z.ZodOptional<z.ZodBoolean>>;
|
|
94
|
+
referenceSolution: z.ZodOptional<z.ZodString>;
|
|
95
|
+
execution: z.ZodOptional<z.ZodObject<{
|
|
96
|
+
enabled: z.ZodDefault<z.ZodOptional<z.ZodBoolean>>;
|
|
97
|
+
blocking: z.ZodDefault<z.ZodOptional<z.ZodBoolean>>;
|
|
98
|
+
threshold: z.ZodOptional<z.ZodObject<{
|
|
99
|
+
score: z.ZodOptional<z.ZodNumber>;
|
|
100
|
+
}, z.core.$strip>>;
|
|
101
|
+
trigger: z.ZodOptional<z.ZodObject<{
|
|
102
|
+
branches: z.ZodOptional<z.ZodArray<z.ZodString>>;
|
|
103
|
+
paths: z.ZodOptional<z.ZodArray<z.ZodString>>;
|
|
104
|
+
}, z.core.$strip>>;
|
|
105
|
+
source: z.ZodOptional<z.ZodString>;
|
|
106
|
+
}, z.core.$strip>>;
|
|
107
|
+
}, z.core.$strip>;
|
|
108
|
+
export type RepoTask = z.infer<typeof RepoTaskSchema>;
|
|
109
|
+
/**
|
|
110
|
+
* Schema for an array of repo tasks — what a single .ailf/tasks/*.yaml file
|
|
111
|
+
* contains. Each file must define at least one task.
|
|
112
|
+
*/
|
|
113
|
+
export declare const RepoTaskFileSchema: z.ZodArray<z.ZodObject<{
|
|
114
|
+
id: z.ZodString;
|
|
115
|
+
description: z.ZodString;
|
|
116
|
+
featureArea: z.ZodString;
|
|
117
|
+
tags: z.ZodOptional<z.ZodArray<z.ZodString>>;
|
|
118
|
+
canonicalDocs: z.ZodDefault<z.ZodOptional<z.ZodArray<z.ZodUnion<readonly [z.ZodObject<{
|
|
119
|
+
id: z.ZodString;
|
|
120
|
+
reason: z.ZodDefault<z.ZodOptional<z.ZodString>>;
|
|
121
|
+
slug: z.ZodOptional<z.ZodString>;
|
|
122
|
+
path: z.ZodOptional<z.ZodString>;
|
|
123
|
+
}, z.core.$strip>, z.ZodObject<{
|
|
124
|
+
slug: z.ZodString;
|
|
125
|
+
reason: z.ZodDefault<z.ZodOptional<z.ZodString>>;
|
|
126
|
+
}, z.core.$strip>, z.ZodObject<{
|
|
127
|
+
path: z.ZodString;
|
|
128
|
+
reason: z.ZodDefault<z.ZodOptional<z.ZodString>>;
|
|
129
|
+
}, z.core.$strip>, z.ZodObject<{
|
|
130
|
+
perspective: z.ZodString;
|
|
131
|
+
reason: z.ZodDefault<z.ZodOptional<z.ZodString>>;
|
|
132
|
+
}, z.core.$strip>]>>>>;
|
|
133
|
+
vars: z.ZodOptional<z.ZodObject<{
|
|
134
|
+
task: z.ZodString;
|
|
135
|
+
}, z.core.$loose>>;
|
|
136
|
+
assert: z.ZodArray<z.ZodUnion<readonly [z.ZodObject<{
|
|
137
|
+
type: z.ZodLiteral<"llm-rubric">;
|
|
138
|
+
template: z.ZodEnum<{
|
|
139
|
+
"task-completion": "task-completion";
|
|
140
|
+
"code-correctness": "code-correctness";
|
|
141
|
+
"doc-coverage": "doc-coverage";
|
|
142
|
+
}>;
|
|
143
|
+
criteria: z.ZodArray<z.ZodString>;
|
|
144
|
+
weight: z.ZodOptional<z.ZodNumber>;
|
|
145
|
+
}, z.core.$strip>, z.ZodObject<{
|
|
146
|
+
type: z.ZodEnum<{
|
|
147
|
+
"llm-rubric": "llm-rubric";
|
|
148
|
+
contains: "contains";
|
|
149
|
+
"contains-any": "contains-any";
|
|
150
|
+
"contains-all": "contains-all";
|
|
151
|
+
"not-contains": "not-contains";
|
|
152
|
+
icontains: "icontains";
|
|
153
|
+
"icontains-any": "icontains-any";
|
|
154
|
+
regex: "regex";
|
|
155
|
+
javascript: "javascript";
|
|
156
|
+
similar: "similar";
|
|
157
|
+
cost: "cost";
|
|
158
|
+
latency: "latency";
|
|
159
|
+
}>;
|
|
160
|
+
value: z.ZodOptional<z.ZodUnknown>;
|
|
161
|
+
threshold: z.ZodOptional<z.ZodNumber>;
|
|
162
|
+
weight: z.ZodOptional<z.ZodNumber>;
|
|
163
|
+
}, z.core.$loose>]>>;
|
|
164
|
+
baseline: z.ZodOptional<z.ZodObject<{
|
|
165
|
+
enabled: z.ZodOptional<z.ZodBoolean>;
|
|
166
|
+
rubric: z.ZodOptional<z.ZodEnum<{
|
|
167
|
+
abbreviated: "abbreviated";
|
|
168
|
+
full: "full";
|
|
169
|
+
none: "none";
|
|
170
|
+
}>>;
|
|
171
|
+
}, z.core.$strip>>;
|
|
172
|
+
docCoverage: z.ZodDefault<z.ZodOptional<z.ZodBoolean>>;
|
|
173
|
+
referenceSolution: z.ZodOptional<z.ZodString>;
|
|
174
|
+
execution: z.ZodOptional<z.ZodObject<{
|
|
175
|
+
enabled: z.ZodDefault<z.ZodOptional<z.ZodBoolean>>;
|
|
176
|
+
blocking: z.ZodDefault<z.ZodOptional<z.ZodBoolean>>;
|
|
177
|
+
threshold: z.ZodOptional<z.ZodObject<{
|
|
178
|
+
score: z.ZodOptional<z.ZodNumber>;
|
|
179
|
+
}, z.core.$strip>>;
|
|
180
|
+
trigger: z.ZodOptional<z.ZodObject<{
|
|
181
|
+
branches: z.ZodOptional<z.ZodArray<z.ZodString>>;
|
|
182
|
+
paths: z.ZodOptional<z.ZodArray<z.ZodString>>;
|
|
183
|
+
}, z.core.$strip>>;
|
|
184
|
+
source: z.ZodOptional<z.ZodString>;
|
|
185
|
+
}, z.core.$strip>>;
|
|
186
|
+
}, z.core.$strip>>;
|