@sanity/ailf 0.1.5 → 0.1.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (54) hide show
  1. package/dist/_vendor/ailf-core/examples/index.d.ts +1 -1
  2. package/dist/_vendor/ailf-core/examples/index.js +1 -1
  3. package/dist/_vendor/ailf-core/ports/context.d.ts +6 -0
  4. package/dist/_vendor/ailf-core/schemas/pipeline-request.d.ts +1 -53
  5. package/dist/_vendor/ailf-core/schemas/pipeline-request.js +1 -2
  6. package/dist/_vendor/ailf-tasks/cli.d.ts +8 -0
  7. package/dist/_vendor/ailf-tasks/cli.js +61 -0
  8. package/dist/_vendor/ailf-tasks/index.d.ts +13 -0
  9. package/dist/_vendor/ailf-tasks/index.js +16 -0
  10. package/dist/_vendor/ailf-tasks/parser.d.ts +27 -0
  11. package/dist/_vendor/ailf-tasks/parser.js +73 -0
  12. package/dist/_vendor/ailf-tasks/schemas.d.ts +186 -0
  13. package/dist/_vendor/ailf-tasks/schemas.js +176 -0
  14. package/dist/_vendor/ailf-tasks/validation.d.ts +47 -0
  15. package/dist/_vendor/ailf-tasks/validation.js +162 -0
  16. package/dist/adapters/api-client/api-client.d.ts +75 -0
  17. package/dist/adapters/api-client/api-client.js +201 -0
  18. package/dist/adapters/api-client/build-request.d.ts +75 -0
  19. package/dist/adapters/api-client/build-request.js +176 -0
  20. package/dist/adapters/api-client/errors.d.ts +43 -0
  21. package/dist/adapters/api-client/errors.js +68 -0
  22. package/dist/adapters/api-client/format-error.d.ts +22 -0
  23. package/dist/adapters/api-client/format-error.js +48 -0
  24. package/dist/adapters/api-client/index.d.ts +13 -0
  25. package/dist/adapters/api-client/index.js +12 -0
  26. package/dist/adapters/api-client/progress.d.ts +26 -0
  27. package/dist/adapters/api-client/progress.js +69 -0
  28. package/dist/adapters/api-client/remediation.d.ts +19 -0
  29. package/dist/adapters/api-client/remediation.js +76 -0
  30. package/dist/adapters/api-client/types.d.ts +98 -0
  31. package/dist/adapters/api-client/types.js +14 -0
  32. package/dist/adapters/config-sources/file-config-adapter.js +2 -0
  33. package/dist/adapters/task-sources/repo-schemas.d.ts +16 -181
  34. package/dist/adapters/task-sources/repo-schemas.js +27 -184
  35. package/dist/adapters/task-sources/repo-validation.d.ts +5 -46
  36. package/dist/adapters/task-sources/repo-validation.js +5 -161
  37. package/dist/commands/calculate-scores.js +2 -0
  38. package/dist/commands/explain-handler.js +6 -0
  39. package/dist/commands/fetch-docs.js +2 -0
  40. package/dist/commands/generate-configs.js +2 -0
  41. package/dist/commands/init.js +9 -9
  42. package/dist/commands/pipeline-action.d.ts +3 -0
  43. package/dist/commands/pipeline-action.js +13 -0
  44. package/dist/commands/pipeline.d.ts +2 -0
  45. package/dist/commands/pipeline.js +2 -0
  46. package/dist/commands/pr-comment.js +2 -0
  47. package/dist/commands/publish.js +2 -0
  48. package/dist/commands/remote-pipeline.d.ts +27 -0
  49. package/dist/commands/remote-pipeline.js +133 -0
  50. package/dist/commands/remote-results.d.ts +33 -0
  51. package/dist/commands/remote-results.js +97 -0
  52. package/dist/orchestration/build-app-context.js +3 -0
  53. package/dist/pipeline/map-request-to-config.js +2 -0
  54. package/package.json +2 -1
@@ -191,4 +191,4 @@ export interface ExampleRecord {
191
191
  }
192
192
  export declare const EXAMPLES: Record<ExampleType, ExampleRecord>;
193
193
  /** GitHub Actions workflow template for AI Literacy evaluation */
194
- export declare const workflowYaml = "# \u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n# AI Literacy Evaluation \u2014 GitHub Actions workflow\n# \u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n#\n# This workflow submits evaluations to the AILF API when task or config\n# files change in a pull request. 
The API handles all processing\n# (LLM calls, doc fetching, grading, report publishing).\n#\n# Prerequisites:\n# Add one secret to your repository (Settings \u2192 Secrets \u2192 Actions):\n# AILF_API_KEY \u2014 your API key (starts with ailf_live_sk_)\n#\n# Customization:\n# - Adjust `paths` to match your documentation file locations\n# - Set full_eval to true for comprehensive (slower) evaluation\n# - See: https://github.com/sanity-labs/ai-literacy-framework/blob/main/docs/API_GATEWAY.md\n# \u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n\nname: AI Literacy Eval\n\non:\n pull_request:\n branches: [main]\n paths:\n - \".ailf/**\"\n\n # Manual trigger from the Actions tab\n workflow_dispatch:\n inputs:\n full_eval:\n description: \"Run full evaluation (all tests, slower)\"\n type: boolean\n default: false\n\nconcurrency:\n group: ailf-eval-${{ github.event.pull_request.number || github.ref }}\n cancel-in-progress: true\n\njobs:\n evaluate:\n name: AI Literacy Evaluation\n runs-on: ubuntu-latest\n permissions:\n pull-requests: write\n steps:\n # \u2500\u2500\u2500 Submit evaluation to the AILF API \u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n - name: Submit evaluation\n id: submit\n env:\n AILF_API_KEY: ${{ secrets.AILF_API_KEY }}\n FULL_EVAL: ${{ inputs.full_eval || 'false' }}\n run: |\n if [ \"$FULL_EVAL\" = \"true\" ]; then\n DEBUG_FIELD=\"\"\n else\n DEBUG_FIELD='\"debug\": { \"enabled\": true, \"firstN\": 2 },'\n fi\n\n PAYLOAD=$(cat <<EOF\n {\n \"mode\": \"baseline\",\n ${DEBUG_FIELD}\n \"publish\": true,\n 
\"compare\": true\n }\n EOF\n )\n\n RESPONSE=$(curl -sf -X POST \\\n -H \"Authorization: Bearer $AILF_API_KEY\" \\\n -H \"Content-Type: application/json\" \\\n https://ailf-api.sanity.build/v1/pipeline \\\n -d \"$PAYLOAD\")\n\n JOB_ID=$(echo \"$RESPONSE\" | jq -r '.jobId')\n echo \"job_id=$JOB_ID\" >> $GITHUB_OUTPUT\n echo \"\uD83D\uDCCB Submitted job: $JOB_ID\"\n\n # \u2500\u2500\u2500 Poll for results (long-polling) \u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n - name: Wait for results\n id: results\n env:\n AILF_API_KEY: ${{ secrets.AILF_API_KEY }}\n JOB_ID: ${{ steps.submit.outputs.job_id }}\n run: |\n for i in $(seq 1 40); do\n RESPONSE=$(curl -s \\\n -H \"Authorization: Bearer $AILF_API_KEY\" \\\n -H \"Prefer: wait=25\" \\\n \"https://ailf-api.sanity.build/v1/jobs/$JOB_ID\")\n\n STATUS=$(echo \"$RESPONSE\" | jq -r '.status')\n\n case \"$STATUS\" in\n completed)\n echo \"status=completed\" >> $GITHUB_OUTPUT\n echo \"report_id=$(echo $RESPONSE | jq -r '.reportId // empty')\" >> $GITHUB_OUTPUT\n echo \"score=$(echo $RESPONSE | jq -r '.score // empty')\" >> $GITHUB_OUTPUT\n echo \"\u2705 Evaluation completed\"\n exit 0\n ;;\n failed|timed-out)\n echo \"status=$STATUS\" >> $GITHUB_OUTPUT\n echo \"::error::Evaluation $STATUS\"\n exit 1\n ;;\n *)\n echo \"\u23F3 [$i/40] $STATUS\"\n ;;\n esac\n done\n\n echo \"::error::Timed out waiting for evaluation\"\n exit 1\n\n # \u2500\u2500\u2500 Fetch the markdown report \u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n - name: Fetch markdown report\n id: markdown\n if: steps.results.outputs.report_id != ''\n env:\n AILF_API_KEY: ${{ secrets.AILF_API_KEY }}\n REPORT_ID: ${{ steps.results.outputs.report_id }}\n run: |\n REPORT_MD=$(curl -sf \\\n -H \"Authorization: Bearer $AILF_API_KEY\" \\\n 
\"https://ailf-api.sanity.build/v1/reports/$REPORT_ID/markdown\")\n\n if [ $? -eq 0 ] && [ -n \"$REPORT_MD\" ]; then\n # Write to a temp file to avoid shell quoting issues\n echo \"$REPORT_MD\" > /tmp/ailf-report.md\n echo \"fetched=true\" >> $GITHUB_OUTPUT\n else\n echo \"fetched=false\" >> $GITHUB_OUTPUT\n echo \"::warning::Could not fetch markdown report\"\n fi\n\n # \u2500\u2500\u2500 Post results to PR \u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n - name: Post PR comment\n if: >-\n always() && github.event_name == 'pull_request' &&\n steps.submit.outputs.job_id != ''\n uses: actions/github-script@v7\n env:\n JOB_STATUS: ${{ steps.results.outputs.status || 'unknown' }}\n REPORT_ID: ${{ steps.results.outputs.report_id || '' }}\n JOB_ID: ${{ steps.submit.outputs.job_id }}\n SCORE: ${{ steps.results.outputs.score || '' }}\n MARKDOWN_FETCHED: ${{ steps.markdown.outputs.fetched || 'false' }}\n with:\n script: |\n const fs = require('fs');\n const marker = '<!-- ailf-score-report -->';\n const status = process.env.JOB_STATUS;\n const reportId = process.env.REPORT_ID;\n const jobId = process.env.JOB_ID;\n const score = process.env.SCORE;\n const mdFetched = process.env.MARKDOWN_FETCHED === 'true';\n\n let body;\n\n if (status === 'completed' && mdFetched) {\n // Use the full markdown report from the API\n const reportMd = fs.readFileSync('/tmp/ailf-report.md', 'utf-8');\n body = `${marker}\\n${reportMd}`;\n if (reportId) {\n body += `\\n\uD83D\uDD17 [View in Studio](https://admin.sanity.io/ailf/report/${reportId})`;\n }\n body += `\\n\\n<sub>Job: \\`${jobId}\\`</sub>\\n`;\n } else {\n // Fallback: minimal comment when markdown is unavailable\n let icon, message;\n if (status === 'completed') {\n icon = '\u2705';\n message = score\n ? 
`Evaluation completed \u2014 score: **${score}/100**`\n : 'Evaluation completed successfully.';\n } else if (status === 'failed' || status === 'timed-out') {\n icon = '\u26A0\uFE0F';\n message = `Evaluation ${status}.`;\n } else {\n icon = '\u23F3';\n message = 'Evaluation status unknown (may still be running).';\n }\n\n body = `${marker}\\n## ${icon} AI Literacy Evaluation\\n\\n${message}\\n`;\n if (reportId) {\n body += `\\n\uD83D\uDD17 [View in Studio](https://admin.sanity.io/ailf/report/${reportId})\\n`;\n }\n body += `\\n<sub>Job: \\`${jobId}\\`</sub>\\n`;\n }\n\n const { data: comments } = await github.rest.issues.listComments({\n owner: context.repo.owner,\n repo: context.repo.repo,\n issue_number: context.issue.number,\n });\n const existing = comments.find(c => c.body?.includes(marker));\n\n if (existing) {\n await github.rest.issues.updateComment({\n owner: context.repo.owner,\n repo: context.repo.repo,\n comment_id: existing.id,\n body,\n });\n } else {\n await github.rest.issues.createComment({\n owner: context.repo.owner,\n repo: context.repo.repo,\n issue_number: context.issue.number,\n body,\n });\n }\n\n # \u2500\u2500\u2500 Job summary \u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n - name: Summary\n if: always()\n env:\n JOB_STATUS: ${{ steps.results.outputs.status || 'unknown' }}\n REPORT_ID: ${{ steps.results.outputs.report_id || '' }}\n JOB_ID: ${{ steps.submit.outputs.job_id }}\n SCORE: ${{ steps.results.outputs.score || '' }}\n MARKDOWN_FETCHED: ${{ steps.markdown.outputs.fetched || 'false' }}\n run: |\n if [ \"$JOB_STATUS\" = \"completed\" ] && [ \"$MARKDOWN_FETCHED\" = \"true\" ] && [ -f /tmp/ailf-report.md ]; then\n # Use the full markdown report as the job summary\n cat /tmp/ailf-report.md >> \"$GITHUB_STEP_SUMMARY\"\n echo \"\" 
>> \"$GITHUB_STEP_SUMMARY\"\n [ -n \"$REPORT_ID\" ] && echo \"\uD83D\uDD17 [View in Studio](https://admin.sanity.io/ailf/report/$REPORT_ID)\" >> \"$GITHUB_STEP_SUMMARY\"\n echo \"\" >> \"$GITHUB_STEP_SUMMARY\"\n echo \"<sub>Job: \\`$JOB_ID\\`</sub>\" >> \"$GITHUB_STEP_SUMMARY\"\n else\n # Fallback: minimal table\n {\n echo \"## \uD83D\uDCCA AI Literacy Evaluation\"\n echo \"\"\n echo \"| Field | Value |\"\n echo \"|-------|-------|\"\n echo \"| Job | \\`$JOB_ID\\` |\"\n echo \"| Status | $JOB_STATUS |\"\n [ -n \"$SCORE\" ] && echo \"| Score | $SCORE/100 |\"\n [ -n \"$REPORT_ID\" ] && echo \"| Report | [$REPORT_ID](https://admin.sanity.io/ailf/report/$REPORT_ID) |\"\n } >> \"$GITHUB_STEP_SUMMARY\"\n fi\n";
194
+ export declare const workflowYaml = "# \u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n# AI Literacy Evaluation \u2014 GitHub Actions workflow\n# \u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n#\n# Evaluates your documentation quality on every pull request.\n# The AILF CLI reads your .ailf/tasks/ definitions, submits them\n# to the AILF API for evaluation, and writes a score report.\n#\n# Prerequisites:\n# Add one secret to your repository (Settings \u2192 Secrets \u2192 Actions):\n# AILF_API_KEY \u2014 your API key (starts with ailf_live_sk_)\n#\n# Customization:\n# - Narrow the trigger paths to reduce cost (see comment below)\n# - Set full_eval to true for comprehensive evaluation\n# - See: https://github.com/sanity-io/ai-literacy-framework\n# \u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n\nname: AI Literacy Eval\n\non:\n pull_request:\n branches: [main]\n # 
Runs on every PR to main by default. To reduce cost:\n # paths: [\".ailf/**\", \"docs/**\"]\n\n workflow_dispatch:\n inputs:\n full_eval:\n description: \"Run full evaluation (all tests, slower)\"\n type: boolean\n default: false\n\nconcurrency:\n group: ailf-eval-${{ github.event.pull_request.number || github.ref }}\n cancel-in-progress: true\n\njobs:\n evaluate:\n name: AI Literacy Evaluation\n runs-on: ubuntu-latest\n permissions:\n contents: read\n pull-requests: write\n steps:\n - uses: actions/checkout@v4\n\n - name: Run evaluation\n id: eval\n env:\n AILF_API_KEY: ${{ secrets.AILF_API_KEY }}\n run: |\n npx @sanity/ailf@latest pipeline --remote \\\n --output /tmp/ailf-report.md \\\n ${{ inputs.full_eval != 'true' && '--debug' || '' }}\n\n - name: Post PR comment\n if: always() && github.event_name == 'pull_request'\n uses: actions/github-script@v7\n with:\n script: |\n const fs = require('fs');\n const marker = '<!-- ailf-score-report -->';\n let body;\n try {\n const report = fs.readFileSync('/tmp/ailf-report.md', 'utf-8');\n body = `${marker}\\n${report}`;\n } catch {\n body = `${marker}\\n## \u26A0\uFE0F AI Literacy Evaluation\\n\\nNo report generated. 
Check the [workflow logs](${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}).`;\n }\n const { data: comments } = await github.rest.issues.listComments({\n owner: context.repo.owner, repo: context.repo.repo,\n issue_number: context.issue.number,\n });\n const existing = comments.find(c => c.body?.includes(marker));\n const params = {\n owner: context.repo.owner, repo: context.repo.repo,\n body,\n };\n if (existing) {\n await github.rest.issues.updateComment({ ...params, comment_id: existing.id });\n } else {\n await github.rest.issues.createComment({ ...params, issue_number: context.issue.number });\n }\n\n - name: Summary\n if: always()\n run: |\n if [ -f /tmp/ailf-report.md ]; then\n cat /tmp/ailf-report.md >> \"$GITHUB_STEP_SUMMARY\"\n else\n echo \"## \u26A0\uFE0F AI Literacy Evaluation\" >> \"$GITHUB_STEP_SUMMARY\"\n echo \"\" >> \"$GITHUB_STEP_SUMMARY\"\n echo \"No report generated. Check the workflow logs.\" >> \"$GITHUB_STEP_SUMMARY\"\n fi\n";
@@ -289,4 +289,4 @@ export const EXAMPLES = {
289
289
  // Raw file exports (non-data files, exported as raw strings)
290
290
  // ---------------------------------------------------------------------------
291
291
  /** GitHub Actions workflow template for AI Literacy evaluation */
292
- export const workflowYaml = "# ──────────────────────────────────────────────────────────────────────\n# AI Literacy Evaluation — GitHub Actions workflow\n# ──────────────────────────────────────────────────────────────────────\n#\n# This workflow submits evaluations to the AILF API when task or config\n# files change in a pull request. The API handles all processing\n# (LLM calls, doc fetching, grading, report publishing).\n#\n# Prerequisites:\n# Add one secret to your repository (Settings → Secrets → Actions):\n# AILF_API_KEY — your API key (starts with ailf_live_sk_)\n#\n# Customization:\n# - Adjust `paths` to match your documentation file locations\n# - Set full_eval to true for comprehensive (slower) evaluation\n# - See: https://github.com/sanity-labs/ai-literacy-framework/blob/main/docs/API_GATEWAY.md\n# ──────────────────────────────────────────────────────────────────────\n\nname: AI Literacy Eval\n\non:\n pull_request:\n branches: [main]\n paths:\n - \".ailf/**\"\n\n # Manual trigger from the Actions tab\n workflow_dispatch:\n inputs:\n full_eval:\n description: \"Run full evaluation (all tests, slower)\"\n type: boolean\n default: false\n\nconcurrency:\n group: ailf-eval-${{ github.event.pull_request.number || github.ref }}\n cancel-in-progress: true\n\njobs:\n evaluate:\n name: AI Literacy Evaluation\n runs-on: ubuntu-latest\n permissions:\n pull-requests: write\n steps:\n # ─── Submit evaluation to the AILF API ─────────────────────\n - name: Submit evaluation\n id: submit\n env:\n AILF_API_KEY: ${{ secrets.AILF_API_KEY }}\n FULL_EVAL: ${{ inputs.full_eval || 'false' }}\n run: |\n if [ \"$FULL_EVAL\" = \"true\" ]; then\n DEBUG_FIELD=\"\"\n else\n DEBUG_FIELD='\"debug\": { \"enabled\": true, \"firstN\": 2 },'\n fi\n\n PAYLOAD=$(cat <<EOF\n {\n \"mode\": \"baseline\",\n ${DEBUG_FIELD}\n \"publish\": true,\n \"compare\": true\n }\n EOF\n )\n\n RESPONSE=$(curl -sf -X POST \\\n -H \"Authorization: Bearer $AILF_API_KEY\" \\\n -H \"Content-Type: 
application/json\" \\\n https://ailf-api.sanity.build/v1/pipeline \\\n -d \"$PAYLOAD\")\n\n JOB_ID=$(echo \"$RESPONSE\" | jq -r '.jobId')\n echo \"job_id=$JOB_ID\" >> $GITHUB_OUTPUT\n echo \"📋 Submitted job: $JOB_ID\"\n\n # ─── Poll for results (long-polling) ───────────────────────\n - name: Wait for results\n id: results\n env:\n AILF_API_KEY: ${{ secrets.AILF_API_KEY }}\n JOB_ID: ${{ steps.submit.outputs.job_id }}\n run: |\n for i in $(seq 1 40); do\n RESPONSE=$(curl -s \\\n -H \"Authorization: Bearer $AILF_API_KEY\" \\\n -H \"Prefer: wait=25\" \\\n \"https://ailf-api.sanity.build/v1/jobs/$JOB_ID\")\n\n STATUS=$(echo \"$RESPONSE\" | jq -r '.status')\n\n case \"$STATUS\" in\n completed)\n echo \"status=completed\" >> $GITHUB_OUTPUT\n echo \"report_id=$(echo $RESPONSE | jq -r '.reportId // empty')\" >> $GITHUB_OUTPUT\n echo \"score=$(echo $RESPONSE | jq -r '.score // empty')\" >> $GITHUB_OUTPUT\n echo \"✅ Evaluation completed\"\n exit 0\n ;;\n failed|timed-out)\n echo \"status=$STATUS\" >> $GITHUB_OUTPUT\n echo \"::error::Evaluation $STATUS\"\n exit 1\n ;;\n *)\n echo \"⏳ [$i/40] $STATUS\"\n ;;\n esac\n done\n\n echo \"::error::Timed out waiting for evaluation\"\n exit 1\n\n # ─── Fetch the markdown report ─────────────────────────────\n - name: Fetch markdown report\n id: markdown\n if: steps.results.outputs.report_id != ''\n env:\n AILF_API_KEY: ${{ secrets.AILF_API_KEY }}\n REPORT_ID: ${{ steps.results.outputs.report_id }}\n run: |\n REPORT_MD=$(curl -sf \\\n -H \"Authorization: Bearer $AILF_API_KEY\" \\\n \"https://ailf-api.sanity.build/v1/reports/$REPORT_ID/markdown\")\n\n if [ $? 
-eq 0 ] && [ -n \"$REPORT_MD\" ]; then\n # Write to a temp file to avoid shell quoting issues\n echo \"$REPORT_MD\" > /tmp/ailf-report.md\n echo \"fetched=true\" >> $GITHUB_OUTPUT\n else\n echo \"fetched=false\" >> $GITHUB_OUTPUT\n echo \"::warning::Could not fetch markdown report\"\n fi\n\n # ─── Post results to PR ────────────────────────────────────\n - name: Post PR comment\n if: >-\n always() && github.event_name == 'pull_request' &&\n steps.submit.outputs.job_id != ''\n uses: actions/github-script@v7\n env:\n JOB_STATUS: ${{ steps.results.outputs.status || 'unknown' }}\n REPORT_ID: ${{ steps.results.outputs.report_id || '' }}\n JOB_ID: ${{ steps.submit.outputs.job_id }}\n SCORE: ${{ steps.results.outputs.score || '' }}\n MARKDOWN_FETCHED: ${{ steps.markdown.outputs.fetched || 'false' }}\n with:\n script: |\n const fs = require('fs');\n const marker = '<!-- ailf-score-report -->';\n const status = process.env.JOB_STATUS;\n const reportId = process.env.REPORT_ID;\n const jobId = process.env.JOB_ID;\n const score = process.env.SCORE;\n const mdFetched = process.env.MARKDOWN_FETCHED === 'true';\n\n let body;\n\n if (status === 'completed' && mdFetched) {\n // Use the full markdown report from the API\n const reportMd = fs.readFileSync('/tmp/ailf-report.md', 'utf-8');\n body = `${marker}\\n${reportMd}`;\n if (reportId) {\n body += `\\n🔗 [View in Studio](https://admin.sanity.io/ailf/report/${reportId})`;\n }\n body += `\\n\\n<sub>Job: \\`${jobId}\\`</sub>\\n`;\n } else {\n // Fallback: minimal comment when markdown is unavailable\n let icon, message;\n if (status === 'completed') {\n icon = '✅';\n message = score\n ? 
`Evaluation completed — score: **${score}/100**`\n : 'Evaluation completed successfully.';\n } else if (status === 'failed' || status === 'timed-out') {\n icon = '⚠️';\n message = `Evaluation ${status}.`;\n } else {\n icon = '⏳';\n message = 'Evaluation status unknown (may still be running).';\n }\n\n body = `${marker}\\n## ${icon} AI Literacy Evaluation\\n\\n${message}\\n`;\n if (reportId) {\n body += `\\n🔗 [View in Studio](https://admin.sanity.io/ailf/report/${reportId})\\n`;\n }\n body += `\\n<sub>Job: \\`${jobId}\\`</sub>\\n`;\n }\n\n const { data: comments } = await github.rest.issues.listComments({\n owner: context.repo.owner,\n repo: context.repo.repo,\n issue_number: context.issue.number,\n });\n const existing = comments.find(c => c.body?.includes(marker));\n\n if (existing) {\n await github.rest.issues.updateComment({\n owner: context.repo.owner,\n repo: context.repo.repo,\n comment_id: existing.id,\n body,\n });\n } else {\n await github.rest.issues.createComment({\n owner: context.repo.owner,\n repo: context.repo.repo,\n issue_number: context.issue.number,\n body,\n });\n }\n\n # ─── Job summary ───────────────────────────────────────────\n - name: Summary\n if: always()\n env:\n JOB_STATUS: ${{ steps.results.outputs.status || 'unknown' }}\n REPORT_ID: ${{ steps.results.outputs.report_id || '' }}\n JOB_ID: ${{ steps.submit.outputs.job_id }}\n SCORE: ${{ steps.results.outputs.score || '' }}\n MARKDOWN_FETCHED: ${{ steps.markdown.outputs.fetched || 'false' }}\n run: |\n if [ \"$JOB_STATUS\" = \"completed\" ] && [ \"$MARKDOWN_FETCHED\" = \"true\" ] && [ -f /tmp/ailf-report.md ]; then\n # Use the full markdown report as the job summary\n cat /tmp/ailf-report.md >> \"$GITHUB_STEP_SUMMARY\"\n echo \"\" >> \"$GITHUB_STEP_SUMMARY\"\n [ -n \"$REPORT_ID\" ] && echo \"🔗 [View in Studio](https://admin.sanity.io/ailf/report/$REPORT_ID)\" >> \"$GITHUB_STEP_SUMMARY\"\n echo \"\" >> \"$GITHUB_STEP_SUMMARY\"\n echo \"<sub>Job: \\`$JOB_ID\\`</sub>\" >> 
\"$GITHUB_STEP_SUMMARY\"\n else\n # Fallback: minimal table\n {\n echo \"## 📊 AI Literacy Evaluation\"\n echo \"\"\n echo \"| Field | Value |\"\n echo \"|-------|-------|\"\n echo \"| Job | \\`$JOB_ID\\` |\"\n echo \"| Status | $JOB_STATUS |\"\n [ -n \"$SCORE\" ] && echo \"| Score | $SCORE/100 |\"\n [ -n \"$REPORT_ID\" ] && echo \"| Report | [$REPORT_ID](https://admin.sanity.io/ailf/report/$REPORT_ID) |\"\n } >> \"$GITHUB_STEP_SUMMARY\"\n fi\n";
292
+ export const workflowYaml = "# ──────────────────────────────────────────────────────────────────────\n# AI Literacy Evaluation — GitHub Actions workflow\n# ──────────────────────────────────────────────────────────────────────\n#\n# Evaluates your documentation quality on every pull request.\n# The AILF CLI reads your .ailf/tasks/ definitions, submits them\n# to the AILF API for evaluation, and writes a score report.\n#\n# Prerequisites:\n# Add one secret to your repository (Settings → Secrets → Actions):\n# AILF_API_KEY — your API key (starts with ailf_live_sk_)\n#\n# Customization:\n# - Narrow the trigger paths to reduce cost (see comment below)\n# - Set full_eval to true for comprehensive evaluation\n# - See: https://github.com/sanity-io/ai-literacy-framework\n# ──────────────────────────────────────────────────────────────────────\n\nname: AI Literacy Eval\n\non:\n pull_request:\n branches: [main]\n # Runs on every PR to main by default. To reduce cost:\n # paths: [\".ailf/**\", \"docs/**\"]\n\n workflow_dispatch:\n inputs:\n full_eval:\n description: \"Run full evaluation (all tests, slower)\"\n type: boolean\n default: false\n\nconcurrency:\n group: ailf-eval-${{ github.event.pull_request.number || github.ref }}\n cancel-in-progress: true\n\njobs:\n evaluate:\n name: AI Literacy Evaluation\n runs-on: ubuntu-latest\n permissions:\n contents: read\n pull-requests: write\n steps:\n - uses: actions/checkout@v4\n\n - name: Run evaluation\n id: eval\n env:\n AILF_API_KEY: ${{ secrets.AILF_API_KEY }}\n run: |\n npx @sanity/ailf@latest pipeline --remote \\\n --output /tmp/ailf-report.md \\\n ${{ inputs.full_eval != 'true' && '--debug' || '' }}\n\n - name: Post PR comment\n if: always() && github.event_name == 'pull_request'\n uses: actions/github-script@v7\n with:\n script: |\n const fs = require('fs');\n const marker = '<!-- ailf-score-report -->';\n let body;\n try {\n const report = fs.readFileSync('/tmp/ailf-report.md', 'utf-8');\n body = 
`${marker}\\n${report}`;\n } catch {\n body = `${marker}\\n## ⚠️ AI Literacy Evaluation\\n\\nNo report generated. Check the [workflow logs](${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}).`;\n }\n const { data: comments } = await github.rest.issues.listComments({\n owner: context.repo.owner, repo: context.repo.repo,\n issue_number: context.issue.number,\n });\n const existing = comments.find(c => c.body?.includes(marker));\n const params = {\n owner: context.repo.owner, repo: context.repo.repo,\n body,\n };\n if (existing) {\n await github.rest.issues.updateComment({ ...params, comment_id: existing.id });\n } else {\n await github.rest.issues.createComment({ ...params, issue_number: context.issue.number });\n }\n\n - name: Summary\n if: always()\n run: |\n if [ -f /tmp/ailf-report.md ]; then\n cat /tmp/ailf-report.md >> \"$GITHUB_STEP_SUMMARY\"\n else\n echo \"## ⚠️ AI Literacy Evaluation\" >> \"$GITHUB_STEP_SUMMARY\"\n echo \"\" >> \"$GITHUB_STEP_SUMMARY\"\n echo \"No report generated. Check the workflow logs.\" >> \"$GITHUB_STEP_SUMMARY\"\n fi\n";
@@ -106,6 +106,12 @@ export interface ResolvedConfig {
106
106
  };
107
107
  /** Job ID for tracking API-triggered evaluations in the Content Lake */
108
108
  jobId?: string;
109
+ /** Whether to run in remote mode (submit to AILF API) */
110
+ remote: boolean;
111
+ /** AILF API base URL */
112
+ apiUrl: string;
113
+ /** AILF API key (from AILF_API_KEY env var) */
114
+ apiKey?: string;
109
115
  }
110
116
  /**
111
117
  * Application context — the complete dependency carrier.
@@ -40,59 +40,7 @@ export declare const PipelineRequestSchema: z.ZodObject<{
40
40
  gapAnalysis: z.ZodOptional<z.ZodBoolean>;
41
41
  graderReplications: z.ZodOptional<z.ZodNumber>;
42
42
  headers: z.ZodOptional<z.ZodRecord<z.ZodString, z.ZodString>>;
43
- inlineTasks: z.ZodOptional<z.ZodArray<z.ZodObject<{
44
- assert: z.ZodArray<z.ZodUnion<readonly [z.ZodObject<{
45
- criteria: z.ZodArray<z.ZodString>;
46
- template: z.ZodString;
47
- type: z.ZodLiteral<"llm-rubric">;
48
- weight: z.ZodOptional<z.ZodNumber>;
49
- }, z.core.$strip>, z.ZodObject<{
50
- type: z.ZodLiteral<"contains">;
51
- value: z.ZodString;
52
- weight: z.ZodOptional<z.ZodNumber>;
53
- }, z.core.$strip>, z.ZodObject<{
54
- type: z.ZodLiteral<"contains-any">;
55
- value: z.ZodArray<z.ZodString>;
56
- weight: z.ZodOptional<z.ZodNumber>;
57
- }, z.core.$strip>, z.ZodObject<{
58
- type: z.ZodLiteral<"not-contains">;
59
- value: z.ZodString;
60
- weight: z.ZodOptional<z.ZodNumber>;
61
- }, z.core.$strip>, z.ZodObject<{
62
- type: z.ZodLiteral<"javascript">;
63
- value: z.ZodString;
64
- weight: z.ZodOptional<z.ZodNumber>;
65
- }, z.core.$strip>]>>;
66
- baseline: z.ZodOptional<z.ZodObject<{
67
- enabled: z.ZodOptional<z.ZodBoolean>;
68
- rubric: z.ZodOptional<z.ZodEnum<{
69
- full: "full";
70
- abbreviated: "abbreviated";
71
- none: "none";
72
- }>>;
73
- }, z.core.$strip>>;
74
- canonical_docs: z.ZodArray<z.ZodUnion<readonly [z.ZodObject<{
75
- reason: z.ZodDefault<z.ZodOptional<z.ZodString>>;
76
- slug: z.ZodString;
77
- }, z.core.$strip>, z.ZodObject<{
78
- path: z.ZodString;
79
- reason: z.ZodDefault<z.ZodOptional<z.ZodString>>;
80
- }, z.core.$strip>, z.ZodObject<{
81
- id: z.ZodString;
82
- reason: z.ZodDefault<z.ZodOptional<z.ZodString>>;
83
- }, z.core.$strip>, z.ZodObject<{
84
- perspective: z.ZodString;
85
- reason: z.ZodDefault<z.ZodOptional<z.ZodString>>;
86
- }, z.core.$strip>]>>;
87
- description: z.ZodString;
88
- doc_coverage: z.ZodOptional<z.ZodBoolean>;
89
- id: z.ZodString;
90
- reference_solution: z.ZodString;
91
- vars: z.ZodObject<{
92
- docs: z.ZodString;
93
- task: z.ZodString;
94
- }, z.core.$loose>;
95
- }, z.core.$strip>>>;
43
+ inlineTasks: z.ZodOptional<z.ZodArray<z.ZodRecord<z.ZodString, z.ZodUnknown>>>;
96
44
  jobId: z.ZodOptional<z.ZodString>;
97
45
  mode: z.ZodOptional<z.ZodEnum<{
98
46
  agentic: "agentic";
@@ -13,7 +13,6 @@
13
13
  * @see packages/eval/src/pipeline/map-request-to-config.ts — maps to ResolvedConfig
14
14
  */
15
15
  import { z } from "zod";
16
- import { SingleTaskSchema } from "./pipeline.js";
17
16
  // ---------------------------------------------------------------------------
18
17
  // Debug options — boolean shorthand or structured object
19
18
  // ---------------------------------------------------------------------------
@@ -49,7 +48,7 @@ export const PipelineRequestSchema = z.object({
49
48
  gapAnalysis: z.boolean().optional(),
50
49
  graderReplications: z.number().int().positive().optional(),
51
50
  headers: z.record(z.string(), z.string()).optional(),
52
- inlineTasks: z.array(SingleTaskSchema).optional(),
51
+ inlineTasks: z.array(z.record(z.string(), z.unknown())).optional(),
53
52
  jobId: z.string().optional(),
54
53
  mode: z.enum(["baseline", "agentic", "observed", "full"]).optional(),
55
54
  noCache: z.boolean().optional(),
@@ -0,0 +1,8 @@
1
+ /**
2
+ * cli.ts — Minimal CLI for standalone task validation.
3
+ *
4
+ * Usage:
5
+ * npx @sanity/ailf-tasks validate .ailf/tasks/
6
+ * npx @sanity/ailf-tasks validate # defaults to .ailf/tasks/
+ *
+ * run() takes its command from process.argv: "validate [dir]" validates a
+ * task directory, "--help", "-h" or no command prints usage, and any other
+ * command is reported as unknown before the process exits with status 1.
7
+ */
8
+ export declare function run(): void;
@@ -0,0 +1,61 @@
1
+ /**
2
+ * cli.ts — Minimal CLI for standalone task validation.
3
+ *
4
+ * Usage:
5
+ * npx @sanity/ailf-tasks validate .ailf/tasks/
6
+ * npx @sanity/ailf-tasks validate # defaults to .ailf/tasks/
7
+ */
8
+ import { loadTaskDir } from "./parser.js";
9
+ import { formatValidationResult, validateRepoTasks } from "./validation.js";
10
/**
 * CLI entry point: reads the sub-command from `process.argv` and dispatches it.
 *
 * - `validate [dir]` validates a task directory (falls back to `.ailf/tasks`).
 * - `--help`, `-h`, or no command at all prints the usage text.
 * - Any other command is reported on stderr, usage is printed, and the
 *   process exits with status 1.
 */
export function run() {
    const [command, dirArg] = process.argv.slice(2);
    switch (command) {
        case "validate":
            validateCommand(dirArg ?? ".ailf/tasks");
            return;
        case "--help":
        case "-h":
        case undefined:
            printUsage();
            return;
        default:
            console.error(`Unknown command: ${command}`);
            printUsage();
            process.exit(1);
    }
}
28
/**
 * Load every task file in `dir`, run semantic validation, and report the outcome.
 *
 * Prints a summary line followed by one line per task (`id — description`),
 * then the formatted validation messages when there are warnings or errors.
 * Exits the process with status 1 when loading throws or when validation
 * reports the tasks as invalid.
 *
 * @param dir - Directory containing the task YAML files
 */
function validateCommand(dir) {
    try {
        const tasks = loadTaskDir(dir);
        // Run semantic validation
        const result = validateRepoTasks(tasks);
        const formatted = formatValidationResult(result);
        // Only claim success when semantic validation actually passed; a
        // green check mark followed by exit status 1 is misleading.
        if (result.valid) {
            console.log(`✅ ${tasks.length} task(s) validated from ${dir}`);
        }
        else {
            console.log(`❌ ${tasks.length} task(s) loaded from ${dir} failed validation`);
        }
        for (const task of tasks) {
            console.log(` ${task.id} — ${task.description}`);
        }
        if (result.warnings.length > 0 || result.errors.length > 0) {
            console.log("");
            console.log(formatted);
        }
        if (!result.valid) {
            process.exit(1);
        }
    }
    catch (err) {
        // Loading/parsing failures surface as a single error line on stderr.
        console.error(`❌ ${err instanceof Error ? err.message : String(err)}`);
        process.exit(1);
    }
}
51
/**
 * Print the CLI usage text to stdout, one `console.log` call per line.
 */
function printUsage() {
    const usageLines = [
        "Usage: ailf-tasks <command> [options]",
        "",
        "Commands:",
        " validate [dir] Validate task YAML files (default: .ailf/tasks/)",
        "",
        "Examples:",
        " ailf-tasks validate",
        " ailf-tasks validate .ailf/tasks/",
        " ailf-tasks validate /path/to/tasks/",
    ];
    for (const line of usageLines) {
        console.log(line);
    }
}
@@ -0,0 +1,13 @@
1
+ /**
2
+ * @sanity/ailf-tasks — Task definition schemas and YAML parser.
3
+ *
4
+ * Lightweight package for parsing and validating .ailf/tasks/*.yaml files
5
+ * without depending on the full AILF CLI or its heavyweight dependencies
6
+ * (Promptfoo, LLM SDKs, Sanity client).
7
+ *
8
+ * Usage:
9
+ * import { parseTaskFile, loadTaskDir, RepoTaskSchema } from '@sanity/ailf-tasks'
10
+ */
11
+ export { CURATED_ASSERTION_TYPES, RepoTaskFileSchema, RepoTaskSchema, RUBRIC_TEMPLATE_NAMES, type CuratedAssertionType, type RepoTask, type RubricTemplateName, } from "./schemas.js";
12
+ export { loadTaskDir, parseTaskFile } from "./parser.js";
13
+ export { detectSnakeCaseFields, formatValidationResult, validateRepoTasks, type ValidationMessage, type ValidationResult, } from "./validation.js";
@@ -0,0 +1,16 @@
1
+ /**
2
+ * @sanity/ailf-tasks — Task definition schemas and YAML parser.
3
+ *
4
+ * Lightweight package for parsing and validating .ailf/tasks/*.yaml files
5
+ * without depending on the full AILF CLI or its heavyweight dependencies
6
+ * (Promptfoo, LLM SDKs, Sanity client).
7
+ *
8
+ * Usage:
9
+ * import { parseTaskFile, loadTaskDir, RepoTaskSchema } from '@sanity/ailf-tasks'
10
+ */
11
+ // Schemas and types
12
+ export { CURATED_ASSERTION_TYPES, RepoTaskFileSchema, RepoTaskSchema, RUBRIC_TEMPLATE_NAMES, } from "./schemas.js";
13
+ // Parsing
14
+ export { loadTaskDir, parseTaskFile } from "./parser.js";
15
+ // Validation
16
+ export { detectSnakeCaseFields, formatValidationResult, validateRepoTasks, } from "./validation.js";
@@ -0,0 +1,27 @@
1
+ /**
2
+ * parser.ts — Standalone task file and directory parsing.
3
+ *
4
+ * High-level functions for loading and validating .ailf/tasks/ YAML
5
+ * files without any dependency on the eval pipeline.
6
+ *
7
+ * Usage:
8
+ * import { parseTaskFile, loadTaskDir } from '@sanity/ailf-tasks'
9
+ */
10
+ import { type RepoTask } from "./schemas.js";
11
+ /**
12
+ * Parse a single task YAML string and return validated tasks.
13
+ *
+ * The parsed document must be a YAML array; it is then validated against
+ * RepoTaskFileSchema and every schema issue is listed in the thrown message.
+ *
14
+ * @param content - Raw YAML string content
15
+ * @param filename - Source filename (for error messages); defaults to "<string>"
16
+ * @returns Validated array of RepoTask objects
17
+ * @throws Error if YAML parsing fails, the document is not an array, or Zod validation fails
18
+ */
19
+ export declare function parseTaskFile(content: string, filename?: string): RepoTask[];
20
+ /**
21
+ * Load and parse all task YAML files from a directory.
22
+ *
+ * Only entries ending in .yaml or .yml whose names do not start with "." are
+ * read. Files are processed in sorted filename order and their tasks are
+ * concatenated in that order.
+ *
23
+ * @param dirPath - Path to directory containing .yaml/.yml files
24
+ * @returns All validated tasks, in sorted-filename order
25
+ * @throws Error if directory not found, no YAML files, or validation fails
+ * (a per-file failure is rethrown as "Failed to load <file>: ..." with the
+ * original error attached as `cause`)
26
+ */
27
+ export declare function loadTaskDir(dirPath: string): RepoTask[];
@@ -0,0 +1,73 @@
1
+ /**
2
+ * parser.ts — Standalone task file and directory parsing.
3
+ *
4
+ * High-level functions for loading and validating .ailf/tasks/ YAML
5
+ * files without any dependency on the eval pipeline.
6
+ *
7
+ * Usage:
8
+ * import { parseTaskFile, loadTaskDir } from '@sanity/ailf-tasks'
9
+ */
10
+ import { existsSync, readdirSync, readFileSync } from "fs";
11
+ import { resolve } from "path";
12
+ import { load } from "js-yaml";
13
+ import { RepoTaskFileSchema } from "./schemas.js";
14
+ // ---------------------------------------------------------------------------
15
+ // Public API
16
+ // ---------------------------------------------------------------------------
17
/**
 * Parse the YAML text of one task file and validate it against the
 * task-file schema.
 *
 * @param content - Raw YAML string content
 * @param filename - Source filename used in error messages (defaults to "<string>")
 * @returns The validated task array produced by RepoTaskFileSchema
 * @throws Error if the YAML cannot be parsed, the document is not an array,
 *   or schema validation fails (all schema issues are listed in the message)
 */
export function parseTaskFile(content, filename = "<string>") {
    // Hand the filename to js-yaml so YAML syntax errors name the offending
    // file, matching the other error messages raised here.
    const parsed = load(content, { filename });
    if (!Array.isArray(parsed)) {
        throw new Error(`${filename} did not parse to an array of tasks. ` +
            "Task files must contain a YAML array of task definitions.");
    }
    const result = RepoTaskFileSchema.safeParse(parsed);
    if (!result.success) {
        // One line per schema issue, prefixed with the dotted path to the field.
        const messages = result.error.issues
            .map((i) => ` [${i.path.join(".")}]: ${i.message}`)
            .join("\n");
        throw new Error(`Invalid task file "${filename}":\n${messages}`);
    }
    return result.data;
}
40
/**
 * Read every task YAML file in a directory and return the combined tasks.
 *
 * Entries are kept when they end in `.yaml` or `.yml` and do not start with
 * a dot; they are handled in sorted filename order.
 *
 * @param dirPath - Path to directory containing .yaml/.yml files
 * @returns All validated tasks, concatenated in sorted-filename order
 * @throws Error if the directory does not exist, contains no matching YAML
 *   files, or any file fails to parse/validate (rethrown with the file name
 *   and the original error as `cause`)
 */
export function loadTaskDir(dirPath) {
    if (!existsSync(dirPath)) {
        throw new Error(`Tasks directory not found: ${dirPath}\n Expected a directory containing .ailf/tasks/*.yaml files.`);
    }
    const isVisibleYaml = (name) => !name.startsWith(".") && (name.endsWith(".yaml") || name.endsWith(".yml"));
    const yamlFiles = readdirSync(dirPath).filter((name) => isVisibleYaml(name));
    yamlFiles.sort();
    if (yamlFiles.length === 0) {
        throw new Error(`No YAML files found in ${dirPath}\n Expected .ailf/tasks/*.yaml files with task definitions.`);
    }
    return yamlFiles.flatMap((file) => {
        const content = readFileSync(resolve(dirPath, file), "utf-8");
        try {
            return parseTaskFile(content, file);
        }
        catch (err) {
            // Add the file name for context while keeping the original error reachable.
            const detail = err instanceof Error ? err.message : String(err);
            throw new Error(`Failed to load ${file}:\n${detail}`, { cause: err });
        }
    });
}
@@ -0,0 +1,186 @@
1
+ /**
2
+ * schemas.ts — Zod schemas for repo-based task definitions.
3
+ *
4
+ * Validates .ailf/tasks/*.yaml task files from external repositories.
5
+ * These schemas are the contract between external repos and the AILF eval
6
+ * pipeline — they define exactly what fields are accepted, with friendly
7
+ * error messages for authors writing task YAML by hand.
8
+ *
9
+ * This module is the single source of truth for task schemas. The eval
10
+ * package re-exports from here to avoid duplication.
11
+ *
12
+ * @see docs/exec-plans/completed/tasks-as-content/phase-4-repo-based-tasks.md
13
+ */
14
+ import { z } from "zod";
15
+ /**
16
+ * The set of assertion types allowed in repo-based task files.
17
+ *
18
+ * This is a curated subset of Promptfoo assertion types — we expose only the
19
+ * types that are stable, well-documented, and useful for external authors.
+ *
+ * Declared as a readonly tuple of string literals; the companion
+ * CuratedAssertionType alias is the union of those literals.
20
+ */
21
+ export declare const CURATED_ASSERTION_TYPES: readonly ["llm-rubric", "contains", "contains-any", "contains-all", "not-contains", "icontains", "icontains-any", "regex", "javascript", "similar", "cost", "latency"];
22
+ export type CuratedAssertionType = (typeof CURATED_ASSERTION_TYPES)[number];
23
+ /**
24
+ * Valid rubric template names — must match keys in config/rubrics.yaml.
+ *
+ * Declared as a readonly tuple of string literals; the companion
+ * RubricTemplateName alias is the union of those literals.
25
+ */
26
+ export declare const RUBRIC_TEMPLATE_NAMES: readonly ["task-completion", "code-correctness", "doc-coverage"];
27
+ export type RubricTemplateName = (typeof RUBRIC_TEMPLATE_NAMES)[number];
28
+ /**
29
+ * Zod schema for a single repo-based task definition.
30
+ *
31
+ * This is the external-author-facing contract. Field names are camelCase
32
+ * to match the Content Lake document schema (ailf.task).
+ *
+ * Per the declared type: id, description, featureArea and assert are
+ * required; tags, vars, baseline, referenceSolution and execution are
+ * optional; canonicalDocs and docCoverage are optional on input but carry a
+ * schema default. Each assert entry is either an "llm-rubric" object
+ * (template, criteria, optional weight) or a loose object whose type is one
+ * of the curated assertion types with optional value/threshold/weight.
+ * RepoTask is the type inferred from this schema.
33
+ */
34
+ export declare const RepoTaskSchema: z.ZodObject<{
35
+ id: z.ZodString;
36
+ description: z.ZodString;
37
+ featureArea: z.ZodString;
38
+ tags: z.ZodOptional<z.ZodArray<z.ZodString>>;
39
+ canonicalDocs: z.ZodDefault<z.ZodOptional<z.ZodArray<z.ZodUnion<readonly [z.ZodObject<{
40
+ id: z.ZodString;
41
+ reason: z.ZodDefault<z.ZodOptional<z.ZodString>>;
42
+ slug: z.ZodOptional<z.ZodString>;
43
+ path: z.ZodOptional<z.ZodString>;
44
+ }, z.core.$strip>, z.ZodObject<{
45
+ slug: z.ZodString;
46
+ reason: z.ZodDefault<z.ZodOptional<z.ZodString>>;
47
+ }, z.core.$strip>, z.ZodObject<{
48
+ path: z.ZodString;
49
+ reason: z.ZodDefault<z.ZodOptional<z.ZodString>>;
50
+ }, z.core.$strip>, z.ZodObject<{
51
+ perspective: z.ZodString;
52
+ reason: z.ZodDefault<z.ZodOptional<z.ZodString>>;
53
+ }, z.core.$strip>]>>>>;
54
+ vars: z.ZodOptional<z.ZodObject<{
55
+ task: z.ZodString;
56
+ }, z.core.$loose>>;
57
+ assert: z.ZodArray<z.ZodUnion<readonly [z.ZodObject<{
58
+ type: z.ZodLiteral<"llm-rubric">;
59
+ template: z.ZodEnum<{
60
+ "task-completion": "task-completion";
61
+ "code-correctness": "code-correctness";
62
+ "doc-coverage": "doc-coverage";
63
+ }>;
64
+ criteria: z.ZodArray<z.ZodString>;
65
+ weight: z.ZodOptional<z.ZodNumber>;
66
+ }, z.core.$strip>, z.ZodObject<{
67
+ type: z.ZodEnum<{
68
+ "llm-rubric": "llm-rubric";
69
+ contains: "contains";
70
+ "contains-any": "contains-any";
71
+ "contains-all": "contains-all";
72
+ "not-contains": "not-contains";
73
+ icontains: "icontains";
74
+ "icontains-any": "icontains-any";
75
+ regex: "regex";
76
+ javascript: "javascript";
77
+ similar: "similar";
78
+ cost: "cost";
79
+ latency: "latency";
80
+ }>;
81
+ value: z.ZodOptional<z.ZodUnknown>;
82
+ threshold: z.ZodOptional<z.ZodNumber>;
83
+ weight: z.ZodOptional<z.ZodNumber>;
84
+ }, z.core.$loose>]>>;
85
+ baseline: z.ZodOptional<z.ZodObject<{
86
+ enabled: z.ZodOptional<z.ZodBoolean>;
87
+ rubric: z.ZodOptional<z.ZodEnum<{
88
+ abbreviated: "abbreviated";
89
+ full: "full";
90
+ none: "none";
91
+ }>>;
92
+ }, z.core.$strip>>;
93
+ docCoverage: z.ZodDefault<z.ZodOptional<z.ZodBoolean>>;
94
+ referenceSolution: z.ZodOptional<z.ZodString>;
95
+ execution: z.ZodOptional<z.ZodObject<{
96
+ enabled: z.ZodDefault<z.ZodOptional<z.ZodBoolean>>;
97
+ blocking: z.ZodDefault<z.ZodOptional<z.ZodBoolean>>;
98
+ threshold: z.ZodOptional<z.ZodObject<{
99
+ score: z.ZodOptional<z.ZodNumber>;
100
+ }, z.core.$strip>>;
101
+ trigger: z.ZodOptional<z.ZodObject<{
102
+ branches: z.ZodOptional<z.ZodArray<z.ZodString>>;
103
+ paths: z.ZodOptional<z.ZodArray<z.ZodString>>;
104
+ }, z.core.$strip>>;
105
+ source: z.ZodOptional<z.ZodString>;
106
+ }, z.core.$strip>>;
107
+ }, z.core.$strip>;
108
+ export type RepoTask = z.infer<typeof RepoTaskSchema>;
109
+ /**
110
+ * Schema for an array of repo tasks — what a single .ailf/tasks/*.yaml file
111
+ * contains. Each file must define at least one task.
+ *
+ * The declared element shape repeats the RepoTaskSchema object type field
+ * for field.
112
+ */
113
+ export declare const RepoTaskFileSchema: z.ZodArray<z.ZodObject<{
114
+ id: z.ZodString;
115
+ description: z.ZodString;
116
+ featureArea: z.ZodString;
117
+ tags: z.ZodOptional<z.ZodArray<z.ZodString>>;
118
+ canonicalDocs: z.ZodDefault<z.ZodOptional<z.ZodArray<z.ZodUnion<readonly [z.ZodObject<{
119
+ id: z.ZodString;
120
+ reason: z.ZodDefault<z.ZodOptional<z.ZodString>>;
121
+ slug: z.ZodOptional<z.ZodString>;
122
+ path: z.ZodOptional<z.ZodString>;
123
+ }, z.core.$strip>, z.ZodObject<{
124
+ slug: z.ZodString;
125
+ reason: z.ZodDefault<z.ZodOptional<z.ZodString>>;
126
+ }, z.core.$strip>, z.ZodObject<{
127
+ path: z.ZodString;
128
+ reason: z.ZodDefault<z.ZodOptional<z.ZodString>>;
129
+ }, z.core.$strip>, z.ZodObject<{
130
+ perspective: z.ZodString;
131
+ reason: z.ZodDefault<z.ZodOptional<z.ZodString>>;
132
+ }, z.core.$strip>]>>>>;
133
+ vars: z.ZodOptional<z.ZodObject<{
134
+ task: z.ZodString;
135
+ }, z.core.$loose>>;
136
+ assert: z.ZodArray<z.ZodUnion<readonly [z.ZodObject<{
137
+ type: z.ZodLiteral<"llm-rubric">;
138
+ template: z.ZodEnum<{
139
+ "task-completion": "task-completion";
140
+ "code-correctness": "code-correctness";
141
+ "doc-coverage": "doc-coverage";
142
+ }>;
143
+ criteria: z.ZodArray<z.ZodString>;
144
+ weight: z.ZodOptional<z.ZodNumber>;
145
+ }, z.core.$strip>, z.ZodObject<{
146
+ type: z.ZodEnum<{
147
+ "llm-rubric": "llm-rubric";
148
+ contains: "contains";
149
+ "contains-any": "contains-any";
150
+ "contains-all": "contains-all";
151
+ "not-contains": "not-contains";
152
+ icontains: "icontains";
153
+ "icontains-any": "icontains-any";
154
+ regex: "regex";
155
+ javascript: "javascript";
156
+ similar: "similar";
157
+ cost: "cost";
158
+ latency: "latency";
159
+ }>;
160
+ value: z.ZodOptional<z.ZodUnknown>;
161
+ threshold: z.ZodOptional<z.ZodNumber>;
162
+ weight: z.ZodOptional<z.ZodNumber>;
163
+ }, z.core.$loose>]>>;
164
+ baseline: z.ZodOptional<z.ZodObject<{
165
+ enabled: z.ZodOptional<z.ZodBoolean>;
166
+ rubric: z.ZodOptional<z.ZodEnum<{
167
+ abbreviated: "abbreviated";
168
+ full: "full";
169
+ none: "none";
170
+ }>>;
171
+ }, z.core.$strip>>;
172
+ docCoverage: z.ZodDefault<z.ZodOptional<z.ZodBoolean>>;
173
+ referenceSolution: z.ZodOptional<z.ZodString>;
174
+ execution: z.ZodOptional<z.ZodObject<{
175
+ enabled: z.ZodDefault<z.ZodOptional<z.ZodBoolean>>;
176
+ blocking: z.ZodDefault<z.ZodOptional<z.ZodBoolean>>;
177
+ threshold: z.ZodOptional<z.ZodObject<{
178
+ score: z.ZodOptional<z.ZodNumber>;
179
+ }, z.core.$strip>>;
180
+ trigger: z.ZodOptional<z.ZodObject<{
181
+ branches: z.ZodOptional<z.ZodArray<z.ZodString>>;
182
+ paths: z.ZodOptional<z.ZodArray<z.ZodString>>;
183
+ }, z.core.$strip>>;
184
+ source: z.ZodOptional<z.ZodString>;
185
+ }, z.core.$strip>>;
186
+ }, z.core.$strip>>;