@sanity/ailf 3.7.0 → 3.8.1

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (104)
  1. package/config/airbyte/ai_literacy_framework.connector.yaml +1 -1
  2. package/config/thresholds.ts +3 -3
  3. package/dist/_vendor/ailf-core/examples/index.d.ts +2 -2
  4. package/dist/_vendor/ailf-core/examples/index.js +2 -2
  5. package/dist/_vendor/ailf-core/ports/context.d.ts +0 -4
  6. package/dist/_vendor/ailf-core/schemas/eval-config.d.ts +38 -12
  7. package/dist/_vendor/ailf-core/schemas/eval-config.js +102 -22
  8. package/dist/_vendor/ailf-core/schemas/pipeline-request.d.ts +4 -6
  9. package/dist/_vendor/ailf-core/schemas/pipeline-request.js +1 -3
  10. package/dist/_vendor/ailf-core/schemas/schedules.d.ts +2 -2
  11. package/dist/_vendor/ailf-shared/run-classification.d.ts +2 -2
  12. package/dist/_vendor/ailf-shared/run-classification.js +1 -1
  13. package/dist/_vendor/ailf-shared/run-context.d.ts +1 -1
  14. package/dist/adapters/api-client/build-request.d.ts +0 -2
  15. package/dist/adapters/api-client/build-request.js +2 -6
  16. package/dist/adapters/config-sources/cli-config-adapter.d.ts +1 -1
  17. package/dist/adapters/config-sources/file-config-adapter.d.ts +1 -1
  18. package/dist/adapters/config-sources/file-config-adapter.js +42 -17
  19. package/dist/adapters/task-sources/repo-schemas.d.ts +41 -3
  20. package/dist/adapters/task-sources/repo-schemas.js +127 -0
  21. package/dist/cli-program.d.ts +39 -0
  22. package/dist/cli-program.js +137 -0
  23. package/dist/cli.d.ts +8 -2
  24. package/dist/cli.js +128 -142
  25. package/dist/commands/agent-report.js +1 -1
  26. package/dist/commands/calculate-scores.js +0 -2
  27. package/dist/commands/check-staleness.js +1 -1
  28. package/dist/commands/chronic-failures.js +4 -4
  29. package/dist/commands/coverage-audit.js +6 -7
  30. package/dist/commands/discovery-report.js +16 -4
  31. package/dist/commands/eval.d.ts +1 -1
  32. package/dist/commands/eval.js +1 -1
  33. package/dist/commands/explain-handler.d.ts +1 -1
  34. package/dist/commands/explain-handler.js +13 -44
  35. package/dist/commands/fetch-docs.js +0 -2
  36. package/dist/commands/generate-configs.js +0 -2
  37. package/dist/commands/grader/index.js +3 -3
  38. package/dist/commands/init.d.ts +2 -2
  39. package/dist/commands/init.js +10 -9
  40. package/dist/commands/interactive.d.ts +1 -1
  41. package/dist/commands/interactive.js +8 -8
  42. package/dist/commands/pipeline-action.d.ts +1 -3
  43. package/dist/commands/pipeline-action.js +174 -140
  44. package/dist/commands/pr-comment.js +1 -3
  45. package/dist/commands/publish.d.ts +1 -1
  46. package/dist/commands/publish.js +2 -4
  47. package/dist/commands/readiness-report.js +17 -8
  48. package/dist/commands/remote-pipeline.d.ts +1 -1
  49. package/dist/commands/remote-pipeline.js +1 -3
  50. package/dist/commands/run.d.ts +64 -0
  51. package/dist/commands/{pipeline.js → run.js} +19 -30
  52. package/dist/commands/shared/help.js +4 -4
  53. package/dist/commands/shared/options.d.ts +29 -3
  54. package/dist/commands/shared/options.js +37 -13
  55. package/dist/commands/validate-tasks.js +1 -1
  56. package/dist/commands/validate.d.ts +1 -1
  57. package/dist/commands/validate.js +2 -2
  58. package/dist/commands/weekly-digest.js +3 -3
  59. package/dist/config/thresholds.ts +3 -3
  60. package/dist/orchestration/build-app-context.js +0 -2
  61. package/dist/orchestration/build-step-sequence.js +1 -11
  62. package/dist/orchestration/steps/fetch-docs-step.js +1 -1
  63. package/dist/orchestration/steps/index.d.ts +0 -2
  64. package/dist/orchestration/steps/index.js +0 -2
  65. package/dist/orchestration/steps/run-eval-step.js +1 -1
  66. package/dist/pipeline/cache.d.ts +1 -1
  67. package/dist/pipeline/map-request-to-config.js +0 -2
  68. package/dist/pipeline/mirror-repo-tasks.d.ts +1 -1
  69. package/dist/pipeline/plan.d.ts +2 -4
  70. package/dist/pipeline/plan.js +4 -32
  71. package/dist/pipeline/run-context.d.ts +1 -1
  72. package/dist/pipeline/run-context.js +4 -4
  73. package/dist/pipeline/validate.d.ts +1 -1
  74. package/dist/pipeline/validate.js +1 -1
  75. package/package.json +11 -9
  76. package/dist/commands/pipeline.d.ts +0 -77
  77. package/dist/orchestration/steps/discovery-report-step.d.ts +0 -13
  78. package/dist/orchestration/steps/discovery-report-step.js +0 -62
  79. package/dist/orchestration/steps/readiness-step.d.ts +0 -13
  80. package/dist/orchestration/steps/readiness-step.js +0 -98
  81. package/dist/pipeline/compiler/__tests__/agent-harness-handler.test.d.ts +0 -10
  82. package/dist/pipeline/compiler/__tests__/agent-harness-handler.test.js +0 -366
  83. package/dist/pipeline/compiler/__tests__/assertion-mapper.test.d.ts +0 -9
  84. package/dist/pipeline/compiler/__tests__/assertion-mapper.test.js +0 -145
  85. package/dist/pipeline/compiler/__tests__/knowledge-probe-handler.test.d.ts +0 -10
  86. package/dist/pipeline/compiler/__tests__/knowledge-probe-handler.test.js +0 -314
  87. package/dist/pipeline/compiler/__tests__/literacy-handler.test.d.ts +0 -10
  88. package/dist/pipeline/compiler/__tests__/literacy-handler.test.js +0 -486
  89. package/dist/pipeline/compiler/__tests__/mcp-server-handler.test.d.ts +0 -10
  90. package/dist/pipeline/compiler/__tests__/mcp-server-handler.test.js +0 -425
  91. package/dist/pipeline/compiler/__tests__/promptfoo-compiler.test.d.ts +0 -9
  92. package/dist/pipeline/compiler/__tests__/promptfoo-compiler.test.js +0 -332
  93. package/dist/pipeline/compiler/__tests__/sandbox-and-fixtures.test.d.ts +0 -12
  94. package/dist/pipeline/compiler/__tests__/sandbox-and-fixtures.test.js +0 -210
  95. package/dist/pipeline/compiler/__tests__/scoring-and-presets.test.d.ts +0 -7
  96. package/dist/pipeline/compiler/__tests__/scoring-and-presets.test.js +0 -404
  97. package/dist/pipeline/compiler/__tests__/scoring-bridge.test.d.ts +0 -10
  98. package/dist/pipeline/compiler/__tests__/scoring-bridge.test.js +0 -184
  99. package/dist/pipeline/compiler/__tests__/task-graph-builder.test.d.ts +0 -8
  100. package/dist/pipeline/compiler/__tests__/task-graph-builder.test.js +0 -301
  101. package/dist/pipeline/compiler/__tests__/telemetry.test.d.ts +0 -9
  102. package/dist/pipeline/compiler/__tests__/telemetry.test.js +0 -503
  103. package/dist/pipeline/compiler/__tests__/tool-loop-openai.test.d.ts +0 -10
  104. package/dist/pipeline/compiler/__tests__/tool-loop-openai.test.js +0 -509
@@ -489,7 +489,7 @@ schemas:
489
489
  - string
490
490
  - "null"
491
491
  description:
492
- "Run classification (D0037): official | ad-hoc | experimental | test |
492
+ "Run classification (D0037): official | adhoc | experimental | test |
493
493
  external. Orthogonal to trigger_type."
494
494
  owner_team:
495
495
  type:
@@ -2,9 +2,9 @@
2
2
  * thresholds.ts — Quality thresholds for readiness gates and regression alerts.
3
3
  *
4
4
  * Used by:
5
- * - `npx @sanity/ailf pipeline --readiness` (launch readiness checklist)
6
- * - `npx @sanity/ailf pipeline --publish` (severity-aware sink routing)
7
- * - `npx @sanity/ailf pipeline --compare` (regression alerting)
5
+ * - `npx @sanity/ailf report readiness` (launch readiness checklist)
6
+ * - `npx @sanity/ailf run --publish` (severity-aware sink routing)
7
+ * - `npx @sanity/ailf run --compare` (regression alerting)
8
8
  *
9
9
  * @see docs/archive/exec-plans/scenario-matrix-implementation/phase-5-readiness-thresholds.md
10
10
  */
@@ -87,7 +87,7 @@ export declare const thresholdData: {
87
87
  };
88
88
  };
89
89
  /** Raw YAML string for threshold example (preserves comments) */
90
- export declare const thresholdYaml = "# Example quality threshold configuration.\n#\n# Thresholds define the minimum scores for readiness gates.\n# The pipeline's --readiness flag evaluates scores against these\n# thresholds and produces a go/no-go checklist.\n#\n# Global thresholds apply to all areas unless overridden per-area.\n\nglobal:\n composite: 60\n dimensions:\n task_completion: 55\n code_correctness: 50\n doc_coverage: 50\n ceiling: 70\n docLift: 10\n\nareas:\n groq:\n composite: 65\n ceiling: 75\n";
90
+ export declare const thresholdYaml = "# Example quality threshold configuration.\n#\n# Thresholds define the minimum scores for readiness gates.\n# The `ailf report readiness` command evaluates scores against these\n# thresholds and produces a go/no-go checklist.\n#\n# Global thresholds apply to all areas unless overridden per-area.\n\nglobal:\n composite: 60\n dimensions:\n task_completion: 55\n code_correctness: 50\n doc_coverage: 50\n ceiling: 70\n docLift: 10\n\nareas:\n groq:\n composite: 65\n ceiling: 75\n";
91
91
  /** Parsed ailf-config example data (JSON-safe) */
92
92
  export declare const ailfConfigData: {
93
93
  readonly source: {
@@ -433,6 +433,6 @@ export interface ExampleRecord {
433
433
  }
434
434
  export declare const EXAMPLES: Record<ExampleType, ExampleRecord>;
435
435
  /** GitHub Actions workflow template for AI Literacy evaluation */
436
- export declare const workflowYaml = "# \u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n# AI Literacy Evaluation \u2014 GitHub Actions workflow\n# \u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n#\n# Evaluates your documentation quality on every pull request.\n# The AILF CLI reads your .ailf/tasks/ definitions, submits them\n# to the AILF API for evaluation, and writes a score report.\n#\n# Prerequisites:\n# Add one secret to your repository (Settings \u2192 Secrets \u2192 Actions):\n# AILF_API_KEY \u2014 your API key (starts with ailf_live_sk_)\n#\n# @sanity/ailf is published with public npm access, so no npm token is\n# needed to install the CLI.\n#\n# Customization:\n# - Narrow the trigger paths to reduce cost (see comment below)\n# - Check debug_mode for faster iteration (fewer tests)\n# - See: https://github.com/sanity-labs/ai-literacy-framework\n# 
\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n\nname: AI Literacy Eval\n\non:\n pull_request:\n branches: [main]\n # Runs on every PR to main by default. To reduce cost:\n # paths: [\".ailf/**\", \"docs/**\"]\n\n workflow_dispatch:\n inputs:\n debug_mode:\n description: \"Run in debug mode (fewer tests, faster iteration)\"\n type: boolean\n default: false\n\nconcurrency:\n group: ailf-eval-${{ github.event.pull_request.number || github.ref }}\n cancel-in-progress: true\n\njobs:\n evaluate:\n name: AI Literacy Evaluation\n runs-on: ubuntu-latest\n permissions:\n contents: read\n pull-requests: write\n steps:\n - uses: actions/checkout@v4\n\n - name: Run evaluation\n id: eval\n env:\n AILF_API_KEY: ${{ secrets.AILF_API_KEY }}\n # \u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n # D0037 run provenance envelope \u2014 REPLACE THE OWNER TEAM SLUG\n # below. Unedited templates produce runs tagged with the literal\n # placeholder so you can spot them in Studio / BigQuery and fix.\n #\n # AILF_CLASSIFICATION values: official | ad-hoc | experimental |\n # test | external. 
External teams should use `ad-hoc` by default;\n # `official` is reserved for the core-docs scheduled series.\n # \u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n AILF_CLASSIFICATION: ad-hoc\n AILF_OWNER_TEAM: \"<REPLACE-WITH-YOUR-TEAM-SLUG>\"\n AILF_OWNER_INDIVIDUAL: ${{ github.actor }}\n run: |\n npx @sanity/ailf@latest pipeline --remote \\\n --output /tmp/ailf-report.md \\\n ${{ inputs.debug_mode && '--debug' || '' }}\n\n - name: Post PR comment\n if: always() && github.event_name == 'pull_request'\n uses: actions/github-script@v7\n with:\n script: |\n const fs = require('fs');\n\n // --- Constants ---\n const MARKER = '<!-- ailf-score-report -->';\n const HISTORY_START = '<!-- ailf-score-history -->';\n const HISTORY_END = '<!-- /ailf-score-history -->';\n const MAX_HISTORY = 3; // keep at most 3 prior runs\n\n // --- Read new report ---\n let newReport;\n try {\n newReport = fs.readFileSync('/tmp/ailf-report.md', 'utf-8');\n } catch {\n newReport = `## \u26A0\uFE0F AI Literacy Evaluation\\n\\nNo report generated. 
Check the [workflow logs](${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}).`;\n }\n\n const prNumber = context.issue?.number || context.payload?.pull_request?.number;\n if (!prNumber) {\n console.log('No PR number found, skipping comment');\n return;\n }\n\n // --- Find existing comment ---\n const { data: comments } = await github.rest.issues.listComments({\n owner: context.repo.owner, repo: context.repo.repo,\n issue_number: prNumber,\n });\n const existing = comments.find(c => c.body?.includes(MARKER));\n\n // --- Build history from previous comment ---\n let historyEntries = [];\n if (existing) {\n const oldBody = existing.body || '';\n\n // Collect existing collapsed history entries\n const histStart = oldBody.indexOf(HISTORY_START);\n const histEnd = oldBody.indexOf(HISTORY_END);\n if (histStart !== -1 && histEnd !== -1) {\n const historyContent = oldBody.slice(histStart + HISTORY_START.length, histEnd).trim();\n // Split on </details> boundaries to get individual entries\n if (historyContent) {\n historyEntries = historyContent\n .split(/<\\/details>\\s*/)\n .map(s => s.trim())\n .filter(s => s.startsWith('<details>'))\n .map(s => s + '\\n</details>');\n }\n }\n\n // Extract the current report (will become the newest history entry)\n let previousReport = '';\n if (histStart !== -1) {\n // Report is between MARKER and the \"Previous runs\" heading (or history section)\n const markerIdx = oldBody.indexOf(MARKER);\n // Find the --- separator before history\n const separatorIdx = oldBody.lastIndexOf('---', histStart);\n const endIdx = separatorIdx > markerIdx ? 
separatorIdx : histStart;\n previousReport = oldBody.slice(markerIdx + MARKER.length, endIdx).trim();\n } else {\n // No history yet \u2014 everything after MARKER is the report\n const markerIdx = oldBody.indexOf(MARKER);\n if (markerIdx !== -1) {\n previousReport = oldBody.slice(markerIdx + MARKER.length).trim();\n }\n }\n\n // Collapse the previous report into a <details> entry\n if (previousReport) {\n const scoreMatch = previousReport.match(/Overall:\\s*(\\d+)\\/100/);\n const score = scoreMatch ? scoreMatch[1] : '?';\n const dateMatch = previousReport.match(/Generated by.*?\u00B7\\s*([^\u00B7<\\n*]+)/);\n const date = dateMatch\n ? dateMatch[1].trim()\n : new Date().toISOString().slice(0, 16).replace('T', ' ') + ' UTC';\n const entry = `<details>\\n<summary>\uD83D\uDCDC ${date} \u2014 ${score}/100</summary>\\n\\n${previousReport}\\n\\n</details>`;\n historyEntries.unshift(entry); // newest first\n }\n\n // Enforce max history limit\n historyEntries = historyEntries.slice(0, MAX_HISTORY);\n }\n\n // --- Assemble final comment ---\n const historySection = historyEntries.length > 0\n ? 
`\\n\\n---\\n\\n### \uD83D\uDCDC Previous runs\\n\\n${HISTORY_START}\\n${historyEntries.join('\\n\\n')}\\n${HISTORY_END}`\n : '';\n const finalBody = `${MARKER}\\n${newReport}${historySection}`;\n\n if (existing) {\n await github.rest.issues.updateComment({\n owner: context.repo.owner, repo: context.repo.repo,\n comment_id: existing.id, body: finalBody,\n });\n console.log(`Updated comment (${historyEntries.length} history entries)`);\n } else {\n await github.rest.issues.createComment({\n owner: context.repo.owner, repo: context.repo.repo,\n issue_number: prNumber, body: finalBody,\n });\n console.log('Created new PR comment');\n }\n\n - name: Summary\n if: always()\n run: |\n if [ -f /tmp/ailf-report.md ]; then\n cat /tmp/ailf-report.md >> \"$GITHUB_STEP_SUMMARY\"\n else\n echo \"## \u26A0\uFE0F AI Literacy Evaluation\" >> \"$GITHUB_STEP_SUMMARY\"\n echo \"\" >> \"$GITHUB_STEP_SUMMARY\"\n echo \"No report generated. Check the workflow logs.\" >> \"$GITHUB_STEP_SUMMARY\"\n fi\n";
436
+ export declare const workflowYaml = "# \u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n# AI Literacy Evaluation \u2014 GitHub Actions workflow\n# \u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n#\n# Evaluates your documentation quality on every pull request.\n# The AILF CLI reads your .ailf/tasks/ definitions, submits them\n# to the AILF API for evaluation, and writes a score report.\n#\n# Prerequisites:\n# Add one secret to your repository (Settings \u2192 Secrets \u2192 Actions):\n# AILF_API_KEY \u2014 your API key (starts with ailf_live_sk_)\n#\n# @sanity/ailf is published with public npm access, so no npm token is\n# needed to install the CLI.\n#\n# Customization:\n# - Narrow the trigger paths to reduce cost (see comment below)\n# - Check debug_mode for faster iteration (fewer tests)\n# - See: https://github.com/sanity-labs/ai-literacy-framework\n# 
\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n\nname: AI Literacy Eval\n\non:\n pull_request:\n branches: [main]\n # Runs on every PR to main by default. To reduce cost:\n # paths: [\".ailf/**\", \"docs/**\"]\n\n workflow_dispatch:\n inputs:\n debug_mode:\n description: \"Run in debug mode (fewer tests, faster iteration)\"\n type: boolean\n default: false\n\nconcurrency:\n group: ailf-eval-${{ github.event.pull_request.number || github.ref }}\n cancel-in-progress: true\n\njobs:\n evaluate:\n name: AI Literacy Evaluation\n runs-on: ubuntu-latest\n permissions:\n contents: read\n pull-requests: write\n steps:\n - uses: actions/checkout@v4\n\n - name: Run evaluation\n id: eval\n env:\n AILF_API_KEY: ${{ secrets.AILF_API_KEY }}\n # \u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n # D0037 run provenance envelope \u2014 REPLACE THE OWNER TEAM SLUG\n # below. Unedited templates produce runs tagged with the literal\n # placeholder so you can spot them in Studio / BigQuery and fix.\n #\n # AILF_CLASSIFICATION values: official | adhoc | experimental |\n # test | external. 
External teams should use `adhoc` by default;\n # `official` is reserved for the core-docs scheduled series.\n # \u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n AILF_CLASSIFICATION: adhoc\n AILF_OWNER_TEAM: \"<REPLACE-WITH-YOUR-TEAM-SLUG>\"\n AILF_OWNER_INDIVIDUAL: ${{ github.actor }}\n run: |\n npx @sanity/ailf@latest run --remote \\\n --output /tmp/ailf-report.md \\\n ${{ inputs.debug_mode && '--debug' || '' }}\n\n - name: Post PR comment\n if: always() && github.event_name == 'pull_request'\n uses: actions/github-script@v7\n with:\n script: |\n const fs = require('fs');\n\n // --- Constants ---\n const MARKER = '<!-- ailf-score-report -->';\n const HISTORY_START = '<!-- ailf-score-history -->';\n const HISTORY_END = '<!-- /ailf-score-history -->';\n const MAX_HISTORY = 3; // keep at most 3 prior runs\n\n // --- Read new report ---\n let newReport;\n try {\n newReport = fs.readFileSync('/tmp/ailf-report.md', 'utf-8');\n } catch {\n newReport = `## \u26A0\uFE0F AI Literacy Evaluation\\n\\nNo report generated. 
Check the [workflow logs](${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}).`;\n }\n\n const prNumber = context.issue?.number || context.payload?.pull_request?.number;\n if (!prNumber) {\n console.log('No PR number found, skipping comment');\n return;\n }\n\n // --- Find existing comment ---\n const { data: comments } = await github.rest.issues.listComments({\n owner: context.repo.owner, repo: context.repo.repo,\n issue_number: prNumber,\n });\n const existing = comments.find(c => c.body?.includes(MARKER));\n\n // --- Build history from previous comment ---\n let historyEntries = [];\n if (existing) {\n const oldBody = existing.body || '';\n\n // Collect existing collapsed history entries\n const histStart = oldBody.indexOf(HISTORY_START);\n const histEnd = oldBody.indexOf(HISTORY_END);\n if (histStart !== -1 && histEnd !== -1) {\n const historyContent = oldBody.slice(histStart + HISTORY_START.length, histEnd).trim();\n // Split on </details> boundaries to get individual entries\n if (historyContent) {\n historyEntries = historyContent\n .split(/<\\/details>\\s*/)\n .map(s => s.trim())\n .filter(s => s.startsWith('<details>'))\n .map(s => s + '\\n</details>');\n }\n }\n\n // Extract the current report (will become the newest history entry)\n let previousReport = '';\n if (histStart !== -1) {\n // Report is between MARKER and the \"Previous runs\" heading (or history section)\n const markerIdx = oldBody.indexOf(MARKER);\n // Find the --- separator before history\n const separatorIdx = oldBody.lastIndexOf('---', histStart);\n const endIdx = separatorIdx > markerIdx ? 
separatorIdx : histStart;\n previousReport = oldBody.slice(markerIdx + MARKER.length, endIdx).trim();\n } else {\n // No history yet \u2014 everything after MARKER is the report\n const markerIdx = oldBody.indexOf(MARKER);\n if (markerIdx !== -1) {\n previousReport = oldBody.slice(markerIdx + MARKER.length).trim();\n }\n }\n\n // Collapse the previous report into a <details> entry\n if (previousReport) {\n const scoreMatch = previousReport.match(/Overall:\\s*(\\d+)\\/100/);\n const score = scoreMatch ? scoreMatch[1] : '?';\n const dateMatch = previousReport.match(/Generated by.*?\u00B7\\s*([^\u00B7<\\n*]+)/);\n const date = dateMatch\n ? dateMatch[1].trim()\n : new Date().toISOString().slice(0, 16).replace('T', ' ') + ' UTC';\n const entry = `<details>\\n<summary>\uD83D\uDCDC ${date} \u2014 ${score}/100</summary>\\n\\n${previousReport}\\n\\n</details>`;\n historyEntries.unshift(entry); // newest first\n }\n\n // Enforce max history limit\n historyEntries = historyEntries.slice(0, MAX_HISTORY);\n }\n\n // --- Assemble final comment ---\n const historySection = historyEntries.length > 0\n ? 
`\\n\\n---\\n\\n### \uD83D\uDCDC Previous runs\\n\\n${HISTORY_START}\\n${historyEntries.join('\\n\\n')}\\n${HISTORY_END}`\n : '';\n const finalBody = `${MARKER}\\n${newReport}${historySection}`;\n\n if (existing) {\n await github.rest.issues.updateComment({\n owner: context.repo.owner, repo: context.repo.repo,\n comment_id: existing.id, body: finalBody,\n });\n console.log(`Updated comment (${historyEntries.length} history entries)`);\n } else {\n await github.rest.issues.createComment({\n owner: context.repo.owner, repo: context.repo.repo,\n issue_number: prNumber, body: finalBody,\n });\n console.log('Created new PR comment');\n }\n\n - name: Summary\n if: always()\n run: |\n if [ -f /tmp/ailf-report.md ]; then\n cat /tmp/ailf-report.md >> \"$GITHUB_STEP_SUMMARY\"\n else\n echo \"## \u26A0\uFE0F AI Literacy Evaluation\" >> \"$GITHUB_STEP_SUMMARY\"\n echo \"\" >> \"$GITHUB_STEP_SUMMARY\"\n echo \"No report generated. Check the workflow logs.\" >> \"$GITHUB_STEP_SUMMARY\"\n fi\n";
437
437
  /** TypeScript project configuration template (ailf.config.ts) */
438
438
  export declare const ailfConfigTs = "/**\n * .ailf/ailf.config.ts \u2014 AI Literacy Framework project configuration.\n *\n * This file configures how the AILF evaluation pipeline runs in this\n * repository. Place it at .ailf/ailf.config.ts in your project root.\n *\n * Evaluations are submitted to the AILF API (ailf-api.sanity.build).\n * The API handles LLM calls, doc fetching, grading, and report\n * publishing. Your repo only needs one secret: AILF_API_KEY.\n *\n * Docs: https://github.com/sanity-labs/ai-literacy-framework\n */\n\nexport default {\n /**\n * Documentation source \u2014 which docs are being evaluated.\n *\n * This tells the pipeline which Sanity project and dataset contain\n * the documentation under test. For most users, this is Sanity's own\n * docs project.\n */\n source: {\n /** Sanity project ID (find yours at sanity.io/manage) */\n projectId: \"3do82whm\",\n /** The dataset to query (e.g., \"production\", \"next\") */\n dataset: \"next\",\n /**\n * The public URL of your documentation site.\n * Used by agentic mode to test agent discoverability.\n */\n baseUrl: \"https://www.sanity.io/docs\",\n },\n\n /**\n * Trigger configuration \u2014 when evaluations run automatically.\n *\n * Each key is a trigger context. The pipeline checks which trigger\n * matches the current execution context (PR, merge, schedule, etc.)\n * and applies its settings.\n *\n * Mode options:\n * \"validate-only\" \u2014 check that task files parse correctly (fast, no LLM calls)\n * \"eval\" \u2014 run the full evaluation pipeline\n */\n triggers: {\n /** On pull requests: just validate task files parse correctly. */\n pr: {\n mode: \"validate-only\",\n },\n\n /** When .ailf/ files change in a PR: run a real evaluation. */\n \"pr-task-change\": {\n mode: \"eval\",\n paths: [\".ailf/**\"],\n },\n\n /** On merge to main: run evaluation (non-blocking). */\n main: {\n mode: \"eval\",\n blocking: false,\n notify: true,\n },\n },\n}\n";
@@ -113,7 +113,7 @@ export const thresholdData = {
113
113
  }
114
114
  };
115
115
  /** Raw YAML string for threshold example (preserves comments) */
116
- export const thresholdYaml = "# Example quality threshold configuration.\n#\n# Thresholds define the minimum scores for readiness gates.\n# The pipeline's --readiness flag evaluates scores against these\n# thresholds and produces a go/no-go checklist.\n#\n# Global thresholds apply to all areas unless overridden per-area.\n\nglobal:\n composite: 60\n dimensions:\n task_completion: 55\n code_correctness: 50\n doc_coverage: 50\n ceiling: 70\n docLift: 10\n\nareas:\n groq:\n composite: 65\n ceiling: 75\n";
116
+ export const thresholdYaml = "# Example quality threshold configuration.\n#\n# Thresholds define the minimum scores for readiness gates.\n# The `ailf report readiness` command evaluates scores against these\n# thresholds and produces a go/no-go checklist.\n#\n# Global thresholds apply to all areas unless overridden per-area.\n\nglobal:\n composite: 60\n dimensions:\n task_completion: 55\n code_correctness: 50\n doc_coverage: 50\n ceiling: 70\n docLift: 10\n\nareas:\n groq:\n composite: 65\n ceiling: 75\n";
117
117
  // ---------------------------------------------------------------------------
118
118
  // Project configuration for .ailf/config.yaml
119
119
  // ---------------------------------------------------------------------------
@@ -630,7 +630,7 @@ export const EXAMPLES = {
630
630
  // Raw file exports (non-data files, exported as raw strings)
631
631
  // ---------------------------------------------------------------------------
632
632
  /** GitHub Actions workflow template for AI Literacy evaluation */
633
- export const workflowYaml = "# ──────────────────────────────────────────────────────────────────────\n# AI Literacy Evaluation — GitHub Actions workflow\n# ──────────────────────────────────────────────────────────────────────\n#\n# Evaluates your documentation quality on every pull request.\n# The AILF CLI reads your .ailf/tasks/ definitions, submits them\n# to the AILF API for evaluation, and writes a score report.\n#\n# Prerequisites:\n# Add one secret to your repository (Settings → Secrets → Actions):\n# AILF_API_KEY — your API key (starts with ailf_live_sk_)\n#\n# @sanity/ailf is published with public npm access, so no npm token is\n# needed to install the CLI.\n#\n# Customization:\n# - Narrow the trigger paths to reduce cost (see comment below)\n# - Check debug_mode for faster iteration (fewer tests)\n# - See: https://github.com/sanity-labs/ai-literacy-framework\n# ──────────────────────────────────────────────────────────────────────\n\nname: AI Literacy Eval\n\non:\n pull_request:\n branches: [main]\n # Runs on every PR to main by default. To reduce cost:\n # paths: [\".ailf/**\", \"docs/**\"]\n\n workflow_dispatch:\n inputs:\n debug_mode:\n description: \"Run in debug mode (fewer tests, faster iteration)\"\n type: boolean\n default: false\n\nconcurrency:\n group: ailf-eval-${{ github.event.pull_request.number || github.ref }}\n cancel-in-progress: true\n\njobs:\n evaluate:\n name: AI Literacy Evaluation\n runs-on: ubuntu-latest\n permissions:\n contents: read\n pull-requests: write\n steps:\n - uses: actions/checkout@v4\n\n - name: Run evaluation\n id: eval\n env:\n AILF_API_KEY: ${{ secrets.AILF_API_KEY }}\n # ────────────────────────────────────────────────────────────\n # D0037 run provenance envelope — REPLACE THE OWNER TEAM SLUG\n # below. 
Unedited templates produce runs tagged with the literal\n # placeholder so you can spot them in Studio / BigQuery and fix.\n #\n # AILF_CLASSIFICATION values: official | ad-hoc | experimental |\n # test | external. External teams should use `ad-hoc` by default;\n # `official` is reserved for the core-docs scheduled series.\n # ────────────────────────────────────────────────────────────\n AILF_CLASSIFICATION: ad-hoc\n AILF_OWNER_TEAM: \"<REPLACE-WITH-YOUR-TEAM-SLUG>\"\n AILF_OWNER_INDIVIDUAL: ${{ github.actor }}\n run: |\n npx @sanity/ailf@latest pipeline --remote \\\n --output /tmp/ailf-report.md \\\n ${{ inputs.debug_mode && '--debug' || '' }}\n\n - name: Post PR comment\n if: always() && github.event_name == 'pull_request'\n uses: actions/github-script@v7\n with:\n script: |\n const fs = require('fs');\n\n // --- Constants ---\n const MARKER = '<!-- ailf-score-report -->';\n const HISTORY_START = '<!-- ailf-score-history -->';\n const HISTORY_END = '<!-- /ailf-score-history -->';\n const MAX_HISTORY = 3; // keep at most 3 prior runs\n\n // --- Read new report ---\n let newReport;\n try {\n newReport = fs.readFileSync('/tmp/ailf-report.md', 'utf-8');\n } catch {\n newReport = `## ⚠️ AI Literacy Evaluation\\n\\nNo report generated. 
Check the [workflow logs](${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}).`;\n }\n\n const prNumber = context.issue?.number || context.payload?.pull_request?.number;\n if (!prNumber) {\n console.log('No PR number found, skipping comment');\n return;\n }\n\n // --- Find existing comment ---\n const { data: comments } = await github.rest.issues.listComments({\n owner: context.repo.owner, repo: context.repo.repo,\n issue_number: prNumber,\n });\n const existing = comments.find(c => c.body?.includes(MARKER));\n\n // --- Build history from previous comment ---\n let historyEntries = [];\n if (existing) {\n const oldBody = existing.body || '';\n\n // Collect existing collapsed history entries\n const histStart = oldBody.indexOf(HISTORY_START);\n const histEnd = oldBody.indexOf(HISTORY_END);\n if (histStart !== -1 && histEnd !== -1) {\n const historyContent = oldBody.slice(histStart + HISTORY_START.length, histEnd).trim();\n // Split on </details> boundaries to get individual entries\n if (historyContent) {\n historyEntries = historyContent\n .split(/<\\/details>\\s*/)\n .map(s => s.trim())\n .filter(s => s.startsWith('<details>'))\n .map(s => s + '\\n</details>');\n }\n }\n\n // Extract the current report (will become the newest history entry)\n let previousReport = '';\n if (histStart !== -1) {\n // Report is between MARKER and the \"Previous runs\" heading (or history section)\n const markerIdx = oldBody.indexOf(MARKER);\n // Find the --- separator before history\n const separatorIdx = oldBody.lastIndexOf('---', histStart);\n const endIdx = separatorIdx > markerIdx ? 
separatorIdx : histStart;\n previousReport = oldBody.slice(markerIdx + MARKER.length, endIdx).trim();\n } else {\n // No history yet — everything after MARKER is the report\n const markerIdx = oldBody.indexOf(MARKER);\n if (markerIdx !== -1) {\n previousReport = oldBody.slice(markerIdx + MARKER.length).trim();\n }\n }\n\n // Collapse the previous report into a <details> entry\n if (previousReport) {\n const scoreMatch = previousReport.match(/Overall:\\s*(\\d+)\\/100/);\n const score = scoreMatch ? scoreMatch[1] : '?';\n const dateMatch = previousReport.match(/Generated by.*?·\\s*([^·<\\n*]+)/);\n const date = dateMatch\n ? dateMatch[1].trim()\n : new Date().toISOString().slice(0, 16).replace('T', ' ') + ' UTC';\n const entry = `<details>\\n<summary>📜 ${date} — ${score}/100</summary>\\n\\n${previousReport}\\n\\n</details>`;\n historyEntries.unshift(entry); // newest first\n }\n\n // Enforce max history limit\n historyEntries = historyEntries.slice(0, MAX_HISTORY);\n }\n\n // --- Assemble final comment ---\n const historySection = historyEntries.length > 0\n ? `\\n\\n---\\n\\n### 📜 Previous runs\\n\\n${HISTORY_START}\\n${historyEntries.join('\\n\\n')}\\n${HISTORY_END}`\n : '';\n const finalBody = `${MARKER}\\n${newReport}${historySection}`;\n\n if (existing) {\n await github.rest.issues.updateComment({\n owner: context.repo.owner, repo: context.repo.repo,\n comment_id: existing.id, body: finalBody,\n });\n console.log(`Updated comment (${historyEntries.length} history entries)`);\n } else {\n await github.rest.issues.createComment({\n owner: context.repo.owner, repo: context.repo.repo,\n issue_number: prNumber, body: finalBody,\n });\n console.log('Created new PR comment');\n }\n\n - name: Summary\n if: always()\n run: |\n if [ -f /tmp/ailf-report.md ]; then\n cat /tmp/ailf-report.md >> \"$GITHUB_STEP_SUMMARY\"\n else\n echo \"## ⚠️ AI Literacy Evaluation\" >> \"$GITHUB_STEP_SUMMARY\"\n echo \"\" >> \"$GITHUB_STEP_SUMMARY\"\n echo \"No report generated. 
Check the workflow logs.\" >> \"$GITHUB_STEP_SUMMARY\"\n fi\n";
633
+ export const workflowYaml = "# ──────────────────────────────────────────────────────────────────────\n# AI Literacy Evaluation — GitHub Actions workflow\n# ──────────────────────────────────────────────────────────────────────\n#\n# Evaluates your documentation quality on every pull request.\n# The AILF CLI reads your .ailf/tasks/ definitions, submits them\n# to the AILF API for evaluation, and writes a score report.\n#\n# Prerequisites:\n# Add one secret to your repository (Settings → Secrets → Actions):\n# AILF_API_KEY — your API key (starts with ailf_live_sk_)\n#\n# @sanity/ailf is published with public npm access, so no npm token is\n# needed to install the CLI.\n#\n# Customization:\n# - Narrow the trigger paths to reduce cost (see comment below)\n# - Check debug_mode for faster iteration (fewer tests)\n# - See: https://github.com/sanity-labs/ai-literacy-framework\n# ──────────────────────────────────────────────────────────────────────\n\nname: AI Literacy Eval\n\non:\n pull_request:\n branches: [main]\n # Runs on every PR to main by default. To reduce cost:\n # paths: [\".ailf/**\", \"docs/**\"]\n\n workflow_dispatch:\n inputs:\n debug_mode:\n description: \"Run in debug mode (fewer tests, faster iteration)\"\n type: boolean\n default: false\n\nconcurrency:\n group: ailf-eval-${{ github.event.pull_request.number || github.ref }}\n cancel-in-progress: true\n\njobs:\n evaluate:\n name: AI Literacy Evaluation\n runs-on: ubuntu-latest\n permissions:\n contents: read\n pull-requests: write\n steps:\n - uses: actions/checkout@v4\n\n - name: Run evaluation\n id: eval\n env:\n AILF_API_KEY: ${{ secrets.AILF_API_KEY }}\n # ────────────────────────────────────────────────────────────\n # D0037 run provenance envelope — REPLACE THE OWNER TEAM SLUG\n # below. 
Unedited templates produce runs tagged with the literal\n # placeholder so you can spot them in Studio / BigQuery and fix.\n #\n # AILF_CLASSIFICATION values: official | adhoc | experimental |\n # test | external. External teams should use `adhoc` by default;\n # `official` is reserved for the core-docs scheduled series.\n # ────────────────────────────────────────────────────────────\n AILF_CLASSIFICATION: adhoc\n AILF_OWNER_TEAM: \"<REPLACE-WITH-YOUR-TEAM-SLUG>\"\n AILF_OWNER_INDIVIDUAL: ${{ github.actor }}\n run: |\n npx @sanity/ailf@latest run --remote \\\n --output /tmp/ailf-report.md \\\n ${{ inputs.debug_mode && '--debug' || '' }}\n\n - name: Post PR comment\n if: always() && github.event_name == 'pull_request'\n uses: actions/github-script@v7\n with:\n script: |\n const fs = require('fs');\n\n // --- Constants ---\n const MARKER = '<!-- ailf-score-report -->';\n const HISTORY_START = '<!-- ailf-score-history -->';\n const HISTORY_END = '<!-- /ailf-score-history -->';\n const MAX_HISTORY = 3; // keep at most 3 prior runs\n\n // --- Read new report ---\n let newReport;\n try {\n newReport = fs.readFileSync('/tmp/ailf-report.md', 'utf-8');\n } catch {\n newReport = `## ⚠️ AI Literacy Evaluation\\n\\nNo report generated. 
Check the [workflow logs](${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}).`;\n }\n\n const prNumber = context.issue?.number || context.payload?.pull_request?.number;\n if (!prNumber) {\n console.log('No PR number found, skipping comment');\n return;\n }\n\n // --- Find existing comment ---\n const { data: comments } = await github.rest.issues.listComments({\n owner: context.repo.owner, repo: context.repo.repo,\n issue_number: prNumber,\n });\n const existing = comments.find(c => c.body?.includes(MARKER));\n\n // --- Build history from previous comment ---\n let historyEntries = [];\n if (existing) {\n const oldBody = existing.body || '';\n\n // Collect existing collapsed history entries\n const histStart = oldBody.indexOf(HISTORY_START);\n const histEnd = oldBody.indexOf(HISTORY_END);\n if (histStart !== -1 && histEnd !== -1) {\n const historyContent = oldBody.slice(histStart + HISTORY_START.length, histEnd).trim();\n // Split on </details> boundaries to get individual entries\n if (historyContent) {\n historyEntries = historyContent\n .split(/<\\/details>\\s*/)\n .map(s => s.trim())\n .filter(s => s.startsWith('<details>'))\n .map(s => s + '\\n</details>');\n }\n }\n\n // Extract the current report (will become the newest history entry)\n let previousReport = '';\n if (histStart !== -1) {\n // Report is between MARKER and the \"Previous runs\" heading (or history section)\n const markerIdx = oldBody.indexOf(MARKER);\n // Find the --- separator before history\n const separatorIdx = oldBody.lastIndexOf('---', histStart);\n const endIdx = separatorIdx > markerIdx ? 
separatorIdx : histStart;\n previousReport = oldBody.slice(markerIdx + MARKER.length, endIdx).trim();\n } else {\n // No history yet — everything after MARKER is the report\n const markerIdx = oldBody.indexOf(MARKER);\n if (markerIdx !== -1) {\n previousReport = oldBody.slice(markerIdx + MARKER.length).trim();\n }\n }\n\n // Collapse the previous report into a <details> entry\n if (previousReport) {\n const scoreMatch = previousReport.match(/Overall:\\s*(\\d+)\\/100/);\n const score = scoreMatch ? scoreMatch[1] : '?';\n const dateMatch = previousReport.match(/Generated by.*?·\\s*([^·<\\n*]+)/);\n const date = dateMatch\n ? dateMatch[1].trim()\n : new Date().toISOString().slice(0, 16).replace('T', ' ') + ' UTC';\n const entry = `<details>\\n<summary>📜 ${date} — ${score}/100</summary>\\n\\n${previousReport}\\n\\n</details>`;\n historyEntries.unshift(entry); // newest first\n }\n\n // Enforce max history limit\n historyEntries = historyEntries.slice(0, MAX_HISTORY);\n }\n\n // --- Assemble final comment ---\n const historySection = historyEntries.length > 0\n ? `\\n\\n---\\n\\n### 📜 Previous runs\\n\\n${HISTORY_START}\\n${historyEntries.join('\\n\\n')}\\n${HISTORY_END}`\n : '';\n const finalBody = `${MARKER}\\n${newReport}${historySection}`;\n\n if (existing) {\n await github.rest.issues.updateComment({\n owner: context.repo.owner, repo: context.repo.repo,\n comment_id: existing.id, body: finalBody,\n });\n console.log(`Updated comment (${historyEntries.length} history entries)`);\n } else {\n await github.rest.issues.createComment({\n owner: context.repo.owner, repo: context.repo.repo,\n issue_number: prNumber, body: finalBody,\n });\n console.log('Created new PR comment');\n }\n\n - name: Summary\n if: always()\n run: |\n if [ -f /tmp/ailf-report.md ]; then\n cat /tmp/ailf-report.md >> \"$GITHUB_STEP_SUMMARY\"\n else\n echo \"## ⚠️ AI Literacy Evaluation\" >> \"$GITHUB_STEP_SUMMARY\"\n echo \"\" >> \"$GITHUB_STEP_SUMMARY\"\n echo \"No report generated. 
Check the workflow logs.\" >> \"$GITHUB_STEP_SUMMARY\"\n fi\n";
634
634
  // ---------------------------------------------------------------------------
635
635
  // TypeScript template exports (for ailf init --output-format ts)
636
636
  // ---------------------------------------------------------------------------
@@ -68,10 +68,6 @@ export interface ResolvedConfig {
68
68
  compareBaseline?: string;
69
69
  /** Whether gap analysis is enabled */
70
70
  gapAnalysisEnabled: boolean;
71
- /** Whether readiness report is enabled */
72
- readinessEnabled: boolean;
73
- /** Whether discovery report is enabled */
74
- discoveryReportEnabled: boolean;
75
71
  /** Whether publishing is enabled */
76
72
  publishEnabled: boolean;
77
73
  /** Publish tag */
@@ -11,42 +11,68 @@
11
11
  */
12
12
  import { z } from "zod";
13
13
  export declare const EvalConfigSchema: z.ZodObject<{
14
- allowedOrigins: z.ZodOptional<z.ZodArray<z.ZodString>>;
14
+ agentic: z.ZodOptional<z.ZodObject<{
15
+ headers: z.ZodOptional<z.ZodRecord<z.ZodString, z.ZodString>>;
16
+ allowedOrigins: z.ZodOptional<z.ZodArray<z.ZodString>>;
17
+ }, z.core.$strip>>;
18
+ artifacts: z.ZodOptional<z.ZodObject<{
19
+ enabled: z.ZodOptional<z.ZodBoolean>;
20
+ dir: z.ZodOptional<z.ZodString>;
21
+ exclude: z.ZodOptional<z.ZodArray<z.ZodString>>;
22
+ }, z.core.$strip>>;
23
+ taskSource: z.ZodOptional<z.ZodObject<{
24
+ type: z.ZodOptional<z.ZodEnum<{
25
+ "content-lake": "content-lake";
26
+ repo: "repo";
27
+ }>>;
28
+ repoTasksPath: z.ZodOptional<z.ZodString>;
29
+ }, z.core.$strip>>;
15
30
  areas: z.ZodOptional<z.ZodArray<z.ZodString>>;
16
31
  changedDocs: z.ZodOptional<z.ZodArray<z.ZodString>>;
17
32
  compare: z.ZodOptional<z.ZodBoolean>;
18
33
  compareBaseline: z.ZodOptional<z.ZodString>;
19
34
  compareThreshold: z.ZodOptional<z.ZodNumber>;
20
- concurrency: z.ZodOptional<z.ZodNumber>;
21
35
  debug: z.ZodOptional<z.ZodUnion<readonly [z.ZodBoolean, z.ZodObject<{
22
36
  enabled: z.ZodOptional<z.ZodBoolean>;
23
37
  firstN: z.ZodOptional<z.ZodNumber>;
24
38
  pattern: z.ZodOptional<z.ZodString>;
25
39
  sample: z.ZodOptional<z.ZodNumber>;
26
40
  }, z.core.$strip>]>>;
27
- discoveryReport: z.ZodOptional<z.ZodBoolean>;
28
- gapAnalysis: z.ZodOptional<z.ZodBoolean>;
29
- graderReplications: z.ZodOptional<z.ZodNumber>;
30
- headers: z.ZodOptional<z.ZodRecord<z.ZodString, z.ZodString>>;
41
+ execution: z.ZodOptional<z.ZodObject<{
42
+ concurrency: z.ZodOptional<z.ZodNumber>;
43
+ graderReplications: z.ZodOptional<z.ZodNumber>;
44
+ gapAnalysis: z.ZodOptional<z.ZodBoolean>;
45
+ apiUrl: z.ZodOptional<z.ZodString>;
46
+ }, z.core.$strip>>;
47
+ output: z.ZodOptional<z.ZodObject<{
48
+ dir: z.ZodOptional<z.ZodString>;
49
+ }, z.core.$strip>>;
31
50
  mode: z.ZodOptional<z.ZodEnum<{
32
51
  custom: "custom";
52
+ agentic: "agentic";
33
53
  literacy: "literacy";
34
54
  "mcp-server": "mcp-server";
35
55
  "agent-harness": "agent-harness";
36
56
  "knowledge-probe": "knowledge-probe";
37
57
  baseline: "baseline";
38
- agentic: "agentic";
39
58
  observed: "observed";
40
59
  full: "full";
41
60
  }>>;
42
61
  noAutoScope: z.ZodOptional<z.ZodBoolean>;
43
62
  noCache: z.ZodOptional<z.ZodBoolean>;
44
63
  noRemoteCache: z.ZodOptional<z.ZodBoolean>;
45
- publish: z.ZodOptional<z.ZodBoolean>;
46
- publishTag: z.ZodOptional<z.ZodString>;
47
- readiness: z.ZodOptional<z.ZodBoolean>;
48
- reportDataset: z.ZodOptional<z.ZodString>;
49
- reportProjectId: z.ZodOptional<z.ZodString>;
64
+ publish: z.ZodOptional<z.ZodObject<{
65
+ auto: z.ZodOptional<z.ZodEnum<{
66
+ never: "never";
67
+ always: "always";
68
+ "full-runs": "full-runs";
69
+ }>>;
70
+ tag: z.ZodOptional<z.ZodString>;
71
+ }, z.core.$strip>>;
72
+ reportStore: z.ZodOptional<z.ZodObject<{
73
+ projectId: z.ZodOptional<z.ZodString>;
74
+ dataset: z.ZodOptional<z.ZodString>;
75
+ }, z.core.$strip>>;
50
76
  searchMode: z.ZodOptional<z.ZodEnum<{
51
77
  off: "off";
52
78
  open: "open";
@@ -13,8 +13,51 @@ import { z } from "zod";
13
13
  import { RAW_EVAL_MODES } from "../../ailf-shared/index.js";
14
14
  export const EvalConfigSchema = z
15
15
  .object({
16
- /** Allowed origins for agentic mode */
17
- allowedOrigins: z.array(z.string()).optional(),
16
+ /**
17
+ * Agentic-mode configuration (W0077 Phase 6f). Replaces the top-level
18
+ * `headers` and `allowedOrigins` fields. Mirrors `RepoConfigSchema`'s
19
+ * `agentic` block for the auto-loaded `.ailf/config.yaml` path.
20
+ *
21
+ * - `headers` — custom HTTP headers for doc fetching (key/value object)
22
+ * - `allowedOrigins` — origin globs for agentic-mode sandbox
23
+ */
24
+ agentic: z
25
+ .object({
26
+ headers: z.record(z.string(), z.string()).optional(),
27
+ allowedOrigins: z.array(z.string()).optional(),
28
+ })
29
+ .optional(),
30
+ /**
31
+ * Artifact-writer configuration (W0077 Phase 6g). Replaces the retired
32
+ * `--no-artifacts`, `--artifacts-dir`, and `--artifacts-exclude` CLI
33
+ * flags. Mirrors `RepoConfigSchema.artifacts`.
34
+ *
35
+ * - `enabled` — false to disable all writers (legacy --no-artifacts)
36
+ * - `dir` — root for local writer (default .ailf/results/captures/)
37
+ * - `exclude` — list of artifact-type names to skip
38
+ */
39
+ artifacts: z
40
+ .object({
41
+ enabled: z.boolean().optional(),
42
+ dir: z.string().min(1).optional(),
43
+ exclude: z.array(z.string().min(1)).optional(),
44
+ })
45
+ .optional(),
46
+ /**
47
+ * Task-source configuration (W0077 Phase 6h). Replaces the retired
48
+ * `--task-source` and `--repo-tasks-path` CLI flags. Mirrors
49
+ * `RepoConfigSchema.taskSource`.
50
+ *
51
+ * - `type` — `content-lake` (default) or `repo`
52
+ * - `repoTasksPath` — optional path; default `<cwd>/.ailf/tasks/` when
53
+ * type is `repo`
54
+ */
55
+ taskSource: z
56
+ .object({
57
+ type: z.enum(["content-lake", "repo"]).optional(),
58
+ repoTasksPath: z.string().min(1).optional(),
59
+ })
60
+ .optional(),
18
61
  /** Feature area filter (comma-separated or array) */
19
62
  areas: z.array(z.string()).optional(),
20
63
  /** Changed doc slugs for impact scoping */
@@ -25,8 +68,6 @@ export const EvalConfigSchema = z
25
68
  compareBaseline: z.string().optional(),
26
69
  /** Comparison noise threshold */
27
70
  compareThreshold: z.number().min(0).optional(),
28
- /** Max parallel API calls */
29
- concurrency: z.number().int().positive().optional(),
30
71
  /** Debug mode */
31
72
  debug: z
32
73
  .union([
@@ -39,14 +80,34 @@ export const EvalConfigSchema = z
39
80
  }),
40
81
  ])
41
82
  .optional(),
42
- /** Enable discovery report */
43
- discoveryReport: z.boolean().optional(),
44
- /** Enable gap analysis */
45
- gapAnalysis: z.boolean().optional(),
46
- /** Grader consistency replications */
47
- graderReplications: z.number().int().positive().optional(),
48
- /** Custom headers for doc fetching */
49
- headers: z.record(z.string(), z.string()).optional(),
83
+ /**
84
+ * Execution-tier configuration (W0077 Phase 6b). Replaces the top-level
85
+ * `concurrency`, `gapAnalysis`, and `graderReplications` fields and adds
86
+ * `apiUrl` to the same group.
87
+ *
88
+ * - `concurrency` — max parallel API calls
89
+ * - `graderReplications` — grader consistency replications
90
+ * - `gapAnalysis` — enable failure-mode + impact analysis (default true)
91
+ * - `apiUrl` — AILF API base URL (default https://ailf-api.sanity.build)
92
+ */
93
+ execution: z
94
+ .object({
95
+ concurrency: z.number().int().positive().optional(),
96
+ graderReplications: z.number().int().positive().optional(),
97
+ gapAnalysis: z.boolean().optional(),
98
+ apiUrl: z.string().url().optional(),
99
+ })
100
+ .optional(),
101
+ /**
102
+ * Output configuration (W0077 Phase 6c). Replaces the retired
103
+ * `--output-dir` CLI flag. Path is resolved relative to the caller's
104
+ * cwd. When unset, defaults to `<cwd>/.ailf/results/latest/`.
105
+ */
106
+ output: z
107
+ .object({
108
+ dir: z.string().min(1).optional(),
109
+ })
110
+ .optional(),
50
111
  /**
51
112
  * Evaluation mode — accepts both canonical and legacy names.
52
113
  * Legacy names ("baseline", "agentic", "observed", "full") must pass
@@ -59,16 +120,35 @@ export const EvalConfigSchema = z
59
120
  noCache: z.boolean().optional(),
60
121
  /** Disable remote cache */
61
122
  noRemoteCache: z.boolean().optional(),
62
- /** Enable publishing */
63
- publish: z.boolean().optional(),
64
- /** Publish tag */
65
- publishTag: z.string().optional(),
66
- /** Enable readiness report */
67
- readiness: z.boolean().optional(),
68
- /** Report store dataset override */
69
- reportDataset: z.string().optional(),
70
- /** Report store project ID override */
71
- reportProjectId: z.string().optional(),
123
+ /**
124
+ * Publish policy. The CLI `--publish` and `--no-publish` flags override
125
+ * the policy at runtime; this field controls behavior when no explicit
126
+ * flag is passed.
127
+ *
128
+ * - `auto: "always"` — publish whenever a report store is configured
129
+ * - `auto: "full-runs"` — publish non-debug runs (default; preserves
130
+ * the historical smart default)
131
+ * - `auto: "never"` — never auto-publish (must opt in via --publish)
132
+ *
133
+ * `tag` is a default value for `--publish-tag` when not passed at the CLI.
134
+ */
135
+ publish: z
136
+ .object({
137
+ auto: z.enum(["always", "full-runs", "never"]).optional(),
138
+ tag: z.string().optional(),
139
+ })
140
+ .optional(),
141
+ /**
142
+ * Report store configuration (W0077 Phase 6e). Replaces the top-level
143
+ * `reportDataset` and `reportProjectId` fields. Mirrors `RepoConfigSchema`'s
144
+ * `reportStore` block for the auto-loaded `.ailf/config.yaml` path.
145
+ */
146
+ reportStore: z
147
+ .object({
148
+ projectId: z.string().optional(),
149
+ dataset: z.string().optional(),
150
+ })
151
+ .optional(),
72
152
  /** Search mode for agentic mode */
73
153
  searchMode: z.enum(["off", "open", "origin-only"]).optional(),
74
154
  /** Skip eval step */
@@ -42,7 +42,6 @@ export declare const PipelineRequestSchema: z.ZodObject<{
42
42
  pattern: z.ZodOptional<z.ZodString>;
43
43
  sample: z.ZodOptional<z.ZodNumber>;
44
44
  }, z.core.$strip>]>>;
45
- discoveryReport: z.ZodOptional<z.ZodBoolean>;
46
45
  gapAnalysis: z.ZodOptional<z.ZodBoolean>;
47
46
  graderReplications: z.ZodOptional<z.ZodNumber>;
48
47
  headers: z.ZodOptional<z.ZodRecord<z.ZodString, z.ZodString>>;
@@ -50,12 +49,12 @@ export declare const PipelineRequestSchema: z.ZodObject<{
50
49
  jobId: z.ZodOptional<z.ZodString>;
51
50
  mode: z.ZodOptional<z.ZodEnum<{
52
51
  custom: "custom";
52
+ agentic: "agentic";
53
53
  literacy: "literacy";
54
54
  "mcp-server": "mcp-server";
55
55
  "agent-harness": "agent-harness";
56
56
  "knowledge-probe": "knowledge-probe";
57
57
  baseline: "baseline";
58
- agentic: "agentic";
59
58
  observed: "observed";
60
59
  full: "full";
61
60
  }>>;
@@ -66,7 +65,6 @@ export declare const PipelineRequestSchema: z.ZodObject<{
66
65
  projectId: z.ZodOptional<z.ZodString>;
67
66
  publish: z.ZodOptional<z.ZodBoolean>;
68
67
  publishTag: z.ZodOptional<z.ZodString>;
69
- readiness: z.ZodOptional<z.ZodBoolean>;
70
68
  searchMode: z.ZodOptional<z.ZodEnum<{
71
69
  off: "off";
72
70
  open: "open";
@@ -75,14 +73,14 @@ export declare const PipelineRequestSchema: z.ZodObject<{
75
73
  source: z.ZodOptional<z.ZodString>;
76
74
  sourceReportId: z.ZodOptional<z.ZodString>;
77
75
  taskMode: z.ZodOptional<z.ZodEnum<{
78
- inline: "inline";
79
76
  "content-lake": "content-lake";
77
+ inline: "inline";
80
78
  }>>;
81
79
  tasks: z.ZodOptional<z.ZodArray<z.ZodString>>;
82
80
  urls: z.ZodOptional<z.ZodArray<z.ZodString>>;
83
81
  variant: z.ZodOptional<z.ZodEnum<{
84
- baseline: "baseline";
85
82
  agentic: "agentic";
83
+ baseline: "baseline";
86
84
  observed: "observed";
87
85
  full: "full";
88
86
  }>>;
@@ -90,7 +88,7 @@ export declare const PipelineRequestSchema: z.ZodObject<{
90
88
  classification: z.ZodOptional<z.ZodEnum<{
91
89
  external: "external";
92
90
  official: "official";
93
- "ad-hoc": "ad-hoc";
91
+ adhoc: "adhoc";
94
92
  experimental: "experimental";
95
93
  test: "test";
96
94
  }>>;
@@ -105,7 +105,6 @@ export const PipelineRequestSchema = z.object({
105
105
  concurrency: z.number().int().positive().optional(),
106
106
  dataset: z.string().optional(),
107
107
  debug: z.union([z.boolean(), DebugOptionsSchema]).optional(),
108
- discoveryReport: z.boolean().optional(),
109
108
  gapAnalysis: z.boolean().optional(),
110
109
  graderReplications: z.number().int().positive().optional(),
111
110
  headers: z.record(z.string(), z.string()).optional(),
@@ -123,7 +122,6 @@ export const PipelineRequestSchema = z.object({
123
122
  projectId: z.string().optional(),
124
123
  publish: z.boolean().optional(),
125
124
  publishTag: z.string().optional(),
126
- readiness: z.boolean().optional(),
127
125
  searchMode: z.enum(["off", "open", "origin-only"]).optional(),
128
126
  source: z.string().optional(),
129
127
  sourceReportId: z.string().optional(),
@@ -150,7 +148,7 @@ export const PipelineRequestSchema = z.object({
150
148
  /**
151
149
  * How this run should be treated for reporting and trend tracking.
152
150
  * Orthogonal to `trigger.type` (captured server-side). When omitted,
153
- * the server defaults to `"ad-hoc"`.
151
+ * the server defaults to `"adhoc"`.
154
152
  */
155
153
  classification: z.enum(RUN_CLASSIFICATIONS).optional(),
156
154
  /** Team and (optionally) individual this run is attributable to. */
@@ -19,12 +19,12 @@ export declare const ScheduleEntrySchema: z.ZodObject<{
19
19
  enabled: z.ZodDefault<z.ZodBoolean>;
20
20
  mode: z.ZodDefault<z.ZodEnum<{
21
21
  custom: "custom";
22
+ agentic: "agentic";
22
23
  literacy: "literacy";
23
24
  "mcp-server": "mcp-server";
24
25
  "agent-harness": "agent-harness";
25
26
  "knowledge-probe": "knowledge-probe";
26
27
  baseline: "baseline";
27
- agentic: "agentic";
28
28
  observed: "observed";
29
29
  full: "full";
30
30
  }>>;
@@ -59,12 +59,12 @@ export declare const SchedulesFileSchema: z.ZodObject<{
59
59
  enabled: z.ZodDefault<z.ZodBoolean>;
60
60
  mode: z.ZodDefault<z.ZodEnum<{
61
61
  custom: "custom";
62
+ agentic: "agentic";
62
63
  literacy: "literacy";
63
64
  "mcp-server": "mcp-server";
64
65
  "agent-harness": "agent-harness";
65
66
  "knowledge-probe": "knowledge-probe";
66
67
  baseline: "baseline";
67
- agentic: "agentic";
68
68
  observed: "observed";
69
69
  full: "full";
70
70
  }>>;
@@ -13,10 +13,10 @@
13
13
  /**
14
14
  * How a run should be treated for reporting and trend tracking.
15
15
  *
16
- * Orthogonal to `RunTrigger` (mechanism). Defaults to `"ad-hoc"` when
16
+ * Orthogonal to `RunTrigger` (mechanism). Defaults to `"adhoc"` when
17
17
  * unannotated so pre-taxonomy runs never leak into the canonical series.
18
18
  */
19
- export type RunClassification = "official" | "ad-hoc" | "experimental" | "test" | "external";
19
+ export type RunClassification = "official" | "adhoc" | "experimental" | "test" | "external";
20
20
  export declare const RUN_CLASSIFICATIONS: readonly RunClassification[];
21
21
  export declare function isRunClassification(value: unknown): value is RunClassification;
22
22
  /**
@@ -12,7 +12,7 @@
12
12
  */
13
13
  export const RUN_CLASSIFICATIONS = [
14
14
  "official",
15
- "ad-hoc",
15
+ "adhoc",
16
16
  "experimental",
17
17
  "test",
18
18
  "external",
@@ -22,7 +22,7 @@ export interface RunContext {
22
22
  areas: string[];
23
23
  /**
24
24
  * How this run should be treated for reporting and trend tracking.
25
- * Orthogonal to `trigger` (mechanism). Defaults to `"ad-hoc"` when
25
+ * Orthogonal to `trigger` (mechanism). Defaults to `"adhoc"` when
26
26
  * unannotated — only the scheduled workflow mints `"official"`.
27
27
  *
28
28
  * @see docs/decisions/D0037-run-classification-and-ownership-taxonomy.md
@@ -59,8 +59,6 @@ export interface RemoteConfigSlice {
59
59
  perspectiveOverride?: string;
60
60
  graderReplications?: number;
61
61
  gapAnalysisEnabled?: boolean;
62
- readinessEnabled?: boolean;
63
- discoveryReportEnabled?: boolean;
64
62
  noRemoteCache?: boolean;
65
63
  /**
66
64
  * D0037 / W0069 — CLI-flag overrides for the caller envelope. These
@@ -129,14 +129,10 @@ export async function buildRemoteRequest(options) {
129
129
  }
130
130
  if (config.gapAnalysisEnabled)
131
131
  raw.gapAnalysis = true;
132
- if (config.readinessEnabled)
133
- raw.readiness = true;
134
- if (config.discoveryReportEnabled)
135
- raw.discoveryReport = true;
136
132
  if (config.noRemoteCache)
137
133
  raw.noRemoteCache = true;
138
134
  // Caller git metadata — auto-detect from CI environment variables.
139
- // When running via `ailf pipeline --remote` in a GitHub Actions workflow,
135
+ // When running via `ailf run --remote` in a GitHub Actions workflow,
140
136
  // the GITHUB_* env vars identify the *calling* repo (not the AILF core
141
137
  // repo). This ensures report provenance attributes to the right repo.
142
138
  const callerGit = detectCallerGit();
@@ -391,7 +387,7 @@ export function buildCallerEnvelope(config) {
391
387
  /**
392
388
  * Auto-detect caller git metadata from GitHub Actions environment variables.
393
389
  *
394
- * When the CLI runs in a calling repo's CI (via `npx @sanity/ailf pipeline
390
+ * When the CLI runs in a calling repo's CI (via `npx @sanity/ailf run
395
391
  * --remote`), the GITHUB_* env vars reflect that repo — not the AILF core
396
392
  * repo. We capture them here so the API can carry them through to report
397
393
  * provenance.
@@ -7,7 +7,7 @@
7
7
  * @see packages/eval/src/commands/pipeline-action.ts — underlying implementation
8
8
  */
9
9
  import type { ConfigSource, ResolvedConfig } from "../../_vendor/ailf-core/index.d.ts";
10
- import type { PipelineCliOptions } from "../../commands/pipeline.js";
10
+ import type { PipelineCliOptions } from "../../commands/run.js";
11
11
  export declare class CliConfigAdapter implements ConfigSource {
12
12
  private readonly cliOpts;
13
13
  private readonly rootDir;
@@ -1,7 +1,7 @@
1
1
  /**
2
2
  * FileConfigAdapter — resolves pipeline config from a local config file.
3
3
  *
4
- * Enables `ailf pipeline --config <path>` to load all pipeline options
4
+ * Enables `ailf run --config <path>` to load all pipeline options
5
5
  * from a file instead of CLI flags. Supports multiple formats in
6
6
  * priority order:
7
7
  *