@sanity/ailf 0.1.0 → 0.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/_vendor/ailf-core/examples/index.d.ts +6 -4
- package/dist/_vendor/ailf-core/examples/index.js +9 -4
- package/dist/_vendor/ailf-core/ports/context.d.ts +4 -0
- package/dist/adapters/task-sources/repo-schemas.d.ts +12 -2
- package/dist/adapters/task-sources/repo-schemas.js +28 -2
- package/dist/cli.js +0 -0
- package/dist/commands/init.js +39 -5
- package/dist/commands/pipeline-action.js +44 -6
- package/dist/commands/publish.js +2 -1
- package/dist/commands/validate-tasks.js +4 -1
- package/dist/composition-root.js +9 -5
- package/dist/orchestration/build-app-context.js +2 -0
- package/package.json +1 -1
- package/dist/commands/update-quality-scores.d.ts +0 -5
- package/dist/commands/update-quality-scores.js +0 -20
- package/dist/lib/agent-behavior-report.d.ts +0 -8
- package/dist/lib/agent-behavior-report.js +0 -185
- package/dist/lib/baseline.d.ts +0 -19
- package/dist/lib/baseline.js +0 -153
- package/dist/lib/calculate-scores.d.ts +0 -23
- package/dist/lib/calculate-scores.js +0 -42
- package/dist/lib/compare.d.ts +0 -18
- package/dist/lib/compare.js +0 -170
- package/dist/lib/coverage-audit.d.ts +0 -4
- package/dist/lib/coverage-audit.js +0 -42
- package/dist/lib/discovery-report.d.ts +0 -13
- package/dist/lib/discovery-report.js +0 -57
- package/dist/lib/fetch-docs.d.ts +0 -30
- package/dist/lib/fetch-docs.js +0 -171
- package/dist/lib/generate-configs.d.ts +0 -25
- package/dist/lib/generate-configs.js +0 -42
- package/dist/lib/grader-api.d.ts +0 -21
- package/dist/lib/grader-api.js +0 -34
- package/dist/lib/grader-compare.d.ts +0 -19
- package/dist/lib/grader-compare.js +0 -91
- package/dist/lib/grader-consistency.d.ts +0 -27
- package/dist/lib/grader-consistency.js +0 -79
- package/dist/lib/grader-sensitivity.d.ts +0 -19
- package/dist/lib/grader-sensitivity.js +0 -75
- package/dist/lib/grader-validate.d.ts +0 -19
- package/dist/lib/grader-validate.js +0 -78
- package/dist/lib/measure-retrieval.d.ts +0 -14
- package/dist/lib/measure-retrieval.js +0 -71
- package/dist/lib/pr-comment.d.ts +0 -16
- package/dist/lib/pr-comment.js +0 -28
- package/dist/lib/readiness-report.d.ts +0 -13
- package/dist/lib/readiness-report.js +0 -108
- package/dist/lib/webhook-server.d.ts +0 -11
- package/dist/lib/webhook-server.js +0 -24
- package/dist/lib/weekly-digest.d.ts +0 -24
- package/dist/lib/weekly-digest.js +0 -148
- package/dist/orchestration/env-bridge.d.ts +0 -21
- package/dist/orchestration/env-bridge.js +0 -66
- package/dist/orchestration/steps/fetch-docs-shell.d.ts +0 -17
- package/dist/orchestration/steps/fetch-docs-shell.js +0 -30
- package/dist/pipeline/steps/calculate-scores-step.d.ts +0 -11
- package/dist/pipeline/steps/calculate-scores-step.js +0 -89
- package/dist/pipeline/steps/compare-step.d.ts +0 -18
- package/dist/pipeline/steps/compare-step.js +0 -90
- package/dist/pipeline/steps/eval-step.d.ts +0 -53
- package/dist/pipeline/steps/eval-step.js +0 -347
- package/dist/pipeline/steps/fetch-docs-step.d.ts +0 -11
- package/dist/pipeline/steps/fetch-docs-step.js +0 -84
- package/dist/pipeline/steps/generate-configs-step.d.ts +0 -11
- package/dist/pipeline/steps/generate-configs-step.js +0 -98
- package/dist/pipeline/steps/grader-consistency-step.d.ts +0 -21
- package/dist/pipeline/steps/grader-consistency-step.js +0 -74
- package/dist/pipeline/steps/publish-report-step.d.ts +0 -57
- package/dist/pipeline/steps/publish-report-step.js +0 -243
- package/dist/pipeline/steps/report-step.d.ts +0 -13
- package/dist/pipeline/steps/report-step.js +0 -56
- package/dist/pipeline/steps/update-scores-step.d.ts +0 -11
- package/dist/pipeline/steps/update-scores-step.js +0 -42
- package/dist/scripts/agent-behavior-report.d.ts +0 -19
- package/dist/scripts/agent-behavior-report.js +0 -315
- package/dist/scripts/baseline.d.ts +0 -43
- package/dist/scripts/baseline.js +0 -267
- package/dist/scripts/calculate-scores.d.ts +0 -166
- package/dist/scripts/calculate-scores.js +0 -1296
- package/dist/scripts/compare.d.ts +0 -22
- package/dist/scripts/compare.js +0 -334
- package/dist/scripts/coverage-audit.d.ts +0 -44
- package/dist/scripts/coverage-audit.js +0 -209
- package/dist/scripts/debug-eval.d.ts +0 -19
- package/dist/scripts/debug-eval.js +0 -73
- package/dist/scripts/discovery-report.d.ts +0 -58
- package/dist/scripts/discovery-report.js +0 -250
- package/dist/scripts/fetch-docs.d.ts +0 -35
- package/dist/scripts/fetch-docs.js +0 -472
- package/dist/scripts/generate-configs.d.ts +0 -66
- package/dist/scripts/generate-configs.js +0 -459
- package/dist/scripts/grader-api.d.ts +0 -27
- package/dist/scripts/grader-api.js +0 -206
- package/dist/scripts/grader-compare.d.ts +0 -22
- package/dist/scripts/grader-compare.js +0 -368
- package/dist/scripts/grader-consistency.d.ts +0 -20
- package/dist/scripts/grader-consistency.js +0 -313
- package/dist/scripts/grader-sensitivity.d.ts +0 -22
- package/dist/scripts/grader-sensitivity.js +0 -354
- package/dist/scripts/grader-validate.d.ts +0 -19
- package/dist/scripts/grader-validate.js +0 -267
- package/dist/scripts/measure-retrieval.d.ts +0 -10
- package/dist/scripts/measure-retrieval.js +0 -145
- package/dist/scripts/pipeline.d.ts +0 -76
- package/dist/scripts/pipeline.js +0 -1031
- package/dist/scripts/pr-comment.d.ts +0 -10
- package/dist/scripts/pr-comment.js +0 -510
- package/dist/scripts/readiness-report.d.ts +0 -88
- package/dist/scripts/readiness-report.js +0 -342
- package/dist/scripts/update-quality-scores.d.ts +0 -15
- package/dist/scripts/update-quality-scores.js +0 -184
- package/dist/scripts/validate.d.ts +0 -13
- package/dist/scripts/validate.js +0 -79
- package/dist/scripts/webhook-server.d.ts +0 -26
- package/dist/scripts/webhook-server.js +0 -147
- package/dist/scripts/weekly-digest.d.ts +0 -24
- package/dist/scripts/weekly-digest.js +0 -144
- package/dist/sinks/format-slack.d.ts +0 -64
- package/dist/sinks/format-slack.js +0 -306
- package/dist/sinks/slack-sink.d.ts +0 -27
- package/dist/sinks/slack-sink.js +0 -78
- package/dist/sinks/webhook-sink.d.ts +0 -19
- package/dist/sinks/webhook-sink.js +0 -50
- package/tasks/.expanded.agentic.yaml +0 -51
- package/tasks/.expanded.yaml +0 -66
|
@@ -90,9 +90,9 @@ export declare const thresholdYaml = "# Example quality threshold configuration.
|
|
|
90
90
|
/** Parsed ailf-config example data (JSON-safe) */
|
|
91
91
|
export declare const ailfConfigData: {
|
|
92
92
|
readonly source: {
|
|
93
|
-
readonly projectId: "
|
|
94
|
-
readonly dataset: "
|
|
95
|
-
readonly baseUrl: "https://
|
|
93
|
+
readonly projectId: "3do82whm";
|
|
94
|
+
readonly dataset: "next";
|
|
95
|
+
readonly baseUrl: "https://www.sanity.io/docs";
|
|
96
96
|
};
|
|
97
97
|
readonly triggers: {
|
|
98
98
|
readonly pr: {
|
|
@@ -110,7 +110,7 @@ export declare const ailfConfigData: {
|
|
|
110
110
|
};
|
|
111
111
|
};
|
|
112
112
|
/** Raw YAML string for ailf-config example (preserves comments) */
|
|
113
|
-
export declare const ailfConfigYaml = "# \u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n# .ailf/config.yaml \u2014 AI Literacy Framework project configuration\n# \u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n#\n# This file configures how the AILF evaluation pipeline runs in this\n# repository. Place it at .ailf/config.yaml in your project root.\n#\n# Docs: https://github.com/sanity-io/ai-literacy-framework\n# \u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n\n# Documentation source \u2014
|
|
113
|
+
export declare const ailfConfigYaml = "# \u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n# .ailf/config.yaml \u2014 AI Literacy Framework project configuration\n# \u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n#\n# This file configures how the AILF evaluation pipeline runs in this\n# repository. Place it at .ailf/config.yaml in your project root.\n#\n# Evaluations are submitted to the AILF API (ailf-api.sanity.build).\n# The API handles LLM calls, doc fetching, grading, and report\n# publishing. Your repo only needs one secret: AILF_API_KEY.\n#\n# Docs: https://github.com/sanity-io/ai-literacy-framework\n# \u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n\n# Documentation source \u2014 which docs are being evaluated.\n#\n# This tells the pipeline which Sanity project and dataset contain\n# the documentation under test. 
For most users, this is Sanity's own\n# docs project.\n#\n# projectId \u2014 Sanity project ID (find yours at sanity.io/manage)\n# dataset \u2014 the dataset to query (e.g., \"production\", \"next\")\n# baseUrl \u2014 the public URL of your documentation site\n# (used by agentic mode to test agent discoverability)\nsource:\n projectId: \"3do82whm\"\n dataset: next\n baseUrl: \"https://www.sanity.io/docs\"\n\n# Trigger configuration \u2014 when evaluations run automatically.\n#\n# Each key is a trigger context. The pipeline checks which trigger\n# matches the current execution context (PR, merge, schedule, etc.)\n# and applies its settings.\n#\n# mode options:\n# validate-only \u2014 check that task YAML parses correctly (fast, no LLM calls)\n# eval \u2014 run the full evaluation pipeline\n#\n# paths \u2014 only trigger when files matching these globs change\n# blocking \u2014 if true, a failing eval blocks the PR merge\n# notify \u2014 if true, post results to configured notification channels\ntriggers:\n # On pull requests: just validate task files parse correctly\n pr:\n mode: validate-only\n\n # When .ailf/ files change in a PR: run a real evaluation\n pr-task-change:\n mode: eval\n paths: [\".ailf/**\"]\n\n # On merge to main: run evaluation (non-blocking)\n main:\n mode: eval\n blocking: false\n notify: true\n";
|
|
114
114
|
/** Parsed task data for example-groq-blog-listing (JSON-safe) */
|
|
115
115
|
export declare const exampleGroqBlogListingData: readonly [{
|
|
116
116
|
readonly id: "example-groq-blog-listing";
|
|
@@ -188,3 +188,5 @@ export interface ExampleRecord {
|
|
|
188
188
|
yaml: string;
|
|
189
189
|
}
|
|
190
190
|
export declare const EXAMPLES: Record<ExampleType, ExampleRecord>;
|
|
191
|
+
/** GitHub Actions workflow template for AI Literacy evaluation */
|
|
192
|
+
export declare const workflowYaml = "# \u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n# AI Literacy Evaluation \u2014 GitHub Actions workflow\n# \u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n#\n# This workflow submits evaluations to the AILF API when task or config\n# files change in a pull request. 
The API handles all processing\n# (LLM calls, doc fetching, grading, report publishing).\n#\n# Prerequisites:\n# Add one secret to your repository (Settings \u2192 Secrets \u2192 Actions):\n# AILF_API_KEY \u2014 your API key (starts with ailf_live_sk_)\n#\n# Customization:\n# - Adjust `paths` to match your documentation file locations\n# - Set full_eval to true for comprehensive (slower) evaluation\n# - See: https://github.com/sanity-labs/ai-literacy-framework/blob/main/docs/API_GATEWAY.md\n# \u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n\nname: AI Literacy Eval\n\non:\n pull_request:\n branches: [main]\n paths:\n - \".ailf/**\"\n\n # Manual trigger from the Actions tab\n workflow_dispatch:\n inputs:\n full_eval:\n description: \"Run full evaluation (all tests, slower)\"\n type: boolean\n default: false\n\nconcurrency:\n group: ailf-eval-${{ github.event.pull_request.number || github.ref }}\n cancel-in-progress: true\n\njobs:\n evaluate:\n name: AI Literacy Evaluation\n runs-on: ubuntu-latest\n permissions:\n pull-requests: write\n steps:\n # \u2500\u2500\u2500 Submit evaluation to the AILF API \u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n - name: Submit evaluation\n id: submit\n env:\n AILF_API_KEY: ${{ secrets.AILF_API_KEY }}\n FULL_EVAL: ${{ inputs.full_eval || 'false' }}\n run: |\n if [ \"$FULL_EVAL\" = \"true\" ]; then\n DEBUG_FIELD=\"\"\n else\n DEBUG_FIELD='\"debug\": { \"enabled\": true, \"firstN\": 2 },'\n fi\n\n PAYLOAD=$(cat <<EOF\n {\n \"mode\": \"baseline\",\n ${DEBUG_FIELD}\n \"publish\": true,\n 
\"compare\": true\n }\n EOF\n )\n\n RESPONSE=$(curl -sf -X POST \\\n -H \"Authorization: Bearer $AILF_API_KEY\" \\\n -H \"Content-Type: application/json\" \\\n https://ailf-api.sanity.build/v1/pipeline \\\n -d \"$PAYLOAD\")\n\n JOB_ID=$(echo \"$RESPONSE\" | jq -r '.jobId')\n echo \"job_id=$JOB_ID\" >> $GITHUB_OUTPUT\n echo \"\uD83D\uDCCB Submitted job: $JOB_ID\"\n\n # \u2500\u2500\u2500 Poll for results (long-polling) \u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n - name: Wait for results\n id: results\n env:\n AILF_API_KEY: ${{ secrets.AILF_API_KEY }}\n JOB_ID: ${{ steps.submit.outputs.job_id }}\n run: |\n for i in $(seq 1 40); do\n RESPONSE=$(curl -s \\\n -H \"Authorization: Bearer $AILF_API_KEY\" \\\n -H \"Prefer: wait=25\" \\\n \"https://ailf-api.sanity.build/v1/jobs/$JOB_ID\")\n\n STATUS=$(echo \"$RESPONSE\" | jq -r '.status')\n\n case \"$STATUS\" in\n completed)\n echo \"status=completed\" >> $GITHUB_OUTPUT\n echo \"report_id=$(echo $RESPONSE | jq -r '.reportId // empty')\" >> $GITHUB_OUTPUT\n echo \"score=$(echo $RESPONSE | jq -r '.score // empty')\" >> $GITHUB_OUTPUT\n echo \"\u2705 Evaluation completed\"\n exit 0\n ;;\n failed|timed-out)\n echo \"status=$STATUS\" >> $GITHUB_OUTPUT\n echo \"::error::Evaluation $STATUS\"\n exit 1\n ;;\n *)\n echo \"\u23F3 [$i/40] $STATUS\"\n ;;\n esac\n done\n\n echo \"::error::Timed out waiting for evaluation\"\n exit 1\n\n # \u2500\u2500\u2500 Post results to PR \u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n - name: Post PR comment\n if: >-\n always() && github.event_name == 'pull_request' &&\n steps.submit.outputs.job_id != ''\n uses: actions/github-script@v7\n env:\n JOB_STATUS: ${{ steps.results.outputs.status || 'unknown' }}\n REPORT_ID: ${{ 
steps.results.outputs.report_id || '' }}\n JOB_ID: ${{ steps.submit.outputs.job_id }}\n SCORE: ${{ steps.results.outputs.score || '' }}\n with:\n script: |\n const marker = '<!-- ailf-score-report -->';\n const status = process.env.JOB_STATUS;\n const reportId = process.env.REPORT_ID;\n const jobId = process.env.JOB_ID;\n const score = process.env.SCORE;\n\n let icon, message;\n if (status === 'completed') {\n icon = '\u2705';\n message = score\n ? `Evaluation completed \u2014 score: **${score}/100**`\n : 'Evaluation completed successfully.';\n } else if (status === 'failed' || status === 'timed-out') {\n icon = '\u26A0\uFE0F';\n message = `Evaluation ${status}.`;\n } else {\n icon = '\u23F3';\n message = 'Evaluation status unknown (may still be running).';\n }\n\n let body = `${marker}\\n## ${icon} AI Literacy Evaluation\\n\\n${message}\\n`;\n if (reportId) {\n body += `\\n\uD83D\uDD17 [View detailed report](https://ailf-api.sanity.build/v1/reports/${reportId})\\n`;\n }\n body += `\\n<sub>Job: \\`${jobId}\\`</sub>\\n`;\n\n const { data: comments } = await github.rest.issues.listComments({\n owner: context.repo.owner,\n repo: context.repo.repo,\n issue_number: context.issue.number,\n });\n const existing = comments.find(c => c.body?.includes(marker));\n\n if (existing) {\n await github.rest.issues.updateComment({\n owner: context.repo.owner,\n repo: context.repo.repo,\n comment_id: existing.id,\n body,\n });\n } else {\n await github.rest.issues.createComment({\n owner: context.repo.owner,\n repo: context.repo.repo,\n issue_number: context.issue.number,\n body,\n });\n }\n\n # \u2500\u2500\u2500 Job summary \u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n - name: Summary\n if: always()\n env:\n JOB_STATUS: ${{ steps.results.outputs.status || 'unknown' }}\n 
REPORT_ID: ${{ steps.results.outputs.report_id || '' }}\n JOB_ID: ${{ steps.submit.outputs.job_id }}\n SCORE: ${{ steps.results.outputs.score || '' }}\n run: |\n {\n echo \"## \uD83D\uDCCA AI Literacy Evaluation\"\n echo \"\"\n echo \"| Field | Value |\"\n echo \"|-------|-------|\"\n echo \"| Job | \\`$JOB_ID\\` |\"\n echo \"| Status | $JOB_STATUS |\"\n [ -n \"$SCORE\" ] && echo \"| Score | $SCORE/100 |\"\n [ -n \"$REPORT_ID\" ] && echo \"| Report | [$REPORT_ID](https://ailf-api.sanity.build/v1/reports/$REPORT_ID) |\"\n } >> \"$GITHUB_STEP_SUMMARY\"\n";
|
|
@@ -119,9 +119,9 @@ export const thresholdYaml = "# Example quality threshold configuration.\n#\n# T
|
|
|
119
119
|
/** Parsed ailf-config example data (JSON-safe) */
|
|
120
120
|
export const ailfConfigData = {
|
|
121
121
|
"source": {
|
|
122
|
-
"projectId": "
|
|
123
|
-
"dataset": "
|
|
124
|
-
"baseUrl": "https://
|
|
122
|
+
"projectId": "3do82whm",
|
|
123
|
+
"dataset": "next",
|
|
124
|
+
"baseUrl": "https://www.sanity.io/docs"
|
|
125
125
|
},
|
|
126
126
|
"triggers": {
|
|
127
127
|
"pr": {
|
|
@@ -141,7 +141,7 @@ export const ailfConfigData = {
|
|
|
141
141
|
}
|
|
142
142
|
};
|
|
143
143
|
/** Raw YAML string for ailf-config example (preserves comments) */
|
|
144
|
-
export const ailfConfigYaml = "# ──────────────────────────────────────────────────────────────────────\n# .ailf/config.yaml — AI Literacy Framework project configuration\n# ──────────────────────────────────────────────────────────────────────\n#\n# This file configures how the AILF evaluation pipeline runs in this\n# repository. Place it at .ailf/config.yaml in your project root.\n#\n# Docs: https://github.com/sanity-io/ai-literacy-framework\n# ──────────────────────────────────────────────────────────────────────\n\n# Documentation source —
|
|
144
|
+
export const ailfConfigYaml = "# ──────────────────────────────────────────────────────────────────────\n# .ailf/config.yaml — AI Literacy Framework project configuration\n# ──────────────────────────────────────────────────────────────────────\n#\n# This file configures how the AILF evaluation pipeline runs in this\n# repository. Place it at .ailf/config.yaml in your project root.\n#\n# Evaluations are submitted to the AILF API (ailf-api.sanity.build).\n# The API handles LLM calls, doc fetching, grading, and report\n# publishing. Your repo only needs one secret: AILF_API_KEY.\n#\n# Docs: https://github.com/sanity-io/ai-literacy-framework\n# ──────────────────────────────────────────────────────────────────────\n\n# Documentation source — which docs are being evaluated.\n#\n# This tells the pipeline which Sanity project and dataset contain\n# the documentation under test. For most users, this is Sanity's own\n# docs project.\n#\n# projectId — Sanity project ID (find yours at sanity.io/manage)\n# dataset — the dataset to query (e.g., \"production\", \"next\")\n# baseUrl — the public URL of your documentation site\n# (used by agentic mode to test agent discoverability)\nsource:\n projectId: \"3do82whm\"\n dataset: next\n baseUrl: \"https://www.sanity.io/docs\"\n\n# Trigger configuration — when evaluations run automatically.\n#\n# Each key is a trigger context. 
The pipeline checks which trigger\n# matches the current execution context (PR, merge, schedule, etc.)\n# and applies its settings.\n#\n# mode options:\n# validate-only — check that task YAML parses correctly (fast, no LLM calls)\n# eval — run the full evaluation pipeline\n#\n# paths — only trigger when files matching these globs change\n# blocking — if true, a failing eval blocks the PR merge\n# notify — if true, post results to configured notification channels\ntriggers:\n # On pull requests: just validate task files parse correctly\n pr:\n mode: validate-only\n\n # When .ailf/ files change in a PR: run a real evaluation\n pr-task-change:\n mode: eval\n paths: [\".ailf/**\"]\n\n # On merge to main: run evaluation (non-blocking)\n main:\n mode: eval\n blocking: false\n notify: true\n";
|
|
145
145
|
/** Parsed task data for example-groq-blog-listing (JSON-safe) */
|
|
146
146
|
export const exampleGroqBlogListingData = [
|
|
147
147
|
{
|
|
@@ -283,3 +283,8 @@ export const EXAMPLES = {
|
|
|
283
283
|
yaml: Object.values(taskYamlFiles).join("\n"),
|
|
284
284
|
},
|
|
285
285
|
};
|
|
286
|
+
// ---------------------------------------------------------------------------
|
|
287
|
+
// Raw file exports (non-data files, exported as raw strings)
|
|
288
|
+
// ---------------------------------------------------------------------------
|
|
289
|
+
/** GitHub Actions workflow template for AI Literacy evaluation */
|
|
290
|
+
export const workflowYaml = "# ──────────────────────────────────────────────────────────────────────\n# AI Literacy Evaluation — GitHub Actions workflow\n# ──────────────────────────────────────────────────────────────────────\n#\n# This workflow submits evaluations to the AILF API when task or config\n# files change in a pull request. The API handles all processing\n# (LLM calls, doc fetching, grading, report publishing).\n#\n# Prerequisites:\n# Add one secret to your repository (Settings → Secrets → Actions):\n# AILF_API_KEY — your API key (starts with ailf_live_sk_)\n#\n# Customization:\n# - Adjust `paths` to match your documentation file locations\n# - Set full_eval to true for comprehensive (slower) evaluation\n# - See: https://github.com/sanity-labs/ai-literacy-framework/blob/main/docs/API_GATEWAY.md\n# ──────────────────────────────────────────────────────────────────────\n\nname: AI Literacy Eval\n\non:\n pull_request:\n branches: [main]\n paths:\n - \".ailf/**\"\n\n # Manual trigger from the Actions tab\n workflow_dispatch:\n inputs:\n full_eval:\n description: \"Run full evaluation (all tests, slower)\"\n type: boolean\n default: false\n\nconcurrency:\n group: ailf-eval-${{ github.event.pull_request.number || github.ref }}\n cancel-in-progress: true\n\njobs:\n evaluate:\n name: AI Literacy Evaluation\n runs-on: ubuntu-latest\n permissions:\n pull-requests: write\n steps:\n # ─── Submit evaluation to the AILF API ─────────────────────\n - name: Submit evaluation\n id: submit\n env:\n AILF_API_KEY: ${{ secrets.AILF_API_KEY }}\n FULL_EVAL: ${{ inputs.full_eval || 'false' }}\n run: |\n if [ \"$FULL_EVAL\" = \"true\" ]; then\n DEBUG_FIELD=\"\"\n else\n DEBUG_FIELD='\"debug\": { \"enabled\": true, \"firstN\": 2 },'\n fi\n\n PAYLOAD=$(cat <<EOF\n {\n \"mode\": \"baseline\",\n ${DEBUG_FIELD}\n \"publish\": true,\n \"compare\": true\n }\n EOF\n )\n\n RESPONSE=$(curl -sf -X POST \\\n -H \"Authorization: Bearer $AILF_API_KEY\" \\\n -H \"Content-Type: 
application/json\" \\\n https://ailf-api.sanity.build/v1/pipeline \\\n -d \"$PAYLOAD\")\n\n JOB_ID=$(echo \"$RESPONSE\" | jq -r '.jobId')\n echo \"job_id=$JOB_ID\" >> $GITHUB_OUTPUT\n echo \"📋 Submitted job: $JOB_ID\"\n\n # ─── Poll for results (long-polling) ───────────────────────\n - name: Wait for results\n id: results\n env:\n AILF_API_KEY: ${{ secrets.AILF_API_KEY }}\n JOB_ID: ${{ steps.submit.outputs.job_id }}\n run: |\n for i in $(seq 1 40); do\n RESPONSE=$(curl -s \\\n -H \"Authorization: Bearer $AILF_API_KEY\" \\\n -H \"Prefer: wait=25\" \\\n \"https://ailf-api.sanity.build/v1/jobs/$JOB_ID\")\n\n STATUS=$(echo \"$RESPONSE\" | jq -r '.status')\n\n case \"$STATUS\" in\n completed)\n echo \"status=completed\" >> $GITHUB_OUTPUT\n echo \"report_id=$(echo $RESPONSE | jq -r '.reportId // empty')\" >> $GITHUB_OUTPUT\n echo \"score=$(echo $RESPONSE | jq -r '.score // empty')\" >> $GITHUB_OUTPUT\n echo \"✅ Evaluation completed\"\n exit 0\n ;;\n failed|timed-out)\n echo \"status=$STATUS\" >> $GITHUB_OUTPUT\n echo \"::error::Evaluation $STATUS\"\n exit 1\n ;;\n *)\n echo \"⏳ [$i/40] $STATUS\"\n ;;\n esac\n done\n\n echo \"::error::Timed out waiting for evaluation\"\n exit 1\n\n # ─── Post results to PR ────────────────────────────────────\n - name: Post PR comment\n if: >-\n always() && github.event_name == 'pull_request' &&\n steps.submit.outputs.job_id != ''\n uses: actions/github-script@v7\n env:\n JOB_STATUS: ${{ steps.results.outputs.status || 'unknown' }}\n REPORT_ID: ${{ steps.results.outputs.report_id || '' }}\n JOB_ID: ${{ steps.submit.outputs.job_id }}\n SCORE: ${{ steps.results.outputs.score || '' }}\n with:\n script: |\n const marker = '<!-- ailf-score-report -->';\n const status = process.env.JOB_STATUS;\n const reportId = process.env.REPORT_ID;\n const jobId = process.env.JOB_ID;\n const score = process.env.SCORE;\n\n let icon, message;\n if (status === 'completed') {\n icon = '✅';\n message = score\n ? 
`Evaluation completed — score: **${score}/100**`\n : 'Evaluation completed successfully.';\n } else if (status === 'failed' || status === 'timed-out') {\n icon = '⚠️';\n message = `Evaluation ${status}.`;\n } else {\n icon = '⏳';\n message = 'Evaluation status unknown (may still be running).';\n }\n\n let body = `${marker}\\n## ${icon} AI Literacy Evaluation\\n\\n${message}\\n`;\n if (reportId) {\n body += `\\n🔗 [View detailed report](https://ailf-api.sanity.build/v1/reports/${reportId})\\n`;\n }\n body += `\\n<sub>Job: \\`${jobId}\\`</sub>\\n`;\n\n const { data: comments } = await github.rest.issues.listComments({\n owner: context.repo.owner,\n repo: context.repo.repo,\n issue_number: context.issue.number,\n });\n const existing = comments.find(c => c.body?.includes(marker));\n\n if (existing) {\n await github.rest.issues.updateComment({\n owner: context.repo.owner,\n repo: context.repo.repo,\n comment_id: existing.id,\n body,\n });\n } else {\n await github.rest.issues.createComment({\n owner: context.repo.owner,\n repo: context.repo.repo,\n issue_number: context.issue.number,\n body,\n });\n }\n\n # ─── Job summary ───────────────────────────────────────────\n - name: Summary\n if: always()\n env:\n JOB_STATUS: ${{ steps.results.outputs.status || 'unknown' }}\n REPORT_ID: ${{ steps.results.outputs.report_id || '' }}\n JOB_ID: ${{ steps.submit.outputs.job_id }}\n SCORE: ${{ steps.results.outputs.score || '' }}\n run: |\n {\n echo \"## 📊 AI Literacy Evaluation\"\n echo \"\"\n echo \"| Field | Value |\"\n echo \"|-------|-------|\"\n echo \"| Job | \\`$JOB_ID\\` |\"\n echo \"| Status | $JOB_STATUS |\"\n [ -n \"$SCORE\" ] && echo \"| Score | $SCORE/100 |\"\n [ -n \"$REPORT_ID\" ] && echo \"| Report | [$REPORT_ID](https://ailf-api.sanity.build/v1/reports/$REPORT_ID) |\"\n } >> \"$GITHUB_STEP_SUMMARY\"\n";
|
|
@@ -95,6 +95,10 @@ export interface ResolvedConfig {
|
|
|
95
95
|
taskSourceType?: "content-lake" | "yaml";
|
|
96
96
|
/** Path to repo-based tasks directory (e.g., .ailf/tasks/) */
|
|
97
97
|
repoTasksPath?: string;
|
|
98
|
+
/** Report store project ID from .ailf/config.yaml reportStore block */
|
|
99
|
+
reportStoreProjectId?: string;
|
|
100
|
+
/** Report store dataset from .ailf/config.yaml reportStore block */
|
|
101
|
+
reportStoreDataset?: string;
|
|
98
102
|
/** Callback URL configuration for API-triggered evaluations */
|
|
99
103
|
callback?: {
|
|
100
104
|
url: string;
|
|
@@ -185,10 +185,20 @@ export declare const RepoTaskFileSchema: z.ZodArray<z.ZodObject<{
|
|
|
185
185
|
}, z.core.$strip>>;
|
|
186
186
|
}, z.core.$strip>>;
|
|
187
187
|
/**
|
|
188
|
-
* Zod schema for .ailf/config.yaml — controls
|
|
189
|
-
*
|
|
188
|
+
* Zod schema for .ailf/config.yaml — controls documentation source,
|
|
189
|
+
* report destination, and trigger behavior for evaluations from an
|
|
190
|
+
* external repository.
|
|
190
191
|
*/
|
|
191
192
|
export declare const RepoConfigSchema: z.ZodObject<{
|
|
193
|
+
source: z.ZodOptional<z.ZodObject<{
|
|
194
|
+
projectId: z.ZodOptional<z.ZodString>;
|
|
195
|
+
dataset: z.ZodOptional<z.ZodString>;
|
|
196
|
+
baseUrl: z.ZodOptional<z.ZodString>;
|
|
197
|
+
}, z.core.$strip>>;
|
|
198
|
+
reportStore: z.ZodOptional<z.ZodObject<{
|
|
199
|
+
projectId: z.ZodString;
|
|
200
|
+
dataset: z.ZodString;
|
|
201
|
+
}, z.core.$strip>>;
|
|
192
202
|
triggers: z.ZodOptional<z.ZodObject<{
|
|
193
203
|
pr: z.ZodOptional<z.ZodObject<{
|
|
194
204
|
mode: z.ZodDefault<z.ZodEnum<{
|
|
@@ -189,10 +189,36 @@ const ScheduleTriggerSchema = TriggerConfigSchema.extend({
|
|
|
189
189
|
cron: z.string().min(1),
|
|
190
190
|
});
|
|
191
191
|
/**
|
|
192
|
-
*
|
|
193
|
-
*
|
|
192
|
+
* Documentation source configuration.
|
|
193
|
+
* Defines which Sanity project holds the documentation being evaluated.
|
|
194
|
+
*/
|
|
195
|
+
const SourceConfigSchema = z
|
|
196
|
+
.object({
|
|
197
|
+
projectId: z.string().min(1).optional(),
|
|
198
|
+
dataset: z.string().min(1).optional(),
|
|
199
|
+
baseUrl: z.string().url().optional(),
|
|
200
|
+
})
|
|
201
|
+
.optional();
|
|
202
|
+
/**
|
|
203
|
+
* Report store configuration.
|
|
204
|
+
* Defines which Sanity project receives `ailf.report` documents.
|
|
205
|
+
* This should match the project/dataset configured in the user's Studio.
|
|
206
|
+
* The API token comes from the AILF_REPORT_SANITY_API_TOKEN env var.
|
|
207
|
+
*/
|
|
208
|
+
const ReportStoreConfigSchema = z
|
|
209
|
+
.object({
|
|
210
|
+
projectId: z.string().min(1),
|
|
211
|
+
dataset: z.string().min(1),
|
|
212
|
+
})
|
|
213
|
+
.optional();
|
|
214
|
+
/**
|
|
215
|
+
* Zod schema for .ailf/config.yaml — controls documentation source,
|
|
216
|
+
* report destination, and trigger behavior for evaluations from an
|
|
217
|
+
* external repository.
|
|
194
218
|
*/
|
|
195
219
|
export const RepoConfigSchema = z.object({
|
|
220
|
+
source: SourceConfigSchema,
|
|
221
|
+
reportStore: ReportStoreConfigSchema,
|
|
196
222
|
triggers: z
|
|
197
223
|
.object({
|
|
198
224
|
pr: TriggerConfigSchema.optional(),
|
package/dist/cli.js
CHANGED
|
File without changes
|
package/dist/commands/init.js
CHANGED
|
@@ -18,7 +18,7 @@
|
|
|
18
18
|
import { Command } from "commander";
|
|
19
19
|
import { existsSync, mkdirSync, writeFileSync } from "fs";
|
|
20
20
|
import { resolve, relative } from "path";
|
|
21
|
-
import { ailfConfigData, ailfConfigYaml, taskYamlFiles, TASK_FILE_NAMES, allTaskData, } from "../_vendor/ailf-core/index.js";
|
|
21
|
+
import { ailfConfigData, ailfConfigYaml, taskYamlFiles, TASK_FILE_NAMES, allTaskData, workflowYaml, } from "../_vendor/ailf-core/index.js";
|
|
22
22
|
// ---------------------------------------------------------------------------
|
|
23
23
|
// Command factory
|
|
24
24
|
// ---------------------------------------------------------------------------
|
|
@@ -127,7 +127,40 @@ async function runInit(opts) {
|
|
|
127
127
|
else {
|
|
128
128
|
skipped.push(rel(targetDir, gitignorePath));
|
|
129
129
|
}
|
|
130
|
-
// 5.
|
|
130
|
+
// 5. Write GitHub Actions workflow
|
|
131
|
+
const workflowDir = resolve(targetDir, ".github", "workflows");
|
|
132
|
+
const workflowPath = resolve(workflowDir, "ailf-eval.yml");
|
|
133
|
+
mkdirSync(workflowDir, { recursive: true });
|
|
134
|
+
if (writeIfNew(workflowPath, workflowYaml, force)) {
|
|
135
|
+
written.push(rel(targetDir, workflowPath));
|
|
136
|
+
}
|
|
137
|
+
else {
|
|
138
|
+
skipped.push(rel(targetDir, workflowPath));
|
|
139
|
+
}
|
|
140
|
+
// 6. Write .env.example (secrets template — never committed)
|
|
141
|
+
const envExamplePath = resolve(targetDir, ".env.example");
|
|
142
|
+
const envExampleContent = `# ═══════════════════════════════════════════════════════════════════
|
|
143
|
+
# AI Literacy Framework — Environment Variables
|
|
144
|
+
# ═══════════════════════════════════════════════════════════════════
|
|
145
|
+
# Copy this file to .env and fill in your values:
|
|
146
|
+
# cp .env.example .env
|
|
147
|
+
#
|
|
148
|
+
# IMPORTANT: Never commit .env to version control.
|
|
149
|
+
# ═══════════════════════════════════════════════════════════════════
|
|
150
|
+
|
|
151
|
+
# ─── AILF API Key (required) ─────────────────────────────────────
|
|
152
|
+
# Authenticates requests to the AILF API (ailf-api.sanity.build).
|
|
153
|
+
# The API handles LLM calls, doc fetching, grading, and publishing.
|
|
154
|
+
# Request a key from the AILF team.
|
|
155
|
+
AILF_API_KEY=ailf_live_sk_...
|
|
156
|
+
`;
|
|
157
|
+
if (writeIfNew(envExamplePath, envExampleContent, force)) {
|
|
158
|
+
written.push(rel(targetDir, envExamplePath));
|
|
159
|
+
}
|
|
160
|
+
else {
|
|
161
|
+
skipped.push(rel(targetDir, envExamplePath));
|
|
162
|
+
}
|
|
163
|
+
// 7. Summary
|
|
131
164
|
console.log();
|
|
132
165
|
if (written.length > 0) {
|
|
133
166
|
for (const f of written) {
|
|
@@ -143,8 +176,9 @@ async function runInit(opts) {
|
|
|
143
176
|
console.log();
|
|
144
177
|
console.log(" Next steps:");
|
|
145
178
|
console.log();
|
|
146
|
-
console.log(` 1.
|
|
147
|
-
console.log(
|
|
148
|
-
console.log(" 3.
|
|
179
|
+
console.log(` 1. Customize the example tasks in ${rel(targetDir, tasksDir)}/`);
|
|
180
|
+
console.log(" 2. Validate: npx @sanity/ailf validate-tasks .ailf/tasks/");
|
|
181
|
+
console.log(" 3. Add AILF_API_KEY as a GitHub Actions secret (Settings → Secrets)");
|
|
182
|
+
console.log(" 4. Push — the workflow at .github/workflows/ailf-eval.yml handles the rest");
|
|
149
183
|
console.log();
|
|
150
184
|
}
|
|
@@ -10,7 +10,7 @@
|
|
|
10
10
|
*
|
|
11
11
|
* @see packages/eval/src/orchestration/ for the step-based pipeline
|
|
12
12
|
*/
|
|
13
|
-
import { writeFileSync } from "fs";
|
|
13
|
+
import { existsSync, readFileSync, writeFileSync } from "fs";
|
|
14
14
|
import { dirname, resolve } from "path";
|
|
15
15
|
import { fileURLToPath } from "url";
|
|
16
16
|
import { classifyUrls } from "../pipeline/classify-url.js";
|
|
@@ -18,6 +18,8 @@ import { assessImpact, buildReverseMapping, } from "../pipeline/reverse-mapping.
|
|
|
18
18
|
import { buildAppContext } from "../orchestration/build-app-context.js";
|
|
19
19
|
import { buildStepSequence } from "../orchestration/build-step-sequence.js";
|
|
20
20
|
import { orchestratePipeline } from "../orchestration/pipeline-orchestrator.js";
|
|
21
|
+
import { load } from "js-yaml";
|
|
22
|
+
import { parseRepoConfig, } from "../adapters/task-sources/repo-schemas.js";
|
|
21
23
|
const __dirname = dirname(fileURLToPath(import.meta.url));
|
|
22
24
|
const ROOT = resolve(__dirname, "..", "..");
|
|
23
25
|
// ---------------------------------------------------------------------------
|
|
@@ -32,6 +34,8 @@ const VALID_SEARCH_MODES = ["open", "origin-only", "off"];
|
|
|
32
34
|
* Exported so the plan builder can call it independently.
|
|
33
35
|
*/
|
|
34
36
|
export function computeResolvedOptions(opts) {
|
|
37
|
+
// Resolve paths relative to the caller's cwd, not the eval package root
|
|
38
|
+
const callerCwd = process.env.AILF_CALLER_CWD ?? process.cwd();
|
|
35
39
|
// Validate mode
|
|
36
40
|
const mode = opts.mode;
|
|
37
41
|
if (!VALID_MODES.includes(mode)) {
|
|
@@ -163,14 +167,21 @@ export function computeResolvedOptions(opts) {
|
|
|
163
167
|
// Smart default: full runs auto-publish when store is configured
|
|
164
168
|
publishEnabled = reportStoreConfigured && !debugEnabled;
|
|
165
169
|
}
|
|
166
|
-
// Report store overrides —
|
|
167
|
-
//
|
|
168
|
-
//
|
|
170
|
+
// Report store overrides — resolution order:
|
|
171
|
+
// 1. Explicit CLI flags (--report-dataset, --report-project)
|
|
172
|
+
// 2. Environment variables (AILF_REPORT_DATASET, AILF_REPORT_PROJECT_ID)
|
|
173
|
+
// 3. .ailf/config.yaml reportStore block (when --repo-tasks-path is set)
|
|
174
|
+
// 4. Eval dataset override (so perspective evals publish to the same dataset)
|
|
175
|
+
const repoConfig = loadRepoConfigIfPresent(opts.repoTasksPath);
|
|
169
176
|
const reportDataset = opts.reportDataset ??
|
|
170
177
|
process.env.AILF_REPORT_DATASET ??
|
|
178
|
+
repoConfig?.reportStore?.dataset ??
|
|
171
179
|
datasetOverride ??
|
|
172
180
|
undefined;
|
|
173
|
-
const reportProjectId = opts.reportProject ??
|
|
181
|
+
const reportProjectId = opts.reportProject ??
|
|
182
|
+
process.env.AILF_REPORT_PROJECT_ID ??
|
|
183
|
+
repoConfig?.reportStore?.projectId ??
|
|
184
|
+
undefined;
|
|
174
185
|
return {
|
|
175
186
|
allowedOriginArgs,
|
|
176
187
|
areaOption,
|
|
@@ -206,7 +217,9 @@ export function computeResolvedOptions(opts) {
|
|
|
206
217
|
skipFetch: opts.skipFetch,
|
|
207
218
|
source: opts.source,
|
|
208
219
|
studioOriginOverride,
|
|
209
|
-
repoTasksPath: opts.repoTasksPath
|
|
220
|
+
repoTasksPath: opts.repoTasksPath
|
|
221
|
+
? resolve(callerCwd, opts.repoTasksPath)
|
|
222
|
+
: undefined,
|
|
210
223
|
taskOption,
|
|
211
224
|
taskSourceType: resolveTaskSourceType(opts.taskSource),
|
|
212
225
|
urlArgs,
|
|
@@ -303,3 +316,28 @@ function writePipelineResult(result) {
|
|
|
303
316
|
// results/latest/ may not exist yet — not critical
|
|
304
317
|
}
|
|
305
318
|
}
|
|
319
|
+
/**
 * Load .ailf/config.yaml if --repo-tasks-path is set and the config file
 * exists. Returns null if not applicable.
 *
 * The config.yaml lives one level up from the tasks/ directory:
 *   .ailf/config.yaml   ← config
 *   .ailf/tasks/        ← repoTasksPath
 *
 * A relative repoTasksPath is resolved against the caller's working
 * directory (AILF_CALLER_CWD, falling back to process.cwd()). This is the
 * same base computeResolvedOptions uses when it resolves repoTasksPath, so
 * the config lookup and the tasks directory always point into the same
 * .ailf/ folder even when the process cwd is not the caller's cwd.
 * An absolute repoTasksPath is used as-is.
 *
 * @param {string | undefined} repoTasksPath - Value of --repo-tasks-path
 *   (relative or absolute); falsy when the flag was not given.
 * @returns The result of parseRepoConfig for the loaded YAML, or null when
 *   no path was given, the config file does not exist, or it could not be
 *   read/parsed (a warning is printed in that last case).
 */
function loadRepoConfigIfPresent(repoTasksPath) {
    if (!repoTasksPath)
        return null;
    // Resolve against the caller's cwd rather than the process cwd; for an
    // absolute repoTasksPath, path.resolve ignores the callerCwd segment.
    const callerCwd = process.env.AILF_CALLER_CWD ?? process.cwd();
    // .ailf/tasks/ → .ailf/config.yaml
    const configPath = resolve(callerCwd, repoTasksPath, "..", "config.yaml");
    if (!existsSync(configPath))
        return null;
    try {
        const raw = readFileSync(configPath, "utf-8");
        const parsed = load(raw);
        return parseRepoConfig(parsed);
    }
    catch (err) {
        // Best-effort: a broken config must not abort the run, but the user
        // should know its reportStore settings were not applied.
        console.warn(`  ⚠️  Failed to parse ${configPath}: ${err instanceof Error ? err.message : String(err)}`);
        return null;
    }
}
|
package/dist/commands/publish.js
CHANGED
|
@@ -101,7 +101,8 @@ async function runPublishCommand(summaryPath, opts) {
|
|
|
101
101
|
// -----------------------------------------------------------------------
|
|
102
102
|
// 1. Resolve and read the score summary
|
|
103
103
|
// -----------------------------------------------------------------------
|
|
104
|
-
const
|
|
104
|
+
const callerCwd = process.env.AILF_CALLER_CWD ?? process.cwd();
|
|
105
|
+
const resolvedPath = resolve(callerCwd, summaryPath);
|
|
105
106
|
if (!existsSync(resolvedPath)) {
|
|
106
107
|
console.error(` ✖ File not found: ${resolvedPath}`);
|
|
107
108
|
console.error();
|
|
@@ -24,7 +24,10 @@ export function createValidateTasksCommand() {
|
|
|
24
24
|
.argument("[path]", "Path to tasks directory (default: .ailf/tasks/)", ".ailf/tasks")
|
|
25
25
|
.option("--strict", "Treat warnings as errors", false)
|
|
26
26
|
.action(async (tasksPath, opts) => {
|
|
27
|
-
|
|
27
|
+
// Resolve relative to the caller's working directory, not the
|
|
28
|
+
// eval package root (which differs when run via bin/ailf.js)
|
|
29
|
+
const callerCwd = process.env.AILF_CALLER_CWD ?? process.cwd();
|
|
30
|
+
const resolvedPath = resolve(callerCwd, tasksPath);
|
|
28
31
|
if (!existsSync(resolvedPath)) {
|
|
29
32
|
console.error(`❌ Directory not found: ${resolvedPath}`);
|
|
30
33
|
process.exit(1);
|
package/dist/composition-root.js
CHANGED
|
@@ -43,7 +43,7 @@ export function createAppContext(config) {
|
|
|
43
43
|
// Eval runner — Promptfoo subprocess
|
|
44
44
|
const evalRunner = new PromptfooEvalAdapter(config.rootDir);
|
|
45
45
|
// Report store — Sanity Content Lake (for publish + auto-compare)
|
|
46
|
-
const reportStore = createReportStore();
|
|
46
|
+
const reportStore = createReportStore(config);
|
|
47
47
|
// Sinks — loaded from config/sinks.yaml
|
|
48
48
|
const sinks = loadSinks();
|
|
49
49
|
return {
|
|
@@ -75,7 +75,7 @@ function createCache(config) {
|
|
|
75
75
|
const token = process.env.AILF_REPORT_SANITY_API_TOKEN ?? process.env.SANITY_API_TOKEN;
|
|
76
76
|
if (!token)
|
|
77
77
|
return local;
|
|
78
|
-
return new ContentLakeCacheAdapter(local, createReportStore());
|
|
78
|
+
return new ContentLakeCacheAdapter(local, createReportStore(config));
|
|
79
79
|
}
|
|
80
80
|
function createTaskSource(config) {
|
|
81
81
|
// Primary source — selected by config.taskSourceType
|
|
@@ -96,10 +96,14 @@ function createTaskSource(config) {
|
|
|
96
96
|
}
|
|
97
97
|
return primary;
|
|
98
98
|
}
|
|
99
|
-
function createReportStore() {
|
|
99
|
+
function createReportStore(config) {
|
|
100
100
|
return new ReportStore({
|
|
101
|
-
dataset: process.env.AILF_REPORT_DATASET ??
|
|
102
|
-
|
|
101
|
+
dataset: process.env.AILF_REPORT_DATASET ??
|
|
102
|
+
config?.reportStoreDataset ??
|
|
103
|
+
undefined,
|
|
104
|
+
projectId: process.env.AILF_REPORT_PROJECT_ID ??
|
|
105
|
+
config?.reportStoreProjectId ??
|
|
106
|
+
undefined,
|
|
103
107
|
token: process.env.AILF_REPORT_SANITY_API_TOKEN ??
|
|
104
108
|
process.env.SANITY_API_TOKEN ??
|
|
105
109
|
undefined,
|
|
@@ -67,6 +67,8 @@ export function mapToResolvedConfig(opts, rootDir) {
|
|
|
67
67
|
beforeOption: opts.beforeOption,
|
|
68
68
|
taskSourceType: opts.taskSourceType,
|
|
69
69
|
repoTasksPath: opts.repoTasksPath,
|
|
70
|
+
reportStoreProjectId: opts.reportProjectId,
|
|
71
|
+
reportStoreDataset: opts.reportDataset,
|
|
70
72
|
};
|
|
71
73
|
}
|
|
72
74
|
/**
|
package/package.json
CHANGED
|
@@ -1,20 +0,0 @@
|
|
|
1
|
-
/**
|
|
2
|
-
* update-quality-scores command — update QUALITY_SCORE.md from scores.
|
|
3
|
-
*/
|
|
4
|
-
import { Command } from "commander";
|
|
5
|
-
export function createUpdateQualityScoresCommand() {
|
|
6
|
-
return new Command("update-quality-scores")
|
|
7
|
-
.description("Update docs/QUALITY_SCORE.md from score-summary.json")
|
|
8
|
-
.action(async () => {
|
|
9
|
-
const { updateQualityScores } = await import("../scripts/update-quality-scores.js");
|
|
10
|
-
console.log("=== Updating QUALITY_SCORE.md from score-summary.json ===\n");
|
|
11
|
-
const result = updateQualityScores();
|
|
12
|
-
if (result.success) {
|
|
13
|
-
console.log(` ✅ ${result.message}`);
|
|
14
|
-
}
|
|
15
|
-
else {
|
|
16
|
-
console.error(` ❌ ${result.message}`);
|
|
17
|
-
process.exit(1);
|
|
18
|
-
}
|
|
19
|
-
});
|
|
20
|
-
}
|
|
@@ -1,8 +0,0 @@
|
|
|
1
|
-
/**
|
|
2
|
-
* lib/agent-behavior-report.ts — DEPRECATED re-export shim.
|
|
3
|
-
* @deprecated Import from ../pipeline/agent-behavior-report.js instead.
|
|
4
|
-
*/
|
|
5
|
-
import "dotenv/config";
|
|
6
|
-
export { analyzeResults, CANONICAL_DOC_MAP, detectFeatureArea, } from "../pipeline/agent-behavior-report.js";
|
|
7
|
-
export type { AnalysisResult, FeatureAnalysis, TaskBehavior, TestResult, } from "../pipeline/agent-behavior-report.js";
|
|
8
|
-
export declare function main(resultsPathArg?: string): void;
|