@sanity/ailf 2.6.0 → 2.7.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/_vendor/ailf-core/examples/index.d.ts +1 -1
- package/dist/_vendor/ailf-core/examples/index.js +1 -1
- package/dist/_vendor/ailf-core/types/index.d.ts +7 -2
- package/dist/cli.js +2 -0
- package/dist/commands/check-staleness.d.ts +14 -0
- package/dist/commands/check-staleness.js +74 -0
- package/dist/commands/init.js +2 -6
- package/dist/orchestration/steps/publish-report-step.js +20 -2
- package/package.json +3 -3
|
@@ -433,6 +433,6 @@ export interface ExampleRecord {
|
|
|
433
433
|
}
|
|
434
434
|
export declare const EXAMPLES: Record<ExampleType, ExampleRecord>;
|
|
435
435
|
/** GitHub Actions workflow template for AI Literacy evaluation */
|
|
436
|
-
export declare const workflowYaml = "# \u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n# AI Literacy Evaluation \u2014 GitHub Actions workflow\n# \u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n#\n# Evaluates your documentation quality on every pull request.\n# The AILF CLI reads your .ailf/tasks/ definitions, submits them\n# to the AILF API for evaluation, and writes a score report.\n#\n# Prerequisites:\n# Add one secret to your repository (Settings \u2192 Secrets \u2192 Actions):\n# AILF_API_KEY \u2014 your API key (starts with ailf_live_sk_)\n#
|
|
436
|
+
export declare const workflowYaml = "# \u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n# AI Literacy Evaluation \u2014 GitHub Actions workflow\n# \u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n#\n# Evaluates your documentation quality on every pull request.\n# The AILF CLI reads your .ailf/tasks/ definitions, submits them\n# to the AILF API for evaluation, and writes a score report.\n#\n# Prerequisites:\n# Add one secret to your repository (Settings \u2192 Secrets \u2192 Actions):\n# AILF_API_KEY \u2014 your API key (starts with ailf_live_sk_)\n#\n# @sanity/ailf is published with public npm access, so no npm token is\n# needed to install the CLI.\n#\n# Customization:\n# - Narrow the trigger paths to reduce cost (see comment below)\n# - Check debug_mode for faster iteration (fewer tests)\n# - See: https://github.com/sanity-labs/ai-literacy-framework\n# 
\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n\nname: AI Literacy Eval\n\non:\n pull_request:\n branches: [main]\n # Runs on every PR to main by default. To reduce cost:\n # paths: [\".ailf/**\", \"docs/**\"]\n\n workflow_dispatch:\n inputs:\n debug_mode:\n description: \"Run in debug mode (fewer tests, faster iteration)\"\n type: boolean\n default: false\n\nconcurrency:\n group: ailf-eval-${{ github.event.pull_request.number || github.ref }}\n cancel-in-progress: true\n\njobs:\n evaluate:\n name: AI Literacy Evaluation\n runs-on: ubuntu-latest\n permissions:\n contents: read\n pull-requests: write\n steps:\n - uses: actions/checkout@v4\n\n - name: Run evaluation\n id: eval\n env:\n AILF_API_KEY: ${{ secrets.AILF_API_KEY }}\n run: |\n npx @sanity/ailf@latest pipeline --remote \\\n --output /tmp/ailf-report.md \\\n ${{ inputs.debug_mode && '--debug' || '' }}\n\n - name: Post PR comment\n if: always() && github.event_name == 'pull_request'\n uses: actions/github-script@v7\n with:\n script: |\n const fs = require('fs');\n\n // --- Constants ---\n const MARKER = '<!-- ailf-score-report -->';\n const HISTORY_START = '<!-- ailf-score-history -->';\n const HISTORY_END = '<!-- /ailf-score-history -->';\n const MAX_HISTORY = 3; // keep at most 3 prior runs\n\n // --- Read new report ---\n let newReport;\n try {\n newReport = fs.readFileSync('/tmp/ailf-report.md', 'utf-8');\n } catch {\n newReport = `## \u26A0\uFE0F AI Literacy Evaluation\\n\\nNo report generated. 
Check the [workflow logs](${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}).`;\n }\n\n const prNumber = context.issue?.number || context.payload?.pull_request?.number;\n if (!prNumber) {\n console.log('No PR number found, skipping comment');\n return;\n }\n\n // --- Find existing comment ---\n const { data: comments } = await github.rest.issues.listComments({\n owner: context.repo.owner, repo: context.repo.repo,\n issue_number: prNumber,\n });\n const existing = comments.find(c => c.body?.includes(MARKER));\n\n // --- Build history from previous comment ---\n let historyEntries = [];\n if (existing) {\n const oldBody = existing.body || '';\n\n // Collect existing collapsed history entries\n const histStart = oldBody.indexOf(HISTORY_START);\n const histEnd = oldBody.indexOf(HISTORY_END);\n if (histStart !== -1 && histEnd !== -1) {\n const historyContent = oldBody.slice(histStart + HISTORY_START.length, histEnd).trim();\n // Split on </details> boundaries to get individual entries\n if (historyContent) {\n historyEntries = historyContent\n .split(/<\\/details>\\s*/)\n .map(s => s.trim())\n .filter(s => s.startsWith('<details>'))\n .map(s => s + '\\n</details>');\n }\n }\n\n // Extract the current report (will become the newest history entry)\n let previousReport = '';\n if (histStart !== -1) {\n // Report is between MARKER and the \"Previous runs\" heading (or history section)\n const markerIdx = oldBody.indexOf(MARKER);\n // Find the --- separator before history\n const separatorIdx = oldBody.lastIndexOf('---', histStart);\n const endIdx = separatorIdx > markerIdx ? 
separatorIdx : histStart;\n previousReport = oldBody.slice(markerIdx + MARKER.length, endIdx).trim();\n } else {\n // No history yet \u2014 everything after MARKER is the report\n const markerIdx = oldBody.indexOf(MARKER);\n if (markerIdx !== -1) {\n previousReport = oldBody.slice(markerIdx + MARKER.length).trim();\n }\n }\n\n // Collapse the previous report into a <details> entry\n if (previousReport) {\n const scoreMatch = previousReport.match(/Overall:\\s*(\\d+)\\/100/);\n const score = scoreMatch ? scoreMatch[1] : '?';\n const dateMatch = previousReport.match(/Generated by.*?\u00B7\\s*([^\u00B7<\\n*]+)/);\n const date = dateMatch\n ? dateMatch[1].trim()\n : new Date().toISOString().slice(0, 16).replace('T', ' ') + ' UTC';\n const entry = `<details>\\n<summary>\uD83D\uDCDC ${date} \u2014 ${score}/100</summary>\\n\\n${previousReport}\\n\\n</details>`;\n historyEntries.unshift(entry); // newest first\n }\n\n // Enforce max history limit\n historyEntries = historyEntries.slice(0, MAX_HISTORY);\n }\n\n // --- Assemble final comment ---\n const historySection = historyEntries.length > 0\n ? 
`\\n\\n---\\n\\n### \uD83D\uDCDC Previous runs\\n\\n${HISTORY_START}\\n${historyEntries.join('\\n\\n')}\\n${HISTORY_END}`\n : '';\n const finalBody = `${MARKER}\\n${newReport}${historySection}`;\n\n if (existing) {\n await github.rest.issues.updateComment({\n owner: context.repo.owner, repo: context.repo.repo,\n comment_id: existing.id, body: finalBody,\n });\n console.log(`Updated comment (${historyEntries.length} history entries)`);\n } else {\n await github.rest.issues.createComment({\n owner: context.repo.owner, repo: context.repo.repo,\n issue_number: prNumber, body: finalBody,\n });\n console.log('Created new PR comment');\n }\n\n - name: Summary\n if: always()\n run: |\n if [ -f /tmp/ailf-report.md ]; then\n cat /tmp/ailf-report.md >> \"$GITHUB_STEP_SUMMARY\"\n else\n echo \"## \u26A0\uFE0F AI Literacy Evaluation\" >> \"$GITHUB_STEP_SUMMARY\"\n echo \"\" >> \"$GITHUB_STEP_SUMMARY\"\n echo \"No report generated. Check the workflow logs.\" >> \"$GITHUB_STEP_SUMMARY\"\n fi\n";
|
|
437
437
|
/** TypeScript project configuration template (ailf.config.ts) */
|
|
438
438
|
export declare const ailfConfigTs = "/**\n * .ailf/ailf.config.ts \u2014 AI Literacy Framework project configuration.\n *\n * This file configures how the AILF evaluation pipeline runs in this\n * repository. Place it at .ailf/ailf.config.ts in your project root.\n *\n * Evaluations are submitted to the AILF API (ailf-api.sanity.build).\n * The API handles LLM calls, doc fetching, grading, and report\n * publishing. Your repo only needs one secret: AILF_API_KEY.\n *\n * Docs: https://github.com/sanity-labs/ai-literacy-framework\n */\n\nexport default {\n /**\n * Documentation source \u2014 which docs are being evaluated.\n *\n * This tells the pipeline which Sanity project and dataset contain\n * the documentation under test. For most users, this is Sanity's own\n * docs project.\n */\n source: {\n /** Sanity project ID (find yours at sanity.io/manage) */\n projectId: \"3do82whm\",\n /** The dataset to query (e.g., \"production\", \"next\") */\n dataset: \"next\",\n /**\n * The public URL of your documentation site.\n * Used by agentic mode to test agent discoverability.\n */\n baseUrl: \"https://www.sanity.io/docs\",\n },\n\n /**\n * Trigger configuration \u2014 when evaluations run automatically.\n *\n * Each key is a trigger context. The pipeline checks which trigger\n * matches the current execution context (PR, merge, schedule, etc.)\n * and applies its settings.\n *\n * Mode options:\n * \"validate-only\" \u2014 check that task files parse correctly (fast, no LLM calls)\n * \"eval\" \u2014 run the full evaluation pipeline\n */\n triggers: {\n /** On pull requests: just validate task files parse correctly. */\n pr: {\n mode: \"validate-only\",\n },\n\n /** When .ailf/ files change in a PR: run a real evaluation. */\n \"pr-task-change\": {\n mode: \"eval\",\n paths: [\".ailf/**\"],\n },\n\n /** On merge to main: run evaluation (non-blocking). */\n main: {\n mode: \"eval\",\n blocking: false,\n notify: true,\n },\n },\n}\n";
|
|
@@ -630,7 +630,7 @@ export const EXAMPLES = {
|
|
|
630
630
|
// Raw file exports (non-data files, exported as raw strings)
|
|
631
631
|
// ---------------------------------------------------------------------------
|
|
632
632
|
/** GitHub Actions workflow template for AI Literacy evaluation */
|
|
633
|
-
export const workflowYaml = "# ──────────────────────────────────────────────────────────────────────\n# AI Literacy Evaluation — GitHub Actions workflow\n# ──────────────────────────────────────────────────────────────────────\n#\n# Evaluates your documentation quality on every pull request.\n# The AILF CLI reads your .ailf/tasks/ definitions, submits them\n# to the AILF API for evaluation, and writes a score report.\n#\n# Prerequisites:\n# Add one secret to your repository (Settings → Secrets → Actions):\n# AILF_API_KEY — your API key (starts with ailf_live_sk_)\n#
|
|
633
|
+
export const workflowYaml = "# ──────────────────────────────────────────────────────────────────────\n# AI Literacy Evaluation — GitHub Actions workflow\n# ──────────────────────────────────────────────────────────────────────\n#\n# Evaluates your documentation quality on every pull request.\n# The AILF CLI reads your .ailf/tasks/ definitions, submits them\n# to the AILF API for evaluation, and writes a score report.\n#\n# Prerequisites:\n# Add one secret to your repository (Settings → Secrets → Actions):\n# AILF_API_KEY — your API key (starts with ailf_live_sk_)\n#\n# @sanity/ailf is published with public npm access, so no npm token is\n# needed to install the CLI.\n#\n# Customization:\n# - Narrow the trigger paths to reduce cost (see comment below)\n# - Check debug_mode for faster iteration (fewer tests)\n# - See: https://github.com/sanity-labs/ai-literacy-framework\n# ──────────────────────────────────────────────────────────────────────\n\nname: AI Literacy Eval\n\non:\n pull_request:\n branches: [main]\n # Runs on every PR to main by default. 
To reduce cost:\n # paths: [\".ailf/**\", \"docs/**\"]\n\n workflow_dispatch:\n inputs:\n debug_mode:\n description: \"Run in debug mode (fewer tests, faster iteration)\"\n type: boolean\n default: false\n\nconcurrency:\n group: ailf-eval-${{ github.event.pull_request.number || github.ref }}\n cancel-in-progress: true\n\njobs:\n evaluate:\n name: AI Literacy Evaluation\n runs-on: ubuntu-latest\n permissions:\n contents: read\n pull-requests: write\n steps:\n - uses: actions/checkout@v4\n\n - name: Run evaluation\n id: eval\n env:\n AILF_API_KEY: ${{ secrets.AILF_API_KEY }}\n run: |\n npx @sanity/ailf@latest pipeline --remote \\\n --output /tmp/ailf-report.md \\\n ${{ inputs.debug_mode && '--debug' || '' }}\n\n - name: Post PR comment\n if: always() && github.event_name == 'pull_request'\n uses: actions/github-script@v7\n with:\n script: |\n const fs = require('fs');\n\n // --- Constants ---\n const MARKER = '<!-- ailf-score-report -->';\n const HISTORY_START = '<!-- ailf-score-history -->';\n const HISTORY_END = '<!-- /ailf-score-history -->';\n const MAX_HISTORY = 3; // keep at most 3 prior runs\n\n // --- Read new report ---\n let newReport;\n try {\n newReport = fs.readFileSync('/tmp/ailf-report.md', 'utf-8');\n } catch {\n newReport = `## ⚠️ AI Literacy Evaluation\\n\\nNo report generated. 
Check the [workflow logs](${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}).`;\n }\n\n const prNumber = context.issue?.number || context.payload?.pull_request?.number;\n if (!prNumber) {\n console.log('No PR number found, skipping comment');\n return;\n }\n\n // --- Find existing comment ---\n const { data: comments } = await github.rest.issues.listComments({\n owner: context.repo.owner, repo: context.repo.repo,\n issue_number: prNumber,\n });\n const existing = comments.find(c => c.body?.includes(MARKER));\n\n // --- Build history from previous comment ---\n let historyEntries = [];\n if (existing) {\n const oldBody = existing.body || '';\n\n // Collect existing collapsed history entries\n const histStart = oldBody.indexOf(HISTORY_START);\n const histEnd = oldBody.indexOf(HISTORY_END);\n if (histStart !== -1 && histEnd !== -1) {\n const historyContent = oldBody.slice(histStart + HISTORY_START.length, histEnd).trim();\n // Split on </details> boundaries to get individual entries\n if (historyContent) {\n historyEntries = historyContent\n .split(/<\\/details>\\s*/)\n .map(s => s.trim())\n .filter(s => s.startsWith('<details>'))\n .map(s => s + '\\n</details>');\n }\n }\n\n // Extract the current report (will become the newest history entry)\n let previousReport = '';\n if (histStart !== -1) {\n // Report is between MARKER and the \"Previous runs\" heading (or history section)\n const markerIdx = oldBody.indexOf(MARKER);\n // Find the --- separator before history\n const separatorIdx = oldBody.lastIndexOf('---', histStart);\n const endIdx = separatorIdx > markerIdx ? 
separatorIdx : histStart;\n previousReport = oldBody.slice(markerIdx + MARKER.length, endIdx).trim();\n } else {\n // No history yet — everything after MARKER is the report\n const markerIdx = oldBody.indexOf(MARKER);\n if (markerIdx !== -1) {\n previousReport = oldBody.slice(markerIdx + MARKER.length).trim();\n }\n }\n\n // Collapse the previous report into a <details> entry\n if (previousReport) {\n const scoreMatch = previousReport.match(/Overall:\\s*(\\d+)\\/100/);\n const score = scoreMatch ? scoreMatch[1] : '?';\n const dateMatch = previousReport.match(/Generated by.*?·\\s*([^·<\\n*]+)/);\n const date = dateMatch\n ? dateMatch[1].trim()\n : new Date().toISOString().slice(0, 16).replace('T', ' ') + ' UTC';\n const entry = `<details>\\n<summary>📜 ${date} — ${score}/100</summary>\\n\\n${previousReport}\\n\\n</details>`;\n historyEntries.unshift(entry); // newest first\n }\n\n // Enforce max history limit\n historyEntries = historyEntries.slice(0, MAX_HISTORY);\n }\n\n // --- Assemble final comment ---\n const historySection = historyEntries.length > 0\n ? `\\n\\n---\\n\\n### 📜 Previous runs\\n\\n${HISTORY_START}\\n${historyEntries.join('\\n\\n')}\\n${HISTORY_END}`\n : '';\n const finalBody = `${MARKER}\\n${newReport}${historySection}`;\n\n if (existing) {\n await github.rest.issues.updateComment({\n owner: context.repo.owner, repo: context.repo.repo,\n comment_id: existing.id, body: finalBody,\n });\n console.log(`Updated comment (${historyEntries.length} history entries)`);\n } else {\n await github.rest.issues.createComment({\n owner: context.repo.owner, repo: context.repo.repo,\n issue_number: prNumber, body: finalBody,\n });\n console.log('Created new PR comment');\n }\n\n - name: Summary\n if: always()\n run: |\n if [ -f /tmp/ailf-report.md ]; then\n cat /tmp/ailf-report.md >> \"$GITHUB_STEP_SUMMARY\"\n else\n echo \"## ⚠️ AI Literacy Evaluation\" >> \"$GITHUB_STEP_SUMMARY\"\n echo \"\" >> \"$GITHUB_STEP_SUMMARY\"\n echo \"No report generated. 
Check the workflow logs.\" >> \"$GITHUB_STEP_SUMMARY\"\n fi\n";
|
|
634
634
|
// ---------------------------------------------------------------------------
|
|
635
635
|
// TypeScript template exports (for ailf init --output-format ts)
|
|
636
636
|
// ---------------------------------------------------------------------------
|
|
@@ -309,8 +309,13 @@ export interface StoredTestResult {
|
|
|
309
309
|
* API error, or refusal). Same semantics as GraderJudgment.outputFailure.
|
|
310
310
|
*/
|
|
311
311
|
outputFailure?: boolean;
|
|
312
|
-
/**
|
|
313
|
-
|
|
312
|
+
/**
|
|
313
|
+
* The model's generated code/response (truncated to 8000 chars).
|
|
314
|
+
* Populated by the scoring step and used by uploadTestOutputs. Stripped
|
|
315
|
+
* from the inline shape after upload (D0030) — the full value lives in
|
|
316
|
+
* the GCS artifact, keyed by `{taskId}::{modelId}`.
|
|
317
|
+
*/
|
|
318
|
+
responseOutput?: string;
|
|
314
319
|
/** True when responseOutput was truncated from a longer response */
|
|
315
320
|
responseOutputTruncated?: boolean;
|
|
316
321
|
/** Task description (e.g. "Functions - Webhook handler (gold)") */
|
package/dist/cli.js
CHANGED
|
@@ -147,6 +147,8 @@ import { createAgentReportCommand } from "./commands/agent-report.js";
|
|
|
147
147
|
program.addCommand(createAgentReportCommand().helpGroup(CommandGroup.AnalysisReports));
|
|
148
148
|
import { createWeeklyDigestCommand } from "./commands/weekly-digest.js";
|
|
149
149
|
program.addCommand(createWeeklyDigestCommand().helpGroup(CommandGroup.AnalysisReports));
|
|
150
|
+
import { createCheckStalenessCommand } from "./commands/check-staleness.js";
|
|
151
|
+
program.addCommand(createCheckStalenessCommand().helpGroup(CommandGroup.AnalysisReports));
|
|
150
152
|
// ── Grader Reliability ────────────────────────────────────────────────
|
|
151
153
|
import { createGraderCommand } from "./commands/grader/index.js";
|
|
152
154
|
program.addCommand(createGraderCommand().helpGroup(CommandGroup.GraderReliability));
|
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* check-staleness command — verifies recent evaluation reports exist.
|
|
3
|
+
*
|
|
4
|
+
* Exits 0 when the most recent report in the Sanity Content Lake is within
|
|
5
|
+
* the max-age window, 1 otherwise (including "no reports at all"). Emits a
|
|
6
|
+
* single JSON line on stdout summarizing the decision so CI can pipe it
|
|
7
|
+
* directly into an alert payload.
|
|
8
|
+
*
|
|
9
|
+
* Used by the scheduled staleness workflow to detect silent pipeline
|
|
10
|
+
* failures — cases where scheduled evaluations stop producing reports but
|
|
11
|
+
* no workflow run fails loudly enough to be noticed.
|
|
12
|
+
*/
|
|
13
|
+
import { Command } from "commander";
|
|
14
|
+
export declare function createCheckStalenessCommand(): Command;
|
|
@@ -0,0 +1,74 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* check-staleness command — verifies recent evaluation reports exist.
|
|
3
|
+
*
|
|
4
|
+
* Exits 0 when the most recent report in the Sanity Content Lake is within
|
|
5
|
+
* the max-age window, 1 otherwise (including "no reports at all"). Emits a
|
|
6
|
+
* single JSON line on stdout summarizing the decision so CI can pipe it
|
|
7
|
+
* directly into an alert payload.
|
|
8
|
+
*
|
|
9
|
+
* Used by the scheduled staleness workflow to detect silent pipeline
|
|
10
|
+
* failures — cases where scheduled evaluations stop producing reports but
|
|
11
|
+
* no workflow run fails loudly enough to be noticed.
|
|
12
|
+
*/
|
|
13
|
+
import { Command } from "commander";
|
|
14
|
+
export function createCheckStalenessCommand() {
|
|
15
|
+
return new Command("check-staleness")
|
|
16
|
+
.description("Exit 1 if no evaluation report has been produced within the max-age window")
|
|
17
|
+
.option("--max-age <days>", "Max age in days before reports are considered stale", (v) => Number.parseInt(v, 10), 3)
|
|
18
|
+
.action(async (opts) => {
|
|
19
|
+
const { getSanityClient } = await import("../sanity/client.js");
|
|
20
|
+
// Resolve report-store credentials with the same precedence as
|
|
21
|
+
// weekly-digest.ts and composition-root.ts — AILF_REPORT_* wins over
|
|
22
|
+
// the evaluated-source SANITY_* defaults so the staleness probe tracks
|
|
23
|
+
// the actual report dataset even when it diverges from the eval source.
|
|
24
|
+
const client = getSanityClient({
|
|
25
|
+
dataset: process.env.AILF_REPORT_DATASET,
|
|
26
|
+
projectId: process.env.AILF_REPORT_PROJECT_ID,
|
|
27
|
+
token: process.env.AILF_REPORT_SANITY_API_TOKEN ??
|
|
28
|
+
process.env.SANITY_API_TOKEN,
|
|
29
|
+
});
|
|
30
|
+
const maxAgeDays = opts.maxAge;
|
|
31
|
+
// Bound the GROQ sort with a `completedAt > $floor` filter. Beyond
|
|
32
|
+
// ~10,000 reports the unbounded `order(completedAt desc)[0]` scan
|
|
33
|
+
// becomes a noticeable cost; a floor proportional to the max-age
|
|
34
|
+
// window keeps the scan cheap regardless of corpus size. The factor
|
|
35
|
+
// of 10× max-age gives plenty of headroom — if the last report
|
|
36
|
+
// predates the floor, the absence of any result still yields the
|
|
37
|
+
// correct "stale" verdict.
|
|
38
|
+
const floorDays = Math.max(maxAgeDays * 10, 30);
|
|
39
|
+
const floor = new Date(Date.now() - floorDays * 24 * 60 * 60 * 1000).toISOString();
|
|
40
|
+
const QUERY = `*[_type == "ailf.report" && completedAt > $floor] | order(completedAt desc)[0]{
|
|
41
|
+
"reportId": reportId,
|
|
42
|
+
"completedAt": completedAt,
|
|
43
|
+
"tag": tag
|
|
44
|
+
}`;
|
|
45
|
+
const latest = await client.fetch(QUERY, { floor });
|
|
46
|
+
// Use `process.exitCode` + `return` rather than `process.exit()` so
|
|
47
|
+
// stdout flushes cleanly when the caller captures via `$(...)` — a
|
|
48
|
+
// hard exit can drop buffered output on piped captures. Matches the
|
|
49
|
+
// pattern used by agent-report.ts, capture-list.ts, etc.
|
|
50
|
+
if (!latest || !latest.completedAt) {
|
|
51
|
+
console.log(JSON.stringify({
|
|
52
|
+
floorDays,
|
|
53
|
+
maxAgeDays,
|
|
54
|
+
reason: "no-reports",
|
|
55
|
+
stale: true,
|
|
56
|
+
}));
|
|
57
|
+
process.exitCode = 1;
|
|
58
|
+
return;
|
|
59
|
+
}
|
|
60
|
+
const ageMs = Date.now() - new Date(latest.completedAt).getTime();
|
|
61
|
+
const ageDays = Number((ageMs / (24 * 60 * 60 * 1000)).toFixed(2));
|
|
62
|
+
const stale = ageDays > maxAgeDays;
|
|
63
|
+
console.log(JSON.stringify({
|
|
64
|
+
ageDays,
|
|
65
|
+
floorDays,
|
|
66
|
+
latestCompletedAt: latest.completedAt,
|
|
67
|
+
latestReportId: latest.reportId,
|
|
68
|
+
latestTag: latest.tag,
|
|
69
|
+
maxAgeDays,
|
|
70
|
+
stale,
|
|
71
|
+
}));
|
|
72
|
+
process.exitCode = stale ? 1 : 0;
|
|
73
|
+
});
|
|
74
|
+
}
|
package/dist/commands/init.js
CHANGED
|
@@ -250,10 +250,9 @@ async function runInit(opts) {
|
|
|
250
250
|
console.log(` 1. Edit the example tasks in ${rel(targetDir, tasksDir)}/ — update`);
|
|
251
251
|
console.log(" slugs and prompts for your documentation");
|
|
252
252
|
console.log(` 2. Validate locally: npx @sanity/ailf@latest validate-tasks .ailf/tasks/`);
|
|
253
|
-
console.log(" 3. Add
|
|
253
|
+
console.log(" 3. Add a GitHub Actions secret");
|
|
254
254
|
console.log(" (Settings → Secrets and variables → Actions):");
|
|
255
255
|
console.log(" • AILF_API_KEY — your API key (starts with ailf_live_sk_)");
|
|
256
|
-
console.log(" • NPM_TOKEN — npm token with read access to @sanity scope");
|
|
257
256
|
console.log(" 4. Push — the workflow at .github/workflows/ailf-eval.yml runs");
|
|
258
257
|
console.log(" automatically on PRs");
|
|
259
258
|
if (format === "ts") {
|
|
@@ -262,14 +261,11 @@ async function runInit(opts) {
|
|
|
262
261
|
console.log(" via defineTask() from @sanity/ailf-core.");
|
|
263
262
|
}
|
|
264
263
|
console.log();
|
|
265
|
-
console.log(" 🔑 Retrieve
|
|
264
|
+
console.log(" 🔑 Retrieve the API key from 1Password (Sanity employees):");
|
|
266
265
|
console.log();
|
|
267
266
|
console.log(" # Shared dev API key (for local testing and CI)");
|
|
268
267
|
console.log(' op read "op://Shared/AI Literacy Framework - Shared API Tokens/AILF_API_KEY_DEV"');
|
|
269
268
|
console.log();
|
|
270
|
-
console.log(" # npm token (read access to @sanity scope)");
|
|
271
|
-
console.log(' op read "op://Shared/AI Literacy Framework - Shared API Tokens/NPM_TOKEN"');
|
|
272
|
-
console.log();
|
|
273
269
|
console.log(" Not a Sanity employee? Request an API key from the AILF team.");
|
|
274
270
|
console.log();
|
|
275
271
|
console.log(" 💡 Test locally before pushing:");
|
|
@@ -113,11 +113,20 @@ export class PublishReportStep {
|
|
|
113
113
|
tag: this.options.publishTag ?? ctx.config.publishTag,
|
|
114
114
|
title,
|
|
115
115
|
};
|
|
116
|
-
// Upload test output artifacts to GCS (D0030 — non-blocking, P5)
|
|
116
|
+
// Upload test output artifacts to GCS (D0030 — non-blocking, P5).
|
|
117
|
+
// When upload succeeds, strip responseOutput from the inline
|
|
118
|
+
// testResults[] so the Content Lake document carries only the slim
|
|
119
|
+
// shape; the full output lives in the GCS artifact. When upload
|
|
120
|
+
// fails, leave the inline shape intact so Studio's drill-down UI
|
|
121
|
+
// still works via the backward-compat fallback.
|
|
117
122
|
if (ctx.artifactUploader && summary.testResults?.length) {
|
|
118
123
|
const artifactRef = await uploadTestOutputs(ctx.artifactUploader, reportId, now, summary.testResults);
|
|
119
124
|
if (artifactRef) {
|
|
120
125
|
report.artifacts = { testOutputs: artifactRef };
|
|
126
|
+
report.summary = {
|
|
127
|
+
...summary,
|
|
128
|
+
testResults: summary.testResults.map(slimTestResult),
|
|
129
|
+
};
|
|
121
130
|
}
|
|
122
131
|
}
|
|
123
132
|
// Share reportId with downstream steps (CallbackStep + orchestrator job update)
|
|
@@ -218,6 +227,15 @@ function buildProvenanceInput(summary, ctx, options, autoScope) {
|
|
|
218
227
|
taskIds,
|
|
219
228
|
};
|
|
220
229
|
}
|
|
230
|
+
/**
|
|
231
|
+
* Strip the large responseOutput fields from a StoredTestResult so the
|
|
232
|
+
* remaining object is safe to inline in the Content Lake document (D0030).
|
|
233
|
+
* The full output lives in the GCS artifact uploaded by uploadTestOutputs.
|
|
234
|
+
*/
|
|
235
|
+
function slimTestResult(tr) {
|
|
236
|
+
const { responseOutput: _o, responseOutputTruncated: _t, ...rest } = tr;
|
|
237
|
+
return rest;
|
|
238
|
+
}
|
|
221
239
|
/**
|
|
222
240
|
* Extract test outputs from StoredTestResult[] and upload as a single
|
|
223
241
|
* JSON artifact to GCS. The artifact is keyed by `{taskId}::{modelId}`
|
|
@@ -230,7 +248,7 @@ async function uploadTestOutputs(uploader, reportId, createdAt, testResults) {
|
|
|
230
248
|
for (const tr of testResults) {
|
|
231
249
|
const key = `${tr.taskId}::${tr.modelId}`;
|
|
232
250
|
entries[key] = {
|
|
233
|
-
responseOutput: tr.responseOutput,
|
|
251
|
+
responseOutput: tr.responseOutput ?? "",
|
|
234
252
|
responseOutputTruncated: tr.responseOutputTruncated ?? false,
|
|
235
253
|
};
|
|
236
254
|
}
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@sanity/ailf",
|
|
3
|
-
"version": "2.6.0",
|
|
3
|
+
"version": "2.7.1",
|
|
4
4
|
"private": false,
|
|
5
5
|
"publishConfig": {
|
|
6
6
|
"access": "public"
|
|
@@ -52,8 +52,8 @@
|
|
|
52
52
|
"@types/node": "^22.13.1",
|
|
53
53
|
"tsx": "^4.19.2",
|
|
54
54
|
"typescript": "^5.7.3",
|
|
55
|
-
"@sanity/ailf-
|
|
56
|
-
"@sanity/ailf-
|
|
55
|
+
"@sanity/ailf-shared": "0.1.0",
|
|
56
|
+
"@sanity/ailf-core": "0.1.0"
|
|
57
57
|
},
|
|
58
58
|
"scripts": {
|
|
59
59
|
"build": "tsc && tsx scripts/bundle-workspace-deps.ts",
|