npm - @sanity/ailf - Versions diffs - 0.1.15 → 0.1.17 - Mend

@sanity/ailf 0.1.15 → 0.1.17

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (12)

package/README.md +6 -74
package/dist/_vendor/ailf-core/examples/index.d.ts +3 -3
package/dist/_vendor/ailf-core/examples/index.js +3 -3
package/dist/_vendor/ailf-core/ports/context.d.ts +11 -0
package/dist/_vendor/ailf-core/schemas/pipeline-request.d.ts +6 -0
package/dist/_vendor/ailf-core/schemas/pipeline-request.js +19 -0
package/dist/commands/init.js +10 -0
package/dist/orchestration/steps/publish-report-step.js +1 -0
package/dist/pipeline/map-request-to-config.js +1 -0
package/dist/pipeline/provenance.d.ts +11 -0
package/dist/pipeline/provenance.js +14 -1
package/package.json +1 -1

package/README.md CHANGED

@@ -4,86 +4,18 @@ CLI and evaluation engine for the **AI Literacy Framework** — measures how
 effectively documentation enables AI coding tools to implement features
 correctly.
-## Installation
-```bash
-# Run without installing (recommended for quick start)
-npx @sanity/ailf --help
-# Or install globally
-pnpm add -g @sanity/ailf
-# Or as a project dependency
-pnpm add @sanity/ailf
-```
-## Quick start
-### 1. Initialize a project
 ```bash
-npx @sanity/ailf init
-```
-This creates a `.ailf/` directory with example configuration and task files:
+npx @sanity/ailf@latest init        # scaffold a project
+npx @sanity/ailf@latest --help      # see all commands
 ```
-.ailf/
-├── config.yaml              # Project configuration
-├── .gitignore               # Keeps generated files out of VCS
-└── tasks/
-    ├── example-groq-blog-listing.yaml
-    └── example-studio-custom-input.yaml
-```
-### 2. Set up environment
-Create a `.env` file in your project root:
-```bash
-# Required — LLM provider for evaluation and grading
-OPENAI_API_KEY=sk-...
-# Required — read access to Sanity documentation content
-SANITY_API_TOKEN=sk...
-# Optional — publish reports to your Sanity Studio
-AILF_REPORT_SANITY_API_TOKEN=sk...
-AILF_REPORT_PROJECT_ID=your-project-id
-AILF_REPORT_DATASET=production
-```
-### 3. Edit tasks and run
-```bash
-# Edit .ailf/config.yaml with your Sanity project settings
-# Customize or replace the example tasks in .ailf/tasks/
-# Validate task definitions
-npx @sanity/ailf validate-tasks .ailf/tasks/
-# Run evaluation in debug mode (fast feedback)
-npx @sanity/ailf pipeline --repo-tasks-path .ailf/tasks/ --debug
-# Full evaluation
-npx @sanity/ailf pipeline --repo-tasks-path .ailf/tasks/
-```
-## Documentation
-- **[API Reference](https://github.com/sanity-labs/ai-literacy-framework/blob/main/docs/API.md)**
-  — all commands, flags, and environment variables
-- **[Contributing Tasks](https://github.com/sanity-labs/ai-literacy-framework/blob/main/docs/CONTRIBUTING_TASKS.md)**
-  — task authoring guide
-- **[Architecture](https://github.com/sanity-labs/ai-literacy-framework/blob/main/docs/ARCHITECTURE.md)**
-  — domain model and data flow
-- **[Root README](https://github.com/sanity-labs/ai-literacy-framework)** —
-  project overview
+**→ See the
+[full documentation](https://github.com/sanity-labs/ai-literacy-framework#readme)**
+for installation, quick start, configuration, and usage guides.
 ## Related packages
 | Package                                                                    | Description                                        |
 | -------------------------------------------------------------------------- | -------------------------------------------------- |
+| [`@sanity/ailf-tasks`](https://www.npmjs.com/package/@sanity/ailf-tasks)   | Lightweight task validator — schemas + YAML parser |
 | [`@sanity/ailf-studio`](https://www.npmjs.com/package/@sanity/ailf-studio) | Sanity Studio dashboard plugin for viewing reports |
-| `@sanity/ailf-core`                                                        | Domain kernel (types, schemas, ports)              |
-| `@sanity/ailf-shared`                                                      | Cross-package contract types                       |

package/dist/_vendor/ailf-core/examples/index.d.ts CHANGED

@@ -110,7 +110,7 @@ export declare const ailfConfigData: {
     };
 };
 /** Raw YAML string for ailf-config example (preserves comments) */
-export declare const ailfConfigYaml = "# \u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n# .ailf/config.yaml \u2014 AI Literacy Framework project configuration\n# \u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n#\n# This file configures how the AILF evaluation pipeline runs in this\n# repository. Place it at .ailf/config.yaml in your project root.\n#\n# Evaluations are submitted to the AILF API (ailf-api.sanity.build).\n# The API handles LLM calls, doc fetching, grading, and report\n# publishing. Your repo only needs one secret: AILF_API_KEY.\n#\n# Docs: https://github.com/sanity-io/ai-literacy-framework\n# \u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n\n# Documentation source \u2014 which docs are being evaluated.\n#\n# This tells the pipeline which Sanity project and dataset contain\n# the documentation under test. 
For most users, this is Sanity's own\n# docs project.\n#\n# projectId \u2014 Sanity project ID (find yours at sanity.io/manage)\n# dataset   \u2014 the dataset to query (e.g., \"production\", \"next\")\n# baseUrl   \u2014 the public URL of your documentation site\n#             (used by agentic mode to test agent discoverability)\nsource:\n  projectId: \"3do82whm\"\n  dataset: next\n  baseUrl: \"https://www.sanity.io/docs\"\n\n# Trigger configuration \u2014 when evaluations run automatically.\n#\n# Each key is a trigger context. The pipeline checks which trigger\n# matches the current execution context (PR, merge, schedule, etc.)\n# and applies its settings.\n#\n# mode options:\n#   validate-only \u2014 check that task YAML parses correctly (fast, no LLM calls)\n#   eval          \u2014 run the full evaluation pipeline\n#\n# paths \u2014 only trigger when files matching these globs change\n# blocking \u2014 if true, a failing eval blocks the PR merge\n# notify \u2014 if true, post results to configured notification channels\ntriggers:\n  # On pull requests: just validate task files parse correctly\n  pr:\n    mode: validate-only\n\n  # When .ailf/ files change in a PR: run a real evaluation\n  pr-task-change:\n    mode: eval\n    paths: [\".ailf/**\"]\n\n  # On merge to main: run evaluation (non-blocking)\n  main:\n    mode: eval\n    blocking: false\n    notify: true\n";
+export declare const ailfConfigYaml = "# \u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n# .ailf/config.yaml \u2014 AI Literacy Framework project configuration\n# \u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n#\n# This file configures how the AILF evaluation pipeline runs in this\n# repository. Place it at .ailf/config.yaml in your project root.\n#\n# Evaluations are submitted to the AILF API (ailf-api.sanity.build).\n# The API handles LLM calls, doc fetching, grading, and report\n# publishing. Your repo only needs one secret: AILF_API_KEY.\n#\n# Docs: https://github.com/sanity-labs/ai-literacy-framework\n# \u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n\n# Documentation source \u2014 which docs are being evaluated.\n#\n# This tells the pipeline which Sanity project and dataset contain\n# the documentation under test. 
For most users, this is Sanity's own\n# docs project.\n#\n# projectId \u2014 Sanity project ID (find yours at sanity.io/manage)\n# dataset   \u2014 the dataset to query (e.g., \"production\", \"next\")\n# baseUrl   \u2014 the public URL of your documentation site\n#             (used by agentic mode to test agent discoverability)\nsource:\n  projectId: \"3do82whm\"\n  dataset: next\n  baseUrl: \"https://www.sanity.io/docs\"\n\n# Trigger configuration \u2014 when evaluations run automatically.\n#\n# Each key is a trigger context. The pipeline checks which trigger\n# matches the current execution context (PR, merge, schedule, etc.)\n# and applies its settings.\n#\n# mode options:\n#   validate-only \u2014 check that task YAML parses correctly (fast, no LLM calls)\n#   eval          \u2014 run the full evaluation pipeline\n#\n# paths \u2014 only trigger when files matching these globs change\n# blocking \u2014 if true, a failing eval blocks the PR merge\n# notify \u2014 if true, post results to configured notification channels\ntriggers:\n  # On pull requests: just validate task files parse correctly\n  pr:\n    mode: validate-only\n\n  # When .ailf/ files change in a PR: run a real evaluation\n  pr-task-change:\n    mode: eval\n    paths: [\".ailf/**\"]\n\n  # On merge to main: run evaluation (non-blocking)\n  main:\n    mode: eval\n    blocking: false\n    notify: true\n";
 /** Parsed task data for example-groq-blog-listing (JSON-safe) */
 export declare const exampleGroqBlogListingData: readonly [{
     readonly id: "example-groq-blog-listing";
@@ -144,7 +144,7 @@ export declare const exampleGroqBlogListingData: readonly [{
     };
 }];
 /** Raw YAML string for example-groq-blog-listing (preserves comments) */
-export declare const exampleGroqBlogListingYaml = "# \u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n# Example Task: Blog listing with GROQ queries\n# \u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n#\n# This is a starter template \u2014 edit it for your own documentation.\n# Each task evaluates whether an AI coding agent can implement a feature\n# using your docs as context. 
Delete this file or replace it entirely.\n#\n# To disable this task without deleting the file, set:\n#   baseline:\n#     enabled: false\n#\n# Full field reference:\n#   https://github.com/sanity-io/ai-literacy-framework/blob/main/docs/CONTRIBUTING_TASKS.md\n# \u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n\n# Unique identifier \u2014 lowercase alphanumeric with hyphens.\n# Must be unique across all task files in .ailf/tasks/.\n- id: example-groq-blog-listing\n\n  # Short human-readable summary. Shown in score tables and reports.\n  description: \"Example \u2014 Blog listing with GROQ queries\"\n\n  # Feature area this task belongs to. Tasks with the same area are\n  # grouped together in score summaries. Use a short kebab-case name.\n  featureArea: groq\n\n  # Gold-standard documentation articles for this task. The pipeline\n  # fetches these from Sanity and injects them into the prompt for\n  # baseline evaluation. 
Each entry needs:\n  #   slug   \u2014 the article's URL slug in your docs site\n  #   reason \u2014 why this doc is relevant (helps with auditing)\n  canonicalDocs:\n    - slug: groq-introduction\n      reason: \"Core GROQ syntax and query language reference\"\n    - slug: how-queries-work\n      reason: \"Query execution model and best practices\"\n\n  # When true, the pipeline auto-generates an additional rubric that\n  # checks whether the LLM's response actually used the provided docs.\n  docCoverage: true\n\n  # Path to a gold-standard implementation, relative to canonical/.\n  # The grader uses this as a reference when scoring code correctness.\n  referenceSolution: canonical/example-groq-blog-listing.ts\n\n  # vars.task \u2014 the implementation prompt given to the LLM.\n  # Write this as if you're asking a developer to build the feature.\n  # Be specific about requirements so the grader can evaluate clearly.\n  #\n  # vars.docs \u2014 leave empty (\"\"). The pipeline fills this in:\n  #   \u2022 Gold variant: injected with canonical doc content\n  #   \u2022 Baseline variant: left empty (tests model knowledge alone)\n  vars:\n    task: |\n      Create a Next.js page component that lists blog posts from Sanity\n      using GROQ. The page should display the title, slug, and published\n      date for each post, sorted by most recent first. Use the Sanity\n      client to fetch data.\n    docs: \"\"\n\n  # Grading assertions \u2014 how the LLM's response is scored.\n  #\n  # \"llm-rubric\" assertions use a grader LLM to score against criteria.\n  # The \"template\" references a rubric from config/rubrics.yaml.\n  # The \"criteria\" are task-specific bullets injected into the template.\n  #\n  # Available templates:\n  #   task-completion   \u2014 did the LLM implement the feature? (weight: 0.50)\n  #   code-correctness  \u2014 is the code idiomatic and correct? 
(weight: 0.25)\n  #\n  # You can also use value-based assertions:\n  #   - type: contains\n  #     value: \"client.fetch\"\n  #   - type: contains-any\n  #     value: [\"createClient\", \"sanityClient\"]\n  assert:\n    - type: llm-rubric\n      template: task-completion\n      criteria:\n        - \"Uses the groq tagged template literal\"\n        - \"Fetches blog posts with title, slug, and publishedAt fields\"\n        - \"Orders results by publishedAt in descending order\"\n\n    - type: llm-rubric\n      template: code-correctness\n      criteria:\n        - \"Uses createClient from @sanity/client or next-sanity\"\n        - \"Exports a valid Next.js page component\"\n\n  # Baseline variant configuration.\n  #   enabled \u2014 set to false to skip this task entirely\n  #   rubric  \u2014 \"abbreviated\" (faster, default), \"full\", or \"none\"\n  baseline:\n    enabled: true\n    rubric: abbreviated\n";
+export declare const exampleGroqBlogListingYaml = "# \u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n# Example Task: Blog listing with GROQ queries\n# \u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n#\n# This is a starter template \u2014 edit it for your own documentation.\n# Each task evaluates whether an AI coding agent can implement a feature\n# using your docs as context. 
Delete this file or replace it entirely.\n#\n# To disable this task without deleting the file, set:\n#   baseline:\n#     enabled: false\n#\n# Full field reference:\n#   https://github.com/sanity-labs/ai-literacy-framework/blob/main/docs/CONTRIBUTING_TASKS.md\n# \u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n\n# Unique identifier \u2014 lowercase alphanumeric with hyphens.\n# Must be unique across all task files in .ailf/tasks/.\n- id: example-groq-blog-listing\n\n  # Short human-readable summary. Shown in score tables and reports.\n  description: \"Example \u2014 Blog listing with GROQ queries\"\n\n  # Feature area this task belongs to. Tasks with the same area are\n  # grouped together in score summaries. Use a short kebab-case name.\n  featureArea: groq\n\n  # Gold-standard documentation articles for this task. The pipeline\n  # fetches these from Sanity and injects them into the prompt for\n  # baseline evaluation. 
Each entry needs:\n  #   slug   \u2014 the article's URL slug in your docs site\n  #   reason \u2014 why this doc is relevant (helps with auditing)\n  canonicalDocs:\n    - slug: groq-introduction\n      reason: \"Core GROQ syntax and query language reference\"\n    - slug: how-queries-work\n      reason: \"Query execution model and best practices\"\n\n  # When true, the pipeline auto-generates an additional rubric that\n  # checks whether the LLM's response actually used the provided docs.\n  docCoverage: true\n\n  # Path to a gold-standard implementation, relative to canonical/.\n  # The grader uses this as a reference when scoring code correctness.\n  referenceSolution: canonical/example-groq-blog-listing.ts\n\n  # vars.task \u2014 the implementation prompt given to the LLM.\n  # Write this as if you're asking a developer to build the feature.\n  # Be specific about requirements so the grader can evaluate clearly.\n  #\n  # vars.docs \u2014 leave empty (\"\"). The pipeline fills this in:\n  #   \u2022 Gold variant: injected with canonical doc content\n  #   \u2022 Baseline variant: left empty (tests model knowledge alone)\n  vars:\n    task: |\n      Create a Next.js page component that lists blog posts from Sanity\n      using GROQ. The page should display the title, slug, and published\n      date for each post, sorted by most recent first. Use the Sanity\n      client to fetch data.\n    docs: \"\"\n\n  # Grading assertions \u2014 how the LLM's response is scored.\n  #\n  # \"llm-rubric\" assertions use a grader LLM to score against criteria.\n  # The \"template\" references a rubric from config/rubrics.yaml.\n  # The \"criteria\" are task-specific bullets injected into the template.\n  #\n  # Available templates:\n  #   task-completion   \u2014 did the LLM implement the feature? (weight: 0.50)\n  #   code-correctness  \u2014 is the code idiomatic and correct? 
(weight: 0.25)\n  #\n  # You can also use value-based assertions:\n  #   - type: contains\n  #     value: \"client.fetch\"\n  #   - type: contains-any\n  #     value: [\"createClient\", \"sanityClient\"]\n  assert:\n    - type: llm-rubric\n      template: task-completion\n      criteria:\n        - \"Uses the groq tagged template literal\"\n        - \"Fetches blog posts with title, slug, and publishedAt fields\"\n        - \"Orders results by publishedAt in descending order\"\n\n    - type: llm-rubric\n      template: code-correctness\n      criteria:\n        - \"Uses createClient from @sanity/client or next-sanity\"\n        - \"Exports a valid Next.js page component\"\n\n  # Baseline variant configuration.\n  #   enabled \u2014 set to false to skip this task entirely\n  #   rubric  \u2014 \"abbreviated\" (faster, default), \"full\", or \"none\"\n  baseline:\n    enabled: true\n    rubric: abbreviated\n";
 /** Parsed task data for example-studio-custom-input (JSON-safe) */
 export declare const exampleStudioCustomInputData: readonly [{
     readonly id: "example-studio-custom-input";
@@ -191,4 +191,4 @@ export interface ExampleRecord {
 }
 export declare const EXAMPLES: Record<ExampleType, ExampleRecord>;
 /** GitHub Actions workflow template for AI Literacy evaluation */
-export declare const workflowYaml = "# \u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n# AI Literacy Evaluation \u2014 GitHub Actions workflow\n# \u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n#\n# Evaluates your documentation quality on every pull request.\n# The AILF CLI reads your .ailf/tasks/ definitions, submits them\n# to the AILF API for evaluation, and writes a score report.\n#\n# Prerequisites:\n#   Add one secret to your repository (Settings \u2192 Secrets \u2192 Actions):\n#     AILF_API_KEY \u2014 your API key (starts with ailf_live_sk_)\n#     NPM_TOKEN   \u2014 npm token with read access to @sanity scope\n#\n# Customization:\n#   - Narrow the trigger paths to reduce cost (see comment below)\n#   - Check debug_mode for faster iteration (fewer tests)\n#   - See: https://github.com/sanity-io/ai-literacy-framework\n# 
\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n\nname: AI Literacy Eval\n\non:\n  pull_request:\n    branches: [main]\n    # Runs on every PR to main by default. To reduce cost:\n    #   paths: [\".ailf/**\", \"docs/**\"]\n\n  workflow_dispatch:\n    inputs:\n      debug_mode:\n        description: \"Run in debug mode (fewer tests, faster iteration)\"\n        type: boolean\n        default: false\n\nconcurrency:\n  group: ailf-eval-${{ github.event.pull_request.number || github.ref }}\n  cancel-in-progress: true\n\njobs:\n  evaluate:\n    name: AI Literacy Evaluation\n    runs-on: ubuntu-latest\n    permissions:\n      contents: read\n      pull-requests: write\n    steps:\n      - uses: actions/checkout@v4\n\n      - name: Configure npm for @sanity scope\n        run:\n          echo \"//registry.npmjs.org/:_authToken=${{ secrets.NPM_TOKEN }}\" >>\n          ~/.npmrc\n\n      - name: Run evaluation\n        id: eval\n        env:\n          AILF_API_KEY: ${{ secrets.AILF_API_KEY }}\n        run: |\n          npx @sanity/ailf@latest pipeline --remote \\\n            --output /tmp/ailf-report.md \\\n            ${{ inputs.debug_mode && '--debug' || '' }}\n\n      - name: Post PR comment\n        if: always() && github.event_name == 'pull_request'\n        uses: actions/github-script@v7\n        with:\n          script: |\n            const fs = require('fs');\n\n            // --- Constants ---\n            const MARKER = '<!-- ailf-score-report -->';\n            const HISTORY_START = '<!-- ailf-score-history -->';\n            const HISTORY_END = '<!-- /ailf-score-history -->';\n            
const MAX_HISTORY = 3; // keep at most 3 prior runs\n\n            // --- Read new report ---\n            let newReport;\n            try {\n              newReport = fs.readFileSync('/tmp/ailf-report.md', 'utf-8');\n            } catch {\n              newReport = `## \u26A0\uFE0F AI Literacy Evaluation\\n\\nNo report generated. Check the [workflow logs](${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}).`;\n            }\n\n            const prNumber = context.issue?.number || context.payload?.pull_request?.number;\n            if (!prNumber) {\n              console.log('No PR number found, skipping comment');\n              return;\n            }\n\n            // --- Find existing comment ---\n            const { data: comments } = await github.rest.issues.listComments({\n              owner: context.repo.owner, repo: context.repo.repo,\n              issue_number: prNumber,\n            });\n            const existing = comments.find(c => c.body?.includes(MARKER));\n\n            // --- Build history from previous comment ---\n            let historyEntries = [];\n            if (existing) {\n              const oldBody = existing.body || '';\n\n              // Collect existing collapsed history entries\n              const histStart = oldBody.indexOf(HISTORY_START);\n              const histEnd = oldBody.indexOf(HISTORY_END);\n              if (histStart !== -1 && histEnd !== -1) {\n                const historyContent = oldBody.slice(histStart + HISTORY_START.length, histEnd).trim();\n                // Split on </details> boundaries to get individual entries\n                if (historyContent) {\n                  historyEntries = historyContent\n                    .split(/<\\/details>\\s*/)\n                    .map(s => s.trim())\n                    .filter(s => s.startsWith('<details>'))\n                    .map(s => s + '\\n</details>');\n                }\n              }\n\n              // Extract the current 
report (will become the newest history entry)\n              let previousReport = '';\n              if (histStart !== -1) {\n                // Report is between MARKER and the \"Previous runs\" heading (or history section)\n                const markerIdx = oldBody.indexOf(MARKER);\n                // Find the --- separator before history\n                const separatorIdx = oldBody.lastIndexOf('---', histStart);\n                const endIdx = separatorIdx > markerIdx ? separatorIdx : histStart;\n                previousReport = oldBody.slice(markerIdx + MARKER.length, endIdx).trim();\n              } else {\n                // No history yet \u2014 everything after MARKER is the report\n                const markerIdx = oldBody.indexOf(MARKER);\n                if (markerIdx !== -1) {\n                  previousReport = oldBody.slice(markerIdx + MARKER.length).trim();\n                }\n              }\n\n              // Collapse the previous report into a <details> entry\n              if (previousReport) {\n                const scoreMatch = previousReport.match(/Overall:\\s*(\\d+)\\/100/);\n                const score = scoreMatch ? scoreMatch[1] : '?';\n                const dateMatch = previousReport.match(/Generated by.*?\u00B7\\s*([^\u00B7<\\n*]+)/);\n                const date = dateMatch\n                  ? dateMatch[1].trim()\n                  : new Date().toISOString().slice(0, 16).replace('T', ' ') + ' UTC';\n                const entry = `<details>\\n<summary>\uD83D\uDCDC ${date} \u2014 ${score}/100</summary>\\n\\n${previousReport}\\n\\n</details>`;\n                historyEntries.unshift(entry); // newest first\n              }\n\n              // Enforce max history limit\n              historyEntries = historyEntries.slice(0, MAX_HISTORY);\n            }\n\n            // --- Assemble final comment ---\n            const historySection = historyEntries.length > 0\n              ? 
`\\n\\n---\\n\\n### \uD83D\uDCDC Previous runs\\n\\n${HISTORY_START}\\n${historyEntries.join('\\n\\n')}\\n${HISTORY_END}`\n              : '';\n            const finalBody = `${MARKER}\\n${newReport}${historySection}`;\n\n            if (existing) {\n              await github.rest.issues.updateComment({\n                owner: context.repo.owner, repo: context.repo.repo,\n                comment_id: existing.id, body: finalBody,\n              });\n              console.log(`Updated comment (${historyEntries.length} history entries)`);\n            } else {\n              await github.rest.issues.createComment({\n                owner: context.repo.owner, repo: context.repo.repo,\n                issue_number: prNumber, body: finalBody,\n              });\n              console.log('Created new PR comment');\n            }\n\n      - name: Summary\n        if: always()\n        run: |\n          if [ -f /tmp/ailf-report.md ]; then\n            cat /tmp/ailf-report.md >> \"$GITHUB_STEP_SUMMARY\"\n          else\n            echo \"## \u26A0\uFE0F AI Literacy Evaluation\" >> \"$GITHUB_STEP_SUMMARY\"\n            echo \"\" >> \"$GITHUB_STEP_SUMMARY\"\n            echo \"No report generated. Check the workflow logs.\" >> \"$GITHUB_STEP_SUMMARY\"\n          fi\n";
+export declare const workflowYaml = "# \u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n# AI Literacy Evaluation \u2014 GitHub Actions workflow\n# \u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n#\n# Evaluates your documentation quality on every pull request.\n# The AILF CLI reads your .ailf/tasks/ definitions, submits them\n# to the AILF API for evaluation, and writes a score report.\n#\n# Prerequisites:\n#   Add one secret to your repository (Settings \u2192 Secrets \u2192 Actions):\n#     AILF_API_KEY \u2014 your API key (starts with ailf_live_sk_)\n#     NPM_TOKEN   \u2014 npm token with read access to @sanity scope\n#\n# Customization:\n#   - Narrow the trigger paths to reduce cost (see comment below)\n#   - Check debug_mode for faster iteration (fewer tests)\n#   - See: https://github.com/sanity-labs/ai-literacy-framework\n# 
\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n\nname: AI Literacy Eval\n\non:\n  pull_request:\n    branches: [main]\n    # Runs on every PR to main by default. To reduce cost:\n    #   paths: [\".ailf/**\", \"docs/**\"]\n\n  workflow_dispatch:\n    inputs:\n      debug_mode:\n        description: \"Run in debug mode (fewer tests, faster iteration)\"\n        type: boolean\n        default: false\n\nconcurrency:\n  group: ailf-eval-${{ github.event.pull_request.number || github.ref }}\n  cancel-in-progress: true\n\njobs:\n  evaluate:\n    name: AI Literacy Evaluation\n    runs-on: ubuntu-latest\n    permissions:\n      contents: read\n      pull-requests: write\n    steps:\n      - uses: actions/checkout@v4\n\n      - name: Configure npm for @sanity scope\n        run:\n          echo \"//registry.npmjs.org/:_authToken=${{ secrets.NPM_TOKEN }}\" >>\n          ~/.npmrc\n\n      - name: Run evaluation\n        id: eval\n        env:\n          AILF_API_KEY: ${{ secrets.AILF_API_KEY }}\n        run: |\n          npx @sanity/ailf@latest pipeline --remote \\\n            --output /tmp/ailf-report.md \\\n            ${{ inputs.debug_mode && '--debug' || '' }}\n\n      - name: Post PR comment\n        if: always() && github.event_name == 'pull_request'\n        uses: actions/github-script@v7\n        with:\n          script: |\n            const fs = require('fs');\n\n            // --- Constants ---\n            const MARKER = '<!-- ailf-score-report -->';\n            const HISTORY_START = '<!-- ailf-score-history -->';\n            const HISTORY_END = '<!-- /ailf-score-history -->';\n            
const MAX_HISTORY = 3; // keep at most 3 prior runs\n\n            // --- Read new report ---\n            let newReport;\n            try {\n              newReport = fs.readFileSync('/tmp/ailf-report.md', 'utf-8');\n            } catch {\n              newReport = `## \u26A0\uFE0F AI Literacy Evaluation\\n\\nNo report generated. Check the [workflow logs](${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}).`;\n            }\n\n            const prNumber = context.issue?.number || context.payload?.pull_request?.number;\n            if (!prNumber) {\n              console.log('No PR number found, skipping comment');\n              return;\n            }\n\n            // --- Find existing comment ---\n            const { data: comments } = await github.rest.issues.listComments({\n              owner: context.repo.owner, repo: context.repo.repo,\n              issue_number: prNumber,\n            });\n            const existing = comments.find(c => c.body?.includes(MARKER));\n\n            // --- Build history from previous comment ---\n            let historyEntries = [];\n            if (existing) {\n              const oldBody = existing.body || '';\n\n              // Collect existing collapsed history entries\n              const histStart = oldBody.indexOf(HISTORY_START);\n              const histEnd = oldBody.indexOf(HISTORY_END);\n              if (histStart !== -1 && histEnd !== -1) {\n                const historyContent = oldBody.slice(histStart + HISTORY_START.length, histEnd).trim();\n                // Split on </details> boundaries to get individual entries\n                if (historyContent) {\n                  historyEntries = historyContent\n                    .split(/<\\/details>\\s*/)\n                    .map(s => s.trim())\n                    .filter(s => s.startsWith('<details>'))\n                    .map(s => s + '\\n</details>');\n                }\n              }\n\n              // Extract the current 
report (will become the newest history entry)\n              let previousReport = '';\n              if (histStart !== -1) {\n                // Report is between MARKER and the \"Previous runs\" heading (or history section)\n                const markerIdx = oldBody.indexOf(MARKER);\n                // Find the --- separator before history\n                const separatorIdx = oldBody.lastIndexOf('---', histStart);\n                const endIdx = separatorIdx > markerIdx ? separatorIdx : histStart;\n                previousReport = oldBody.slice(markerIdx + MARKER.length, endIdx).trim();\n              } else {\n                // No history yet \u2014 everything after MARKER is the report\n                const markerIdx = oldBody.indexOf(MARKER);\n                if (markerIdx !== -1) {\n                  previousReport = oldBody.slice(markerIdx + MARKER.length).trim();\n                }\n              }\n\n              // Collapse the previous report into a <details> entry\n              if (previousReport) {\n                const scoreMatch = previousReport.match(/Overall:\\s*(\\d+)\\/100/);\n                const score = scoreMatch ? scoreMatch[1] : '?';\n                const dateMatch = previousReport.match(/Generated by.*?\u00B7\\s*([^\u00B7<\\n*]+)/);\n                const date = dateMatch\n                  ? dateMatch[1].trim()\n                  : new Date().toISOString().slice(0, 16).replace('T', ' ') + ' UTC';\n                const entry = `<details>\\n<summary>\uD83D\uDCDC ${date} \u2014 ${score}/100</summary>\\n\\n${previousReport}\\n\\n</details>`;\n                historyEntries.unshift(entry); // newest first\n              }\n\n              // Enforce max history limit\n              historyEntries = historyEntries.slice(0, MAX_HISTORY);\n            }\n\n            // --- Assemble final comment ---\n            const historySection = historyEntries.length > 0\n              ? 
`\\n\\n---\\n\\n### \uD83D\uDCDC Previous runs\\n\\n${HISTORY_START}\\n${historyEntries.join('\\n\\n')}\\n${HISTORY_END}`\n              : '';\n            const finalBody = `${MARKER}\\n${newReport}${historySection}`;\n\n            if (existing) {\n              await github.rest.issues.updateComment({\n                owner: context.repo.owner, repo: context.repo.repo,\n                comment_id: existing.id, body: finalBody,\n              });\n              console.log(`Updated comment (${historyEntries.length} history entries)`);\n            } else {\n              await github.rest.issues.createComment({\n                owner: context.repo.owner, repo: context.repo.repo,\n                issue_number: prNumber, body: finalBody,\n              });\n              console.log('Created new PR comment');\n            }\n\n      - name: Summary\n        if: always()\n        run: |\n          if [ -f /tmp/ailf-report.md ]; then\n            cat /tmp/ailf-report.md >> \"$GITHUB_STEP_SUMMARY\"\n          else\n            echo \"## \u26A0\uFE0F AI Literacy Evaluation\" >> \"$GITHUB_STEP_SUMMARY\"\n            echo \"\" >> \"$GITHUB_STEP_SUMMARY\"\n            echo \"No report generated. Check the workflow logs.\" >> \"$GITHUB_STEP_SUMMARY\"\n          fi\n";

package/dist/_vendor/ailf-core/examples/index.js CHANGED Viewed

@@ -141,7 +141,7 @@ export const ailfConfigData = {
     }
 };
 /** Raw YAML string for ailf-config example (preserves comments) */
-export const ailfConfigYaml = "# ──────────────────────────────────────────────────────────────────────\n# .ailf/config.yaml — AI Literacy Framework project configuration\n# ──────────────────────────────────────────────────────────────────────\n#\n# This file configures how the AILF evaluation pipeline runs in this\n# repository. Place it at .ailf/config.yaml in your project root.\n#\n# Evaluations are submitted to the AILF API (ailf-api.sanity.build).\n# The API handles LLM calls, doc fetching, grading, and report\n# publishing. Your repo only needs one secret: AILF_API_KEY.\n#\n# Docs: https://github.com/sanity-io/ai-literacy-framework\n# ──────────────────────────────────────────────────────────────────────\n\n# Documentation source — which docs are being evaluated.\n#\n# This tells the pipeline which Sanity project and dataset contain\n# the documentation under test. For most users, this is Sanity's own\n# docs project.\n#\n# projectId — Sanity project ID (find yours at sanity.io/manage)\n# dataset   — the dataset to query (e.g., \"production\", \"next\")\n# baseUrl   — the public URL of your documentation site\n#             (used by agentic mode to test agent discoverability)\nsource:\n  projectId: \"3do82whm\"\n  dataset: next\n  baseUrl: \"https://www.sanity.io/docs\"\n\n# Trigger configuration — when evaluations run automatically.\n#\n# Each key is a trigger context. 
The pipeline checks which trigger\n# matches the current execution context (PR, merge, schedule, etc.)\n# and applies its settings.\n#\n# mode options:\n#   validate-only — check that task YAML parses correctly (fast, no LLM calls)\n#   eval          — run the full evaluation pipeline\n#\n# paths — only trigger when files matching these globs change\n# blocking — if true, a failing eval blocks the PR merge\n# notify — if true, post results to configured notification channels\ntriggers:\n  # On pull requests: just validate task files parse correctly\n  pr:\n    mode: validate-only\n\n  # When .ailf/ files change in a PR: run a real evaluation\n  pr-task-change:\n    mode: eval\n    paths: [\".ailf/**\"]\n\n  # On merge to main: run evaluation (non-blocking)\n  main:\n    mode: eval\n    blocking: false\n    notify: true\n";
+export const ailfConfigYaml = "# ──────────────────────────────────────────────────────────────────────\n# .ailf/config.yaml — AI Literacy Framework project configuration\n# ──────────────────────────────────────────────────────────────────────\n#\n# This file configures how the AILF evaluation pipeline runs in this\n# repository. Place it at .ailf/config.yaml in your project root.\n#\n# Evaluations are submitted to the AILF API (ailf-api.sanity.build).\n# The API handles LLM calls, doc fetching, grading, and report\n# publishing. Your repo only needs one secret: AILF_API_KEY.\n#\n# Docs: https://github.com/sanity-labs/ai-literacy-framework\n# ──────────────────────────────────────────────────────────────────────\n\n# Documentation source — which docs are being evaluated.\n#\n# This tells the pipeline which Sanity project and dataset contain\n# the documentation under test. For most users, this is Sanity's own\n# docs project.\n#\n# projectId — Sanity project ID (find yours at sanity.io/manage)\n# dataset   — the dataset to query (e.g., \"production\", \"next\")\n# baseUrl   — the public URL of your documentation site\n#             (used by agentic mode to test agent discoverability)\nsource:\n  projectId: \"3do82whm\"\n  dataset: next\n  baseUrl: \"https://www.sanity.io/docs\"\n\n# Trigger configuration — when evaluations run automatically.\n#\n# Each key is a trigger context. 
The pipeline checks which trigger\n# matches the current execution context (PR, merge, schedule, etc.)\n# and applies its settings.\n#\n# mode options:\n#   validate-only — check that task YAML parses correctly (fast, no LLM calls)\n#   eval          — run the full evaluation pipeline\n#\n# paths — only trigger when files matching these globs change\n# blocking — if true, a failing eval blocks the PR merge\n# notify — if true, post results to configured notification channels\ntriggers:\n  # On pull requests: just validate task files parse correctly\n  pr:\n    mode: validate-only\n\n  # When .ailf/ files change in a PR: run a real evaluation\n  pr-task-change:\n    mode: eval\n    paths: [\".ailf/**\"]\n\n  # On merge to main: run evaluation (non-blocking)\n  main:\n    mode: eval\n    blocking: false\n    notify: true\n";
 /** Parsed task data for example-groq-blog-listing (JSON-safe) */
 export const exampleGroqBlogListingData = [
     {
@@ -190,7 +190,7 @@ export const exampleGroqBlogListingData = [
     }
 ];
 /** Raw YAML string for example-groq-blog-listing (preserves comments) */
-export const exampleGroqBlogListingYaml = "# ──────────────────────────────────────────────────────────────────────\n# Example Task: Blog listing with GROQ queries\n# ──────────────────────────────────────────────────────────────────────\n#\n# This is a starter template — edit it for your own documentation.\n# Each task evaluates whether an AI coding agent can implement a feature\n# using your docs as context. Delete this file or replace it entirely.\n#\n# To disable this task without deleting the file, set:\n#   baseline:\n#     enabled: false\n#\n# Full field reference:\n#   https://github.com/sanity-io/ai-literacy-framework/blob/main/docs/CONTRIBUTING_TASKS.md\n# ──────────────────────────────────────────────────────────────────────\n\n# Unique identifier — lowercase alphanumeric with hyphens.\n# Must be unique across all task files in .ailf/tasks/.\n- id: example-groq-blog-listing\n\n  # Short human-readable summary. Shown in score tables and reports.\n  description: \"Example — Blog listing with GROQ queries\"\n\n  # Feature area this task belongs to. Tasks with the same area are\n  # grouped together in score summaries. Use a short kebab-case name.\n  featureArea: groq\n\n  # Gold-standard documentation articles for this task. The pipeline\n  # fetches these from Sanity and injects them into the prompt for\n  # baseline evaluation. 
Each entry needs:\n  #   slug   — the article's URL slug in your docs site\n  #   reason — why this doc is relevant (helps with auditing)\n  canonicalDocs:\n    - slug: groq-introduction\n      reason: \"Core GROQ syntax and query language reference\"\n    - slug: how-queries-work\n      reason: \"Query execution model and best practices\"\n\n  # When true, the pipeline auto-generates an additional rubric that\n  # checks whether the LLM's response actually used the provided docs.\n  docCoverage: true\n\n  # Path to a gold-standard implementation, relative to canonical/.\n  # The grader uses this as a reference when scoring code correctness.\n  referenceSolution: canonical/example-groq-blog-listing.ts\n\n  # vars.task — the implementation prompt given to the LLM.\n  # Write this as if you're asking a developer to build the feature.\n  # Be specific about requirements so the grader can evaluate clearly.\n  #\n  # vars.docs — leave empty (\"\"). The pipeline fills this in:\n  #   • Gold variant: injected with canonical doc content\n  #   • Baseline variant: left empty (tests model knowledge alone)\n  vars:\n    task: |\n      Create a Next.js page component that lists blog posts from Sanity\n      using GROQ. The page should display the title, slug, and published\n      date for each post, sorted by most recent first. Use the Sanity\n      client to fetch data.\n    docs: \"\"\n\n  # Grading assertions — how the LLM's response is scored.\n  #\n  # \"llm-rubric\" assertions use a grader LLM to score against criteria.\n  # The \"template\" references a rubric from config/rubrics.yaml.\n  # The \"criteria\" are task-specific bullets injected into the template.\n  #\n  # Available templates:\n  #   task-completion   — did the LLM implement the feature? (weight: 0.50)\n  #   code-correctness  — is the code idiomatic and correct? 
(weight: 0.25)\n  #\n  # You can also use value-based assertions:\n  #   - type: contains\n  #     value: \"client.fetch\"\n  #   - type: contains-any\n  #     value: [\"createClient\", \"sanityClient\"]\n  assert:\n    - type: llm-rubric\n      template: task-completion\n      criteria:\n        - \"Uses the groq tagged template literal\"\n        - \"Fetches blog posts with title, slug, and publishedAt fields\"\n        - \"Orders results by publishedAt in descending order\"\n\n    - type: llm-rubric\n      template: code-correctness\n      criteria:\n        - \"Uses createClient from @sanity/client or next-sanity\"\n        - \"Exports a valid Next.js page component\"\n\n  # Baseline variant configuration.\n  #   enabled — set to false to skip this task entirely\n  #   rubric  — \"abbreviated\" (faster, default), \"full\", or \"none\"\n  baseline:\n    enabled: true\n    rubric: abbreviated\n";
+export const exampleGroqBlogListingYaml = "# ──────────────────────────────────────────────────────────────────────\n# Example Task: Blog listing with GROQ queries\n# ──────────────────────────────────────────────────────────────────────\n#\n# This is a starter template — edit it for your own documentation.\n# Each task evaluates whether an AI coding agent can implement a feature\n# using your docs as context. Delete this file or replace it entirely.\n#\n# To disable this task without deleting the file, set:\n#   baseline:\n#     enabled: false\n#\n# Full field reference:\n#   https://github.com/sanity-labs/ai-literacy-framework/blob/main/docs/CONTRIBUTING_TASKS.md\n# ──────────────────────────────────────────────────────────────────────\n\n# Unique identifier — lowercase alphanumeric with hyphens.\n# Must be unique across all task files in .ailf/tasks/.\n- id: example-groq-blog-listing\n\n  # Short human-readable summary. Shown in score tables and reports.\n  description: \"Example — Blog listing with GROQ queries\"\n\n  # Feature area this task belongs to. Tasks with the same area are\n  # grouped together in score summaries. Use a short kebab-case name.\n  featureArea: groq\n\n  # Gold-standard documentation articles for this task. The pipeline\n  # fetches these from Sanity and injects them into the prompt for\n  # baseline evaluation. 
Each entry needs:\n  #   slug   — the article's URL slug in your docs site\n  #   reason — why this doc is relevant (helps with auditing)\n  canonicalDocs:\n    - slug: groq-introduction\n      reason: \"Core GROQ syntax and query language reference\"\n    - slug: how-queries-work\n      reason: \"Query execution model and best practices\"\n\n  # When true, the pipeline auto-generates an additional rubric that\n  # checks whether the LLM's response actually used the provided docs.\n  docCoverage: true\n\n  # Path to a gold-standard implementation, relative to canonical/.\n  # The grader uses this as a reference when scoring code correctness.\n  referenceSolution: canonical/example-groq-blog-listing.ts\n\n  # vars.task — the implementation prompt given to the LLM.\n  # Write this as if you're asking a developer to build the feature.\n  # Be specific about requirements so the grader can evaluate clearly.\n  #\n  # vars.docs — leave empty (\"\"). The pipeline fills this in:\n  #   • Gold variant: injected with canonical doc content\n  #   • Baseline variant: left empty (tests model knowledge alone)\n  vars:\n    task: |\n      Create a Next.js page component that lists blog posts from Sanity\n      using GROQ. The page should display the title, slug, and published\n      date for each post, sorted by most recent first. Use the Sanity\n      client to fetch data.\n    docs: \"\"\n\n  # Grading assertions — how the LLM's response is scored.\n  #\n  # \"llm-rubric\" assertions use a grader LLM to score against criteria.\n  # The \"template\" references a rubric from config/rubrics.yaml.\n  # The \"criteria\" are task-specific bullets injected into the template.\n  #\n  # Available templates:\n  #   task-completion   — did the LLM implement the feature? (weight: 0.50)\n  #   code-correctness  — is the code idiomatic and correct? 
(weight: 0.25)\n  #\n  # You can also use value-based assertions:\n  #   - type: contains\n  #     value: \"client.fetch\"\n  #   - type: contains-any\n  #     value: [\"createClient\", \"sanityClient\"]\n  assert:\n    - type: llm-rubric\n      template: task-completion\n      criteria:\n        - \"Uses the groq tagged template literal\"\n        - \"Fetches blog posts with title, slug, and publishedAt fields\"\n        - \"Orders results by publishedAt in descending order\"\n\n    - type: llm-rubric\n      template: code-correctness\n      criteria:\n        - \"Uses createClient from @sanity/client or next-sanity\"\n        - \"Exports a valid Next.js page component\"\n\n  # Baseline variant configuration.\n  #   enabled — set to false to skip this task entirely\n  #   rubric  — \"abbreviated\" (faster, default), \"full\", or \"none\"\n  baseline:\n    enabled: true\n    rubric: abbreviated\n";
 /** Parsed task data for example-studio-custom-input (JSON-safe) */
 export const exampleStudioCustomInputData = [
     {
@@ -289,4 +289,4 @@ export const EXAMPLES = {
 // Raw file exports (non-data files, exported as raw strings)
 // ---------------------------------------------------------------------------
 /** GitHub Actions workflow template for AI Literacy evaluation */
-export const workflowYaml = "# ──────────────────────────────────────────────────────────────────────\n# AI Literacy Evaluation — GitHub Actions workflow\n# ──────────────────────────────────────────────────────────────────────\n#\n# Evaluates your documentation quality on every pull request.\n# The AILF CLI reads your .ailf/tasks/ definitions, submits them\n# to the AILF API for evaluation, and writes a score report.\n#\n# Prerequisites:\n#   Add one secret to your repository (Settings → Secrets → Actions):\n#     AILF_API_KEY — your API key (starts with ailf_live_sk_)\n#     NPM_TOKEN   — npm token with read access to @sanity scope\n#\n# Customization:\n#   - Narrow the trigger paths to reduce cost (see comment below)\n#   - Check debug_mode for faster iteration (fewer tests)\n#   - See: https://github.com/sanity-io/ai-literacy-framework\n# ──────────────────────────────────────────────────────────────────────\n\nname: AI Literacy Eval\n\non:\n  pull_request:\n    branches: [main]\n    # Runs on every PR to main by default. 
To reduce cost:\n    #   paths: [\".ailf/**\", \"docs/**\"]\n\n  workflow_dispatch:\n    inputs:\n      debug_mode:\n        description: \"Run in debug mode (fewer tests, faster iteration)\"\n        type: boolean\n        default: false\n\nconcurrency:\n  group: ailf-eval-${{ github.event.pull_request.number || github.ref }}\n  cancel-in-progress: true\n\njobs:\n  evaluate:\n    name: AI Literacy Evaluation\n    runs-on: ubuntu-latest\n    permissions:\n      contents: read\n      pull-requests: write\n    steps:\n      - uses: actions/checkout@v4\n\n      - name: Configure npm for @sanity scope\n        run:\n          echo \"//registry.npmjs.org/:_authToken=${{ secrets.NPM_TOKEN }}\" >>\n          ~/.npmrc\n\n      - name: Run evaluation\n        id: eval\n        env:\n          AILF_API_KEY: ${{ secrets.AILF_API_KEY }}\n        run: |\n          npx @sanity/ailf@latest pipeline --remote \\\n            --output /tmp/ailf-report.md \\\n            ${{ inputs.debug_mode && '--debug' || '' }}\n\n      - name: Post PR comment\n        if: always() && github.event_name == 'pull_request'\n        uses: actions/github-script@v7\n        with:\n          script: |\n            const fs = require('fs');\n\n            // --- Constants ---\n            const MARKER = '<!-- ailf-score-report -->';\n            const HISTORY_START = '<!-- ailf-score-history -->';\n            const HISTORY_END = '<!-- /ailf-score-history -->';\n            const MAX_HISTORY = 3; // keep at most 3 prior runs\n\n            // --- Read new report ---\n            let newReport;\n            try {\n              newReport = fs.readFileSync('/tmp/ailf-report.md', 'utf-8');\n            } catch {\n              newReport = `## ⚠️ AI Literacy Evaluation\\n\\nNo report generated. 
Check the [workflow logs](${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}).`;\n            }\n\n            const prNumber = context.issue?.number || context.payload?.pull_request?.number;\n            if (!prNumber) {\n              console.log('No PR number found, skipping comment');\n              return;\n            }\n\n            // --- Find existing comment ---\n            const { data: comments } = await github.rest.issues.listComments({\n              owner: context.repo.owner, repo: context.repo.repo,\n              issue_number: prNumber,\n            });\n            const existing = comments.find(c => c.body?.includes(MARKER));\n\n            // --- Build history from previous comment ---\n            let historyEntries = [];\n            if (existing) {\n              const oldBody = existing.body || '';\n\n              // Collect existing collapsed history entries\n              const histStart = oldBody.indexOf(HISTORY_START);\n              const histEnd = oldBody.indexOf(HISTORY_END);\n              if (histStart !== -1 && histEnd !== -1) {\n                const historyContent = oldBody.slice(histStart + HISTORY_START.length, histEnd).trim();\n                // Split on </details> boundaries to get individual entries\n                if (historyContent) {\n                  historyEntries = historyContent\n                    .split(/<\\/details>\\s*/)\n                    .map(s => s.trim())\n                    .filter(s => s.startsWith('<details>'))\n                    .map(s => s + '\\n</details>');\n                }\n              }\n\n              // Extract the current report (will become the newest history entry)\n              let previousReport = '';\n              if (histStart !== -1) {\n                // Report is between MARKER and the \"Previous runs\" heading (or history section)\n                const markerIdx = oldBody.indexOf(MARKER);\n                // Find the --- separator 
before history\n                const separatorIdx = oldBody.lastIndexOf('---', histStart);\n                const endIdx = separatorIdx > markerIdx ? separatorIdx : histStart;\n                previousReport = oldBody.slice(markerIdx + MARKER.length, endIdx).trim();\n              } else {\n                // No history yet — everything after MARKER is the report\n                const markerIdx = oldBody.indexOf(MARKER);\n                if (markerIdx !== -1) {\n                  previousReport = oldBody.slice(markerIdx + MARKER.length).trim();\n                }\n              }\n\n              // Collapse the previous report into a <details> entry\n              if (previousReport) {\n                const scoreMatch = previousReport.match(/Overall:\\s*(\\d+)\\/100/);\n                const score = scoreMatch ? scoreMatch[1] : '?';\n                const dateMatch = previousReport.match(/Generated by.*?·\\s*([^·<\\n*]+)/);\n                const date = dateMatch\n                  ? dateMatch[1].trim()\n                  : new Date().toISOString().slice(0, 16).replace('T', ' ') + ' UTC';\n                const entry = `<details>\\n<summary>📜 ${date} — ${score}/100</summary>\\n\\n${previousReport}\\n\\n</details>`;\n                historyEntries.unshift(entry); // newest first\n              }\n\n              // Enforce max history limit\n              historyEntries = historyEntries.slice(0, MAX_HISTORY);\n            }\n\n            // --- Assemble final comment ---\n            const historySection = historyEntries.length > 0\n              ? 
`\\n\\n---\\n\\n### 📜 Previous runs\\n\\n${HISTORY_START}\\n${historyEntries.join('\\n\\n')}\\n${HISTORY_END}`\n              : '';\n            const finalBody = `${MARKER}\\n${newReport}${historySection}`;\n\n            if (existing) {\n              await github.rest.issues.updateComment({\n                owner: context.repo.owner, repo: context.repo.repo,\n                comment_id: existing.id, body: finalBody,\n              });\n              console.log(`Updated comment (${historyEntries.length} history entries)`);\n            } else {\n              await github.rest.issues.createComment({\n                owner: context.repo.owner, repo: context.repo.repo,\n                issue_number: prNumber, body: finalBody,\n              });\n              console.log('Created new PR comment');\n            }\n\n      - name: Summary\n        if: always()\n        run: |\n          if [ -f /tmp/ailf-report.md ]; then\n            cat /tmp/ailf-report.md >> \"$GITHUB_STEP_SUMMARY\"\n          else\n            echo \"## ⚠️ AI Literacy Evaluation\" >> \"$GITHUB_STEP_SUMMARY\"\n            echo \"\" >> \"$GITHUB_STEP_SUMMARY\"\n            echo \"No report generated. Check the workflow logs.\" >> \"$GITHUB_STEP_SUMMARY\"\n          fi\n";
+export const workflowYaml = "# ──────────────────────────────────────────────────────────────────────\n# AI Literacy Evaluation — GitHub Actions workflow\n# ──────────────────────────────────────────────────────────────────────\n#\n# Evaluates your documentation quality on every pull request.\n# The AILF CLI reads your .ailf/tasks/ definitions, submits them\n# to the AILF API for evaluation, and writes a score report.\n#\n# Prerequisites:\n#   Add one secret to your repository (Settings → Secrets → Actions):\n#     AILF_API_KEY — your API key (starts with ailf_live_sk_)\n#     NPM_TOKEN   — npm token with read access to @sanity scope\n#\n# Customization:\n#   - Narrow the trigger paths to reduce cost (see comment below)\n#   - Check debug_mode for faster iteration (fewer tests)\n#   - See: https://github.com/sanity-labs/ai-literacy-framework\n# ──────────────────────────────────────────────────────────────────────\n\nname: AI Literacy Eval\n\non:\n  pull_request:\n    branches: [main]\n    # Runs on every PR to main by default. 
To reduce cost:\n    #   paths: [\".ailf/**\", \"docs/**\"]\n\n  workflow_dispatch:\n    inputs:\n      debug_mode:\n        description: \"Run in debug mode (fewer tests, faster iteration)\"\n        type: boolean\n        default: false\n\nconcurrency:\n  group: ailf-eval-${{ github.event.pull_request.number || github.ref }}\n  cancel-in-progress: true\n\njobs:\n  evaluate:\n    name: AI Literacy Evaluation\n    runs-on: ubuntu-latest\n    permissions:\n      contents: read\n      pull-requests: write\n    steps:\n      - uses: actions/checkout@v4\n\n      - name: Configure npm for @sanity scope\n        run:\n          echo \"//registry.npmjs.org/:_authToken=${{ secrets.NPM_TOKEN }}\" >>\n          ~/.npmrc\n\n      - name: Run evaluation\n        id: eval\n        env:\n          AILF_API_KEY: ${{ secrets.AILF_API_KEY }}\n        run: |\n          npx @sanity/ailf@latest pipeline --remote \\\n            --output /tmp/ailf-report.md \\\n            ${{ inputs.debug_mode && '--debug' || '' }}\n\n      - name: Post PR comment\n        if: always() && github.event_name == 'pull_request'\n        uses: actions/github-script@v7\n        with:\n          script: |\n            const fs = require('fs');\n\n            // --- Constants ---\n            const MARKER = '<!-- ailf-score-report -->';\n            const HISTORY_START = '<!-- ailf-score-history -->';\n            const HISTORY_END = '<!-- /ailf-score-history -->';\n            const MAX_HISTORY = 3; // keep at most 3 prior runs\n\n            // --- Read new report ---\n            let newReport;\n            try {\n              newReport = fs.readFileSync('/tmp/ailf-report.md', 'utf-8');\n            } catch {\n              newReport = `## ⚠️ AI Literacy Evaluation\\n\\nNo report generated. 
Check the [workflow logs](${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}).`;\n            }\n\n            const prNumber = context.issue?.number || context.payload?.pull_request?.number;\n            if (!prNumber) {\n              console.log('No PR number found, skipping comment');\n              return;\n            }\n\n            // --- Find existing comment ---\n            const { data: comments } = await github.rest.issues.listComments({\n              owner: context.repo.owner, repo: context.repo.repo,\n              issue_number: prNumber,\n            });\n            const existing = comments.find(c => c.body?.includes(MARKER));\n\n            // --- Build history from previous comment ---\n            let historyEntries = [];\n            if (existing) {\n              const oldBody = existing.body || '';\n\n              // Collect existing collapsed history entries\n              const histStart = oldBody.indexOf(HISTORY_START);\n              const histEnd = oldBody.indexOf(HISTORY_END);\n              if (histStart !== -1 && histEnd !== -1) {\n                const historyContent = oldBody.slice(histStart + HISTORY_START.length, histEnd).trim();\n                // Split on </details> boundaries to get individual entries\n                if (historyContent) {\n                  historyEntries = historyContent\n                    .split(/<\\/details>\\s*/)\n                    .map(s => s.trim())\n                    .filter(s => s.startsWith('<details>'))\n                    .map(s => s + '\\n</details>');\n                }\n              }\n\n              // Extract the current report (will become the newest history entry)\n              let previousReport = '';\n              if (histStart !== -1) {\n                // Report is between MARKER and the \"Previous runs\" heading (or history section)\n                const markerIdx = oldBody.indexOf(MARKER);\n                // Find the --- separator 
before history\n                const separatorIdx = oldBody.lastIndexOf('---', histStart);\n                const endIdx = separatorIdx > markerIdx ? separatorIdx : histStart;\n                previousReport = oldBody.slice(markerIdx + MARKER.length, endIdx).trim();\n              } else {\n                // No history yet — everything after MARKER is the report\n                const markerIdx = oldBody.indexOf(MARKER);\n                if (markerIdx !== -1) {\n                  previousReport = oldBody.slice(markerIdx + MARKER.length).trim();\n                }\n              }\n\n              // Collapse the previous report into a <details> entry\n              if (previousReport) {\n                const scoreMatch = previousReport.match(/Overall:\\s*(\\d+)\\/100/);\n                const score = scoreMatch ? scoreMatch[1] : '?';\n                const dateMatch = previousReport.match(/Generated by.*?·\\s*([^·<\\n*]+)/);\n                const date = dateMatch\n                  ? dateMatch[1].trim()\n                  : new Date().toISOString().slice(0, 16).replace('T', ' ') + ' UTC';\n                const entry = `<details>\\n<summary>📜 ${date} — ${score}/100</summary>\\n\\n${previousReport}\\n\\n</details>`;\n                historyEntries.unshift(entry); // newest first\n              }\n\n              // Enforce max history limit\n              historyEntries = historyEntries.slice(0, MAX_HISTORY);\n            }\n\n            // --- Assemble final comment ---\n            const historySection = historyEntries.length > 0\n              ? 
`\\n\\n---\\n\\n### 📜 Previous runs\\n\\n${HISTORY_START}\\n${historyEntries.join('\\n\\n')}\\n${HISTORY_END}`\n              : '';\n            const finalBody = `${MARKER}\\n${newReport}${historySection}`;\n\n            if (existing) {\n              await github.rest.issues.updateComment({\n                owner: context.repo.owner, repo: context.repo.repo,\n                comment_id: existing.id, body: finalBody,\n              });\n              console.log(`Updated comment (${historyEntries.length} history entries)`);\n            } else {\n              await github.rest.issues.createComment({\n                owner: context.repo.owner, repo: context.repo.repo,\n                issue_number: prNumber, body: finalBody,\n              });\n              console.log('Created new PR comment');\n            }\n\n      - name: Summary\n        if: always()\n        run: |\n          if [ -f /tmp/ailf-report.md ]; then\n            cat /tmp/ailf-report.md >> \"$GITHUB_STEP_SUMMARY\"\n          else\n            echo \"## ⚠️ AI Literacy Evaluation\" >> \"$GITHUB_STEP_SUMMARY\"\n            echo \"\" >> \"$GITHUB_STEP_SUMMARY\"\n            echo \"No report generated. Check the workflow logs.\" >> \"$GITHUB_STEP_SUMMARY\"\n          fi\n";

package/dist/_vendor/ailf-core/ports/context.d.ts CHANGED Viewed

@@ -99,6 +99,17 @@ export interface ResolvedConfig {
     reportStoreProjectId?: string;
     /** Report store dataset from .ailf/config.yaml reportStore block */
     reportStoreDataset?: string;
+    /**
+     * Git metadata from the *calling* repository (cross-repo evaluations).
+     * When set, this overrides the CI env var-based git detection in provenance,
+     * ensuring the report attributes to the caller — not the AILF core repo.
+     */
+    callerGit?: {
+        branch?: string;
+        prNumber?: number;
+        repo: string;
+        sha?: string;
+    };
     /** Callback URL configuration for API-triggered evaluations */
     callback?: {
         url: string;

package/dist/_vendor/ailf-core/schemas/pipeline-request.d.ts CHANGED Viewed

@@ -20,6 +20,12 @@ export declare const PipelineRequestSchema: z.ZodObject<{
         "cloud-run": "cloud-run";
     }>>;
     areas: z.ZodOptional<z.ZodArray<z.ZodString>>;
+    callerGit: z.ZodOptional<z.ZodObject<{
+        branch: z.ZodOptional<z.ZodString>;
+        prNumber: z.ZodOptional<z.ZodNumber>;
+        repo: z.ZodString;
+        sha: z.ZodOptional<z.ZodString>;
+    }, z.core.$strip>>;
     callback: z.ZodOptional<z.ZodObject<{
         headers: z.ZodOptional<z.ZodRecord<z.ZodString, z.ZodString>>;
         url: z.ZodString;

package/dist/_vendor/ailf-core/schemas/pipeline-request.js CHANGED Viewed

@@ -30,12 +30,31 @@ const CallbackSchema = z.object({
     url: z.string().url(),
 });
 // ---------------------------------------------------------------------------
+// Caller Git context — for cross-repo evaluations
+// ---------------------------------------------------------------------------
+/**
+ * Git metadata from the *calling* repository.
+ *
+ * When a cross-repo evaluation is triggered (via repository_dispatch or
+ * the API), the GitHub Actions env vars (GITHUB_REPOSITORY, GITHUB_SHA,
+ * etc.) reflect the *AILF core repo* where the workflow executes — not
+ * the repo that requested the evaluation. This field carries the caller's
+ * actual git context so provenance correctly attributes the evaluation.
+ */
+const CallerGitSchema = z.object({ // validates the optional `callerGit` field of PipelineRequestSchema
+    branch: z.string().optional(), // may be omitted; buildProvenance substitutes "unknown" when absent
+    prNumber: z.number().int().positive().optional(), // may be omitted; when present must be a positive integer
+    repo: z.string(), // the only required field — presumably "owner/repo" form; TODO confirm (format is not validated here)
+    sha: z.string().optional(), // may be omitted; buildProvenance substitutes "unknown" when absent
+});
+// ---------------------------------------------------------------------------
 // Pipeline Request — the universal invocation contract
 // ---------------------------------------------------------------------------
 export const PipelineRequestSchema = z.object({
     allowedOrigins: z.array(z.string()).optional(),
     backend: z.enum(["github-actions", "cloud-run"]).optional(),
     areas: z.array(z.string()).optional(),
+    callerGit: CallerGitSchema.optional(),
     callback: CallbackSchema.optional(),
     changedDocs: z.array(z.string()).optional(),
     compare: z.boolean().optional(),

package/dist/commands/init.js CHANGED Viewed

@@ -163,6 +163,16 @@ async function runInit(opts) {
     console.log("  4. Push — the workflow at .github/workflows/ailf-eval.yml runs");
     console.log("     automatically on PRs");
     console.log();
+    console.log("  🔑 Retrieve secrets from 1Password (Sanity employees):");
+    console.log();
+    console.log("     # Shared dev API key (for local testing and CI)");
+    console.log('     op read "op://Shared/AI Literacy Framework - Shared API Tokens/AILF_API_KEY_DEV"');
+    console.log();
+    console.log("     # npm token (read access to @sanity scope)");
+    console.log('     op read "op://Shared/AI Literacy Framework - Shared API Tokens/NPM_TOKEN"');
+    console.log();
+    console.log("     Not a Sanity employee? Request an API key from the AILF team.");
+    console.log();
     console.log("  💡 Test locally before pushing:");
     console.log("     AILF_API_KEY=... npx @sanity/ailf@latest pipeline --remote --debug");
     console.log();

package/dist/orchestration/steps/publish-report-step.js CHANGED Viewed

@@ -146,6 +146,7 @@ function buildProvenanceInput(summary, ctx, options) {
         : undefined;
     return {
         areas,
+        callerGit: ctx.config.callerGit,
         evalFingerprint,
         mode,
         promptfooUrls: options.promptfooUrls,

package/dist/pipeline/map-request-to-config.js CHANGED Viewed

@@ -56,6 +56,7 @@ export function mapRequestToConfig(request, rootDir) {
         sanityDocumentArgs: undefined,
         beforeOption: undefined,
         repoTasksPath: undefined,
+        callerGit: request.callerGit,
         callback: request.callback,
         jobId: request.jobId,
         remote: false,

package/dist/pipeline/provenance.d.ts CHANGED Viewed

@@ -16,6 +16,17 @@ import type { EvalMode, PromptfooUrlEntry, ReportProvenance } from "./types.js";
 export interface ProvenanceInput {
     /** Feature areas that were evaluated */
     areas: string[];
+    /**
+     * Git metadata from the *calling* repository (cross-repo evaluations).
+     * When provided, overrides CI env var detection so provenance attributes
+     * to the caller — not the AILF core repo where the workflow executes.
+     */
+    callerGit?: {
+        branch?: string;
+        prNumber?: number;
+        repo: string;
+        sha?: string;
+    };
     /** SHA-256 hash of the doc context files (from cache system) */
     contextHash?: string;
     /** Evaluation fingerprint for cross-environment cache lookup */

package/dist/pipeline/provenance.js CHANGED Viewed

@@ -25,11 +25,21 @@ import { load } from "js-yaml";
  */
 export function buildProvenance(input) {
     const models = loadModelsConfig(input.rootDir);
+    // Cross-repo evaluations: prefer explicit caller git metadata over
+    // CI env vars (which always reflect the AILF core repo).
+    const git = input.callerGit
+        ? {
+            branch: input.callerGit.branch ?? "unknown",
+            prNumber: input.callerGit.prNumber,
+            repo: input.callerGit.repo,
+            sha: input.callerGit.sha ?? "unknown",
+        }
+        : detectGitMetadata();
     return {
         areas: input.areas,
         contextHash: input.contextHash,
         evalFingerprint: input.evalFingerprint,
-        git: detectGitMetadata(),
+        git,
         graderModel: models.grader.id,
         mode: input.mode,
         models: models.models.map((m) => ({ id: m.id, label: m.label })),
@@ -110,6 +120,9 @@ function detectTrigger() {
     if (eventName === "repository_dispatch") {
         return {
             callerRef: process.env.GITHUB_REF,
+            // Note: callerRepo here is a fallback. The accurate caller repo
+            // comes from callerGit (injected into the PipelineRequest payload).
+            // GITHUB_REPOSITORY_OWNER_ID is just the org ID, not owner/repo.
             callerRepo: process.env.GITHUB_REPOSITORY_OWNER_ID ?? "unknown",
             type: "cross-repo",
         };

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@sanity/ailf",
-  "version": "0.1.15",
+  "version": "0.1.17",
   "private": false,
   "publishConfig": {
     "access": "restricted"