@sanity/ailf 0.1.15 → 0.1.17
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +6 -74
- package/dist/_vendor/ailf-core/examples/index.d.ts +3 -3
- package/dist/_vendor/ailf-core/examples/index.js +3 -3
- package/dist/_vendor/ailf-core/ports/context.d.ts +11 -0
- package/dist/_vendor/ailf-core/schemas/pipeline-request.d.ts +6 -0
- package/dist/_vendor/ailf-core/schemas/pipeline-request.js +19 -0
- package/dist/commands/init.js +10 -0
- package/dist/orchestration/steps/publish-report-step.js +1 -0
- package/dist/pipeline/map-request-to-config.js +1 -0
- package/dist/pipeline/provenance.d.ts +11 -0
- package/dist/pipeline/provenance.js +14 -1
- package/package.json +1 -1
package/README.md
CHANGED
|
@@ -4,86 +4,18 @@ CLI and evaluation engine for the **AI Literacy Framework** — measures how
|
|
|
4
4
|
effectively documentation enables AI coding tools to implement features
|
|
5
5
|
correctly.
|
|
6
6
|
|
|
7
|
-
## Installation
|
|
8
|
-
|
|
9
|
-
```bash
|
|
10
|
-
# Run without installing (recommended for quick start)
|
|
11
|
-
npx @sanity/ailf --help
|
|
12
|
-
|
|
13
|
-
# Or install globally
|
|
14
|
-
pnpm add -g @sanity/ailf
|
|
15
|
-
|
|
16
|
-
# Or as a project dependency
|
|
17
|
-
pnpm add @sanity/ailf
|
|
18
|
-
```
|
|
19
|
-
|
|
20
|
-
## Quick start
|
|
21
|
-
|
|
22
|
-
### 1. Initialize a project
|
|
23
|
-
|
|
24
7
|
```bash
|
|
25
|
-
npx @sanity/ailf init
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
This creates a `.ailf/` directory with example configuration and task files:
|
|
29
|
-
|
|
8
|
+
npx @sanity/ailf@latest init # scaffold a project
|
|
9
|
+
npx @sanity/ailf@latest --help # see all commands
|
|
30
10
|
```
|
|
31
|
-
.ailf/
|
|
32
|
-
├── config.yaml # Project configuration
|
|
33
|
-
├── .gitignore # Keeps generated files out of VCS
|
|
34
|
-
└── tasks/
|
|
35
|
-
├── example-groq-blog-listing.yaml
|
|
36
|
-
└── example-studio-custom-input.yaml
|
|
37
|
-
```
|
|
38
|
-
|
|
39
|
-
### 2. Set up environment
|
|
40
|
-
|
|
41
|
-
Create a `.env` file in your project root:
|
|
42
|
-
|
|
43
|
-
```bash
|
|
44
|
-
# Required — LLM provider for evaluation and grading
|
|
45
|
-
OPENAI_API_KEY=sk-...
|
|
46
|
-
|
|
47
|
-
# Required — read access to Sanity documentation content
|
|
48
|
-
SANITY_API_TOKEN=sk...
|
|
49
|
-
|
|
50
|
-
# Optional — publish reports to your Sanity Studio
|
|
51
|
-
AILF_REPORT_SANITY_API_TOKEN=sk...
|
|
52
|
-
AILF_REPORT_PROJECT_ID=your-project-id
|
|
53
|
-
AILF_REPORT_DATASET=production
|
|
54
|
-
```
|
|
55
|
-
|
|
56
|
-
### 3. Edit tasks and run
|
|
57
|
-
|
|
58
|
-
```bash
|
|
59
|
-
# Edit .ailf/config.yaml with your Sanity project settings
|
|
60
|
-
# Customize or replace the example tasks in .ailf/tasks/
|
|
61
|
-
|
|
62
|
-
# Validate task definitions
|
|
63
|
-
npx @sanity/ailf validate-tasks .ailf/tasks/
|
|
64
|
-
|
|
65
|
-
# Run evaluation in debug mode (fast feedback)
|
|
66
|
-
npx @sanity/ailf pipeline --repo-tasks-path .ailf/tasks/ --debug
|
|
67
|
-
|
|
68
|
-
# Full evaluation
|
|
69
|
-
npx @sanity/ailf pipeline --repo-tasks-path .ailf/tasks/
|
|
70
|
-
```
|
|
71
|
-
|
|
72
|
-
## Documentation
|
|
73
11
|
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
— task authoring guide
|
|
78
|
-
- **[Architecture](https://github.com/sanity-labs/ai-literacy-framework/blob/main/docs/ARCHITECTURE.md)**
|
|
79
|
-
— domain model and data flow
|
|
80
|
-
- **[Root README](https://github.com/sanity-labs/ai-literacy-framework)** —
|
|
81
|
-
project overview
|
|
12
|
+
**→ See the
|
|
13
|
+
[full documentation](https://github.com/sanity-labs/ai-literacy-framework#readme)**
|
|
14
|
+
for installation, quick start, configuration, and usage guides.
|
|
82
15
|
|
|
83
16
|
## Related packages
|
|
84
17
|
|
|
85
18
|
| Package | Description |
|
|
86
19
|
| -------------------------------------------------------------------------- | -------------------------------------------------- |
|
|
20
|
+
| [`@sanity/ailf-tasks`](https://www.npmjs.com/package/@sanity/ailf-tasks) | Lightweight task validator — schemas + YAML parser |
|
|
87
21
|
| [`@sanity/ailf-studio`](https://www.npmjs.com/package/@sanity/ailf-studio) | Sanity Studio dashboard plugin for viewing reports |
|
|
88
|
-
| `@sanity/ailf-core` | Domain kernel (types, schemas, ports) |
|
|
89
|
-
| `@sanity/ailf-shared` | Cross-package contract types |
|
|
@@ -110,7 +110,7 @@ export declare const ailfConfigData: {
|
|
|
110
110
|
};
|
|
111
111
|
};
|
|
112
112
|
/** Raw YAML string for ailf-config example (preserves comments) */
|
|
113
|
-
export declare const ailfConfigYaml = "# \u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n# .ailf/config.yaml \u2014 AI Literacy Framework project configuration\n# \u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n#\n# This file configures how the AILF evaluation pipeline runs in this\n# repository. Place it at .ailf/config.yaml in your project root.\n#\n# Evaluations are submitted to the AILF API (ailf-api.sanity.build).\n# The API handles LLM calls, doc fetching, grading, and report\n# publishing. Your repo only needs one secret: AILF_API_KEY.\n#\n# Docs: https://github.com/sanity-
|
|
113
|
+
export declare const ailfConfigYaml = "# \u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n# .ailf/config.yaml \u2014 AI Literacy Framework project configuration\n# \u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n#\n# This file configures how the AILF evaluation pipeline runs in this\n# repository. Place it at .ailf/config.yaml in your project root.\n#\n# Evaluations are submitted to the AILF API (ailf-api.sanity.build).\n# The API handles LLM calls, doc fetching, grading, and report\n# publishing. Your repo only needs one secret: AILF_API_KEY.\n#\n# Docs: https://github.com/sanity-labs/ai-literacy-framework\n# \u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n\n# Documentation source \u2014 which docs are being evaluated.\n#\n# This tells the pipeline which Sanity project and dataset contain\n# the documentation under test. For most users, this is Sanity's own\n# docs project.\n#\n# projectId \u2014 Sanity project ID (find yours at sanity.io/manage)\n# dataset \u2014 the dataset to query (e.g., \"production\", \"next\")\n# baseUrl \u2014 the public URL of your documentation site\n# (used by agentic mode to test agent discoverability)\nsource:\n projectId: \"3do82whm\"\n dataset: next\n baseUrl: \"https://www.sanity.io/docs\"\n\n# Trigger configuration \u2014 when evaluations run automatically.\n#\n# Each key is a trigger context. The pipeline checks which trigger\n# matches the current execution context (PR, merge, schedule, etc.)\n# and applies its settings.\n#\n# mode options:\n# validate-only \u2014 check that task YAML parses correctly (fast, no LLM calls)\n# eval \u2014 run the full evaluation pipeline\n#\n# paths \u2014 only trigger when files matching these globs change\n# blocking \u2014 if true, a failing eval blocks the PR merge\n# notify \u2014 if true, post results to configured notification channels\ntriggers:\n # On pull requests: just validate task files parse correctly\n pr:\n mode: validate-only\n\n # When .ailf/ files change in a PR: run a real evaluation\n pr-task-change:\n mode: eval\n paths: [\".ailf/**\"]\n\n # On merge to main: run evaluation (non-blocking)\n main:\n mode: eval\n blocking: false\n notify: true\n";
|
|
114
114
|
/** Parsed task data for example-groq-blog-listing (JSON-safe) */
|
|
115
115
|
export declare const exampleGroqBlogListingData: readonly [{
|
|
116
116
|
readonly id: "example-groq-blog-listing";
|
|
@@ -144,7 +144,7 @@ export declare const exampleGroqBlogListingData: readonly [{
|
|
|
144
144
|
};
|
|
145
145
|
}];
|
|
146
146
|
/** Raw YAML string for example-groq-blog-listing (preserves comments) */
|
|
147
|
-
export declare const exampleGroqBlogListingYaml = "# \u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n# Example Task: Blog listing with GROQ queries\n# \u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n#\n# This is a starter template \u2014 edit it for your own documentation.\n# Each task evaluates whether an AI coding agent can implement a feature\n# using your docs as context. Delete this file or replace it entirely.\n#\n# To disable this task without deleting the file, set:\n# baseline:\n# enabled: false\n#\n# Full field reference:\n# https://github.com/sanity-
|
|
147
|
+
export declare const exampleGroqBlogListingYaml = "# \u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n# Example Task: Blog listing with GROQ queries\n# \u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n#\n# This is a starter template \u2014 edit it for your own documentation.\n# Each task evaluates whether an AI coding agent can implement a feature\n# using your docs as context. Delete this file or replace it entirely.\n#\n# To disable this task without deleting the file, set:\n# baseline:\n# enabled: false\n#\n# Full field reference:\n# https://github.com/sanity-labs/ai-literacy-framework/blob/main/docs/CONTRIBUTING_TASKS.md\n# \u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n\n# Unique identifier \u2014 lowercase alphanumeric with hyphens.\n# Must be unique across all task files in .ailf/tasks/.\n- id: example-groq-blog-listing\n\n # Short human-readable summary. Shown in score tables and reports.\n description: \"Example \u2014 Blog listing with GROQ queries\"\n\n # Feature area this task belongs to. Tasks with the same area are\n # grouped together in score summaries. Use a short kebab-case name.\n featureArea: groq\n\n # Gold-standard documentation articles for this task. The pipeline\n # fetches these from Sanity and injects them into the prompt for\n # baseline evaluation. Each entry needs:\n # slug \u2014 the article's URL slug in your docs site\n # reason \u2014 why this doc is relevant (helps with auditing)\n canonicalDocs:\n - slug: groq-introduction\n reason: \"Core GROQ syntax and query language reference\"\n - slug: how-queries-work\n reason: \"Query execution model and best practices\"\n\n # When true, the pipeline auto-generates an additional rubric that\n # checks whether the LLM's response actually used the provided docs.\n docCoverage: true\n\n # Path to a gold-standard implementation, relative to canonical/.\n # The grader uses this as a reference when scoring code correctness.\n referenceSolution: canonical/example-groq-blog-listing.ts\n\n # vars.task \u2014 the implementation prompt given to the LLM.\n # Write this as if you're asking a developer to build the feature.\n # Be specific about requirements so the grader can evaluate clearly.\n #\n # vars.docs \u2014 leave empty (\"\"). The pipeline fills this in:\n # \u2022 Gold variant: injected with canonical doc content\n # \u2022 Baseline variant: left empty (tests model knowledge alone)\n vars:\n task: |\n Create a Next.js page component that lists blog posts from Sanity\n using GROQ. The page should display the title, slug, and published\n date for each post, sorted by most recent first. Use the Sanity\n client to fetch data.\n docs: \"\"\n\n # Grading assertions \u2014 how the LLM's response is scored.\n #\n # \"llm-rubric\" assertions use a grader LLM to score against criteria.\n # The \"template\" references a rubric from config/rubrics.yaml.\n # The \"criteria\" are task-specific bullets injected into the template.\n #\n # Available templates:\n # task-completion \u2014 did the LLM implement the feature? (weight: 0.50)\n # code-correctness \u2014 is the code idiomatic and correct? (weight: 0.25)\n #\n # You can also use value-based assertions:\n # - type: contains\n # value: \"client.fetch\"\n # - type: contains-any\n # value: [\"createClient\", \"sanityClient\"]\n assert:\n - type: llm-rubric\n template: task-completion\n criteria:\n - \"Uses the groq tagged template literal\"\n - \"Fetches blog posts with title, slug, and publishedAt fields\"\n - \"Orders results by publishedAt in descending order\"\n\n - type: llm-rubric\n template: code-correctness\n criteria:\n - \"Uses createClient from @sanity/client or next-sanity\"\n - \"Exports a valid Next.js page component\"\n\n # Baseline variant configuration.\n # enabled \u2014 set to false to skip this task entirely\n # rubric \u2014 \"abbreviated\" (faster, default), \"full\", or \"none\"\n baseline:\n enabled: true\n rubric: abbreviated\n";
|
|
148
148
|
/** Parsed task data for example-studio-custom-input (JSON-safe) */
|
|
149
149
|
export declare const exampleStudioCustomInputData: readonly [{
|
|
150
150
|
readonly id: "example-studio-custom-input";
|
|
@@ -191,4 +191,4 @@ export interface ExampleRecord {
|
|
|
191
191
|
}
|
|
192
192
|
export declare const EXAMPLES: Record<ExampleType, ExampleRecord>;
|
|
193
193
|
/** GitHub Actions workflow template for AI Literacy evaluation */
|
|
194
|
-
export declare const workflowYaml = "# \u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n# AI Literacy Evaluation \u2014 GitHub Actions workflow\n# \u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n#\n# Evaluates your documentation quality on every pull request.\n# The AILF CLI reads your .ailf/tasks/ definitions, submits them\n# to the AILF API for evaluation, and writes a score report.\n#\n# Prerequisites:\n# Add one secret to your repository (Settings \u2192 Secrets \u2192 Actions):\n# AILF_API_KEY \u2014 your API key (starts with ailf_live_sk_)\n# NPM_TOKEN \u2014 npm token with read access to @sanity scope\n#\n# Customization:\n# - Narrow the trigger paths to reduce cost (see comment below)\n# - Check debug_mode for faster iteration (fewer tests)\n# - See: https://github.com/sanity-
|
|
194
|
+
export declare const workflowYaml = "# \u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n# AI Literacy Evaluation \u2014 GitHub Actions workflow\n# \u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n#\n# Evaluates your documentation quality on every pull request.\n# The AILF CLI reads your .ailf/tasks/ definitions, submits them\n# to the AILF API for evaluation, and writes a score report.\n#\n# Prerequisites:\n# Add one secret to your repository (Settings \u2192 Secrets \u2192 Actions):\n# AILF_API_KEY \u2014 your API key (starts with ailf_live_sk_)\n# NPM_TOKEN \u2014 npm token with read access to @sanity scope\n#\n# Customization:\n# - Narrow the trigger paths to reduce cost (see comment below)\n# - Check debug_mode for faster iteration (fewer tests)\n# - See: https://github.com/sanity-labs/ai-literacy-framework\n# \u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n\nname: AI Literacy Eval\n\non:\n pull_request:\n branches: [main]\n # Runs on every PR to main by default. To reduce cost:\n # paths: [\".ailf/**\", \"docs/**\"]\n\n workflow_dispatch:\n inputs:\n debug_mode:\n description: \"Run in debug mode (fewer tests, faster iteration)\"\n type: boolean\n default: false\n\nconcurrency:\n group: ailf-eval-${{ github.event.pull_request.number || github.ref }}\n cancel-in-progress: true\n\njobs:\n evaluate:\n name: AI Literacy Evaluation\n runs-on: ubuntu-latest\n permissions:\n contents: read\n pull-requests: write\n steps:\n - uses: actions/checkout@v4\n\n - name: Configure npm for @sanity scope\n run:\n echo \"//registry.npmjs.org/:_authToken=${{ secrets.NPM_TOKEN }}\" >>\n ~/.npmrc\n\n - name: Run evaluation\n id: eval\n env:\n AILF_API_KEY: ${{ secrets.AILF_API_KEY }}\n run: |\n npx @sanity/ailf@latest pipeline --remote \\\n --output /tmp/ailf-report.md \\\n ${{ inputs.debug_mode && '--debug' || '' }}\n\n - name: Post PR comment\n if: always() && github.event_name == 'pull_request'\n uses: actions/github-script@v7\n with:\n script: |\n const fs = require('fs');\n\n // --- Constants ---\n const MARKER = '<!-- ailf-score-report -->';\n const HISTORY_START = '<!-- ailf-score-history -->';\n const HISTORY_END = '<!-- /ailf-score-history -->';\n const MAX_HISTORY = 3; // keep at most 3 prior runs\n\n // --- Read new report ---\n let newReport;\n try {\n newReport = fs.readFileSync('/tmp/ailf-report.md', 'utf-8');\n } catch {\n newReport = `## \u26A0\uFE0F AI Literacy Evaluation\\n\\nNo report generated. Check the [workflow logs](${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}).`;\n }\n\n const prNumber = context.issue?.number || context.payload?.pull_request?.number;\n if (!prNumber) {\n console.log('No PR number found, skipping comment');\n return;\n }\n\n // --- Find existing comment ---\n const { data: comments } = await github.rest.issues.listComments({\n owner: context.repo.owner, repo: context.repo.repo,\n issue_number: prNumber,\n });\n const existing = comments.find(c => c.body?.includes(MARKER));\n\n // --- Build history from previous comment ---\n let historyEntries = [];\n if (existing) {\n const oldBody = existing.body || '';\n\n // Collect existing collapsed history entries\n const histStart = oldBody.indexOf(HISTORY_START);\n const histEnd = oldBody.indexOf(HISTORY_END);\n if (histStart !== -1 && histEnd !== -1) {\n const historyContent = oldBody.slice(histStart + HISTORY_START.length, histEnd).trim();\n // Split on </details> boundaries to get individual entries\n if (historyContent) {\n historyEntries = historyContent\n .split(/<\\/details>\\s*/)\n .map(s => s.trim())\n .filter(s => s.startsWith('<details>'))\n .map(s => s + '\\n</details>');\n }\n }\n\n // Extract the current report (will become the newest history entry)\n let previousReport = '';\n if (histStart !== -1) {\n // Report is between MARKER and the \"Previous runs\" heading (or history section)\n const markerIdx = oldBody.indexOf(MARKER);\n // Find the --- separator before history\n const separatorIdx = oldBody.lastIndexOf('---', histStart);\n const endIdx = separatorIdx > markerIdx ? separatorIdx : histStart;\n previousReport = oldBody.slice(markerIdx + MARKER.length, endIdx).trim();\n } else {\n // No history yet \u2014 everything after MARKER is the report\n const markerIdx = oldBody.indexOf(MARKER);\n if (markerIdx !== -1) {\n previousReport = oldBody.slice(markerIdx + MARKER.length).trim();\n }\n }\n\n // Collapse the previous report into a <details> entry\n if (previousReport) {\n const scoreMatch = previousReport.match(/Overall:\\s*(\\d+)\\/100/);\n const score = scoreMatch ? scoreMatch[1] : '?';\n const dateMatch = previousReport.match(/Generated by.*?\u00B7\\s*([^\u00B7<\\n*]+)/);\n const date = dateMatch\n ? dateMatch[1].trim()\n : new Date().toISOString().slice(0, 16).replace('T', ' ') + ' UTC';\n const entry = `<details>\\n<summary>\uD83D\uDCDC ${date} \u2014 ${score}/100</summary>\\n\\n${previousReport}\\n\\n</details>`;\n historyEntries.unshift(entry); // newest first\n }\n\n // Enforce max history limit\n historyEntries = historyEntries.slice(0, MAX_HISTORY);\n }\n\n // --- Assemble final comment ---\n const historySection = historyEntries.length > 0\n ? `\\n\\n---\\n\\n### \uD83D\uDCDC Previous runs\\n\\n${HISTORY_START}\\n${historyEntries.join('\\n\\n')}\\n${HISTORY_END}`\n : '';\n const finalBody = `${MARKER}\\n${newReport}${historySection}`;\n\n if (existing) {\n await github.rest.issues.updateComment({\n owner: context.repo.owner, repo: context.repo.repo,\n comment_id: existing.id, body: finalBody,\n });\n console.log(`Updated comment (${historyEntries.length} history entries)`);\n } else {\n await github.rest.issues.createComment({\n owner: context.repo.owner, repo: context.repo.repo,\n issue_number: prNumber, body: finalBody,\n });\n console.log('Created new PR comment');\n }\n\n - name: Summary\n if: always()\n run: |\n if [ -f /tmp/ailf-report.md ]; then\n cat /tmp/ailf-report.md >> \"$GITHUB_STEP_SUMMARY\"\n else\n echo \"## \u26A0\uFE0F AI Literacy Evaluation\" >> \"$GITHUB_STEP_SUMMARY\"\n echo \"\" >> \"$GITHUB_STEP_SUMMARY\"\n echo \"No report generated. Check the workflow logs.\" >> \"$GITHUB_STEP_SUMMARY\"\n fi\n";
|
|
@@ -141,7 +141,7 @@ export const ailfConfigData = {
|
|
|
141
141
|
}
|
|
142
142
|
};
|
|
143
143
|
/** Raw YAML string for ailf-config example (preserves comments) */
|
|
144
|
-
export const ailfConfigYaml = "# ──────────────────────────────────────────────────────────────────────\n# .ailf/config.yaml — AI Literacy Framework project configuration\n# ──────────────────────────────────────────────────────────────────────\n#\n# This file configures how the AILF evaluation pipeline runs in this\n# repository. Place it at .ailf/config.yaml in your project root.\n#\n# Evaluations are submitted to the AILF API (ailf-api.sanity.build).\n# The API handles LLM calls, doc fetching, grading, and report\n# publishing. Your repo only needs one secret: AILF_API_KEY.\n#\n# Docs: https://github.com/sanity-
|
|
144
|
+
export const ailfConfigYaml = "# ──────────────────────────────────────────────────────────────────────\n# .ailf/config.yaml — AI Literacy Framework project configuration\n# ──────────────────────────────────────────────────────────────────────\n#\n# This file configures how the AILF evaluation pipeline runs in this\n# repository. Place it at .ailf/config.yaml in your project root.\n#\n# Evaluations are submitted to the AILF API (ailf-api.sanity.build).\n# The API handles LLM calls, doc fetching, grading, and report\n# publishing. Your repo only needs one secret: AILF_API_KEY.\n#\n# Docs: https://github.com/sanity-labs/ai-literacy-framework\n# ──────────────────────────────────────────────────────────────────────\n\n# Documentation source — which docs are being evaluated.\n#\n# This tells the pipeline which Sanity project and dataset contain\n# the documentation under test. For most users, this is Sanity's own\n# docs project.\n#\n# projectId — Sanity project ID (find yours at sanity.io/manage)\n# dataset — the dataset to query (e.g., \"production\", \"next\")\n# baseUrl — the public URL of your documentation site\n# (used by agentic mode to test agent discoverability)\nsource:\n projectId: \"3do82whm\"\n dataset: next\n baseUrl: \"https://www.sanity.io/docs\"\n\n# Trigger configuration — when evaluations run automatically.\n#\n# Each key is a trigger context. The pipeline checks which trigger\n# matches the current execution context (PR, merge, schedule, etc.)\n# and applies its settings.\n#\n# mode options:\n# validate-only — check that task YAML parses correctly (fast, no LLM calls)\n# eval — run the full evaluation pipeline\n#\n# paths — only trigger when files matching these globs change\n# blocking — if true, a failing eval blocks the PR merge\n# notify — if true, post results to configured notification channels\ntriggers:\n # On pull requests: just validate task files parse correctly\n pr:\n mode: validate-only\n\n # When .ailf/ files change in a PR: run a real evaluation\n pr-task-change:\n mode: eval\n paths: [\".ailf/**\"]\n\n # On merge to main: run evaluation (non-blocking)\n main:\n mode: eval\n blocking: false\n notify: true\n";
|
|
145
145
|
/** Parsed task data for example-groq-blog-listing (JSON-safe) */
|
|
146
146
|
export const exampleGroqBlogListingData = [
|
|
147
147
|
{
|
|
@@ -190,7 +190,7 @@ export const exampleGroqBlogListingData = [
|
|
|
190
190
|
}
|
|
191
191
|
];
|
|
192
192
|
/** Raw YAML string for example-groq-blog-listing (preserves comments) */
|
|
193
|
-
export const exampleGroqBlogListingYaml = "# ──────────────────────────────────────────────────────────────────────\n# Example Task: Blog listing with GROQ queries\n# ──────────────────────────────────────────────────────────────────────\n#\n# This is a starter template — edit it for your own documentation.\n# Each task evaluates whether an AI coding agent can implement a feature\n# using your docs as context. Delete this file or replace it entirely.\n#\n# To disable this task without deleting the file, set:\n# baseline:\n# enabled: false\n#\n# Full field reference:\n# https://github.com/sanity-
|
|
193
|
+
export const exampleGroqBlogListingYaml = "# ──────────────────────────────────────────────────────────────────────\n# Example Task: Blog listing with GROQ queries\n# ──────────────────────────────────────────────────────────────────────\n#\n# This is a starter template — edit it for your own documentation.\n# Each task evaluates whether an AI coding agent can implement a feature\n# using your docs as context. Delete this file or replace it entirely.\n#\n# To disable this task without deleting the file, set:\n# baseline:\n# enabled: false\n#\n# Full field reference:\n# https://github.com/sanity-labs/ai-literacy-framework/blob/main/docs/CONTRIBUTING_TASKS.md\n# ──────────────────────────────────────────────────────────────────────\n\n# Unique identifier — lowercase alphanumeric with hyphens.\n# Must be unique across all task files in .ailf/tasks/.\n- id: example-groq-blog-listing\n\n # Short human-readable summary. Shown in score tables and reports.\n description: \"Example — Blog listing with GROQ queries\"\n\n # Feature area this task belongs to. Tasks with the same area are\n # grouped together in score summaries. Use a short kebab-case name.\n featureArea: groq\n\n # Gold-standard documentation articles for this task. The pipeline\n # fetches these from Sanity and injects them into the prompt for\n # baseline evaluation. Each entry needs:\n # slug — the article's URL slug in your docs site\n # reason — why this doc is relevant (helps with auditing)\n canonicalDocs:\n - slug: groq-introduction\n reason: \"Core GROQ syntax and query language reference\"\n - slug: how-queries-work\n reason: \"Query execution model and best practices\"\n\n # When true, the pipeline auto-generates an additional rubric that\n # checks whether the LLM's response actually used the provided docs.\n docCoverage: true\n\n # Path to a gold-standard implementation, relative to canonical/.\n # The grader uses this as a reference when scoring code correctness.\n referenceSolution: canonical/example-groq-blog-listing.ts\n\n # vars.task — the implementation prompt given to the LLM.\n # Write this as if you're asking a developer to build the feature.\n # Be specific about requirements so the grader can evaluate clearly.\n #\n # vars.docs — leave empty (\"\"). The pipeline fills this in:\n # • Gold variant: injected with canonical doc content\n # • Baseline variant: left empty (tests model knowledge alone)\n vars:\n task: |\n Create a Next.js page component that lists blog posts from Sanity\n using GROQ. The page should display the title, slug, and published\n date for each post, sorted by most recent first. Use the Sanity\n client to fetch data.\n docs: \"\"\n\n # Grading assertions — how the LLM's response is scored.\n #\n # \"llm-rubric\" assertions use a grader LLM to score against criteria.\n # The \"template\" references a rubric from config/rubrics.yaml.\n # The \"criteria\" are task-specific bullets injected into the template.\n #\n # Available templates:\n # task-completion — did the LLM implement the feature? (weight: 0.50)\n # code-correctness — is the code idiomatic and correct? (weight: 0.25)\n #\n # You can also use value-based assertions:\n # - type: contains\n # value: \"client.fetch\"\n # - type: contains-any\n # value: [\"createClient\", \"sanityClient\"]\n assert:\n - type: llm-rubric\n template: task-completion\n criteria:\n - \"Uses the groq tagged template literal\"\n - \"Fetches blog posts with title, slug, and publishedAt fields\"\n - \"Orders results by publishedAt in descending order\"\n\n - type: llm-rubric\n template: code-correctness\n criteria:\n - \"Uses createClient from @sanity/client or next-sanity\"\n - \"Exports a valid Next.js page component\"\n\n # Baseline variant configuration.\n # enabled — set to false to skip this task entirely\n # rubric — \"abbreviated\" (faster, default), \"full\", or \"none\"\n baseline:\n enabled: true\n rubric: abbreviated\n";
|
|
194
194
|
/** Parsed task data for example-studio-custom-input (JSON-safe) */
|
|
195
195
|
export const exampleStudioCustomInputData = [
|
|
196
196
|
{
|
|
@@ -289,4 +289,4 @@ export const EXAMPLES = {
|
|
|
289
289
|
// Raw file exports (non-data files, exported as raw strings)
|
|
290
290
|
// ---------------------------------------------------------------------------
|
|
291
291
|
/** GitHub Actions workflow template for AI Literacy evaluation */
|
|
292
|
-
export const workflowYaml = "# ──────────────────────────────────────────────────────────────────────\n# AI Literacy Evaluation — GitHub Actions workflow\n# ──────────────────────────────────────────────────────────────────────\n#\n# Evaluates your documentation quality on every pull request.\n# The AILF CLI reads your .ailf/tasks/ definitions, submits them\n# to the AILF API for evaluation, and writes a score report.\n#\n# Prerequisites:\n# Add one secret to your repository (Settings → Secrets → Actions):\n# AILF_API_KEY — your API key (starts with ailf_live_sk_)\n# NPM_TOKEN — npm token with read access to @sanity scope\n#\n# Customization:\n# - Narrow the trigger paths to reduce cost (see comment below)\n# - Check debug_mode for faster iteration (fewer tests)\n# - See: https://github.com/sanity-
|
|
292
|
+
export const workflowYaml = "# ──────────────────────────────────────────────────────────────────────\n# AI Literacy Evaluation — GitHub Actions workflow\n# ──────────────────────────────────────────────────────────────────────\n#\n# Evaluates your documentation quality on every pull request.\n# The AILF CLI reads your .ailf/tasks/ definitions, submits them\n# to the AILF API for evaluation, and writes a score report.\n#\n# Prerequisites:\n# Add one secret to your repository (Settings → Secrets → Actions):\n# AILF_API_KEY — your API key (starts with ailf_live_sk_)\n# NPM_TOKEN — npm token with read access to @sanity scope\n#\n# Customization:\n# - Narrow the trigger paths to reduce cost (see comment below)\n# - Check debug_mode for faster iteration (fewer tests)\n# - See: https://github.com/sanity-labs/ai-literacy-framework\n# ──────────────────────────────────────────────────────────────────────\n\nname: AI Literacy Eval\n\non:\n pull_request:\n branches: [main]\n # Runs on every PR to main by default. To reduce cost:\n # paths: [\".ailf/**\", \"docs/**\"]\n\n workflow_dispatch:\n inputs:\n debug_mode:\n description: \"Run in debug mode (fewer tests, faster iteration)\"\n type: boolean\n default: false\n\nconcurrency:\n group: ailf-eval-${{ github.event.pull_request.number || github.ref }}\n cancel-in-progress: true\n\njobs:\n evaluate:\n name: AI Literacy Evaluation\n runs-on: ubuntu-latest\n permissions:\n contents: read\n pull-requests: write\n steps:\n - uses: actions/checkout@v4\n\n - name: Configure npm for @sanity scope\n run:\n echo \"//registry.npmjs.org/:_authToken=${{ secrets.NPM_TOKEN }}\" >>\n ~/.npmrc\n\n - name: Run evaluation\n id: eval\n env:\n AILF_API_KEY: ${{ secrets.AILF_API_KEY }}\n run: |\n npx @sanity/ailf@latest pipeline --remote \\\n --output /tmp/ailf-report.md \\\n ${{ inputs.debug_mode && '--debug' || '' }}\n\n - name: Post PR comment\n if: always() && github.event_name == 'pull_request'\n uses: actions/github-script@v7\n with:\n script: |\n const fs = require('fs');\n\n // --- Constants ---\n const MARKER = '<!-- ailf-score-report -->';\n const HISTORY_START = '<!-- ailf-score-history -->';\n const HISTORY_END = '<!-- /ailf-score-history -->';\n const MAX_HISTORY = 3; // keep at most 3 prior runs\n\n // --- Read new report ---\n let newReport;\n try {\n newReport = fs.readFileSync('/tmp/ailf-report.md', 'utf-8');\n } catch {\n newReport = `## ⚠️ AI Literacy Evaluation\\n\\nNo report generated. Check the [workflow logs](${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}).`;\n }\n\n const prNumber = context.issue?.number || context.payload?.pull_request?.number;\n if (!prNumber) {\n console.log('No PR number found, skipping comment');\n return;\n }\n\n // --- Find existing comment ---\n const { data: comments } = await github.rest.issues.listComments({\n owner: context.repo.owner, repo: context.repo.repo,\n issue_number: prNumber,\n });\n const existing = comments.find(c => c.body?.includes(MARKER));\n\n // --- Build history from previous comment ---\n let historyEntries = [];\n if (existing) {\n const oldBody = existing.body || '';\n\n // Collect existing collapsed history entries\n const histStart = oldBody.indexOf(HISTORY_START);\n const histEnd = oldBody.indexOf(HISTORY_END);\n if (histStart !== -1 && histEnd !== -1) {\n const historyContent = oldBody.slice(histStart + HISTORY_START.length, histEnd).trim();\n // Split on </details> boundaries to get individual entries\n if (historyContent) {\n historyEntries = historyContent\n .split(/<\\/details>\\s*/)\n .map(s => s.trim())\n .filter(s => s.startsWith('<details>'))\n .map(s => s + '\\n</details>');\n }\n }\n\n // Extract the current report (will become the newest history entry)\n let previousReport = '';\n if (histStart !== -1) {\n // Report is between MARKER and the \"Previous runs\" heading (or history section)\n const markerIdx = oldBody.indexOf(MARKER);\n // Find the --- separator before history\n const separatorIdx = oldBody.lastIndexOf('---', histStart);\n const endIdx = separatorIdx > markerIdx ? separatorIdx : histStart;\n previousReport = oldBody.slice(markerIdx + MARKER.length, endIdx).trim();\n } else {\n // No history yet — everything after MARKER is the report\n const markerIdx = oldBody.indexOf(MARKER);\n if (markerIdx !== -1) {\n previousReport = oldBody.slice(markerIdx + MARKER.length).trim();\n }\n }\n\n // Collapse the previous report into a <details> entry\n if (previousReport) {\n const scoreMatch = previousReport.match(/Overall:\\s*(\\d+)\\/100/);\n const score = scoreMatch ? scoreMatch[1] : '?';\n const dateMatch = previousReport.match(/Generated by.*?·\\s*([^·<\\n*]+)/);\n const date = dateMatch\n ? dateMatch[1].trim()\n : new Date().toISOString().slice(0, 16).replace('T', ' ') + ' UTC';\n const entry = `<details>\\n<summary>📜 ${date} — ${score}/100</summary>\\n\\n${previousReport}\\n\\n</details>`;\n historyEntries.unshift(entry); // newest first\n }\n\n // Enforce max history limit\n historyEntries = historyEntries.slice(0, MAX_HISTORY);\n }\n\n // --- Assemble final comment ---\n const historySection = historyEntries.length > 0\n ? `\\n\\n---\\n\\n### 📜 Previous runs\\n\\n${HISTORY_START}\\n${historyEntries.join('\\n\\n')}\\n${HISTORY_END}`\n : '';\n const finalBody = `${MARKER}\\n${newReport}${historySection}`;\n\n if (existing) {\n await github.rest.issues.updateComment({\n owner: context.repo.owner, repo: context.repo.repo,\n comment_id: existing.id, body: finalBody,\n });\n console.log(`Updated comment (${historyEntries.length} history entries)`);\n } else {\n await github.rest.issues.createComment({\n owner: context.repo.owner, repo: context.repo.repo,\n issue_number: prNumber, body: finalBody,\n });\n console.log('Created new PR comment');\n }\n\n - name: Summary\n if: always()\n run: |\n if [ -f /tmp/ailf-report.md ]; then\n cat /tmp/ailf-report.md >> \"$GITHUB_STEP_SUMMARY\"\n else\n echo \"## ⚠️ AI Literacy Evaluation\" >> \"$GITHUB_STEP_SUMMARY\"\n echo \"\" >> \"$GITHUB_STEP_SUMMARY\"\n echo \"No report generated. Check the workflow logs.\" >> \"$GITHUB_STEP_SUMMARY\"\n fi\n";
|
|
@@ -99,6 +99,17 @@ export interface ResolvedConfig {
|
|
|
99
99
|
reportStoreProjectId?: string;
|
|
100
100
|
/** Report store dataset from .ailf/config.yaml reportStore block */
|
|
101
101
|
reportStoreDataset?: string;
|
|
102
|
+
/**
|
|
103
|
+
* Git metadata from the *calling* repository (cross-repo evaluations).
|
|
104
|
+
* When set, this overrides the CI env var-based git detection in provenance,
|
|
105
|
+
* ensuring the report attributes to the caller — not the AILF core repo.
|
|
106
|
+
*/
|
|
107
|
+
callerGit?: {
|
|
108
|
+
branch?: string;
|
|
109
|
+
prNumber?: number;
|
|
110
|
+
repo: string;
|
|
111
|
+
sha?: string;
|
|
112
|
+
};
|
|
102
113
|
/** Callback URL configuration for API-triggered evaluations */
|
|
103
114
|
callback?: {
|
|
104
115
|
url: string;
|
|
@@ -20,6 +20,12 @@ export declare const PipelineRequestSchema: z.ZodObject<{
|
|
|
20
20
|
"cloud-run": "cloud-run";
|
|
21
21
|
}>>;
|
|
22
22
|
areas: z.ZodOptional<z.ZodArray<z.ZodString>>;
|
|
23
|
+
callerGit: z.ZodOptional<z.ZodObject<{
|
|
24
|
+
branch: z.ZodOptional<z.ZodString>;
|
|
25
|
+
prNumber: z.ZodOptional<z.ZodNumber>;
|
|
26
|
+
repo: z.ZodString;
|
|
27
|
+
sha: z.ZodOptional<z.ZodString>;
|
|
28
|
+
}, z.core.$strip>>;
|
|
23
29
|
callback: z.ZodOptional<z.ZodObject<{
|
|
24
30
|
headers: z.ZodOptional<z.ZodRecord<z.ZodString, z.ZodString>>;
|
|
25
31
|
url: z.ZodString;
|
|
@@ -30,12 +30,31 @@ const CallbackSchema = z.object({
|
|
|
30
30
|
url: z.string().url(),
|
|
31
31
|
});
|
|
32
32
|
// ---------------------------------------------------------------------------
|
|
33
|
+
// Caller Git context — for cross-repo evaluations
|
|
34
|
+
// ---------------------------------------------------------------------------
|
|
35
|
+
/**
|
|
36
|
+
* Git metadata from the *calling* repository.
|
|
37
|
+
*
|
|
38
|
+
* When a cross-repo evaluation is triggered (via repository_dispatch or
|
|
39
|
+
* the API), the GitHub Actions env vars (GITHUB_REPOSITORY, GITHUB_SHA,
|
|
40
|
+
* etc.) reflect the *AILF core repo* where the workflow executes — not
|
|
41
|
+
* the repo that requested the evaluation. This field carries the caller's
|
|
42
|
+
* actual git context so provenance correctly attributes the evaluation.
|
|
43
|
+
*/
|
|
44
|
+
const CallerGitSchema = z.object({
|
|
45
|
+
branch: z.string().optional(),
|
|
46
|
+
prNumber: z.number().int().positive().optional(),
|
|
47
|
+
repo: z.string(),
|
|
48
|
+
sha: z.string().optional(),
|
|
49
|
+
});
|
|
50
|
+
// ---------------------------------------------------------------------------
|
|
33
51
|
// Pipeline Request — the universal invocation contract
|
|
34
52
|
// ---------------------------------------------------------------------------
|
|
35
53
|
export const PipelineRequestSchema = z.object({
|
|
36
54
|
allowedOrigins: z.array(z.string()).optional(),
|
|
37
55
|
backend: z.enum(["github-actions", "cloud-run"]).optional(),
|
|
38
56
|
areas: z.array(z.string()).optional(),
|
|
57
|
+
callerGit: CallerGitSchema.optional(),
|
|
39
58
|
callback: CallbackSchema.optional(),
|
|
40
59
|
changedDocs: z.array(z.string()).optional(),
|
|
41
60
|
compare: z.boolean().optional(),
|
package/dist/commands/init.js
CHANGED
|
@@ -163,6 +163,16 @@ async function runInit(opts) {
|
|
|
163
163
|
console.log(" 4. Push — the workflow at .github/workflows/ailf-eval.yml runs");
|
|
164
164
|
console.log(" automatically on PRs");
|
|
165
165
|
console.log();
|
|
166
|
+
console.log(" 🔑 Retrieve secrets from 1Password (Sanity employees):");
|
|
167
|
+
console.log();
|
|
168
|
+
console.log(" # Shared dev API key (for local testing and CI)");
|
|
169
|
+
console.log(' op read "op://Shared/AI Literacy Framework - Shared API Tokens/AILF_API_KEY_DEV"');
|
|
170
|
+
console.log();
|
|
171
|
+
console.log(" # npm token (read access to @sanity scope)");
|
|
172
|
+
console.log(' op read "op://Shared/AI Literacy Framework - Shared API Tokens/NPM_TOKEN"');
|
|
173
|
+
console.log();
|
|
174
|
+
console.log(" Not a Sanity employee? Request an API key from the AILF team.");
|
|
175
|
+
console.log();
|
|
166
176
|
console.log(" 💡 Test locally before pushing:");
|
|
167
177
|
console.log(" AILF_API_KEY=... npx @sanity/ailf@latest pipeline --remote --debug");
|
|
168
178
|
console.log();
|
|
@@ -16,6 +16,17 @@ import type { EvalMode, PromptfooUrlEntry, ReportProvenance } from "./types.js";
|
|
|
16
16
|
export interface ProvenanceInput {
|
|
17
17
|
/** Feature areas that were evaluated */
|
|
18
18
|
areas: string[];
|
|
19
|
+
/**
|
|
20
|
+
* Git metadata from the *calling* repository (cross-repo evaluations).
|
|
21
|
+
* When provided, overrides CI env var detection so provenance attributes
|
|
22
|
+
* to the caller — not the AILF core repo where the workflow executes.
|
|
23
|
+
*/
|
|
24
|
+
callerGit?: {
|
|
25
|
+
branch?: string;
|
|
26
|
+
prNumber?: number;
|
|
27
|
+
repo: string;
|
|
28
|
+
sha?: string;
|
|
29
|
+
};
|
|
19
30
|
/** SHA-256 hash of the doc context files (from cache system) */
|
|
20
31
|
contextHash?: string;
|
|
21
32
|
/** Evaluation fingerprint for cross-environment cache lookup */
|
|
@@ -25,11 +25,21 @@ import { load } from "js-yaml";
|
|
|
25
25
|
*/
|
|
26
26
|
export function buildProvenance(input) {
|
|
27
27
|
const models = loadModelsConfig(input.rootDir);
|
|
28
|
+
// Cross-repo evaluations: prefer explicit caller git metadata over
|
|
29
|
+
// CI env vars (which always reflect the AILF core repo).
|
|
30
|
+
const git = input.callerGit
|
|
31
|
+
? {
|
|
32
|
+
branch: input.callerGit.branch ?? "unknown",
|
|
33
|
+
prNumber: input.callerGit.prNumber,
|
|
34
|
+
repo: input.callerGit.repo,
|
|
35
|
+
sha: input.callerGit.sha ?? "unknown",
|
|
36
|
+
}
|
|
37
|
+
: detectGitMetadata();
|
|
28
38
|
return {
|
|
29
39
|
areas: input.areas,
|
|
30
40
|
contextHash: input.contextHash,
|
|
31
41
|
evalFingerprint: input.evalFingerprint,
|
|
32
|
-
git
|
|
42
|
+
git,
|
|
33
43
|
graderModel: models.grader.id,
|
|
34
44
|
mode: input.mode,
|
|
35
45
|
models: models.models.map((m) => ({ id: m.id, label: m.label })),
|
|
@@ -110,6 +120,9 @@ function detectTrigger() {
|
|
|
110
120
|
if (eventName === "repository_dispatch") {
|
|
111
121
|
return {
|
|
112
122
|
callerRef: process.env.GITHUB_REF,
|
|
123
|
+
// Note: callerRepo here is a fallback. The accurate caller repo
|
|
124
|
+
// comes from callerGit (injected into the PipelineRequest payload).
|
|
125
|
+
// GITHUB_REPOSITORY_OWNER_ID is just the org ID, not owner/repo.
|
|
113
126
|
callerRepo: process.env.GITHUB_REPOSITORY_OWNER_ID ?? "unknown",
|
|
114
127
|
type: "cross-repo",
|
|
115
128
|
};
|