@sanity/ailf 0.1.16 → 0.1.18
This diff compares the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.
package/README.md
CHANGED
|
@@ -4,86 +4,18 @@ CLI and evaluation engine for the **AI Literacy Framework** — measures how
|
|
|
4
4
|
effectively documentation enables AI coding tools to implement features
|
|
5
5
|
correctly.
|
|
6
6
|
|
|
7
|
-
## Installation
|
|
8
|
-
|
|
9
|
-
```bash
|
|
10
|
-
# Run without installing (recommended for quick start)
|
|
11
|
-
npx @sanity/ailf --help
|
|
12
|
-
|
|
13
|
-
# Or install globally
|
|
14
|
-
pnpm add -g @sanity/ailf
|
|
15
|
-
|
|
16
|
-
# Or as a project dependency
|
|
17
|
-
pnpm add @sanity/ailf
|
|
18
|
-
```
|
|
19
|
-
|
|
20
|
-
## Quick start
|
|
21
|
-
|
|
22
|
-
### 1. Initialize a project
|
|
23
|
-
|
|
24
7
|
```bash
|
|
25
|
-
npx @sanity/ailf init
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
This creates a `.ailf/` directory with example configuration and task files:
|
|
29
|
-
|
|
8
|
+
npx @sanity/ailf@latest init # scaffold a project
|
|
9
|
+
npx @sanity/ailf@latest --help # see all commands
|
|
30
10
|
```
|
|
31
|
-
.ailf/
|
|
32
|
-
├── config.yaml # Project configuration
|
|
33
|
-
├── .gitignore # Keeps generated files out of VCS
|
|
34
|
-
└── tasks/
|
|
35
|
-
├── example-groq-blog-listing.yaml
|
|
36
|
-
└── example-studio-custom-input.yaml
|
|
37
|
-
```
|
|
38
|
-
|
|
39
|
-
### 2. Set up environment
|
|
40
|
-
|
|
41
|
-
Create a `.env` file in your project root:
|
|
42
|
-
|
|
43
|
-
```bash
|
|
44
|
-
# Required — LLM provider for evaluation and grading
|
|
45
|
-
OPENAI_API_KEY=sk-...
|
|
46
|
-
|
|
47
|
-
# Required — read access to Sanity documentation content
|
|
48
|
-
SANITY_API_TOKEN=sk...
|
|
49
|
-
|
|
50
|
-
# Optional — publish reports to your Sanity Studio
|
|
51
|
-
AILF_REPORT_SANITY_API_TOKEN=sk...
|
|
52
|
-
AILF_REPORT_PROJECT_ID=your-project-id
|
|
53
|
-
AILF_REPORT_DATASET=production
|
|
54
|
-
```
|
|
55
|
-
|
|
56
|
-
### 3. Edit tasks and run
|
|
57
|
-
|
|
58
|
-
```bash
|
|
59
|
-
# Edit .ailf/config.yaml with your Sanity project settings
|
|
60
|
-
# Customize or replace the example tasks in .ailf/tasks/
|
|
61
|
-
|
|
62
|
-
# Validate task definitions
|
|
63
|
-
npx @sanity/ailf validate-tasks .ailf/tasks/
|
|
64
|
-
|
|
65
|
-
# Run evaluation in debug mode (fast feedback)
|
|
66
|
-
npx @sanity/ailf pipeline --repo-tasks-path .ailf/tasks/ --debug
|
|
67
|
-
|
|
68
|
-
# Full evaluation
|
|
69
|
-
npx @sanity/ailf pipeline --repo-tasks-path .ailf/tasks/
|
|
70
|
-
```
|
|
71
|
-
|
|
72
|
-
## Documentation
|
|
73
11
|
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
— task authoring guide
|
|
78
|
-
- **[Architecture](https://github.com/sanity-labs/ai-literacy-framework/blob/main/docs/ARCHITECTURE.md)**
|
|
79
|
-
— domain model and data flow
|
|
80
|
-
- **[Root README](https://github.com/sanity-labs/ai-literacy-framework)** —
|
|
81
|
-
project overview
|
|
12
|
+
**→ See the
|
|
13
|
+
[full documentation](https://github.com/sanity-labs/ai-literacy-framework#readme)**
|
|
14
|
+
for installation, quick start, configuration, and usage guides.
|
|
82
15
|
|
|
83
16
|
## Related packages
|
|
84
17
|
|
|
85
18
|
| Package | Description |
|
|
86
19
|
| -------------------------------------------------------------------------- | -------------------------------------------------- |
|
|
20
|
+
| [`@sanity/ailf-tasks`](https://www.npmjs.com/package/@sanity/ailf-tasks) | Lightweight task validator — schemas + YAML parser |
|
|
87
21
|
| [`@sanity/ailf-studio`](https://www.npmjs.com/package/@sanity/ailf-studio) | Sanity Studio dashboard plugin for viewing reports |
|
|
88
|
-
| `@sanity/ailf-core` | Domain kernel (types, schemas, ports) |
|
|
89
|
-
| `@sanity/ailf-shared` | Cross-package contract types |
|
|
@@ -110,7 +110,7 @@ export declare const ailfConfigData: {
|
|
|
110
110
|
};
|
|
111
111
|
};
|
|
112
112
|
/** Raw YAML string for ailf-config example (preserves comments) */
|
|
113
|
-
export declare const ailfConfigYaml = "# \u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n# .ailf/config.yaml \u2014 AI Literacy Framework project configuration\n# \u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n#\n# This file configures how the AILF evaluation pipeline runs in this\n# repository. Place it at .ailf/config.yaml in your project root.\n#\n# Evaluations are submitted to the AILF API (ailf-api.sanity.build).\n# The API handles LLM calls, doc fetching, grading, and report\n# publishing. Your repo only needs one secret: AILF_API_KEY.\n#\n# Docs: https://github.com/sanity-
|
|
113
|
+
export declare const ailfConfigYaml = "# \u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n# .ailf/config.yaml \u2014 AI Literacy Framework project configuration\n# \u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n#\n# This file configures how the AILF evaluation pipeline runs in this\n# repository. Place it at .ailf/config.yaml in your project root.\n#\n# Evaluations are submitted to the AILF API (ailf-api.sanity.build).\n# The API handles LLM calls, doc fetching, grading, and report\n# publishing. Your repo only needs one secret: AILF_API_KEY.\n#\n# Docs: https://github.com/sanity-labs/ai-literacy-framework\n# \u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n\n# Documentation source \u2014 which docs are being evaluated.\n#\n# This tells the pipeline which Sanity project and dataset contain\n# the documentation under test. 
For most users, this is Sanity's own\n# docs project.\n#\n# projectId \u2014 Sanity project ID (find yours at sanity.io/manage)\n# dataset \u2014 the dataset to query (e.g., \"production\", \"next\")\n# baseUrl \u2014 the public URL of your documentation site\n# (used by agentic mode to test agent discoverability)\nsource:\n projectId: \"3do82whm\"\n dataset: next\n baseUrl: \"https://www.sanity.io/docs\"\n\n# Trigger configuration \u2014 when evaluations run automatically.\n#\n# Each key is a trigger context. The pipeline checks which trigger\n# matches the current execution context (PR, merge, schedule, etc.)\n# and applies its settings.\n#\n# mode options:\n# validate-only \u2014 check that task YAML parses correctly (fast, no LLM calls)\n# eval \u2014 run the full evaluation pipeline\n#\n# paths \u2014 only trigger when files matching these globs change\n# blocking \u2014 if true, a failing eval blocks the PR merge\n# notify \u2014 if true, post results to configured notification channels\ntriggers:\n # On pull requests: just validate task files parse correctly\n pr:\n mode: validate-only\n\n # When .ailf/ files change in a PR: run a real evaluation\n pr-task-change:\n mode: eval\n paths: [\".ailf/**\"]\n\n # On merge to main: run evaluation (non-blocking)\n main:\n mode: eval\n blocking: false\n notify: true\n";
|
|
114
114
|
/** Parsed task data for example-groq-blog-listing (JSON-safe) */
|
|
115
115
|
export declare const exampleGroqBlogListingData: readonly [{
|
|
116
116
|
readonly id: "example-groq-blog-listing";
|
|
@@ -142,16 +142,172 @@ export declare const exampleGroqBlogListingData: readonly [{
|
|
|
142
142
|
readonly enabled: true;
|
|
143
143
|
readonly rubric: "abbreviated";
|
|
144
144
|
};
|
|
145
|
+
readonly execution: {
|
|
146
|
+
readonly enabled: false;
|
|
147
|
+
};
|
|
145
148
|
}];
|
|
146
149
|
/** Raw YAML string for example-groq-blog-listing (preserves comments) */
|
|
147
|
-
export declare const exampleGroqBlogListingYaml = "# \u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n# Example Task: Blog listing with GROQ queries\n# \u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n#\n# This is a starter template \u2014 edit it for your own documentation.\n# Each task evaluates whether an AI coding agent can implement a feature\n# using your docs as context. Delete this file or replace it entirely.\n#\n#
|
|
150
|
+
export declare const exampleGroqBlogListingYaml = "# \u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n# Example Task: Blog listing with GROQ queries\n# \u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n#\n# This is a starter template \u2014 edit it for your own documentation.\n# Each task evaluates whether an AI coding agent can implement a feature\n# using your docs as context. Delete this file or replace it entirely.\n#\n# This example task is DISABLED by default. To enable it, either:\n# 1. Remove the execution.enabled: false line below, or\n# 2. 
Set execution.enabled: true\n#\n# Full field reference:\n# https://github.com/sanity-labs/ai-literacy-framework/blob/main/docs/CONTRIBUTING_TASKS.md\n# \u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n\n# Unique identifier \u2014 lowercase alphanumeric with hyphens.\n# Must be unique across all task files in .ailf/tasks/.\n- id: example-groq-blog-listing\n\n # Short human-readable summary. Shown in score tables and reports.\n description: \"Example \u2014 Blog listing with GROQ queries\"\n\n # Feature area this task belongs to. Tasks with the same area are\n # grouped together in score summaries. Use a short kebab-case name.\n featureArea: groq\n\n # Gold-standard documentation articles for this task. The pipeline\n # fetches these from Sanity and injects them into the prompt for\n # baseline evaluation. 
Each entry needs:\n # slug \u2014 the article's URL slug in your docs site\n # reason \u2014 why this doc is relevant (helps with auditing)\n #\n # This example uses slug-based references \u2014 the simplest form.\n # See the other example tasks for path, id, and perspective references.\n canonicalDocs:\n - slug: groq-introduction\n reason: \"Core GROQ syntax and query language reference\"\n - slug: how-queries-work\n reason: \"Query execution model and best practices\"\n\n # When true, the pipeline auto-generates an additional rubric that\n # checks whether the LLM's response actually used the provided docs.\n docCoverage: true\n\n # Path to a gold-standard implementation, relative to canonical/.\n # The grader uses this as a reference when scoring code correctness.\n referenceSolution: canonical/example-groq-blog-listing.ts\n\n # vars.task \u2014 the implementation prompt given to the LLM.\n # Write this as if you're asking a developer to build the feature.\n # Be specific about requirements so the grader can evaluate clearly.\n #\n # vars.docs \u2014 leave empty (\"\"). The pipeline fills this in:\n # \u2022 Gold variant: injected with canonical doc content\n # \u2022 Baseline variant: left empty (tests model knowledge alone)\n vars:\n task: |\n Create a Next.js page component that lists blog posts from Sanity\n using GROQ. The page should display the title, slug, and published\n date for each post, sorted by most recent first. Use the Sanity\n client to fetch data.\n docs: \"\"\n\n # Grading assertions \u2014 how the LLM's response is scored.\n #\n # \"llm-rubric\" assertions use a grader LLM to score against criteria.\n # The \"template\" references a rubric from config/rubrics.yaml.\n # The \"criteria\" are task-specific bullets injected into the template.\n #\n # Available templates:\n # task-completion \u2014 did the LLM implement the feature? (weight: 0.50)\n # code-correctness \u2014 is the code idiomatic and correct? 
(weight: 0.25)\n #\n # You can also use value-based assertions:\n # - type: contains\n # value: \"client.fetch\"\n # - type: contains-any\n # value: [\"createClient\", \"sanityClient\"]\n assert:\n - type: llm-rubric\n template: task-completion\n criteria:\n - \"Uses the groq tagged template literal\"\n - \"Fetches blog posts with title, slug, and publishedAt fields\"\n - \"Orders results by publishedAt in descending order\"\n\n - type: llm-rubric\n template: code-correctness\n criteria:\n - \"Uses createClient from @sanity/client or next-sanity\"\n - \"Exports a valid Next.js page component\"\n\n # Baseline variant configuration.\n # enabled \u2014 set to false to skip this task entirely\n # rubric \u2014 \"abbreviated\" (faster, default), \"full\", or \"none\"\n baseline:\n enabled: true\n rubric: abbreviated\n\n # Execution configuration.\n # Example tasks ship disabled so they don't run automatically.\n # Set enabled: true (or remove this block) to activate.\n execution:\n enabled: false\n";
|
|
151
|
+
/** Parsed task data for example-id-based-ref (JSON-safe) */
|
|
152
|
+
export declare const exampleIdBasedRefData: readonly [{
|
|
153
|
+
readonly id: "example-id-based-ref";
|
|
154
|
+
readonly description: "Example — GROQ cheat sheet (ID-based doc references)";
|
|
155
|
+
readonly featureArea: "groq";
|
|
156
|
+
readonly canonicalDocs: readonly [{
|
|
157
|
+
readonly id: "81b839a4-2fc1-4769-941a-ec4de9276492";
|
|
158
|
+
readonly slug: "query-cheat-sheet";
|
|
159
|
+
readonly reason: "GROQ query patterns and cheat sheet reference";
|
|
160
|
+
}, {
|
|
161
|
+
readonly id: "44e5fc43-4628-4a4c-8e00-6ae12b83b8d2";
|
|
162
|
+
readonly slug: "groq-introduction";
|
|
163
|
+
readonly reason: "Core GROQ syntax and language overview";
|
|
164
|
+
}];
|
|
165
|
+
readonly docCoverage: true;
|
|
166
|
+
readonly vars: {
|
|
167
|
+
readonly task: "Using the GROQ query language, write queries for the following\ncommon content operations:\n1. Fetch all documents of type \"post\" with title and slug\n2. Filter posts published after a specific date\n3. Sort results by publishedAt descending with a limit of 10\n4. Join author data from a reference field\nExplain each query pattern and when to use it.\n";
|
|
168
|
+
readonly docs: "";
|
|
169
|
+
};
|
|
170
|
+
readonly assert: readonly [{
|
|
171
|
+
readonly type: "llm-rubric";
|
|
172
|
+
readonly template: "task-completion";
|
|
173
|
+
readonly criteria: readonly ["Provides a GROQ query that filters by _type", "Demonstrates date-based filtering", "Shows ordering with order() and slicing for limits", "Demonstrates reference expansion (dereferencing with ->)"];
|
|
174
|
+
}, {
|
|
175
|
+
readonly type: "llm-rubric";
|
|
176
|
+
readonly template: "code-correctness";
|
|
177
|
+
readonly criteria: readonly ["All GROQ queries use valid syntax", "Projections correctly select and alias fields"];
|
|
178
|
+
}];
|
|
179
|
+
readonly baseline: {
|
|
180
|
+
readonly enabled: true;
|
|
181
|
+
readonly rubric: "abbreviated";
|
|
182
|
+
};
|
|
183
|
+
readonly execution: {
|
|
184
|
+
readonly enabled: false;
|
|
185
|
+
};
|
|
186
|
+
}];
|
|
187
|
+
/** Raw YAML string for example-id-based-ref (preserves comments) */
|
|
188
|
+
export declare const exampleIdBasedRefYaml = "# \u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n# Example Task: Document ID-based canonical doc references\n# \u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n#\n# Demonstrates using `id` to reference canonical documentation by\n# Sanity document `_id`. This is useful for:\n# - Draft documents that don't have a stable slug yet\n# - Programmatic references from imports or migrations\n# - Documents where you know the _id but not the slug\n#\n# The `id` ref type can also carry optional `slug` and `path` fields\n# as human-readable annotations \u2014 these are NOT used for resolution,\n# only for display in logs and reports.\n#\n# This example task is DISABLED by default. To enable it, either:\n# 1. Remove the execution.enabled: false line below, or\n# 2. 
Set execution.enabled: true\n#\n# @see docs/design-docs/canonical-doc-resolution.md\n# \u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n\n- id: example-id-based-ref\n description: \"Example \u2014 GROQ cheat sheet (ID-based doc references)\"\n\n featureArea: groq\n\n # ID-based canonical doc references.\n #\n # Use the Sanity document _id to reference articles directly.\n # Optional slug/path annotations help humans reading the YAML\n # but are NOT used for resolution \u2014 only the `id` field matters.\n #\n # These IDs reference real articles in the Sanity docs (next dataset):\n # 81b839a4... = \"Query Cheat Sheet - GROQ\"\n # 44e5fc43... = \"GROQ introduction\"\n canonicalDocs:\n - id: \"81b839a4-2fc1-4769-941a-ec4de9276492\"\n slug: query-cheat-sheet # annotation only \u2014 not used for resolution\n reason: \"GROQ query patterns and cheat sheet reference\"\n - id: \"44e5fc43-4628-4a4c-8e00-6ae12b83b8d2\"\n slug: groq-introduction # annotation only \u2014 not used for resolution\n reason: \"Core GROQ syntax and language overview\"\n\n docCoverage: true\n\n vars:\n task: |\n Using the GROQ query language, write queries for the following\n common content operations:\n 1. Fetch all documents of type \"post\" with title and slug\n 2. Filter posts published after a specific date\n 3. Sort results by publishedAt descending with a limit of 10\n 4. 
Join author data from a reference field\n Explain each query pattern and when to use it.\n docs: \"\"\n\n assert:\n - type: llm-rubric\n template: task-completion\n criteria:\n - \"Provides a GROQ query that filters by _type\"\n - \"Demonstrates date-based filtering\"\n - \"Shows ordering with order() and slicing for limits\"\n - \"Demonstrates reference expansion (dereferencing with ->)\"\n\n - type: llm-rubric\n template: code-correctness\n criteria:\n - \"All GROQ queries use valid syntax\"\n - \"Projections correctly select and alias fields\"\n\n baseline:\n enabled: true\n rubric: abbreviated\n\n # Example tasks ship disabled so they don't run automatically.\n # Set enabled: true (or remove this block) to activate.\n execution:\n enabled: false\n";
|
|
189
|
+
/** Parsed task data for example-mixed-ref-types (JSON-safe) */
|
|
190
|
+
export declare const exampleMixedRefTypesData: readonly [{
|
|
191
|
+
readonly id: "example-mixed-ref-types";
|
|
192
|
+
readonly description: "Example — Mixed canonical doc reference types (slug + path + id + perspective)";
|
|
193
|
+
readonly featureArea: "groq";
|
|
194
|
+
readonly canonicalDocs: readonly [{
|
|
195
|
+
readonly slug: "groq-introduction";
|
|
196
|
+
readonly reason: "Core GROQ language overview";
|
|
197
|
+
}, {
|
|
198
|
+
readonly path: "content-lake/how-queries-work";
|
|
199
|
+
readonly reason: "Query execution model (disambiguated by section)";
|
|
200
|
+
}, {
|
|
201
|
+
readonly id: "81b839a4-2fc1-4769-941a-ec4de9276492";
|
|
202
|
+
readonly slug: "query-cheat-sheet";
|
|
203
|
+
readonly reason: "GROQ query patterns cheat sheet (referenced by document ID)";
|
|
204
|
+
}, {
|
|
205
|
+
readonly perspective: "rE9TSJvR4";
|
|
206
|
+
readonly reason: "Additional GROQ docs from the test content release";
|
|
207
|
+
}];
|
|
208
|
+
readonly docCoverage: true;
|
|
209
|
+
readonly vars: {
|
|
210
|
+
readonly task: "Create a comprehensive GROQ query guide that covers:\n1. Basic query structure and filtering by document type\n2. Projections and field selection\n3. Ordering, slicing, and pagination\n4. Reference joins and nested data\n5. Webhook GROQ filter configuration\nInclude working examples for each concept and explain the\nquery execution model.\n";
|
|
211
|
+
readonly docs: "";
|
|
212
|
+
};
|
|
213
|
+
readonly assert: readonly [{
|
|
214
|
+
readonly type: "llm-rubric";
|
|
215
|
+
readonly template: "task-completion";
|
|
216
|
+
readonly criteria: readonly ["Covers basic GROQ query structure with _type filtering", "Demonstrates projections with field aliasing", "Shows ordering with order() and slice syntax", "Includes reference joins with the dereference operator", "Explains webhook GROQ filter patterns"];
|
|
217
|
+
}, {
|
|
218
|
+
readonly type: "llm-rubric";
|
|
219
|
+
readonly template: "code-correctness";
|
|
220
|
+
readonly criteria: readonly ["All GROQ queries use valid syntax", "Examples are practical and self-contained"];
|
|
221
|
+
}];
|
|
222
|
+
readonly baseline: {
|
|
223
|
+
readonly enabled: true;
|
|
224
|
+
readonly rubric: "abbreviated";
|
|
225
|
+
};
|
|
226
|
+
readonly execution: {
|
|
227
|
+
readonly enabled: false;
|
|
228
|
+
};
|
|
229
|
+
}];
|
|
230
|
+
/** Raw YAML string for example-mixed-ref-types (preserves comments) */
|
|
231
|
+
export declare const exampleMixedRefTypesYaml = "# \u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n# Example Task: All canonical doc reference types in one task\n# \u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n#\n# Demonstrates combining all four canonical doc reference strategies\n# in a single task's canonicalDocs array:\n#\n# slug \u2014 by article slug (simplest, may not be unique)\n# path \u2014 by section/slug path (unique, preferred)\n# id \u2014 by Sanity document _id (for drafts, imports)\n# perspective \u2014 by content release (one-to-many expansion)\n#\n# Each reference type resolves through a different strategy, but the\n# pipeline merges them into a single flat documentation context. The\n# LLM sees the same combined markdown regardless of how docs were found.\n#\n# This example task is DISABLED by default. To enable it, either:\n# 1. Remove the execution.enabled: false line below, or\n# 2. 
Set execution.enabled: true\n#\n# @see docs/design-docs/canonical-doc-resolution.md\n# \u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n\n- id: example-mixed-ref-types\n description:\n \"Example \u2014 Mixed canonical doc reference types (slug + path + id +\n perspective)\"\n\n featureArea: groq\n\n # All four canonical doc reference types demonstrated together.\n #\n # The pipeline resolves each ref independently:\n # 1. slug refs \u2192 GROQ query by slug.current\n # 2. path refs \u2192 GROQ query by section + slug (or slug fallback)\n # 3. id refs \u2192 GROQ query by _id\n # 4. perspective refs \u2192 expand to all articles in the release\n #\n # After resolution, all entries become a flat list of document slugs.\n # The fetched markdown is concatenated into a single context string.\n canonicalDocs:\n # Slug reference \u2014 simplest form, resolves by article slug\n - slug: groq-introduction\n reason: \"Core GROQ language overview\"\n\n # Path reference \u2014 section-qualified, uniquely identifies the article\n - path: content-lake/how-queries-work\n reason: \"Query execution model (disambiguated by section)\"\n\n # ID reference \u2014 resolves by Sanity document _id\n # Optional slug annotation helps humans reading the YAML\n - id: \"81b839a4-2fc1-4769-941a-ec4de9276492\"\n slug: query-cheat-sheet\n reason: \"GROQ query patterns cheat sheet (referenced by document ID)\"\n\n # Perspective reference \u2014 expands to all articles in the release\n # This adds webhooks, query-cheat-sheet, and groq-joins from release rE9TSJvR4\n # Note: query-cheat-sheet appears via both the id ref and the 
perspective ref,\n # but the pipeline deduplicates slugs automatically.\n - perspective: rE9TSJvR4\n reason: \"Additional GROQ docs from the test content release\"\n\n docCoverage: true\n\n vars:\n task: |\n Create a comprehensive GROQ query guide that covers:\n 1. Basic query structure and filtering by document type\n 2. Projections and field selection\n 3. Ordering, slicing, and pagination\n 4. Reference joins and nested data\n 5. Webhook GROQ filter configuration\n Include working examples for each concept and explain the\n query execution model.\n docs: \"\"\n\n assert:\n - type: llm-rubric\n template: task-completion\n criteria:\n - \"Covers basic GROQ query structure with _type filtering\"\n - \"Demonstrates projections with field aliasing\"\n - \"Shows ordering with order() and slice syntax\"\n - \"Includes reference joins with the dereference operator\"\n - \"Explains webhook GROQ filter patterns\"\n\n - type: llm-rubric\n template: code-correctness\n criteria:\n - \"All GROQ queries use valid syntax\"\n - \"Examples are practical and self-contained\"\n\n baseline:\n enabled: true\n rubric: abbreviated\n\n # Example tasks ship disabled so they don't run automatically.\n # Set enabled: true (or remove this block) to activate.\n execution:\n enabled: false\n";
|
|
232
|
+
/** Parsed task data for example-path-based-ref (JSON-safe) */
|
|
233
|
+
export declare const examplePathBasedRefData: readonly [{
|
|
234
|
+
readonly id: "example-path-based-ref";
|
|
235
|
+
readonly description: "Example — GROQ webhooks (path-based doc references)";
|
|
236
|
+
readonly featureArea: "groq";
|
|
237
|
+
readonly canonicalDocs: readonly [{
|
|
238
|
+
readonly path: "content-lake/webhooks";
|
|
239
|
+
readonly reason: "GROQ-powered webhooks configuration and GROQ filter patterns";
|
|
240
|
+
}, {
|
|
241
|
+
readonly path: "content-lake/how-queries-work";
|
|
242
|
+
readonly reason: "How GROQ queries execute — projection, filtering, ordering";
|
|
243
|
+
}];
|
|
244
|
+
readonly docCoverage: true;
|
|
245
|
+
readonly vars: {
|
|
246
|
+
readonly task: "Create a webhook configuration for a Sanity project that triggers\nwhen blog post documents are published. The webhook should use a\nGROQ filter to match only documents of type \"post\" and a GROQ\nprojection to include the title, slug, and publishedAt fields in\nthe webhook payload. Explain the GROQ filter syntax used.\n";
|
|
247
|
+
readonly docs: "";
|
|
248
|
+
};
|
|
249
|
+
readonly assert: readonly [{
|
|
250
|
+
readonly type: "llm-rubric";
|
|
251
|
+
readonly template: "task-completion";
|
|
252
|
+
readonly criteria: readonly ["Configures a webhook with a GROQ filter for _type == 'post'", "Uses a GROQ projection to shape the payload", "Includes title, slug, and publishedAt in the projection", "Explains when the webhook fires (on publish events)"];
|
|
253
|
+
}, {
|
|
254
|
+
readonly type: "llm-rubric";
|
|
255
|
+
readonly template: "code-correctness";
|
|
256
|
+
readonly criteria: readonly ["GROQ filter syntax is valid", "Projection syntax correctly selects nested fields"];
|
|
257
|
+
}];
|
|
258
|
+
readonly baseline: {
|
|
259
|
+
readonly enabled: true;
|
|
260
|
+
readonly rubric: "abbreviated";
|
|
261
|
+
};
|
|
262
|
+
readonly execution: {
|
|
263
|
+
readonly enabled: false;
|
|
264
|
+
};
|
|
265
|
+
}];
|
|
266
|
+
/** Raw YAML string for example-path-based-ref (preserves comments) */
|
|
267
|
+
export declare const examplePathBasedRefYaml = "# \u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n# Example Task: Path-based canonical doc references\n# \u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n#\n# Demonstrates using `path` to reference canonical documentation.\n# Paths are the preferred reference type because they uniquely identify\n# an article across sections (unlike slugs, which can collide).\n#\n# Path format:\n# - Simple: \"webhooks\" \u2192 resolves by slug lookup\n# - Sectioned: \"content-lake/webhooks\" \u2192 disambiguates by section + slug\n#\n# This example task is DISABLED by default. To enable it, either:\n# 1. Remove the execution.enabled: false line below, or\n# 2. 
Set execution.enabled: true\n#\n# @see docs/design-docs/canonical-doc-resolution.md\n# \u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n\n- id: example-path-based-ref\n description: \"Example \u2014 GROQ webhooks (path-based doc references)\"\n\n featureArea: groq\n\n # Path-based canonical doc references.\n #\n # Use \"section/slug\" format to uniquely identify articles:\n # - \"content-lake/webhooks\" \u2192 the webhooks article in the Content Lake section\n # - \"content-lake/how-queries-work\" \u2192 disambiguated from any other section\n #\n # Simple paths (just the slug) also work but don't disambiguate sections.\n canonicalDocs:\n - path: content-lake/webhooks\n reason: \"GROQ-powered webhooks configuration and GROQ filter patterns\"\n - path: content-lake/how-queries-work\n reason: \"How GROQ queries execute \u2014 projection, filtering, ordering\"\n\n docCoverage: true\n\n vars:\n task: |\n Create a webhook configuration for a Sanity project that triggers\n when blog post documents are published. The webhook should use a\n GROQ filter to match only documents of type \"post\" and a GROQ\n projection to include the title, slug, and publishedAt fields in\n the webhook payload. 
Explain the GROQ filter syntax used.\n docs: \"\"\n\n assert:\n - type: llm-rubric\n template: task-completion\n criteria:\n - \"Configures a webhook with a GROQ filter for _type == 'post'\"\n - \"Uses a GROQ projection to shape the payload\"\n - \"Includes title, slug, and publishedAt in the projection\"\n - \"Explains when the webhook fires (on publish events)\"\n\n - type: llm-rubric\n template: code-correctness\n criteria:\n - \"GROQ filter syntax is valid\"\n - \"Projection syntax correctly selects nested fields\"\n\n baseline:\n enabled: true\n rubric: abbreviated\n\n # Example tasks ship disabled so they don't run automatically.\n # Set enabled: true (or remove this block) to activate.\n execution:\n enabled: false\n";
|
|
268
|
+
/** Parsed task data for example-perspective-ref (JSON-safe) */
|
|
269
|
+
export declare const examplePerspectiveRefData: readonly [{
|
|
270
|
+
readonly id: "example-perspective-ref";
|
|
271
|
+
readonly description: "Example — GROQ features from content release (perspective-based doc references)";
|
|
272
|
+
readonly featureArea: "groq";
|
|
273
|
+
readonly canonicalDocs: readonly [{
|
|
274
|
+
readonly perspective: "rE9TSJvR4";
|
|
275
|
+
readonly reason: "All GROQ documentation updates in the test content release";
|
|
276
|
+
}, {
|
|
277
|
+
readonly slug: "groq-introduction";
|
|
278
|
+
readonly reason: "Foundational GROQ syntax reference (published, stable)";
|
|
279
|
+
}];
|
|
280
|
+
readonly docCoverage: true;
|
|
281
|
+
readonly vars: {
|
|
282
|
+
readonly task: "Using GROQ, demonstrate advanced query patterns including:\n1. Joining data across document types using references\n2. Filtering webhook payloads with GROQ projections\n3. Using the query cheat sheet patterns for common operations\nProvide working GROQ query examples for each pattern.\n";
|
|
283
|
+
readonly docs: "";
|
|
284
|
+
};
|
|
285
|
+
readonly assert: readonly [{
|
|
286
|
+
readonly type: "llm-rubric";
|
|
287
|
+
readonly template: "task-completion";
|
|
288
|
+
readonly criteria: readonly ["Demonstrates GROQ join syntax for cross-document queries", "Shows GROQ filter patterns for webhook configuration", "Includes practical query examples from cheat sheet patterns"];
|
|
289
|
+
}, {
|
|
290
|
+
readonly type: "llm-rubric";
|
|
291
|
+
readonly template: "code-correctness";
|
|
292
|
+
readonly criteria: readonly ["All GROQ queries use valid syntax", "Reference joins use correct dereference operator (->)"];
|
|
293
|
+
}];
|
|
294
|
+
readonly baseline: {
|
|
295
|
+
readonly enabled: true;
|
|
296
|
+
readonly rubric: "abbreviated";
|
|
297
|
+
};
|
|
298
|
+
readonly execution: {
|
|
299
|
+
readonly enabled: false;
|
|
300
|
+
};
|
|
301
|
+
}];
|
|
302
|
+
/** Raw YAML string for example-perspective-ref (preserves comments) */
|
|
303
|
+
export declare const examplePerspectiveRefYaml = "# \u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n# Example Task: Perspective / content release doc references\n# \u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n#\n# Demonstrates using `perspective` to reference all documentation\n# articles within a content release. This is the key capability for\n# evaluating NEW feature documentation before it's published.\n#\n# How it works:\n# - A perspective ref is one-to-many: the doc fetcher queries the\n# named release and expands it to ALL articles versioned within it.\n# - Downstream consumers see the same flat DocContext[] regardless\n# of how docs were resolved.\n# - When the release is published, the perspective entry becomes a\n# no-op (articles are now in published). Migrate to explicit path\n# or slug refs at your convenience.\n#\n# This example task is DISABLED by default. To enable it, either:\n# 1. Remove the execution.enabled: false line below, or\n# 2. 
Set execution.enabled: true\n#\n# @see docs/design-docs/canonical-doc-resolution.md\n# \u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n\n- id: example-perspective-ref\n description:\n \"Example \u2014 GROQ features from content release (perspective-based doc\n references)\"\n\n featureArea: groq\n\n # Perspective-based canonical doc reference.\n #\n # The perspective ID references a content release in the Sanity\n # Content Lake. At evaluation time, the doc fetcher auto-discovers\n # all articles versioned in this release and includes them as\n # canonical documentation context.\n #\n # Release rE9TSJvR4 contains:\n # - \"GROQ-powered webhooks\" (webhooks)\n # - \"Query Cheat Sheet - GROQ\" (query-cheat-sheet)\n # - \"GROQ joins\" (groq-joins)\n #\n # You can combine perspective refs with explicit slug/path/id refs\n # to include foundational published docs alongside release content.\n canonicalDocs:\n - perspective: rE9TSJvR4\n reason: \"All GROQ documentation updates in the test content release\"\n - slug: groq-introduction\n reason: \"Foundational GROQ syntax reference (published, stable)\"\n\n docCoverage: true\n\n vars:\n task: |\n Using GROQ, demonstrate advanced query patterns including:\n 1. Joining data across document types using references\n 2. Filtering webhook payloads with GROQ projections\n 3. 
Using the query cheat sheet patterns for common operations\n Provide working GROQ query examples for each pattern.\n docs: \"\"\n\n assert:\n - type: llm-rubric\n template: task-completion\n criteria:\n - \"Demonstrates GROQ join syntax for cross-document queries\"\n - \"Shows GROQ filter patterns for webhook configuration\"\n - \"Includes practical query examples from cheat sheet patterns\"\n\n - type: llm-rubric\n template: code-correctness\n criteria:\n - \"All GROQ queries use valid syntax\"\n - \"Reference joins use correct dereference operator (->)\"\n\n baseline:\n enabled: true\n rubric: abbreviated\n\n # Example tasks ship disabled so they don't run automatically.\n # Set enabled: true (or remove this block) to activate.\n execution:\n enabled: false\n";
|
|
148
304
|
/** Parsed task data for example-studio-custom-input (JSON-safe) */
|
|
149
305
|
export declare const exampleStudioCustomInputData: readonly [{
|
|
150
306
|
readonly id: "example-studio-custom-input";
|
|
151
307
|
readonly description: "Example — Custom input component in Sanity Studio";
|
|
152
308
|
readonly featureArea: "studio";
|
|
153
309
|
readonly canonicalDocs: readonly [{
|
|
154
|
-
readonly slug: "custom-input-
|
|
310
|
+
readonly slug: "custom-input-widgets";
|
|
155
311
|
readonly reason: "Guide for building custom form inputs in Sanity Studio";
|
|
156
312
|
}];
|
|
157
313
|
readonly docCoverage: true;
|
|
@@ -173,15 +329,18 @@ export declare const exampleStudioCustomInputData: readonly [{
|
|
|
173
329
|
readonly enabled: true;
|
|
174
330
|
readonly rubric: "abbreviated";
|
|
175
331
|
};
|
|
332
|
+
readonly execution: {
|
|
333
|
+
readonly enabled: false;
|
|
334
|
+
};
|
|
176
335
|
}];
|
|
177
336
|
/** Raw YAML string for example-studio-custom-input (preserves comments) */
|
|
178
|
-
export declare const exampleStudioCustomInputYaml = "# \u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n# Example Task: Custom input component in Sanity Studio\n# \u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n#\n# This is a starter template \u2014 edit it for your own documentation.\n# Delete this file or replace it with your own tasks.\n#\n# To
|
|
337
|
+
export declare const exampleStudioCustomInputYaml = "# \u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n# Example Task: Custom input component in Sanity Studio\n# \u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n#\n# This is a starter template \u2014 edit it for your own documentation.\n# Delete this file or replace it with your own tasks.\n#\n# This example task is DISABLED by default. To enable it, either:\n# 1. Remove the execution.enabled: false line below, or\n# 2. 
Set execution.enabled: true\n# \u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n\n- id: example-studio-custom-input\n description: \"Example \u2014 Custom input component in Sanity Studio\"\n\n featureArea: studio\n\n # Slug-based canonical doc reference.\n # Note: the correct slug is \"custom-input-widgets\" (not \"custom-input-components\").\n canonicalDocs:\n - slug: custom-input-widgets\n reason: \"Guide for building custom form inputs in Sanity Studio\"\n\n docCoverage: true\n referenceSolution: canonical/example-studio-custom-input.ts\n\n vars:\n task: |\n Build a custom string input component for Sanity Studio that shows\n a character count below the input field. The component should accept\n a maxLength option from the field schema and display a warning when\n the text exceeds the limit.\n docs: \"\"\n\n assert:\n - type: llm-rubric\n template: task-completion\n criteria:\n - \"Implements a React component that renders a text input\"\n - \"Displays a live character count\"\n - \"Reads maxLength from schema options\"\n - \"Shows a visual warning when limit is exceeded\"\n\n - type: llm-rubric\n template: code-correctness\n criteria:\n - \"Uses the Sanity UI library for styling\"\n - \"Calls onChange with patch operations\"\n\n baseline:\n enabled: true\n rubric: abbreviated\n\n # Example tasks ship disabled so they don't run automatically.\n # Set enabled: true (or remove this block) to activate.\n execution:\n enabled: false\n";
|
|
179
338
|
/** All task example data as a flat array (JSON-safe) */
|
|
180
339
|
export declare const allTaskData: readonly unknown[];
|
|
181
340
|
/** Map of task ID (filename stem) → raw YAML string (preserves comments) */
|
|
182
341
|
export declare const taskYamlFiles: Record<string, string>;
|
|
183
342
|
/** List of task file stems, in alphabetical order */
|
|
184
|
-
export declare const TASK_FILE_NAMES: readonly ["example-groq-blog-listing", "example-studio-custom-input"];
|
|
343
|
+
export declare const TASK_FILE_NAMES: readonly ["example-groq-blog-listing", "example-id-based-ref", "example-mixed-ref-types", "example-path-based-ref", "example-perspective-ref", "example-studio-custom-input"];
|
|
185
344
|
export type ExampleType = "config" | "source" | "rubric" | "threshold" | "ailf-config" | "task";
|
|
186
345
|
export declare const EXAMPLE_TYPES: readonly ExampleType[];
|
|
187
346
|
export interface ExampleRecord {
|
|
@@ -191,4 +350,4 @@ export interface ExampleRecord {
|
|
|
191
350
|
}
|
|
192
351
|
export declare const EXAMPLES: Record<ExampleType, ExampleRecord>;
|
|
193
352
|
/** GitHub Actions workflow template for AI Literacy evaluation */
|
|
194
|
-
export declare const workflowYaml = "# \u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n# AI Literacy Evaluation \u2014 GitHub Actions workflow\n# \u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n#\n# Evaluates your documentation quality on every pull request.\n# The AILF CLI reads your .ailf/tasks/ definitions, submits them\n# to the AILF API for evaluation, and writes a score report.\n#\n# Prerequisites:\n# Add one secret to your repository (Settings \u2192 Secrets \u2192 Actions):\n# AILF_API_KEY \u2014 your API key (starts with ailf_live_sk_)\n# NPM_TOKEN \u2014 npm token with read access to @sanity scope\n#\n# Customization:\n# - Narrow the trigger paths to reduce cost (see comment below)\n# - Check debug_mode for faster iteration (fewer tests)\n# - See: https://github.com/sanity-
|
|
353
|
+
export declare const workflowYaml = "# \u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n# AI Literacy Evaluation \u2014 GitHub Actions workflow\n# \u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n#\n# Evaluates your documentation quality on every pull request.\n# The AILF CLI reads your .ailf/tasks/ definitions, submits them\n# to the AILF API for evaluation, and writes a score report.\n#\n# Prerequisites:\n# Add one secret to your repository (Settings \u2192 Secrets \u2192 Actions):\n# AILF_API_KEY \u2014 your API key (starts with ailf_live_sk_)\n# NPM_TOKEN \u2014 npm token with read access to @sanity scope\n#\n# Customization:\n# - Narrow the trigger paths to reduce cost (see comment below)\n# - Check debug_mode for faster iteration (fewer tests)\n# - See: https://github.com/sanity-labs/ai-literacy-framework\n# 
\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n\nname: AI Literacy Eval\n\non:\n pull_request:\n branches: [main]\n # Runs on every PR to main by default. To reduce cost:\n # paths: [\".ailf/**\", \"docs/**\"]\n\n workflow_dispatch:\n inputs:\n debug_mode:\n description: \"Run in debug mode (fewer tests, faster iteration)\"\n type: boolean\n default: false\n\nconcurrency:\n group: ailf-eval-${{ github.event.pull_request.number || github.ref }}\n cancel-in-progress: true\n\njobs:\n evaluate:\n name: AI Literacy Evaluation\n runs-on: ubuntu-latest\n permissions:\n contents: read\n pull-requests: write\n steps:\n - uses: actions/checkout@v4\n\n - name: Configure npm for @sanity scope\n run:\n echo \"//registry.npmjs.org/:_authToken=${{ secrets.NPM_TOKEN }}\" >>\n ~/.npmrc\n\n - name: Run evaluation\n id: eval\n env:\n AILF_API_KEY: ${{ secrets.AILF_API_KEY }}\n run: |\n npx @sanity/ailf@latest pipeline --remote \\\n --output /tmp/ailf-report.md \\\n ${{ inputs.debug_mode && '--debug' || '' }}\n\n - name: Post PR comment\n if: always() && github.event_name == 'pull_request'\n uses: actions/github-script@v7\n with:\n script: |\n const fs = require('fs');\n\n // --- Constants ---\n const MARKER = '<!-- ailf-score-report -->';\n const HISTORY_START = '<!-- ailf-score-history -->';\n const HISTORY_END = '<!-- /ailf-score-history -->';\n const MAX_HISTORY = 3; // keep at most 3 prior runs\n\n // --- Read new report ---\n let newReport;\n try {\n newReport = fs.readFileSync('/tmp/ailf-report.md', 'utf-8');\n } catch {\n newReport = `## \u26A0\uFE0F AI Literacy Evaluation\\n\\nNo report generated. 
Check the [workflow logs](${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}).`;\n }\n\n const prNumber = context.issue?.number || context.payload?.pull_request?.number;\n if (!prNumber) {\n console.log('No PR number found, skipping comment');\n return;\n }\n\n // --- Find existing comment ---\n const { data: comments } = await github.rest.issues.listComments({\n owner: context.repo.owner, repo: context.repo.repo,\n issue_number: prNumber,\n });\n const existing = comments.find(c => c.body?.includes(MARKER));\n\n // --- Build history from previous comment ---\n let historyEntries = [];\n if (existing) {\n const oldBody = existing.body || '';\n\n // Collect existing collapsed history entries\n const histStart = oldBody.indexOf(HISTORY_START);\n const histEnd = oldBody.indexOf(HISTORY_END);\n if (histStart !== -1 && histEnd !== -1) {\n const historyContent = oldBody.slice(histStart + HISTORY_START.length, histEnd).trim();\n // Split on </details> boundaries to get individual entries\n if (historyContent) {\n historyEntries = historyContent\n .split(/<\\/details>\\s*/)\n .map(s => s.trim())\n .filter(s => s.startsWith('<details>'))\n .map(s => s + '\\n</details>');\n }\n }\n\n // Extract the current report (will become the newest history entry)\n let previousReport = '';\n if (histStart !== -1) {\n // Report is between MARKER and the \"Previous runs\" heading (or history section)\n const markerIdx = oldBody.indexOf(MARKER);\n // Find the --- separator before history\n const separatorIdx = oldBody.lastIndexOf('---', histStart);\n const endIdx = separatorIdx > markerIdx ? 
separatorIdx : histStart;\n previousReport = oldBody.slice(markerIdx + MARKER.length, endIdx).trim();\n } else {\n // No history yet \u2014 everything after MARKER is the report\n const markerIdx = oldBody.indexOf(MARKER);\n if (markerIdx !== -1) {\n previousReport = oldBody.slice(markerIdx + MARKER.length).trim();\n }\n }\n\n // Collapse the previous report into a <details> entry\n if (previousReport) {\n const scoreMatch = previousReport.match(/Overall:\\s*(\\d+)\\/100/);\n const score = scoreMatch ? scoreMatch[1] : '?';\n const dateMatch = previousReport.match(/Generated by.*?\u00B7\\s*([^\u00B7<\\n*]+)/);\n const date = dateMatch\n ? dateMatch[1].trim()\n : new Date().toISOString().slice(0, 16).replace('T', ' ') + ' UTC';\n const entry = `<details>\\n<summary>\uD83D\uDCDC ${date} \u2014 ${score}/100</summary>\\n\\n${previousReport}\\n\\n</details>`;\n historyEntries.unshift(entry); // newest first\n }\n\n // Enforce max history limit\n historyEntries = historyEntries.slice(0, MAX_HISTORY);\n }\n\n // --- Assemble final comment ---\n const historySection = historyEntries.length > 0\n ? 
`\\n\\n---\\n\\n### \uD83D\uDCDC Previous runs\\n\\n${HISTORY_START}\\n${historyEntries.join('\\n\\n')}\\n${HISTORY_END}`\n : '';\n const finalBody = `${MARKER}\\n${newReport}${historySection}`;\n\n if (existing) {\n await github.rest.issues.updateComment({\n owner: context.repo.owner, repo: context.repo.repo,\n comment_id: existing.id, body: finalBody,\n });\n console.log(`Updated comment (${historyEntries.length} history entries)`);\n } else {\n await github.rest.issues.createComment({\n owner: context.repo.owner, repo: context.repo.repo,\n issue_number: prNumber, body: finalBody,\n });\n console.log('Created new PR comment');\n }\n\n - name: Summary\n if: always()\n run: |\n if [ -f /tmp/ailf-report.md ]; then\n cat /tmp/ailf-report.md >> \"$GITHUB_STEP_SUMMARY\"\n else\n echo \"## \u26A0\uFE0F AI Literacy Evaluation\" >> \"$GITHUB_STEP_SUMMARY\"\n echo \"\" >> \"$GITHUB_STEP_SUMMARY\"\n echo \"No report generated. Check the workflow logs.\" >> \"$GITHUB_STEP_SUMMARY\"\n fi\n";
|
|
@@ -141,7 +141,7 @@ export const ailfConfigData = {
|
|
|
141
141
|
}
|
|
142
142
|
};
|
|
143
143
|
/** Raw YAML string for ailf-config example (preserves comments) */
|
|
144
|
-
export const ailfConfigYaml = "# ──────────────────────────────────────────────────────────────────────\n# .ailf/config.yaml — AI Literacy Framework project configuration\n# ──────────────────────────────────────────────────────────────────────\n#\n# This file configures how the AILF evaluation pipeline runs in this\n# repository. Place it at .ailf/config.yaml in your project root.\n#\n# Evaluations are submitted to the AILF API (ailf-api.sanity.build).\n# The API handles LLM calls, doc fetching, grading, and report\n# publishing. Your repo only needs one secret: AILF_API_KEY.\n#\n# Docs: https://github.com/sanity-
|
|
144
|
+
export const ailfConfigYaml = "# ──────────────────────────────────────────────────────────────────────\n# .ailf/config.yaml — AI Literacy Framework project configuration\n# ──────────────────────────────────────────────────────────────────────\n#\n# This file configures how the AILF evaluation pipeline runs in this\n# repository. Place it at .ailf/config.yaml in your project root.\n#\n# Evaluations are submitted to the AILF API (ailf-api.sanity.build).\n# The API handles LLM calls, doc fetching, grading, and report\n# publishing. Your repo only needs one secret: AILF_API_KEY.\n#\n# Docs: https://github.com/sanity-labs/ai-literacy-framework\n# ──────────────────────────────────────────────────────────────────────\n\n# Documentation source — which docs are being evaluated.\n#\n# This tells the pipeline which Sanity project and dataset contain\n# the documentation under test. For most users, this is Sanity's own\n# docs project.\n#\n# projectId — Sanity project ID (find yours at sanity.io/manage)\n# dataset — the dataset to query (e.g., \"production\", \"next\")\n# baseUrl — the public URL of your documentation site\n# (used by agentic mode to test agent discoverability)\nsource:\n projectId: \"3do82whm\"\n dataset: next\n baseUrl: \"https://www.sanity.io/docs\"\n\n# Trigger configuration — when evaluations run automatically.\n#\n# Each key is a trigger context. 
The pipeline checks which trigger\n# matches the current execution context (PR, merge, schedule, etc.)\n# and applies its settings.\n#\n# mode options:\n# validate-only — check that task YAML parses correctly (fast, no LLM calls)\n# eval — run the full evaluation pipeline\n#\n# paths — only trigger when files matching these globs change\n# blocking — if true, a failing eval blocks the PR merge\n# notify — if true, post results to configured notification channels\ntriggers:\n # On pull requests: just validate task files parse correctly\n pr:\n mode: validate-only\n\n # When .ailf/ files change in a PR: run a real evaluation\n pr-task-change:\n mode: eval\n paths: [\".ailf/**\"]\n\n # On merge to main: run evaluation (non-blocking)\n main:\n mode: eval\n blocking: false\n notify: true\n";
|
|
145
145
|
/** Parsed task data for example-groq-blog-listing (JSON-safe) */
|
|
146
146
|
export const exampleGroqBlogListingData = [
|
|
147
147
|
{
|
|
@@ -186,11 +186,233 @@ export const exampleGroqBlogListingData = [
|
|
|
186
186
|
"baseline": {
|
|
187
187
|
"enabled": true,
|
|
188
188
|
"rubric": "abbreviated"
|
|
189
|
+
},
|
|
190
|
+
"execution": {
|
|
191
|
+
"enabled": false
|
|
189
192
|
}
|
|
190
193
|
}
|
|
191
194
|
];
|
|
192
195
|
/** Raw YAML string for example-groq-blog-listing (preserves comments) */
|
|
193
|
-
export const exampleGroqBlogListingYaml = "# ──────────────────────────────────────────────────────────────────────\n# Example Task: Blog listing with GROQ queries\n# ──────────────────────────────────────────────────────────────────────\n#\n# This is a starter template — edit it for your own documentation.\n# Each task evaluates whether an AI coding agent can implement a feature\n# using your docs as context. Delete this file or replace it entirely.\n#\n#
|
|
196
|
+
export const exampleGroqBlogListingYaml = "# ──────────────────────────────────────────────────────────────────────\n# Example Task: Blog listing with GROQ queries\n# ──────────────────────────────────────────────────────────────────────\n#\n# This is a starter template — edit it for your own documentation.\n# Each task evaluates whether an AI coding agent can implement a feature\n# using your docs as context. Delete this file or replace it entirely.\n#\n# This example task is DISABLED by default. To enable it, either:\n# 1. Remove the execution.enabled: false line below, or\n# 2. Set execution.enabled: true\n#\n# Full field reference:\n# https://github.com/sanity-labs/ai-literacy-framework/blob/main/docs/CONTRIBUTING_TASKS.md\n# ──────────────────────────────────────────────────────────────────────\n\n# Unique identifier — lowercase alphanumeric with hyphens.\n# Must be unique across all task files in .ailf/tasks/.\n- id: example-groq-blog-listing\n\n # Short human-readable summary. Shown in score tables and reports.\n description: \"Example — Blog listing with GROQ queries\"\n\n # Feature area this task belongs to. Tasks with the same area are\n # grouped together in score summaries. Use a short kebab-case name.\n featureArea: groq\n\n # Gold-standard documentation articles for this task. The pipeline\n # fetches these from Sanity and injects them into the prompt for\n # baseline evaluation. 
Each entry needs:\n # slug — the article's URL slug in your docs site\n # reason — why this doc is relevant (helps with auditing)\n #\n # This example uses slug-based references — the simplest form.\n # See the other example tasks for path, id, and perspective references.\n canonicalDocs:\n - slug: groq-introduction\n reason: \"Core GROQ syntax and query language reference\"\n - slug: how-queries-work\n reason: \"Query execution model and best practices\"\n\n # When true, the pipeline auto-generates an additional rubric that\n # checks whether the LLM's response actually used the provided docs.\n docCoverage: true\n\n # Path to a gold-standard implementation, relative to canonical/.\n # The grader uses this as a reference when scoring code correctness.\n referenceSolution: canonical/example-groq-blog-listing.ts\n\n # vars.task — the implementation prompt given to the LLM.\n # Write this as if you're asking a developer to build the feature.\n # Be specific about requirements so the grader can evaluate clearly.\n #\n # vars.docs — leave empty (\"\"). The pipeline fills this in:\n # • Gold variant: injected with canonical doc content\n # • Baseline variant: left empty (tests model knowledge alone)\n vars:\n task: |\n Create a Next.js page component that lists blog posts from Sanity\n using GROQ. The page should display the title, slug, and published\n date for each post, sorted by most recent first. Use the Sanity\n client to fetch data.\n docs: \"\"\n\n # Grading assertions — how the LLM's response is scored.\n #\n # \"llm-rubric\" assertions use a grader LLM to score against criteria.\n # The \"template\" references a rubric from config/rubrics.yaml.\n # The \"criteria\" are task-specific bullets injected into the template.\n #\n # Available templates:\n # task-completion — did the LLM implement the feature? (weight: 0.50)\n # code-correctness — is the code idiomatic and correct? 
(weight: 0.25)\n #\n # You can also use value-based assertions:\n # - type: contains\n # value: \"client.fetch\"\n # - type: contains-any\n # value: [\"createClient\", \"sanityClient\"]\n assert:\n - type: llm-rubric\n template: task-completion\n criteria:\n - \"Uses the groq tagged template literal\"\n - \"Fetches blog posts with title, slug, and publishedAt fields\"\n - \"Orders results by publishedAt in descending order\"\n\n - type: llm-rubric\n template: code-correctness\n criteria:\n - \"Uses createClient from @sanity/client or next-sanity\"\n - \"Exports a valid Next.js page component\"\n\n # Baseline variant configuration.\n # enabled — set to false to skip this task entirely\n # rubric — \"abbreviated\" (faster, default), \"full\", or \"none\"\n baseline:\n enabled: true\n rubric: abbreviated\n\n # Execution configuration.\n # Example tasks ship disabled so they don't run automatically.\n # Set enabled: true (or remove this block) to activate.\n execution:\n enabled: false\n";
|
|
197
|
+
/** Parsed task data for example-id-based-ref (JSON-safe) */
|
|
198
|
+
export const exampleIdBasedRefData = [
|
|
199
|
+
{
|
|
200
|
+
"id": "example-id-based-ref",
|
|
201
|
+
"description": "Example — GROQ cheat sheet (ID-based doc references)",
|
|
202
|
+
"featureArea": "groq",
|
|
203
|
+
"canonicalDocs": [
|
|
204
|
+
{
|
|
205
|
+
"id": "81b839a4-2fc1-4769-941a-ec4de9276492",
|
|
206
|
+
"slug": "query-cheat-sheet",
|
|
207
|
+
"reason": "GROQ query patterns and cheat sheet reference"
|
|
208
|
+
},
|
|
209
|
+
{
|
|
210
|
+
"id": "44e5fc43-4628-4a4c-8e00-6ae12b83b8d2",
|
|
211
|
+
"slug": "groq-introduction",
|
|
212
|
+
"reason": "Core GROQ syntax and language overview"
|
|
213
|
+
}
|
|
214
|
+
],
|
|
215
|
+
"docCoverage": true,
|
|
216
|
+
"vars": {
|
|
217
|
+
"task": "Using the GROQ query language, write queries for the following\ncommon content operations:\n1. Fetch all documents of type \"post\" with title and slug\n2. Filter posts published after a specific date\n3. Sort results by publishedAt descending with a limit of 10\n4. Join author data from a reference field\nExplain each query pattern and when to use it.\n",
|
|
218
|
+
"docs": ""
|
|
219
|
+
},
|
|
220
|
+
"assert": [
|
|
221
|
+
{
|
|
222
|
+
"type": "llm-rubric",
|
|
223
|
+
"template": "task-completion",
|
|
224
|
+
"criteria": [
|
|
225
|
+
"Provides a GROQ query that filters by _type",
|
|
226
|
+
"Demonstrates date-based filtering",
|
|
227
|
+
"Shows ordering with order() and slicing for limits",
|
|
228
|
+
"Demonstrates reference expansion (dereferencing with ->)"
|
|
229
|
+
]
|
|
230
|
+
},
|
|
231
|
+
{
|
|
232
|
+
"type": "llm-rubric",
|
|
233
|
+
"template": "code-correctness",
|
|
234
|
+
"criteria": [
|
|
235
|
+
"All GROQ queries use valid syntax",
|
|
236
|
+
"Projections correctly select and alias fields"
|
|
237
|
+
]
|
|
238
|
+
}
|
|
239
|
+
],
|
|
240
|
+
"baseline": {
|
|
241
|
+
"enabled": true,
|
|
242
|
+
"rubric": "abbreviated"
|
|
243
|
+
},
|
|
244
|
+
"execution": {
|
|
245
|
+
"enabled": false
|
|
246
|
+
}
|
|
247
|
+
}
|
|
248
|
+
];
|
|
249
|
+
/** Raw YAML string for example-id-based-ref (preserves comments) */
|
|
250
|
+
export const exampleIdBasedRefYaml = "# ──────────────────────────────────────────────────────────────────────\n# Example Task: Document ID-based canonical doc references\n# ──────────────────────────────────────────────────────────────────────\n#\n# Demonstrates using `id` to reference canonical documentation by\n# Sanity document `_id`. This is useful for:\n# - Draft documents that don't have a stable slug yet\n# - Programmatic references from imports or migrations\n# - Documents where you know the _id but not the slug\n#\n# The `id` ref type can also carry optional `slug` and `path` fields\n# as human-readable annotations — these are NOT used for resolution,\n# only for display in logs and reports.\n#\n# This example task is DISABLED by default. To enable it, either:\n# 1. Remove the execution.enabled: false line below, or\n# 2. Set execution.enabled: true\n#\n# @see docs/design-docs/canonical-doc-resolution.md\n# ──────────────────────────────────────────────────────────────────────\n\n- id: example-id-based-ref\n description: \"Example — GROQ cheat sheet (ID-based doc references)\"\n\n featureArea: groq\n\n # ID-based canonical doc references.\n #\n # Use the Sanity document _id to reference articles directly.\n # Optional slug/path annotations help humans reading the YAML\n # but are NOT used for resolution — only the `id` field matters.\n #\n # These IDs reference real articles in the Sanity docs (next dataset):\n # 81b839a4... = \"Query Cheat Sheet - GROQ\"\n # 44e5fc43... 
= \"GROQ introduction\"\n canonicalDocs:\n - id: \"81b839a4-2fc1-4769-941a-ec4de9276492\"\n slug: query-cheat-sheet # annotation only — not used for resolution\n reason: \"GROQ query patterns and cheat sheet reference\"\n - id: \"44e5fc43-4628-4a4c-8e00-6ae12b83b8d2\"\n slug: groq-introduction # annotation only — not used for resolution\n reason: \"Core GROQ syntax and language overview\"\n\n docCoverage: true\n\n vars:\n task: |\n Using the GROQ query language, write queries for the following\n common content operations:\n 1. Fetch all documents of type \"post\" with title and slug\n 2. Filter posts published after a specific date\n 3. Sort results by publishedAt descending with a limit of 10\n 4. Join author data from a reference field\n Explain each query pattern and when to use it.\n docs: \"\"\n\n assert:\n - type: llm-rubric\n template: task-completion\n criteria:\n - \"Provides a GROQ query that filters by _type\"\n - \"Demonstrates date-based filtering\"\n - \"Shows ordering with order() and slicing for limits\"\n - \"Demonstrates reference expansion (dereferencing with ->)\"\n\n - type: llm-rubric\n template: code-correctness\n criteria:\n - \"All GROQ queries use valid syntax\"\n - \"Projections correctly select and alias fields\"\n\n baseline:\n enabled: true\n rubric: abbreviated\n\n # Example tasks ship disabled so they don't run automatically.\n # Set enabled: true (or remove this block) to activate.\n execution:\n enabled: false\n";
|
|
251
|
+
/** Parsed task data for example-mixed-ref-types (JSON-safe) */
|
|
252
|
+
export const exampleMixedRefTypesData = [
|
|
253
|
+
{
|
|
254
|
+
"id": "example-mixed-ref-types",
|
|
255
|
+
"description": "Example — Mixed canonical doc reference types (slug + path + id + perspective)",
|
|
256
|
+
"featureArea": "groq",
|
|
257
|
+
"canonicalDocs": [
|
|
258
|
+
{
|
|
259
|
+
"slug": "groq-introduction",
|
|
260
|
+
"reason": "Core GROQ language overview"
|
|
261
|
+
},
|
|
262
|
+
{
|
|
263
|
+
"path": "content-lake/how-queries-work",
|
|
264
|
+
"reason": "Query execution model (disambiguated by section)"
|
|
265
|
+
},
|
|
266
|
+
{
|
|
267
|
+
"id": "81b839a4-2fc1-4769-941a-ec4de9276492",
|
|
268
|
+
"slug": "query-cheat-sheet",
|
|
269
|
+
"reason": "GROQ query patterns cheat sheet (referenced by document ID)"
|
|
270
|
+
},
|
|
271
|
+
{
|
|
272
|
+
"perspective": "rE9TSJvR4",
|
|
273
|
+
"reason": "Additional GROQ docs from the test content release"
|
|
274
|
+
}
|
|
275
|
+
],
|
|
276
|
+
"docCoverage": true,
|
|
277
|
+
"vars": {
|
|
278
|
+
"task": "Create a comprehensive GROQ query guide that covers:\n1. Basic query structure and filtering by document type\n2. Projections and field selection\n3. Ordering, slicing, and pagination\n4. Reference joins and nested data\n5. Webhook GROQ filter configuration\nInclude working examples for each concept and explain the\nquery execution model.\n",
|
|
279
|
+
"docs": ""
|
|
280
|
+
},
|
|
281
|
+
"assert": [
|
|
282
|
+
{
|
|
283
|
+
"type": "llm-rubric",
|
|
284
|
+
"template": "task-completion",
|
|
285
|
+
"criteria": [
|
|
286
|
+
"Covers basic GROQ query structure with _type filtering",
|
|
287
|
+
"Demonstrates projections with field aliasing",
|
|
288
|
+
"Shows ordering with order() and slice syntax",
|
|
289
|
+
"Includes reference joins with the dereference operator",
|
|
290
|
+
"Explains webhook GROQ filter patterns"
|
|
291
|
+
]
|
|
292
|
+
},
|
|
293
|
+
{
|
|
294
|
+
"type": "llm-rubric",
|
|
295
|
+
"template": "code-correctness",
|
|
296
|
+
"criteria": [
|
|
297
|
+
"All GROQ queries use valid syntax",
|
|
298
|
+
"Examples are practical and self-contained"
|
|
299
|
+
]
|
|
300
|
+
}
|
|
301
|
+
],
|
|
302
|
+
"baseline": {
|
|
303
|
+
"enabled": true,
|
|
304
|
+
"rubric": "abbreviated"
|
|
305
|
+
},
|
|
306
|
+
"execution": {
|
|
307
|
+
"enabled": false
|
|
308
|
+
}
|
|
309
|
+
}
|
|
310
|
+
];
|
|
311
|
+
/** Raw YAML string for example-mixed-ref-types (preserves comments) */
|
|
312
|
+
export const exampleMixedRefTypesYaml = "# ──────────────────────────────────────────────────────────────────────\n# Example Task: All canonical doc reference types in one task\n# ──────────────────────────────────────────────────────────────────────\n#\n# Demonstrates combining all four canonical doc reference strategies\n# in a single task's canonicalDocs array:\n#\n# slug — by article slug (simplest, may not be unique)\n# path — by section/slug path (unique, preferred)\n# id — by Sanity document _id (for drafts, imports)\n# perspective — by content release (one-to-many expansion)\n#\n# Each reference type resolves through a different strategy, but the\n# pipeline merges them into a single flat documentation context. The\n# LLM sees the same combined markdown regardless of how docs were found.\n#\n# This example task is DISABLED by default. To enable it, either:\n# 1. Remove the execution.enabled: false line below, or\n# 2. Set execution.enabled: true\n#\n# @see docs/design-docs/canonical-doc-resolution.md\n# ──────────────────────────────────────────────────────────────────────\n\n- id: example-mixed-ref-types\n description:\n \"Example — Mixed canonical doc reference types (slug + path + id +\n perspective)\"\n\n featureArea: groq\n\n # All four canonical doc reference types demonstrated together.\n #\n # The pipeline resolves each ref independently:\n # 1. slug refs → GROQ query by slug.current\n # 2. path refs → GROQ query by section + slug (or slug fallback)\n # 3. id refs → GROQ query by _id\n # 4. 
perspective refs → expand to all articles in the release\n #\n # After resolution, all entries become a flat list of document slugs.\n # The fetched markdown is concatenated into a single context string.\n canonicalDocs:\n # Slug reference — simplest form, resolves by article slug\n - slug: groq-introduction\n reason: \"Core GROQ language overview\"\n\n # Path reference — section-qualified, uniquely identifies the article\n - path: content-lake/how-queries-work\n reason: \"Query execution model (disambiguated by section)\"\n\n # ID reference — resolves by Sanity document _id\n # Optional slug annotation helps humans reading the YAML\n - id: \"81b839a4-2fc1-4769-941a-ec4de9276492\"\n slug: query-cheat-sheet\n reason: \"GROQ query patterns cheat sheet (referenced by document ID)\"\n\n # Perspective reference — expands to all articles in the release\n # This adds webhooks, query-cheat-sheet, and groq-joins from release rE9TSJvR4\n # Note: query-cheat-sheet appears via both the id ref and the perspective ref,\n # but the pipeline deduplicates slugs automatically.\n - perspective: rE9TSJvR4\n reason: \"Additional GROQ docs from the test content release\"\n\n docCoverage: true\n\n vars:\n task: |\n Create a comprehensive GROQ query guide that covers:\n 1. Basic query structure and filtering by document type\n 2. Projections and field selection\n 3. Ordering, slicing, and pagination\n 4. Reference joins and nested data\n 5. 
Webhook GROQ filter configuration\n Include working examples for each concept and explain the\n query execution model.\n docs: \"\"\n\n assert:\n - type: llm-rubric\n template: task-completion\n criteria:\n - \"Covers basic GROQ query structure with _type filtering\"\n - \"Demonstrates projections with field aliasing\"\n - \"Shows ordering with order() and slice syntax\"\n - \"Includes reference joins with the dereference operator\"\n - \"Explains webhook GROQ filter patterns\"\n\n - type: llm-rubric\n template: code-correctness\n criteria:\n - \"All GROQ queries use valid syntax\"\n - \"Examples are practical and self-contained\"\n\n baseline:\n enabled: true\n rubric: abbreviated\n\n # Example tasks ship disabled so they don't run automatically.\n # Set enabled: true (or remove this block) to activate.\n execution:\n enabled: false\n";
|
|
313
|
+
/** Parsed task data for example-path-based-ref (JSON-safe) */
|
|
314
|
+
export const examplePathBasedRefData = [
|
|
315
|
+
{
|
|
316
|
+
"id": "example-path-based-ref",
|
|
317
|
+
"description": "Example — GROQ webhooks (path-based doc references)",
|
|
318
|
+
"featureArea": "groq",
|
|
319
|
+
"canonicalDocs": [
|
|
320
|
+
{
|
|
321
|
+
"path": "content-lake/webhooks",
|
|
322
|
+
"reason": "GROQ-powered webhooks configuration and GROQ filter patterns"
|
|
323
|
+
},
|
|
324
|
+
{
|
|
325
|
+
"path": "content-lake/how-queries-work",
|
|
326
|
+
"reason": "How GROQ queries execute — projection, filtering, ordering"
|
|
327
|
+
}
|
|
328
|
+
],
|
|
329
|
+
"docCoverage": true,
|
|
330
|
+
"vars": {
|
|
331
|
+
"task": "Create a webhook configuration for a Sanity project that triggers\nwhen blog post documents are published. The webhook should use a\nGROQ filter to match only documents of type \"post\" and a GROQ\nprojection to include the title, slug, and publishedAt fields in\nthe webhook payload. Explain the GROQ filter syntax used.\n",
|
|
332
|
+
"docs": ""
|
|
333
|
+
},
|
|
334
|
+
"assert": [
|
|
335
|
+
{
|
|
336
|
+
"type": "llm-rubric",
|
|
337
|
+
"template": "task-completion",
|
|
338
|
+
"criteria": [
|
|
339
|
+
"Configures a webhook with a GROQ filter for _type == 'post'",
|
|
340
|
+
"Uses a GROQ projection to shape the payload",
|
|
341
|
+
"Includes title, slug, and publishedAt in the projection",
|
|
342
|
+
"Explains when the webhook fires (on publish events)"
|
|
343
|
+
]
|
|
344
|
+
},
|
|
345
|
+
{
|
|
346
|
+
"type": "llm-rubric",
|
|
347
|
+
"template": "code-correctness",
|
|
348
|
+
"criteria": [
|
|
349
|
+
"GROQ filter syntax is valid",
|
|
350
|
+
"Projection syntax correctly selects nested fields"
|
|
351
|
+
]
|
|
352
|
+
}
|
|
353
|
+
],
|
|
354
|
+
"baseline": {
|
|
355
|
+
"enabled": true,
|
|
356
|
+
"rubric": "abbreviated"
|
|
357
|
+
},
|
|
358
|
+
"execution": {
|
|
359
|
+
"enabled": false
|
|
360
|
+
}
|
|
361
|
+
}
|
|
362
|
+
];
|
|
363
|
+
/** Raw YAML string for example-path-based-ref (preserves comments) */
|
|
364
|
+
export const examplePathBasedRefYaml = "# ──────────────────────────────────────────────────────────────────────\n# Example Task: Path-based canonical doc references\n# ──────────────────────────────────────────────────────────────────────\n#\n# Demonstrates using `path` to reference canonical documentation.\n# Paths are the preferred reference type because they uniquely identify\n# an article across sections (unlike slugs, which can collide).\n#\n# Path format:\n# - Simple: \"webhooks\" → resolves by slug lookup\n# - Sectioned: \"content-lake/webhooks\" → disambiguates by section + slug\n#\n# This example task is DISABLED by default. To enable it, either:\n# 1. Remove the execution.enabled: false line below, or\n# 2. Set execution.enabled: true\n#\n# @see docs/design-docs/canonical-doc-resolution.md\n# ──────────────────────────────────────────────────────────────────────\n\n- id: example-path-based-ref\n description: \"Example — GROQ webhooks (path-based doc references)\"\n\n featureArea: groq\n\n # Path-based canonical doc references.\n #\n # Use \"section/slug\" format to uniquely identify articles:\n # - \"content-lake/webhooks\" → the webhooks article in the Content Lake section\n # - \"content-lake/how-queries-work\" → disambiguated from any other section\n #\n # Simple paths (just the slug) also work but don't disambiguate sections.\n canonicalDocs:\n - path: content-lake/webhooks\n reason: \"GROQ-powered webhooks configuration and GROQ filter patterns\"\n - path: content-lake/how-queries-work\n reason: \"How GROQ queries execute — projection, filtering, ordering\"\n\n docCoverage: true\n\n vars:\n task: |\n Create a webhook configuration for a Sanity project that triggers\n when blog post documents are published. The webhook should use a\n GROQ filter to match only documents of type \"post\" and a GROQ\n projection to include the title, slug, and publishedAt fields in\n the webhook payload. 
Explain the GROQ filter syntax used.\n docs: \"\"\n\n assert:\n - type: llm-rubric\n template: task-completion\n criteria:\n - \"Configures a webhook with a GROQ filter for _type == 'post'\"\n - \"Uses a GROQ projection to shape the payload\"\n - \"Includes title, slug, and publishedAt in the projection\"\n - \"Explains when the webhook fires (on publish events)\"\n\n - type: llm-rubric\n template: code-correctness\n criteria:\n - \"GROQ filter syntax is valid\"\n - \"Projection syntax correctly selects nested fields\"\n\n baseline:\n enabled: true\n rubric: abbreviated\n\n # Example tasks ship disabled so they don't run automatically.\n # Set enabled: true (or remove this block) to activate.\n execution:\n enabled: false\n";
|
|
365
|
+
/** Parsed task data for example-perspective-ref (JSON-safe) */
|
|
366
|
+
export const examplePerspectiveRefData = [
|
|
367
|
+
{
|
|
368
|
+
"id": "example-perspective-ref",
|
|
369
|
+
"description": "Example — GROQ features from content release (perspective-based doc references)",
|
|
370
|
+
"featureArea": "groq",
|
|
371
|
+
"canonicalDocs": [
|
|
372
|
+
{
|
|
373
|
+
"perspective": "rE9TSJvR4",
|
|
374
|
+
"reason": "All GROQ documentation updates in the test content release"
|
|
375
|
+
},
|
|
376
|
+
{
|
|
377
|
+
"slug": "groq-introduction",
|
|
378
|
+
"reason": "Foundational GROQ syntax reference (published, stable)"
|
|
379
|
+
}
|
|
380
|
+
],
|
|
381
|
+
"docCoverage": true,
|
|
382
|
+
"vars": {
|
|
383
|
+
"task": "Using GROQ, demonstrate advanced query patterns including:\n1. Joining data across document types using references\n2. Filtering webhook payloads with GROQ projections\n3. Using the query cheat sheet patterns for common operations\nProvide working GROQ query examples for each pattern.\n",
|
|
384
|
+
"docs": ""
|
|
385
|
+
},
|
|
386
|
+
"assert": [
|
|
387
|
+
{
|
|
388
|
+
"type": "llm-rubric",
|
|
389
|
+
"template": "task-completion",
|
|
390
|
+
"criteria": [
|
|
391
|
+
"Demonstrates GROQ join syntax for cross-document queries",
|
|
392
|
+
"Shows GROQ filter patterns for webhook configuration",
|
|
393
|
+
"Includes practical query examples from cheat sheet patterns"
|
|
394
|
+
]
|
|
395
|
+
},
|
|
396
|
+
{
|
|
397
|
+
"type": "llm-rubric",
|
|
398
|
+
"template": "code-correctness",
|
|
399
|
+
"criteria": [
|
|
400
|
+
"All GROQ queries use valid syntax",
|
|
401
|
+
"Reference joins use correct dereference operator (->)"
|
|
402
|
+
]
|
|
403
|
+
}
|
|
404
|
+
],
|
|
405
|
+
"baseline": {
|
|
406
|
+
"enabled": true,
|
|
407
|
+
"rubric": "abbreviated"
|
|
408
|
+
},
|
|
409
|
+
"execution": {
|
|
410
|
+
"enabled": false
|
|
411
|
+
}
|
|
412
|
+
}
|
|
413
|
+
];
|
|
414
|
+
/** Raw YAML string for example-perspective-ref (preserves comments) */
|
|
415
|
+
export const examplePerspectiveRefYaml = "# ──────────────────────────────────────────────────────────────────────\n# Example Task: Perspective / content release doc references\n# ──────────────────────────────────────────────────────────────────────\n#\n# Demonstrates using `perspective` to reference all documentation\n# articles within a content release. This is the key capability for\n# evaluating NEW feature documentation before it's published.\n#\n# How it works:\n# - A perspective ref is one-to-many: the doc fetcher queries the\n# named release and expands it to ALL articles versioned within it.\n# - Downstream consumers see the same flat DocContext[] regardless\n# of how docs were resolved.\n# - When the release is published, the perspective entry becomes a\n# no-op (articles are now in published). Migrate to explicit path\n# or slug refs at your convenience.\n#\n# This example task is DISABLED by default. To enable it, either:\n# 1. Remove the execution.enabled: false line below, or\n# 2. Set execution.enabled: true\n#\n# @see docs/design-docs/canonical-doc-resolution.md\n# ──────────────────────────────────────────────────────────────────────\n\n- id: example-perspective-ref\n description:\n \"Example — GROQ features from content release (perspective-based doc\n references)\"\n\n featureArea: groq\n\n # Perspective-based canonical doc reference.\n #\n # The perspective ID references a content release in the Sanity\n # Content Lake. 
At evaluation time, the doc fetcher auto-discovers\n # all articles versioned in this release and includes them as\n # canonical documentation context.\n #\n # Release rE9TSJvR4 contains:\n # - \"GROQ-powered webhooks\" (webhooks)\n # - \"Query Cheat Sheet - GROQ\" (query-cheat-sheet)\n # - \"GROQ joins\" (groq-joins)\n #\n # You can combine perspective refs with explicit slug/path/id refs\n # to include foundational published docs alongside release content.\n canonicalDocs:\n - perspective: rE9TSJvR4\n reason: \"All GROQ documentation updates in the test content release\"\n - slug: groq-introduction\n reason: \"Foundational GROQ syntax reference (published, stable)\"\n\n docCoverage: true\n\n vars:\n task: |\n Using GROQ, demonstrate advanced query patterns including:\n 1. Joining data across document types using references\n 2. Filtering webhook payloads with GROQ projections\n 3. Using the query cheat sheet patterns for common operations\n Provide working GROQ query examples for each pattern.\n docs: \"\"\n\n assert:\n - type: llm-rubric\n template: task-completion\n criteria:\n - \"Demonstrates GROQ join syntax for cross-document queries\"\n - \"Shows GROQ filter patterns for webhook configuration\"\n - \"Includes practical query examples from cheat sheet patterns\"\n\n - type: llm-rubric\n template: code-correctness\n criteria:\n - \"All GROQ queries use valid syntax\"\n - \"Reference joins use correct dereference operator (->)\"\n\n baseline:\n enabled: true\n rubric: abbreviated\n\n # Example tasks ship disabled so they don't run automatically.\n # Set enabled: true (or remove this block) to activate.\n execution:\n enabled: false\n";
|
|
194
416
|
/** Parsed task data for example-studio-custom-input (JSON-safe) */
|
|
195
417
|
export const exampleStudioCustomInputData = [
|
|
196
418
|
{
|
|
@@ -199,7 +421,7 @@ export const exampleStudioCustomInputData = [
|
|
|
199
421
|
"featureArea": "studio",
|
|
200
422
|
"canonicalDocs": [
|
|
201
423
|
{
|
|
202
|
-
"slug": "custom-input-
|
|
424
|
+
"slug": "custom-input-widgets",
|
|
203
425
|
"reason": "Guide for building custom form inputs in Sanity Studio"
|
|
204
426
|
}
|
|
205
427
|
],
|
|
@@ -232,26 +454,37 @@ export const exampleStudioCustomInputData = [
|
|
|
232
454
|
"baseline": {
|
|
233
455
|
"enabled": true,
|
|
234
456
|
"rubric": "abbreviated"
|
|
457
|
+
},
|
|
458
|
+
"execution": {
|
|
459
|
+
"enabled": false
|
|
235
460
|
}
|
|
236
461
|
}
|
|
237
462
|
];
|
|
238
463
|
/** Raw YAML string for example-studio-custom-input (preserves comments) */
|
|
239
|
-
export const exampleStudioCustomInputYaml = "# ──────────────────────────────────────────────────────────────────────\n# Example Task: Custom input component in Sanity Studio\n# ──────────────────────────────────────────────────────────────────────\n#\n# This is a starter template — edit it for your own documentation.\n# Delete this file or replace it with your own tasks.\n#\n# To
|
|
464
|
+
export const exampleStudioCustomInputYaml = "# ──────────────────────────────────────────────────────────────────────\n# Example Task: Custom input component in Sanity Studio\n# ──────────────────────────────────────────────────────────────────────\n#\n# This is a starter template — edit it for your own documentation.\n# Delete this file or replace it with your own tasks.\n#\n# This example task is DISABLED by default. To enable it, either:\n# 1. Remove the execution.enabled: false line below, or\n# 2. Set execution.enabled: true\n# ──────────────────────────────────────────────────────────────────────\n\n- id: example-studio-custom-input\n description: \"Example — Custom input component in Sanity Studio\"\n\n featureArea: studio\n\n # Slug-based canonical doc reference.\n # Note: the correct slug is \"custom-input-widgets\" (not \"custom-input-components\").\n canonicalDocs:\n - slug: custom-input-widgets\n reason: \"Guide for building custom form inputs in Sanity Studio\"\n\n docCoverage: true\n referenceSolution: canonical/example-studio-custom-input.ts\n\n vars:\n task: |\n Build a custom string input component for Sanity Studio that shows\n a character count below the input field. The component should accept\n a maxLength option from the field schema and display a warning when\n the text exceeds the limit.\n docs: \"\"\n\n assert:\n - type: llm-rubric\n template: task-completion\n criteria:\n - \"Implements a React component that renders a text input\"\n - \"Displays a live character count\"\n - \"Reads maxLength from schema options\"\n - \"Shows a visual warning when limit is exceeded\"\n\n - type: llm-rubric\n template: code-correctness\n criteria:\n - \"Uses the Sanity UI library for styling\"\n - \"Calls onChange with patch operations\"\n\n baseline:\n enabled: true\n rubric: abbreviated\n\n # Example tasks ship disabled so they don't run automatically.\n # Set enabled: true (or remove this block) to activate.\n execution:\n enabled: false\n";
|
|
240
465
|
// ---------------------------------------------------------------------------
|
|
241
466
|
// Aggregate task exports
|
|
242
467
|
// ---------------------------------------------------------------------------
|
|
243
468
|
/** All task example data as a flat array (JSON-safe) */
|
|
244
469
|
export const allTaskData = [
|
|
245
470
|
...exampleGroqBlogListingData,
|
|
471
|
+
...exampleIdBasedRefData,
|
|
472
|
+
...exampleMixedRefTypesData,
|
|
473
|
+
...examplePathBasedRefData,
|
|
474
|
+
...examplePerspectiveRefData,
|
|
246
475
|
...exampleStudioCustomInputData,
|
|
247
476
|
];
|
|
248
477
|
/** Map of task ID (filename stem) → raw YAML string (preserves comments) */
|
|
249
478
|
export const taskYamlFiles = {
|
|
250
479
|
"example-groq-blog-listing": exampleGroqBlogListingYaml,
|
|
480
|
+
"example-id-based-ref": exampleIdBasedRefYaml,
|
|
481
|
+
"example-mixed-ref-types": exampleMixedRefTypesYaml,
|
|
482
|
+
"example-path-based-ref": examplePathBasedRefYaml,
|
|
483
|
+
"example-perspective-ref": examplePerspectiveRefYaml,
|
|
251
484
|
"example-studio-custom-input": exampleStudioCustomInputYaml,
|
|
252
485
|
};
|
|
253
486
|
/** List of task file stems, in alphabetical order */
|
|
254
|
-
export const TASK_FILE_NAMES = ["example-groq-blog-listing", "example-studio-custom-input"];
|
|
487
|
+
export const TASK_FILE_NAMES = ["example-groq-blog-listing", "example-id-based-ref", "example-mixed-ref-types", "example-path-based-ref", "example-perspective-ref", "example-studio-custom-input"];
|
|
255
488
|
export const EXAMPLE_TYPES = ["config", "source", "rubric", "threshold", "ailf-config", "task"];
|
|
256
489
|
export const EXAMPLES = {
|
|
257
490
|
"config": {
|
|
@@ -289,4 +522,4 @@ export const EXAMPLES = {
|
|
|
289
522
|
// Raw file exports (non-data files, exported as raw strings)
|
|
290
523
|
// ---------------------------------------------------------------------------
|
|
291
524
|
/** GitHub Actions workflow template for AI Literacy evaluation */
|
|
292
|
-
export const workflowYaml = "# ──────────────────────────────────────────────────────────────────────\n# AI Literacy Evaluation — GitHub Actions workflow\n# ──────────────────────────────────────────────────────────────────────\n#\n# Evaluates your documentation quality on every pull request.\n# The AILF CLI reads your .ailf/tasks/ definitions, submits them\n# to the AILF API for evaluation, and writes a score report.\n#\n# Prerequisites:\n# Add one secret to your repository (Settings → Secrets → Actions):\n# AILF_API_KEY — your API key (starts with ailf_live_sk_)\n# NPM_TOKEN — npm token with read access to @sanity scope\n#\n# Customization:\n# - Narrow the trigger paths to reduce cost (see comment below)\n# - Check debug_mode for faster iteration (fewer tests)\n# - See: https://github.com/sanity-
|
|
525
|
+
export const workflowYaml = "# ──────────────────────────────────────────────────────────────────────\n# AI Literacy Evaluation — GitHub Actions workflow\n# ──────────────────────────────────────────────────────────────────────\n#\n# Evaluates your documentation quality on every pull request.\n# The AILF CLI reads your .ailf/tasks/ definitions, submits them\n# to the AILF API for evaluation, and writes a score report.\n#\n# Prerequisites:\n# Add one secret to your repository (Settings → Secrets → Actions):\n# AILF_API_KEY — your API key (starts with ailf_live_sk_)\n# NPM_TOKEN — npm token with read access to @sanity scope\n#\n# Customization:\n# - Narrow the trigger paths to reduce cost (see comment below)\n# - Check debug_mode for faster iteration (fewer tests)\n# - See: https://github.com/sanity-labs/ai-literacy-framework\n# ──────────────────────────────────────────────────────────────────────\n\nname: AI Literacy Eval\n\non:\n pull_request:\n branches: [main]\n # Runs on every PR to main by default. 
To reduce cost:\n # paths: [\".ailf/**\", \"docs/**\"]\n\n workflow_dispatch:\n inputs:\n debug_mode:\n description: \"Run in debug mode (fewer tests, faster iteration)\"\n type: boolean\n default: false\n\nconcurrency:\n group: ailf-eval-${{ github.event.pull_request.number || github.ref }}\n cancel-in-progress: true\n\njobs:\n evaluate:\n name: AI Literacy Evaluation\n runs-on: ubuntu-latest\n permissions:\n contents: read\n pull-requests: write\n steps:\n - uses: actions/checkout@v4\n\n - name: Configure npm for @sanity scope\n run:\n echo \"//registry.npmjs.org/:_authToken=${{ secrets.NPM_TOKEN }}\" >>\n ~/.npmrc\n\n - name: Run evaluation\n id: eval\n env:\n AILF_API_KEY: ${{ secrets.AILF_API_KEY }}\n run: |\n npx @sanity/ailf@latest pipeline --remote \\\n --output /tmp/ailf-report.md \\\n ${{ inputs.debug_mode && '--debug' || '' }}\n\n - name: Post PR comment\n if: always() && github.event_name == 'pull_request'\n uses: actions/github-script@v7\n with:\n script: |\n const fs = require('fs');\n\n // --- Constants ---\n const MARKER = '<!-- ailf-score-report -->';\n const HISTORY_START = '<!-- ailf-score-history -->';\n const HISTORY_END = '<!-- /ailf-score-history -->';\n const MAX_HISTORY = 3; // keep at most 3 prior runs\n\n // --- Read new report ---\n let newReport;\n try {\n newReport = fs.readFileSync('/tmp/ailf-report.md', 'utf-8');\n } catch {\n newReport = `## ⚠️ AI Literacy Evaluation\\n\\nNo report generated. 
Check the [workflow logs](${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}).`;\n }\n\n const prNumber = context.issue?.number || context.payload?.pull_request?.number;\n if (!prNumber) {\n console.log('No PR number found, skipping comment');\n return;\n }\n\n // --- Find existing comment ---\n const { data: comments } = await github.rest.issues.listComments({\n owner: context.repo.owner, repo: context.repo.repo,\n issue_number: prNumber,\n });\n const existing = comments.find(c => c.body?.includes(MARKER));\n\n // --- Build history from previous comment ---\n let historyEntries = [];\n if (existing) {\n const oldBody = existing.body || '';\n\n // Collect existing collapsed history entries\n const histStart = oldBody.indexOf(HISTORY_START);\n const histEnd = oldBody.indexOf(HISTORY_END);\n if (histStart !== -1 && histEnd !== -1) {\n const historyContent = oldBody.slice(histStart + HISTORY_START.length, histEnd).trim();\n // Split on </details> boundaries to get individual entries\n if (historyContent) {\n historyEntries = historyContent\n .split(/<\\/details>\\s*/)\n .map(s => s.trim())\n .filter(s => s.startsWith('<details>'))\n .map(s => s + '\\n</details>');\n }\n }\n\n // Extract the current report (will become the newest history entry)\n let previousReport = '';\n if (histStart !== -1) {\n // Report is between MARKER and the \"Previous runs\" heading (or history section)\n const markerIdx = oldBody.indexOf(MARKER);\n // Find the --- separator before history\n const separatorIdx = oldBody.lastIndexOf('---', histStart);\n const endIdx = separatorIdx > markerIdx ? 
separatorIdx : histStart;\n previousReport = oldBody.slice(markerIdx + MARKER.length, endIdx).trim();\n } else {\n // No history yet — everything after MARKER is the report\n const markerIdx = oldBody.indexOf(MARKER);\n if (markerIdx !== -1) {\n previousReport = oldBody.slice(markerIdx + MARKER.length).trim();\n }\n }\n\n // Collapse the previous report into a <details> entry\n if (previousReport) {\n const scoreMatch = previousReport.match(/Overall:\\s*(\\d+)\\/100/);\n const score = scoreMatch ? scoreMatch[1] : '?';\n const dateMatch = previousReport.match(/Generated by.*?·\\s*([^·<\\n*]+)/);\n const date = dateMatch\n ? dateMatch[1].trim()\n : new Date().toISOString().slice(0, 16).replace('T', ' ') + ' UTC';\n const entry = `<details>\\n<summary>📜 ${date} — ${score}/100</summary>\\n\\n${previousReport}\\n\\n</details>`;\n historyEntries.unshift(entry); // newest first\n }\n\n // Enforce max history limit\n historyEntries = historyEntries.slice(0, MAX_HISTORY);\n }\n\n // --- Assemble final comment ---\n const historySection = historyEntries.length > 0\n ? `\\n\\n---\\n\\n### 📜 Previous runs\\n\\n${HISTORY_START}\\n${historyEntries.join('\\n\\n')}\\n${HISTORY_END}`\n : '';\n const finalBody = `${MARKER}\\n${newReport}${historySection}`;\n\n if (existing) {\n await github.rest.issues.updateComment({\n owner: context.repo.owner, repo: context.repo.repo,\n comment_id: existing.id, body: finalBody,\n });\n console.log(`Updated comment (${historyEntries.length} history entries)`);\n } else {\n await github.rest.issues.createComment({\n owner: context.repo.owner, repo: context.repo.repo,\n issue_number: prNumber, body: finalBody,\n });\n console.log('Created new PR comment');\n }\n\n - name: Summary\n if: always()\n run: |\n if [ -f /tmp/ailf-report.md ]; then\n cat /tmp/ailf-report.md >> \"$GITHUB_STEP_SUMMARY\"\n else\n echo \"## ⚠️ AI Literacy Evaluation\" >> \"$GITHUB_STEP_SUMMARY\"\n echo \"\" >> \"$GITHUB_STEP_SUMMARY\"\n echo \"No report generated. 
Check the workflow logs.\" >> \"$GITHUB_STEP_SUMMARY\"\n fi\n";
|
|
@@ -57,6 +57,10 @@ export class RepoTaskSource {
|
|
|
57
57
|
throw new Error(`Failed to validate ${file}:\n${msg}`, { cause: err });
|
|
58
58
|
}
|
|
59
59
|
for (const entry of validated) {
|
|
60
|
+
// Filter stages:
|
|
61
|
+
// 1. Area filter — skip tasks outside requested feature areas
|
|
62
|
+
// 2. Task ID filter — skip tasks not matching explicit task IDs
|
|
63
|
+
// 3. Execution.enabled — skip tasks explicitly disabled
|
|
60
64
|
// Area filter
|
|
61
65
|
if (filter?.areas &&
|
|
62
66
|
filter.areas.length > 0 &&
|
|
@@ -71,6 +75,10 @@ export class RepoTaskSource {
|
|
|
71
75
|
!filter.taskIds.includes(entry.id)) {
|
|
72
76
|
continue;
|
|
73
77
|
}
|
|
78
|
+
// Execution.enabled filter — skip tasks explicitly disabled
|
|
79
|
+
if (entry.execution?.enabled === false) {
|
|
80
|
+
continue;
|
|
81
|
+
}
|
|
74
82
|
definitions.push(mapToTaskDefinition(entry));
|
|
75
83
|
}
|
|
76
84
|
}
|
package/dist/commands/init.js
CHANGED
|
@@ -163,6 +163,16 @@ async function runInit(opts) {
|
|
|
163
163
|
console.log(" 4. Push — the workflow at .github/workflows/ailf-eval.yml runs");
|
|
164
164
|
console.log(" automatically on PRs");
|
|
165
165
|
console.log();
|
|
166
|
+
console.log(" 🔑 Retrieve secrets from 1Password (Sanity employees):");
|
|
167
|
+
console.log();
|
|
168
|
+
console.log(" # Shared dev API key (for local testing and CI)");
|
|
169
|
+
console.log(' op read "op://Shared/AI Literacy Framework - Shared API Tokens/AILF_API_KEY_DEV"');
|
|
170
|
+
console.log();
|
|
171
|
+
console.log(" # npm token (read access to @sanity scope)");
|
|
172
|
+
console.log(' op read "op://Shared/AI Literacy Framework - Shared API Tokens/NPM_TOKEN"');
|
|
173
|
+
console.log();
|
|
174
|
+
console.log(" Not a Sanity employee? Request an API key from the AILF team.");
|
|
175
|
+
console.log();
|
|
166
176
|
console.log(" 💡 Test locally before pushing:");
|
|
167
177
|
console.log(" AILF_API_KEY=... npx @sanity/ailf@latest pipeline --remote --debug");
|
|
168
178
|
console.log();
|