@sanity/ailf 0.1.34 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2025 Sanity.io
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -56,6 +56,7 @@ definitions:
56
56
  "completed_at": completedAt,
57
57
  "duration_ms": durationMs,
58
58
  tag,
59
+ title,
59
60
  "mode": provenance.mode,
60
61
  "source_name": provenance.source.name,
61
62
  "source_base_url": provenance.source.baseUrl,
@@ -318,6 +319,11 @@ schemas:
318
319
  - string
319
320
  - "null"
320
321
  description: Optional human-supplied label
322
+ title:
323
+ type:
324
+ - string
325
+ - "null"
326
+ description: Auto-generated descriptive title for discoverability
321
327
  mode:
322
328
  type:
323
329
  - string
@@ -19,6 +19,7 @@ SELECT
19
19
  TIMESTAMP(completed_at) AS completed_at,
20
20
  CAST(duration_ms AS INT64) AS duration_ms,
21
21
  tag,
22
+ title,
22
23
  mode,
23
24
  source_name,
24
25
  source_base_url,
@@ -142,12 +142,10 @@ export declare const exampleGroqBlogListingData: readonly [{
142
142
  readonly enabled: true;
143
143
  readonly rubric: "abbreviated";
144
144
  };
145
- readonly execution: {
146
- readonly enabled: false;
147
- };
145
+ readonly status: "draft";
148
146
  }];
149
147
  /** Raw YAML string for example-groq-blog-listing (preserves comments) */
150
- export declare const exampleGroqBlogListingYaml = "# \u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n# Example Task: Blog listing with GROQ queries\n# \u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n#\n# This is a starter template \u2014 edit it for your own documentation.\n# Each task evaluates whether an AI coding agent can implement a feature\n# using your docs as context. Delete this file or replace it entirely.\n#\n# This example task is DISABLED by default. To enable it, either:\n# 1. Remove the execution.enabled: false line below, or\n# 2. 
Set execution.enabled: true\n#\n# Full field reference:\n# https://github.com/sanity-labs/ai-literacy-framework/blob/main/docs/CONTRIBUTING_TASKS.md\n# \u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n\n# Unique identifier \u2014 lowercase alphanumeric with hyphens.\n# Must be unique across all task files in .ailf/tasks/.\n- id: example-groq-blog-listing\n\n # Short human-readable summary. Shown in score tables and reports.\n description: \"Example \u2014 Blog listing with GROQ queries\"\n\n # Feature area this task belongs to. Tasks with the same area are\n # grouped together in score summaries. Use a short kebab-case name.\n featureArea: groq\n\n # Gold-standard documentation articles for this task. The pipeline\n # fetches these from Sanity and injects them into the prompt for\n # baseline evaluation. 
Each entry needs:\n # slug \u2014 the article's URL slug in your docs site\n # reason \u2014 why this doc is relevant (helps with auditing)\n #\n # This example uses slug-based references \u2014 the simplest form.\n # See the other example tasks for path, id, and perspective references.\n canonicalDocs:\n - slug: groq-introduction\n reason: \"Core GROQ syntax and query language reference\"\n - slug: how-queries-work\n reason: \"Query execution model and best practices\"\n\n # When true, the pipeline auto-generates an additional rubric that\n # checks whether the LLM's response actually used the provided docs.\n docCoverage: true\n\n # Path to a gold-standard implementation, relative to canonical/.\n # The grader uses this as a reference when scoring code correctness.\n referenceSolution: canonical/example-groq-blog-listing.ts\n\n # vars.task \u2014 the implementation prompt given to the LLM.\n # Write this as if you're asking a developer to build the feature.\n # Be specific about requirements so the grader can evaluate clearly.\n #\n # vars.docs \u2014 leave empty (\"\"). The pipeline fills this in:\n # \u2022 Gold variant: injected with canonical doc content\n # \u2022 Baseline variant: left empty (tests model knowledge alone)\n vars:\n task: |\n Create a Next.js page component that lists blog posts from Sanity\n using GROQ. The page should display the title, slug, and published\n date for each post, sorted by most recent first. Use the Sanity\n client to fetch data.\n docs: \"\"\n\n # Grading assertions \u2014 how the LLM's response is scored.\n #\n # \"llm-rubric\" assertions use a grader LLM to score against criteria.\n # The \"template\" references a rubric from config/rubrics.yaml.\n # The \"criteria\" are task-specific bullets injected into the template.\n #\n # Available templates:\n # task-completion \u2014 did the LLM implement the feature? (weight: 0.50)\n # code-correctness \u2014 is the code idiomatic and correct? 
(weight: 0.25)\n #\n # You can also use value-based assertions:\n # - type: contains\n # value: \"client.fetch\"\n # - type: contains-any\n # value: [\"createClient\", \"sanityClient\"]\n assert:\n - type: llm-rubric\n template: task-completion\n criteria:\n - \"Uses the groq tagged template literal\"\n - \"Fetches blog posts with title, slug, and publishedAt fields\"\n - \"Orders results by publishedAt in descending order\"\n\n - type: llm-rubric\n template: code-correctness\n criteria:\n - \"Uses createClient from @sanity/client or next-sanity\"\n - \"Exports a valid Next.js page component\"\n\n # Baseline variant configuration.\n # enabled \u2014 set to false to skip this task entirely\n # rubric \u2014 \"abbreviated\" (faster, default), \"full\", or \"none\"\n baseline:\n enabled: true\n rubric: abbreviated\n\n # Execution configuration.\n # Example tasks ship disabled so they don't run automatically.\n # Set enabled: true (or remove this block) to activate.\n execution:\n enabled: false\n";
148
+ export declare const exampleGroqBlogListingYaml = "# \u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n# Example Task: Blog listing with GROQ queries\n# \u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n#\n# This is a starter template \u2014 edit it for your own documentation.\n# Each task evaluates whether an AI coding agent can implement a feature\n# using your docs as context. Delete this file or replace it entirely.\n#\n# This example task ships as a DRAFT so it does not run in production\n# evaluations automatically. 
To activate it, change status to \"active\"\n# or remove the status line entirely (defaults to active).\n#\n# Full field reference:\n# https://github.com/sanity-labs/ai-literacy-framework/blob/main/docs/CONTRIBUTING_TASKS.md\n# \u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n\n# Unique identifier \u2014 lowercase alphanumeric with hyphens.\n# Must be unique across all task files in .ailf/tasks/.\n- id: example-groq-blog-listing\n\n # Short human-readable summary. Shown in score tables and reports.\n description: \"Example \u2014 Blog listing with GROQ queries\"\n\n # Feature area this task belongs to. Tasks with the same area are\n # grouped together in score summaries. Use a short kebab-case name.\n featureArea: groq\n\n # Gold-standard documentation articles for this task. The pipeline\n # fetches these from Sanity and injects them into the prompt for\n # baseline evaluation. 
Each entry needs:\n # slug \u2014 the article's URL slug in your docs site\n # reason \u2014 why this doc is relevant (helps with auditing)\n #\n # This example uses slug-based references \u2014 the simplest form.\n # See the other example tasks for path, id, and perspective references.\n canonicalDocs:\n - slug: groq-introduction\n reason: \"Core GROQ syntax and query language reference\"\n - slug: how-queries-work\n reason: \"Query execution model and best practices\"\n\n # When true, the pipeline auto-generates an additional rubric that\n # checks whether the LLM's response actually used the provided docs.\n docCoverage: true\n\n # Path to a gold-standard implementation, relative to canonical/.\n # The grader uses this as a reference when scoring code correctness.\n referenceSolution: canonical/example-groq-blog-listing.ts\n\n # vars.task \u2014 the implementation prompt given to the LLM.\n # Write this as if you're asking a developer to build the feature.\n # Be specific about requirements so the grader can evaluate clearly.\n #\n # vars.docs \u2014 leave empty (\"\"). The pipeline fills this in:\n # \u2022 Gold variant: injected with canonical doc content\n # \u2022 Baseline variant: left empty (tests model knowledge alone)\n vars:\n task: |\n Create a Next.js page component that lists blog posts from Sanity\n using GROQ. The page should display the title, slug, and published\n date for each post, sorted by most recent first. Use the Sanity\n client to fetch data.\n docs: \"\"\n\n # Grading assertions \u2014 how the LLM's response is scored.\n #\n # \"llm-rubric\" assertions use a grader LLM to score against criteria.\n # The \"template\" references a rubric from config/rubrics.yaml.\n # The \"criteria\" are task-specific bullets injected into the template.\n #\n # Available templates:\n # task-completion \u2014 did the LLM implement the feature? (weight: 0.50)\n # code-correctness \u2014 is the code idiomatic and correct? 
(weight: 0.25)\n #\n # You can also use value-based assertions:\n # - type: contains\n # value: \"client.fetch\"\n # - type: contains-any\n # value: [\"createClient\", \"sanityClient\"]\n assert:\n - type: llm-rubric\n template: task-completion\n criteria:\n - \"Uses the groq tagged template literal\"\n - \"Fetches blog posts with title, slug, and publishedAt fields\"\n - \"Orders results by publishedAt in descending order\"\n\n - type: llm-rubric\n template: code-correctness\n criteria:\n - \"Uses createClient from @sanity/client or next-sanity\"\n - \"Exports a valid Next.js page component\"\n\n # Baseline variant configuration.\n # enabled \u2014 set to false to skip this task entirely\n # rubric \u2014 \"abbreviated\" (faster, default), \"full\", or \"none\"\n baseline:\n enabled: true\n rubric: abbreviated\n\n # Example tasks ship as drafts so they don't run in production evals.\n # Change to \"active\" (or remove this line) to activate.\n status: draft\n";
151
149
  /** Parsed task data for example-id-based-ref (JSON-safe) */
152
150
  export declare const exampleIdBasedRefData: readonly [{
153
151
  readonly id: "example-id-based-ref";
@@ -180,12 +178,10 @@ export declare const exampleIdBasedRefData: readonly [{
180
178
  readonly enabled: true;
181
179
  readonly rubric: "abbreviated";
182
180
  };
183
- readonly execution: {
184
- readonly enabled: false;
185
- };
181
+ readonly status: "draft";
186
182
  }];
187
183
  /** Raw YAML string for example-id-based-ref (preserves comments) */
188
- export declare const exampleIdBasedRefYaml = "# \u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n# Example Task: Document ID-based canonical doc references\n# \u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n#\n# Demonstrates using `id` to reference canonical documentation by\n# Sanity document `_id`. This is useful for:\n# - Draft documents that don't have a stable slug yet\n# - Programmatic references from imports or migrations\n# - Documents where you know the _id but not the slug\n#\n# The `id` ref type can also carry optional `slug` and `path` fields\n# as human-readable annotations \u2014 these are NOT used for resolution,\n# only for display in logs and reports.\n#\n# This example task is DISABLED by default. To enable it, either:\n# 1. Remove the execution.enabled: false line below, or\n# 2. 
Set execution.enabled: true\n#\n# @see docs/design-docs/canonical-doc-resolution.md\n# \u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n\n- id: example-id-based-ref\n description: \"Example \u2014 GROQ feature support (ID-based doc references)\"\n\n featureArea: groq\n\n # ID-based canonical doc references.\n #\n # Use the Sanity document _id to reference articles directly.\n # Optional slug/path annotations help humans reading the YAML\n # but are NOT used for resolution \u2014 only the `id` field matters.\n #\n # These IDs reference real articles in the Sanity docs (next dataset):\n # 0ba88f1b... = \"GROQ feature support across Sanity\"\n # 5b9c2863... = \"Custom GROQ functions\"\n canonicalDocs:\n - id: \"0ba88f1b-d1a7-418a-9267-2e343d01886a\"\n slug: groq-feature-support-by-context # annotation only \u2014 not used for resolution\n reason: \"GROQ feature support across different Sanity contexts\"\n - id: \"5b9c2863-ef01-4565-af8e-ee54e081ee74\"\n slug: custom-groq-functions # annotation only \u2014 not used for resolution\n reason: \"Custom GROQ functions and pipelines\"\n\n docCoverage: true\n\n vars:\n task: |\n Explain how GROQ is used across different Sanity contexts.\n Cover the following:\n 1. Which GROQ features are available in each context (API queries,\n webhooks, custom functions, access control)\n 2. How to create and use custom GROQ functions\n 3. 
Any differences in GROQ support between contexts\n Provide examples demonstrating context-specific GROQ patterns.\n docs: \"\"\n\n assert:\n - type: llm-rubric\n template: task-completion\n criteria:\n - \"Explains GROQ availability across different Sanity contexts\"\n - \"Describes custom GROQ function creation and usage\"\n - \"Notes differences in GROQ support between contexts\"\n\n - type: llm-rubric\n template: code-correctness\n criteria:\n - \"GROQ examples use valid syntax\"\n - \"Custom function examples follow the correct API pattern\"\n\n baseline:\n enabled: true\n rubric: abbreviated\n\n # Example tasks ship disabled so they don't run automatically.\n # Set enabled: true (or remove this block) to activate.\n execution:\n enabled: false\n";
184
+ export declare const exampleIdBasedRefYaml = "# \u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n# Example Task: Document ID-based canonical doc references\n# \u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n#\n# Demonstrates using `id` to reference canonical documentation by\n# Sanity document `_id`. This is useful for:\n# - Draft documents that don't have a stable slug yet\n# - Programmatic references from imports or migrations\n# - Documents where you know the _id but not the slug\n#\n# The `id` ref type can also carry optional `slug` and `path` fields\n# as human-readable annotations \u2014 these are NOT used for resolution,\n# only for display in logs and reports.\n#\n# This example task ships as a DRAFT so it does not run in production\n# evaluations automatically. 
To activate it, change status to \"active\"\n# or remove the status line entirely (defaults to active).\n#\n# @see docs/design-docs/canonical-doc-resolution.md\n# \u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n\n- id: example-id-based-ref\n description: \"Example \u2014 GROQ feature support (ID-based doc references)\"\n\n featureArea: groq\n\n # ID-based canonical doc references.\n #\n # Use the Sanity document _id to reference articles directly.\n # Optional slug/path annotations help humans reading the YAML\n # but are NOT used for resolution \u2014 only the `id` field matters.\n #\n # These IDs reference real articles in the Sanity docs (next dataset):\n # 0ba88f1b... = \"GROQ feature support across Sanity\"\n # 5b9c2863... = \"Custom GROQ functions\"\n canonicalDocs:\n - id: \"0ba88f1b-d1a7-418a-9267-2e343d01886a\"\n slug: groq-feature-support-by-context # annotation only \u2014 not used for resolution\n reason: \"GROQ feature support across different Sanity contexts\"\n - id: \"5b9c2863-ef01-4565-af8e-ee54e081ee74\"\n slug: custom-groq-functions # annotation only \u2014 not used for resolution\n reason: \"Custom GROQ functions and pipelines\"\n\n docCoverage: true\n\n vars:\n task: |\n Explain how GROQ is used across different Sanity contexts.\n Cover the following:\n 1. Which GROQ features are available in each context (API queries,\n webhooks, custom functions, access control)\n 2. How to create and use custom GROQ functions\n 3. 
Any differences in GROQ support between contexts\n Provide examples demonstrating context-specific GROQ patterns.\n docs: \"\"\n\n assert:\n - type: llm-rubric\n template: task-completion\n criteria:\n - \"Explains GROQ availability across different Sanity contexts\"\n - \"Describes custom GROQ function creation and usage\"\n - \"Notes differences in GROQ support between contexts\"\n\n - type: llm-rubric\n template: code-correctness\n criteria:\n - \"GROQ examples use valid syntax\"\n - \"Custom function examples follow the correct API pattern\"\n\n baseline:\n enabled: true\n rubric: abbreviated\n\n # Example tasks ship as drafts so they don't run in production evals.\n # Change to \"active\" (or remove this line) to activate.\n status: draft\n";
189
185
  /** Parsed task data for example-path-based-ref (JSON-safe) */
190
186
  export declare const examplePathBasedRefData: readonly [{
191
187
  readonly id: "example-path-based-ref";
@@ -216,12 +212,10 @@ export declare const examplePathBasedRefData: readonly [{
216
212
  readonly enabled: true;
217
213
  readonly rubric: "abbreviated";
218
214
  };
219
- readonly execution: {
220
- readonly enabled: false;
221
- };
215
+ readonly status: "draft";
222
216
  }];
223
217
  /** Raw YAML string for example-path-based-ref (preserves comments) */
224
- export declare const examplePathBasedRefYaml = "# \u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n# Example Task: Path-based canonical doc references\n# \u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n#\n# Demonstrates using `path` to reference canonical documentation.\n# Paths are the preferred reference type because they uniquely identify\n# an article across sections (unlike slugs, which can collide).\n#\n# Path format:\n# - Simple: \"webhooks\" \u2192 resolves by slug lookup\n# - Sectioned: \"content-lake/webhooks\" \u2192 disambiguates by section + slug\n#\n# This example demonstrates why paths matter: the slug \"documents\"\n# exists in both the \"content-lake\" and \"cli-reference\" sections.\n# Using \"content-lake/documents\" ensures we get the right one.\n#\n# This example task is DISABLED by default. To enable it, either:\n# 1. Remove the execution.enabled: false line below, or\n# 2. 
Set execution.enabled: true\n#\n# @see docs/design-docs/canonical-doc-resolution.md\n# \u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n\n- id: example-path-based-ref\n description: \"Example \u2014 GROQ mutations (path-based doc references)\"\n\n featureArea: groq\n\n # Path-based canonical doc references.\n #\n # Use \"section/slug\" format to uniquely identify articles:\n # - \"content-lake/mutations-introduction\" \u2192 the mutations article\n # - \"content-lake/documents\" \u2192 the documents article in Content Lake\n # (not the CLI \"documents\" article in cli-reference section)\n #\n # The \"documents\" slug exists in two sections \u2014 this is exactly why\n # path-based references are preferred over slug-based references.\n canonicalDocs:\n - path: content-lake/mutations-introduction\n reason: \"Introduction to document mutations in the Content Lake\"\n - path: content-lake/documents\n reason: \"Document structure and types (Content Lake, not CLI reference)\"\n\n docCoverage: true\n\n vars:\n task: |\n Explain how to create, update, and delete documents in Sanity's\n Content Lake using mutations. Cover:\n 1. The different mutation types (create, createOrReplace, patch, delete)\n 2. Document structure and required fields (_id, _type)\n 3. How to use patch operations to update specific fields\n 4. 
Best practices for mutation patterns\n Provide working code examples using @sanity/client.\n docs: \"\"\n\n assert:\n - type: llm-rubric\n template: task-completion\n criteria:\n - \"Explains create, createOrReplace, patch, and delete mutations\"\n - \"Describes required document fields (_id, _type)\"\n - \"Shows patch operations for field-level updates\"\n - \"Includes practical code examples\"\n\n - type: llm-rubric\n template: code-correctness\n criteria:\n - \"Uses correct @sanity/client mutation API\"\n - \"Patch operations use valid set/unset/inc syntax\"\n\n baseline:\n enabled: true\n rubric: abbreviated\n\n # Example tasks ship disabled so they don't run automatically.\n # Set enabled: true (or remove this block) to activate.\n execution:\n enabled: false\n";
218
+ export declare const examplePathBasedRefYaml = "# \u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n# Example Task: Path-based canonical doc references\n# \u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n#\n# Demonstrates using `path` to reference canonical documentation.\n# Paths are the preferred reference type because they uniquely identify\n# an article across sections (unlike slugs, which can collide).\n#\n# Path format:\n# - Simple: \"webhooks\" \u2192 resolves by slug lookup\n# - Sectioned: \"content-lake/webhooks\" \u2192 disambiguates by section + slug\n#\n# This example demonstrates why paths matter: the slug \"documents\"\n# exists in both the \"content-lake\" and \"cli-reference\" sections.\n# Using \"content-lake/documents\" ensures we get the right one.\n#\n# This example task ships as a DRAFT so it does not run in production\n# evaluations automatically. 
To activate it, change status to \"active\"\n# or remove the status line entirely (defaults to active).\n#\n# @see docs/design-docs/canonical-doc-resolution.md\n# \u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n\n- id: example-path-based-ref\n description: \"Example \u2014 GROQ mutations (path-based doc references)\"\n\n featureArea: groq\n\n # Path-based canonical doc references.\n #\n # Use \"section/slug\" format to uniquely identify articles:\n # - \"content-lake/mutations-introduction\" \u2192 the mutations article\n # - \"content-lake/documents\" \u2192 the documents article in Content Lake\n # (not the CLI \"documents\" article in cli-reference section)\n #\n # The \"documents\" slug exists in two sections \u2014 this is exactly why\n # path-based references are preferred over slug-based references.\n canonicalDocs:\n - path: content-lake/mutations-introduction\n reason: \"Introduction to document mutations in the Content Lake\"\n - path: content-lake/documents\n reason: \"Document structure and types (Content Lake, not CLI reference)\"\n\n docCoverage: true\n\n vars:\n task: |\n Explain how to create, update, and delete documents in Sanity's\n Content Lake using mutations. Cover:\n 1. The different mutation types (create, createOrReplace, patch, delete)\n 2. Document structure and required fields (_id, _type)\n 3. How to use patch operations to update specific fields\n 4. 
Best practices for mutation patterns\n Provide working code examples using @sanity/client.\n docs: \"\"\n\n assert:\n - type: llm-rubric\n template: task-completion\n criteria:\n - \"Explains create, createOrReplace, patch, and delete mutations\"\n - \"Describes required document fields (_id, _type)\"\n - \"Shows patch operations for field-level updates\"\n - \"Includes practical code examples\"\n\n - type: llm-rubric\n template: code-correctness\n criteria:\n - \"Uses correct @sanity/client mutation API\"\n - \"Patch operations use valid set/unset/inc syntax\"\n\n baseline:\n enabled: true\n rubric: abbreviated\n\n # Example tasks ship as drafts so they don't run in production evals.\n # Change to \"active\" (or remove this line) to activate.\n status: draft\n";
225
219
  /** Parsed task data for example-perspective-ref (JSON-safe) */
226
220
  export declare const examplePerspectiveRefData: readonly [{
227
221
  readonly id: "example-perspective-ref";
@@ -252,12 +246,10 @@ export declare const examplePerspectiveRefData: readonly [{
252
246
  readonly enabled: true;
253
247
  readonly rubric: "abbreviated";
254
248
  };
255
- readonly execution: {
256
- readonly enabled: false;
257
- };
249
+ readonly status: "draft";
258
250
  }];
259
251
  /** Raw YAML string for example-perspective-ref (preserves comments) */
260
- export declare const examplePerspectiveRefYaml = "# \u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n# Example Task: Perspective / content release doc references\n# \u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n#\n# Demonstrates using `perspective` to reference all documentation\n# articles within a content release. This is the key capability for\n# evaluating NEW feature documentation before it's published.\n#\n# How it works:\n# - A perspective ref is one-to-many: the doc fetcher queries the\n# named release and expands it to ALL articles versioned within it.\n# - Downstream consumers see the same flat DocContext[] regardless\n# of how docs were resolved.\n# - When the release is published, the perspective entry becomes a\n# no-op (articles are now in published). Migrate to explicit path\n# or slug refs at your convenience.\n#\n# This example task is DISABLED by default. To enable it, either:\n# 1. Remove the execution.enabled: false line below, or\n# 2. 
Set execution.enabled: true\n#\n# @see docs/design-docs/canonical-doc-resolution.md\n# \u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n\n- id: example-perspective-ref\n description:\n \"Example \u2014 GROQ features from content release (perspective-based doc\n references)\"\n\n featureArea: groq\n\n # Perspective-based canonical doc reference.\n #\n # The perspective ID references a content release in the Sanity\n # Content Lake. At evaluation time, the doc fetcher auto-discovers\n # all articles versioned in this release and includes them as\n # canonical documentation context.\n #\n # Release rE9TSJvR4 contains:\n # - \"GROQ-powered webhooks\" (webhooks)\n # - \"Query Cheat Sheet - GROQ\" (query-cheat-sheet)\n # - \"GROQ joins\" (groq-joins)\n #\n # You can combine perspective refs with explicit slug/path/id refs\n # to include foundational published docs alongside release content.\n # Here we add groq-data-types as a complementary published reference.\n canonicalDocs:\n - perspective: rE9TSJvR4\n reason: \"All GROQ documentation updates in the test content release\"\n - slug: groq-data-types\n reason: \"GROQ data type reference (published, stable)\"\n\n docCoverage: true\n\n vars:\n task: |\n Using GROQ, demonstrate advanced query patterns including:\n 1. Joining data across document types using references\n 2. Filtering webhook payloads with GROQ projections\n 3. Using the query cheat sheet patterns for common operations\n 4. 
Working with different GROQ data types in filters\n Provide working GROQ query examples for each pattern.\n docs: \"\"\n\n assert:\n - type: llm-rubric\n template: task-completion\n criteria:\n - \"Demonstrates GROQ join syntax for cross-document queries\"\n - \"Shows GROQ filter patterns for webhook configuration\"\n - \"Includes practical query examples from cheat sheet patterns\"\n\n - type: llm-rubric\n template: code-correctness\n criteria:\n - \"All GROQ queries use valid syntax\"\n - \"Reference joins use correct dereference operator (->)\"\n\n baseline:\n enabled: true\n rubric: abbreviated\n\n # Example tasks ship disabled so they don't run automatically.\n # Set enabled: true (or remove this block) to activate.\n execution:\n enabled: false\n";
252
+ export declare const examplePerspectiveRefYaml = "# \u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n# Example Task: Perspective / content release doc references\n# \u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n#\n# Demonstrates using `perspective` to reference all documentation\n# articles within a content release. This is the key capability for\n# evaluating NEW feature documentation before it's published.\n#\n# How it works:\n# - A perspective ref is one-to-many: the doc fetcher queries the\n# named release and expands it to ALL articles versioned within it.\n# - Downstream consumers see the same flat DocContext[] regardless\n# of how docs were resolved.\n# - When the release is published, the perspective entry becomes a\n# no-op (articles are now in published). Migrate to explicit path\n# or slug refs at your convenience.\n#\n# This example task ships as a DRAFT so it does not run in production\n# evaluations automatically. 
To activate it, change status to \"active\"\n# or remove the status line entirely (defaults to active).\n#\n# @see docs/design-docs/canonical-doc-resolution.md\n# \u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n\n- id: example-perspective-ref\n description:\n \"Example \u2014 GROQ features from content release (perspective-based doc\n references)\"\n\n featureArea: groq\n\n # Perspective-based canonical doc reference.\n #\n # The perspective ID references a content release in the Sanity\n # Content Lake. At evaluation time, the doc fetcher auto-discovers\n # all articles versioned in this release and includes them as\n # canonical documentation context.\n #\n # Release rE9TSJvR4 contains:\n # - \"GROQ-powered webhooks\" (webhooks)\n # - \"Query Cheat Sheet - GROQ\" (query-cheat-sheet)\n # - \"GROQ joins\" (groq-joins)\n #\n # You can combine perspective refs with explicit slug/path/id refs\n # to include foundational published docs alongside release content.\n # Here we add groq-data-types as a complementary published reference.\n canonicalDocs:\n - perspective: rE9TSJvR4\n reason: \"All GROQ documentation updates in the test content release\"\n - slug: groq-data-types\n reason: \"GROQ data type reference (published, stable)\"\n\n docCoverage: true\n\n vars:\n task: |\n Using GROQ, demonstrate advanced query patterns including:\n 1. Joining data across document types using references\n 2. Filtering webhook payloads with GROQ projections\n 3. Using the query cheat sheet patterns for common operations\n 4. 
Working with different GROQ data types in filters\n Provide working GROQ query examples for each pattern.\n docs: \"\"\n\n assert:\n - type: llm-rubric\n template: task-completion\n criteria:\n - \"Demonstrates GROQ join syntax for cross-document queries\"\n - \"Shows GROQ filter patterns for webhook configuration\"\n - \"Includes practical query examples from cheat sheet patterns\"\n\n - type: llm-rubric\n template: code-correctness\n criteria:\n - \"All GROQ queries use valid syntax\"\n - \"Reference joins use correct dereference operator (->)\"\n\n baseline:\n enabled: true\n rubric: abbreviated\n\n # Example tasks ship as drafts so they don't run in production evals.\n # Change to \"active\" (or remove this line) to activate.\n status: draft\n";
261
253
  /** Parsed task data for example-studio-custom-input (JSON-safe) */
262
254
  export declare const exampleStudioCustomInputData: readonly [{
263
255
  readonly id: "example-studio-custom-input";
@@ -289,12 +281,10 @@ export declare const exampleStudioCustomInputData: readonly [{
289
281
  readonly enabled: true;
290
282
  readonly rubric: "abbreviated";
291
283
  };
292
- readonly execution: {
293
- readonly enabled: false;
294
- };
284
+ readonly status: "draft";
295
285
  }];
296
286
  /** Raw YAML string for example-studio-custom-input (preserves comments) */
297
- export declare const exampleStudioCustomInputYaml = "# \u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n# Example Task: Custom input component in Sanity Studio\n# \u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n#\n# This is a starter template \u2014 edit it for your own documentation.\n# Delete this file or replace it with your own tasks.\n#\n# This example task is DISABLED by default. To enable it, either:\n# 1. Remove the execution.enabled: false line below, or\n# 2. 
Set execution.enabled: true\n# \u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n\n- id: example-studio-custom-input\n description: \"Example \u2014 Custom input component in Sanity Studio\"\n\n featureArea: studio\n\n # Slug-based canonical doc references.\n canonicalDocs:\n - slug: custom-input-widgets\n reason: \"Guide for building custom form inputs in Sanity Studio\"\n - slug: form-components\n reason: \"Form component API and customization patterns\"\n\n docCoverage: true\n referenceSolution: canonical/example-studio-custom-input.ts\n\n vars:\n task: |\n Build a custom string input component for Sanity Studio that shows\n a character count below the input field. The component should accept\n a maxLength option from the field schema and display a warning when\n the text exceeds the limit.\n docs: \"\"\n\n assert:\n - type: llm-rubric\n template: task-completion\n criteria:\n - \"Implements a React component that renders a text input\"\n - \"Displays a live character count\"\n - \"Reads maxLength from schema options\"\n - \"Shows a visual warning when limit is exceeded\"\n\n - type: llm-rubric\n template: code-correctness\n criteria:\n - \"Uses the Sanity UI library for styling\"\n - \"Calls onChange with patch operations\"\n\n baseline:\n enabled: true\n rubric: abbreviated\n\n # Example tasks ship disabled so they don't run automatically.\n # Set enabled: true (or remove this block) to activate.\n execution:\n enabled: false\n";
287
+ export declare const exampleStudioCustomInputYaml = "# \u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n# Example Task: Custom input component in Sanity Studio\n# \u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n#\n# This is a starter template \u2014 edit it for your own documentation.\n# Delete this file or replace it with your own tasks.\n#\n# This example task ships as a DRAFT so it does not run in production\n# evaluations automatically. 
To activate it, change status to \"active\"\n# or remove the status line entirely (defaults to active).\n# \u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n\n- id: example-studio-custom-input\n description: \"Example \u2014 Custom input component in Sanity Studio\"\n\n featureArea: studio\n\n # Slug-based canonical doc references.\n canonicalDocs:\n - slug: custom-input-widgets\n reason: \"Guide for building custom form inputs in Sanity Studio\"\n - slug: form-components\n reason: \"Form component API and customization patterns\"\n\n docCoverage: true\n referenceSolution: canonical/example-studio-custom-input.ts\n\n vars:\n task: |\n Build a custom string input component for Sanity Studio that shows\n a character count below the input field. The component should accept\n a maxLength option from the field schema and display a warning when\n the text exceeds the limit.\n docs: \"\"\n\n assert:\n - type: llm-rubric\n template: task-completion\n criteria:\n - \"Implements a React component that renders a text input\"\n - \"Displays a live character count\"\n - \"Reads maxLength from schema options\"\n - \"Shows a visual warning when limit is exceeded\"\n\n - type: llm-rubric\n template: code-correctness\n criteria:\n - \"Uses the Sanity UI library for styling\"\n - \"Calls onChange with patch operations\"\n\n baseline:\n enabled: true\n rubric: abbreviated\n\n # Example tasks ship as drafts so they don't run in production evals.\n # Change to \"active\" (or remove this line) to activate.\n status: draft\n";
298
288
  /** All task example data as a flat array (JSON-safe) */
299
289
  export declare const allTaskData: readonly unknown[];
300
290
  /** Map of task ID (filename stem) → raw YAML string (preserves comments) */
@@ -187,13 +187,11 @@ export const exampleGroqBlogListingData = [
187
187
  "enabled": true,
188
188
  "rubric": "abbreviated"
189
189
  },
190
- "execution": {
191
- "enabled": false
192
- }
190
+ "status": "draft"
193
191
  }
194
192
  ];
195
193
  /** Raw YAML string for example-groq-blog-listing (preserves comments) */
196
- export const exampleGroqBlogListingYaml = "# ──────────────────────────────────────────────────────────────────────\n# Example Task: Blog listing with GROQ queries\n# ──────────────────────────────────────────────────────────────────────\n#\n# This is a starter template — edit it for your own documentation.\n# Each task evaluates whether an AI coding agent can implement a feature\n# using your docs as context. Delete this file or replace it entirely.\n#\n# This example task is DISABLED by default. To enable it, either:\n# 1. Remove the execution.enabled: false line below, or\n# 2. Set execution.enabled: true\n#\n# Full field reference:\n# https://github.com/sanity-labs/ai-literacy-framework/blob/main/docs/CONTRIBUTING_TASKS.md\n# ──────────────────────────────────────────────────────────────────────\n\n# Unique identifier — lowercase alphanumeric with hyphens.\n# Must be unique across all task files in .ailf/tasks/.\n- id: example-groq-blog-listing\n\n # Short human-readable summary. Shown in score tables and reports.\n description: \"Example — Blog listing with GROQ queries\"\n\n # Feature area this task belongs to. Tasks with the same area are\n # grouped together in score summaries. Use a short kebab-case name.\n featureArea: groq\n\n # Gold-standard documentation articles for this task. The pipeline\n # fetches these from Sanity and injects them into the prompt for\n # baseline evaluation. 
Each entry needs:\n # slug — the article's URL slug in your docs site\n # reason — why this doc is relevant (helps with auditing)\n #\n # This example uses slug-based references — the simplest form.\n # See the other example tasks for path, id, and perspective references.\n canonicalDocs:\n - slug: groq-introduction\n reason: \"Core GROQ syntax and query language reference\"\n - slug: how-queries-work\n reason: \"Query execution model and best practices\"\n\n # When true, the pipeline auto-generates an additional rubric that\n # checks whether the LLM's response actually used the provided docs.\n docCoverage: true\n\n # Path to a gold-standard implementation, relative to canonical/.\n # The grader uses this as a reference when scoring code correctness.\n referenceSolution: canonical/example-groq-blog-listing.ts\n\n # vars.task — the implementation prompt given to the LLM.\n # Write this as if you're asking a developer to build the feature.\n # Be specific about requirements so the grader can evaluate clearly.\n #\n # vars.docs — leave empty (\"\"). The pipeline fills this in:\n # • Gold variant: injected with canonical doc content\n # • Baseline variant: left empty (tests model knowledge alone)\n vars:\n task: |\n Create a Next.js page component that lists blog posts from Sanity\n using GROQ. The page should display the title, slug, and published\n date for each post, sorted by most recent first. Use the Sanity\n client to fetch data.\n docs: \"\"\n\n # Grading assertions — how the LLM's response is scored.\n #\n # \"llm-rubric\" assertions use a grader LLM to score against criteria.\n # The \"template\" references a rubric from config/rubrics.yaml.\n # The \"criteria\" are task-specific bullets injected into the template.\n #\n # Available templates:\n # task-completion — did the LLM implement the feature? (weight: 0.50)\n # code-correctness — is the code idiomatic and correct? 
(weight: 0.25)\n #\n # You can also use value-based assertions:\n # - type: contains\n # value: \"client.fetch\"\n # - type: contains-any\n # value: [\"createClient\", \"sanityClient\"]\n assert:\n - type: llm-rubric\n template: task-completion\n criteria:\n - \"Uses the groq tagged template literal\"\n - \"Fetches blog posts with title, slug, and publishedAt fields\"\n - \"Orders results by publishedAt in descending order\"\n\n - type: llm-rubric\n template: code-correctness\n criteria:\n - \"Uses createClient from @sanity/client or next-sanity\"\n - \"Exports a valid Next.js page component\"\n\n # Baseline variant configuration.\n # enabled — set to false to skip this task entirely\n # rubric — \"abbreviated\" (faster, default), \"full\", or \"none\"\n baseline:\n enabled: true\n rubric: abbreviated\n\n # Execution configuration.\n # Example tasks ship disabled so they don't run automatically.\n # Set enabled: true (or remove this block) to activate.\n execution:\n enabled: false\n";
194
+ export const exampleGroqBlogListingYaml = "# ──────────────────────────────────────────────────────────────────────\n# Example Task: Blog listing with GROQ queries\n# ──────────────────────────────────────────────────────────────────────\n#\n# This is a starter template — edit it for your own documentation.\n# Each task evaluates whether an AI coding agent can implement a feature\n# using your docs as context. Delete this file or replace it entirely.\n#\n# This example task ships as a DRAFT so it does not run in production\n# evaluations automatically. To activate it, change status to \"active\"\n# or remove the status line entirely (defaults to active).\n#\n# Full field reference:\n# https://github.com/sanity-labs/ai-literacy-framework/blob/main/docs/CONTRIBUTING_TASKS.md\n# ──────────────────────────────────────────────────────────────────────\n\n# Unique identifier — lowercase alphanumeric with hyphens.\n# Must be unique across all task files in .ailf/tasks/.\n- id: example-groq-blog-listing\n\n # Short human-readable summary. Shown in score tables and reports.\n description: \"Example — Blog listing with GROQ queries\"\n\n # Feature area this task belongs to. Tasks with the same area are\n # grouped together in score summaries. Use a short kebab-case name.\n featureArea: groq\n\n # Gold-standard documentation articles for this task. The pipeline\n # fetches these from Sanity and injects them into the prompt for\n # baseline evaluation. 
Each entry needs:\n # slug — the article's URL slug in your docs site\n # reason — why this doc is relevant (helps with auditing)\n #\n # This example uses slug-based references — the simplest form.\n # See the other example tasks for path, id, and perspective references.\n canonicalDocs:\n - slug: groq-introduction\n reason: \"Core GROQ syntax and query language reference\"\n - slug: how-queries-work\n reason: \"Query execution model and best practices\"\n\n # When true, the pipeline auto-generates an additional rubric that\n # checks whether the LLM's response actually used the provided docs.\n docCoverage: true\n\n # Path to a gold-standard implementation, relative to canonical/.\n # The grader uses this as a reference when scoring code correctness.\n referenceSolution: canonical/example-groq-blog-listing.ts\n\n # vars.task — the implementation prompt given to the LLM.\n # Write this as if you're asking a developer to build the feature.\n # Be specific about requirements so the grader can evaluate clearly.\n #\n # vars.docs — leave empty (\"\"). The pipeline fills this in:\n # • Gold variant: injected with canonical doc content\n # • Baseline variant: left empty (tests model knowledge alone)\n vars:\n task: |\n Create a Next.js page component that lists blog posts from Sanity\n using GROQ. The page should display the title, slug, and published\n date for each post, sorted by most recent first. Use the Sanity\n client to fetch data.\n docs: \"\"\n\n # Grading assertions — how the LLM's response is scored.\n #\n # \"llm-rubric\" assertions use a grader LLM to score against criteria.\n # The \"template\" references a rubric from config/rubrics.yaml.\n # The \"criteria\" are task-specific bullets injected into the template.\n #\n # Available templates:\n # task-completion — did the LLM implement the feature? (weight: 0.50)\n # code-correctness — is the code idiomatic and correct? 
(weight: 0.25)\n #\n # You can also use value-based assertions:\n # - type: contains\n # value: \"client.fetch\"\n # - type: contains-any\n # value: [\"createClient\", \"sanityClient\"]\n assert:\n - type: llm-rubric\n template: task-completion\n criteria:\n - \"Uses the groq tagged template literal\"\n - \"Fetches blog posts with title, slug, and publishedAt fields\"\n - \"Orders results by publishedAt in descending order\"\n\n - type: llm-rubric\n template: code-correctness\n criteria:\n - \"Uses createClient from @sanity/client or next-sanity\"\n - \"Exports a valid Next.js page component\"\n\n # Baseline variant configuration.\n # enabled — set to false to skip this task entirely\n # rubric — \"abbreviated\" (faster, default), \"full\", or \"none\"\n baseline:\n enabled: true\n rubric: abbreviated\n\n # Example tasks ship as drafts so they don't run in production evals.\n # Change to \"active\" (or remove this line) to activate.\n status: draft\n";
197
195
  /** Parsed task data for example-id-based-ref (JSON-safe) */
198
196
  export const exampleIdBasedRefData = [
199
197
  {
@@ -240,13 +238,11 @@ export const exampleIdBasedRefData = [
240
238
  "enabled": true,
241
239
  "rubric": "abbreviated"
242
240
  },
243
- "execution": {
244
- "enabled": false
245
- }
241
+ "status": "draft"
246
242
  }
247
243
  ];
248
244
  /** Raw YAML string for example-id-based-ref (preserves comments) */
249
- export const exampleIdBasedRefYaml = "# ──────────────────────────────────────────────────────────────────────\n# Example Task: Document ID-based canonical doc references\n# ──────────────────────────────────────────────────────────────────────\n#\n# Demonstrates using `id` to reference canonical documentation by\n# Sanity document `_id`. This is useful for:\n# - Draft documents that don't have a stable slug yet\n# - Programmatic references from imports or migrations\n# - Documents where you know the _id but not the slug\n#\n# The `id` ref type can also carry optional `slug` and `path` fields\n# as human-readable annotations — these are NOT used for resolution,\n# only for display in logs and reports.\n#\n# This example task is DISABLED by default. To enable it, either:\n# 1. Remove the execution.enabled: false line below, or\n# 2. Set execution.enabled: true\n#\n# @see docs/design-docs/canonical-doc-resolution.md\n# ──────────────────────────────────────────────────────────────────────\n\n- id: example-id-based-ref\n description: \"Example — GROQ feature support (ID-based doc references)\"\n\n featureArea: groq\n\n # ID-based canonical doc references.\n #\n # Use the Sanity document _id to reference articles directly.\n # Optional slug/path annotations help humans reading the YAML\n # but are NOT used for resolution — only the `id` field matters.\n #\n # These IDs reference real articles in the Sanity docs (next dataset):\n # 0ba88f1b... = \"GROQ feature support across Sanity\"\n # 5b9c2863... 
= \"Custom GROQ functions\"\n canonicalDocs:\n - id: \"0ba88f1b-d1a7-418a-9267-2e343d01886a\"\n slug: groq-feature-support-by-context # annotation only — not used for resolution\n reason: \"GROQ feature support across different Sanity contexts\"\n - id: \"5b9c2863-ef01-4565-af8e-ee54e081ee74\"\n slug: custom-groq-functions # annotation only — not used for resolution\n reason: \"Custom GROQ functions and pipelines\"\n\n docCoverage: true\n\n vars:\n task: |\n Explain how GROQ is used across different Sanity contexts.\n Cover the following:\n 1. Which GROQ features are available in each context (API queries,\n webhooks, custom functions, access control)\n 2. How to create and use custom GROQ functions\n 3. Any differences in GROQ support between contexts\n Provide examples demonstrating context-specific GROQ patterns.\n docs: \"\"\n\n assert:\n - type: llm-rubric\n template: task-completion\n criteria:\n - \"Explains GROQ availability across different Sanity contexts\"\n - \"Describes custom GROQ function creation and usage\"\n - \"Notes differences in GROQ support between contexts\"\n\n - type: llm-rubric\n template: code-correctness\n criteria:\n - \"GROQ examples use valid syntax\"\n - \"Custom function examples follow the correct API pattern\"\n\n baseline:\n enabled: true\n rubric: abbreviated\n\n # Example tasks ship disabled so they don't run automatically.\n # Set enabled: true (or remove this block) to activate.\n execution:\n enabled: false\n";
245
+ export const exampleIdBasedRefYaml = "# ──────────────────────────────────────────────────────────────────────\n# Example Task: Document ID-based canonical doc references\n# ──────────────────────────────────────────────────────────────────────\n#\n# Demonstrates using `id` to reference canonical documentation by\n# Sanity document `_id`. This is useful for:\n# - Draft documents that don't have a stable slug yet\n# - Programmatic references from imports or migrations\n# - Documents where you know the _id but not the slug\n#\n# The `id` ref type can also carry optional `slug` and `path` fields\n# as human-readable annotations — these are NOT used for resolution,\n# only for display in logs and reports.\n#\n# This example task ships as a DRAFT so it does not run in production\n# evaluations automatically. To activate it, change status to \"active\"\n# or remove the status line entirely (defaults to active).\n#\n# @see docs/design-docs/canonical-doc-resolution.md\n# ──────────────────────────────────────────────────────────────────────\n\n- id: example-id-based-ref\n description: \"Example — GROQ feature support (ID-based doc references)\"\n\n featureArea: groq\n\n # ID-based canonical doc references.\n #\n # Use the Sanity document _id to reference articles directly.\n # Optional slug/path annotations help humans reading the YAML\n # but are NOT used for resolution — only the `id` field matters.\n #\n # These IDs reference real articles in the Sanity docs (next dataset):\n # 0ba88f1b... = \"GROQ feature support across Sanity\"\n # 5b9c2863... 
= \"Custom GROQ functions\"\n canonicalDocs:\n - id: \"0ba88f1b-d1a7-418a-9267-2e343d01886a\"\n slug: groq-feature-support-by-context # annotation only — not used for resolution\n reason: \"GROQ feature support across different Sanity contexts\"\n - id: \"5b9c2863-ef01-4565-af8e-ee54e081ee74\"\n slug: custom-groq-functions # annotation only — not used for resolution\n reason: \"Custom GROQ functions and pipelines\"\n\n docCoverage: true\n\n vars:\n task: |\n Explain how GROQ is used across different Sanity contexts.\n Cover the following:\n 1. Which GROQ features are available in each context (API queries,\n webhooks, custom functions, access control)\n 2. How to create and use custom GROQ functions\n 3. Any differences in GROQ support between contexts\n Provide examples demonstrating context-specific GROQ patterns.\n docs: \"\"\n\n assert:\n - type: llm-rubric\n template: task-completion\n criteria:\n - \"Explains GROQ availability across different Sanity contexts\"\n - \"Describes custom GROQ function creation and usage\"\n - \"Notes differences in GROQ support between contexts\"\n\n - type: llm-rubric\n template: code-correctness\n criteria:\n - \"GROQ examples use valid syntax\"\n - \"Custom function examples follow the correct API pattern\"\n\n baseline:\n enabled: true\n rubric: abbreviated\n\n # Example tasks ship as drafts so they don't run in production evals.\n # Change to \"active\" (or remove this line) to activate.\n status: draft\n";
250
246
  /** Parsed task data for example-path-based-ref (JSON-safe) */
251
247
  export const examplePathBasedRefData = [
252
248
  {
@@ -292,13 +288,11 @@ export const examplePathBasedRefData = [
292
288
  "enabled": true,
293
289
  "rubric": "abbreviated"
294
290
  },
295
- "execution": {
296
- "enabled": false
297
- }
291
+ "status": "draft"
298
292
  }
299
293
  ];
300
294
  /** Raw YAML string for example-path-based-ref (preserves comments) */
301
- export const examplePathBasedRefYaml = "# ──────────────────────────────────────────────────────────────────────\n# Example Task: Path-based canonical doc references\n# ──────────────────────────────────────────────────────────────────────\n#\n# Demonstrates using `path` to reference canonical documentation.\n# Paths are the preferred reference type because they uniquely identify\n# an article across sections (unlike slugs, which can collide).\n#\n# Path format:\n# - Simple: \"webhooks\" → resolves by slug lookup\n# - Sectioned: \"content-lake/webhooks\" → disambiguates by section + slug\n#\n# This example demonstrates why paths matter: the slug \"documents\"\n# exists in both the \"content-lake\" and \"cli-reference\" sections.\n# Using \"content-lake/documents\" ensures we get the right one.\n#\n# This example task is DISABLED by default. To enable it, either:\n# 1. Remove the execution.enabled: false line below, or\n# 2. Set execution.enabled: true\n#\n# @see docs/design-docs/canonical-doc-resolution.md\n# ──────────────────────────────────────────────────────────────────────\n\n- id: example-path-based-ref\n description: \"Example — GROQ mutations (path-based doc references)\"\n\n featureArea: groq\n\n # Path-based canonical doc references.\n #\n # Use \"section/slug\" format to uniquely identify articles:\n # - \"content-lake/mutations-introduction\" → the mutations article\n # - \"content-lake/documents\" → the documents article in Content Lake\n # (not the CLI \"documents\" article in cli-reference section)\n #\n # The \"documents\" slug exists in two sections — this is exactly why\n # path-based references are preferred over slug-based references.\n canonicalDocs:\n - path: content-lake/mutations-introduction\n reason: \"Introduction to document mutations in the Content Lake\"\n - path: content-lake/documents\n reason: \"Document structure and types (Content Lake, not CLI reference)\"\n\n docCoverage: true\n\n vars:\n task: |\n Explain how to create, 
update, and delete documents in Sanity's\n Content Lake using mutations. Cover:\n 1. The different mutation types (create, createOrReplace, patch, delete)\n 2. Document structure and required fields (_id, _type)\n 3. How to use patch operations to update specific fields\n 4. Best practices for mutation patterns\n Provide working code examples using @sanity/client.\n docs: \"\"\n\n assert:\n - type: llm-rubric\n template: task-completion\n criteria:\n - \"Explains create, createOrReplace, patch, and delete mutations\"\n - \"Describes required document fields (_id, _type)\"\n - \"Shows patch operations for field-level updates\"\n - \"Includes practical code examples\"\n\n - type: llm-rubric\n template: code-correctness\n criteria:\n - \"Uses correct @sanity/client mutation API\"\n - \"Patch operations use valid set/unset/inc syntax\"\n\n baseline:\n enabled: true\n rubric: abbreviated\n\n # Example tasks ship disabled so they don't run automatically.\n # Set enabled: true (or remove this block) to activate.\n execution:\n enabled: false\n";
295
+ export const examplePathBasedRefYaml = "# ──────────────────────────────────────────────────────────────────────\n# Example Task: Path-based canonical doc references\n# ──────────────────────────────────────────────────────────────────────\n#\n# Demonstrates using `path` to reference canonical documentation.\n# Paths are the preferred reference type because they uniquely identify\n# an article across sections (unlike slugs, which can collide).\n#\n# Path format:\n# - Simple: \"webhooks\" → resolves by slug lookup\n# - Sectioned: \"content-lake/webhooks\" → disambiguates by section + slug\n#\n# This example demonstrates why paths matter: the slug \"documents\"\n# exists in both the \"content-lake\" and \"cli-reference\" sections.\n# Using \"content-lake/documents\" ensures we get the right one.\n#\n# This example task ships as a DRAFT so it does not run in production\n# evaluations automatically. To activate it, change status to \"active\"\n# or remove the status line entirely (defaults to active).\n#\n# @see docs/design-docs/canonical-doc-resolution.md\n# ──────────────────────────────────────────────────────────────────────\n\n- id: example-path-based-ref\n description: \"Example — GROQ mutations (path-based doc references)\"\n\n featureArea: groq\n\n # Path-based canonical doc references.\n #\n # Use \"section/slug\" format to uniquely identify articles:\n # - \"content-lake/mutations-introduction\" → the mutations article\n # - \"content-lake/documents\" → the documents article in Content Lake\n # (not the CLI \"documents\" article in cli-reference section)\n #\n # The \"documents\" slug exists in two sections — this is exactly why\n # path-based references are preferred over slug-based references.\n canonicalDocs:\n - path: content-lake/mutations-introduction\n reason: \"Introduction to document mutations in the Content Lake\"\n - path: content-lake/documents\n reason: \"Document structure and types (Content Lake, not CLI reference)\"\n\n docCoverage: 
true\n\n vars:\n task: |\n Explain how to create, update, and delete documents in Sanity's\n Content Lake using mutations. Cover:\n 1. The different mutation types (create, createOrReplace, patch, delete)\n 2. Document structure and required fields (_id, _type)\n 3. How to use patch operations to update specific fields\n 4. Best practices for mutation patterns\n Provide working code examples using @sanity/client.\n docs: \"\"\n\n assert:\n - type: llm-rubric\n template: task-completion\n criteria:\n - \"Explains create, createOrReplace, patch, and delete mutations\"\n - \"Describes required document fields (_id, _type)\"\n - \"Shows patch operations for field-level updates\"\n - \"Includes practical code examples\"\n\n - type: llm-rubric\n template: code-correctness\n criteria:\n - \"Uses correct @sanity/client mutation API\"\n - \"Patch operations use valid set/unset/inc syntax\"\n\n baseline:\n enabled: true\n rubric: abbreviated\n\n # Example tasks ship as drafts so they don't run in production evals.\n # Change to \"active\" (or remove this line) to activate.\n status: draft\n";
302
296
  /** Parsed task data for example-perspective-ref (JSON-safe) */
303
297
  export const examplePerspectiveRefData = [
304
298
  {
@@ -343,13 +337,11 @@ export const examplePerspectiveRefData = [
343
337
  "enabled": true,
344
338
  "rubric": "abbreviated"
345
339
  },
346
- "execution": {
347
- "enabled": false
348
- }
340
+ "status": "draft"
349
341
  }
350
342
  ];
351
343
  /** Raw YAML string for example-perspective-ref (preserves comments) */
352
- export const examplePerspectiveRefYaml = "# ──────────────────────────────────────────────────────────────────────\n# Example Task: Perspective / content release doc references\n# ──────────────────────────────────────────────────────────────────────\n#\n# Demonstrates using `perspective` to reference all documentation\n# articles within a content release. This is the key capability for\n# evaluating NEW feature documentation before it's published.\n#\n# How it works:\n# - A perspective ref is one-to-many: the doc fetcher queries the\n# named release and expands it to ALL articles versioned within it.\n# - Downstream consumers see the same flat DocContext[] regardless\n# of how docs were resolved.\n# - When the release is published, the perspective entry becomes a\n# no-op (articles are now in published). Migrate to explicit path\n# or slug refs at your convenience.\n#\n# This example task is DISABLED by default. To enable it, either:\n# 1. Remove the execution.enabled: false line below, or\n# 2. Set execution.enabled: true\n#\n# @see docs/design-docs/canonical-doc-resolution.md\n# ──────────────────────────────────────────────────────────────────────\n\n- id: example-perspective-ref\n description:\n \"Example — GROQ features from content release (perspective-based doc\n references)\"\n\n featureArea: groq\n\n # Perspective-based canonical doc reference.\n #\n # The perspective ID references a content release in the Sanity\n # Content Lake. 
At evaluation time, the doc fetcher auto-discovers\n # all articles versioned in this release and includes them as\n # canonical documentation context.\n #\n # Release rE9TSJvR4 contains:\n # - \"GROQ-powered webhooks\" (webhooks)\n # - \"Query Cheat Sheet - GROQ\" (query-cheat-sheet)\n # - \"GROQ joins\" (groq-joins)\n #\n # You can combine perspective refs with explicit slug/path/id refs\n # to include foundational published docs alongside release content.\n # Here we add groq-data-types as a complementary published reference.\n canonicalDocs:\n - perspective: rE9TSJvR4\n reason: \"All GROQ documentation updates in the test content release\"\n - slug: groq-data-types\n reason: \"GROQ data type reference (published, stable)\"\n\n docCoverage: true\n\n vars:\n task: |\n Using GROQ, demonstrate advanced query patterns including:\n 1. Joining data across document types using references\n 2. Filtering webhook payloads with GROQ projections\n 3. Using the query cheat sheet patterns for common operations\n 4. Working with different GROQ data types in filters\n Provide working GROQ query examples for each pattern.\n docs: \"\"\n\n assert:\n - type: llm-rubric\n template: task-completion\n criteria:\n - \"Demonstrates GROQ join syntax for cross-document queries\"\n - \"Shows GROQ filter patterns for webhook configuration\"\n - \"Includes practical query examples from cheat sheet patterns\"\n\n - type: llm-rubric\n template: code-correctness\n criteria:\n - \"All GROQ queries use valid syntax\"\n - \"Reference joins use correct dereference operator (->)\"\n\n baseline:\n enabled: true\n rubric: abbreviated\n\n # Example tasks ship disabled so they don't run automatically.\n # Set enabled: true (or remove this block) to activate.\n execution:\n enabled: false\n";
344
+ export const examplePerspectiveRefYaml = "# ──────────────────────────────────────────────────────────────────────\n# Example Task: Perspective / content release doc references\n# ──────────────────────────────────────────────────────────────────────\n#\n# Demonstrates using `perspective` to reference all documentation\n# articles within a content release. This is the key capability for\n# evaluating NEW feature documentation before it's published.\n#\n# How it works:\n# - A perspective ref is one-to-many: the doc fetcher queries the\n# named release and expands it to ALL articles versioned within it.\n# - Downstream consumers see the same flat DocContext[] regardless\n# of how docs were resolved.\n# - When the release is published, the perspective entry becomes a\n# no-op (articles are now in published). Migrate to explicit path\n# or slug refs at your convenience.\n#\n# This example task ships as a DRAFT so it does not run in production\n# evaluations automatically. To activate it, change status to \"active\"\n# or remove the status line entirely (defaults to active).\n#\n# @see docs/design-docs/canonical-doc-resolution.md\n# ──────────────────────────────────────────────────────────────────────\n\n- id: example-perspective-ref\n description:\n \"Example — GROQ features from content release (perspective-based doc\n references)\"\n\n featureArea: groq\n\n # Perspective-based canonical doc reference.\n #\n # The perspective ID references a content release in the Sanity\n # Content Lake. 
At evaluation time, the doc fetcher auto-discovers\n # all articles versioned in this release and includes them as\n # canonical documentation context.\n #\n # Release rE9TSJvR4 contains:\n # - \"GROQ-powered webhooks\" (webhooks)\n # - \"Query Cheat Sheet - GROQ\" (query-cheat-sheet)\n # - \"GROQ joins\" (groq-joins)\n #\n # You can combine perspective refs with explicit slug/path/id refs\n # to include foundational published docs alongside release content.\n # Here we add groq-data-types as a complementary published reference.\n canonicalDocs:\n - perspective: rE9TSJvR4\n reason: \"All GROQ documentation updates in the test content release\"\n - slug: groq-data-types\n reason: \"GROQ data type reference (published, stable)\"\n\n docCoverage: true\n\n vars:\n task: |\n Using GROQ, demonstrate advanced query patterns including:\n 1. Joining data across document types using references\n 2. Filtering webhook payloads with GROQ projections\n 3. Using the query cheat sheet patterns for common operations\n 4. Working with different GROQ data types in filters\n Provide working GROQ query examples for each pattern.\n docs: \"\"\n\n assert:\n - type: llm-rubric\n template: task-completion\n criteria:\n - \"Demonstrates GROQ join syntax for cross-document queries\"\n - \"Shows GROQ filter patterns for webhook configuration\"\n - \"Includes practical query examples from cheat sheet patterns\"\n\n - type: llm-rubric\n template: code-correctness\n criteria:\n - \"All GROQ queries use valid syntax\"\n - \"Reference joins use correct dereference operator (->)\"\n\n baseline:\n enabled: true\n rubric: abbreviated\n\n # Example tasks ship as drafts so they don't run in production evals.\n # Change to \"active\" (or remove this line) to activate.\n status: draft\n";
353
345
  /** Parsed task data for example-studio-custom-input (JSON-safe) */
354
346
  export const exampleStudioCustomInputData = [
355
347
  {
@@ -396,13 +388,11 @@ export const exampleStudioCustomInputData = [
396
388
  "enabled": true,
397
389
  "rubric": "abbreviated"
398
390
  },
399
- "execution": {
400
- "enabled": false
401
- }
391
+ "status": "draft"
402
392
  }
403
393
  ];
404
394
  /** Raw YAML string for example-studio-custom-input (preserves comments) */
405
- export const exampleStudioCustomInputYaml = "# ──────────────────────────────────────────────────────────────────────\n# Example Task: Custom input component in Sanity Studio\n# ──────────────────────────────────────────────────────────────────────\n#\n# This is a starter template — edit it for your own documentation.\n# Delete this file or replace it with your own tasks.\n#\n# This example task is DISABLED by default. To enable it, either:\n# 1. Remove the execution.enabled: false line below, or\n# 2. Set execution.enabled: true\n# ──────────────────────────────────────────────────────────────────────\n\n- id: example-studio-custom-input\n description: \"Example — Custom input component in Sanity Studio\"\n\n featureArea: studio\n\n # Slug-based canonical doc references.\n canonicalDocs:\n - slug: custom-input-widgets\n reason: \"Guide for building custom form inputs in Sanity Studio\"\n - slug: form-components\n reason: \"Form component API and customization patterns\"\n\n docCoverage: true\n referenceSolution: canonical/example-studio-custom-input.ts\n\n vars:\n task: |\n Build a custom string input component for Sanity Studio that shows\n a character count below the input field. The component should accept\n a maxLength option from the field schema and display a warning when\n the text exceeds the limit.\n docs: \"\"\n\n assert:\n - type: llm-rubric\n template: task-completion\n criteria:\n - \"Implements a React component that renders a text input\"\n - \"Displays a live character count\"\n - \"Reads maxLength from schema options\"\n - \"Shows a visual warning when limit is exceeded\"\n\n - type: llm-rubric\n template: code-correctness\n criteria:\n - \"Uses the Sanity UI library for styling\"\n - \"Calls onChange with patch operations\"\n\n baseline:\n enabled: true\n rubric: abbreviated\n\n # Example tasks ship disabled so they don't run automatically.\n # Set enabled: true (or remove this block) to activate.\n execution:\n enabled: false\n";
395
+ export const exampleStudioCustomInputYaml = "# ──────────────────────────────────────────────────────────────────────\n# Example Task: Custom input component in Sanity Studio\n# ──────────────────────────────────────────────────────────────────────\n#\n# This is a starter template — edit it for your own documentation.\n# Delete this file or replace it with your own tasks.\n#\n# This example task ships as a DRAFT so it does not run in production\n# evaluations automatically. To activate it, change status to \"active\"\n# or remove the status line entirely (defaults to active).\n# ──────────────────────────────────────────────────────────────────────\n\n- id: example-studio-custom-input\n description: \"Example — Custom input component in Sanity Studio\"\n\n featureArea: studio\n\n # Slug-based canonical doc references.\n canonicalDocs:\n - slug: custom-input-widgets\n reason: \"Guide for building custom form inputs in Sanity Studio\"\n - slug: form-components\n reason: \"Form component API and customization patterns\"\n\n docCoverage: true\n referenceSolution: canonical/example-studio-custom-input.ts\n\n vars:\n task: |\n Build a custom string input component for Sanity Studio that shows\n a character count below the input field. 
The component should accept\n a maxLength option from the field schema and display a warning when\n the text exceeds the limit.\n docs: \"\"\n\n assert:\n - type: llm-rubric\n template: task-completion\n criteria:\n - \"Implements a React component that renders a text input\"\n - \"Displays a live character count\"\n - \"Reads maxLength from schema options\"\n - \"Shows a visual warning when limit is exceeded\"\n\n - type: llm-rubric\n template: code-correctness\n criteria:\n - \"Uses the Sanity UI library for styling\"\n - \"Calls onChange with patch operations\"\n\n baseline:\n enabled: true\n rubric: abbreviated\n\n # Example tasks ship as drafts so they don't run in production evals.\n # Change to \"active\" (or remove this line) to activate.\n status: draft\n";
406
396
  // ---------------------------------------------------------------------------
407
397
  // Aggregate task exports
408
398
  // ---------------------------------------------------------------------------
@@ -112,6 +112,8 @@ export interface TaskDefinition {
112
112
  baseline?: BaselineConfig;
113
113
  /** Additional template variables beyond task (e.g., custom vars) */
114
114
  extraVars?: Record<string, unknown>;
115
+ /** Lifecycle status — controls pipeline inclusion. Absent = "active". */
116
+ status?: "active" | "archived" | "draft" | "paused";
115
117
  /** Freeform labels for filtering and organization */
116
118
  tags?: string[];
117
119
  }
@@ -179,6 +179,8 @@ export interface FeatureScore {
179
179
  export interface FilterOptions {
180
180
  /** Feature areas to include (filename stems, e.g., ["groq", "frameworks"]) */
181
181
  areas?: string[];
182
+ /** Include draft-status tasks in addition to active tasks */
183
+ includeDrafts?: boolean;
182
184
  /** Tags to include — tasks must have at least one matching tag */
183
185
  tags?: string[];
184
186
  /** Specific task IDs to include (e.g., ["groq-blog-queries"]) */
@@ -452,6 +454,14 @@ export interface PipelineState {
452
454
  * Consumed by GenerateConfigsStep and RunEvalStep to narrow scope.
453
455
  */
454
456
  releaseAutoScope?: ReleaseAutoScope;
457
+ /**
458
+ * Feature areas that scored below the critical threshold (40).
459
+ * Set by CalculateScoresStep, consumed by the orchestrator for
460
+ * final pipeline result reporting. The pipeline continues running
461
+ * (gap-analysis, publish, report, compare) even when areas are
462
+ * below threshold — this is informational, not a hard failure.
463
+ */
464
+ belowCritical?: string[];
455
465
  }
456
466
  /**
457
467
  * Release auto-scope metadata — which tasks are affected by a content
@@ -1018,6 +1028,8 @@ export interface Report {
1018
1028
  summary: ScoreSummary;
1019
1029
  /** Optional human-supplied label */
1020
1030
  tag?: string;
1031
+ /** Auto-generated descriptive title for discoverability and sharing */
1032
+ title?: string;
1021
1033
  }
1022
1034
  /** Branded type for report identifiers (UUID v7 for time-sortability) */
1023
1035
  export type ReportId = string & {
@@ -34,6 +34,12 @@ export type RubricTemplateName = (typeof RUBRIC_TEMPLATE_NAMES)[number];
34
34
  export declare const RepoTaskSchema: z.ZodObject<{
35
35
  id: z.ZodString;
36
36
  description: z.ZodString;
37
+ status: z.ZodDefault<z.ZodOptional<z.ZodEnum<{
38
+ active: "active";
39
+ draft: "draft";
40
+ paused: "paused";
41
+ archived: "archived";
42
+ }>>>;
37
43
  featureArea: z.ZodString;
38
44
  tags: z.ZodOptional<z.ZodArray<z.ZodString>>;
39
45
  canonicalDocs: z.ZodDefault<z.ZodOptional<z.ZodArray<z.ZodUnion<readonly [z.ZodObject<{
@@ -113,6 +119,12 @@ export type RepoTask = z.infer<typeof RepoTaskSchema>;
113
119
  export declare const RepoTaskFileSchema: z.ZodArray<z.ZodObject<{
114
120
  id: z.ZodString;
115
121
  description: z.ZodString;
122
+ status: z.ZodDefault<z.ZodOptional<z.ZodEnum<{
123
+ active: "active";
124
+ draft: "draft";
125
+ paused: "paused";
126
+ archived: "archived";
127
+ }>>>;
116
128
  featureArea: z.ZodString;
117
129
  tags: z.ZodOptional<z.ZodArray<z.ZodString>>;
118
130
  canonicalDocs: z.ZodDefault<z.ZodOptional<z.ZodArray<z.ZodUnion<readonly [z.ZodObject<{
@@ -151,6 +151,10 @@ export const RepoTaskSchema = z.object({
151
151
  .min(1)
152
152
  .regex(/^[a-z0-9][a-z0-9-]*$/, "Task ID must be lowercase alphanumeric with hyphens"),
153
153
  description: z.string().min(1),
154
+ status: z
155
+ .enum(["active", "draft", "paused", "archived"])
156
+ .optional()
157
+ .default("active"),
154
158
  featureArea: z
155
159
  .string()
156
160
  .min(1)
@@ -31,7 +31,14 @@ const TASKS_QUERY = /* groq */ `
31
31
  *[_type == "ailf.task"
32
32
  && (!defined($areas) || featureArea->areaId.current in $areas)
33
33
  && (!defined($taskIds) || id.current in $taskIds)
34
- && (execution.enabled != false)
34
+ && (
35
+ // Status-based filtering (unified — replaces execution.enabled)
36
+ status == "active"
37
+ || !defined(status)
38
+ || ($includeDrafts == true && status == "draft")
39
+ // Explicit --task targeting bypasses draft/paused (but not archived)
40
+ || (defined($taskIds) && status != "archived")
41
+ )
35
42
  && (!defined($tags) || count((tags)[@ in $tags]) > 0)
36
43
  ] | order(featureArea->areaId.current asc, id.current asc) {
37
44
  "taskId": id.current,
@@ -92,6 +99,7 @@ function buildGroqParams(filter) {
92
99
  areas: filter?.areas && filter.areas.length > 0
93
100
  ? filter.areas.map((a) => a.toLowerCase())
94
101
  : null,
102
+ includeDrafts: filter?.includeDrafts ?? false,
95
103
  tags: filter?.tags && filter.tags.length > 0 ? filter.tags : null,
96
104
  taskIds: filter?.taskIds && filter.taskIds.length > 0 ? filter.taskIds : null,
97
105
  };
@@ -60,7 +60,8 @@ export class RepoTaskSource {
60
60
  // Filter stages:
61
61
  // 1. Area filter — skip tasks outside requested feature areas
62
62
  // 2. Task ID filter — skip tasks not matching explicit task IDs
63
- // 3. Execution.enabled — skip tasks explicitly disabled
63
+ // 3. Status filter — always skip archived; skip paused/draft unless targeted by ID (drafts also pass with includeDrafts)
64
+ // 4. Tag filter — skip tasks not matching requested tags
64
65
  // Area filter
65
66
  if (filter?.areas &&
66
67
  filter.areas.length > 0 &&
@@ -75,9 +76,22 @@ export class RepoTaskSource {
75
76
  !filter.taskIds.includes(entry.id)) {
76
77
  continue;
77
78
  }
78
- // Execution.enabled filter — skip tasks explicitly disabled
79
- if (entry.execution?.enabled === false) {
80
- continue;
79
+ // Status filter — unified lifecycle control
80
+ // Resolve effective status: explicit status field wins,
81
+ // then fall back to execution.enabled for backwards compat
82
+ const effectiveStatus = entry.status ??
83
+ (entry.execution?.enabled === false ? "paused" : "active");
84
+ const isTargetedById = filter?.taskIds && filter.taskIds.includes(entry.id);
85
+ if (effectiveStatus === "archived") {
86
+ continue; // Archived is always excluded, even with --task
87
+ }
88
+ if (effectiveStatus === "paused" && !isTargetedById) {
89
+ continue; // Paused skipped unless explicitly targeted
90
+ }
91
+ if (effectiveStatus === "draft" &&
92
+ !isTargetedById &&
93
+ !filter?.includeDrafts) {
94
+ continue; // Draft skipped unless targeted or includeDrafts
81
95
  }
82
96
  // Tag filter — skip tasks that don't match any requested tag
83
97
  if (filter?.tags &&
@@ -114,6 +128,7 @@ function mapToTaskDefinition(raw) {
114
128
  taskPrompt: typeof task === "string" ? task : "",
115
129
  ...(raw.baseline ? { baseline: raw.baseline } : {}),
116
130
  ...(extraVars ? { extraVars } : {}),
131
+ ...(raw.status && raw.status !== "active" ? { status: raw.status } : {}),
117
132
  ...(raw.tags?.length ? { tags: raw.tags } : {}),
118
133
  };
119
134
  }
@@ -36,11 +36,15 @@ export function createCalculateScoresCommand() {
36
36
  remote: false,
37
37
  apiUrl: "https://ailf-api.sanity.build",
38
38
  });
39
- calculateAndWriteScores({
39
+ const result = calculateAndWriteScores({
40
40
  resultsPath,
41
41
  rootDir: ctx.config.rootDir,
42
42
  source: opts.source,
43
43
  });
44
+ // At the CLI boundary, exit non-zero if areas are below threshold
45
+ if (result.belowCritical.length > 0) {
46
+ process.exitCode = 1;
47
+ }
44
48
  }
45
49
  catch (err) {
46
50
  process.exitCode = 1;
@@ -24,6 +24,7 @@ import { fileURLToPath } from "url";
24
24
  import { Command } from "commander";
25
25
  import { createAppContext } from "../composition-root.js";
26
26
  import { buildProvenance, } from "../pipeline/provenance.js";
27
+ import { generateReportTitle } from "../pipeline/report-title.js";
27
28
  import { generateReportId, } from "../report-store.js";
28
29
  import { withRetry } from "../sinks/retry.js";
29
30
  const __dirname = dirname(fileURLToPath(import.meta.url));
@@ -166,6 +167,7 @@ async function runPublishCommand(summaryPath, opts) {
166
167
  };
167
168
  }
168
169
  const reportId = generateReportId();
170
+ const title = generateReportTitle({ provenance });
169
171
  const report = {
170
172
  comparison: comparison ?? undefined,
171
173
  completedAt: now,
@@ -174,6 +176,7 @@ async function runPublishCommand(summaryPath, opts) {
174
176
  provenance,
175
177
  summary,
176
178
  tag: opts.tag,
179
+ title,
177
180
  };
178
181
  // -----------------------------------------------------------------------
179
182
  // 4. Dry run — print preview and exit