@sanity/ailf-studio 2.0.0 → 2.0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.js +8 -7
- package/package.json +1 -1
package/dist/index.js
CHANGED
|
@@ -9,11 +9,11 @@ import { useClient } from "sanity";
|
|
|
9
9
|
|
|
10
10
|
// src/lib/constants.ts
|
|
11
11
|
var API_VERSION = "2026-03-11";
|
|
12
|
-
var isProduction = process.env.NODE_ENV === "production";
|
|
13
|
-
var ARTIFACT_API_BASE_URL = isProduction ? "https://ailf-api.sanity.build/v1" : "http://localhost:3000/v1";
|
|
14
12
|
var ENV = globalThis.process?.env ?? {};
|
|
13
|
+
var isProduction = process.env.NODE_ENV === "production";
|
|
14
|
+
var ARTIFACT_API_BASE_URL = ENV.SANITY_STUDIO_AILF_API_BASE_URL ?? (isProduction ? "https://ailf-api.sanity.build/v1" : "http://localhost:3000/v1");
|
|
15
15
|
var REFERENCE_DATASET = ENV.SANITY_STUDIO_AILF_REF_DATASET ?? "next";
|
|
16
|
-
var
|
|
16
|
+
var EDITORIAL_STUDIO_URL = ENV.SANITY_STUDIO_ORIGIN ?? "https://admin.sanity.io";
|
|
17
17
|
var AILF_DATASET = ENV.SANITY_STUDIO_AILF_DATASET ?? "ailf-prod-private";
|
|
18
18
|
|
|
19
19
|
// src/actions/ArchiveTaskAction.tsx
|
|
@@ -681,7 +681,7 @@ Click into any report for the full breakdown: per-area scores, diagnostics, and
|
|
|
681
681
|
{
|
|
682
682
|
"id": "scoring-model",
|
|
683
683
|
"title": "Understanding Scores",
|
|
684
|
-
"body": "## The three dimensions\n\nEvery evaluation task is scored on three dimensions, each graded 0\u2013100:\n\n- **Task Completion (50% weight)** \u2014 Can the AI implement the requested feature?\n Does the output actually do what was asked?\n- **Code Correctness (25% weight)** \u2014 Is the generated code idiomatic, correct,\n and following best practices?\n- **Doc Coverage (25% weight)** \u2014 Did the documentation provide the information\n needed to implement the feature?\n\n## How the overall score is calculated\n\nThe three dimensions combine into a single **AI Literacy Score** per task using\nnamed scoring profiles from `config/rubrics.
|
|
684
|
+
"body": "## The three dimensions\n\nEvery evaluation task is scored on three dimensions, each graded 0\u2013100:\n\n- **Task Completion (50% weight)** \u2014 Can the AI implement the requested feature?\n Does the output actually do what was asked?\n- **Code Correctness (25% weight)** \u2014 Is the generated code idiomatic, correct,\n and following best practices?\n- **Doc Coverage (25% weight)** \u2014 Did the documentation provide the information\n needed to implement the feature?\n\n## How the overall score is calculated\n\nThe three dimensions combine into a single **AI Literacy Score** per task using\nnamed scoring profiles from `packages/eval/config/rubrics.ts`:\n\n```\nGold (with docs): Total = Task \xD7 0.50 + Code \xD7 0.25 + Docs \xD7 0.25\nBaseline (no docs): Total = Task \xD7 0.60 + Code \xD7 0.40\n```\n\nThe gold profile includes all three dimensions. The baseline profile excludes\nDoc Coverage because it is undefined when no documentation is provided. This\nensures Doc Lift (ceiling \u2212 floor) is a clean structural measurement of\ndocumentation value.\n\nThe weighted composite produces a score from 0\u2013100. Scores are then averaged\nacross all tasks in a feature area to produce a **per-area score**, and across\nall areas to produce the **overall score**.\n\n## What the numbers mean\n\n| Score range | Interpretation |\n| ------------ | ----------------------------------------------------------------- |\n| **80\u2013100** | Docs are working well \u2014 AI agents produce correct implementations |\n| **70\u201379** | Needs attention \u2014 there may be gaps in specific dimensions |\n| **Below 70** | Weak \u2014 AI agents consistently struggle with this area |\n\n## Ceiling decomposition (baseline mode)\n\nWhen running in baseline mode, each task is evaluated twice \u2014 with and without\ndocumentation. This produces:\n\n- **Floor score** \u2014 Score without docs (what the model knows from training data\n alone)\n- **Ceiling score** \u2014 Score with gold-standard docs injected directly into the\n prompt\n- **Doc Lift** \u2014 Ceiling minus floor. Positive means docs help; negative means\n docs hurt.\n- **Doc Quality Gap** \u2014 100 minus ceiling. Room for documentation improvement.\n\n## Three-layer decomposition (full mode)\n\nFull mode adds a third measurement \u2014 what happens when AI agents find docs on\ntheir own:\n\n- **Floor** \u2014 No docs (parametric knowledge only)\n- **Ceiling** \u2014 Gold-standard docs injected (best the docs can do)\n- **Actual** \u2014 Agent-retrieved docs (real-world performance)\n- **Retrieval Gap** \u2014 Ceiling minus actual (quality lost to findability)\n- **Infrastructure Efficiency** \u2014 Actual \xF7 ceiling (what fraction of doc quality\n reaches agents)\n\n## Cost tracking\n\nEach evaluation also tracks token costs:\n\n- **Provider cost** \u2014 Token usage for generating implementations\n- **Grader cost** \u2014 Token usage for the grading model's assessments\n- **Total cost** \u2014 Both combined, reported in the score summary",
|
|
685
685
|
"source": "docs/help/scoring-model.md",
|
|
686
686
|
"related": [
|
|
687
687
|
"three-layer",
|
|
@@ -3135,9 +3135,8 @@ function ReleasePicker(props) {
|
|
|
3135
3135
|
|
|
3136
3136
|
// src/schema/task.ts
|
|
3137
3137
|
function articleStudioUrl(document2) {
|
|
3138
|
-
const origin = typeof window !== "undefined" && window.location?.origin ? window.location.origin : "";
|
|
3139
3138
|
const typePart = document2.type ? `;type=${document2.type}` : "";
|
|
3140
|
-
return `${
|
|
3139
|
+
return `${EDITORIAL_STUDIO_URL}/intent/edit/id=${document2.id}${typePart}/`;
|
|
3141
3140
|
}
|
|
3142
3141
|
var ASSERTION_TYPES = [
|
|
3143
3142
|
{ title: "LLM Rubric", value: "llm-rubric" },
|
|
@@ -13569,7 +13568,9 @@ function DocBadge({
|
|
|
13569
13568
|
children: isLinked ? /* @__PURE__ */ jsx69(
|
|
13570
13569
|
"a",
|
|
13571
13570
|
{
|
|
13572
|
-
href:
|
|
13571
|
+
href: `${EDITORIAL_STUDIO_URL}/intent/edit/id=${doc.documentId}/`,
|
|
13572
|
+
target: "_blank",
|
|
13573
|
+
rel: "noopener noreferrer",
|
|
13573
13574
|
onMouseEnter: () => setHovered(true),
|
|
13574
13575
|
onMouseLeave: () => setHovered(false),
|
|
13575
13576
|
style: {
|