npm - @sanity/ailf-studio - Versions diffs - 1.0.0 → 1.1.0 - Mend

@sanity/ailf-studio 1.0.0 → 1.1.0

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the differences between those package versions as they appear in their respective public registries.

Files changed (3) hide show

package/dist/index.d.ts CHANGED Viewed

@@ -466,6 +466,18 @@ interface OverallAgentBehaviorData {
     totalUniqueDocSlugs: number;
     totalUniqueSearchQueries: number;
 }
+/** Per-model score breakdown stored in summary.perModel */
+interface PerModelData {
+    modelId: string;
+    label: string;
+    overall: {
+        avgScore: number;
+        avgDocLift: number;
+        cost?: null | number;
+        testCount: number;
+    };
+    scores: ScoreItem[];
+}
 /** Summary data as stored in Sanity */
 interface SummaryData {
     /** Per-feature agent behavior data (only present when agentic mode ran) */
@@ -489,6 +501,8 @@ interface SummaryData {
     };
     /** Low-scoring grader judgments — the raw "red text" explaining failures */
     lowScoringJudgments: JudgmentData[] | null;
+    /** Per-model score breakdown (one entry per LLM model evaluated) */
+    perModel?: PerModelData[] | null;
     /** Gap analysis recommendations (when gap analysis was run) */
     recommendations: null | RecommendationsData;
     scores: ScoreItem[];
@@ -637,6 +651,7 @@ declare const GLOSSARY: {
     readonly efficiencyAnomalies: "Areas where agent efficiency exceeds 100% — meaning agents perform better with self-found docs than with gold-standard docs injected directly. This can indicate doc quality issues (injected docs confuse the model) or agent memorization.";
     readonly docLiftWins: "Areas where documentation boosts AI performance by 5 or more points. Higher doc lift means the docs are providing crucial information that the model doesn't already know.";
     readonly retrievalExcellence: "Areas where AI agents successfully find and use at least 85% of the available doc quality through web search. Good retrieval means your docs are well-indexed and easy for agents to discover.";
+    readonly modelBreakdown: "Break down scores by individual LLM model. The default 'All Models' view shows the cross-model average. Select a specific model to see how it performed independently — useful for spotting models that struggle with specific feature areas.";
     readonly strengths: "What's working well: high-scoring areas, dimensions where the docs are strong, and areas where AI agents successfully find and use the documentation.";
     readonly recommendations: "Prioritized remediation plan from gap analysis. Each recommendation identifies a documentation problem, the affected feature area, and the estimated score lift from fixing it.";
     readonly totalPotentialLift: "Aggregate potential score lift if all identified gaps were fixed. This is a conservative estimate — each gap targets the median of non-bottlenecked dimensions, not 100.";
@@ -967,4 +982,4 @@ declare function ailfTool(options?: AilfToolOptions): Tool;
  */
 declare const ailfPlugin: sanity.Plugin<void>;
-export { AssertionInput, CanonicalDocInput, type ComparisonData, type ContentImpactItem, GLOSSARY, GraduateToNativeAction, HelpDrawer, HelpProvider, type HelpTopic, MirrorBanner, type ProvenanceData, ReleasePicker, type ReportDetail, type ReportListItem, type RunEvaluationActionOptions, RunTaskEvaluationAction, type ScoreItem, type SummaryData, SyncStatusBadge, type TimelineDataPoint, ailfPlugin, ailfTool, articleSearchQuery, comparisonPairQuery, contentImpactQuery, createRunEvaluationAction, deriveHelpTopic, distinctAreasQuery, distinctModesQuery, distinctPerspectivesQuery, distinctSourcesQuery, distinctTargetDocumentsQuery, distinctTriggersQuery, evalRequestSchema, featureAreaSchema, findTopic, latestReportsQuery, recentDocumentEvalsQuery, referenceSolutionSchema, reportDetailQuery, reportSchema, scoreTimelineQuery, searchTopics, taskSchema, useHelp, webhookConfigSchema };
+export { AssertionInput, CanonicalDocInput, type ComparisonData, type ContentImpactItem, GLOSSARY, GraduateToNativeAction, HelpDrawer, HelpProvider, type HelpTopic, MirrorBanner, type PerModelData, type ProvenanceData, ReleasePicker, type ReportDetail, type ReportListItem, type RunEvaluationActionOptions, RunTaskEvaluationAction, type ScoreItem, type SummaryData, SyncStatusBadge, type TimelineDataPoint, ailfPlugin, ailfTool, articleSearchQuery, comparisonPairQuery, contentImpactQuery, createRunEvaluationAction, deriveHelpTopic, distinctAreasQuery, distinctModesQuery, distinctPerspectivesQuery, distinctSourcesQuery, distinctTargetDocumentsQuery, distinctTriggersQuery, evalRequestSchema, featureAreaSchema, findTopic, latestReportsQuery, recentDocumentEvalsQuery, referenceSolutionSchema, reportDetailQuery, reportSchema, scoreTimelineQuery, searchTopics, taskSchema, useHelp, webhookConfigSchema };