@sanity/ailf-studio 1.0.0 → 1.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.d.ts +16 -1
- package/dist/index.js +753 -686
- package/package.json +1 -1
package/dist/index.d.ts
CHANGED
|
@@ -466,6 +466,18 @@ interface OverallAgentBehaviorData {
|
|
|
466
466
|
totalUniqueDocSlugs: number;
|
|
467
467
|
totalUniqueSearchQueries: number;
|
|
468
468
|
}
|
|
469
|
+
/** Per-model score breakdown stored in summary.perModel */
|
|
470
|
+
interface PerModelData {
|
|
471
|
+
modelId: string;
|
|
472
|
+
label: string;
|
|
473
|
+
overall: {
|
|
474
|
+
avgScore: number;
|
|
475
|
+
avgDocLift: number;
|
|
476
|
+
cost?: null | number;
|
|
477
|
+
testCount: number;
|
|
478
|
+
};
|
|
479
|
+
scores: ScoreItem[];
|
|
480
|
+
}
|
|
469
481
|
/** Summary data as stored in Sanity */
|
|
470
482
|
interface SummaryData {
|
|
471
483
|
/** Per-feature agent behavior data (only present when agentic mode ran) */
|
|
@@ -489,6 +501,8 @@ interface SummaryData {
|
|
|
489
501
|
};
|
|
490
502
|
/** Low-scoring grader judgments — the raw "red text" explaining failures */
|
|
491
503
|
lowScoringJudgments: JudgmentData[] | null;
|
|
504
|
+
/** Per-model score breakdown (one entry per LLM model evaluated) */
|
|
505
|
+
perModel?: PerModelData[] | null;
|
|
492
506
|
/** Gap analysis recommendations (when gap analysis was run) */
|
|
493
507
|
recommendations: null | RecommendationsData;
|
|
494
508
|
scores: ScoreItem[];
|
|
@@ -637,6 +651,7 @@ declare const GLOSSARY: {
|
|
|
637
651
|
readonly efficiencyAnomalies: "Areas where agent efficiency exceeds 100% — meaning agents perform better with self-found docs than with gold-standard docs injected directly. This can indicate doc quality issues (injected docs confuse the model) or agent memorization.";
|
|
638
652
|
readonly docLiftWins: "Areas where documentation boosts AI performance by 5 or more points. Higher doc lift means the docs are providing crucial information that the model doesn't already know.";
|
|
639
653
|
readonly retrievalExcellence: "Areas where AI agents successfully find and use at least 85% of the available doc quality through web search. Good retrieval means your docs are well-indexed and easy for agents to discover.";
|
|
654
|
+
readonly modelBreakdown: "Break down scores by individual LLM model. The default 'All Models' view shows the cross-model average. Select a specific model to see how it performed independently — useful for spotting models that struggle with specific feature areas.";
|
|
640
655
|
readonly strengths: "What's working well: high-scoring areas, dimensions where the docs are strong, and areas where AI agents successfully find and use the documentation.";
|
|
641
656
|
readonly recommendations: "Prioritized remediation plan from gap analysis. Each recommendation identifies a documentation problem, the affected feature area, and the estimated score lift from fixing it.";
|
|
642
657
|
readonly totalPotentialLift: "Aggregate potential score lift if all identified gaps were fixed. This is a conservative estimate — each gap targets the median of non-bottlenecked dimensions, not 100.";
|
|
@@ -967,4 +982,4 @@ declare function ailfTool(options?: AilfToolOptions): Tool;
|
|
|
967
982
|
*/
|
|
968
983
|
declare const ailfPlugin: sanity.Plugin<void>;
|
|
969
984
|
|
|
970
|
-
export { AssertionInput, CanonicalDocInput, type ComparisonData, type ContentImpactItem, GLOSSARY, GraduateToNativeAction, HelpDrawer, HelpProvider, type HelpTopic, MirrorBanner, type ProvenanceData, ReleasePicker, type ReportDetail, type ReportListItem, type RunEvaluationActionOptions, RunTaskEvaluationAction, type ScoreItem, type SummaryData, SyncStatusBadge, type TimelineDataPoint, ailfPlugin, ailfTool, articleSearchQuery, comparisonPairQuery, contentImpactQuery, createRunEvaluationAction, deriveHelpTopic, distinctAreasQuery, distinctModesQuery, distinctPerspectivesQuery, distinctSourcesQuery, distinctTargetDocumentsQuery, distinctTriggersQuery, evalRequestSchema, featureAreaSchema, findTopic, latestReportsQuery, recentDocumentEvalsQuery, referenceSolutionSchema, reportDetailQuery, reportSchema, scoreTimelineQuery, searchTopics, taskSchema, useHelp, webhookConfigSchema };
|
|
985
|
+
export { AssertionInput, CanonicalDocInput, type ComparisonData, type ContentImpactItem, GLOSSARY, GraduateToNativeAction, HelpDrawer, HelpProvider, type HelpTopic, MirrorBanner, type PerModelData, type ProvenanceData, ReleasePicker, type ReportDetail, type ReportListItem, type RunEvaluationActionOptions, RunTaskEvaluationAction, type ScoreItem, type SummaryData, SyncStatusBadge, type TimelineDataPoint, ailfPlugin, ailfTool, articleSearchQuery, comparisonPairQuery, contentImpactQuery, createRunEvaluationAction, deriveHelpTopic, distinctAreasQuery, distinctModesQuery, distinctPerspectivesQuery, distinctSourcesQuery, distinctTargetDocumentsQuery, distinctTriggersQuery, evalRequestSchema, featureAreaSchema, findTopic, latestReportsQuery, recentDocumentEvalsQuery, referenceSolutionSchema, reportDetailQuery, reportSchema, scoreTimelineQuery, searchTopics, taskSchema, useHelp, webhookConfigSchema };
|