@sanity/ailf-studio 1.17.1 → 1.18.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -300,8 +300,6 @@ The plugin exports building blocks for custom views or extensions.
300
300
  | `ProvenanceData` | Report provenance metadata |
301
301
  | `SummaryData` | Score summary (overall + per-area + per-model) |
302
302
  | `ScoreItem` | Individual area score entry |
303
- | `RecommendationGap` | Single gap analysis recommendation |
304
- | `RecommendationsData` | Full recommendations payload |
305
303
  | `JudgmentData` | Individual grader judgment with reasoning |
306
304
  | `DocumentRef` | Canonical document reference (re-exported from `@sanity/ailf-shared`) |
307
305
  | `ScoreGrade` | Letter grade type (re-exported from `@sanity/ailf-shared`) |
package/dist/index.d.ts CHANGED
@@ -594,23 +594,6 @@ interface StudioArtifactRef {
594
594
  */
595
595
  sourceRunId?: string;
596
596
  }
597
- /** A single gap/recommendation from gap analysis */
598
- interface RecommendationGap {
599
- affectedTaskIds: string[];
600
- area: string;
601
- bottleneckDimensions: string[];
602
- confidence: "high" | "low" | "medium";
603
- estimatedLift: number;
604
- failureMode: string;
605
- priority: number;
606
- remediation: string;
607
- }
608
- /** Gap analysis recommendations stored in Sanity */
609
- interface RecommendationsData {
610
- gaps: RecommendationGap[];
611
- generatedAt: string;
612
- totalPotentialLift: number;
613
- }
614
597
  /**
615
598
  * Per-test result stored in reports for drill-down and audit.
616
599
  * Mirrors StoredTestResult from @sanity/ailf-core.
@@ -739,6 +722,7 @@ interface SummaryData {
739
722
  graderPrompts?: StudioArtifactRef;
740
723
  traces?: StudioArtifactRef;
741
724
  pipelineContext?: StudioArtifactRef;
725
+ diagnosis?: StudioArtifactRef;
742
726
  };
743
727
  belowCritical: string[];
744
728
  /** All Sanity documents used across the entire evaluation */
@@ -761,8 +745,6 @@ interface SummaryData {
761
745
  lowScoringJudgments: JudgmentData[] | null;
762
746
  /** Per-model score breakdown (one entry per LLM model evaluated) */
763
747
  perModel?: PerModelData[] | null;
764
- /** Gap analysis recommendations (when gap analysis was run) */
765
- recommendations: null | RecommendationsData;
766
748
  /**
767
749
  * Slim failure-mode summary (W0051). `topTitles[*]` carry the
768
750
  * `graderJudgments`-era `id = formatEntryKey({mode, category})` so the