@sanity/ailf-studio 1.3.1 → 1.6.0

This diff shows the changes between two publicly available versions of the package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
Files changed (3):
  1. package/dist/index.d.ts +71 -8
  2. package/dist/index.js +3607 -1641
  3. package/package.json +5 -2
package/dist/index.d.ts CHANGED
@@ -318,6 +318,8 @@ interface ProvenanceData {
318
318
  id: string;
319
319
  label: string;
320
320
  }[];
321
+ /** Identity of the pipeline run that produced this report (D0032) */
322
+ runId: string;
321
323
  /** @deprecated Use `promptfooUrls` when available */
322
324
  promptfooUrl?: string;
323
325
  /** Per-mode Promptfoo share URLs (one per sub-eval) */
@@ -434,13 +436,36 @@ interface ScoreItem {
434
436
  /** Ceiling score — gold-standard docs injected */
435
437
  ceilingScore?: number;
436
438
  }
437
- /** Reference to an artifact stored in an external object store (GCS). */
439
+ /**
440
+ * A single row in `ArtifactRef.entries[]`. W0051 adds optional `preview` +
441
+ * `association` + `truncated` so list-view renderers can consume the
442
+ * descriptor-extracted preview without fetching the external payload.
443
+ * Older manifests (pre-W0051) carry only `{ key, bytes }`; readers treat
444
+ * missing fields as absent data, not as errors.
445
+ */
446
+ interface ArtifactRefEntry {
447
+ key: string;
448
+ bytes: number;
449
+ association?: Record<string, string | number>;
450
+ truncated?: boolean;
451
+ preview?: unknown;
452
+ }
453
+ /** Reference to an artifact stored in an external object store. */
438
454
  interface ArtifactRef {
439
- store: "gcs";
455
+ store: "gcs" | "local";
440
456
  bucket: string;
441
457
  path: string;
442
458
  bytes?: number;
443
459
  entryCount?: number;
460
+ /**
461
+ * Added in W0047 / D0032. Missing on pre-W0047 legacy refs, in which case
462
+ * dispatchers must treat it as `"bulk"` (the only layout that existed then).
463
+ */
464
+ layout?: "bulk" | "per-entry";
465
+ /** Per-entry index (populated for `layout: "per-entry"` refs only). */
466
+ entries?: ArtifactRefEntry[];
467
+ truncated?: boolean;
468
+ preview?: unknown;
444
469
  }
445
470
  /** A single gap/recommendation from gap analysis */
446
471
  interface RecommendationGap {
@@ -503,16 +528,33 @@ interface JudgmentData {
503
528
  score: number;
504
529
  taskId: string;
505
530
  }
506
- /** Per-feature agent behavior data — how agents interacted with docs */
531
+ /**
532
+ * Per-feature agent behavior data — how agents interacted with docs.
533
+ *
534
+ * W0051 slimmed the full `searchQueries` / `docSlugsVisited` arrays out
535
+ * of the Report summary and replaced them with `*Sample` (bounded first-N)
536
+ * + `*Count` (distinct total). Older reports still carry the full arrays
537
+ * under the legacy names; both shapes are optional here so the
538
+ * `AgentBehaviorCard` renders either fluidly.
539
+ */
507
540
  interface FeatureAgentBehaviorData {
508
541
  avgDocPagesVisited: number;
509
542
  avgNetworkTimeMs: number;
510
543
  avgSearchesPerformed: number;
511
- docSlugsVisited: string[];
512
544
  externalDomains: string[];
513
545
  feature: string;
514
- searchQueries: string[];
515
546
  tasksWithBehaviorData: number;
547
+ /** W0051 slim: bounded sample of unique search queries (first ~5). */
548
+ searchQueriesSample?: string[];
549
+ /** W0051 slim: count of distinct queries in the full traces artifact. */
550
+ searchQueriesCount?: number;
551
+ /** W0051 slim: bounded sample of unique doc slugs visited. */
552
+ docSlugsVisitedSample?: string[];
553
+ /** W0051 slim: count of distinct slugs in the full traces artifact. */
554
+ docSlugsVisitedCount?: number;
555
+ /** Legacy (pre-W0051): the full arrays inlined on the Report. */
556
+ searchQueries?: string[];
557
+ docSlugsVisited?: string[];
516
558
  }
517
559
  /** Overall agent behavior stats (aggregated across all features) */
518
560
  interface OverallAgentBehaviorData {
@@ -539,11 +581,14 @@ interface PerModelData {
539
581
  interface SummaryData {
540
582
  /** Per-feature agent behavior data (only present when agentic mode ran) */
541
583
  agentBehavior?: FeatureAgentBehaviorData[] | null;
542
- /** External artifact references — present when pipeline uploads to GCS (D0030) */
543
- artifacts?: {
584
+ /** External artifact references — present when pipeline uploads to GCS (D0032) */
585
+ artifactManifest?: {
544
586
  testOutputs?: ArtifactRef;
545
587
  renderedPrompts?: ArtifactRef;
546
588
  rawResults?: ArtifactRef;
589
+ graderPrompts?: ArtifactRef;
590
+ taskDefinitions?: ArtifactRef;
591
+ evalResults?: ArtifactRef;
547
592
  traces?: ArtifactRef;
548
593
  };
549
594
  belowCritical: string[];
@@ -569,6 +614,24 @@ interface SummaryData {
569
614
  perModel?: PerModelData[] | null;
570
615
  /** Gap analysis recommendations (when gap analysis was run) */
571
616
  recommendations: null | RecommendationsData;
617
+ /**
618
+ * Slim failure-mode summary (W0051). `topTitles[*]` carry the
619
+ * `graderJudgments`-era `id = formatEntryKey({mode, category})` so the
620
+ * FailureModesPanel can resolve each row to its per-category manifest
621
+ * entry via `useFailureModeArtifact`.
622
+ */
623
+ failureModes?: {
624
+ counts: Record<string, number>;
625
+ topTitles: {
626
+ id: string;
627
+ category: string;
628
+ severity: "low" | "medium" | "high" | "critical";
629
+ title: string;
630
+ count: number;
631
+ }[];
632
+ totalJudgments: number;
633
+ classificationRate: number;
634
+ } | null;
572
635
  scores: ScoreItem[];
573
636
  /** Per-test results with model output and metadata (D0029) */
574
637
  testResults?: StoredTestResultData[] | null;
@@ -658,7 +721,7 @@ declare function searchTopics(query: string): HelpTopic[];
658
721
  *
659
722
  * The drawer:
660
723
  * - Reads current topic from HelpContext
661
- * - Renders markdown body via HelpMarkdown
724
+ * - Renders markdown body via the shared <Markdown> component
662
725
  * - Shows "See also" links for related topics
663
726
  * - Includes a search bar for topic discovery
664
727
  * - Supports back navigation through topic history