@sanity/ailf-studio 1.3.1 → 1.6.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.d.ts +71 -8
- package/dist/index.js +3607 -1641
- package/package.json +5 -2
package/dist/index.d.ts
CHANGED
|
@@ -318,6 +318,8 @@ interface ProvenanceData {
|
|
|
318
318
|
id: string;
|
|
319
319
|
label: string;
|
|
320
320
|
}[];
|
|
321
|
+
/** Identity of the pipeline run that produced this report (D0032) */
|
|
322
|
+
runId: string;
|
|
321
323
|
/** @deprecated Use `promptfooUrls` when available */
|
|
322
324
|
promptfooUrl?: string;
|
|
323
325
|
/** Per-mode Promptfoo share URLs (one per sub-eval) */
|
|
@@ -434,13 +436,36 @@ interface ScoreItem {
|
|
|
434
436
|
/** Ceiling score — gold-standard docs injected */
|
|
435
437
|
ceilingScore?: number;
|
|
436
438
|
}
|
|
437
|
-
/**
|
|
439
|
+
/**
|
|
440
|
+
* A single row in `ArtifactRef.entries[]`. W0051 adds optional `preview` +
|
|
441
|
+
* `association` + `truncated` so list-view renderers can consume the
|
|
442
|
+
* descriptor-extracted preview without fetching the external payload.
|
|
443
|
+
* Older manifests (pre-W0051) carry only `{ key, bytes }`; readers treat
|
|
444
|
+
* missing fields as absent data, not as errors.
|
|
445
|
+
*/
|
|
446
|
+
interface ArtifactRefEntry {
|
|
447
|
+
key: string;
|
|
448
|
+
bytes: number;
|
|
449
|
+
association?: Record<string, string | number>;
|
|
450
|
+
truncated?: boolean;
|
|
451
|
+
preview?: unknown;
|
|
452
|
+
}
|
|
453
|
+
/** Reference to an artifact stored in an external object store. */
|
|
438
454
|
interface ArtifactRef {
|
|
439
|
-
store: "gcs";
|
|
455
|
+
store: "gcs" | "local";
|
|
440
456
|
bucket: string;
|
|
441
457
|
path: string;
|
|
442
458
|
bytes?: number;
|
|
443
459
|
entryCount?: number;
|
|
460
|
+
/**
|
|
461
|
+
* Added in W0047 / D0032. Missing on pre-W0047 legacy refs, in which case
|
|
462
|
+
* dispatchers must treat it as `"bulk"` (the only layout that existed then).
|
|
463
|
+
*/
|
|
464
|
+
layout?: "bulk" | "per-entry";
|
|
465
|
+
/** Per-entry index (populated for `layout: "per-entry"` refs only). */
|
|
466
|
+
entries?: ArtifactRefEntry[];
|
|
467
|
+
truncated?: boolean;
|
|
468
|
+
preview?: unknown;
|
|
444
469
|
}
|
|
445
470
|
/** A single gap/recommendation from gap analysis */
|
|
446
471
|
interface RecommendationGap {
|
|
@@ -503,16 +528,33 @@ interface JudgmentData {
|
|
|
503
528
|
score: number;
|
|
504
529
|
taskId: string;
|
|
505
530
|
}
|
|
506
|
-
/**
|
|
531
|
+
/**
|
|
532
|
+
* Per-feature agent behavior data — how agents interacted with docs.
|
|
533
|
+
*
|
|
534
|
+
* W0051 slimmed the full `searchQueries` / `docSlugsVisited` arrays out
|
|
535
|
+
* of the Report summary and replaced them with `*Sample` (bounded first-N)
|
|
536
|
+
* + `*Count` (distinct total). Older reports still carry the full arrays
|
|
537
|
+
* under the legacy names; both shapes are optional here so the
|
|
538
|
+
* `AgentBehaviorCard` renders either fluidly.
|
|
539
|
+
*/
|
|
507
540
|
interface FeatureAgentBehaviorData {
|
|
508
541
|
avgDocPagesVisited: number;
|
|
509
542
|
avgNetworkTimeMs: number;
|
|
510
543
|
avgSearchesPerformed: number;
|
|
511
|
-
docSlugsVisited: string[];
|
|
512
544
|
externalDomains: string[];
|
|
513
545
|
feature: string;
|
|
514
|
-
searchQueries: string[];
|
|
515
546
|
tasksWithBehaviorData: number;
|
|
547
|
+
/** W0051 slim: bounded sample of unique search queries (first ~5). */
|
|
548
|
+
searchQueriesSample?: string[];
|
|
549
|
+
/** W0051 slim: count of distinct queries in the full traces artifact. */
|
|
550
|
+
searchQueriesCount?: number;
|
|
551
|
+
/** W0051 slim: bounded sample of unique doc slugs visited. */
|
|
552
|
+
docSlugsVisitedSample?: string[];
|
|
553
|
+
/** W0051 slim: count of distinct slugs in the full traces artifact. */
|
|
554
|
+
docSlugsVisitedCount?: number;
|
|
555
|
+
/** Legacy (pre-W0051): the full arrays inlined on the Report. */
|
|
556
|
+
searchQueries?: string[];
|
|
557
|
+
docSlugsVisited?: string[];
|
|
516
558
|
}
|
|
517
559
|
/** Overall agent behavior stats (aggregated across all features) */
|
|
518
560
|
interface OverallAgentBehaviorData {
|
|
@@ -539,11 +581,14 @@ interface PerModelData {
|
|
|
539
581
|
interface SummaryData {
|
|
540
582
|
/** Per-feature agent behavior data (only present when agentic mode ran) */
|
|
541
583
|
agentBehavior?: FeatureAgentBehaviorData[] | null;
|
|
542
|
-
/** External artifact references — present when pipeline uploads to GCS (
|
|
543
|
-
|
|
584
|
+
/** External artifact references — present when pipeline uploads to GCS (D0032) */
|
|
585
|
+
artifactManifest?: {
|
|
544
586
|
testOutputs?: ArtifactRef;
|
|
545
587
|
renderedPrompts?: ArtifactRef;
|
|
546
588
|
rawResults?: ArtifactRef;
|
|
589
|
+
graderPrompts?: ArtifactRef;
|
|
590
|
+
taskDefinitions?: ArtifactRef;
|
|
591
|
+
evalResults?: ArtifactRef;
|
|
547
592
|
traces?: ArtifactRef;
|
|
548
593
|
};
|
|
549
594
|
belowCritical: string[];
|
|
@@ -569,6 +614,24 @@ interface SummaryData {
|
|
|
569
614
|
perModel?: PerModelData[] | null;
|
|
570
615
|
/** Gap analysis recommendations (when gap analysis was run) */
|
|
571
616
|
recommendations: null | RecommendationsData;
|
|
617
|
+
/**
|
|
618
|
+
* Slim failure-mode summary (W0051). `topTitles[*]` carry the
|
|
619
|
+
* `graderJudgments`-era `id = formatEntryKey({mode, category})` so the
|
|
620
|
+
* FailureModesPanel can resolve each row to its per-category manifest
|
|
621
|
+
* entry via `useFailureModeArtifact`.
|
|
622
|
+
*/
|
|
623
|
+
failureModes?: {
|
|
624
|
+
counts: Record<string, number>;
|
|
625
|
+
topTitles: {
|
|
626
|
+
id: string;
|
|
627
|
+
category: string;
|
|
628
|
+
severity: "low" | "medium" | "high" | "critical";
|
|
629
|
+
title: string;
|
|
630
|
+
count: number;
|
|
631
|
+
}[];
|
|
632
|
+
totalJudgments: number;
|
|
633
|
+
classificationRate: number;
|
|
634
|
+
} | null;
|
|
572
635
|
scores: ScoreItem[];
|
|
573
636
|
/** Per-test results with model output and metadata (D0029) */
|
|
574
637
|
testResults?: StoredTestResultData[] | null;
|
|
@@ -658,7 +721,7 @@ declare function searchTopics(query: string): HelpTopic[];
|
|
|
658
721
|
*
|
|
659
722
|
* The drawer:
|
|
660
723
|
* - Reads current topic from HelpContext
|
|
661
|
-
* - Renders markdown body via
|
|
724
|
+
* - Renders markdown body via the shared <Markdown> component
|
|
662
725
|
* - Shows "See also" links for related topics
|
|
663
726
|
* - Includes a search bar for topic discovery
|
|
664
727
|
* - Supports back navigation through topic history
|