npm - @sanity/ailf-studio - Versions diffs - 1.3.1 → 1.6.0 - Mend

@sanity/ailf-studio 1.3.1 → 1.6.0

This diff compares the contents of two publicly available package versions as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions as they appear in their respective public registries.

Files changed (3)

package/dist/index.d.ts (changed)

@@ -318,6 +318,8 @@ interface ProvenanceData {
         id: string;
         label: string;
     }[];
+    /** Identity of the pipeline run that produced this report (D0032) */
+    runId: string;
     /** @deprecated Use `promptfooUrls` when available */
     promptfooUrl?: string;
     /** Per-mode Promptfoo share URLs (one per sub-eval) */
@@ -434,13 +436,36 @@ interface ScoreItem {
     /** Ceiling score — gold-standard docs injected */
     ceilingScore?: number;
 }
-/** Reference to an artifact stored in an external object store (GCS). */
+/**
+ * A single row in `ArtifactRef.entries[]`. W0051 adds optional `preview` +
+ * `association` + `truncated` so list-view renderers can consume the
+ * descriptor-extracted preview without fetching the external payload.
+ * Older manifests (pre-W0051) carry only `{ key, bytes }`; readers treat
+ * missing fields as absent data, not as errors.
+ */
+interface ArtifactRefEntry {
+    key: string;
+    bytes: number;
+    association?: Record<string, string | number>;
+    truncated?: boolean;
+    preview?: unknown;
+}
+/** Reference to an artifact stored in an external object store. */
 interface ArtifactRef {
-    store: "gcs";
+    store: "gcs" | "local";
     bucket: string;
     path: string;
     bytes?: number;
     entryCount?: number;
+    /**
+     * Added in W0047 / D0032. Missing on pre-W0047 legacy refs, in which case
+     * dispatchers must treat it as `"bulk"` (the only layout that existed then).
+     */
+    layout?: "bulk" | "per-entry";
+    /** Per-entry index (populated for `layout: "per-entry"` refs only). */
+    entries?: ArtifactRefEntry[];
+    truncated?: boolean;
+    preview?: unknown;
 }
 /** A single gap/recommendation from gap analysis */
 interface RecommendationGap {
@@ -503,16 +528,33 @@ interface JudgmentData {
     score: number;
     taskId: string;
 }
-/** Per-feature agent behavior data — how agents interacted with docs */
+/**
+ * Per-feature agent behavior data — how agents interacted with docs.
+ *
+ * W0051 slimmed the full `searchQueries` / `docSlugsVisited` arrays out
+ * of the Report summary and replaced them with `*Sample` (bounded first-N)
+ * + `*Count` (distinct total). Older reports still carry the full arrays
+ * under the legacy names; both shapes are optional here so the
+ * `AgentBehaviorCard` renders either fluidly.
+ */
 interface FeatureAgentBehaviorData {
     avgDocPagesVisited: number;
     avgNetworkTimeMs: number;
     avgSearchesPerformed: number;
-    docSlugsVisited: string[];
     externalDomains: string[];
     feature: string;
-    searchQueries: string[];
     tasksWithBehaviorData: number;
+    /** W0051 slim: bounded sample of unique search queries (first ~5). */
+    searchQueriesSample?: string[];
+    /** W0051 slim: count of distinct queries in the full traces artifact. */
+    searchQueriesCount?: number;
+    /** W0051 slim: bounded sample of unique doc slugs visited. */
+    docSlugsVisitedSample?: string[];
+    /** W0051 slim: count of distinct slugs in the full traces artifact. */
+    docSlugsVisitedCount?: number;
+    /** Legacy (pre-W0051): the full arrays inlined on the Report. */
+    searchQueries?: string[];
+    docSlugsVisited?: string[];
 }
 /** Overall agent behavior stats (aggregated across all features) */
 interface OverallAgentBehaviorData {
@@ -539,11 +581,14 @@ interface PerModelData {
 interface SummaryData {
     /** Per-feature agent behavior data (only present when agentic mode ran) */
     agentBehavior?: FeatureAgentBehaviorData[] | null;
-    /** External artifact references — present when pipeline uploads to GCS (D0030) */
-    artifacts?: {
+    /** External artifact references — present when pipeline uploads to GCS (D0032) */
+    artifactManifest?: {
         testOutputs?: ArtifactRef;
         renderedPrompts?: ArtifactRef;
         rawResults?: ArtifactRef;
+        graderPrompts?: ArtifactRef;
+        taskDefinitions?: ArtifactRef;
+        evalResults?: ArtifactRef;
         traces?: ArtifactRef;
     };
     belowCritical: string[];
@@ -569,6 +614,24 @@ interface SummaryData {
     perModel?: PerModelData[] | null;
     /** Gap analysis recommendations (when gap analysis was run) */
     recommendations: null | RecommendationsData;
+    /**
+     * Slim failure-mode summary (W0051). `topTitles[*]` carry the
+     * `graderJudgments`-era `id = formatEntryKey({mode, category})` so the
+     * FailureModesPanel can resolve each row to its per-category manifest
+     * entry via `useFailureModeArtifact`.
+     */
+    failureModes?: {
+        counts: Record<string, number>;
+        topTitles: {
+            id: string;
+            category: string;
+            severity: "low" | "medium" | "high" | "critical";
+            title: string;
+            count: number;
+        }[];
+        totalJudgments: number;
+        classificationRate: number;
+    } | null;
     scores: ScoreItem[];
     /** Per-test results with model output and metadata (D0029) */
     testResults?: StoredTestResultData[] | null;
@@ -658,7 +721,7 @@ declare function searchTopics(query: string): HelpTopic[];
  *
  * The drawer:
  * - Reads current topic from HelpContext
- * - Renders markdown body via HelpMarkdown
+ * - Renders markdown body via the shared <Markdown> component
  * - Shows "See also" links for related topics
  * - Includes a search bar for topic discovery
  * - Supports back navigation through topic history