@sanity/ailf-studio 1.3.1 → 1.4.0

This diff compares the contents of two publicly available versions of this package, as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.
package/dist/index.d.ts CHANGED
@@ -318,6 +318,8 @@ interface ProvenanceData {
318
318
  id: string;
319
319
  label: string;
320
320
  }[];
321
+ /** Identity of the pipeline run that produced this report (D0032) */
322
+ runId: string;
321
323
  /** @deprecated Use `promptfooUrls` when available */
322
324
  promptfooUrl?: string;
323
325
  /** Per-mode Promptfoo share URLs (one per sub-eval) */
@@ -441,6 +443,16 @@ interface ArtifactRef {
441
443
  path: string;
442
444
  bytes?: number;
443
445
  entryCount?: number;
446
+ /**
447
+ * Added in W0047 / D0032. Missing on pre-W0047 legacy refs, in which case
448
+ * dispatchers must treat it as `"bulk"` (the only layout that existed then).
449
+ */
450
+ layout?: "bulk" | "per-entry";
451
+ /** Per-entry index (populated for `layout: "per-entry"` refs only). */
452
+ entries?: {
453
+ key: string;
454
+ bytes: number;
455
+ }[];
444
456
  }
445
457
  /** A single gap/recommendation from gap analysis */
446
458
  interface RecommendationGap {
@@ -539,11 +551,14 @@ interface PerModelData {
539
551
  interface SummaryData {
540
552
  /** Per-feature agent behavior data (only present when agentic mode ran) */
541
553
  agentBehavior?: FeatureAgentBehaviorData[] | null;
542
- /** External artifact references — present when pipeline uploads to GCS (D0030) */
543
- artifacts?: {
554
+ /** External artifact references — present when pipeline uploads to GCS (D0032) */
555
+ artifactManifest?: {
544
556
  testOutputs?: ArtifactRef;
545
557
  renderedPrompts?: ArtifactRef;
546
558
  rawResults?: ArtifactRef;
559
+ graderPrompts?: ArtifactRef;
560
+ taskDefinitions?: ArtifactRef;
561
+ evalResults?: ArtifactRef;
547
562
  traces?: ArtifactRef;
548
563
  };
549
564
  belowCritical: string[];
package/dist/index.js CHANGED
@@ -734,6 +734,30 @@ function artifactRefSchema() {
734
734
  name: "entryCount",
735
735
  title: "Entry Count",
736
736
  type: "number"
737
+ }),
738
+ defineField4({
739
+ name: "layout",
740
+ title: "Layout",
741
+ type: "string",
742
+ options: { list: ["bulk", "per-entry"] }
743
+ }),
744
+ defineField4({
745
+ name: "entries",
746
+ title: "Per-Entry Index",
747
+ type: "array",
748
+ of: [
749
+ {
750
+ fields: [
751
+ defineField4({ name: "key", title: "Key", type: "string" }),
752
+ defineField4({
753
+ name: "bytes",
754
+ title: "Size (bytes)",
755
+ type: "number"
756
+ })
757
+ ],
758
+ type: "object"
759
+ }
760
+ ]
737
761
  })
738
762
  ],
739
763
  type: "object"
@@ -1650,7 +1674,7 @@ var reportSchema = defineType4({
1650
1674
  type: "array"
1651
1675
  }),
1652
1676
  defineField4({
1653
- description: "External artifact references \u2014 points to large data in GCS that was too big for inline storage (D0030).",
1677
+ description: "External artifact references \u2014 points to large data in GCS that was too big for inline storage (D0032).",
1654
1678
  fields: [
1655
1679
  defineField4({
1656
1680
  ...artifactRefSchema(),
@@ -1667,14 +1691,29 @@ var reportSchema = defineType4({
1667
1691
  name: "rawResults",
1668
1692
  title: "Raw Results"
1669
1693
  }),
1694
+ defineField4({
1695
+ ...artifactRefSchema(),
1696
+ name: "graderPrompts",
1697
+ title: "Grader Prompts"
1698
+ }),
1699
+ defineField4({
1700
+ ...artifactRefSchema(),
1701
+ name: "taskDefinitions",
1702
+ title: "Task Definitions"
1703
+ }),
1704
+ defineField4({
1705
+ ...artifactRefSchema(),
1706
+ name: "evalResults",
1707
+ title: "Eval Results"
1708
+ }),
1670
1709
  defineField4({
1671
1710
  ...artifactRefSchema(),
1672
1711
  name: "traces",
1673
1712
  title: "Traces"
1674
1713
  })
1675
1714
  ],
1676
- name: "artifacts",
1677
- title: "Artifacts",
1715
+ name: "artifactManifest",
1716
+ title: "Artifact Manifest",
1678
1717
  type: "object"
1679
1718
  })
1680
1719
  ],
@@ -5890,67 +5929,128 @@ import { useClient as useClient10 } from "sanity";
5890
5929
 
5891
5930
  // src/lib/useArtifactCache.ts
5892
5931
  import { useCallback as useCallback14, useRef as useRef5, useState as useState10 } from "react";
5893
- function useArtifactCache(reportId, artifactRef) {
5932
+ function useArtifactCache(opts) {
5933
+ const { runId, artifactRef, type } = opts;
5894
5934
  const cacheRef = useRef5(/* @__PURE__ */ new Map());
5895
5935
  const [status, setStatus] = useState10("idle");
5896
5936
  const [error, setError] = useState10(null);
5897
- const fetchingRef = useRef5(false);
5898
- const fetchArtifacts = useCallback14(async () => {
5899
- if (fetchingRef.current || cacheRef.current.size > 0) return;
5900
- if (!artifactRef) return;
5901
- fetchingRef.current = true;
5902
- setStatus("loading");
5903
- setError(null);
5904
- try {
5905
- const signingRes = await fetch(
5906
- `${ARTIFACT_API_BASE_URL}/artifacts/${encodeURIComponent(reportId)}?type=testOutputs`,
5907
- {
5908
- credentials: "omit",
5909
- headers: { Accept: "application/json" }
5910
- }
5911
- );
5912
- if (!signingRes.ok) {
5913
- const body = await signingRes.text().catch(() => "");
5914
- throw new Error(
5915
- `Artifact signing failed: ${signingRes.status} ${signingRes.statusText}${body ? ` \u2014 ${body.slice(0, 200)}` : ""}`
5916
- );
5917
- }
5918
- const envelope = await signingRes.json();
5919
- if (envelope.object === "error" || !envelope.url) {
5920
- throw new Error(
5921
- envelope.error?.message ?? "Invalid signing response \u2014 missing signed URL"
5922
- );
5923
- }
5924
- const artifactRes = await fetch(envelope.url, {
5925
- credentials: "omit"
5926
- });
5927
- if (!artifactRes.ok) {
5928
- throw new Error(
5929
- `GCS artifact fetch failed: ${artifactRes.status} ${artifactRes.statusText}`
5930
- );
5937
+ const inFlight = useRef5(/* @__PURE__ */ new Set());
5938
+ const BULK_KEY = "__bulk__";
5939
+ const availableEntries = artifactRef?.entries?.map((e) => e.key) ?? [];
5940
+ const fetchEntry = useCallback14(
5941
+ async (key) => {
5942
+ if (!artifactRef || !runId) return;
5943
+ if (!artifactRef.layout || artifactRef.layout === "bulk") return;
5944
+ if (cacheRef.current.has(key) || inFlight.current.has(key)) return;
5945
+ inFlight.current.add(key);
5946
+ setStatus("loading");
5947
+ setError(null);
5948
+ try {
5949
+ const url = `${ARTIFACT_API_BASE_URL}/runs/${encodeURIComponent(runId)}/artifacts/${encodeURIComponent(type)}/${encodeURIComponent(key)}`;
5950
+ const entry = await signAndFetch(url);
5951
+ cacheRef.current.set(key, entry);
5952
+ setStatus("ready");
5953
+ } catch (err) {
5954
+ setError(err instanceof Error ? err.message : String(err));
5955
+ setStatus("error");
5956
+ } finally {
5957
+ inFlight.current.delete(key);
5931
5958
  }
5932
- const artifact = await artifactRes.json();
5933
- const map = /* @__PURE__ */ new Map();
5934
- for (const [key, entry] of Object.entries(artifact.entries)) {
5935
- map.set(key, entry);
5959
+ },
5960
+ [runId, artifactRef, type]
5961
+ );
5962
+ const fetchAll = useCallback14(async () => {
5963
+ if (!artifactRef || !runId) return;
5964
+ if (inFlight.current.has(BULK_KEY)) return;
5965
+ if (!artifactRef.layout || artifactRef.layout === "bulk") {
5966
+ if (cacheRef.current.size > 0) return;
5967
+ inFlight.current.add(BULK_KEY);
5968
+ setStatus("loading");
5969
+ setError(null);
5970
+ try {
5971
+ const url = `${ARTIFACT_API_BASE_URL}/runs/${encodeURIComponent(runId)}/artifacts/${encodeURIComponent(type)}`;
5972
+ const body = await signAndFetch(url);
5973
+ const next = /* @__PURE__ */ new Map();
5974
+ for (const [key, entry] of Object.entries(body.entries)) {
5975
+ next.set(key, entry);
5976
+ }
5977
+ cacheRef.current = next;
5978
+ setStatus("ready");
5979
+ } catch (err) {
5980
+ setError(err instanceof Error ? err.message : String(err));
5981
+ setStatus("error");
5982
+ } finally {
5983
+ inFlight.current.delete(BULK_KEY);
5936
5984
  }
5937
- cacheRef.current = map;
5938
- setStatus("ready");
5939
- } catch (err) {
5940
- const message = err instanceof Error ? err.message : String(err);
5941
- setError(message);
5942
- setStatus("error");
5943
- } finally {
5944
- fetchingRef.current = false;
5985
+ return;
5945
5986
  }
5946
- }, [reportId, artifactRef]);
5947
- const getOutput = useCallback14(
5948
- (taskId, modelId) => {
5949
- return cacheRef.current.get(`${taskId}::${modelId}`) ?? null;
5950
- },
5987
+ const keys = artifactRef.entries?.map((e) => e.key) ?? [];
5988
+ await Promise.all(keys.map((k) => fetchEntry(k)));
5989
+ }, [runId, artifactRef, type, fetchEntry]);
5990
+ const getEntry = useCallback14(
5991
+ (key) => cacheRef.current.get(key) ?? null,
5951
5992
  []
5952
5993
  );
5953
- return { status, error, getOutput, fetchArtifacts };
5994
+ return {
5995
+ status,
5996
+ error,
5997
+ availableEntries,
5998
+ getEntry,
5999
+ fetchEntry,
6000
+ fetchAll
6001
+ };
6002
+ }
6003
+ async function signAndFetch(signingUrl) {
6004
+ const signingRes = await fetch(signingUrl, {
6005
+ credentials: "omit",
6006
+ headers: { Accept: "application/json" }
6007
+ });
6008
+ if (!signingRes.ok) {
6009
+ const body = await signingRes.text().catch(() => "");
6010
+ throw new Error(
6011
+ `Artifact signing failed: ${signingRes.status} ${signingRes.statusText}${body ? ` \u2014 ${body.slice(0, 200)}` : ""}`
6012
+ );
6013
+ }
6014
+ const envelope = await signingRes.json();
6015
+ if (envelope.object === "error" || !envelope.url) {
6016
+ throw new Error(
6017
+ envelope.error?.message ?? "Invalid signing response \u2014 missing signed URL"
6018
+ );
6019
+ }
6020
+ const artifactRes = await fetch(envelope.url, { credentials: "omit" });
6021
+ if (!artifactRes.ok) {
6022
+ throw new Error(
6023
+ `GCS artifact fetch failed: ${artifactRes.status} ${artifactRes.statusText}`
6024
+ );
6025
+ }
6026
+ return await artifactRes.json();
6027
+ }
6028
+
6029
+ // src/lib/use-test-outputs-artifact.ts
6030
+ function useTestOutputsArtifact(runId, artifactRef) {
6031
+ const cache = useArtifactCache({
6032
+ runId,
6033
+ artifactRef,
6034
+ type: "testOutputs"
6035
+ });
6036
+ return {
6037
+ status: cache.status,
6038
+ error: cache.error,
6039
+ getOutput: (taskId, modelId) => cache.getEntry(`${taskId}::${modelId}`),
6040
+ fetchOutput: async (taskId, modelId) => {
6041
+ if (!artifactRef) return;
6042
+ if (!artifactRef.layout || artifactRef.layout === "bulk") {
6043
+ await cache.fetchAll();
6044
+ return;
6045
+ }
6046
+ await cache.fetchEntry(`${taskId}::${modelId}`);
6047
+ },
6048
+ hasOutput: (taskId, modelId) => {
6049
+ if (!artifactRef) return false;
6050
+ if (!artifactRef.layout || artifactRef.layout === "bulk") return true;
6051
+ return cache.availableEntries.includes(`${taskId}::${modelId}`);
6052
+ }
6053
+ };
5954
6054
  }
5955
6055
 
5956
6056
  // src/lib/thresholds.ts
@@ -7160,8 +7260,10 @@ function JudgmentCard({
7160
7260
  );
7161
7261
  const resolvedOutput = inlineOutput ?? artifactEntry?.responseOutput ?? null;
7162
7262
  const resolvedTruncated = testResult?.responseOutputTruncated ?? artifactEntry?.responseOutputTruncated ?? false;
7163
- const canFetchArtifact = !inlineOutput && !artifactEntry && artifactCache != null && artifactCache.status !== "ready";
7164
- const hasOutputOrCanFetch = resolvedOutput != null || canFetchArtifact;
7263
+ const entryKnownToManifest = artifactCache?.hasOutput(judgment.taskId, judgment.modelId) ?? false;
7264
+ const canFetchArtifact = !inlineOutput && !artifactEntry && artifactCache != null && entryKnownToManifest;
7265
+ const entryUnavailable = !inlineOutput && !artifactEntry && artifactCache != null && !entryKnownToManifest;
7266
+ const hasOutputOrCanFetch = resolvedOutput != null || canFetchArtifact || entryUnavailable;
7165
7267
  useEffect8(() => {
7166
7268
  if (focused) {
7167
7269
  setExpanded(true);
@@ -7224,12 +7326,19 @@ function JudgmentCard({
7224
7326
  const handleToggleOutput = useCallback16(
7225
7327
  (e) => {
7226
7328
  e.stopPropagation();
7227
- if (!outputExpanded && !resolvedOutput && artifactCache) {
7228
- artifactCache.fetchArtifacts();
7329
+ if (!outputExpanded && !resolvedOutput && artifactCache && entryKnownToManifest) {
7330
+ artifactCache.fetchOutput(judgment.taskId, judgment.modelId);
7229
7331
  }
7230
7332
  setOutputExpanded((prev) => !prev);
7231
7333
  },
7232
- [outputExpanded, resolvedOutput, artifactCache]
7334
+ [
7335
+ outputExpanded,
7336
+ resolvedOutput,
7337
+ artifactCache,
7338
+ entryKnownToManifest,
7339
+ judgment.taskId,
7340
+ judgment.modelId
7341
+ ]
7233
7342
  );
7234
7343
  return /* @__PURE__ */ jsx28(
7235
7344
  Box16,
@@ -7368,8 +7477,9 @@ function JudgmentCard({
7368
7477
  padding: 12
7369
7478
  },
7370
7479
  children: [
7371
- !resolvedOutput && artifactCache?.status === "loading" && /* @__PURE__ */ jsx28(Text25, { muted: true, size: 1, children: "Fetching model output\u2026" }),
7372
- !resolvedOutput && artifactCache?.status === "error" && /* @__PURE__ */ jsxs23(Text25, { muted: true, size: 1, style: { color: "#f87171" }, children: [
7480
+ !resolvedOutput && entryUnavailable && /* @__PURE__ */ jsx28(Text25, { muted: true, size: 1, children: "Model output not available for this entry." }),
7481
+ !resolvedOutput && !entryUnavailable && artifactCache?.status === "loading" && /* @__PURE__ */ jsx28(Text25, { muted: true, size: 1, children: "Fetching model output\u2026" }),
7482
+ !resolvedOutput && !entryUnavailable && artifactCache?.status === "error" && /* @__PURE__ */ jsxs23(Text25, { muted: true, size: 1, style: { color: "#f87171" }, children: [
7373
7483
  "Failed to load model output",
7374
7484
  artifactCache.error ? `: ${artifactCache.error}` : ""
7375
7485
  ] }),
@@ -9323,9 +9433,9 @@ function ReportDetail({
9323
9433
  cancelled = true;
9324
9434
  };
9325
9435
  }, [client, reportId]);
9326
- const artifactCache = useArtifactCache(
9327
- reportId,
9328
- report?.summary?.artifacts?.testOutputs
9436
+ const artifactCache = useTestOutputsArtifact(
9437
+ report?.provenance?.runId,
9438
+ report?.summary?.artifactManifest?.testOutputs
9329
9439
  );
9330
9440
  const { summary } = report ?? {};
9331
9441
  const hasWeaknesses = Boolean(
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@sanity/ailf-studio",
3
- "version": "1.3.1",
3
+ "version": "1.4.0",
4
4
  "description": "AI Literacy Framework — Sanity Studio dashboard plugin",
5
5
  "type": "module",
6
6
  "license": "MIT",