@sanity/ailf-studio 1.3.0 → 1.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.d.ts CHANGED
@@ -318,6 +318,8 @@ interface ProvenanceData {
318
318
  id: string;
319
319
  label: string;
320
320
  }[];
321
+ /** Identity of the pipeline run that produced this report (D0032) */
322
+ runId: string;
321
323
  /** @deprecated Use `promptfooUrls` when available */
322
324
  promptfooUrl?: string;
323
325
  /** Per-mode Promptfoo share URLs (one per sub-eval) */
@@ -441,6 +443,16 @@ interface ArtifactRef {
441
443
  path: string;
442
444
  bytes?: number;
443
445
  entryCount?: number;
446
+ /**
447
+ * Added in W0047 / D0032. Missing on pre-W0047 legacy refs, in which case
448
+ * dispatchers must treat it as `"bulk"` (the only layout that existed then).
449
+ */
450
+ layout?: "bulk" | "per-entry";
451
+ /** Per-entry index (populated for `layout: "per-entry"` refs only). */
452
+ entries?: {
453
+ key: string;
454
+ bytes: number;
455
+ }[];
444
456
  }
445
457
  /** A single gap/recommendation from gap analysis */
446
458
  interface RecommendationGap {
@@ -461,7 +473,12 @@ interface RecommendationsData {
461
473
  }
462
474
  /**
463
475
  * Per-test result stored in reports for drill-down and audit.
464
- * Mirrors StoredTestResult from @sanity/ailf-core. See D0029.
476
+ * Mirrors StoredTestResult from @sanity/ailf-core.
477
+ *
478
+ * Per D0030, new reports omit `responseOutput` / `responseOutputTruncated`
479
+ * inline — the full output lives in the `testOutputs` GCS artifact and is
480
+ * fetched via `useArtifactCache`. Both fields remain optional so the
481
+ * reader path tolerates legacy reports that were published before W0045.
465
482
  */
466
483
  interface StoredTestResultData {
467
484
  area: string;
@@ -476,7 +493,7 @@ interface StoredTestResultData {
476
493
  latencyMs?: number;
477
494
  modelId: string;
478
495
  outputFailure?: boolean;
479
- responseOutput: string;
496
+ responseOutput?: string;
480
497
  responseOutputTruncated?: boolean;
481
498
  taskId: string;
482
499
  tokenUsage?: {
@@ -534,11 +551,14 @@ interface PerModelData {
534
551
  interface SummaryData {
535
552
  /** Per-feature agent behavior data (only present when agentic mode ran) */
536
553
  agentBehavior?: FeatureAgentBehaviorData[] | null;
537
- /** External artifact references — present when pipeline uploads to GCS (D0030) */
538
- artifacts?: {
554
+ /** External artifact references — present when pipeline uploads to GCS (D0032) */
555
+ artifactManifest?: {
539
556
  testOutputs?: ArtifactRef;
540
557
  renderedPrompts?: ArtifactRef;
541
558
  rawResults?: ArtifactRef;
559
+ graderPrompts?: ArtifactRef;
560
+ taskDefinitions?: ArtifactRef;
561
+ evalResults?: ArtifactRef;
542
562
  traces?: ArtifactRef;
543
563
  };
544
564
  belowCritical: string[];
package/dist/index.js CHANGED
@@ -10,7 +10,7 @@ import { useClient, useCurrentUser } from "sanity";
10
10
  // src/lib/constants.ts
11
11
  var API_VERSION = "2026-03-11";
12
12
  var isProduction = process.env.NODE_ENV === "production";
13
- var ARTIFACT_API_BASE_URL = isProduction ? "https://ailf-api.sanity.build/v1" : "http://localhost:4100/v1";
13
+ var ARTIFACT_API_BASE_URL = isProduction ? "https://ailf-api.sanity.build/v1" : "http://localhost:3000/v1";
14
14
 
15
15
  // src/actions/GraduateToNativeAction.tsx
16
16
  import { jsx, jsxs } from "react/jsx-runtime";
@@ -734,6 +734,30 @@ function artifactRefSchema() {
734
734
  name: "entryCount",
735
735
  title: "Entry Count",
736
736
  type: "number"
737
+ }),
738
+ defineField4({
739
+ name: "layout",
740
+ title: "Layout",
741
+ type: "string",
742
+ options: { list: ["bulk", "per-entry"] }
743
+ }),
744
+ defineField4({
745
+ name: "entries",
746
+ title: "Per-Entry Index",
747
+ type: "array",
748
+ of: [
749
+ {
750
+ fields: [
751
+ defineField4({ name: "key", title: "Key", type: "string" }),
752
+ defineField4({
753
+ name: "bytes",
754
+ title: "Size (bytes)",
755
+ type: "number"
756
+ })
757
+ ],
758
+ type: "object"
759
+ }
760
+ ]
737
761
  })
738
762
  ],
739
763
  type: "object"
@@ -1418,17 +1442,6 @@ var reportSchema = defineType4({
1418
1442
  title: "Composite Score",
1419
1443
  type: "number"
1420
1444
  }),
1421
- defineField4({
1422
- description: "The model's generated code/response (truncated to 8000 chars).",
1423
- name: "responseOutput",
1424
- title: "Response Output",
1425
- type: "text"
1426
- }),
1427
- defineField4({
1428
- name: "responseOutputTruncated",
1429
- title: "Truncated",
1430
- type: "boolean"
1431
- }),
1432
1445
  defineField4({
1433
1446
  name: "latencyMs",
1434
1447
  title: "Latency (ms)",
@@ -1661,7 +1674,7 @@ var reportSchema = defineType4({
1661
1674
  type: "array"
1662
1675
  }),
1663
1676
  defineField4({
1664
- description: "External artifact references \u2014 points to large data in GCS that was too big for inline storage (D0030).",
1677
+ description: "External artifact references \u2014 points to large data in GCS that was too big for inline storage (D0032).",
1665
1678
  fields: [
1666
1679
  defineField4({
1667
1680
  ...artifactRefSchema(),
@@ -1678,14 +1691,29 @@ var reportSchema = defineType4({
1678
1691
  name: "rawResults",
1679
1692
  title: "Raw Results"
1680
1693
  }),
1694
+ defineField4({
1695
+ ...artifactRefSchema(),
1696
+ name: "graderPrompts",
1697
+ title: "Grader Prompts"
1698
+ }),
1699
+ defineField4({
1700
+ ...artifactRefSchema(),
1701
+ name: "taskDefinitions",
1702
+ title: "Task Definitions"
1703
+ }),
1704
+ defineField4({
1705
+ ...artifactRefSchema(),
1706
+ name: "evalResults",
1707
+ title: "Eval Results"
1708
+ }),
1681
1709
  defineField4({
1682
1710
  ...artifactRefSchema(),
1683
1711
  name: "traces",
1684
1712
  title: "Traces"
1685
1713
  })
1686
1714
  ],
1687
- name: "artifacts",
1688
- title: "Artifacts",
1715
+ name: "artifactManifest",
1716
+ title: "Artifact Manifest",
1689
1717
  type: "object"
1690
1718
  })
1691
1719
  ],
@@ -5901,71 +5929,128 @@ import { useClient as useClient10 } from "sanity";
5901
5929
 
5902
5930
  // src/lib/useArtifactCache.ts
5903
5931
  import { useCallback as useCallback14, useRef as useRef5, useState as useState10 } from "react";
5904
- function useArtifactCache(reportId, artifactRef, client) {
5932
+ function useArtifactCache(opts) {
5933
+ const { runId, artifactRef, type } = opts;
5905
5934
  const cacheRef = useRef5(/* @__PURE__ */ new Map());
5906
5935
  const [status, setStatus] = useState10("idle");
5907
5936
  const [error, setError] = useState10(null);
5908
- const fetchingRef = useRef5(false);
5909
- const fetchArtifacts = useCallback14(async () => {
5910
- if (fetchingRef.current || cacheRef.current.size > 0) return;
5911
- if (!artifactRef) return;
5912
- fetchingRef.current = true;
5913
- setStatus("loading");
5914
- setError(null);
5915
- try {
5916
- const token = client.config().token;
5917
- const signingRes = await fetch(
5918
- `${ARTIFACT_API_BASE_URL}/artifacts/${encodeURIComponent(reportId)}?type=testOutputs`,
5919
- {
5920
- credentials: "omit",
5921
- headers: {
5922
- Accept: "application/json",
5923
- ...token ? { Authorization: `Bearer ${token}` } : {}
5924
- }
5925
- }
5926
- );
5927
- if (!signingRes.ok) {
5928
- const body = await signingRes.text().catch(() => "");
5929
- throw new Error(
5930
- `Artifact signing failed: ${signingRes.status} ${signingRes.statusText}${body ? ` \u2014 ${body.slice(0, 200)}` : ""}`
5931
- );
5932
- }
5933
- const envelope = await signingRes.json();
5934
- if (!envelope.ok || !envelope.data?.url) {
5935
- throw new Error(
5936
- envelope.error ?? "Invalid signing response \u2014 missing signed URL"
5937
- );
5938
- }
5939
- const artifactRes = await fetch(envelope.data.url, {
5940
- credentials: "omit"
5941
- });
5942
- if (!artifactRes.ok) {
5943
- throw new Error(
5944
- `GCS artifact fetch failed: ${artifactRes.status} ${artifactRes.statusText}`
5945
- );
5937
+ const inFlight = useRef5(/* @__PURE__ */ new Set());
5938
+ const BULK_KEY = "__bulk__";
5939
+ const availableEntries = artifactRef?.entries?.map((e) => e.key) ?? [];
5940
+ const fetchEntry = useCallback14(
5941
+ async (key) => {
5942
+ if (!artifactRef || !runId) return;
5943
+ if (!artifactRef.layout || artifactRef.layout === "bulk") return;
5944
+ if (cacheRef.current.has(key) || inFlight.current.has(key)) return;
5945
+ inFlight.current.add(key);
5946
+ setStatus("loading");
5947
+ setError(null);
5948
+ try {
5949
+ const url = `${ARTIFACT_API_BASE_URL}/runs/${encodeURIComponent(runId)}/artifacts/${encodeURIComponent(type)}/${encodeURIComponent(key)}`;
5950
+ const entry = await signAndFetch(url);
5951
+ cacheRef.current.set(key, entry);
5952
+ setStatus("ready");
5953
+ } catch (err) {
5954
+ setError(err instanceof Error ? err.message : String(err));
5955
+ setStatus("error");
5956
+ } finally {
5957
+ inFlight.current.delete(key);
5946
5958
  }
5947
- const artifact = await artifactRes.json();
5948
- const map = /* @__PURE__ */ new Map();
5949
- for (const [key, entry] of Object.entries(artifact.entries)) {
5950
- map.set(key, entry);
5959
+ },
5960
+ [runId, artifactRef, type]
5961
+ );
5962
+ const fetchAll = useCallback14(async () => {
5963
+ if (!artifactRef || !runId) return;
5964
+ if (inFlight.current.has(BULK_KEY)) return;
5965
+ if (!artifactRef.layout || artifactRef.layout === "bulk") {
5966
+ if (cacheRef.current.size > 0) return;
5967
+ inFlight.current.add(BULK_KEY);
5968
+ setStatus("loading");
5969
+ setError(null);
5970
+ try {
5971
+ const url = `${ARTIFACT_API_BASE_URL}/runs/${encodeURIComponent(runId)}/artifacts/${encodeURIComponent(type)}`;
5972
+ const body = await signAndFetch(url);
5973
+ const next = /* @__PURE__ */ new Map();
5974
+ for (const [key, entry] of Object.entries(body.entries)) {
5975
+ next.set(key, entry);
5976
+ }
5977
+ cacheRef.current = next;
5978
+ setStatus("ready");
5979
+ } catch (err) {
5980
+ setError(err instanceof Error ? err.message : String(err));
5981
+ setStatus("error");
5982
+ } finally {
5983
+ inFlight.current.delete(BULK_KEY);
5951
5984
  }
5952
- cacheRef.current = map;
5953
- setStatus("ready");
5954
- } catch (err) {
5955
- const message = err instanceof Error ? err.message : String(err);
5956
- setError(message);
5957
- setStatus("error");
5958
- } finally {
5959
- fetchingRef.current = false;
5985
+ return;
5960
5986
  }
5961
- }, [reportId, artifactRef, client]);
5962
- const getOutput = useCallback14(
5963
- (taskId, modelId) => {
5964
- return cacheRef.current.get(`${taskId}::${modelId}`) ?? null;
5965
- },
5987
+ const keys = artifactRef.entries?.map((e) => e.key) ?? [];
5988
+ await Promise.all(keys.map((k) => fetchEntry(k)));
5989
+ }, [runId, artifactRef, type, fetchEntry]);
5990
+ const getEntry = useCallback14(
5991
+ (key) => cacheRef.current.get(key) ?? null,
5966
5992
  []
5967
5993
  );
5968
- return { status, error, getOutput, fetchArtifacts };
5994
+ return {
5995
+ status,
5996
+ error,
5997
+ availableEntries,
5998
+ getEntry,
5999
+ fetchEntry,
6000
+ fetchAll
6001
+ };
6002
+ }
6003
+ async function signAndFetch(signingUrl) {
6004
+ const signingRes = await fetch(signingUrl, {
6005
+ credentials: "omit",
6006
+ headers: { Accept: "application/json" }
6007
+ });
6008
+ if (!signingRes.ok) {
6009
+ const body = await signingRes.text().catch(() => "");
6010
+ throw new Error(
6011
+ `Artifact signing failed: ${signingRes.status} ${signingRes.statusText}${body ? ` \u2014 ${body.slice(0, 200)}` : ""}`
6012
+ );
6013
+ }
6014
+ const envelope = await signingRes.json();
6015
+ if (envelope.object === "error" || !envelope.url) {
6016
+ throw new Error(
6017
+ envelope.error?.message ?? "Invalid signing response \u2014 missing signed URL"
6018
+ );
6019
+ }
6020
+ const artifactRes = await fetch(envelope.url, { credentials: "omit" });
6021
+ if (!artifactRes.ok) {
6022
+ throw new Error(
6023
+ `GCS artifact fetch failed: ${artifactRes.status} ${artifactRes.statusText}`
6024
+ );
6025
+ }
6026
+ return await artifactRes.json();
6027
+ }
6028
+
6029
+ // src/lib/use-test-outputs-artifact.ts
6030
+ function useTestOutputsArtifact(runId, artifactRef) {
6031
+ const cache = useArtifactCache({
6032
+ runId,
6033
+ artifactRef,
6034
+ type: "testOutputs"
6035
+ });
6036
+ return {
6037
+ status: cache.status,
6038
+ error: cache.error,
6039
+ getOutput: (taskId, modelId) => cache.getEntry(`${taskId}::${modelId}`),
6040
+ fetchOutput: async (taskId, modelId) => {
6041
+ if (!artifactRef) return;
6042
+ if (!artifactRef.layout || artifactRef.layout === "bulk") {
6043
+ await cache.fetchAll();
6044
+ return;
6045
+ }
6046
+ await cache.fetchEntry(`${taskId}::${modelId}`);
6047
+ },
6048
+ hasOutput: (taskId, modelId) => {
6049
+ if (!artifactRef) return false;
6050
+ if (!artifactRef.layout || artifactRef.layout === "bulk") return true;
6051
+ return cache.availableEntries.includes(`${taskId}::${modelId}`);
6052
+ }
6053
+ };
5969
6054
  }
5970
6055
 
5971
6056
  // src/lib/thresholds.ts
@@ -7175,8 +7260,10 @@ function JudgmentCard({
7175
7260
  );
7176
7261
  const resolvedOutput = inlineOutput ?? artifactEntry?.responseOutput ?? null;
7177
7262
  const resolvedTruncated = testResult?.responseOutputTruncated ?? artifactEntry?.responseOutputTruncated ?? false;
7178
- const canFetchArtifact = !inlineOutput && !artifactEntry && artifactCache != null && artifactCache.status !== "ready";
7179
- const hasOutputOrCanFetch = resolvedOutput != null || canFetchArtifact;
7263
+ const entryKnownToManifest = artifactCache?.hasOutput(judgment.taskId, judgment.modelId) ?? false;
7264
+ const canFetchArtifact = !inlineOutput && !artifactEntry && artifactCache != null && entryKnownToManifest;
7265
+ const entryUnavailable = !inlineOutput && !artifactEntry && artifactCache != null && !entryKnownToManifest;
7266
+ const hasOutputOrCanFetch = resolvedOutput != null || canFetchArtifact || entryUnavailable;
7180
7267
  useEffect8(() => {
7181
7268
  if (focused) {
7182
7269
  setExpanded(true);
@@ -7239,12 +7326,19 @@ function JudgmentCard({
7239
7326
  const handleToggleOutput = useCallback16(
7240
7327
  (e) => {
7241
7328
  e.stopPropagation();
7242
- if (!outputExpanded && !resolvedOutput && artifactCache) {
7243
- artifactCache.fetchArtifacts();
7329
+ if (!outputExpanded && !resolvedOutput && artifactCache && entryKnownToManifest) {
7330
+ artifactCache.fetchOutput(judgment.taskId, judgment.modelId);
7244
7331
  }
7245
7332
  setOutputExpanded((prev) => !prev);
7246
7333
  },
7247
- [outputExpanded, resolvedOutput, artifactCache]
7334
+ [
7335
+ outputExpanded,
7336
+ resolvedOutput,
7337
+ artifactCache,
7338
+ entryKnownToManifest,
7339
+ judgment.taskId,
7340
+ judgment.modelId
7341
+ ]
7248
7342
  );
7249
7343
  return /* @__PURE__ */ jsx28(
7250
7344
  Box16,
@@ -7383,8 +7477,9 @@ function JudgmentCard({
7383
7477
  padding: 12
7384
7478
  },
7385
7479
  children: [
7386
- !resolvedOutput && artifactCache?.status === "loading" && /* @__PURE__ */ jsx28(Text25, { muted: true, size: 1, children: "Fetching model output\u2026" }),
7387
- !resolvedOutput && artifactCache?.status === "error" && /* @__PURE__ */ jsxs23(Text25, { muted: true, size: 1, style: { color: "#f87171" }, children: [
7480
+ !resolvedOutput && entryUnavailable && /* @__PURE__ */ jsx28(Text25, { muted: true, size: 1, children: "Model output not available for this entry." }),
7481
+ !resolvedOutput && !entryUnavailable && artifactCache?.status === "loading" && /* @__PURE__ */ jsx28(Text25, { muted: true, size: 1, children: "Fetching model output\u2026" }),
7482
+ !resolvedOutput && !entryUnavailable && artifactCache?.status === "error" && /* @__PURE__ */ jsxs23(Text25, { muted: true, size: 1, style: { color: "#f87171" }, children: [
7388
7483
  "Failed to load model output",
7389
7484
  artifactCache.error ? `: ${artifactCache.error}` : ""
7390
7485
  ] }),
@@ -9338,10 +9433,9 @@ function ReportDetail({
9338
9433
  cancelled = true;
9339
9434
  };
9340
9435
  }, [client, reportId]);
9341
- const artifactCache = useArtifactCache(
9342
- reportId,
9343
- report?.summary?.artifacts?.testOutputs,
9344
- client
9436
+ const artifactCache = useTestOutputsArtifact(
9437
+ report?.provenance?.runId,
9438
+ report?.summary?.artifactManifest?.testOutputs
9345
9439
  );
9346
9440
  const { summary } = report ?? {};
9347
9441
  const hasWeaknesses = Boolean(
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@sanity/ailf-studio",
3
- "version": "1.3.0",
3
+ "version": "1.4.0",
4
4
  "description": "AI Literacy Framework — Sanity Studio dashboard plugin",
5
5
  "type": "module",
6
6
  "license": "MIT",