@sanity/ailf-studio 1.3.1 → 1.4.0
This diff compares the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.
- package/dist/index.d.ts +17 -2
- package/dist/index.js +176 -66
- package/package.json +1 -1
package/dist/index.d.ts
CHANGED
|
@@ -318,6 +318,8 @@ interface ProvenanceData {
|
|
|
318
318
|
id: string;
|
|
319
319
|
label: string;
|
|
320
320
|
}[];
|
|
321
|
+
/** Identity of the pipeline run that produced this report (D0032) */
|
|
322
|
+
runId: string;
|
|
321
323
|
/** @deprecated Use `promptfooUrls` when available */
|
|
322
324
|
promptfooUrl?: string;
|
|
323
325
|
/** Per-mode Promptfoo share URLs (one per sub-eval) */
|
|
@@ -441,6 +443,16 @@ interface ArtifactRef {
|
|
|
441
443
|
path: string;
|
|
442
444
|
bytes?: number;
|
|
443
445
|
entryCount?: number;
|
|
446
|
+
/**
|
|
447
|
+
* Added in W0047 / D0032. Missing on pre-W0047 legacy refs, in which case
|
|
448
|
+
* dispatchers must treat it as `"bulk"` (the only layout that existed then).
|
|
449
|
+
*/
|
|
450
|
+
layout?: "bulk" | "per-entry";
|
|
451
|
+
/** Per-entry index (populated for `layout: "per-entry"` refs only). */
|
|
452
|
+
entries?: {
|
|
453
|
+
key: string;
|
|
454
|
+
bytes: number;
|
|
455
|
+
}[];
|
|
444
456
|
}
|
|
445
457
|
/** A single gap/recommendation from gap analysis */
|
|
446
458
|
interface RecommendationGap {
|
|
@@ -539,11 +551,14 @@ interface PerModelData {
|
|
|
539
551
|
interface SummaryData {
|
|
540
552
|
/** Per-feature agent behavior data (only present when agentic mode ran) */
|
|
541
553
|
agentBehavior?: FeatureAgentBehaviorData[] | null;
|
|
542
|
-
/** External artifact references — present when pipeline uploads to GCS (
|
|
543
|
-
|
|
554
|
+
/** External artifact references — present when pipeline uploads to GCS (D0032) */
|
|
555
|
+
artifactManifest?: {
|
|
544
556
|
testOutputs?: ArtifactRef;
|
|
545
557
|
renderedPrompts?: ArtifactRef;
|
|
546
558
|
rawResults?: ArtifactRef;
|
|
559
|
+
graderPrompts?: ArtifactRef;
|
|
560
|
+
taskDefinitions?: ArtifactRef;
|
|
561
|
+
evalResults?: ArtifactRef;
|
|
547
562
|
traces?: ArtifactRef;
|
|
548
563
|
};
|
|
549
564
|
belowCritical: string[];
|
package/dist/index.js
CHANGED
|
@@ -734,6 +734,30 @@ function artifactRefSchema() {
|
|
|
734
734
|
name: "entryCount",
|
|
735
735
|
title: "Entry Count",
|
|
736
736
|
type: "number"
|
|
737
|
+
}),
|
|
738
|
+
defineField4({
|
|
739
|
+
name: "layout",
|
|
740
|
+
title: "Layout",
|
|
741
|
+
type: "string",
|
|
742
|
+
options: { list: ["bulk", "per-entry"] }
|
|
743
|
+
}),
|
|
744
|
+
defineField4({
|
|
745
|
+
name: "entries",
|
|
746
|
+
title: "Per-Entry Index",
|
|
747
|
+
type: "array",
|
|
748
|
+
of: [
|
|
749
|
+
{
|
|
750
|
+
fields: [
|
|
751
|
+
defineField4({ name: "key", title: "Key", type: "string" }),
|
|
752
|
+
defineField4({
|
|
753
|
+
name: "bytes",
|
|
754
|
+
title: "Size (bytes)",
|
|
755
|
+
type: "number"
|
|
756
|
+
})
|
|
757
|
+
],
|
|
758
|
+
type: "object"
|
|
759
|
+
}
|
|
760
|
+
]
|
|
737
761
|
})
|
|
738
762
|
],
|
|
739
763
|
type: "object"
|
|
@@ -1650,7 +1674,7 @@ var reportSchema = defineType4({
|
|
|
1650
1674
|
type: "array"
|
|
1651
1675
|
}),
|
|
1652
1676
|
defineField4({
|
|
1653
|
-
description: "External artifact references \u2014 points to large data in GCS that was too big for inline storage (
|
|
1677
|
+
description: "External artifact references \u2014 points to large data in GCS that was too big for inline storage (D0032).",
|
|
1654
1678
|
fields: [
|
|
1655
1679
|
defineField4({
|
|
1656
1680
|
...artifactRefSchema(),
|
|
@@ -1667,14 +1691,29 @@ var reportSchema = defineType4({
|
|
|
1667
1691
|
name: "rawResults",
|
|
1668
1692
|
title: "Raw Results"
|
|
1669
1693
|
}),
|
|
1694
|
+
defineField4({
|
|
1695
|
+
...artifactRefSchema(),
|
|
1696
|
+
name: "graderPrompts",
|
|
1697
|
+
title: "Grader Prompts"
|
|
1698
|
+
}),
|
|
1699
|
+
defineField4({
|
|
1700
|
+
...artifactRefSchema(),
|
|
1701
|
+
name: "taskDefinitions",
|
|
1702
|
+
title: "Task Definitions"
|
|
1703
|
+
}),
|
|
1704
|
+
defineField4({
|
|
1705
|
+
...artifactRefSchema(),
|
|
1706
|
+
name: "evalResults",
|
|
1707
|
+
title: "Eval Results"
|
|
1708
|
+
}),
|
|
1670
1709
|
defineField4({
|
|
1671
1710
|
...artifactRefSchema(),
|
|
1672
1711
|
name: "traces",
|
|
1673
1712
|
title: "Traces"
|
|
1674
1713
|
})
|
|
1675
1714
|
],
|
|
1676
|
-
name: "
|
|
1677
|
-
title: "
|
|
1715
|
+
name: "artifactManifest",
|
|
1716
|
+
title: "Artifact Manifest",
|
|
1678
1717
|
type: "object"
|
|
1679
1718
|
})
|
|
1680
1719
|
],
|
|
@@ -5890,67 +5929,128 @@ import { useClient as useClient10 } from "sanity";
|
|
|
5890
5929
|
|
|
5891
5930
|
// src/lib/useArtifactCache.ts
|
|
5892
5931
|
import { useCallback as useCallback14, useRef as useRef5, useState as useState10 } from "react";
|
|
5893
|
-
function useArtifactCache(
|
|
5932
|
+
function useArtifactCache(opts) {
|
|
5933
|
+
const { runId, artifactRef, type } = opts;
|
|
5894
5934
|
const cacheRef = useRef5(/* @__PURE__ */ new Map());
|
|
5895
5935
|
const [status, setStatus] = useState10("idle");
|
|
5896
5936
|
const [error, setError] = useState10(null);
|
|
5897
|
-
const
|
|
5898
|
-
const
|
|
5899
|
-
|
|
5900
|
-
|
|
5901
|
-
|
|
5902
|
-
|
|
5903
|
-
|
|
5904
|
-
|
|
5905
|
-
|
|
5906
|
-
|
|
5907
|
-
|
|
5908
|
-
|
|
5909
|
-
|
|
5910
|
-
|
|
5911
|
-
|
|
5912
|
-
|
|
5913
|
-
|
|
5914
|
-
|
|
5915
|
-
|
|
5916
|
-
|
|
5917
|
-
|
|
5918
|
-
const envelope = await signingRes.json();
|
|
5919
|
-
if (envelope.object === "error" || !envelope.url) {
|
|
5920
|
-
throw new Error(
|
|
5921
|
-
envelope.error?.message ?? "Invalid signing response \u2014 missing signed URL"
|
|
5922
|
-
);
|
|
5923
|
-
}
|
|
5924
|
-
const artifactRes = await fetch(envelope.url, {
|
|
5925
|
-
credentials: "omit"
|
|
5926
|
-
});
|
|
5927
|
-
if (!artifactRes.ok) {
|
|
5928
|
-
throw new Error(
|
|
5929
|
-
`GCS artifact fetch failed: ${artifactRes.status} ${artifactRes.statusText}`
|
|
5930
|
-
);
|
|
5937
|
+
const inFlight = useRef5(/* @__PURE__ */ new Set());
|
|
5938
|
+
const BULK_KEY = "__bulk__";
|
|
5939
|
+
const availableEntries = artifactRef?.entries?.map((e) => e.key) ?? [];
|
|
5940
|
+
const fetchEntry = useCallback14(
|
|
5941
|
+
async (key) => {
|
|
5942
|
+
if (!artifactRef || !runId) return;
|
|
5943
|
+
if (!artifactRef.layout || artifactRef.layout === "bulk") return;
|
|
5944
|
+
if (cacheRef.current.has(key) || inFlight.current.has(key)) return;
|
|
5945
|
+
inFlight.current.add(key);
|
|
5946
|
+
setStatus("loading");
|
|
5947
|
+
setError(null);
|
|
5948
|
+
try {
|
|
5949
|
+
const url = `${ARTIFACT_API_BASE_URL}/runs/${encodeURIComponent(runId)}/artifacts/${encodeURIComponent(type)}/${encodeURIComponent(key)}`;
|
|
5950
|
+
const entry = await signAndFetch(url);
|
|
5951
|
+
cacheRef.current.set(key, entry);
|
|
5952
|
+
setStatus("ready");
|
|
5953
|
+
} catch (err) {
|
|
5954
|
+
setError(err instanceof Error ? err.message : String(err));
|
|
5955
|
+
setStatus("error");
|
|
5956
|
+
} finally {
|
|
5957
|
+
inFlight.current.delete(key);
|
|
5931
5958
|
}
|
|
5932
|
-
|
|
5933
|
-
|
|
5934
|
-
|
|
5935
|
-
|
|
5959
|
+
},
|
|
5960
|
+
[runId, artifactRef, type]
|
|
5961
|
+
);
|
|
5962
|
+
const fetchAll = useCallback14(async () => {
|
|
5963
|
+
if (!artifactRef || !runId) return;
|
|
5964
|
+
if (inFlight.current.has(BULK_KEY)) return;
|
|
5965
|
+
if (!artifactRef.layout || artifactRef.layout === "bulk") {
|
|
5966
|
+
if (cacheRef.current.size > 0) return;
|
|
5967
|
+
inFlight.current.add(BULK_KEY);
|
|
5968
|
+
setStatus("loading");
|
|
5969
|
+
setError(null);
|
|
5970
|
+
try {
|
|
5971
|
+
const url = `${ARTIFACT_API_BASE_URL}/runs/${encodeURIComponent(runId)}/artifacts/${encodeURIComponent(type)}`;
|
|
5972
|
+
const body = await signAndFetch(url);
|
|
5973
|
+
const next = /* @__PURE__ */ new Map();
|
|
5974
|
+
for (const [key, entry] of Object.entries(body.entries)) {
|
|
5975
|
+
next.set(key, entry);
|
|
5976
|
+
}
|
|
5977
|
+
cacheRef.current = next;
|
|
5978
|
+
setStatus("ready");
|
|
5979
|
+
} catch (err) {
|
|
5980
|
+
setError(err instanceof Error ? err.message : String(err));
|
|
5981
|
+
setStatus("error");
|
|
5982
|
+
} finally {
|
|
5983
|
+
inFlight.current.delete(BULK_KEY);
|
|
5936
5984
|
}
|
|
5937
|
-
|
|
5938
|
-
setStatus("ready");
|
|
5939
|
-
} catch (err) {
|
|
5940
|
-
const message = err instanceof Error ? err.message : String(err);
|
|
5941
|
-
setError(message);
|
|
5942
|
-
setStatus("error");
|
|
5943
|
-
} finally {
|
|
5944
|
-
fetchingRef.current = false;
|
|
5985
|
+
return;
|
|
5945
5986
|
}
|
|
5946
|
-
|
|
5947
|
-
|
|
5948
|
-
|
|
5949
|
-
|
|
5950
|
-
|
|
5987
|
+
const keys = artifactRef.entries?.map((e) => e.key) ?? [];
|
|
5988
|
+
await Promise.all(keys.map((k) => fetchEntry(k)));
|
|
5989
|
+
}, [runId, artifactRef, type, fetchEntry]);
|
|
5990
|
+
const getEntry = useCallback14(
|
|
5991
|
+
(key) => cacheRef.current.get(key) ?? null,
|
|
5951
5992
|
[]
|
|
5952
5993
|
);
|
|
5953
|
-
return {
|
|
5994
|
+
return {
|
|
5995
|
+
status,
|
|
5996
|
+
error,
|
|
5997
|
+
availableEntries,
|
|
5998
|
+
getEntry,
|
|
5999
|
+
fetchEntry,
|
|
6000
|
+
fetchAll
|
|
6001
|
+
};
|
|
6002
|
+
}
|
|
6003
|
+
async function signAndFetch(signingUrl) {
|
|
6004
|
+
const signingRes = await fetch(signingUrl, {
|
|
6005
|
+
credentials: "omit",
|
|
6006
|
+
headers: { Accept: "application/json" }
|
|
6007
|
+
});
|
|
6008
|
+
if (!signingRes.ok) {
|
|
6009
|
+
const body = await signingRes.text().catch(() => "");
|
|
6010
|
+
throw new Error(
|
|
6011
|
+
`Artifact signing failed: ${signingRes.status} ${signingRes.statusText}${body ? ` \u2014 ${body.slice(0, 200)}` : ""}`
|
|
6012
|
+
);
|
|
6013
|
+
}
|
|
6014
|
+
const envelope = await signingRes.json();
|
|
6015
|
+
if (envelope.object === "error" || !envelope.url) {
|
|
6016
|
+
throw new Error(
|
|
6017
|
+
envelope.error?.message ?? "Invalid signing response \u2014 missing signed URL"
|
|
6018
|
+
);
|
|
6019
|
+
}
|
|
6020
|
+
const artifactRes = await fetch(envelope.url, { credentials: "omit" });
|
|
6021
|
+
if (!artifactRes.ok) {
|
|
6022
|
+
throw new Error(
|
|
6023
|
+
`GCS artifact fetch failed: ${artifactRes.status} ${artifactRes.statusText}`
|
|
6024
|
+
);
|
|
6025
|
+
}
|
|
6026
|
+
return await artifactRes.json();
|
|
6027
|
+
}
|
|
6028
|
+
|
|
6029
|
+
// src/lib/use-test-outputs-artifact.ts
|
|
6030
|
+
function useTestOutputsArtifact(runId, artifactRef) {
|
|
6031
|
+
const cache = useArtifactCache({
|
|
6032
|
+
runId,
|
|
6033
|
+
artifactRef,
|
|
6034
|
+
type: "testOutputs"
|
|
6035
|
+
});
|
|
6036
|
+
return {
|
|
6037
|
+
status: cache.status,
|
|
6038
|
+
error: cache.error,
|
|
6039
|
+
getOutput: (taskId, modelId) => cache.getEntry(`${taskId}::${modelId}`),
|
|
6040
|
+
fetchOutput: async (taskId, modelId) => {
|
|
6041
|
+
if (!artifactRef) return;
|
|
6042
|
+
if (!artifactRef.layout || artifactRef.layout === "bulk") {
|
|
6043
|
+
await cache.fetchAll();
|
|
6044
|
+
return;
|
|
6045
|
+
}
|
|
6046
|
+
await cache.fetchEntry(`${taskId}::${modelId}`);
|
|
6047
|
+
},
|
|
6048
|
+
hasOutput: (taskId, modelId) => {
|
|
6049
|
+
if (!artifactRef) return false;
|
|
6050
|
+
if (!artifactRef.layout || artifactRef.layout === "bulk") return true;
|
|
6051
|
+
return cache.availableEntries.includes(`${taskId}::${modelId}`);
|
|
6052
|
+
}
|
|
6053
|
+
};
|
|
5954
6054
|
}
|
|
5955
6055
|
|
|
5956
6056
|
// src/lib/thresholds.ts
|
|
@@ -7160,8 +7260,10 @@ function JudgmentCard({
|
|
|
7160
7260
|
);
|
|
7161
7261
|
const resolvedOutput = inlineOutput ?? artifactEntry?.responseOutput ?? null;
|
|
7162
7262
|
const resolvedTruncated = testResult?.responseOutputTruncated ?? artifactEntry?.responseOutputTruncated ?? false;
|
|
7163
|
-
const
|
|
7164
|
-
const
|
|
7263
|
+
const entryKnownToManifest = artifactCache?.hasOutput(judgment.taskId, judgment.modelId) ?? false;
|
|
7264
|
+
const canFetchArtifact = !inlineOutput && !artifactEntry && artifactCache != null && entryKnownToManifest;
|
|
7265
|
+
const entryUnavailable = !inlineOutput && !artifactEntry && artifactCache != null && !entryKnownToManifest;
|
|
7266
|
+
const hasOutputOrCanFetch = resolvedOutput != null || canFetchArtifact || entryUnavailable;
|
|
7165
7267
|
useEffect8(() => {
|
|
7166
7268
|
if (focused) {
|
|
7167
7269
|
setExpanded(true);
|
|
@@ -7224,12 +7326,19 @@ function JudgmentCard({
|
|
|
7224
7326
|
const handleToggleOutput = useCallback16(
|
|
7225
7327
|
(e) => {
|
|
7226
7328
|
e.stopPropagation();
|
|
7227
|
-
if (!outputExpanded && !resolvedOutput && artifactCache) {
|
|
7228
|
-
artifactCache.
|
|
7329
|
+
if (!outputExpanded && !resolvedOutput && artifactCache && entryKnownToManifest) {
|
|
7330
|
+
artifactCache.fetchOutput(judgment.taskId, judgment.modelId);
|
|
7229
7331
|
}
|
|
7230
7332
|
setOutputExpanded((prev) => !prev);
|
|
7231
7333
|
},
|
|
7232
|
-
[
|
|
7334
|
+
[
|
|
7335
|
+
outputExpanded,
|
|
7336
|
+
resolvedOutput,
|
|
7337
|
+
artifactCache,
|
|
7338
|
+
entryKnownToManifest,
|
|
7339
|
+
judgment.taskId,
|
|
7340
|
+
judgment.modelId
|
|
7341
|
+
]
|
|
7233
7342
|
);
|
|
7234
7343
|
return /* @__PURE__ */ jsx28(
|
|
7235
7344
|
Box16,
|
|
@@ -7368,8 +7477,9 @@ function JudgmentCard({
|
|
|
7368
7477
|
padding: 12
|
|
7369
7478
|
},
|
|
7370
7479
|
children: [
|
|
7371
|
-
!resolvedOutput &&
|
|
7372
|
-
!resolvedOutput && artifactCache?.status === "
|
|
7480
|
+
!resolvedOutput && entryUnavailable && /* @__PURE__ */ jsx28(Text25, { muted: true, size: 1, children: "Model output not available for this entry." }),
|
|
7481
|
+
!resolvedOutput && !entryUnavailable && artifactCache?.status === "loading" && /* @__PURE__ */ jsx28(Text25, { muted: true, size: 1, children: "Fetching model output\u2026" }),
|
|
7482
|
+
!resolvedOutput && !entryUnavailable && artifactCache?.status === "error" && /* @__PURE__ */ jsxs23(Text25, { muted: true, size: 1, style: { color: "#f87171" }, children: [
|
|
7373
7483
|
"Failed to load model output",
|
|
7374
7484
|
artifactCache.error ? `: ${artifactCache.error}` : ""
|
|
7375
7485
|
] }),
|
|
@@ -9323,9 +9433,9 @@ function ReportDetail({
|
|
|
9323
9433
|
cancelled = true;
|
|
9324
9434
|
};
|
|
9325
9435
|
}, [client, reportId]);
|
|
9326
|
-
const artifactCache =
|
|
9327
|
-
|
|
9328
|
-
report?.summary?.
|
|
9436
|
+
const artifactCache = useTestOutputsArtifact(
|
|
9437
|
+
report?.provenance?.runId,
|
|
9438
|
+
report?.summary?.artifactManifest?.testOutputs
|
|
9329
9439
|
);
|
|
9330
9440
|
const { summary } = report ?? {};
|
|
9331
9441
|
const hasWeaknesses = Boolean(
|