@sanity/ailf-studio 1.2.2 → 1.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.d.ts +45 -1
- package/dist/index.js +501 -84
- package/package.json +1 -1
package/dist/index.d.ts
CHANGED
|
@@ -434,6 +434,14 @@ interface ScoreItem {
|
|
|
434
434
|
/** Ceiling score — gold-standard docs injected */
|
|
435
435
|
ceilingScore?: number;
|
|
436
436
|
}
|
|
437
|
+
/** Reference to an artifact stored in an external object store (GCS). All fields are plain metadata; the artifact body itself is not embedded. */
|
|
438
|
+
interface ArtifactRef {
|
|
439
|
+
store: "gcs"; // discriminator; "gcs" is the only value this type permits
|
|
440
|
+
bucket: string; // name of the bucket holding the artifact
|
|
441
|
+
path: string; // presumably the object path inside `bucket` — confirm against the uploader
|
|
442
|
+
bytes?: number; // optional artifact size in bytes
|
|
443
|
+
entryCount?: number; // optional entry count (presumably entries inside the stored artifact — confirm)
|
|
444
|
+
}
|
|
437
445
|
/** A single gap/recommendation from gap analysis */
|
|
438
446
|
interface RecommendationGap {
|
|
439
447
|
affectedTaskIds: string[];
|
|
@@ -451,6 +459,33 @@ interface RecommendationsData {
|
|
|
451
459
|
generatedAt: string;
|
|
452
460
|
totalPotentialLift: number;
|
|
453
461
|
}
|
|
462
|
+
/**
|
|
463
|
+
* Per-test result stored in reports for drill-down and audit (the report schema describes one entry per test × model combination).
|
|
464
|
+
* Mirrors StoredTestResult from @sanity/ailf-core. See D0029.
|
|
465
|
+
*/
|
|
466
|
+
interface StoredTestResultData {
|
|
467
|
+
area: string; // feature area the test belongs to
|
|
468
|
+
canonicalDocs?: DocumentRef[]; // documentation pages the task expected the model to use
|
|
469
|
+
compositeScore?: number; // optional composite score for this test
|
|
470
|
+
cost?: number; // optional cost of the run; the studio renders it with a "$" prefix
|
|
471
|
+
dimensions: { // one element per graded dimension
|
|
472
|
+
dimension: string; // dimension name
|
|
473
|
+
reason: string; // grader's reasoning text for this dimension
|
|
474
|
+
score: number; // score on a 0–100 scale per the report schema title
|
|
475
|
+
}[];
|
|
476
|
+
latencyMs?: number; // optional latency in milliseconds
|
|
477
|
+
modelId: string; // identifier of the model that produced the output
|
|
478
|
+
outputFailure?: boolean; // true when the model failed to produce output (empty response, API error, or refusal)
|
|
479
|
+
responseOutput: string; // the model's generated code/response; the report schema notes it is truncated to 8000 chars
|
|
480
|
+
responseOutputTruncated?: boolean; // set when `responseOutput` was truncated
|
|
481
|
+
taskId: string; // identifier of the task that was run
|
|
482
|
+
tokenUsage?: { // optional token accounting for the run
|
|
483
|
+
completion: number; // completion token count
|
|
484
|
+
prompt: number; // prompt token count
|
|
485
|
+
total: number; // total token count
|
|
486
|
+
};
|
|
487
|
+
variant: "baseline" | "gold"; // which evaluation variant produced this result
|
|
488
|
+
}
|
|
454
489
|
/** A single low-scoring grader judgment stored in reports */
|
|
455
490
|
interface JudgmentData {
|
|
456
491
|
/** Docs the task expected the model to use */
|
|
@@ -499,6 +534,13 @@ interface PerModelData {
|
|
|
499
534
|
interface SummaryData {
|
|
500
535
|
/** Per-feature agent behavior data (only present when agentic mode ran) */
|
|
501
536
|
agentBehavior?: FeatureAgentBehaviorData[] | null;
|
|
537
|
+
/** External artifact references — present when pipeline uploads to GCS (D0030) */
|
|
538
|
+
artifacts?: {
|
|
539
|
+
testOutputs?: ArtifactRef;
|
|
540
|
+
renderedPrompts?: ArtifactRef;
|
|
541
|
+
rawResults?: ArtifactRef;
|
|
542
|
+
traces?: ArtifactRef;
|
|
543
|
+
};
|
|
502
544
|
belowCritical: string[];
|
|
503
545
|
/** All Sanity documents used across the entire evaluation */
|
|
504
546
|
documentManifest?: DocumentRef[];
|
|
@@ -523,6 +565,8 @@ interface SummaryData {
|
|
|
523
565
|
/** Gap analysis recommendations (when gap analysis was run) */
|
|
524
566
|
recommendations: null | RecommendationsData;
|
|
525
567
|
scores: ScoreItem[];
|
|
568
|
+
/** Per-test results with model output and metadata (D0029) */
|
|
569
|
+
testResults?: StoredTestResultData[] | null;
|
|
526
570
|
timestamp: string;
|
|
527
571
|
}
|
|
528
572
|
/** Shape returned by scoreTimelineQuery */
|
|
@@ -734,7 +778,7 @@ declare const scoreTimelineQuery: string;
|
|
|
734
778
|
*
|
|
735
779
|
* Used by: ReportDetail view
|
|
736
780
|
*/
|
|
737
|
-
declare const reportDetailQuery = "\n *[_type == \"ailf.report\" && reportId == $reportId][0] {\n _id,\n reportId,\n completedAt,\n durationMs,\n tag,\n title,\n provenance,\n summary,\n comparison\n }\n";
|
|
781
|
+
declare const reportDetailQuery = "\n *[_type == \"ailf.report\" && reportId == $reportId][0] {\n _id,\n reportId,\n completedAt,\n durationMs,\n tag,\n title,\n provenance,\n summary,\n \"comparison\": comparison {\n areas,\n deltas,\n generatedAt,\n improved,\n mismatched,\n noiseThreshold,\n noiseThresholdEmpirical,\n notEvaluated,\n regressed,\n unchanged\n }\n }\n";
|
|
738
782
|
/**
|
|
739
783
|
* Find all reports that evaluated a specific Sanity document or perspective.
|
|
740
784
|
*
|
package/dist/index.js
CHANGED
|
@@ -9,6 +9,8 @@ import { useClient, useCurrentUser } from "sanity";
|
|
|
9
9
|
|
|
10
10
|
// src/lib/constants.ts
|
|
11
11
|
var API_VERSION = "2026-03-11";
|
|
12
|
+
var isProduction = process.env.NODE_ENV === "production"; // true only when NODE_ENV is exactly "production"; unset or any other value counts as non-production
|
|
13
|
+
var ARTIFACT_API_BASE_URL = isProduction ? "https://ailf-api.sanity.build/v1" : "http://localhost:4100/v1"; // base URL for artifact API requests: hosted endpoint in production, a local server on port 4100 otherwise
|
|
12
14
|
|
|
13
15
|
// src/actions/GraduateToNativeAction.tsx
|
|
14
16
|
import { jsx, jsxs } from "react/jsx-runtime";
|
|
@@ -721,6 +723,22 @@ function documentRefSchema() {
|
|
|
721
723
|
type: "object"
|
|
722
724
|
};
|
|
723
725
|
}
|
|
726
|
+
/**
 * Builds the object-type schema fragment describing a single artifact
 * reference: store, bucket, path, size in bytes, and entry count.
 *
 * A fresh object is returned on every call so callers can spread it into
 * their own field definition and add `name` / `title`.
 *
 * @returns An object with the five field definitions and `type: "object"`.
 */
function artifactRefSchema() {
  // [name, title, type] for each field, in declaration order.
  const fieldSpecs = [
    ["store", "Store", "string"],
    ["bucket", "Bucket", "string"],
    ["path", "Path", "string"],
    ["bytes", "Size (bytes)", "number"],
    ["entryCount", "Entry Count", "number"]
  ];
  const fields = fieldSpecs.map(
    ([name, title, type]) => defineField4({ name, title, type })
  );
  return { fields, type: "object" };
}
|
|
724
742
|
var reportSchema = defineType4({
|
|
725
743
|
groups: [
|
|
726
744
|
{ name: "main", title: "Main", default: true },
|
|
@@ -1341,6 +1359,128 @@ var reportSchema = defineType4({
|
|
|
1341
1359
|
title: "Low-Scoring Judgments",
|
|
1342
1360
|
type: "array"
|
|
1343
1361
|
}),
|
|
1362
|
+
defineField4({
|
|
1363
|
+
description: "Per-test results with model output, grader reasoning, and metadata. One entry per test \xD7 model combination. See D0029.",
|
|
1364
|
+
name: "testResults",
|
|
1365
|
+
of: [
|
|
1366
|
+
{
|
|
1367
|
+
fields: [
|
|
1368
|
+
defineField4({
|
|
1369
|
+
name: "taskId",
|
|
1370
|
+
title: "Task",
|
|
1371
|
+
type: "string"
|
|
1372
|
+
}),
|
|
1373
|
+
defineField4({
|
|
1374
|
+
name: "modelId",
|
|
1375
|
+
title: "Model ID",
|
|
1376
|
+
type: "string"
|
|
1377
|
+
}),
|
|
1378
|
+
defineField4({
|
|
1379
|
+
name: "area",
|
|
1380
|
+
title: "Feature Area",
|
|
1381
|
+
type: "string"
|
|
1382
|
+
}),
|
|
1383
|
+
defineField4({
|
|
1384
|
+
name: "variant",
|
|
1385
|
+
options: { list: ["gold", "baseline"] },
|
|
1386
|
+
title: "Variant",
|
|
1387
|
+
type: "string"
|
|
1388
|
+
}),
|
|
1389
|
+
defineField4({
|
|
1390
|
+
name: "dimensions",
|
|
1391
|
+
of: [
|
|
1392
|
+
{
|
|
1393
|
+
fields: [
|
|
1394
|
+
defineField4({
|
|
1395
|
+
name: "dimension",
|
|
1396
|
+
title: "Dimension",
|
|
1397
|
+
type: "string"
|
|
1398
|
+
}),
|
|
1399
|
+
defineField4({
|
|
1400
|
+
name: "score",
|
|
1401
|
+
title: "Score (0\u2013100)",
|
|
1402
|
+
type: "number"
|
|
1403
|
+
}),
|
|
1404
|
+
defineField4({
|
|
1405
|
+
name: "reason",
|
|
1406
|
+
title: "Reason",
|
|
1407
|
+
type: "text"
|
|
1408
|
+
})
|
|
1409
|
+
],
|
|
1410
|
+
type: "object"
|
|
1411
|
+
}
|
|
1412
|
+
],
|
|
1413
|
+
title: "Dimensions",
|
|
1414
|
+
type: "array"
|
|
1415
|
+
}),
|
|
1416
|
+
defineField4({
|
|
1417
|
+
name: "compositeScore",
|
|
1418
|
+
title: "Composite Score",
|
|
1419
|
+
type: "number"
|
|
1420
|
+
}),
|
|
1421
|
+
defineField4({
|
|
1422
|
+
description: "The model's generated code/response (truncated to 8000 chars).",
|
|
1423
|
+
name: "responseOutput",
|
|
1424
|
+
title: "Response Output",
|
|
1425
|
+
type: "text"
|
|
1426
|
+
}),
|
|
1427
|
+
defineField4({
|
|
1428
|
+
name: "responseOutputTruncated",
|
|
1429
|
+
title: "Truncated",
|
|
1430
|
+
type: "boolean"
|
|
1431
|
+
}),
|
|
1432
|
+
defineField4({
|
|
1433
|
+
name: "latencyMs",
|
|
1434
|
+
title: "Latency (ms)",
|
|
1435
|
+
type: "number"
|
|
1436
|
+
}),
|
|
1437
|
+
defineField4({
|
|
1438
|
+
name: "tokenUsage",
|
|
1439
|
+
fields: [
|
|
1440
|
+
defineField4({
|
|
1441
|
+
name: "prompt",
|
|
1442
|
+
title: "Prompt",
|
|
1443
|
+
type: "number"
|
|
1444
|
+
}),
|
|
1445
|
+
defineField4({
|
|
1446
|
+
name: "completion",
|
|
1447
|
+
title: "Completion",
|
|
1448
|
+
type: "number"
|
|
1449
|
+
}),
|
|
1450
|
+
defineField4({
|
|
1451
|
+
name: "total",
|
|
1452
|
+
title: "Total",
|
|
1453
|
+
type: "number"
|
|
1454
|
+
})
|
|
1455
|
+
],
|
|
1456
|
+
title: "Token Usage",
|
|
1457
|
+
type: "object"
|
|
1458
|
+
}),
|
|
1459
|
+
defineField4({
|
|
1460
|
+
name: "cost",
|
|
1461
|
+
title: "Cost",
|
|
1462
|
+
type: "number"
|
|
1463
|
+
}),
|
|
1464
|
+
defineField4({
|
|
1465
|
+
description: "True when the model failed to produce output (empty response, API error, or refusal).",
|
|
1466
|
+
name: "outputFailure",
|
|
1467
|
+
title: "Output Failure",
|
|
1468
|
+
type: "boolean"
|
|
1469
|
+
}),
|
|
1470
|
+
defineField4({
|
|
1471
|
+
description: "Documentation pages the task expected the model to use.",
|
|
1472
|
+
name: "canonicalDocs",
|
|
1473
|
+
of: [documentRefSchema()],
|
|
1474
|
+
title: "Canonical Docs",
|
|
1475
|
+
type: "array"
|
|
1476
|
+
})
|
|
1477
|
+
],
|
|
1478
|
+
type: "object"
|
|
1479
|
+
}
|
|
1480
|
+
],
|
|
1481
|
+
title: "Test Results",
|
|
1482
|
+
type: "array"
|
|
1483
|
+
}),
|
|
1344
1484
|
defineField4({
|
|
1345
1485
|
name: "perModel",
|
|
1346
1486
|
of: [
|
|
@@ -1519,6 +1659,34 @@ var reportSchema = defineType4({
|
|
|
1519
1659
|
],
|
|
1520
1660
|
title: "Agent Behavior",
|
|
1521
1661
|
type: "array"
|
|
1662
|
+
}),
|
|
1663
|
+
defineField4({
|
|
1664
|
+
description: "External artifact references \u2014 points to large data in GCS that was too big for inline storage (D0030).",
|
|
1665
|
+
fields: [
|
|
1666
|
+
defineField4({
|
|
1667
|
+
...artifactRefSchema(),
|
|
1668
|
+
name: "testOutputs",
|
|
1669
|
+
title: "Test Outputs"
|
|
1670
|
+
}),
|
|
1671
|
+
defineField4({
|
|
1672
|
+
...artifactRefSchema(),
|
|
1673
|
+
name: "renderedPrompts",
|
|
1674
|
+
title: "Rendered Prompts"
|
|
1675
|
+
}),
|
|
1676
|
+
defineField4({
|
|
1677
|
+
...artifactRefSchema(),
|
|
1678
|
+
name: "rawResults",
|
|
1679
|
+
title: "Raw Results"
|
|
1680
|
+
}),
|
|
1681
|
+
defineField4({
|
|
1682
|
+
...artifactRefSchema(),
|
|
1683
|
+
name: "traces",
|
|
1684
|
+
title: "Traces"
|
|
1685
|
+
})
|
|
1686
|
+
],
|
|
1687
|
+
name: "artifacts",
|
|
1688
|
+
title: "Artifacts",
|
|
1689
|
+
type: "object"
|
|
1522
1690
|
})
|
|
1523
1691
|
],
|
|
1524
1692
|
group: ["main", "all-fields"],
|
|
@@ -3089,7 +3257,7 @@ import {
|
|
|
3089
3257
|
TabPanel as TabPanel2,
|
|
3090
3258
|
Text as Text41
|
|
3091
3259
|
} from "@sanity/ui";
|
|
3092
|
-
import { useCallback as
|
|
3260
|
+
import { useCallback as useCallback28 } from "react";
|
|
3093
3261
|
import { useRouter as useRouter3 } from "sanity/router";
|
|
3094
3262
|
|
|
3095
3263
|
// src/lib/help-context.ts
|
|
@@ -3752,7 +3920,18 @@ var reportDetailQuery = (
|
|
|
3752
3920
|
title,
|
|
3753
3921
|
provenance,
|
|
3754
3922
|
summary,
|
|
3755
|
-
comparison
|
|
3923
|
+
"comparison": comparison {
|
|
3924
|
+
areas,
|
|
3925
|
+
deltas,
|
|
3926
|
+
generatedAt,
|
|
3927
|
+
improved,
|
|
3928
|
+
mismatched,
|
|
3929
|
+
noiseThreshold,
|
|
3930
|
+
noiseThresholdEmpirical,
|
|
3931
|
+
notEvaluated,
|
|
3932
|
+
regressed,
|
|
3933
|
+
unchanged
|
|
3934
|
+
}
|
|
3756
3935
|
}
|
|
3757
3936
|
`
|
|
3758
3937
|
);
|
|
@@ -5713,13 +5892,82 @@ import {
|
|
|
5713
5892
|
Tooltip as Tooltip8
|
|
5714
5893
|
} from "@sanity/ui";
|
|
5715
5894
|
import {
|
|
5716
|
-
useCallback as
|
|
5895
|
+
useCallback as useCallback26,
|
|
5717
5896
|
useEffect as useEffect9,
|
|
5718
5897
|
useMemo as useMemo9,
|
|
5719
|
-
useState as
|
|
5898
|
+
useState as useState20
|
|
5720
5899
|
} from "react";
|
|
5721
5900
|
import { useClient as useClient10 } from "sanity";
|
|
5722
5901
|
|
|
5902
|
+
// src/lib/useArtifactCache.ts
|
|
5903
|
+
import { useCallback as useCallback14, useRef as useRef5, useState as useState10 } from "react";
|
|
5904
|
+
/**
 * React hook that lazily downloads and caches the externally stored
 * "testOutputs" artifact for a report.
 *
 * Loading is a two-step process: the artifact API is asked for a signed URL
 * (`/artifacts/:reportId?type=testOutputs`), then the artifact JSON is fetched
 * from that URL with credentials omitted. The client's token, when present,
 * is sent as a Bearer token on the signing request only.
 *
 * Cached entries, status and error are all tagged with the artifact they
 * belong to, so when `reportId` or `artifactRef` changes the hook reports
 * "idle" with an empty cache instead of serving the previous report's data.
 *
 * @param reportId    Report whose artifact should be loaded.
 * @param artifactRef Artifact reference from the report summary; when falsy,
 *                    `fetchArtifacts` is a no-op.
 * @param client      Sanity client; only `client.config().token` is read.
 * @returns `{ status, error, getOutput, fetchArtifacts }` where `status` is
 *          "idle" | "loading" | "ready" | "error", `error` is a message or
 *          null, `getOutput(taskId, modelId)` returns the cached entry or
 *          null, and `fetchArtifacts()` starts the download if needed.
 */
function useArtifactCache(reportId, artifactRef, client) {
  // Identity of the artifact this render targets; null when there is nothing to load.
  const targetKey = artifactRef ? `${reportId}::${artifactRef.bucket ?? ""}/${artifactRef.path ?? ""}` : null;
  // Entries plus the key they were loaded for. Tracking the key (rather than
  // `size > 0`) means an empty artifact still counts as loaded.
  const cacheRef = useRef5({ key: null, entries: /* @__PURE__ */ new Map() });
  // Key of the request currently in flight, or null when idle.
  const inFlightKeyRef = useRef5(null);
  const [state, setState] = useState10({ key: null, status: "idle", error: null });
  const fetchArtifacts = useCallback14(async () => {
    if (targetKey == null) return;
    if (inFlightKeyRef.current === targetKey || cacheRef.current.key === targetKey) return;
    inFlightKeyRef.current = targetKey;
    setState({ key: targetKey, status: "loading", error: null });
    // A newer request for a different artifact supersedes this one; its
    // results must then be discarded instead of overwriting newer state.
    const isLatest = () => inFlightKeyRef.current === targetKey;
    try {
      const token = client.config().token;
      const signingRes = await fetch(
        `${ARTIFACT_API_BASE_URL}/artifacts/${encodeURIComponent(reportId)}?type=testOutputs`,
        {
          credentials: "omit",
          headers: {
            Accept: "application/json",
            ...token ? { Authorization: `Bearer ${token}` } : {}
          }
        }
      );
      if (!signingRes.ok) {
        // Best effort: include a short excerpt of the response body in the message.
        const body = await signingRes.text().catch(() => "");
        throw new Error(
          `Artifact signing failed: ${signingRes.status} ${signingRes.statusText}${body ? ` \u2014 ${body.slice(0, 200)}` : ""}`
        );
      }
      const envelope = await signingRes.json();
      if (!envelope.ok || !envelope.data?.url) {
        throw new Error(
          envelope.error ?? "Invalid signing response \u2014 missing signed URL"
        );
      }
      const artifactRes = await fetch(envelope.data.url, {
        credentials: "omit"
      });
      if (!artifactRes.ok) {
        throw new Error(
          `GCS artifact fetch failed: ${artifactRes.status} ${artifactRes.statusText}`
        );
      }
      const artifact = await artifactRes.json();
      const entries = artifact?.entries;
      // Validate the payload explicitly so a malformed artifact produces a
      // readable error rather than a TypeError from Object.entries.
      if (entries == null || typeof entries !== "object") {
        throw new Error("Invalid artifact payload \u2014 missing entries");
      }
      if (!isLatest()) return;
      cacheRef.current = {
        key: targetKey,
        entries: new Map(Object.entries(entries))
      };
      setState({ key: targetKey, status: "ready", error: null });
    } catch (err) {
      if (!isLatest()) return;
      const message = err instanceof Error ? err.message : String(err);
      setState({ key: targetKey, status: "error", error: message });
    } finally {
      // Only the latest request may clear the in-flight marker.
      if (isLatest()) inFlightKeyRef.current = null;
    }
  }, [reportId, targetKey, client]);
  const getOutput = useCallback14(
    (taskId, modelId) => {
      const cache = cacheRef.current;
      // Never serve entries that were loaded for a different artifact.
      if (cache.key !== targetKey) return null;
      return cache.entries.get(`${taskId}::${modelId}`) ?? null;
    },
    [targetKey]
  );
  // State recorded for another artifact is reported as a fresh "idle" state.
  const stateIsCurrent = state.key === targetKey;
  const status = stateIsCurrent ? state.status : "idle";
  const error = stateIsCurrent ? state.error : null;
  return { status, error, getOutput, fetchArtifacts };
}
|
|
5970
|
+
|
|
5723
5971
|
// src/lib/thresholds.ts
|
|
5724
5972
|
var SCORE_POSITIVE = 80;
|
|
5725
5973
|
var SCORE_CAUTION = 70;
|
|
@@ -5761,7 +6009,7 @@ function negativeDocLiftSentiment(count) {
|
|
|
5761
6009
|
}
|
|
5762
6010
|
|
|
5763
6011
|
// src/components/report-detail/AgentActivitySection.tsx
|
|
5764
|
-
import { useMemo as useMemo5, useState as
|
|
6012
|
+
import { useMemo as useMemo5, useState as useState11 } from "react";
|
|
5765
6013
|
import { HelpCircleIcon as HelpCircleIcon6, SearchIcon as SearchIcon4 } from "@sanity/icons";
|
|
5766
6014
|
import {
|
|
5767
6015
|
Badge as Badge5,
|
|
@@ -5936,7 +6184,7 @@ function FeatureActivityCard({
|
|
|
5936
6184
|
] }) });
|
|
5937
6185
|
}
|
|
5938
6186
|
function SearchQueryList({ queries }) {
|
|
5939
|
-
const [filter, setFilter] =
|
|
6187
|
+
const [filter, setFilter] = useState11("");
|
|
5940
6188
|
const filtered = useMemo5(() => {
|
|
5941
6189
|
if (!filter) return queries;
|
|
5942
6190
|
const lower = filter.toLowerCase();
|
|
@@ -6582,7 +6830,7 @@ function Pill({
|
|
|
6582
6830
|
// src/components/report-detail/LineageCard.tsx
|
|
6583
6831
|
import { LinkIcon as LinkIcon2 } from "@sanity/icons";
|
|
6584
6832
|
import { Badge as Badge6, Card as Card13, Flex as Flex15, Stack as Stack19, Text as Text24 } from "@sanity/ui";
|
|
6585
|
-
import { useCallback as
|
|
6833
|
+
import { useCallback as useCallback15, useEffect as useEffect7, useState as useState12 } from "react";
|
|
6586
6834
|
import { useClient as useClient5 } from "sanity";
|
|
6587
6835
|
import { useRouter as useRouter2 } from "sanity/router";
|
|
6588
6836
|
import { jsx as jsx26, jsxs as jsxs22 } from "react/jsx-runtime";
|
|
@@ -6602,7 +6850,7 @@ function LineageCard({ provenance, reportId }) {
|
|
|
6602
6850
|
const { lineage } = provenance;
|
|
6603
6851
|
const router = useRouter2();
|
|
6604
6852
|
const client = useClient5({ apiVersion: API_VERSION });
|
|
6605
|
-
const [spawned, setSpawned] =
|
|
6853
|
+
const [spawned, setSpawned] = useState12([]);
|
|
6606
6854
|
useEffect7(() => {
|
|
6607
6855
|
let cancelled = false;
|
|
6608
6856
|
client.fetch(SPAWNED_REPORTS_QUERY, {
|
|
@@ -6650,7 +6898,7 @@ function LineageLink({
|
|
|
6650
6898
|
reportId,
|
|
6651
6899
|
router
|
|
6652
6900
|
}) {
|
|
6653
|
-
const handleClick =
|
|
6901
|
+
const handleClick = useCallback15(() => {
|
|
6654
6902
|
router.navigate({ reportId });
|
|
6655
6903
|
}, [reportId, router]);
|
|
6656
6904
|
return /* @__PURE__ */ jsxs22(Flex15, { align: "center", gap: 2, children: [
|
|
@@ -6679,7 +6927,7 @@ function SpawnedReportRow({
|
|
|
6679
6927
|
report,
|
|
6680
6928
|
router
|
|
6681
6929
|
}) {
|
|
6682
|
-
const handleClick =
|
|
6930
|
+
const handleClick = useCallback15(() => {
|
|
6683
6931
|
router.navigate({ reportId: report.reportId });
|
|
6684
6932
|
}, [report.reportId, router]);
|
|
6685
6933
|
const dateLabel = formatShortDate(report.completedAt);
|
|
@@ -6714,7 +6962,7 @@ function formatShortDate(iso) {
|
|
|
6714
6962
|
}
|
|
6715
6963
|
|
|
6716
6964
|
// src/components/report-detail/JudgmentList.tsx
|
|
6717
|
-
import { useCallback as
|
|
6965
|
+
import React2, { useCallback as useCallback16, useEffect as useEffect8, useRef as useRef6, useState as useState13 } from "react";
|
|
6718
6966
|
import { CopyIcon, ErrorOutlineIcon as ErrorOutlineIcon2, HelpCircleIcon as HelpCircleIcon7 } from "@sanity/icons";
|
|
6719
6967
|
import { Box as Box16, Button as Button3, Flex as Flex16, Stack as Stack20, Text as Text25, Tooltip as Tooltip7, useToast as useToast2 } from "@sanity/ui";
|
|
6720
6968
|
|
|
@@ -6768,14 +7016,29 @@ function groupByArea(judgments) {
|
|
|
6768
7016
|
function dimensionLabel2(dim) {
|
|
6769
7017
|
return dim.split("-").map((w) => w.charAt(0).toUpperCase() + w.slice(1)).join(" ");
|
|
6770
7018
|
}
|
|
7019
|
+
/**
 * Builds the lookup key used to pair a judgment with its stored test result.
 *
 * @param taskId    Task identifier.
 * @param dimension Grading dimension name.
 * @param modelId   Model identifier.
 * @returns The three parts joined with "::" in that order.
 */
function testResultKey(taskId, dimension, modelId) {
  const separator = "::";
  return "".concat(taskId, separator, dimension, separator, modelId);
}
|
|
6771
7022
|
function JudgmentList({
|
|
7023
|
+
artifactCache,
|
|
6772
7024
|
focus,
|
|
6773
7025
|
judgments,
|
|
6774
|
-
onFocusChange
|
|
7026
|
+
onFocusChange,
|
|
7027
|
+
testResults
|
|
6775
7028
|
}) {
|
|
6776
7029
|
if (judgments.length === 0) return null;
|
|
6777
7030
|
const pruned = pruneNoise(judgments);
|
|
6778
7031
|
if (pruned.length === 0) return null;
|
|
7032
|
+
const testResultMap = React2.useMemo(() => {
|
|
7033
|
+
const map = /* @__PURE__ */ new Map();
|
|
7034
|
+
if (!testResults) return map;
|
|
7035
|
+
for (const tr of testResults) {
|
|
7036
|
+
for (const dim of tr.dimensions) {
|
|
7037
|
+
map.set(testResultKey(tr.taskId, dim.dimension, tr.modelId), tr);
|
|
7038
|
+
}
|
|
7039
|
+
}
|
|
7040
|
+
return map;
|
|
7041
|
+
}, [testResults]);
|
|
6779
7042
|
const outputFailureCount = judgments.length - pruned.length;
|
|
6780
7043
|
const grouped = groupByArea(pruned);
|
|
6781
7044
|
return /* @__PURE__ */ jsxs23(Stack20, { space: 3, children: [
|
|
@@ -6868,13 +7131,18 @@ function JudgmentList({
|
|
|
6868
7131
|
] }),
|
|
6869
7132
|
/* @__PURE__ */ jsx28(Stack20, { space: 2, children: areaJudgments.map((j) => {
|
|
6870
7133
|
const slug = judgmentSlug(j);
|
|
7134
|
+
const matchedResult = testResultMap.get(
|
|
7135
|
+
testResultKey(j.taskId, j.dimension, j.modelId)
|
|
7136
|
+
);
|
|
6871
7137
|
return /* @__PURE__ */ jsx28(
|
|
6872
7138
|
JudgmentCard,
|
|
6873
7139
|
{
|
|
7140
|
+
artifactCache,
|
|
6874
7141
|
focused: slug === focus,
|
|
6875
7142
|
judgment: j,
|
|
6876
7143
|
onFocusChange,
|
|
6877
|
-
slug
|
|
7144
|
+
slug,
|
|
7145
|
+
testResult: matchedResult
|
|
6878
7146
|
},
|
|
6879
7147
|
slug
|
|
6880
7148
|
);
|
|
@@ -6886,17 +7154,29 @@ function JudgmentList({
|
|
|
6886
7154
|
] });
|
|
6887
7155
|
}
|
|
6888
7156
|
function JudgmentCard({
|
|
7157
|
+
artifactCache,
|
|
6889
7158
|
focused,
|
|
6890
7159
|
judgment,
|
|
6891
7160
|
onFocusChange,
|
|
6892
|
-
slug
|
|
7161
|
+
slug,
|
|
7162
|
+
testResult
|
|
6893
7163
|
}) {
|
|
6894
|
-
const [expanded, setExpanded] =
|
|
6895
|
-
const
|
|
7164
|
+
const [expanded, setExpanded] = useState13(focused);
|
|
7165
|
+
const [outputExpanded, setOutputExpanded] = useState13(false);
|
|
7166
|
+
const cardRef = useRef6(null);
|
|
6896
7167
|
const toast = useToast2();
|
|
6897
7168
|
const dimLabel = dimensionLabel2(judgment.dimension);
|
|
6898
7169
|
const sep = judgment.taskId.indexOf(" - ");
|
|
6899
7170
|
const taskName = sep > 0 ? judgment.taskId.substring(sep + 3) : judgment.taskId;
|
|
7171
|
+
const inlineOutput = testResult?.responseOutput;
|
|
7172
|
+
const artifactEntry = artifactCache?.getOutput(
|
|
7173
|
+
judgment.taskId,
|
|
7174
|
+
judgment.modelId
|
|
7175
|
+
);
|
|
7176
|
+
const resolvedOutput = inlineOutput ?? artifactEntry?.responseOutput ?? null;
|
|
7177
|
+
const resolvedTruncated = testResult?.responseOutputTruncated ?? artifactEntry?.responseOutputTruncated ?? false;
|
|
7178
|
+
const canFetchArtifact = !inlineOutput && !artifactEntry && artifactCache != null && artifactCache.status !== "ready";
|
|
7179
|
+
const hasOutputOrCanFetch = resolvedOutput != null || canFetchArtifact;
|
|
6900
7180
|
useEffect8(() => {
|
|
6901
7181
|
if (focused) {
|
|
6902
7182
|
setExpanded(true);
|
|
@@ -6906,10 +7186,10 @@ function JudgmentCard({
|
|
|
6906
7186
|
return () => clearTimeout(timer);
|
|
6907
7187
|
}
|
|
6908
7188
|
}, []);
|
|
6909
|
-
const handleToggle =
|
|
7189
|
+
const handleToggle = useCallback16(() => {
|
|
6910
7190
|
setExpanded((prev) => !prev);
|
|
6911
7191
|
}, []);
|
|
6912
|
-
const handleCopyLink =
|
|
7192
|
+
const handleCopyLink = useCallback16(
|
|
6913
7193
|
(e) => {
|
|
6914
7194
|
e.stopPropagation();
|
|
6915
7195
|
onFocusChange?.(slug);
|
|
@@ -6933,6 +7213,39 @@ function JudgmentCard({
|
|
|
6933
7213
|
},
|
|
6934
7214
|
[toast]
|
|
6935
7215
|
);
|
|
7216
|
+
const handleCopyOutput = useCallback16(
|
|
7217
|
+
(e) => {
|
|
7218
|
+
e.stopPropagation();
|
|
7219
|
+
if (!resolvedOutput) return;
|
|
7220
|
+
navigator.clipboard.writeText(resolvedOutput).then(
|
|
7221
|
+
() => {
|
|
7222
|
+
toast.push({
|
|
7223
|
+
closable: true,
|
|
7224
|
+
status: "success",
|
|
7225
|
+
title: "Model output copied to clipboard"
|
|
7226
|
+
});
|
|
7227
|
+
},
|
|
7228
|
+
() => {
|
|
7229
|
+
toast.push({
|
|
7230
|
+
closable: true,
|
|
7231
|
+
status: "error",
|
|
7232
|
+
title: "Failed to copy output"
|
|
7233
|
+
});
|
|
7234
|
+
}
|
|
7235
|
+
);
|
|
7236
|
+
},
|
|
7237
|
+
[resolvedOutput, toast]
|
|
7238
|
+
);
|
|
7239
|
+
const handleToggleOutput = useCallback16(
|
|
7240
|
+
(e) => {
|
|
7241
|
+
e.stopPropagation();
|
|
7242
|
+
if (!outputExpanded && !resolvedOutput && artifactCache) {
|
|
7243
|
+
artifactCache.fetchArtifacts();
|
|
7244
|
+
}
|
|
7245
|
+
setOutputExpanded((prev) => !prev);
|
|
7246
|
+
},
|
|
7247
|
+
[outputExpanded, resolvedOutput, artifactCache]
|
|
7248
|
+
);
|
|
6936
7249
|
return /* @__PURE__ */ jsx28(
|
|
6937
7250
|
Box16,
|
|
6938
7251
|
{
|
|
@@ -7013,22 +7326,115 @@ function JudgmentCard({
|
|
|
7013
7326
|
)
|
|
7014
7327
|
}
|
|
7015
7328
|
),
|
|
7329
|
+
hasOutputOrCanFetch && /* @__PURE__ */ jsxs23(Box16, { children: [
|
|
7330
|
+
/* @__PURE__ */ jsxs23(Flex16, { align: "center", gap: 2, children: [
|
|
7331
|
+
/* @__PURE__ */ jsxs23(
|
|
7332
|
+
"button",
|
|
7333
|
+
{
|
|
7334
|
+
onClick: handleToggleOutput,
|
|
7335
|
+
style: {
|
|
7336
|
+
alignItems: "center",
|
|
7337
|
+
background: "none",
|
|
7338
|
+
border: "none",
|
|
7339
|
+
color: "var(--card-muted-fg-color)",
|
|
7340
|
+
cursor: "pointer",
|
|
7341
|
+
display: "flex",
|
|
7342
|
+
fontSize: 13,
|
|
7343
|
+
gap: 4,
|
|
7344
|
+
padding: 0
|
|
7345
|
+
},
|
|
7346
|
+
children: [
|
|
7347
|
+
/* @__PURE__ */ jsx28("span", { children: outputExpanded ? "\u25BE" : "\u25B8" }),
|
|
7348
|
+
/* @__PURE__ */ jsx28("span", { style: { fontWeight: 500 }, children: "Model Output" })
|
|
7349
|
+
]
|
|
7350
|
+
}
|
|
7351
|
+
),
|
|
7352
|
+
resolvedTruncated && /* @__PURE__ */ jsx28(
|
|
7353
|
+
"span",
|
|
7354
|
+
{
|
|
7355
|
+
style: {
|
|
7356
|
+
backgroundColor: "rgba(251,191,36,0.15)",
|
|
7357
|
+
borderRadius: 4,
|
|
7358
|
+
color: "#fbbf24",
|
|
7359
|
+
fontSize: 11,
|
|
7360
|
+
padding: "1px 6px"
|
|
7361
|
+
},
|
|
7362
|
+
children: "truncated"
|
|
7363
|
+
}
|
|
7364
|
+
),
|
|
7365
|
+
testResult?.latencyMs != null && /* @__PURE__ */ jsxs23(Text25, { muted: true, size: 0, children: [
|
|
7366
|
+
(testResult.latencyMs / 1e3).toFixed(1),
|
|
7367
|
+
"s"
|
|
7368
|
+
] }),
|
|
7369
|
+
testResult?.cost != null && testResult.cost > 0 && /* @__PURE__ */ jsxs23(Text25, { muted: true, size: 0, children: [
|
|
7370
|
+
"$",
|
|
7371
|
+
testResult.cost.toFixed(4)
|
|
7372
|
+
] })
|
|
7373
|
+
] }),
|
|
7374
|
+
outputExpanded && /* @__PURE__ */ jsxs23(
|
|
7375
|
+
Box16,
|
|
7376
|
+
{
|
|
7377
|
+
marginTop: 2,
|
|
7378
|
+
style: {
|
|
7379
|
+
backgroundColor: "var(--card-muted-bg-color, rgba(255,255,255,0.04))",
|
|
7380
|
+
borderRadius: 4,
|
|
7381
|
+
maxHeight: 400,
|
|
7382
|
+
overflowY: "auto",
|
|
7383
|
+
padding: 12
|
|
7384
|
+
},
|
|
7385
|
+
children: [
|
|
7386
|
+
!resolvedOutput && artifactCache?.status === "loading" && /* @__PURE__ */ jsx28(Text25, { muted: true, size: 1, children: "Fetching model output\u2026" }),
|
|
7387
|
+
!resolvedOutput && artifactCache?.status === "error" && /* @__PURE__ */ jsxs23(Text25, { muted: true, size: 1, style: { color: "#f87171" }, children: [
|
|
7388
|
+
"Failed to load model output",
|
|
7389
|
+
artifactCache.error ? `: ${artifactCache.error}` : ""
|
|
7390
|
+
] }),
|
|
7391
|
+
resolvedOutput && /* @__PURE__ */ jsx28(
|
|
7392
|
+
"pre",
|
|
7393
|
+
{
|
|
7394
|
+
style: {
|
|
7395
|
+
fontFamily: "var(--font-code-family, 'SF Mono', 'Fira Code', monospace)",
|
|
7396
|
+
fontSize: 13,
|
|
7397
|
+
lineHeight: 1.5,
|
|
7398
|
+
margin: 0,
|
|
7399
|
+
whiteSpace: "pre-wrap",
|
|
7400
|
+
wordBreak: "break-word"
|
|
7401
|
+
},
|
|
7402
|
+
children: resolvedOutput
|
|
7403
|
+
}
|
|
7404
|
+
)
|
|
7405
|
+
]
|
|
7406
|
+
}
|
|
7407
|
+
)
|
|
7408
|
+
] }),
|
|
7016
7409
|
/* @__PURE__ */ jsxs23(Flex16, { align: "center", gap: 2, wrap: "wrap", children: [
|
|
7017
7410
|
judgment.canonicalDocs && judgment.canonicalDocs.length > 0 && /* @__PURE__ */ jsxs23(Fragment9, { children: [
|
|
7018
7411
|
/* @__PURE__ */ jsx28(Text25, { muted: true, size: 1, children: "Docs:" }),
|
|
7019
7412
|
judgment.canonicalDocs.map((doc) => /* @__PURE__ */ jsx28(DocBadge, { doc }, doc.slug))
|
|
7020
7413
|
] }),
|
|
7021
|
-
/* @__PURE__ */
|
|
7022
|
-
|
|
7023
|
-
|
|
7024
|
-
|
|
7025
|
-
|
|
7026
|
-
|
|
7027
|
-
|
|
7028
|
-
|
|
7029
|
-
|
|
7030
|
-
|
|
7031
|
-
|
|
7414
|
+
/* @__PURE__ */ jsxs23("span", { style: { display: "flex", gap: 4, marginLeft: "auto" }, children: [
|
|
7415
|
+
resolvedOutput && /* @__PURE__ */ jsx28(
|
|
7416
|
+
Button3,
|
|
7417
|
+
{
|
|
7418
|
+
fontSize: 1,
|
|
7419
|
+
icon: CopyIcon,
|
|
7420
|
+
mode: "ghost",
|
|
7421
|
+
onClick: handleCopyOutput,
|
|
7422
|
+
padding: 2,
|
|
7423
|
+
text: "Copy Output"
|
|
7424
|
+
}
|
|
7425
|
+
),
|
|
7426
|
+
/* @__PURE__ */ jsx28(
|
|
7427
|
+
Button3,
|
|
7428
|
+
{
|
|
7429
|
+
fontSize: 1,
|
|
7430
|
+
icon: CopyIcon,
|
|
7431
|
+
mode: "ghost",
|
|
7432
|
+
onClick: handleCopyLink,
|
|
7433
|
+
padding: 2,
|
|
7434
|
+
text: "Copy Link"
|
|
7435
|
+
}
|
|
7436
|
+
)
|
|
7437
|
+
] })
|
|
7032
7438
|
] })
|
|
7033
7439
|
] })
|
|
7034
7440
|
] })
|
|
@@ -7038,7 +7444,7 @@ function JudgmentCard({
|
|
|
7038
7444
|
function DocBadge({
|
|
7039
7445
|
doc
|
|
7040
7446
|
}) {
|
|
7041
|
-
const [hovered, setHovered] =
|
|
7447
|
+
const [hovered, setHovered] = useState13(false);
|
|
7042
7448
|
const isLinked = Boolean(doc.documentId);
|
|
7043
7449
|
const tooltipLabel = isLinked ? `Edit "${doc.title || doc.slug}"` : doc.title || doc.slug;
|
|
7044
7450
|
const badge = /* @__PURE__ */ jsx28(
|
|
@@ -7204,20 +7610,20 @@ import {
|
|
|
7204
7610
|
MenuDivider,
|
|
7205
7611
|
useToast as useToast9
|
|
7206
7612
|
} from "@sanity/ui";
|
|
7207
|
-
import { useCallback as
|
|
7613
|
+
import { useCallback as useCallback22, useState as useState17 } from "react";
|
|
7208
7614
|
import { useClient as useClient9 } from "sanity";
|
|
7209
7615
|
|
|
7210
7616
|
// src/components/report-detail/report-actions/CopyReportAction.tsx
|
|
7211
7617
|
import { ClipboardIcon } from "@sanity/icons";
|
|
7212
7618
|
import { MenuItem, useToast as useToast4 } from "@sanity/ui";
|
|
7213
|
-
import { useCallback as
|
|
7619
|
+
import { useCallback as useCallback17, useState as useState14 } from "react";
|
|
7214
7620
|
import { useClient as useClient6 } from "sanity";
|
|
7215
7621
|
import { jsx as jsx31 } from "react/jsx-runtime";
|
|
7216
7622
|
function CopyReportAction({ documentId }) {
|
|
7217
7623
|
const client = useClient6({ apiVersion: API_VERSION });
|
|
7218
7624
|
const toast = useToast4();
|
|
7219
|
-
const [copying, setCopying] =
|
|
7220
|
-
const handleClick =
|
|
7625
|
+
const [copying, setCopying] = useState14(false);
|
|
7626
|
+
const handleClick = useCallback17(async () => {
|
|
7221
7627
|
setCopying(true);
|
|
7222
7628
|
try {
|
|
7223
7629
|
const doc = await client.fetch(
|
|
@@ -7263,11 +7669,11 @@ function CopyReportAction({ documentId }) {
|
|
|
7263
7669
|
// src/components/report-detail/report-actions/CopyReportIdAction.tsx
|
|
7264
7670
|
import { CopyIcon as CopyIcon2 } from "@sanity/icons";
|
|
7265
7671
|
import { MenuItem as MenuItem2, useToast as useToast5 } from "@sanity/ui";
|
|
7266
|
-
import { useCallback as
|
|
7672
|
+
import { useCallback as useCallback18 } from "react";
|
|
7267
7673
|
import { jsx as jsx32 } from "react/jsx-runtime";
|
|
7268
7674
|
function CopyReportIdAction({ reportId }) {
|
|
7269
7675
|
const toast = useToast5();
|
|
7270
|
-
const handleClick =
|
|
7676
|
+
const handleClick = useCallback18(() => {
|
|
7271
7677
|
navigator.clipboard.writeText(reportId).then(
|
|
7272
7678
|
() => {
|
|
7273
7679
|
toast.push({
|
|
@@ -7291,13 +7697,13 @@ function CopyReportIdAction({ reportId }) {
|
|
|
7291
7697
|
// src/components/report-detail/report-actions/CopyVisionQueryAction.tsx
|
|
7292
7698
|
import { SearchIcon as SearchIcon5 } from "@sanity/icons";
|
|
7293
7699
|
import { MenuItem as MenuItem3, useToast as useToast6 } from "@sanity/ui";
|
|
7294
|
-
import { useCallback as
|
|
7700
|
+
import { useCallback as useCallback19 } from "react";
|
|
7295
7701
|
import { jsx as jsx33 } from "react/jsx-runtime";
|
|
7296
7702
|
function CopyVisionQueryAction({
|
|
7297
7703
|
reportId
|
|
7298
7704
|
}) {
|
|
7299
7705
|
const toast = useToast6();
|
|
7300
|
-
const handleClick =
|
|
7706
|
+
const handleClick = useCallback19(() => {
|
|
7301
7707
|
const query = `*[_type == "ailf.report" && reportId == "${reportId}"][0]`;
|
|
7302
7708
|
navigator.clipboard.writeText(query).then(
|
|
7303
7709
|
() => {
|
|
@@ -7400,7 +7806,7 @@ function DeleteReportAction({
|
|
|
7400
7806
|
// src/components/report-detail/report-actions/DownloadReportAction.tsx
|
|
7401
7807
|
import { DownloadIcon } from "@sanity/icons";
|
|
7402
7808
|
import { MenuItem as MenuItem5, useToast as useToast7 } from "@sanity/ui";
|
|
7403
|
-
import { useCallback as
|
|
7809
|
+
import { useCallback as useCallback20, useState as useState15 } from "react";
|
|
7404
7810
|
import { useClient as useClient7 } from "sanity";
|
|
7405
7811
|
import { jsx as jsx36 } from "react/jsx-runtime";
|
|
7406
7812
|
function DownloadReportAction({
|
|
@@ -7409,8 +7815,8 @@ function DownloadReportAction({
|
|
|
7409
7815
|
}) {
|
|
7410
7816
|
const client = useClient7({ apiVersion: API_VERSION });
|
|
7411
7817
|
const toast = useToast7();
|
|
7412
|
-
const [downloading, setDownloading] =
|
|
7413
|
-
const handleClick =
|
|
7818
|
+
const [downloading, setDownloading] = useState15(false);
|
|
7819
|
+
const handleClick = useCallback20(async () => {
|
|
7414
7820
|
setDownloading(true);
|
|
7415
7821
|
try {
|
|
7416
7822
|
const doc = await client.fetch(
|
|
@@ -7464,7 +7870,7 @@ function DownloadReportAction({
|
|
|
7464
7870
|
// src/components/report-detail/report-actions/RerunEvaluationAction.tsx
|
|
7465
7871
|
import { PlayIcon as PlayIcon2 } from "@sanity/icons";
|
|
7466
7872
|
import { MenuItem as MenuItem6, useToast as useToast8 } from "@sanity/ui";
|
|
7467
|
-
import { useCallback as
|
|
7873
|
+
import { useCallback as useCallback21, useState as useState16 } from "react";
|
|
7468
7874
|
import { useClient as useClient8, useCurrentUser as useCurrentUser3 } from "sanity";
|
|
7469
7875
|
|
|
7470
7876
|
// src/lib/eval-scope.ts
|
|
@@ -7519,8 +7925,8 @@ function RerunEvaluationAction({
|
|
|
7519
7925
|
const client = useClient8({ apiVersion: API_VERSION });
|
|
7520
7926
|
const currentUser = useCurrentUser3();
|
|
7521
7927
|
const toast = useToast8();
|
|
7522
|
-
const [requesting, setRequesting] =
|
|
7523
|
-
const handleClick =
|
|
7928
|
+
const [requesting, setRequesting] = useState16(false);
|
|
7929
|
+
const handleClick = useCallback21(async () => {
|
|
7524
7930
|
setRequesting(true);
|
|
7525
7931
|
try {
|
|
7526
7932
|
const scope = extractEvalScope(provenance);
|
|
@@ -7568,7 +7974,7 @@ function ReportActions({
|
|
|
7568
7974
|
}) {
|
|
7569
7975
|
const client = useClient9({ apiVersion: API_VERSION });
|
|
7570
7976
|
const toast = useToast9();
|
|
7571
|
-
const handleCopyId =
|
|
7977
|
+
const handleCopyId = useCallback22(() => {
|
|
7572
7978
|
navigator.clipboard.writeText(reportId).then(
|
|
7573
7979
|
() => {
|
|
7574
7980
|
toast.push({
|
|
@@ -7586,15 +7992,15 @@ function ReportActions({
|
|
|
7586
7992
|
}
|
|
7587
7993
|
);
|
|
7588
7994
|
}, [reportId, toast]);
|
|
7589
|
-
const [deleteDialogOpen, setDeleteDialogOpen] =
|
|
7590
|
-
const [deleting, setDeleting] =
|
|
7591
|
-
const handleRequestDelete =
|
|
7995
|
+
const [deleteDialogOpen, setDeleteDialogOpen] = useState17(false);
|
|
7996
|
+
const [deleting, setDeleting] = useState17(false);
|
|
7997
|
+
const handleRequestDelete = useCallback22(() => {
|
|
7592
7998
|
setDeleteDialogOpen(true);
|
|
7593
7999
|
}, []);
|
|
7594
|
-
const handleDeleteClose =
|
|
8000
|
+
const handleDeleteClose = useCallback22(() => {
|
|
7595
8001
|
if (!deleting) setDeleteDialogOpen(false);
|
|
7596
8002
|
}, [deleting]);
|
|
7597
|
-
const handleDeleteConfirm =
|
|
8003
|
+
const handleDeleteConfirm = useCallback22(async () => {
|
|
7598
8004
|
setDeleting(true);
|
|
7599
8005
|
try {
|
|
7600
8006
|
await client.delete(documentId);
|
|
@@ -7714,9 +8120,9 @@ import { Box as Box19, Flex as Flex23, Stack as Stack25, Text as Text31 } from "
|
|
|
7714
8120
|
|
|
7715
8121
|
// src/components/report-detail/AreaScoresGrid.tsx
|
|
7716
8122
|
import React3, {
|
|
7717
|
-
useCallback as
|
|
8123
|
+
useCallback as useCallback23,
|
|
7718
8124
|
useMemo as useMemo6,
|
|
7719
|
-
useState as
|
|
8125
|
+
useState as useState18
|
|
7720
8126
|
} from "react";
|
|
7721
8127
|
import { WarningOutlineIcon as WarningOutlineIcon2 } from "@sanity/icons";
|
|
7722
8128
|
import { Box as Box18, Flex as Flex21, Stack as Stack24, Text as Text29 } from "@sanity/ui";
|
|
@@ -7766,9 +8172,9 @@ function AreaScoresGrid({
|
|
|
7766
8172
|
);
|
|
7767
8173
|
const showLift = isLiteracyMode(mode);
|
|
7768
8174
|
const dimKeys = useMemo6(() => collectDimensionKeys(scores), [scores]);
|
|
7769
|
-
const [sortField, setSortField] =
|
|
7770
|
-
const [sortDir, setSortDir] =
|
|
7771
|
-
const handleSort =
|
|
8175
|
+
const [sortField, setSortField] = useState18("score");
|
|
8176
|
+
const [sortDir, setSortDir] = useState18("desc");
|
|
8177
|
+
const handleSort = useCallback23(
|
|
7772
8178
|
(field) => {
|
|
7773
8179
|
if (field === sortField) {
|
|
7774
8180
|
setSortDir((d) => d === "asc" ? "desc" : "asc");
|
|
@@ -8251,7 +8657,7 @@ function ColHeader2({
|
|
|
8251
8657
|
onClick,
|
|
8252
8658
|
tooltip
|
|
8253
8659
|
}) {
|
|
8254
|
-
const handleKeyDown =
|
|
8660
|
+
const handleKeyDown = useCallback23(
|
|
8255
8661
|
(e) => {
|
|
8256
8662
|
if (onClick && (e.key === "Enter" || e.key === " ")) {
|
|
8257
8663
|
e.preventDefault();
|
|
@@ -8296,7 +8702,7 @@ function ColHeader2({
|
|
|
8296
8702
|
}
|
|
8297
8703
|
|
|
8298
8704
|
// src/components/report-detail/ModelSelector.tsx
|
|
8299
|
-
import { useCallback as
|
|
8705
|
+
import { useCallback as useCallback24 } from "react";
|
|
8300
8706
|
import { Flex as Flex22, Text as Text30 } from "@sanity/ui";
|
|
8301
8707
|
import { jsx as jsx41, jsxs as jsxs29 } from "react/jsx-runtime";
|
|
8302
8708
|
var pillBase = {
|
|
@@ -8373,7 +8779,7 @@ function Pill2({
|
|
|
8373
8779
|
label,
|
|
8374
8780
|
onClick
|
|
8375
8781
|
}) {
|
|
8376
|
-
const handleKeyDown =
|
|
8782
|
+
const handleKeyDown = useCallback24(
|
|
8377
8783
|
(e) => {
|
|
8378
8784
|
if (e.key === "Enter" || e.key === " ") {
|
|
8379
8785
|
e.preventDefault();
|
|
@@ -8406,13 +8812,13 @@ function Pill2({
|
|
|
8406
8812
|
}
|
|
8407
8813
|
|
|
8408
8814
|
// src/components/report-detail/useModelSelection.ts
|
|
8409
|
-
import { useCallback as
|
|
8815
|
+
import { useCallback as useCallback25, useMemo as useMemo7, useState as useState19 } from "react";
|
|
8410
8816
|
function useModelSelection({
|
|
8411
8817
|
scores,
|
|
8412
8818
|
perModel
|
|
8413
8819
|
}) {
|
|
8414
|
-
const [selection, setSelection] =
|
|
8415
|
-
const onSelectionChange =
|
|
8820
|
+
const [selection, setSelection] = useState19(null);
|
|
8821
|
+
const onSelectionChange = useCallback25((next) => {
|
|
8416
8822
|
setSelection(next);
|
|
8417
8823
|
}, []);
|
|
8418
8824
|
const isExpanded = selection === "expanded";
|
|
@@ -8915,8 +9321,8 @@ function ReportDetail({
|
|
|
8915
9321
|
subTab
|
|
8916
9322
|
}) {
|
|
8917
9323
|
const client = useClient10({ apiVersion: API_VERSION });
|
|
8918
|
-
const [loading, setLoading] =
|
|
8919
|
-
const [report, setReport] =
|
|
9324
|
+
const [loading, setLoading] = useState20(true);
|
|
9325
|
+
const [report, setReport] = useState20(null);
|
|
8920
9326
|
useEffect9(() => {
|
|
8921
9327
|
let cancelled = false;
|
|
8922
9328
|
setLoading(true);
|
|
@@ -8932,6 +9338,11 @@ function ReportDetail({
|
|
|
8932
9338
|
cancelled = true;
|
|
8933
9339
|
};
|
|
8934
9340
|
}, [client, reportId]);
|
|
9341
|
+
const artifactCache = useArtifactCache(
|
|
9342
|
+
reportId,
|
|
9343
|
+
report?.summary?.artifacts?.testOutputs,
|
|
9344
|
+
client
|
|
9345
|
+
);
|
|
8935
9346
|
const { summary } = report ?? {};
|
|
8936
9347
|
const hasWeaknesses = Boolean(
|
|
8937
9348
|
summary?.lowScoringJudgments && summary.lowScoringJudgments.length > 0
|
|
@@ -8957,7 +9368,7 @@ function ReportDetail({
|
|
|
8957
9368
|
if (disabledTabs.has(parsed)) return "overview";
|
|
8958
9369
|
return tabs.some((t) => t.id === parsed) ? parsed : "overview";
|
|
8959
9370
|
}, [activeTab, disabledTabs, tabs]);
|
|
8960
|
-
const handleTabClick =
|
|
9371
|
+
const handleTabClick = useCallback26(
|
|
8961
9372
|
(tabId) => {
|
|
8962
9373
|
onTabChange(tabId === "overview" ? null : tabId, null, null);
|
|
8963
9374
|
},
|
|
@@ -9064,6 +9475,7 @@ function ReportDetail({
|
|
|
9064
9475
|
currentTab === "diagnostics" && hasDiagnostics && /* @__PURE__ */ jsx44(
|
|
9065
9476
|
DiagnosticsPanel,
|
|
9066
9477
|
{
|
|
9478
|
+
artifactCache,
|
|
9067
9479
|
comparison,
|
|
9068
9480
|
focus,
|
|
9069
9481
|
judgments: summary.lowScoringJudgments,
|
|
@@ -9071,7 +9483,8 @@ function ReportDetail({
|
|
|
9071
9483
|
onNavigate: (newSubTab, newFocus) => onTabChange("diagnostics", newSubTab, newFocus),
|
|
9072
9484
|
perModel: summary.perModel,
|
|
9073
9485
|
scores: summary.scores,
|
|
9074
|
-
subTab
|
|
9486
|
+
subTab,
|
|
9487
|
+
testResults: summary.testResults
|
|
9075
9488
|
}
|
|
9076
9489
|
),
|
|
9077
9490
|
currentTab === "activity" && hasAgentActivity && /* @__PURE__ */ jsx44(
|
|
@@ -9100,6 +9513,7 @@ var DIAG_TABS = [
|
|
|
9100
9513
|
{ id: "issues", label: "Issues" }
|
|
9101
9514
|
];
|
|
9102
9515
|
function DiagnosticsPanel({
|
|
9516
|
+
artifactCache,
|
|
9103
9517
|
comparison,
|
|
9104
9518
|
focus,
|
|
9105
9519
|
judgments,
|
|
@@ -9107,7 +9521,8 @@ function DiagnosticsPanel({
|
|
|
9107
9521
|
onNavigate,
|
|
9108
9522
|
perModel,
|
|
9109
9523
|
scores,
|
|
9110
|
-
subTab: subTabParam
|
|
9524
|
+
subTab: subTabParam,
|
|
9525
|
+
testResults
|
|
9111
9526
|
}) {
|
|
9112
9527
|
const subTab = parseDiagSubTab(subTabParam);
|
|
9113
9528
|
const issueCount = scores.filter((s) => s.totalScore < SCORE_CAUTION).length + scores.filter((s) => s.negativeDocLift).length + scores.filter(
|
|
@@ -9186,9 +9601,11 @@ function DiagnosticsPanel({
|
|
|
9186
9601
|
judgments && judgments.length > 0 && /* @__PURE__ */ jsx44(
|
|
9187
9602
|
JudgmentList,
|
|
9188
9603
|
{
|
|
9604
|
+
artifactCache,
|
|
9189
9605
|
focus,
|
|
9190
9606
|
judgments,
|
|
9191
|
-
onFocusChange: (slug) => onNavigate("issues", slug)
|
|
9607
|
+
onFocusChange: (slug) => onNavigate("issues", slug),
|
|
9608
|
+
testResults
|
|
9192
9609
|
}
|
|
9193
9610
|
)
|
|
9194
9611
|
] })
|
|
@@ -9283,7 +9700,7 @@ import { jsx as jsx51, jsxs as jsxs38 } from "react/jsx-runtime";
|
|
|
9283
9700
|
|
|
9284
9701
|
// src/components/ScoreTimeline.tsx
|
|
9285
9702
|
import { Card as Card20, Flex as Flex30, Select as Select2, Stack as Stack33, Text as Text40 } from "@sanity/ui";
|
|
9286
|
-
import { useCallback as
|
|
9703
|
+
import { useCallback as useCallback27, useEffect as useEffect10, useMemo as useMemo10, useState as useState21 } from "react";
|
|
9287
9704
|
import { useClient as useClient11 } from "sanity";
|
|
9288
9705
|
import { jsx as jsx52, jsxs as jsxs39 } from "react/jsx-runtime";
|
|
9289
9706
|
var CHART_HEIGHT = 220;
|
|
@@ -9320,10 +9737,10 @@ function scoreForPoint(point, area) {
|
|
|
9320
9737
|
}
|
|
9321
9738
|
function ScoreTimeline({ mode = null, source = null }) {
|
|
9322
9739
|
const client = useClient11({ apiVersion: API_VERSION });
|
|
9323
|
-
const [dataPoints, setDataPoints] =
|
|
9324
|
-
const [loading, setLoading] =
|
|
9325
|
-
const [rangeDays, setRangeDays] =
|
|
9326
|
-
const [selectedArea, setSelectedArea] =
|
|
9740
|
+
const [dataPoints, setDataPoints] = useState21([]);
|
|
9741
|
+
const [loading, setLoading] = useState21(true);
|
|
9742
|
+
const [rangeDays, setRangeDays] = useState21(30);
|
|
9743
|
+
const [selectedArea, setSelectedArea] = useState21(null);
|
|
9327
9744
|
const areaNames = useMemo10(() => {
|
|
9328
9745
|
const names = /* @__PURE__ */ new Set();
|
|
9329
9746
|
for (const dp of dataPoints) {
|
|
@@ -9333,7 +9750,7 @@ function ScoreTimeline({ mode = null, source = null }) {
|
|
|
9333
9750
|
}
|
|
9334
9751
|
return Array.from(names).sort();
|
|
9335
9752
|
}, [dataPoints]);
|
|
9336
|
-
const fetchData =
|
|
9753
|
+
const fetchData = useCallback27(async () => {
|
|
9337
9754
|
setLoading(true);
|
|
9338
9755
|
try {
|
|
9339
9756
|
const startDate = rangeDays ? daysAgo(rangeDays) : "1970-01-01T00:00:00Z";
|
|
@@ -9369,14 +9786,14 @@ function ScoreTimeline({ mode = null, source = null }) {
|
|
|
9369
9786
|
if (chartPoints.length === 0) return 0;
|
|
9370
9787
|
return chartPoints.reduce((sum, p) => sum + p.score, 0) / chartPoints.length;
|
|
9371
9788
|
}, [chartPoints]);
|
|
9372
|
-
const handleRangeChange =
|
|
9789
|
+
const handleRangeChange = useCallback27(
|
|
9373
9790
|
(e) => {
|
|
9374
9791
|
const val = e.currentTarget.value;
|
|
9375
9792
|
setRangeDays(val === "all" ? null : Number(val));
|
|
9376
9793
|
},
|
|
9377
9794
|
[]
|
|
9378
9795
|
);
|
|
9379
|
-
const handleAreaChange =
|
|
9796
|
+
const handleAreaChange = useCallback27(
|
|
9380
9797
|
(e) => {
|
|
9381
9798
|
const val = e.currentTarget.value;
|
|
9382
9799
|
setSelectedArea(val || null);
|
|
@@ -9522,7 +9939,7 @@ function DashboardContent() {
|
|
|
9522
9939
|
const isDetail = reportId !== null;
|
|
9523
9940
|
const activeTab = isDetail ? "latest" : VIEW_PARAM_MAP[routerState.view ?? ""] ?? "latest";
|
|
9524
9941
|
const defaultTopic = deriveHelpTopic(routerState);
|
|
9525
|
-
const navigateToTab =
|
|
9942
|
+
const navigateToTab = useCallback28(
|
|
9526
9943
|
(tab) => {
|
|
9527
9944
|
if (tab === "latest") {
|
|
9528
9945
|
router.navigate({});
|
|
@@ -9532,13 +9949,13 @@ function DashboardContent() {
|
|
|
9532
9949
|
},
|
|
9533
9950
|
[router]
|
|
9534
9951
|
);
|
|
9535
|
-
const handleSelectReport =
|
|
9952
|
+
const handleSelectReport = useCallback28(
|
|
9536
9953
|
(id) => {
|
|
9537
9954
|
router.navigate({ reportId: id });
|
|
9538
9955
|
},
|
|
9539
9956
|
[router]
|
|
9540
9957
|
);
|
|
9541
|
-
const handleTabChange =
|
|
9958
|
+
const handleTabChange = useCallback28(
|
|
9542
9959
|
(tab, subTab, focus) => {
|
|
9543
9960
|
if (!routerState.reportId) return;
|
|
9544
9961
|
const state = {
|
|
@@ -9551,10 +9968,10 @@ function DashboardContent() {
|
|
|
9551
9968
|
},
|
|
9552
9969
|
[router, routerState.reportId]
|
|
9553
9970
|
);
|
|
9554
|
-
const handleBack =
|
|
9971
|
+
const handleBack = useCallback28(() => {
|
|
9555
9972
|
router.navigate({});
|
|
9556
9973
|
}, [router]);
|
|
9557
|
-
const handleOpenHelp =
|
|
9974
|
+
const handleOpenHelp = useCallback28(() => {
|
|
9558
9975
|
openHelp(defaultTopic);
|
|
9559
9976
|
}, [openHelp, defaultTopic]);
|
|
9560
9977
|
return /* @__PURE__ */ jsx53(Container, { width: 4, children: /* @__PURE__ */ jsxs40(Stack34, { padding: 4, space: 4, children: [
|
|
@@ -9645,7 +10062,7 @@ function ailfTool(options = {}) {
|
|
|
9645
10062
|
// src/actions/RunEvaluationAction.tsx
|
|
9646
10063
|
import { BarChartIcon as BarChartIcon2 } from "@sanity/icons";
|
|
9647
10064
|
import { useToast as useToast10 } from "@sanity/ui";
|
|
9648
|
-
import { useCallback as
|
|
10065
|
+
import { useCallback as useCallback29, useEffect as useEffect11, useRef as useRef7, useState as useState22 } from "react";
|
|
9649
10066
|
import {
|
|
9650
10067
|
getReleaseIdFromReleaseDocumentId as getReleaseIdFromReleaseDocumentId3,
|
|
9651
10068
|
useClient as useClient12,
|
|
@@ -9676,8 +10093,8 @@ function createRunEvaluationAction(options = {}) {
|
|
|
9676
10093
|
const projectId = useProjectId2();
|
|
9677
10094
|
const currentUser = useCurrentUser4();
|
|
9678
10095
|
const toast = useToast10();
|
|
9679
|
-
const [state, setState] =
|
|
9680
|
-
const requestedAtRef =
|
|
10096
|
+
const [state, setState] = useState22({ status: "loading" });
|
|
10097
|
+
const requestedAtRef = useRef7(null);
|
|
9681
10098
|
const perspectiveId = getReleaseIdFromReleaseDocumentId3(release._id);
|
|
9682
10099
|
useEffect11(() => {
|
|
9683
10100
|
let cancelled = false;
|
|
@@ -9772,7 +10189,7 @@ function createRunEvaluationAction(options = {}) {
|
|
|
9772
10189
|
}, 15e3);
|
|
9773
10190
|
return () => clearTimeout(timer);
|
|
9774
10191
|
}, [client, perspectiveId, state]);
|
|
9775
|
-
const handleRequest =
|
|
10192
|
+
const handleRequest = useCallback29(async () => {
|
|
9776
10193
|
const releaseTitle = release.metadata?.title ?? perspectiveId ?? "release";
|
|
9777
10194
|
const tag = `release-${slugify3(releaseTitle)}-${dateStamp3()}`;
|
|
9778
10195
|
const now = Date.now();
|