@sanity/ailf-studio 1.2.2 → 1.3.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.d.ts +50 -1
- package/dist/index.js +485 -84
- package/package.json +1 -1
package/dist/index.d.ts
CHANGED
|
@@ -434,6 +434,14 @@ interface ScoreItem {
|
|
|
434
434
|
/** Ceiling score — gold-standard docs injected */
|
|
435
435
|
ceilingScore?: number;
|
|
436
436
|
}
|
|
437
|
+
/**
 * Reference to an artifact stored in an external object store (GCS).
 *
 * The reference only locates the object; it does not carry the payload.
 */
interface ArtifactRef {
    /** Object-store backend. `"gcs"` is the only value declared here. */
    store: "gcs";
    /** Bucket that holds the artifact. */
    bucket: string;
    /** Path of the artifact object (presumably relative to `bucket` — confirm against the writer). */
    path: string;
    /** Size of the artifact in bytes, when recorded. */
    bytes?: number;
    /** Number of entries in the artifact, when recorded. */
    entryCount?: number;
}
|
|
437
445
|
/** A single gap/recommendation from gap analysis */
|
|
438
446
|
interface RecommendationGap {
|
|
439
447
|
affectedTaskIds: string[];
|
|
@@ -451,6 +459,38 @@ interface RecommendationsData {
|
|
|
451
459
|
generatedAt: string;
|
|
452
460
|
totalPotentialLift: number;
|
|
453
461
|
}
|
|
462
|
+
/**
 * Per-test result stored in reports for drill-down and audit.
 * Mirrors StoredTestResult from @sanity/ailf-core.
 *
 * Per D0030, new reports omit `responseOutput` / `responseOutputTruncated`
 * inline — the full output lives in the `testOutputs` GCS artifact and is
 * fetched via `useArtifactCache`. Both fields remain optional so the
 * reader path tolerates legacy reports that were published before W0045.
 */
interface StoredTestResultData {
    /** Feature area the test belongs to. */
    area: string;
    /** Documentation pages the task expected the model to use. */
    canonicalDocs?: DocumentRef[];
    /** Composite score for the test, when recorded. */
    compositeScore?: number;
    /** Cost of the test run, when recorded. */
    cost?: number;
    /** Per-dimension grading: dimension name, grader reasoning, and score (0–100). */
    dimensions: {
        dimension: string;
        reason: string;
        score: number;
    }[];
    /** Latency of the model call in milliseconds, when recorded. */
    latencyMs?: number;
    /** Identifier of the model that produced the output. */
    modelId: string;
    /** True when the model failed to produce output (empty response, API error, or refusal). */
    outputFailure?: boolean;
    /** Inline model output — only present on legacy reports (see interface docs). */
    responseOutput?: string;
    /** Whether the inline `responseOutput` was truncated — legacy reports only. */
    responseOutputTruncated?: boolean;
    /** Identifier of the task that was evaluated. */
    taskId: string;
    /** Token counts for the model call, when recorded. */
    tokenUsage?: {
        completion: number;
        prompt: number;
        total: number;
    };
    /** Evaluation variant this result belongs to. */
    variant: "baseline" | "gold";
}
|
|
454
494
|
/** A single low-scoring grader judgment stored in reports */
|
|
455
495
|
interface JudgmentData {
|
|
456
496
|
/** Docs the task expected the model to use */
|
|
@@ -499,6 +539,13 @@ interface PerModelData {
|
|
|
499
539
|
interface SummaryData {
|
|
500
540
|
/** Per-feature agent behavior data (only present when agentic mode ran) */
|
|
501
541
|
agentBehavior?: FeatureAgentBehaviorData[] | null;
|
|
542
|
+
/** External artifact references — present when pipeline uploads to GCS (D0030) */
|
|
543
|
+
artifacts?: {
|
|
544
|
+
testOutputs?: ArtifactRef;
|
|
545
|
+
renderedPrompts?: ArtifactRef;
|
|
546
|
+
rawResults?: ArtifactRef;
|
|
547
|
+
traces?: ArtifactRef;
|
|
548
|
+
};
|
|
502
549
|
belowCritical: string[];
|
|
503
550
|
/** All Sanity documents used across the entire evaluation */
|
|
504
551
|
documentManifest?: DocumentRef[];
|
|
@@ -523,6 +570,8 @@ interface SummaryData {
|
|
|
523
570
|
/** Gap analysis recommendations (when gap analysis was run) */
|
|
524
571
|
recommendations: null | RecommendationsData;
|
|
525
572
|
scores: ScoreItem[];
|
|
573
|
+
/** Per-test results with model output and metadata (D0029) */
|
|
574
|
+
testResults?: StoredTestResultData[] | null;
|
|
526
575
|
timestamp: string;
|
|
527
576
|
}
|
|
528
577
|
/** Shape returned by scoreTimelineQuery */
|
|
@@ -734,7 +783,7 @@ declare const scoreTimelineQuery: string;
|
|
|
734
783
|
*
|
|
735
784
|
* Used by: ReportDetail view
|
|
736
785
|
*/
|
|
737
|
-
declare const reportDetailQuery = "\n *[_type == \"ailf.report\" && reportId == $reportId][0] {\n _id,\n reportId,\n completedAt,\n durationMs,\n tag,\n title,\n provenance,\n summary,\n comparison\n }\n";
|
|
786
|
+
declare const reportDetailQuery = "\n *[_type == \"ailf.report\" && reportId == $reportId][0] {\n _id,\n reportId,\n completedAt,\n durationMs,\n tag,\n title,\n provenance,\n summary,\n \"comparison\": comparison {\n areas,\n deltas,\n generatedAt,\n improved,\n mismatched,\n noiseThreshold,\n noiseThresholdEmpirical,\n notEvaluated,\n regressed,\n unchanged\n }\n }\n";
|
|
738
787
|
/**
|
|
739
788
|
* Find all reports that evaluated a specific Sanity document or perspective.
|
|
740
789
|
*
|
package/dist/index.js
CHANGED
|
@@ -9,6 +9,8 @@ import { useClient, useCurrentUser } from "sanity";
|
|
|
9
9
|
|
|
10
10
|
// src/lib/constants.ts
|
|
11
11
|
var API_VERSION = "2026-03-11";
|
|
12
|
+
// True only when NODE_ENV is exactly "production" at build/run time.
var isProduction = process.env.NODE_ENV === "production";
// Base URL of the artifact API: the hosted endpoint in production builds,
// a local server on port 3000 otherwise.
// NOTE(review): any non-production build of a consuming studio will target
// localhost — confirm that is intended for a published package.
var ARTIFACT_API_BASE_URL = isProduction ? "https://ailf-api.sanity.build/v1" : "http://localhost:3000/v1";
|
|
12
14
|
|
|
13
15
|
// src/actions/GraduateToNativeAction.tsx
|
|
14
16
|
import { jsx, jsxs } from "react/jsx-runtime";
|
|
@@ -721,6 +723,22 @@ function documentRefSchema() {
|
|
|
721
723
|
type: "object"
|
|
722
724
|
};
|
|
723
725
|
}
|
|
726
|
+
/**
 * Builds the object-type definition shared by every external artifact
 * reference field (store, bucket, path, size in bytes, entry count).
 *
 * Returns a fresh object on every call, so a caller can spread the result
 * into its own field definition and add `name` / `title` without sharing
 * state between fields.
 *
 * @returns {{fields: Array, type: "object"}} Field list plus the `"object"` type marker.
 */
function artifactRefSchema() {
  return {
    fields: [
      defineField4({ name: "store", title: "Store", type: "string" }),
      defineField4({ name: "bucket", title: "Bucket", type: "string" }),
      defineField4({ name: "path", title: "Path", type: "string" }),
      defineField4({ name: "bytes", title: "Size (bytes)", type: "number" }),
      defineField4({
        name: "entryCount",
        title: "Entry Count",
        type: "number"
      })
    ],
    type: "object"
  };
}
|
|
724
742
|
var reportSchema = defineType4({
|
|
725
743
|
groups: [
|
|
726
744
|
{ name: "main", title: "Main", default: true },
|
|
@@ -1341,6 +1359,117 @@ var reportSchema = defineType4({
|
|
|
1341
1359
|
title: "Low-Scoring Judgments",
|
|
1342
1360
|
type: "array"
|
|
1343
1361
|
}),
|
|
1362
|
+
defineField4({
|
|
1363
|
+
description: "Per-test results with model output, grader reasoning, and metadata. One entry per test \xD7 model combination. See D0029.",
|
|
1364
|
+
name: "testResults",
|
|
1365
|
+
of: [
|
|
1366
|
+
{
|
|
1367
|
+
fields: [
|
|
1368
|
+
defineField4({
|
|
1369
|
+
name: "taskId",
|
|
1370
|
+
title: "Task",
|
|
1371
|
+
type: "string"
|
|
1372
|
+
}),
|
|
1373
|
+
defineField4({
|
|
1374
|
+
name: "modelId",
|
|
1375
|
+
title: "Model ID",
|
|
1376
|
+
type: "string"
|
|
1377
|
+
}),
|
|
1378
|
+
defineField4({
|
|
1379
|
+
name: "area",
|
|
1380
|
+
title: "Feature Area",
|
|
1381
|
+
type: "string"
|
|
1382
|
+
}),
|
|
1383
|
+
defineField4({
|
|
1384
|
+
name: "variant",
|
|
1385
|
+
options: { list: ["gold", "baseline"] },
|
|
1386
|
+
title: "Variant",
|
|
1387
|
+
type: "string"
|
|
1388
|
+
}),
|
|
1389
|
+
defineField4({
|
|
1390
|
+
name: "dimensions",
|
|
1391
|
+
of: [
|
|
1392
|
+
{
|
|
1393
|
+
fields: [
|
|
1394
|
+
defineField4({
|
|
1395
|
+
name: "dimension",
|
|
1396
|
+
title: "Dimension",
|
|
1397
|
+
type: "string"
|
|
1398
|
+
}),
|
|
1399
|
+
defineField4({
|
|
1400
|
+
name: "score",
|
|
1401
|
+
title: "Score (0\u2013100)",
|
|
1402
|
+
type: "number"
|
|
1403
|
+
}),
|
|
1404
|
+
defineField4({
|
|
1405
|
+
name: "reason",
|
|
1406
|
+
title: "Reason",
|
|
1407
|
+
type: "text"
|
|
1408
|
+
})
|
|
1409
|
+
],
|
|
1410
|
+
type: "object"
|
|
1411
|
+
}
|
|
1412
|
+
],
|
|
1413
|
+
title: "Dimensions",
|
|
1414
|
+
type: "array"
|
|
1415
|
+
}),
|
|
1416
|
+
defineField4({
|
|
1417
|
+
name: "compositeScore",
|
|
1418
|
+
title: "Composite Score",
|
|
1419
|
+
type: "number"
|
|
1420
|
+
}),
|
|
1421
|
+
defineField4({
|
|
1422
|
+
name: "latencyMs",
|
|
1423
|
+
title: "Latency (ms)",
|
|
1424
|
+
type: "number"
|
|
1425
|
+
}),
|
|
1426
|
+
defineField4({
|
|
1427
|
+
name: "tokenUsage",
|
|
1428
|
+
fields: [
|
|
1429
|
+
defineField4({
|
|
1430
|
+
name: "prompt",
|
|
1431
|
+
title: "Prompt",
|
|
1432
|
+
type: "number"
|
|
1433
|
+
}),
|
|
1434
|
+
defineField4({
|
|
1435
|
+
name: "completion",
|
|
1436
|
+
title: "Completion",
|
|
1437
|
+
type: "number"
|
|
1438
|
+
}),
|
|
1439
|
+
defineField4({
|
|
1440
|
+
name: "total",
|
|
1441
|
+
title: "Total",
|
|
1442
|
+
type: "number"
|
|
1443
|
+
})
|
|
1444
|
+
],
|
|
1445
|
+
title: "Token Usage",
|
|
1446
|
+
type: "object"
|
|
1447
|
+
}),
|
|
1448
|
+
defineField4({
|
|
1449
|
+
name: "cost",
|
|
1450
|
+
title: "Cost",
|
|
1451
|
+
type: "number"
|
|
1452
|
+
}),
|
|
1453
|
+
defineField4({
|
|
1454
|
+
description: "True when the model failed to produce output (empty response, API error, or refusal).",
|
|
1455
|
+
name: "outputFailure",
|
|
1456
|
+
title: "Output Failure",
|
|
1457
|
+
type: "boolean"
|
|
1458
|
+
}),
|
|
1459
|
+
defineField4({
|
|
1460
|
+
description: "Documentation pages the task expected the model to use.",
|
|
1461
|
+
name: "canonicalDocs",
|
|
1462
|
+
of: [documentRefSchema()],
|
|
1463
|
+
title: "Canonical Docs",
|
|
1464
|
+
type: "array"
|
|
1465
|
+
})
|
|
1466
|
+
],
|
|
1467
|
+
type: "object"
|
|
1468
|
+
}
|
|
1469
|
+
],
|
|
1470
|
+
title: "Test Results",
|
|
1471
|
+
type: "array"
|
|
1472
|
+
}),
|
|
1344
1473
|
defineField4({
|
|
1345
1474
|
name: "perModel",
|
|
1346
1475
|
of: [
|
|
@@ -1519,6 +1648,34 @@ var reportSchema = defineType4({
|
|
|
1519
1648
|
],
|
|
1520
1649
|
title: "Agent Behavior",
|
|
1521
1650
|
type: "array"
|
|
1651
|
+
}),
|
|
1652
|
+
defineField4({
|
|
1653
|
+
description: "External artifact references \u2014 points to large data in GCS that was too big for inline storage (D0030).",
|
|
1654
|
+
fields: [
|
|
1655
|
+
defineField4({
|
|
1656
|
+
...artifactRefSchema(),
|
|
1657
|
+
name: "testOutputs",
|
|
1658
|
+
title: "Test Outputs"
|
|
1659
|
+
}),
|
|
1660
|
+
defineField4({
|
|
1661
|
+
...artifactRefSchema(),
|
|
1662
|
+
name: "renderedPrompts",
|
|
1663
|
+
title: "Rendered Prompts"
|
|
1664
|
+
}),
|
|
1665
|
+
defineField4({
|
|
1666
|
+
...artifactRefSchema(),
|
|
1667
|
+
name: "rawResults",
|
|
1668
|
+
title: "Raw Results"
|
|
1669
|
+
}),
|
|
1670
|
+
defineField4({
|
|
1671
|
+
...artifactRefSchema(),
|
|
1672
|
+
name: "traces",
|
|
1673
|
+
title: "Traces"
|
|
1674
|
+
})
|
|
1675
|
+
],
|
|
1676
|
+
name: "artifacts",
|
|
1677
|
+
title: "Artifacts",
|
|
1678
|
+
type: "object"
|
|
1522
1679
|
})
|
|
1523
1680
|
],
|
|
1524
1681
|
group: ["main", "all-fields"],
|
|
@@ -3089,7 +3246,7 @@ import {
|
|
|
3089
3246
|
TabPanel as TabPanel2,
|
|
3090
3247
|
Text as Text41
|
|
3091
3248
|
} from "@sanity/ui";
|
|
3092
|
-
import { useCallback as
|
|
3249
|
+
import { useCallback as useCallback28 } from "react";
|
|
3093
3250
|
import { useRouter as useRouter3 } from "sanity/router";
|
|
3094
3251
|
|
|
3095
3252
|
// src/lib/help-context.ts
|
|
@@ -3752,7 +3909,18 @@ var reportDetailQuery = (
|
|
|
3752
3909
|
title,
|
|
3753
3910
|
provenance,
|
|
3754
3911
|
summary,
|
|
3755
|
-
comparison
|
|
3912
|
+
"comparison": comparison {
|
|
3913
|
+
areas,
|
|
3914
|
+
deltas,
|
|
3915
|
+
generatedAt,
|
|
3916
|
+
improved,
|
|
3917
|
+
mismatched,
|
|
3918
|
+
noiseThreshold,
|
|
3919
|
+
noiseThresholdEmpirical,
|
|
3920
|
+
notEvaluated,
|
|
3921
|
+
regressed,
|
|
3922
|
+
unchanged
|
|
3923
|
+
}
|
|
3756
3924
|
}
|
|
3757
3925
|
`
|
|
3758
3926
|
);
|
|
@@ -5713,13 +5881,78 @@ import {
|
|
|
5713
5881
|
Tooltip as Tooltip8
|
|
5714
5882
|
} from "@sanity/ui";
|
|
5715
5883
|
import {
|
|
5716
|
-
useCallback as
|
|
5884
|
+
useCallback as useCallback26,
|
|
5717
5885
|
useEffect as useEffect9,
|
|
5718
5886
|
useMemo as useMemo9,
|
|
5719
|
-
useState as
|
|
5887
|
+
useState as useState20
|
|
5720
5888
|
} from "react";
|
|
5721
5889
|
import { useClient as useClient10 } from "sanity";
|
|
5722
5890
|
|
|
5891
|
+
// src/lib/useArtifactCache.ts
|
|
5892
|
+
import { useCallback as useCallback14, useRef as useRef5, useState as useState10 } from "react";
|
|
5893
|
+
/**
 * Lazily loads the `testOutputs` artifact for a report and serves entries
 * from an in-memory cache.
 *
 * Loading is two-step: the artifact API is asked for a signed URL, then the
 * artifact JSON is downloaded from that URL. Nothing is fetched until
 * `fetchArtifacts()` is called.
 *
 * The cache and the reported status are tied to the `reportId` they were
 * loaded for. When `reportId` changes, the hook reports `"idle"` again and
 * `getOutput` returns `null` until the new report's artifact is fetched, so
 * one report never shows another report's outputs.
 *
 * @param reportId    Report whose artifact should be loaded.
 * @param artifactRef Artifact reference from the report; only its presence is
 *                    checked — when falsy, `fetchArtifacts` does nothing.
 * @returns `{ status, error, getOutput, fetchArtifacts }` where `status` is
 *          `"idle" | "loading" | "ready" | "error"`, `error` is a message or
 *          `null`, `getOutput(taskId, modelId)` returns the cached entry or
 *          `null`, and `fetchArtifacts()` starts the load (never rejects).
 */
function useArtifactCache(reportId, artifactRef) {
  // Entries plus the report they belong to; `reportId: null` means nothing loaded.
  const cacheRef = useRef5({ reportId: null, entries: /* @__PURE__ */ new Map() });
  // Report id of the request currently in flight, or null.
  const inFlightRef = useRef5(null);
  const [state, setState] = useState10({
    reportId: null,
    status: "idle",
    error: null
  });
  const fetchArtifacts = useCallback14(async () => {
    if (!artifactRef) return;
    // Skip when this report is already loading or already loaded. Tracking
    // "loaded" explicitly (rather than by map size) keeps an empty artifact
    // from being refetched on every call.
    if (inFlightRef.current === reportId) return;
    if (cacheRef.current.reportId === reportId) return;
    inFlightRef.current = reportId;
    setState({ reportId, status: "loading", error: null });
    try {
      const signingRes = await fetch(
        `${ARTIFACT_API_BASE_URL}/artifacts/${encodeURIComponent(reportId)}?type=testOutputs`,
        {
          credentials: "omit",
          headers: { Accept: "application/json" }
        }
      );
      if (!signingRes.ok) {
        // Best effort: include the start of the response body in the message.
        const body = await signingRes.text().catch(() => "");
        throw new Error(
          `Artifact signing failed: ${signingRes.status} ${signingRes.statusText}${body ? ` \u2014 ${body.slice(0, 200)}` : ""}`
        );
      }
      const envelope = await signingRes.json();
      if (!envelope || envelope.object === "error" || !envelope.url) {
        throw new Error(
          envelope?.error?.message ?? "Invalid signing response \u2014 missing signed URL"
        );
      }
      const artifactRes = await fetch(envelope.url, {
        credentials: "omit"
      });
      if (!artifactRes.ok) {
        throw new Error(
          `GCS artifact fetch failed: ${artifactRes.status} ${artifactRes.statusText}`
        );
      }
      const artifact = await artifactRes.json();
      const entries = artifact != null && typeof artifact === "object" ? artifact.entries : null;
      if (entries == null || typeof entries !== "object") {
        throw new Error("Invalid artifact payload \u2014 missing entries");
      }
      // A newer request replaced this one while it was in flight: drop the
      // result instead of overwriting the newer report's cache and status.
      if (inFlightRef.current !== reportId) return;
      cacheRef.current = {
        reportId,
        entries: new Map(Object.entries(entries))
      };
      setState({ reportId, status: "ready", error: null });
    } catch (err) {
      if (inFlightRef.current !== reportId) return;
      const message = err instanceof Error ? err.message : String(err);
      setState({ reportId, status: "error", error: message });
    } finally {
      if (inFlightRef.current === reportId) inFlightRef.current = null;
    }
  }, [reportId, artifactRef]);
  const getOutput = useCallback14(
    (taskId, modelId) => {
      const cache = cacheRef.current;
      // Never serve entries that were loaded for a different report.
      if (cache.reportId !== reportId) return null;
      return cache.entries.get(`${taskId}::${modelId}`) ?? null;
    },
    [reportId]
  );
  // State recorded for another report is reported as a fresh, idle hook.
  const isCurrent = state.reportId === reportId;
  return {
    status: isCurrent ? state.status : "idle",
    error: isCurrent ? state.error : null,
    getOutput,
    fetchArtifacts
  };
}
|
|
5955
|
+
|
|
5723
5956
|
// src/lib/thresholds.ts
|
|
5724
5957
|
var SCORE_POSITIVE = 80;
|
|
5725
5958
|
var SCORE_CAUTION = 70;
|
|
@@ -5761,7 +5994,7 @@ function negativeDocLiftSentiment(count) {
|
|
|
5761
5994
|
}
|
|
5762
5995
|
|
|
5763
5996
|
// src/components/report-detail/AgentActivitySection.tsx
|
|
5764
|
-
import { useMemo as useMemo5, useState as
|
|
5997
|
+
import { useMemo as useMemo5, useState as useState11 } from "react";
|
|
5765
5998
|
import { HelpCircleIcon as HelpCircleIcon6, SearchIcon as SearchIcon4 } from "@sanity/icons";
|
|
5766
5999
|
import {
|
|
5767
6000
|
Badge as Badge5,
|
|
@@ -5936,7 +6169,7 @@ function FeatureActivityCard({
|
|
|
5936
6169
|
] }) });
|
|
5937
6170
|
}
|
|
5938
6171
|
function SearchQueryList({ queries }) {
|
|
5939
|
-
const [filter, setFilter] =
|
|
6172
|
+
const [filter, setFilter] = useState11("");
|
|
5940
6173
|
const filtered = useMemo5(() => {
|
|
5941
6174
|
if (!filter) return queries;
|
|
5942
6175
|
const lower = filter.toLowerCase();
|
|
@@ -6582,7 +6815,7 @@ function Pill({
|
|
|
6582
6815
|
// src/components/report-detail/LineageCard.tsx
|
|
6583
6816
|
import { LinkIcon as LinkIcon2 } from "@sanity/icons";
|
|
6584
6817
|
import { Badge as Badge6, Card as Card13, Flex as Flex15, Stack as Stack19, Text as Text24 } from "@sanity/ui";
|
|
6585
|
-
import { useCallback as
|
|
6818
|
+
import { useCallback as useCallback15, useEffect as useEffect7, useState as useState12 } from "react";
|
|
6586
6819
|
import { useClient as useClient5 } from "sanity";
|
|
6587
6820
|
import { useRouter as useRouter2 } from "sanity/router";
|
|
6588
6821
|
import { jsx as jsx26, jsxs as jsxs22 } from "react/jsx-runtime";
|
|
@@ -6602,7 +6835,7 @@ function LineageCard({ provenance, reportId }) {
|
|
|
6602
6835
|
const { lineage } = provenance;
|
|
6603
6836
|
const router = useRouter2();
|
|
6604
6837
|
const client = useClient5({ apiVersion: API_VERSION });
|
|
6605
|
-
const [spawned, setSpawned] =
|
|
6838
|
+
const [spawned, setSpawned] = useState12([]);
|
|
6606
6839
|
useEffect7(() => {
|
|
6607
6840
|
let cancelled = false;
|
|
6608
6841
|
client.fetch(SPAWNED_REPORTS_QUERY, {
|
|
@@ -6650,7 +6883,7 @@ function LineageLink({
|
|
|
6650
6883
|
reportId,
|
|
6651
6884
|
router
|
|
6652
6885
|
}) {
|
|
6653
|
-
const handleClick =
|
|
6886
|
+
const handleClick = useCallback15(() => {
|
|
6654
6887
|
router.navigate({ reportId });
|
|
6655
6888
|
}, [reportId, router]);
|
|
6656
6889
|
return /* @__PURE__ */ jsxs22(Flex15, { align: "center", gap: 2, children: [
|
|
@@ -6679,7 +6912,7 @@ function SpawnedReportRow({
|
|
|
6679
6912
|
report,
|
|
6680
6913
|
router
|
|
6681
6914
|
}) {
|
|
6682
|
-
const handleClick =
|
|
6915
|
+
const handleClick = useCallback15(() => {
|
|
6683
6916
|
router.navigate({ reportId: report.reportId });
|
|
6684
6917
|
}, [report.reportId, router]);
|
|
6685
6918
|
const dateLabel = formatShortDate(report.completedAt);
|
|
@@ -6714,7 +6947,7 @@ function formatShortDate(iso) {
|
|
|
6714
6947
|
}
|
|
6715
6948
|
|
|
6716
6949
|
// src/components/report-detail/JudgmentList.tsx
|
|
6717
|
-
import { useCallback as
|
|
6950
|
+
import React2, { useCallback as useCallback16, useEffect as useEffect8, useRef as useRef6, useState as useState13 } from "react";
|
|
6718
6951
|
import { CopyIcon, ErrorOutlineIcon as ErrorOutlineIcon2, HelpCircleIcon as HelpCircleIcon7 } from "@sanity/icons";
|
|
6719
6952
|
import { Box as Box16, Button as Button3, Flex as Flex16, Stack as Stack20, Text as Text25, Tooltip as Tooltip7, useToast as useToast2 } from "@sanity/ui";
|
|
6720
6953
|
|
|
@@ -6768,14 +7001,29 @@ function groupByArea(judgments) {
|
|
|
6768
7001
|
function dimensionLabel2(dim) {
|
|
6769
7002
|
return dim.split("-").map((w) => w.charAt(0).toUpperCase() + w.slice(1)).join(" ");
|
|
6770
7003
|
}
|
|
7004
|
+
/**
 * Builds the lookup key used to match a judgment to its per-test result.
 *
 * @param taskId    Task identifier.
 * @param dimension Grading dimension name.
 * @param modelId   Model identifier.
 * @returns The three parts joined as `taskId::dimension::modelId`.
 */
function testResultKey(taskId, dimension, modelId) {
  return `${taskId}::${dimension}::${modelId}`;
}
|
|
6771
7007
|
function JudgmentList({
|
|
7008
|
+
artifactCache,
|
|
6772
7009
|
focus,
|
|
6773
7010
|
judgments,
|
|
6774
|
-
onFocusChange
|
|
7011
|
+
onFocusChange,
|
|
7012
|
+
testResults
|
|
6775
7013
|
}) {
|
|
6776
7014
|
if (judgments.length === 0) return null;
|
|
6777
7015
|
const pruned = pruneNoise(judgments);
|
|
6778
7016
|
if (pruned.length === 0) return null;
|
|
7017
|
+
const testResultMap = React2.useMemo(() => {
|
|
7018
|
+
const map = /* @__PURE__ */ new Map();
|
|
7019
|
+
if (!testResults) return map;
|
|
7020
|
+
for (const tr of testResults) {
|
|
7021
|
+
for (const dim of tr.dimensions) {
|
|
7022
|
+
map.set(testResultKey(tr.taskId, dim.dimension, tr.modelId), tr);
|
|
7023
|
+
}
|
|
7024
|
+
}
|
|
7025
|
+
return map;
|
|
7026
|
+
}, [testResults]);
|
|
6779
7027
|
const outputFailureCount = judgments.length - pruned.length;
|
|
6780
7028
|
const grouped = groupByArea(pruned);
|
|
6781
7029
|
return /* @__PURE__ */ jsxs23(Stack20, { space: 3, children: [
|
|
@@ -6868,13 +7116,18 @@ function JudgmentList({
|
|
|
6868
7116
|
] }),
|
|
6869
7117
|
/* @__PURE__ */ jsx28(Stack20, { space: 2, children: areaJudgments.map((j) => {
|
|
6870
7118
|
const slug = judgmentSlug(j);
|
|
7119
|
+
const matchedResult = testResultMap.get(
|
|
7120
|
+
testResultKey(j.taskId, j.dimension, j.modelId)
|
|
7121
|
+
);
|
|
6871
7122
|
return /* @__PURE__ */ jsx28(
|
|
6872
7123
|
JudgmentCard,
|
|
6873
7124
|
{
|
|
7125
|
+
artifactCache,
|
|
6874
7126
|
focused: slug === focus,
|
|
6875
7127
|
judgment: j,
|
|
6876
7128
|
onFocusChange,
|
|
6877
|
-
slug
|
|
7129
|
+
slug,
|
|
7130
|
+
testResult: matchedResult
|
|
6878
7131
|
},
|
|
6879
7132
|
slug
|
|
6880
7133
|
);
|
|
@@ -6886,17 +7139,29 @@ function JudgmentList({
|
|
|
6886
7139
|
] });
|
|
6887
7140
|
}
|
|
6888
7141
|
function JudgmentCard({
|
|
7142
|
+
artifactCache,
|
|
6889
7143
|
focused,
|
|
6890
7144
|
judgment,
|
|
6891
7145
|
onFocusChange,
|
|
6892
|
-
slug
|
|
7146
|
+
slug,
|
|
7147
|
+
testResult
|
|
6893
7148
|
}) {
|
|
6894
|
-
const [expanded, setExpanded] =
|
|
6895
|
-
const
|
|
7149
|
+
const [expanded, setExpanded] = useState13(focused);
|
|
7150
|
+
const [outputExpanded, setOutputExpanded] = useState13(false);
|
|
7151
|
+
const cardRef = useRef6(null);
|
|
6896
7152
|
const toast = useToast2();
|
|
6897
7153
|
const dimLabel = dimensionLabel2(judgment.dimension);
|
|
6898
7154
|
const sep = judgment.taskId.indexOf(" - ");
|
|
6899
7155
|
const taskName = sep > 0 ? judgment.taskId.substring(sep + 3) : judgment.taskId;
|
|
7156
|
+
const inlineOutput = testResult?.responseOutput;
|
|
7157
|
+
const artifactEntry = artifactCache?.getOutput(
|
|
7158
|
+
judgment.taskId,
|
|
7159
|
+
judgment.modelId
|
|
7160
|
+
);
|
|
7161
|
+
const resolvedOutput = inlineOutput ?? artifactEntry?.responseOutput ?? null;
|
|
7162
|
+
const resolvedTruncated = testResult?.responseOutputTruncated ?? artifactEntry?.responseOutputTruncated ?? false;
|
|
7163
|
+
const canFetchArtifact = !inlineOutput && !artifactEntry && artifactCache != null && artifactCache.status !== "ready";
|
|
7164
|
+
const hasOutputOrCanFetch = resolvedOutput != null || canFetchArtifact;
|
|
6900
7165
|
useEffect8(() => {
|
|
6901
7166
|
if (focused) {
|
|
6902
7167
|
setExpanded(true);
|
|
@@ -6906,10 +7171,10 @@ function JudgmentCard({
|
|
|
6906
7171
|
return () => clearTimeout(timer);
|
|
6907
7172
|
}
|
|
6908
7173
|
}, []);
|
|
6909
|
-
const handleToggle =
|
|
7174
|
+
const handleToggle = useCallback16(() => {
|
|
6910
7175
|
setExpanded((prev) => !prev);
|
|
6911
7176
|
}, []);
|
|
6912
|
-
const handleCopyLink =
|
|
7177
|
+
const handleCopyLink = useCallback16(
|
|
6913
7178
|
(e) => {
|
|
6914
7179
|
e.stopPropagation();
|
|
6915
7180
|
onFocusChange?.(slug);
|
|
@@ -6933,6 +7198,39 @@ function JudgmentCard({
|
|
|
6933
7198
|
},
|
|
6934
7199
|
[toast]
|
|
6935
7200
|
);
|
|
7201
|
+
const handleCopyOutput = useCallback16(
|
|
7202
|
+
(e) => {
|
|
7203
|
+
e.stopPropagation();
|
|
7204
|
+
if (!resolvedOutput) return;
|
|
7205
|
+
navigator.clipboard.writeText(resolvedOutput).then(
|
|
7206
|
+
() => {
|
|
7207
|
+
toast.push({
|
|
7208
|
+
closable: true,
|
|
7209
|
+
status: "success",
|
|
7210
|
+
title: "Model output copied to clipboard"
|
|
7211
|
+
});
|
|
7212
|
+
},
|
|
7213
|
+
() => {
|
|
7214
|
+
toast.push({
|
|
7215
|
+
closable: true,
|
|
7216
|
+
status: "error",
|
|
7217
|
+
title: "Failed to copy output"
|
|
7218
|
+
});
|
|
7219
|
+
}
|
|
7220
|
+
);
|
|
7221
|
+
},
|
|
7222
|
+
[resolvedOutput, toast]
|
|
7223
|
+
);
|
|
7224
|
+
const handleToggleOutput = useCallback16(
|
|
7225
|
+
(e) => {
|
|
7226
|
+
e.stopPropagation();
|
|
7227
|
+
if (!outputExpanded && !resolvedOutput && artifactCache) {
|
|
7228
|
+
artifactCache.fetchArtifacts();
|
|
7229
|
+
}
|
|
7230
|
+
setOutputExpanded((prev) => !prev);
|
|
7231
|
+
},
|
|
7232
|
+
[outputExpanded, resolvedOutput, artifactCache]
|
|
7233
|
+
);
|
|
6936
7234
|
return /* @__PURE__ */ jsx28(
|
|
6937
7235
|
Box16,
|
|
6938
7236
|
{
|
|
@@ -7013,22 +7311,115 @@ function JudgmentCard({
|
|
|
7013
7311
|
)
|
|
7014
7312
|
}
|
|
7015
7313
|
),
|
|
7314
|
+
hasOutputOrCanFetch && /* @__PURE__ */ jsxs23(Box16, { children: [
|
|
7315
|
+
/* @__PURE__ */ jsxs23(Flex16, { align: "center", gap: 2, children: [
|
|
7316
|
+
/* @__PURE__ */ jsxs23(
|
|
7317
|
+
"button",
|
|
7318
|
+
{
|
|
7319
|
+
onClick: handleToggleOutput,
|
|
7320
|
+
style: {
|
|
7321
|
+
alignItems: "center",
|
|
7322
|
+
background: "none",
|
|
7323
|
+
border: "none",
|
|
7324
|
+
color: "var(--card-muted-fg-color)",
|
|
7325
|
+
cursor: "pointer",
|
|
7326
|
+
display: "flex",
|
|
7327
|
+
fontSize: 13,
|
|
7328
|
+
gap: 4,
|
|
7329
|
+
padding: 0
|
|
7330
|
+
},
|
|
7331
|
+
children: [
|
|
7332
|
+
/* @__PURE__ */ jsx28("span", { children: outputExpanded ? "\u25BE" : "\u25B8" }),
|
|
7333
|
+
/* @__PURE__ */ jsx28("span", { style: { fontWeight: 500 }, children: "Model Output" })
|
|
7334
|
+
]
|
|
7335
|
+
}
|
|
7336
|
+
),
|
|
7337
|
+
resolvedTruncated && /* @__PURE__ */ jsx28(
|
|
7338
|
+
"span",
|
|
7339
|
+
{
|
|
7340
|
+
style: {
|
|
7341
|
+
backgroundColor: "rgba(251,191,36,0.15)",
|
|
7342
|
+
borderRadius: 4,
|
|
7343
|
+
color: "#fbbf24",
|
|
7344
|
+
fontSize: 11,
|
|
7345
|
+
padding: "1px 6px"
|
|
7346
|
+
},
|
|
7347
|
+
children: "truncated"
|
|
7348
|
+
}
|
|
7349
|
+
),
|
|
7350
|
+
testResult?.latencyMs != null && /* @__PURE__ */ jsxs23(Text25, { muted: true, size: 0, children: [
|
|
7351
|
+
(testResult.latencyMs / 1e3).toFixed(1),
|
|
7352
|
+
"s"
|
|
7353
|
+
] }),
|
|
7354
|
+
testResult?.cost != null && testResult.cost > 0 && /* @__PURE__ */ jsxs23(Text25, { muted: true, size: 0, children: [
|
|
7355
|
+
"$",
|
|
7356
|
+
testResult.cost.toFixed(4)
|
|
7357
|
+
] })
|
|
7358
|
+
] }),
|
|
7359
|
+
outputExpanded && /* @__PURE__ */ jsxs23(
|
|
7360
|
+
Box16,
|
|
7361
|
+
{
|
|
7362
|
+
marginTop: 2,
|
|
7363
|
+
style: {
|
|
7364
|
+
backgroundColor: "var(--card-muted-bg-color, rgba(255,255,255,0.04))",
|
|
7365
|
+
borderRadius: 4,
|
|
7366
|
+
maxHeight: 400,
|
|
7367
|
+
overflowY: "auto",
|
|
7368
|
+
padding: 12
|
|
7369
|
+
},
|
|
7370
|
+
children: [
|
|
7371
|
+
!resolvedOutput && artifactCache?.status === "loading" && /* @__PURE__ */ jsx28(Text25, { muted: true, size: 1, children: "Fetching model output\u2026" }),
|
|
7372
|
+
!resolvedOutput && artifactCache?.status === "error" && /* @__PURE__ */ jsxs23(Text25, { muted: true, size: 1, style: { color: "#f87171" }, children: [
|
|
7373
|
+
"Failed to load model output",
|
|
7374
|
+
artifactCache.error ? `: ${artifactCache.error}` : ""
|
|
7375
|
+
] }),
|
|
7376
|
+
resolvedOutput && /* @__PURE__ */ jsx28(
|
|
7377
|
+
"pre",
|
|
7378
|
+
{
|
|
7379
|
+
style: {
|
|
7380
|
+
fontFamily: "var(--font-code-family, 'SF Mono', 'Fira Code', monospace)",
|
|
7381
|
+
fontSize: 13,
|
|
7382
|
+
lineHeight: 1.5,
|
|
7383
|
+
margin: 0,
|
|
7384
|
+
whiteSpace: "pre-wrap",
|
|
7385
|
+
wordBreak: "break-word"
|
|
7386
|
+
},
|
|
7387
|
+
children: resolvedOutput
|
|
7388
|
+
}
|
|
7389
|
+
)
|
|
7390
|
+
]
|
|
7391
|
+
}
|
|
7392
|
+
)
|
|
7393
|
+
] }),
|
|
7016
7394
|
/* @__PURE__ */ jsxs23(Flex16, { align: "center", gap: 2, wrap: "wrap", children: [
|
|
7017
7395
|
judgment.canonicalDocs && judgment.canonicalDocs.length > 0 && /* @__PURE__ */ jsxs23(Fragment9, { children: [
|
|
7018
7396
|
/* @__PURE__ */ jsx28(Text25, { muted: true, size: 1, children: "Docs:" }),
|
|
7019
7397
|
judgment.canonicalDocs.map((doc) => /* @__PURE__ */ jsx28(DocBadge, { doc }, doc.slug))
|
|
7020
7398
|
] }),
|
|
7021
|
-
/* @__PURE__ */
|
|
7022
|
-
|
|
7023
|
-
|
|
7024
|
-
|
|
7025
|
-
|
|
7026
|
-
|
|
7027
|
-
|
|
7028
|
-
|
|
7029
|
-
|
|
7030
|
-
|
|
7031
|
-
|
|
7399
|
+
/* @__PURE__ */ jsxs23("span", { style: { display: "flex", gap: 4, marginLeft: "auto" }, children: [
|
|
7400
|
+
resolvedOutput && /* @__PURE__ */ jsx28(
|
|
7401
|
+
Button3,
|
|
7402
|
+
{
|
|
7403
|
+
fontSize: 1,
|
|
7404
|
+
icon: CopyIcon,
|
|
7405
|
+
mode: "ghost",
|
|
7406
|
+
onClick: handleCopyOutput,
|
|
7407
|
+
padding: 2,
|
|
7408
|
+
text: "Copy Output"
|
|
7409
|
+
}
|
|
7410
|
+
),
|
|
7411
|
+
/* @__PURE__ */ jsx28(
|
|
7412
|
+
Button3,
|
|
7413
|
+
{
|
|
7414
|
+
fontSize: 1,
|
|
7415
|
+
icon: CopyIcon,
|
|
7416
|
+
mode: "ghost",
|
|
7417
|
+
onClick: handleCopyLink,
|
|
7418
|
+
padding: 2,
|
|
7419
|
+
text: "Copy Link"
|
|
7420
|
+
}
|
|
7421
|
+
)
|
|
7422
|
+
] })
|
|
7032
7423
|
] })
|
|
7033
7424
|
] })
|
|
7034
7425
|
] })
|
|
@@ -7038,7 +7429,7 @@ function JudgmentCard({
|
|
|
7038
7429
|
function DocBadge({
|
|
7039
7430
|
doc
|
|
7040
7431
|
}) {
|
|
7041
|
-
const [hovered, setHovered] =
|
|
7432
|
+
const [hovered, setHovered] = useState13(false);
|
|
7042
7433
|
const isLinked = Boolean(doc.documentId);
|
|
7043
7434
|
const tooltipLabel = isLinked ? `Edit "${doc.title || doc.slug}"` : doc.title || doc.slug;
|
|
7044
7435
|
const badge = /* @__PURE__ */ jsx28(
|
|
@@ -7204,20 +7595,20 @@ import {
|
|
|
7204
7595
|
MenuDivider,
|
|
7205
7596
|
useToast as useToast9
|
|
7206
7597
|
} from "@sanity/ui";
|
|
7207
|
-
import { useCallback as
|
|
7598
|
+
import { useCallback as useCallback22, useState as useState17 } from "react";
|
|
7208
7599
|
import { useClient as useClient9 } from "sanity";
|
|
7209
7600
|
|
|
7210
7601
|
// src/components/report-detail/report-actions/CopyReportAction.tsx
|
|
7211
7602
|
import { ClipboardIcon } from "@sanity/icons";
|
|
7212
7603
|
import { MenuItem, useToast as useToast4 } from "@sanity/ui";
|
|
7213
|
-
import { useCallback as
|
|
7604
|
+
import { useCallback as useCallback17, useState as useState14 } from "react";
|
|
7214
7605
|
import { useClient as useClient6 } from "sanity";
|
|
7215
7606
|
import { jsx as jsx31 } from "react/jsx-runtime";
|
|
7216
7607
|
function CopyReportAction({ documentId }) {
|
|
7217
7608
|
const client = useClient6({ apiVersion: API_VERSION });
|
|
7218
7609
|
const toast = useToast4();
|
|
7219
|
-
const [copying, setCopying] =
|
|
7220
|
-
const handleClick =
|
|
7610
|
+
const [copying, setCopying] = useState14(false);
|
|
7611
|
+
const handleClick = useCallback17(async () => {
|
|
7221
7612
|
setCopying(true);
|
|
7222
7613
|
try {
|
|
7223
7614
|
const doc = await client.fetch(
|
|
@@ -7263,11 +7654,11 @@ function CopyReportAction({ documentId }) {
|
|
|
7263
7654
|
// src/components/report-detail/report-actions/CopyReportIdAction.tsx
|
|
7264
7655
|
import { CopyIcon as CopyIcon2 } from "@sanity/icons";
|
|
7265
7656
|
import { MenuItem as MenuItem2, useToast as useToast5 } from "@sanity/ui";
|
|
7266
|
-
import { useCallback as
|
|
7657
|
+
import { useCallback as useCallback18 } from "react";
|
|
7267
7658
|
import { jsx as jsx32 } from "react/jsx-runtime";
|
|
7268
7659
|
function CopyReportIdAction({ reportId }) {
|
|
7269
7660
|
const toast = useToast5();
|
|
7270
|
-
const handleClick =
|
|
7661
|
+
const handleClick = useCallback18(() => {
|
|
7271
7662
|
navigator.clipboard.writeText(reportId).then(
|
|
7272
7663
|
() => {
|
|
7273
7664
|
toast.push({
|
|
@@ -7291,13 +7682,13 @@ function CopyReportIdAction({ reportId }) {
|
|
|
7291
7682
|
// src/components/report-detail/report-actions/CopyVisionQueryAction.tsx
|
|
7292
7683
|
import { SearchIcon as SearchIcon5 } from "@sanity/icons";
|
|
7293
7684
|
import { MenuItem as MenuItem3, useToast as useToast6 } from "@sanity/ui";
|
|
7294
|
-
import { useCallback as
|
|
7685
|
+
import { useCallback as useCallback19 } from "react";
|
|
7295
7686
|
import { jsx as jsx33 } from "react/jsx-runtime";
|
|
7296
7687
|
function CopyVisionQueryAction({
|
|
7297
7688
|
reportId
|
|
7298
7689
|
}) {
|
|
7299
7690
|
const toast = useToast6();
|
|
7300
|
-
const handleClick =
|
|
7691
|
+
const handleClick = useCallback19(() => {
|
|
7301
7692
|
const query = `*[_type == "ailf.report" && reportId == "${reportId}"][0]`;
|
|
7302
7693
|
navigator.clipboard.writeText(query).then(
|
|
7303
7694
|
() => {
|
|
@@ -7400,7 +7791,7 @@ function DeleteReportAction({
|
|
|
7400
7791
|
// src/components/report-detail/report-actions/DownloadReportAction.tsx
|
|
7401
7792
|
import { DownloadIcon } from "@sanity/icons";
|
|
7402
7793
|
import { MenuItem as MenuItem5, useToast as useToast7 } from "@sanity/ui";
|
|
7403
|
-
import { useCallback as
|
|
7794
|
+
import { useCallback as useCallback20, useState as useState15 } from "react";
|
|
7404
7795
|
import { useClient as useClient7 } from "sanity";
|
|
7405
7796
|
import { jsx as jsx36 } from "react/jsx-runtime";
|
|
7406
7797
|
function DownloadReportAction({
|
|
@@ -7409,8 +7800,8 @@ function DownloadReportAction({
|
|
|
7409
7800
|
}) {
|
|
7410
7801
|
const client = useClient7({ apiVersion: API_VERSION });
|
|
7411
7802
|
const toast = useToast7();
|
|
7412
|
-
const [downloading, setDownloading] =
|
|
7413
|
-
const handleClick =
|
|
7803
|
+
const [downloading, setDownloading] = useState15(false);
|
|
7804
|
+
const handleClick = useCallback20(async () => {
|
|
7414
7805
|
setDownloading(true);
|
|
7415
7806
|
try {
|
|
7416
7807
|
const doc = await client.fetch(
|
|
@@ -7464,7 +7855,7 @@ function DownloadReportAction({
|
|
|
7464
7855
|
// src/components/report-detail/report-actions/RerunEvaluationAction.tsx
|
|
7465
7856
|
import { PlayIcon as PlayIcon2 } from "@sanity/icons";
|
|
7466
7857
|
import { MenuItem as MenuItem6, useToast as useToast8 } from "@sanity/ui";
|
|
7467
|
-
import { useCallback as
|
|
7858
|
+
import { useCallback as useCallback21, useState as useState16 } from "react";
|
|
7468
7859
|
import { useClient as useClient8, useCurrentUser as useCurrentUser3 } from "sanity";
|
|
7469
7860
|
|
|
7470
7861
|
// src/lib/eval-scope.ts
|
|
@@ -7519,8 +7910,8 @@ function RerunEvaluationAction({
|
|
|
7519
7910
|
const client = useClient8({ apiVersion: API_VERSION });
|
|
7520
7911
|
const currentUser = useCurrentUser3();
|
|
7521
7912
|
const toast = useToast8();
|
|
7522
|
-
const [requesting, setRequesting] =
|
|
7523
|
-
const handleClick =
|
|
7913
|
+
const [requesting, setRequesting] = useState16(false);
|
|
7914
|
+
const handleClick = useCallback21(async () => {
|
|
7524
7915
|
setRequesting(true);
|
|
7525
7916
|
try {
|
|
7526
7917
|
const scope = extractEvalScope(provenance);
|
|
@@ -7568,7 +7959,7 @@ function ReportActions({
|
|
|
7568
7959
|
}) {
|
|
7569
7960
|
const client = useClient9({ apiVersion: API_VERSION });
|
|
7570
7961
|
const toast = useToast9();
|
|
7571
|
-
const handleCopyId =
|
|
7962
|
+
const handleCopyId = useCallback22(() => {
|
|
7572
7963
|
navigator.clipboard.writeText(reportId).then(
|
|
7573
7964
|
() => {
|
|
7574
7965
|
toast.push({
|
|
@@ -7586,15 +7977,15 @@ function ReportActions({
|
|
|
7586
7977
|
}
|
|
7587
7978
|
);
|
|
7588
7979
|
}, [reportId, toast]);
|
|
7589
|
-
const [deleteDialogOpen, setDeleteDialogOpen] =
|
|
7590
|
-
const [deleting, setDeleting] =
|
|
7591
|
-
const handleRequestDelete =
|
|
7980
|
+
const [deleteDialogOpen, setDeleteDialogOpen] = useState17(false);
|
|
7981
|
+
const [deleting, setDeleting] = useState17(false);
|
|
7982
|
+
const handleRequestDelete = useCallback22(() => {
|
|
7592
7983
|
setDeleteDialogOpen(true);
|
|
7593
7984
|
}, []);
|
|
7594
|
-
const handleDeleteClose =
|
|
7985
|
+
const handleDeleteClose = useCallback22(() => {
|
|
7595
7986
|
if (!deleting) setDeleteDialogOpen(false);
|
|
7596
7987
|
}, [deleting]);
|
|
7597
|
-
const handleDeleteConfirm =
|
|
7988
|
+
const handleDeleteConfirm = useCallback22(async () => {
|
|
7598
7989
|
setDeleting(true);
|
|
7599
7990
|
try {
|
|
7600
7991
|
await client.delete(documentId);
|
|
@@ -7714,9 +8105,9 @@ import { Box as Box19, Flex as Flex23, Stack as Stack25, Text as Text31 } from "
|
|
|
7714
8105
|
|
|
7715
8106
|
// src/components/report-detail/AreaScoresGrid.tsx
|
|
7716
8107
|
import React3, {
|
|
7717
|
-
useCallback as
|
|
8108
|
+
useCallback as useCallback23,
|
|
7718
8109
|
useMemo as useMemo6,
|
|
7719
|
-
useState as
|
|
8110
|
+
useState as useState18
|
|
7720
8111
|
} from "react";
|
|
7721
8112
|
import { WarningOutlineIcon as WarningOutlineIcon2 } from "@sanity/icons";
|
|
7722
8113
|
import { Box as Box18, Flex as Flex21, Stack as Stack24, Text as Text29 } from "@sanity/ui";
|
|
@@ -7766,9 +8157,9 @@ function AreaScoresGrid({
|
|
|
7766
8157
|
);
|
|
7767
8158
|
const showLift = isLiteracyMode(mode);
|
|
7768
8159
|
const dimKeys = useMemo6(() => collectDimensionKeys(scores), [scores]);
|
|
7769
|
-
const [sortField, setSortField] =
|
|
7770
|
-
const [sortDir, setSortDir] =
|
|
7771
|
-
const handleSort =
|
|
8160
|
+
const [sortField, setSortField] = useState18("score");
|
|
8161
|
+
const [sortDir, setSortDir] = useState18("desc");
|
|
8162
|
+
const handleSort = useCallback23(
|
|
7772
8163
|
(field) => {
|
|
7773
8164
|
if (field === sortField) {
|
|
7774
8165
|
setSortDir((d) => d === "asc" ? "desc" : "asc");
|
|
@@ -8251,7 +8642,7 @@ function ColHeader2({
|
|
|
8251
8642
|
onClick,
|
|
8252
8643
|
tooltip
|
|
8253
8644
|
}) {
|
|
8254
|
-
const handleKeyDown =
|
|
8645
|
+
const handleKeyDown = useCallback23(
|
|
8255
8646
|
(e) => {
|
|
8256
8647
|
if (onClick && (e.key === "Enter" || e.key === " ")) {
|
|
8257
8648
|
e.preventDefault();
|
|
@@ -8296,7 +8687,7 @@ function ColHeader2({
|
|
|
8296
8687
|
}
|
|
8297
8688
|
|
|
8298
8689
|
// src/components/report-detail/ModelSelector.tsx
|
|
8299
|
-
import { useCallback as
|
|
8690
|
+
import { useCallback as useCallback24 } from "react";
|
|
8300
8691
|
import { Flex as Flex22, Text as Text30 } from "@sanity/ui";
|
|
8301
8692
|
import { jsx as jsx41, jsxs as jsxs29 } from "react/jsx-runtime";
|
|
8302
8693
|
var pillBase = {
|
|
@@ -8373,7 +8764,7 @@ function Pill2({
|
|
|
8373
8764
|
label,
|
|
8374
8765
|
onClick
|
|
8375
8766
|
}) {
|
|
8376
|
-
const handleKeyDown =
|
|
8767
|
+
const handleKeyDown = useCallback24(
|
|
8377
8768
|
(e) => {
|
|
8378
8769
|
if (e.key === "Enter" || e.key === " ") {
|
|
8379
8770
|
e.preventDefault();
|
|
@@ -8406,13 +8797,13 @@ function Pill2({
|
|
|
8406
8797
|
}
|
|
8407
8798
|
|
|
8408
8799
|
// src/components/report-detail/useModelSelection.ts
|
|
8409
|
-
import { useCallback as
|
|
8800
|
+
import { useCallback as useCallback25, useMemo as useMemo7, useState as useState19 } from "react";
|
|
8410
8801
|
function useModelSelection({
|
|
8411
8802
|
scores,
|
|
8412
8803
|
perModel
|
|
8413
8804
|
}) {
|
|
8414
|
-
const [selection, setSelection] =
|
|
8415
|
-
const onSelectionChange =
|
|
8805
|
+
const [selection, setSelection] = useState19(null);
|
|
8806
|
+
const onSelectionChange = useCallback25((next) => {
|
|
8416
8807
|
setSelection(next);
|
|
8417
8808
|
}, []);
|
|
8418
8809
|
const isExpanded = selection === "expanded";
|
|
@@ -8915,8 +9306,8 @@ function ReportDetail({
|
|
|
8915
9306
|
subTab
|
|
8916
9307
|
}) {
|
|
8917
9308
|
const client = useClient10({ apiVersion: API_VERSION });
|
|
8918
|
-
const [loading, setLoading] =
|
|
8919
|
-
const [report, setReport] =
|
|
9309
|
+
const [loading, setLoading] = useState20(true);
|
|
9310
|
+
const [report, setReport] = useState20(null);
|
|
8920
9311
|
useEffect9(() => {
|
|
8921
9312
|
let cancelled = false;
|
|
8922
9313
|
setLoading(true);
|
|
@@ -8932,6 +9323,10 @@ function ReportDetail({
|
|
|
8932
9323
|
cancelled = true;
|
|
8933
9324
|
};
|
|
8934
9325
|
}, [client, reportId]);
|
|
9326
|
+
const artifactCache = useArtifactCache(
|
|
9327
|
+
reportId,
|
|
9328
|
+
report?.summary?.artifacts?.testOutputs
|
|
9329
|
+
);
|
|
8935
9330
|
const { summary } = report ?? {};
|
|
8936
9331
|
const hasWeaknesses = Boolean(
|
|
8937
9332
|
summary?.lowScoringJudgments && summary.lowScoringJudgments.length > 0
|
|
@@ -8957,7 +9352,7 @@ function ReportDetail({
|
|
|
8957
9352
|
if (disabledTabs.has(parsed)) return "overview";
|
|
8958
9353
|
return tabs.some((t) => t.id === parsed) ? parsed : "overview";
|
|
8959
9354
|
}, [activeTab, disabledTabs, tabs]);
|
|
8960
|
-
const handleTabClick =
|
|
9355
|
+
const handleTabClick = useCallback26(
|
|
8961
9356
|
(tabId) => {
|
|
8962
9357
|
onTabChange(tabId === "overview" ? null : tabId, null, null);
|
|
8963
9358
|
},
|
|
@@ -9064,6 +9459,7 @@ function ReportDetail({
|
|
|
9064
9459
|
currentTab === "diagnostics" && hasDiagnostics && /* @__PURE__ */ jsx44(
|
|
9065
9460
|
DiagnosticsPanel,
|
|
9066
9461
|
{
|
|
9462
|
+
artifactCache,
|
|
9067
9463
|
comparison,
|
|
9068
9464
|
focus,
|
|
9069
9465
|
judgments: summary.lowScoringJudgments,
|
|
@@ -9071,7 +9467,8 @@ function ReportDetail({
|
|
|
9071
9467
|
onNavigate: (newSubTab, newFocus) => onTabChange("diagnostics", newSubTab, newFocus),
|
|
9072
9468
|
perModel: summary.perModel,
|
|
9073
9469
|
scores: summary.scores,
|
|
9074
|
-
subTab
|
|
9470
|
+
subTab,
|
|
9471
|
+
testResults: summary.testResults
|
|
9075
9472
|
}
|
|
9076
9473
|
),
|
|
9077
9474
|
currentTab === "activity" && hasAgentActivity && /* @__PURE__ */ jsx44(
|
|
@@ -9100,6 +9497,7 @@ var DIAG_TABS = [
|
|
|
9100
9497
|
{ id: "issues", label: "Issues" }
|
|
9101
9498
|
];
|
|
9102
9499
|
function DiagnosticsPanel({
|
|
9500
|
+
artifactCache,
|
|
9103
9501
|
comparison,
|
|
9104
9502
|
focus,
|
|
9105
9503
|
judgments,
|
|
@@ -9107,7 +9505,8 @@ function DiagnosticsPanel({
|
|
|
9107
9505
|
onNavigate,
|
|
9108
9506
|
perModel,
|
|
9109
9507
|
scores,
|
|
9110
|
-
subTab: subTabParam
|
|
9508
|
+
subTab: subTabParam,
|
|
9509
|
+
testResults
|
|
9111
9510
|
}) {
|
|
9112
9511
|
const subTab = parseDiagSubTab(subTabParam);
|
|
9113
9512
|
const issueCount = scores.filter((s) => s.totalScore < SCORE_CAUTION).length + scores.filter((s) => s.negativeDocLift).length + scores.filter(
|
|
@@ -9186,9 +9585,11 @@ function DiagnosticsPanel({
|
|
|
9186
9585
|
judgments && judgments.length > 0 && /* @__PURE__ */ jsx44(
|
|
9187
9586
|
JudgmentList,
|
|
9188
9587
|
{
|
|
9588
|
+
artifactCache,
|
|
9189
9589
|
focus,
|
|
9190
9590
|
judgments,
|
|
9191
|
-
onFocusChange: (slug) => onNavigate("issues", slug)
|
|
9591
|
+
onFocusChange: (slug) => onNavigate("issues", slug),
|
|
9592
|
+
testResults
|
|
9192
9593
|
}
|
|
9193
9594
|
)
|
|
9194
9595
|
] })
|
|
@@ -9283,7 +9684,7 @@ import { jsx as jsx51, jsxs as jsxs38 } from "react/jsx-runtime";
|
|
|
9283
9684
|
|
|
9284
9685
|
// src/components/ScoreTimeline.tsx
|
|
9285
9686
|
import { Card as Card20, Flex as Flex30, Select as Select2, Stack as Stack33, Text as Text40 } from "@sanity/ui";
|
|
9286
|
-
import { useCallback as
|
|
9687
|
+
import { useCallback as useCallback27, useEffect as useEffect10, useMemo as useMemo10, useState as useState21 } from "react";
|
|
9287
9688
|
import { useClient as useClient11 } from "sanity";
|
|
9288
9689
|
import { jsx as jsx52, jsxs as jsxs39 } from "react/jsx-runtime";
|
|
9289
9690
|
var CHART_HEIGHT = 220;
|
|
@@ -9320,10 +9721,10 @@ function scoreForPoint(point, area) {
|
|
|
9320
9721
|
}
|
|
9321
9722
|
function ScoreTimeline({ mode = null, source = null }) {
|
|
9322
9723
|
const client = useClient11({ apiVersion: API_VERSION });
|
|
9323
|
-
const [dataPoints, setDataPoints] =
|
|
9324
|
-
const [loading, setLoading] =
|
|
9325
|
-
const [rangeDays, setRangeDays] =
|
|
9326
|
-
const [selectedArea, setSelectedArea] =
|
|
9724
|
+
const [dataPoints, setDataPoints] = useState21([]);
|
|
9725
|
+
const [loading, setLoading] = useState21(true);
|
|
9726
|
+
const [rangeDays, setRangeDays] = useState21(30);
|
|
9727
|
+
const [selectedArea, setSelectedArea] = useState21(null);
|
|
9327
9728
|
const areaNames = useMemo10(() => {
|
|
9328
9729
|
const names = /* @__PURE__ */ new Set();
|
|
9329
9730
|
for (const dp of dataPoints) {
|
|
@@ -9333,7 +9734,7 @@ function ScoreTimeline({ mode = null, source = null }) {
|
|
|
9333
9734
|
}
|
|
9334
9735
|
return Array.from(names).sort();
|
|
9335
9736
|
}, [dataPoints]);
|
|
9336
|
-
const fetchData =
|
|
9737
|
+
const fetchData = useCallback27(async () => {
|
|
9337
9738
|
setLoading(true);
|
|
9338
9739
|
try {
|
|
9339
9740
|
const startDate = rangeDays ? daysAgo(rangeDays) : "1970-01-01T00:00:00Z";
|
|
@@ -9369,14 +9770,14 @@ function ScoreTimeline({ mode = null, source = null }) {
|
|
|
9369
9770
|
if (chartPoints.length === 0) return 0;
|
|
9370
9771
|
return chartPoints.reduce((sum, p) => sum + p.score, 0) / chartPoints.length;
|
|
9371
9772
|
}, [chartPoints]);
|
|
9372
|
-
const handleRangeChange =
|
|
9773
|
+
const handleRangeChange = useCallback27(
|
|
9373
9774
|
(e) => {
|
|
9374
9775
|
const val = e.currentTarget.value;
|
|
9375
9776
|
setRangeDays(val === "all" ? null : Number(val));
|
|
9376
9777
|
},
|
|
9377
9778
|
[]
|
|
9378
9779
|
);
|
|
9379
|
-
const handleAreaChange =
|
|
9780
|
+
const handleAreaChange = useCallback27(
|
|
9380
9781
|
(e) => {
|
|
9381
9782
|
const val = e.currentTarget.value;
|
|
9382
9783
|
setSelectedArea(val || null);
|
|
@@ -9522,7 +9923,7 @@ function DashboardContent() {
|
|
|
9522
9923
|
const isDetail = reportId !== null;
|
|
9523
9924
|
const activeTab = isDetail ? "latest" : VIEW_PARAM_MAP[routerState.view ?? ""] ?? "latest";
|
|
9524
9925
|
const defaultTopic = deriveHelpTopic(routerState);
|
|
9525
|
-
const navigateToTab =
|
|
9926
|
+
const navigateToTab = useCallback28(
|
|
9526
9927
|
(tab) => {
|
|
9527
9928
|
if (tab === "latest") {
|
|
9528
9929
|
router.navigate({});
|
|
@@ -9532,13 +9933,13 @@ function DashboardContent() {
|
|
|
9532
9933
|
},
|
|
9533
9934
|
[router]
|
|
9534
9935
|
);
|
|
9535
|
-
const handleSelectReport =
|
|
9936
|
+
const handleSelectReport = useCallback28(
|
|
9536
9937
|
(id) => {
|
|
9537
9938
|
router.navigate({ reportId: id });
|
|
9538
9939
|
},
|
|
9539
9940
|
[router]
|
|
9540
9941
|
);
|
|
9541
|
-
const handleTabChange =
|
|
9942
|
+
const handleTabChange = useCallback28(
|
|
9542
9943
|
(tab, subTab, focus) => {
|
|
9543
9944
|
if (!routerState.reportId) return;
|
|
9544
9945
|
const state = {
|
|
@@ -9551,10 +9952,10 @@ function DashboardContent() {
|
|
|
9551
9952
|
},
|
|
9552
9953
|
[router, routerState.reportId]
|
|
9553
9954
|
);
|
|
9554
|
-
const handleBack =
|
|
9955
|
+
const handleBack = useCallback28(() => {
|
|
9555
9956
|
router.navigate({});
|
|
9556
9957
|
}, [router]);
|
|
9557
|
-
const handleOpenHelp =
|
|
9958
|
+
const handleOpenHelp = useCallback28(() => {
|
|
9558
9959
|
openHelp(defaultTopic);
|
|
9559
9960
|
}, [openHelp, defaultTopic]);
|
|
9560
9961
|
return /* @__PURE__ */ jsx53(Container, { width: 4, children: /* @__PURE__ */ jsxs40(Stack34, { padding: 4, space: 4, children: [
|
|
@@ -9645,7 +10046,7 @@ function ailfTool(options = {}) {
|
|
|
9645
10046
|
// src/actions/RunEvaluationAction.tsx
|
|
9646
10047
|
import { BarChartIcon as BarChartIcon2 } from "@sanity/icons";
|
|
9647
10048
|
import { useToast as useToast10 } from "@sanity/ui";
|
|
9648
|
-
import { useCallback as
|
|
10049
|
+
import { useCallback as useCallback29, useEffect as useEffect11, useRef as useRef7, useState as useState22 } from "react";
|
|
9649
10050
|
import {
|
|
9650
10051
|
getReleaseIdFromReleaseDocumentId as getReleaseIdFromReleaseDocumentId3,
|
|
9651
10052
|
useClient as useClient12,
|
|
@@ -9676,8 +10077,8 @@ function createRunEvaluationAction(options = {}) {
|
|
|
9676
10077
|
const projectId = useProjectId2();
|
|
9677
10078
|
const currentUser = useCurrentUser4();
|
|
9678
10079
|
const toast = useToast10();
|
|
9679
|
-
const [state, setState] =
|
|
9680
|
-
const requestedAtRef =
|
|
10080
|
+
const [state, setState] = useState22({ status: "loading" });
|
|
10081
|
+
const requestedAtRef = useRef7(null);
|
|
9681
10082
|
const perspectiveId = getReleaseIdFromReleaseDocumentId3(release._id);
|
|
9682
10083
|
useEffect11(() => {
|
|
9683
10084
|
let cancelled = false;
|
|
@@ -9772,7 +10173,7 @@ function createRunEvaluationAction(options = {}) {
|
|
|
9772
10173
|
}, 15e3);
|
|
9773
10174
|
return () => clearTimeout(timer);
|
|
9774
10175
|
}, [client, perspectiveId, state]);
|
|
9775
|
-
const handleRequest =
|
|
10176
|
+
const handleRequest = useCallback29(async () => {
|
|
9776
10177
|
const releaseTitle = release.metadata?.title ?? perspectiveId ?? "release";
|
|
9777
10178
|
const tag = `release-${slugify3(releaseTitle)}-${dateStamp3()}`;
|
|
9778
10179
|
const now = Date.now();
|