@sanity/ailf-studio 1.2.1 → 1.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.d.ts +47 -1
- package/dist/index.js +838 -598
- package/package.json +1 -1
package/dist/index.d.ts
CHANGED
|
@@ -434,6 +434,14 @@ interface ScoreItem {
|
|
|
434
434
|
/** Ceiling score — gold-standard docs injected */
|
|
435
435
|
ceilingScore?: number;
|
|
436
436
|
}
|
|
437
|
+
/** Reference to an artifact stored in an external object store (GCS). */
|
|
438
|
+
interface ArtifactRef {
|
|
439
|
+
store: "gcs";
|
|
440
|
+
bucket: string;
|
|
441
|
+
path: string;
|
|
442
|
+
bytes?: number;
|
|
443
|
+
entryCount?: number;
|
|
444
|
+
}
|
|
437
445
|
/** A single gap/recommendation from gap analysis */
|
|
438
446
|
interface RecommendationGap {
|
|
439
447
|
affectedTaskIds: string[];
|
|
@@ -451,12 +459,41 @@ interface RecommendationsData {
|
|
|
451
459
|
generatedAt: string;
|
|
452
460
|
totalPotentialLift: number;
|
|
453
461
|
}
|
|
462
|
+
/**
|
|
463
|
+
* Per-test result stored in reports for drill-down and audit.
|
|
464
|
+
* Mirrors StoredTestResult from @sanity/ailf-core. See D0029.
|
|
465
|
+
*/
|
|
466
|
+
interface StoredTestResultData {
|
|
467
|
+
area: string;
|
|
468
|
+
canonicalDocs?: DocumentRef[];
|
|
469
|
+
compositeScore?: number;
|
|
470
|
+
cost?: number;
|
|
471
|
+
dimensions: {
|
|
472
|
+
dimension: string;
|
|
473
|
+
reason: string;
|
|
474
|
+
score: number;
|
|
475
|
+
}[];
|
|
476
|
+
latencyMs?: number;
|
|
477
|
+
modelId: string;
|
|
478
|
+
outputFailure?: boolean;
|
|
479
|
+
responseOutput: string;
|
|
480
|
+
responseOutputTruncated?: boolean;
|
|
481
|
+
taskId: string;
|
|
482
|
+
tokenUsage?: {
|
|
483
|
+
completion: number;
|
|
484
|
+
prompt: number;
|
|
485
|
+
total: number;
|
|
486
|
+
};
|
|
487
|
+
variant: "baseline" | "gold";
|
|
488
|
+
}
|
|
454
489
|
/** A single low-scoring grader judgment stored in reports */
|
|
455
490
|
interface JudgmentData {
|
|
456
491
|
/** Docs the task expected the model to use */
|
|
457
492
|
canonicalDocs?: DocumentRef[];
|
|
458
493
|
dimension: string;
|
|
459
494
|
modelId: string;
|
|
495
|
+
/** True when the model failed to produce output (empty response, API error, refusal) */
|
|
496
|
+
outputFailure?: boolean;
|
|
460
497
|
reason: string;
|
|
461
498
|
score: number;
|
|
462
499
|
taskId: string;
|
|
@@ -497,6 +534,13 @@ interface PerModelData {
|
|
|
497
534
|
interface SummaryData {
|
|
498
535
|
/** Per-feature agent behavior data (only present when agentic mode ran) */
|
|
499
536
|
agentBehavior?: FeatureAgentBehaviorData[] | null;
|
|
537
|
+
/** External artifact references — present when pipeline uploads to GCS (D0030) */
|
|
538
|
+
artifacts?: {
|
|
539
|
+
testOutputs?: ArtifactRef;
|
|
540
|
+
renderedPrompts?: ArtifactRef;
|
|
541
|
+
rawResults?: ArtifactRef;
|
|
542
|
+
traces?: ArtifactRef;
|
|
543
|
+
};
|
|
500
544
|
belowCritical: string[];
|
|
501
545
|
/** All Sanity documents used across the entire evaluation */
|
|
502
546
|
documentManifest?: DocumentRef[];
|
|
@@ -521,6 +565,8 @@ interface SummaryData {
|
|
|
521
565
|
/** Gap analysis recommendations (when gap analysis was run) */
|
|
522
566
|
recommendations: null | RecommendationsData;
|
|
523
567
|
scores: ScoreItem[];
|
|
568
|
+
/** Per-test results with model output and metadata (D0029) */
|
|
569
|
+
testResults?: StoredTestResultData[] | null;
|
|
524
570
|
timestamp: string;
|
|
525
571
|
}
|
|
526
572
|
/** Shape returned by scoreTimelineQuery */
|
|
@@ -732,7 +778,7 @@ declare const scoreTimelineQuery: string;
|
|
|
732
778
|
*
|
|
733
779
|
* Used by: ReportDetail view
|
|
734
780
|
*/
|
|
735
|
-
declare const reportDetailQuery = "\n *[_type == \"ailf.report\" && reportId == $reportId][0] {\n _id,\n reportId,\n completedAt,\n durationMs,\n tag,\n title,\n provenance,\n summary,\n comparison\n }\n";
|
|
781
|
+
declare const reportDetailQuery = "\n *[_type == \"ailf.report\" && reportId == $reportId][0] {\n _id,\n reportId,\n completedAt,\n durationMs,\n tag,\n title,\n provenance,\n summary,\n \"comparison\": comparison {\n areas,\n deltas,\n generatedAt,\n improved,\n mismatched,\n noiseThreshold,\n noiseThresholdEmpirical,\n notEvaluated,\n regressed,\n unchanged\n }\n }\n";
|
|
736
782
|
/**
|
|
737
783
|
* Find all reports that evaluated a specific Sanity document or perspective.
|
|
738
784
|
*
|