@absolutejs/absolute 0.19.0-beta.534 → 0.19.0-beta.535

This diff compares publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.
@@ -1,4 +1,4 @@
1
- import type { RAGAnswerGroundingCaseDifficultyHistory, RAGAnswerGroundingCaseDifficultyHistoryStore, RAGAnswerGroundingCaseDifficultyRun, RAGAnswerGroundingCaseDifficultyRunDiff, RAGAnswerGroundingEvaluationCase, RAGAnswerGroundingEvaluationCaseDifficultyEntry, RAGAnswerGroundingEvaluationCaseResult, RAGAnswerGroundingEvaluationHistory, RAGAnswerGroundingEvaluationLeaderboardEntry, RAGAnswerGroundingEvaluationHistoryStore, RAGAnswerGroundingEvaluationInput, RAGAnswerGroundingEvaluationResponse, RAGAnswerGroundingEvaluationRun, RAGAnswerGroundingEvaluationRunDiff, RAGCollection, RAGEvaluationCase, RAGEvaluationCaseResult, RAGEvaluationHistory, RAGEvaluationHistoryStore, RAGEvaluationInput, RAGEvaluationLeaderboardEntry, RAGEvaluationResponse, RAGEvaluationRunDiff, RAGLabelValueRow, RAGEvaluationSuite, RAGEvaluationSuiteRun, RAGRetrievalCandidate, RAGRetrievalComparison, RAGRetrievalComparisonEntry, RAGRetrievalComparisonSummary, RAGRerankerCandidate, RAGRerankerComparison, RAGRerankerComparisonEntry, RAGRerankerComparisonSummary, RAGRerankerProviderLike } from '../../../types/ai';
1
+ import type { RAGAnswerGroundingCaseDifficultyHistory, RAGAnswerGroundingCaseDifficultyHistoryStore, RAGAnswerGroundingCaseDifficultyRun, RAGAnswerGroundingCaseDifficultyRunDiff, RAGAnswerGroundingEvaluationCase, RAGAnswerGroundingEvaluationCaseDifficultyEntry, RAGAnswerGroundingEvaluationCaseResult, RAGAnswerGroundingEvaluationHistory, RAGAnswerGroundingEvaluationLeaderboardEntry, RAGAnswerGroundingEvaluationHistoryStore, RAGAnswerGroundingEvaluationInput, RAGAnswerGroundingEvaluationResponse, RAGAnswerGroundingEvaluationRun, RAGAnswerGroundingEvaluationRunDiff, RAGCollection, RAGEvaluationCase, RAGEvaluationCaseTraceSnapshot, RAGEvaluationCaseResult, RAGEvaluationHistory, RAGEvaluationHistoryStore, RAGEvaluationInput, RAGEvaluationLeaderboardEntry, RAGEvaluationResponse, RAGEvaluationRunDiff, RAGLabelValueRow, RAGEvaluationSuite, RAGEvaluationSuiteRun, RAGRetrievalCandidate, RAGRetrievalComparison, RAGRetrievalComparisonEntry, RAGRetrievalTraceComparisonSummary, RAGRetrievalComparisonSummary, RAGRerankerCandidate, RAGRerankerComparison, RAGRerankerComparisonEntry, RAGRerankerComparisonSummary, RAGRerankerProviderLike } from '../../../types/ai';
2
2
  export declare const buildRAGEvaluationLeaderboard: (runs: RAGEvaluationSuiteRun[]) => RAGEvaluationLeaderboardEntry[];
3
3
  export declare const buildRAGAnswerGroundingEvaluationLeaderboard: (runs: RAGAnswerGroundingEvaluationRun[]) => RAGAnswerGroundingEvaluationLeaderboardEntry[];
4
4
  export declare const buildRAGAnswerGroundingCaseDifficultyLeaderboard: (entries: Array<{
@@ -77,11 +77,13 @@ export declare const evaluateRAGCollection: ({ collection, input, defaultTopK, r
77
77
  rerank?: RAGRerankerProviderLike;
78
78
  }) => Promise<RAGEvaluationResponse>;
79
79
  export declare const executeDryRunRAGEvaluation: (input: RAGEvaluationInput, defaultTopK?: number) => RAGEvaluationCaseResult[];
80
- export declare const runRAGEvaluationSuite: ({ suite, evaluate, overrides }: {
80
+ export declare const runRAGEvaluationSuite: ({ suite, evaluate, overrides, artifacts }: {
81
81
  suite: RAGEvaluationSuite;
82
82
  evaluate: (input: RAGEvaluationInput) => Promise<RAGEvaluationResponse>;
83
83
  overrides?: Partial<RAGEvaluationInput>;
84
+ artifacts?: Pick<RAGEvaluationSuiteRun, "traceSummary" | "caseTraceSnapshots">;
84
85
  }) => Promise<{
86
+ caseTraceSnapshots: RAGEvaluationCaseTraceSnapshot[] | undefined;
85
87
  elapsedMs: number;
86
88
  finishedAt: number;
87
89
  id: `${string}-${string}-${string}-${string}-${string}`;
@@ -90,6 +92,7 @@ export declare const runRAGEvaluationSuite: ({ suite, evaluate, overrides }: {
90
92
  response: RAGEvaluationResponse;
91
93
  startedAt: number;
92
94
  suiteId: string;
95
+ traceSummary: RAGRetrievalTraceComparisonSummary | undefined;
93
96
  }>;
94
97
  export declare const summarizeRAGEvaluationCase: ({ caseIndex, caseInput, query, mode, retrievedIds, expectedIds, elapsedMs }: {
95
98
  caseIndex: number;
@@ -40,6 +40,7 @@ export declare const useRAG: (path: string, options?: UseRAGOptions) => {
40
40
  removeSuite: (id: string) => void;
41
41
  reset: () => void;
42
42
  runSuite: (suite: import("../..").RAGEvaluationSuite, overrides?: Partial<import("../..").RAGEvaluationInput>) => Promise<{
43
+ caseTraceSnapshots: import("../..").RAGEvaluationCaseTraceSnapshot[] | undefined;
43
44
  elapsedMs: number;
44
45
  finishedAt: number;
45
46
  id: `${string}-${string}-${string}-${string}-${string}`;
@@ -48,6 +49,7 @@ export declare const useRAG: (path: string, options?: UseRAGOptions) => {
48
49
  response: import("../..").RAGEvaluationResponse;
49
50
  startedAt: number;
50
51
  suiteId: string;
52
+ traceSummary: import("../..").RAGRetrievalTraceComparisonSummary | undefined;
51
53
  }>;
52
54
  saveSuite: (suite: import("../..").RAGEvaluationSuite) => import("../..").RAGEvaluationSuite;
53
55
  suiteRuns: import("../..").RAGEvaluationSuiteRun[];
@@ -10,6 +10,7 @@ export declare const useRAGEvaluate: (path: string) => {
10
10
  removeSuite: (id: string) => void;
11
11
  reset: () => void;
12
12
  runSuite: (suite: RAGEvaluationSuite, overrides?: Partial<RAGEvaluationInput>) => Promise<{
13
+ caseTraceSnapshots: import("../..").RAGEvaluationCaseTraceSnapshot[] | undefined;
13
14
  elapsedMs: number;
14
15
  finishedAt: number;
15
16
  id: `${string}-${string}-${string}-${string}-${string}`;
@@ -18,6 +19,7 @@ export declare const useRAGEvaluate: (path: string) => {
18
19
  response: RAGEvaluationResponse;
19
20
  startedAt: number;
20
21
  suiteId: string;
22
+ traceSummary: import("../..").RAGRetrievalTraceComparisonSummary | undefined;
21
23
  }>;
22
24
  saveSuite: (suite: RAGEvaluationSuite) => RAGEvaluationSuite;
23
25
  suiteRuns: RAGEvaluationSuiteRun[];
@@ -40,6 +40,7 @@ export declare const createRAG: (path: string, options?: CreateRAGOptions) => {
40
40
  removeSuite: (id: string) => void;
41
41
  reset: () => void;
42
42
  runSuite: (suite: import("../..").RAGEvaluationSuite, overrides?: Partial<import("../..").RAGEvaluationInput>) => Promise<{
43
+ caseTraceSnapshots: import("../..").RAGEvaluationCaseTraceSnapshot[] | undefined;
43
44
  elapsedMs: number;
44
45
  finishedAt: number;
45
46
  id: `${string}-${string}-${string}-${string}-${string}`;
@@ -48,6 +49,7 @@ export declare const createRAG: (path: string, options?: CreateRAGOptions) => {
48
49
  response: import("../..").RAGEvaluationResponse;
49
50
  startedAt: number;
50
51
  suiteId: string;
52
+ traceSummary: import("../..").RAGRetrievalTraceComparisonSummary | undefined;
51
53
  }>;
52
54
  saveSuite: (suite: import("../..").RAGEvaluationSuite) => import("../..").RAGEvaluationSuite;
53
55
  suiteRuns: import("svelte/store").Writable<import("../..").RAGEvaluationSuiteRun[]>;
@@ -10,6 +10,7 @@ export declare const createRAGEvaluate: (path: string) => {
10
10
  removeSuite: (id: string) => void;
11
11
  reset: () => void;
12
12
  runSuite: (suite: RAGEvaluationSuite, overrides?: Partial<RAGEvaluationInput>) => Promise<{
13
+ caseTraceSnapshots: import("../..").RAGEvaluationCaseTraceSnapshot[] | undefined;
13
14
  elapsedMs: number;
14
15
  finishedAt: number;
15
16
  id: `${string}-${string}-${string}-${string}-${string}`;
@@ -18,6 +19,7 @@ export declare const createRAGEvaluate: (path: string) => {
18
19
  response: RAGEvaluationResponse;
19
20
  startedAt: number;
20
21
  suiteId: string;
22
+ traceSummary: import("../..").RAGRetrievalTraceComparisonSummary | undefined;
21
23
  }>;
22
24
  saveSuite: (suite: RAGEvaluationSuite) => RAGEvaluationSuite;
23
25
  suiteRuns: import("svelte/store").Writable<RAGEvaluationSuiteRun[]>;
@@ -254,6 +254,7 @@ export declare const useRAG: (path: string, options?: UseRAGOptions) => {
254
254
  removeSuite: (id: string) => void;
255
255
  reset: () => void;
256
256
  runSuite: (suite: import("../..").RAGEvaluationSuite, overrides?: Partial<import("../..").RAGEvaluationInput>) => Promise<{
257
+ caseTraceSnapshots: import("../..").RAGEvaluationCaseTraceSnapshot[] | undefined;
257
258
  elapsedMs: number;
258
259
  finishedAt: number;
259
260
  id: `${string}-${string}-${string}-${string}-${string}`;
@@ -262,6 +263,7 @@ export declare const useRAG: (path: string, options?: UseRAGOptions) => {
262
263
  response: import("../..").RAGEvaluationResponse;
263
264
  startedAt: number;
264
265
  suiteId: string;
266
+ traceSummary: import("../..").RAGRetrievalTraceComparisonSummary | undefined;
265
267
  }>;
266
268
  saveSuite: (suite: import("../..").RAGEvaluationSuite) => import("../..").RAGEvaluationSuite;
267
269
  suiteRuns: import("vue").Ref<{
@@ -331,6 +333,51 @@ export declare const useRAG: (path: string, options?: UseRAGOptions) => {
331
333
  finalize?: number | undefined;
332
334
  };
333
335
  } | undefined;
336
+ caseTraceSnapshots?: {
337
+ caseId: string;
338
+ label?: string | undefined;
339
+ query: string;
340
+ status: import("../..").RAGEvaluationCaseResult["status"];
341
+ traceMode?: import("../..").RAGHybridRetrievalMode | undefined;
342
+ previousTraceMode?: import("../..").RAGHybridRetrievalMode | undefined;
343
+ transformedQuery?: string | undefined;
344
+ previousTransformedQuery?: string | undefined;
345
+ variantQueries: string[];
346
+ previousVariantQueries: string[];
347
+ finalCount: number;
348
+ previousFinalCount?: number | undefined;
349
+ vectorCount: number;
350
+ previousVectorCount?: number | undefined;
351
+ lexicalCount: number;
352
+ previousLexicalCount?: number | undefined;
353
+ candidateTopK: number;
354
+ previousCandidateTopK?: number | undefined;
355
+ lexicalTopK: number;
356
+ previousLexicalTopK?: number | undefined;
357
+ stageCounts: {
358
+ input?: number | undefined;
359
+ embed?: number | undefined;
360
+ query_transform?: number | undefined;
361
+ vector_search?: number | undefined;
362
+ lexical_search?: number | undefined;
363
+ fusion?: number | undefined;
364
+ rerank?: number | undefined;
365
+ score_filter?: number | undefined;
366
+ finalize?: number | undefined;
367
+ };
368
+ previousStageCounts: {
369
+ input?: number | undefined;
370
+ embed?: number | undefined;
371
+ query_transform?: number | undefined;
372
+ vector_search?: number | undefined;
373
+ lexical_search?: number | undefined;
374
+ fusion?: number | undefined;
375
+ rerank?: number | undefined;
376
+ score_filter?: number | undefined;
377
+ finalize?: number | undefined;
378
+ };
379
+ traceChange: "new" | "changed" | "unchanged";
380
+ }[] | undefined;
334
381
  metadata?: Record<string, unknown> | undefined;
335
382
  }[], import("../..").RAGEvaluationSuiteRun[] | {
336
383
  id: string;
@@ -399,6 +446,51 @@ export declare const useRAG: (path: string, options?: UseRAGOptions) => {
399
446
  finalize?: number | undefined;
400
447
  };
401
448
  } | undefined;
449
+ caseTraceSnapshots?: {
450
+ caseId: string;
451
+ label?: string | undefined;
452
+ query: string;
453
+ status: import("../..").RAGEvaluationCaseResult["status"];
454
+ traceMode?: import("../..").RAGHybridRetrievalMode | undefined;
455
+ previousTraceMode?: import("../..").RAGHybridRetrievalMode | undefined;
456
+ transformedQuery?: string | undefined;
457
+ previousTransformedQuery?: string | undefined;
458
+ variantQueries: string[];
459
+ previousVariantQueries: string[];
460
+ finalCount: number;
461
+ previousFinalCount?: number | undefined;
462
+ vectorCount: number;
463
+ previousVectorCount?: number | undefined;
464
+ lexicalCount: number;
465
+ previousLexicalCount?: number | undefined;
466
+ candidateTopK: number;
467
+ previousCandidateTopK?: number | undefined;
468
+ lexicalTopK: number;
469
+ previousLexicalTopK?: number | undefined;
470
+ stageCounts: {
471
+ input?: number | undefined;
472
+ embed?: number | undefined;
473
+ query_transform?: number | undefined;
474
+ vector_search?: number | undefined;
475
+ lexical_search?: number | undefined;
476
+ fusion?: number | undefined;
477
+ rerank?: number | undefined;
478
+ score_filter?: number | undefined;
479
+ finalize?: number | undefined;
480
+ };
481
+ previousStageCounts: {
482
+ input?: number | undefined;
483
+ embed?: number | undefined;
484
+ query_transform?: number | undefined;
485
+ vector_search?: number | undefined;
486
+ lexical_search?: number | undefined;
487
+ fusion?: number | undefined;
488
+ rerank?: number | undefined;
489
+ score_filter?: number | undefined;
490
+ finalize?: number | undefined;
491
+ };
492
+ traceChange: "new" | "changed" | "unchanged";
493
+ }[] | undefined;
402
494
  metadata?: Record<string, unknown> | undefined;
403
495
  }[]>;
404
496
  suites: import("vue").Ref<{
@@ -118,6 +118,7 @@ export declare const useRAGEvaluate: (path: string) => {
118
118
  removeSuite: (id: string) => void;
119
119
  reset: () => void;
120
120
  runSuite: (suite: RAGEvaluationSuite, overrides?: Partial<RAGEvaluationInput>) => Promise<{
121
+ caseTraceSnapshots: import("../..").RAGEvaluationCaseTraceSnapshot[] | undefined;
121
122
  elapsedMs: number;
122
123
  finishedAt: number;
123
124
  id: `${string}-${string}-${string}-${string}-${string}`;
@@ -126,6 +127,7 @@ export declare const useRAGEvaluate: (path: string) => {
126
127
  response: RAGEvaluationResponse;
127
128
  startedAt: number;
128
129
  suiteId: string;
130
+ traceSummary: import("../..").RAGRetrievalTraceComparisonSummary | undefined;
129
131
  }>;
130
132
  saveSuite: (suite: RAGEvaluationSuite) => RAGEvaluationSuite;
131
133
  suiteRuns: import("vue").Ref<{
@@ -195,6 +197,51 @@ export declare const useRAGEvaluate: (path: string) => {
195
197
  finalize?: number | undefined;
196
198
  };
197
199
  } | undefined;
200
+ caseTraceSnapshots?: {
201
+ caseId: string;
202
+ label?: string | undefined;
203
+ query: string;
204
+ status: import("../..").RAGEvaluationCaseResult["status"];
205
+ traceMode?: import("../..").RAGHybridRetrievalMode | undefined;
206
+ previousTraceMode?: import("../..").RAGHybridRetrievalMode | undefined;
207
+ transformedQuery?: string | undefined;
208
+ previousTransformedQuery?: string | undefined;
209
+ variantQueries: string[];
210
+ previousVariantQueries: string[];
211
+ finalCount: number;
212
+ previousFinalCount?: number | undefined;
213
+ vectorCount: number;
214
+ previousVectorCount?: number | undefined;
215
+ lexicalCount: number;
216
+ previousLexicalCount?: number | undefined;
217
+ candidateTopK: number;
218
+ previousCandidateTopK?: number | undefined;
219
+ lexicalTopK: number;
220
+ previousLexicalTopK?: number | undefined;
221
+ stageCounts: {
222
+ input?: number | undefined;
223
+ embed?: number | undefined;
224
+ query_transform?: number | undefined;
225
+ vector_search?: number | undefined;
226
+ lexical_search?: number | undefined;
227
+ fusion?: number | undefined;
228
+ rerank?: number | undefined;
229
+ score_filter?: number | undefined;
230
+ finalize?: number | undefined;
231
+ };
232
+ previousStageCounts: {
233
+ input?: number | undefined;
234
+ embed?: number | undefined;
235
+ query_transform?: number | undefined;
236
+ vector_search?: number | undefined;
237
+ lexical_search?: number | undefined;
238
+ fusion?: number | undefined;
239
+ rerank?: number | undefined;
240
+ score_filter?: number | undefined;
241
+ finalize?: number | undefined;
242
+ };
243
+ traceChange: "new" | "changed" | "unchanged";
244
+ }[] | undefined;
198
245
  metadata?: Record<string, unknown> | undefined;
199
246
  }[], RAGEvaluationSuiteRun[] | {
200
247
  id: string;
@@ -263,6 +310,51 @@ export declare const useRAGEvaluate: (path: string) => {
263
310
  finalize?: number | undefined;
264
311
  };
265
312
  } | undefined;
313
+ caseTraceSnapshots?: {
314
+ caseId: string;
315
+ label?: string | undefined;
316
+ query: string;
317
+ status: import("../..").RAGEvaluationCaseResult["status"];
318
+ traceMode?: import("../..").RAGHybridRetrievalMode | undefined;
319
+ previousTraceMode?: import("../..").RAGHybridRetrievalMode | undefined;
320
+ transformedQuery?: string | undefined;
321
+ previousTransformedQuery?: string | undefined;
322
+ variantQueries: string[];
323
+ previousVariantQueries: string[];
324
+ finalCount: number;
325
+ previousFinalCount?: number | undefined;
326
+ vectorCount: number;
327
+ previousVectorCount?: number | undefined;
328
+ lexicalCount: number;
329
+ previousLexicalCount?: number | undefined;
330
+ candidateTopK: number;
331
+ previousCandidateTopK?: number | undefined;
332
+ lexicalTopK: number;
333
+ previousLexicalTopK?: number | undefined;
334
+ stageCounts: {
335
+ input?: number | undefined;
336
+ embed?: number | undefined;
337
+ query_transform?: number | undefined;
338
+ vector_search?: number | undefined;
339
+ lexical_search?: number | undefined;
340
+ fusion?: number | undefined;
341
+ rerank?: number | undefined;
342
+ score_filter?: number | undefined;
343
+ finalize?: number | undefined;
344
+ };
345
+ previousStageCounts: {
346
+ input?: number | undefined;
347
+ embed?: number | undefined;
348
+ query_transform?: number | undefined;
349
+ vector_search?: number | undefined;
350
+ lexical_search?: number | undefined;
351
+ fusion?: number | undefined;
352
+ rerank?: number | undefined;
353
+ score_filter?: number | undefined;
354
+ finalize?: number | undefined;
355
+ };
356
+ traceChange: "new" | "changed" | "unchanged";
357
+ }[] | undefined;
266
358
  metadata?: Record<string, unknown> | undefined;
267
359
  }[]>;
268
360
  suites: import("vue").Ref<{
@@ -2157,6 +2157,98 @@ var buildGroundingCaseSnapshots = ({
2157
2157
  };
2158
2158
  });
2159
2159
  };
2160
+ var areStageCountsEqual = (left, right) => {
2161
+ const keys = new Set([
2162
+ ...Object.keys(left),
2163
+ ...Object.keys(right)
2164
+ ]);
2165
+ for (const key of keys) {
2166
+ if ((left[key] ?? 0) !== (right[key] ?? 0)) {
2167
+ return false;
2168
+ }
2169
+ }
2170
+ return true;
2171
+ };
2172
+ var buildEvaluationCaseTraceSnapshot = ({
2173
+ caseResult,
2174
+ currentTrace,
2175
+ previousTrace
2176
+ }) => {
2177
+ const stageCounts = currentTrace ? buildTraceStageCounts([currentTrace]) : {};
2178
+ const previousStageCounts = previousTrace?.stageCounts ?? {};
2179
+ const traceChange = !previousTrace ? currentTrace ? "new" : "unchanged" : previousTrace.traceMode !== currentTrace?.mode || previousTrace.transformedQuery !== (currentTrace?.transformedQuery || undefined) || previousTrace.variantQueries.join("|") !== (currentTrace?.variantQueries ?? []).join("|") || previousTrace.finalCount !== (currentTrace?.resultCounts.final ?? 0) || previousTrace.vectorCount !== (currentTrace?.resultCounts.vector ?? 0) || previousTrace.lexicalCount !== (currentTrace?.resultCounts.lexical ?? 0) || previousTrace.candidateTopK !== (currentTrace?.candidateTopK ?? 0) || previousTrace.lexicalTopK !== (currentTrace?.lexicalTopK ?? 0) || !areStageCountsEqual(previousStageCounts, stageCounts) ? "changed" : "unchanged";
2180
+ return {
2181
+ candidateTopK: currentTrace?.candidateTopK ?? 0,
2182
+ caseId: caseResult.caseId,
2183
+ finalCount: currentTrace?.resultCounts.final ?? 0,
2184
+ label: caseResult.label,
2185
+ lexicalCount: currentTrace?.resultCounts.lexical ?? 0,
2186
+ lexicalTopK: currentTrace?.lexicalTopK ?? 0,
2187
+ previousCandidateTopK: previousTrace?.candidateTopK,
2188
+ previousFinalCount: previousTrace?.finalCount,
2189
+ previousLexicalCount: previousTrace?.lexicalCount,
2190
+ previousLexicalTopK: previousTrace?.lexicalTopK,
2191
+ previousStageCounts,
2192
+ previousTraceMode: previousTrace?.traceMode,
2193
+ previousTransformedQuery: previousTrace?.transformedQuery,
2194
+ previousVariantQueries: previousTrace?.variantQueries ?? [],
2195
+ previousVectorCount: previousTrace?.vectorCount,
2196
+ query: caseResult.query,
2197
+ stageCounts,
2198
+ status: caseResult.status,
2199
+ traceChange,
2200
+ traceMode: currentTrace?.mode,
2201
+ transformedQuery: currentTrace?.transformedQuery || undefined,
2202
+ variantQueries: currentTrace?.variantQueries ?? [],
2203
+ vectorCount: currentTrace?.resultCounts.vector ?? 0
2204
+ };
2205
+ };
2206
+ var buildEvaluationCaseTraceSnapshotsFromEvaluated = (evaluated) => evaluated.map(({ caseResult, trace }) => buildEvaluationCaseTraceSnapshot({
2207
+ caseResult,
2208
+ currentTrace: trace
2209
+ }));
2210
+ var buildEvaluationCaseTraceSnapshots = ({
2211
+ current,
2212
+ previous
2213
+ }) => {
2214
+ if (!current) {
2215
+ return [];
2216
+ }
2217
+ const currentTraces = new Map((current.caseTraceSnapshots ?? []).map((entry) => [entry.caseId, entry]));
2218
+ const previousTraces = new Map((previous?.caseTraceSnapshots ?? []).map((entry) => [
2219
+ entry.caseId,
2220
+ entry
2221
+ ]));
2222
+ return current.response.cases.map((caseResult) => buildEvaluationCaseTraceSnapshot({
2223
+ caseResult,
2224
+ currentTrace: (() => {
2225
+ const currentSnapshot = currentTraces.get(caseResult.caseId);
2226
+ if (!currentSnapshot) {
2227
+ return;
2228
+ }
2229
+ return {
2230
+ candidateTopK: currentSnapshot.candidateTopK,
2231
+ lexicalTopK: currentSnapshot.lexicalTopK,
2232
+ mode: currentSnapshot.traceMode ?? "vector",
2233
+ query: caseResult.query,
2234
+ resultCounts: {
2235
+ final: currentSnapshot.finalCount,
2236
+ fused: currentSnapshot.finalCount,
2237
+ lexical: currentSnapshot.lexicalCount,
2238
+ reranked: currentSnapshot.finalCount,
2239
+ vector: currentSnapshot.vectorCount
2240
+ },
2241
+ runLexical: currentSnapshot.lexicalCount > 0,
2242
+ runVector: currentSnapshot.vectorCount > 0,
2243
+ steps: [],
2244
+ topK: caseResult.topK,
2245
+ transformedQuery: currentSnapshot.transformedQuery ?? caseResult.query,
2246
+ variantQueries: currentSnapshot.variantQueries
2247
+ };
2248
+ })(),
2249
+ previousTrace: previousTraces.get(caseResult.caseId)
2250
+ }));
2251
+ };
2160
2252
  var getStatusRank = (status) => status === "pass" ? 2 : status === "partial" ? 1 : 0;
2161
2253
  var formatSignedDelta = (value, decimals = 0, suffix = "") => `${value >= 0 ? "+" : ""}${value.toFixed(decimals)}${suffix}`;
2162
2254
  var formatEvaluationSummary = (response) => `${response.summary.passedCases}/${response.totalCases} pass \xB7 f1 ${response.summary.averageF1.toFixed(3)} \xB7 latency ${response.summary.averageLatencyMs.toFixed(1)}ms`;
@@ -2316,6 +2408,13 @@ var buildRAGEvaluationHistoryRows = (history) => {
2316
2408
  rows.push({ label: "Trace stage delta", value: stageDelta });
2317
2409
  }
2318
2410
  }
2411
+ if (history.caseTraceSnapshots.length > 0) {
2412
+ const changedCases = history.caseTraceSnapshots.filter((entry) => entry.traceChange === "changed");
2413
+ rows.push({
2414
+ label: "Trace drift cases",
2415
+ value: changedCases.length > 0 ? changedCases.map((entry) => entry.label ?? entry.caseId).slice(0, 4).join(", ") : "none"
2416
+ });
2417
+ }
2319
2418
  return rows;
2320
2419
  };
2321
2420
  var buildRAGEvaluationRunDiff = ({
@@ -2505,6 +2604,10 @@ var loadRAGEvaluationHistory = async ({
2505
2604
  const latestRun = runs[0];
2506
2605
  const previousRun = runs[1];
2507
2606
  return {
2607
+ caseTraceSnapshots: buildEvaluationCaseTraceSnapshots({
2608
+ current: latestRun,
2609
+ previous: previousRun
2610
+ }),
2508
2611
  diff: latestRun && previousRun ? buildRAGEvaluationRunDiff({
2509
2612
  current: latestRun,
2510
2613
  previous: previousRun
@@ -2716,6 +2819,7 @@ var compareRAGRerankers = async ({
2716
2819
  });
2717
2820
  const response = buildRAGEvaluationResponse(evaluated.map((entry) => entry.caseResult));
2718
2821
  return {
2822
+ caseTraceSnapshots: buildEvaluationCaseTraceSnapshotsFromEvaluated(evaluated),
2719
2823
  label: candidate.label ?? candidate.id,
2720
2824
  providerName: typeof candidate.rerank === "function" ? undefined : candidate.rerank?.providerName,
2721
2825
  response,
@@ -2802,6 +2906,7 @@ var compareRAGRetrievalStrategies = async ({
2802
2906
  });
2803
2907
  const response = buildRAGEvaluationResponse(evaluated.map((entry) => entry.caseResult));
2804
2908
  return {
2909
+ caseTraceSnapshots: buildEvaluationCaseTraceSnapshotsFromEvaluated(evaluated),
2805
2910
  label: candidate.label ?? candidate.id,
2806
2911
  response,
2807
2912
  retrievalId: candidate.id,
@@ -2870,7 +2975,8 @@ var executeDryRunRAGEvaluation = (input, defaultTopK = DEFAULT_TOP_K) => input.c
2870
2975
  var runRAGEvaluationSuite = async ({
2871
2976
  suite,
2872
2977
  evaluate,
2873
- overrides
2978
+ overrides,
2979
+ artifacts
2874
2980
  }) => {
2875
2981
  const startedAt = Date.now();
2876
2982
  const response = await evaluate({
@@ -2880,6 +2986,7 @@ var runRAGEvaluationSuite = async ({
2880
2986
  });
2881
2987
  const finishedAt = Date.now();
2882
2988
  return {
2989
+ caseTraceSnapshots: artifacts?.caseTraceSnapshots,
2883
2990
  elapsedMs: finishedAt - startedAt,
2884
2991
  finishedAt,
2885
2992
  id: generateId(),
@@ -2887,7 +2994,8 @@ var runRAGEvaluationSuite = async ({
2887
2994
  metadata: suite.metadata,
2888
2995
  response,
2889
2996
  startedAt,
2890
- suiteId: suite.id
2997
+ suiteId: suite.id,
2998
+ traceSummary: artifacts?.traceSummary
2891
2999
  };
2892
3000
  };
2893
3001
  var summarizeRAGEvaluationCase = ({
@@ -3614,5 +3722,5 @@ export {
3614
3722
  createAIStream
3615
3723
  };
3616
3724
 
3617
- //# debugId=E92A24FDA13560A364756E2164756E21
3725
+ //# debugId=609C67383E3D7B6464756E2164756E21
3618
3726
  //# sourceMappingURL=index.js.map