@absolutejs/absolute 0.19.0-beta.534 → 0.19.0-beta.535

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1195,6 +1195,7 @@ export type RAGEvaluationSuiteRun = {
1195
1195
  elapsedMs: number;
1196
1196
  response: RAGEvaluationResponse;
1197
1197
  traceSummary?: RAGRetrievalTraceComparisonSummary;
1198
+ caseTraceSnapshots?: RAGEvaluationCaseTraceSnapshot[];
1198
1199
  metadata?: Record<string, unknown>;
1199
1200
  };
1200
1201
  export type RAGEvaluationHistoryStore = {
@@ -1246,11 +1247,37 @@ export type RAGEvaluationRunDiff = {
1246
1247
  stageCounts: Partial<Record<RAGRetrievalTraceStage, number>>;
1247
1248
  };
1248
1249
  };
1250
/**
 * Per-case summary of a retrieval trace for one evaluation case, paired with
 * the same figures from an earlier snapshot of that case so the two can be
 * compared side by side.
 *
 * Fields without a `previous` prefix describe the current trace; `previous*`
 * fields describe the earlier snapshot and are left unset (or empty, for the
 * list/map fields) when no earlier snapshot is available.
 */
export type RAGEvaluationCaseTraceSnapshot = {
	/** Identifier of the evaluation case this snapshot belongs to. */
	caseId: string;
	/** Optional display label of the case. */
	label?: string;
	/** Query text of the evaluation case. */
	query: string;
	/** Evaluation status of the case. */
	status: RAGEvaluationCaseResult['status'];

	/** How this snapshot relates to the earlier one for the same case. */
	traceChange: 'new' | 'changed' | 'unchanged';

	/** Retrieval mode recorded for the current / earlier trace. */
	traceMode?: RAGHybridRetrievalMode;
	previousTraceMode?: RAGHybridRetrievalMode;

	/** Transformed query recorded for the current / earlier trace, if any. */
	transformedQuery?: string;
	previousTransformedQuery?: string;

	/** Variant queries recorded for the current / earlier trace. */
	variantQueries: string[];
	previousVariantQueries: string[];

	/** Number of final results for the current / earlier trace. */
	finalCount: number;
	previousFinalCount?: number;

	/** Number of vector results for the current / earlier trace. */
	vectorCount: number;
	previousVectorCount?: number;

	/** Number of lexical results for the current / earlier trace. */
	lexicalCount: number;
	previousLexicalCount?: number;

	/** Candidate top-K setting for the current / earlier trace. */
	candidateTopK: number;
	previousCandidateTopK?: number;

	/** Lexical top-K setting for the current / earlier trace. */
	lexicalTopK: number;
	previousLexicalTopK?: number;

	/** Per-stage counts for the current / earlier trace. */
	stageCounts: Partial<Record<RAGRetrievalTraceStage, number>>;
	previousStageCounts: Partial<Record<RAGRetrievalTraceStage, number>>;
};
1249
1275
  export type RAGEvaluationHistory = {
1250
1276
  suiteId: string;
1251
1277
  suiteLabel?: string;
1252
1278
  runs: RAGEvaluationSuiteRun[];
1253
1279
  leaderboard: RAGEvaluationLeaderboardEntry[];
1280
+ caseTraceSnapshots: RAGEvaluationCaseTraceSnapshot[];
1254
1281
  latestRun?: RAGEvaluationSuiteRun;
1255
1282
  previousRun?: RAGEvaluationSuiteRun;
1256
1283
  diff?: RAGEvaluationRunDiff;
@@ -1301,6 +1328,7 @@ export type RAGRerankerComparisonEntry = {
1301
1328
  providerName?: string;
1302
1329
  response: RAGEvaluationResponse;
1303
1330
  traceSummary?: RAGRetrievalTraceComparisonSummary;
1331
+ caseTraceSnapshots?: RAGEvaluationCaseTraceSnapshot[];
1304
1332
  };
1305
1333
  export type RAGRerankerComparisonSummary = {
1306
1334
  bestByPassingRate?: string;
@@ -1320,6 +1348,7 @@ export type RAGRetrievalComparisonEntry = {
1320
1348
  retrievalMode: RAGHybridRetrievalMode;
1321
1349
  response: RAGEvaluationResponse;
1322
1350
  traceSummary?: RAGRetrievalTraceComparisonSummary;
1351
+ caseTraceSnapshots?: RAGEvaluationCaseTraceSnapshot[];
1323
1352
  };
1324
1353
  export type RAGRetrievalComparisonSummary = {
1325
1354
  bestByPassingRate?: string;
@@ -2154,6 +2154,98 @@ var buildGroundingCaseSnapshots = ({
2154
2154
  };
2155
2155
  });
2156
2156
  };
2157
/**
 * Reports whether two stage-count maps agree on every stage.
 *
 * A stage that is absent (or null/undefined) in one map is treated as a count
 * of 0, so `{ rerank: 0 }` and `{}` compare equal.
 *
 * @param left  First stage-count map.
 * @param right Second stage-count map.
 * @returns `true` when every stage present in either map has the same count.
 */
var areStageCountsEqual = (left, right) => {
	const countOf = (counts, stage) => counts[stage] ?? 0;
	// Duplicated stage names are harmless here: the check is idempotent.
	const stages = [...Object.keys(left), ...Object.keys(right)];
	return stages.every((stage) => countOf(left, stage) === countOf(right, stage));
};
2169
+ var buildEvaluationCaseTraceSnapshot = ({
2170
+ caseResult,
2171
+ currentTrace,
2172
+ previousTrace
2173
+ }) => {
2174
+ const stageCounts = currentTrace ? buildTraceStageCounts([currentTrace]) : {};
2175
+ const previousStageCounts = previousTrace?.stageCounts ?? {};
2176
+ const traceChange = !previousTrace ? currentTrace ? "new" : "unchanged" : previousTrace.traceMode !== currentTrace?.mode || previousTrace.transformedQuery !== (currentTrace?.transformedQuery || undefined) || previousTrace.variantQueries.join("|") !== (currentTrace?.variantQueries ?? []).join("|") || previousTrace.finalCount !== (currentTrace?.resultCounts.final ?? 0) || previousTrace.vectorCount !== (currentTrace?.resultCounts.vector ?? 0) || previousTrace.lexicalCount !== (currentTrace?.resultCounts.lexical ?? 0) || previousTrace.candidateTopK !== (currentTrace?.candidateTopK ?? 0) || previousTrace.lexicalTopK !== (currentTrace?.lexicalTopK ?? 0) || !areStageCountsEqual(previousStageCounts, stageCounts) ? "changed" : "unchanged";
2177
+ return {
2178
+ candidateTopK: currentTrace?.candidateTopK ?? 0,
2179
+ caseId: caseResult.caseId,
2180
+ finalCount: currentTrace?.resultCounts.final ?? 0,
2181
+ label: caseResult.label,
2182
+ lexicalCount: currentTrace?.resultCounts.lexical ?? 0,
2183
+ lexicalTopK: currentTrace?.lexicalTopK ?? 0,
2184
+ previousCandidateTopK: previousTrace?.candidateTopK,
2185
+ previousFinalCount: previousTrace?.finalCount,
2186
+ previousLexicalCount: previousTrace?.lexicalCount,
2187
+ previousLexicalTopK: previousTrace?.lexicalTopK,
2188
+ previousStageCounts,
2189
+ previousTraceMode: previousTrace?.traceMode,
2190
+ previousTransformedQuery: previousTrace?.transformedQuery,
2191
+ previousVariantQueries: previousTrace?.variantQueries ?? [],
2192
+ previousVectorCount: previousTrace?.vectorCount,
2193
+ query: caseResult.query,
2194
+ stageCounts,
2195
+ status: caseResult.status,
2196
+ traceChange,
2197
+ traceMode: currentTrace?.mode,
2198
+ transformedQuery: currentTrace?.transformedQuery || undefined,
2199
+ variantQueries: currentTrace?.variantQueries ?? [],
2200
+ vectorCount: currentTrace?.resultCounts.vector ?? 0
2201
+ };
2202
+ };
2203
+ var buildEvaluationCaseTraceSnapshotsFromEvaluated = (evaluated) => evaluated.map(({ caseResult, trace }) => buildEvaluationCaseTraceSnapshot({
2204
+ caseResult,
2205
+ currentTrace: trace
2206
+ }));
2207
+ var buildEvaluationCaseTraceSnapshots = ({
2208
+ current,
2209
+ previous
2210
+ }) => {
2211
+ if (!current) {
2212
+ return [];
2213
+ }
2214
+ const currentTraces = new Map((current.caseTraceSnapshots ?? []).map((entry) => [entry.caseId, entry]));
2215
+ const previousTraces = new Map((previous?.caseTraceSnapshots ?? []).map((entry) => [
2216
+ entry.caseId,
2217
+ entry
2218
+ ]));
2219
+ return current.response.cases.map((caseResult) => buildEvaluationCaseTraceSnapshot({
2220
+ caseResult,
2221
+ currentTrace: (() => {
2222
+ const currentSnapshot = currentTraces.get(caseResult.caseId);
2223
+ if (!currentSnapshot) {
2224
+ return;
2225
+ }
2226
+ return {
2227
+ candidateTopK: currentSnapshot.candidateTopK,
2228
+ lexicalTopK: currentSnapshot.lexicalTopK,
2229
+ mode: currentSnapshot.traceMode ?? "vector",
2230
+ query: caseResult.query,
2231
+ resultCounts: {
2232
+ final: currentSnapshot.finalCount,
2233
+ fused: currentSnapshot.finalCount,
2234
+ lexical: currentSnapshot.lexicalCount,
2235
+ reranked: currentSnapshot.finalCount,
2236
+ vector: currentSnapshot.vectorCount
2237
+ },
2238
+ runLexical: currentSnapshot.lexicalCount > 0,
2239
+ runVector: currentSnapshot.vectorCount > 0,
2240
+ steps: [],
2241
+ topK: caseResult.topK,
2242
+ transformedQuery: currentSnapshot.transformedQuery ?? caseResult.query,
2243
+ variantQueries: currentSnapshot.variantQueries
2244
+ };
2245
+ })(),
2246
+ previousTrace: previousTraces.get(caseResult.caseId)
2247
+ }));
2248
+ };
2157
2249
  var getStatusRank = (status) => status === "pass" ? 2 : status === "partial" ? 1 : 0;
2158
2250
  var formatSignedDelta = (value, decimals = 0, suffix = "") => `${value >= 0 ? "+" : ""}${value.toFixed(decimals)}${suffix}`;
2159
2251
  var formatEvaluationSummary = (response) => `${response.summary.passedCases}/${response.totalCases} pass \xB7 f1 ${response.summary.averageF1.toFixed(3)} \xB7 latency ${response.summary.averageLatencyMs.toFixed(1)}ms`;
@@ -2313,6 +2405,13 @@ var buildRAGEvaluationHistoryRows = (history) => {
2313
2405
  rows.push({ label: "Trace stage delta", value: stageDelta });
2314
2406
  }
2315
2407
  }
2408
+ if (history.caseTraceSnapshots.length > 0) {
2409
+ const changedCases = history.caseTraceSnapshots.filter((entry) => entry.traceChange === "changed");
2410
+ rows.push({
2411
+ label: "Trace drift cases",
2412
+ value: changedCases.length > 0 ? changedCases.map((entry) => entry.label ?? entry.caseId).slice(0, 4).join(", ") : "none"
2413
+ });
2414
+ }
2316
2415
  return rows;
2317
2416
  };
2318
2417
  var buildRAGEvaluationRunDiff = ({
@@ -2502,6 +2601,10 @@ var loadRAGEvaluationHistory = async ({
2502
2601
  const latestRun = runs[0];
2503
2602
  const previousRun = runs[1];
2504
2603
  return {
2604
+ caseTraceSnapshots: buildEvaluationCaseTraceSnapshots({
2605
+ current: latestRun,
2606
+ previous: previousRun
2607
+ }),
2505
2608
  diff: latestRun && previousRun ? buildRAGEvaluationRunDiff({
2506
2609
  current: latestRun,
2507
2610
  previous: previousRun
@@ -2713,6 +2816,7 @@ var compareRAGRerankers = async ({
2713
2816
  });
2714
2817
  const response = buildRAGEvaluationResponse(evaluated.map((entry) => entry.caseResult));
2715
2818
  return {
2819
+ caseTraceSnapshots: buildEvaluationCaseTraceSnapshotsFromEvaluated(evaluated),
2716
2820
  label: candidate.label ?? candidate.id,
2717
2821
  providerName: typeof candidate.rerank === "function" ? undefined : candidate.rerank?.providerName,
2718
2822
  response,
@@ -2799,6 +2903,7 @@ var compareRAGRetrievalStrategies = async ({
2799
2903
  });
2800
2904
  const response = buildRAGEvaluationResponse(evaluated.map((entry) => entry.caseResult));
2801
2905
  return {
2906
+ caseTraceSnapshots: buildEvaluationCaseTraceSnapshotsFromEvaluated(evaluated),
2802
2907
  label: candidate.label ?? candidate.id,
2803
2908
  response,
2804
2909
  retrievalId: candidate.id,
@@ -2867,7 +2972,8 @@ var executeDryRunRAGEvaluation = (input, defaultTopK = DEFAULT_TOP_K) => input.c
2867
2972
  var runRAGEvaluationSuite = async ({
2868
2973
  suite,
2869
2974
  evaluate,
2870
- overrides
2975
+ overrides,
2976
+ artifacts
2871
2977
  }) => {
2872
2978
  const startedAt = Date.now();
2873
2979
  const response = await evaluate({
@@ -2877,6 +2983,7 @@ var runRAGEvaluationSuite = async ({
2877
2983
  });
2878
2984
  const finishedAt = Date.now();
2879
2985
  return {
2986
+ caseTraceSnapshots: artifacts?.caseTraceSnapshots,
2880
2987
  elapsedMs: finishedAt - startedAt,
2881
2988
  finishedAt,
2882
2989
  id: generateId(),
@@ -2884,7 +2991,8 @@ var runRAGEvaluationSuite = async ({
2884
2991
  metadata: suite.metadata,
2885
2992
  response,
2886
2993
  startedAt,
2887
- suiteId: suite.id
2994
+ suiteId: suite.id,
2995
+ traceSummary: artifacts?.traceSummary
2888
2996
  };
2889
2997
  };
2890
2998
  var summarizeRAGEvaluationCase = ({
@@ -3597,5 +3705,5 @@ export {
3597
3705
  AIStreamKey
3598
3706
  };
3599
3707
 
3600
- //# debugId=49142334D6D3EC2564756E2164756E21
3708
+ //# debugId=E235F4588786F00E64756E2164756E21
3601
3709
  //# sourceMappingURL=index.js.map