@absolutejs/absolute 0.19.0-beta.533 → 0.19.0-beta.535

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1195,6 +1195,7 @@ export type RAGEvaluationSuiteRun = {
1195
1195
  elapsedMs: number;
1196
1196
  response: RAGEvaluationResponse;
1197
1197
  traceSummary?: RAGRetrievalTraceComparisonSummary;
1198
+ caseTraceSnapshots?: RAGEvaluationCaseTraceSnapshot[];
1198
1199
  metadata?: Record<string, unknown>;
1199
1200
  };
1200
1201
  export type RAGEvaluationHistoryStore = {
@@ -1246,15 +1247,45 @@ export type RAGEvaluationRunDiff = {
1246
1247
  stageCounts: Partial<Record<RAGRetrievalTraceStage, number>>;
1247
1248
  };
1248
1249
  };
1250
/**
 * Retrieval-trace figures for a single evaluation case, side by side with the
 * same figures from the run it is being compared against.
 *
 * Each `previous*` field mirrors the un-prefixed field declared next to it.
 */
export type RAGEvaluationCaseTraceSnapshot = {
  // Case identity and outcome.
  caseId: string;
  label?: string;
  query: string;
  status: RAGEvaluationCaseResult["status"];

  // Retrieval mode used for the case.
  traceMode?: RAGHybridRetrievalMode;
  previousTraceMode?: RAGHybridRetrievalMode;

  // Query rewriting.
  transformedQuery?: string;
  previousTransformedQuery?: string;
  variantQueries: string[];
  previousVariantQueries: string[];

  // Result counts.
  finalCount: number;
  previousFinalCount?: number;
  vectorCount: number;
  previousVectorCount?: number;
  lexicalCount: number;
  previousLexicalCount?: number;

  // Top-K settings.
  candidateTopK: number;
  previousCandidateTopK?: number;
  lexicalTopK: number;
  previousLexicalTopK?: number;

  // Per-stage counts.
  stageCounts: Partial<Record<RAGRetrievalTraceStage, number>>;
  previousStageCounts: Partial<Record<RAGRetrievalTraceStage, number>>;

  /** How the current figures relate to the previous ones. */
  traceChange: "new" | "changed" | "unchanged";
};
1249
1275
  export type RAGEvaluationHistory = {
1250
1276
  suiteId: string;
1251
1277
  suiteLabel?: string;
1252
1278
  runs: RAGEvaluationSuiteRun[];
1253
1279
  leaderboard: RAGEvaluationLeaderboardEntry[];
1280
+ caseTraceSnapshots: RAGEvaluationCaseTraceSnapshot[];
1254
1281
  latestRun?: RAGEvaluationSuiteRun;
1255
1282
  previousRun?: RAGEvaluationSuiteRun;
1256
1283
  diff?: RAGEvaluationRunDiff;
1257
1284
  };
1285
/** One display row: a caption (`label`) and its already-formatted text (`value`). */
export type RAGLabelValueRow = { label: string; value: string };
1258
1289
  export type RAGEvaluationLeaderboardEntry = {
1259
1290
  runId: string;
1260
1291
  suiteId: string;
@@ -1297,6 +1328,7 @@ export type RAGRerankerComparisonEntry = {
1297
1328
  providerName?: string;
1298
1329
  response: RAGEvaluationResponse;
1299
1330
  traceSummary?: RAGRetrievalTraceComparisonSummary;
1331
+ caseTraceSnapshots?: RAGEvaluationCaseTraceSnapshot[];
1300
1332
  };
1301
1333
  export type RAGRerankerComparisonSummary = {
1302
1334
  bestByPassingRate?: string;
@@ -1316,6 +1348,7 @@ export type RAGRetrievalComparisonEntry = {
1316
1348
  retrievalMode: RAGHybridRetrievalMode;
1317
1349
  response: RAGEvaluationResponse;
1318
1350
  traceSummary?: RAGRetrievalTraceComparisonSummary;
1351
+ caseTraceSnapshots?: RAGEvaluationCaseTraceSnapshot[];
1319
1352
  };
1320
1353
  export type RAGRetrievalComparisonSummary = {
1321
1354
  bestByPassingRate?: string;
@@ -2154,7 +2154,266 @@ var buildGroundingCaseSnapshots = ({
2154
2154
  };
2155
2155
  });
2156
2156
  };
2157
/**
 * Reports whether two stage-count maps agree on every stage.
 *
 * The union of both key sets is checked; a stage that is missing (or nullish)
 * on one side counts as 0 there.
 */
var areStageCountsEqual = (left, right) => {
  const stages = new Set(Object.keys(left).concat(Object.keys(right)));
  return [...stages].every(
    (stage) => (left[stage] ?? 0) === (right[stage] ?? 0)
  );
};
2169
/**
 * Builds the trace snapshot for one evaluated case.
 *
 * `caseResult` supplies the case identity (caseId, label, query, status).
 * `currentTrace` is the retrieval trace of this run, if any; when absent the
 * numeric fields fall back to 0 and the list/map fields to empty values.
 * `previousTrace` is the snapshot of the same case from the run being compared
 * against, if any; its values are copied into the `previous*` fields.
 *
 * `traceChange` is:
 * - "new" when there is a current trace but no previous snapshot,
 * - "unchanged" when there is neither, or when every compared field matches,
 * - "changed" otherwise.
 */
var buildEvaluationCaseTraceSnapshot = ({
  caseResult,
  currentTrace,
  previousTrace
}) => {
  const stageCounts = currentTrace ? buildTraceStageCounts([currentTrace]) : {};
  const previousStageCounts = previousTrace?.stageCounts ?? {};

  const candidateTopK = currentTrace?.candidateTopK ?? 0;
  const finalCount = currentTrace?.resultCounts.final ?? 0;
  const lexicalCount = currentTrace?.resultCounts.lexical ?? 0;
  const lexicalTopK = currentTrace?.lexicalTopK ?? 0;
  const vectorCount = currentTrace?.resultCounts.vector ?? 0;
  const traceMode = currentTrace?.mode;
  // An empty transformed query is recorded as "no transform".
  const transformedQuery = currentTrace?.transformedQuery || undefined;
  const variantQueries = currentTrace?.variantQueries ?? [];
  const previousVariantQueries = previousTrace?.variantQueries ?? [];

  // Compare element by element: joining with a delimiter would make
  // ["a|b"] and ["a", "b"] look identical.
  const sameVariantQueries =
    previousVariantQueries.length === variantQueries.length &&
    previousVariantQueries.every((query, index) => query === variantQueries[index]);

  let traceChange;
  if (!previousTrace) {
    traceChange = currentTrace ? "new" : "unchanged";
  } else {
    const differs =
      previousTrace.traceMode !== traceMode ||
      previousTrace.transformedQuery !== transformedQuery ||
      !sameVariantQueries ||
      previousTrace.finalCount !== finalCount ||
      previousTrace.vectorCount !== vectorCount ||
      previousTrace.lexicalCount !== lexicalCount ||
      previousTrace.candidateTopK !== candidateTopK ||
      previousTrace.lexicalTopK !== lexicalTopK ||
      !areStageCountsEqual(previousStageCounts, stageCounts);
    traceChange = differs ? "changed" : "unchanged";
  }

  return {
    candidateTopK,
    caseId: caseResult.caseId,
    finalCount,
    label: caseResult.label,
    lexicalCount,
    lexicalTopK,
    previousCandidateTopK: previousTrace?.candidateTopK,
    previousFinalCount: previousTrace?.finalCount,
    previousLexicalCount: previousTrace?.lexicalCount,
    previousLexicalTopK: previousTrace?.lexicalTopK,
    previousStageCounts,
    previousTraceMode: previousTrace?.traceMode,
    previousTransformedQuery: previousTrace?.transformedQuery,
    previousVariantQueries,
    previousVectorCount: previousTrace?.vectorCount,
    query: caseResult.query,
    stageCounts,
    status: caseResult.status,
    traceChange,
    traceMode,
    transformedQuery,
    variantQueries,
    vectorCount
  };
};
2203
/**
 * Turns freshly evaluated cases into trace snapshots.
 *
 * Each entry contributes its `caseResult` and its `trace` (used as the current
 * trace); no previous trace is supplied.
 */
var buildEvaluationCaseTraceSnapshotsFromEvaluated = (evaluated) => {
  const toSnapshot = ({ caseResult, trace }) => {
    const input = { caseResult, currentTrace: trace };
    return buildEvaluationCaseTraceSnapshot(input);
  };
  return evaluated.map((entry) => toSnapshot(entry));
};
2207
+ var buildEvaluationCaseTraceSnapshots = ({
2208
+ current,
2209
+ previous
2210
+ }) => {
2211
+ if (!current) {
2212
+ return [];
2213
+ }
2214
+ const currentTraces = new Map((current.caseTraceSnapshots ?? []).map((entry) => [entry.caseId, entry]));
2215
+ const previousTraces = new Map((previous?.caseTraceSnapshots ?? []).map((entry) => [
2216
+ entry.caseId,
2217
+ entry
2218
+ ]));
2219
+ return current.response.cases.map((caseResult) => buildEvaluationCaseTraceSnapshot({
2220
+ caseResult,
2221
+ currentTrace: (() => {
2222
+ const currentSnapshot = currentTraces.get(caseResult.caseId);
2223
+ if (!currentSnapshot) {
2224
+ return;
2225
+ }
2226
+ return {
2227
+ candidateTopK: currentSnapshot.candidateTopK,
2228
+ lexicalTopK: currentSnapshot.lexicalTopK,
2229
+ mode: currentSnapshot.traceMode ?? "vector",
2230
+ query: caseResult.query,
2231
+ resultCounts: {
2232
+ final: currentSnapshot.finalCount,
2233
+ fused: currentSnapshot.finalCount,
2234
+ lexical: currentSnapshot.lexicalCount,
2235
+ reranked: currentSnapshot.finalCount,
2236
+ vector: currentSnapshot.vectorCount
2237
+ },
2238
+ runLexical: currentSnapshot.lexicalCount > 0,
2239
+ runVector: currentSnapshot.vectorCount > 0,
2240
+ steps: [],
2241
+ topK: caseResult.topK,
2242
+ transformedQuery: currentSnapshot.transformedQuery ?? caseResult.query,
2243
+ variantQueries: currentSnapshot.variantQueries
2244
+ };
2245
+ })(),
2246
+ previousTrace: previousTraces.get(caseResult.caseId)
2247
+ }));
2248
+ };
2157
2249
  var getStatusRank = (status) => status === "pass" ? 2 : status === "partial" ? 1 : 0;
2250
+ var formatSignedDelta = (value, decimals = 0, suffix = "") => `${value >= 0 ? "+" : ""}${value.toFixed(decimals)}${suffix}`;
2251
/**
 * One-line summary of an evaluation response: passed/total cases, average F1
 * (3 decimals) and average latency in ms (1 decimal), separated by middle dots.
 */
var formatEvaluationSummary = (response) => {
  const passSegment = `${response.summary.passedCases}/${response.totalCases} pass`;
  const f1Segment = `f1 ${response.summary.averageF1.toFixed(3)}`;
  const latencySegment = `latency ${response.summary.averageLatencyMs.toFixed(1)}ms`;
  return [passSegment, f1Segment, latencySegment].join(" \xB7 ");
};
2252
/**
 * Comma-separated names of the given cases, using each case's label when it
 * has one and its caseId otherwise; "none" for an empty list.
 */
var formatHistoryCaseLabels = (cases) => {
  if (!(cases.length > 0)) {
    return "none";
  }
  const names = cases.map((entry) => entry.label ?? entry.caseId);
  return names.join(", ");
};
2253
// Joins trace modes with " / "; an empty list is shown as "n/a".
var formatTraceModes = (modes) => {
  if (!(modes.length > 0)) {
    return "n/a";
  }
  return modes.join(" / ");
};
2254
/**
 * Summarises the three stages with the highest counts as "stage count"
 * pairs separated by middle dots; "n/a" when there are no stages.
 */
var formatTraceStageSummary = (stageCounts) => {
  const ranked = Object.entries(stageCounts);
  // Highest count first.
  ranked.sort(([, leftCount], [, rightCount]) => rightCount - leftCount);
  const parts = ranked.slice(0, 3).map(([stage, count]) => `${stage} ${count}`);
  if (parts.length > 0) {
    return parts.join(" \xB7 ");
  }
  return "n/a";
};
2258
// Renders "count/total", e.g. formatTraceRatio(2, 5) -> "2/5".
var formatTraceRatio = (count, total) => "".concat(count, "/", total);
2259
// Renders a count delta with an explicit "+" for zero and positive values.
var formatTraceCountDelta = (value) => {
  const sign = value >= 0 ? "+" : "";
  return `${sign}${value}`;
};
2260
/**
 * Label/value rows describing the trace summary of one comparison entry.
 *
 * Returns a single "Unavailable" row when the entry has no `traceSummary`.
 * Otherwise returns, in order: modes, average final/vector/lexical counts
 * (1 decimal), transformed and variant case ratios, average candidate/lexical
 * top-K, and the stage summary.
 */
var buildRAGComparisonTraceSummaryRows = (entry) => {
  const summary = entry.traceSummary;
  if (!summary) {
    return [{ label: "Trace", value: "Unavailable" }];
  }
  const row = (label, value) => ({ label, value });
  const oneDecimal = (amount) => amount.toFixed(1);
  return [
    row("Modes", formatTraceModes(summary.modes)),
    row("Avg final", oneDecimal(summary.averageFinalCount)),
    row("Avg vector", oneDecimal(summary.averageVectorCount)),
    row("Avg lexical", oneDecimal(summary.averageLexicalCount)),
    row("Transforms", formatTraceRatio(summary.transformedCases, summary.totalCases)),
    row("Variants", formatTraceRatio(summary.variantCases, summary.totalCases)),
    row("TopK", `${oneDecimal(summary.averageCandidateTopK)} / ${oneDecimal(summary.averageLexicalTopK)}`),
    row("Stages", formatTraceStageSummary(summary.stageCounts))
  ];
};
2288
/**
 * Label/value rows describing how an entry's trace summary differs from the
 * leader's.
 *
 * Short-circuits with a single row when the entry has no trace summary, when
 * the leader has none, or when the entry is the leader itself. Otherwise the
 * rows name the baseline, show the modes when they differ, list the
 * final/vector/lexical/transform deltas, and end with up to three non-zero
 * stage deltas when there are any.
 */
var buildRAGComparisonTraceDiffRows = (entry, leader) => {
  const own = entry.traceSummary;
  if (!own) {
    return [{ label: "Trace", value: "Unavailable for comparison" }];
  }
  const baseline = leader?.traceSummary;
  if (!baseline) {
    return [{ label: "Baseline", value: "Leader trace unavailable" }];
  }
  if (entry === leader) {
    return [{ label: "Baseline", value: "Leader strategy" }];
  }

  // Stages known to either side; leader's stages come first.
  const stageNames = Object.keys({
    ...baseline.stageCounts,
    ...own.stageCounts
  });
  const stageParts = [];
  for (const stage of stageNames) {
    const delta = (own.stageCounts[stage] ?? 0) - (baseline.stageCounts[stage] ?? 0);
    if (delta !== 0) {
      stageParts.push(`${stage} ${formatTraceCountDelta(delta)}`);
    }
  }
  const stageDelta = stageParts.slice(0, 3).join(" \xB7 ");

  const rows = [{ label: "Baseline", value: leader.label }];

  const ownModes = formatTraceModes(own.modes);
  const baselineModes = formatTraceModes(baseline.modes);
  if (ownModes !== baselineModes) {
    rows.push({
      label: "Modes vs leader",
      value: `${ownModes} vs ${baselineModes}`
    });
  }

  rows.push({
    label: "Final delta",
    value: formatSignedDelta(own.averageFinalCount - baseline.averageFinalCount, 1)
  });
  rows.push({
    label: "Vector delta",
    value: formatSignedDelta(own.averageVectorCount - baseline.averageVectorCount, 1)
  });
  rows.push({
    label: "Lexical delta",
    value: formatSignedDelta(own.averageLexicalCount - baseline.averageLexicalCount, 1)
  });
  rows.push({
    label: "Transform delta",
    value: formatTraceCountDelta(own.transformedCases - baseline.transformedCases)
  });

  if (stageDelta) {
    rows.push({ label: "Stage delta", value: stageDelta });
  }
  return rows;
};
2335
/**
 * Label/value rows summarising an evaluation history.
 *
 * Returns a single placeholder row when `history` is missing or has no latest
 * run. Otherwise the rows cover, in order: run count and latest run summary,
 * the latest trace summary (when present), the previous run (when present),
 * the summary deltas and improved/regressed cases from `history.diff`, the
 * trace summary deltas (when present) and finally the cases whose trace
 * changed. When there is no diff yet, a hint row is appended and the function
 * returns early.
 *
 * `history.caseTraceSnapshots` is treated as optional: a history object that
 * does not carry the field simply gets no "Trace drift cases" row instead of
 * throwing on `.length`.
 */
var buildRAGEvaluationHistoryRows = (history) => {
  const latestRun = history?.latestRun;
  if (!latestRun) {
    return [
      { label: "History", value: "No persisted benchmark runs yet." }
    ];
  }
  const rows = [
    { label: "Runs recorded", value: String(history.runs.length) },
    {
      label: "Latest",
      value: `${latestRun.label} \xB7 ${formatEvaluationSummary(latestRun.response)}`
    }
  ];
  const latestTrace = latestRun.traceSummary;
  if (latestTrace) {
    rows.push({
      label: "Latest trace",
      value: `${formatTraceModes(latestTrace.modes)} \xB7 final ${latestTrace.averageFinalCount.toFixed(1)} \xB7 vector ${latestTrace.averageVectorCount.toFixed(1)} \xB7 lexical ${latestTrace.averageLexicalCount.toFixed(1)}`
    });
  }
  const previousRun = history.previousRun;
  if (previousRun) {
    rows.push({
      label: "Previous",
      value: `${previousRun.label} \xB7 ${formatEvaluationSummary(previousRun.response)}`
    });
  }
  const diff = history.diff;
  if (!diff) {
    rows.push({
      label: "History diff",
      value: "Run the benchmark again to diff regressions over time."
    });
    return rows;
  }
  rows.push({
    label: "Passing delta",
    value: formatSignedDelta(diff.summaryDelta.passingRate, 1, "%")
  }, {
    label: "Average F1 delta",
    value: formatSignedDelta(diff.summaryDelta.averageF1, 3)
  }, {
    label: "Latency delta",
    value: formatSignedDelta(diff.summaryDelta.averageLatencyMs, 1, "ms")
  }, {
    label: "Improved",
    value: formatHistoryCaseLabels(diff.improvedCases)
  }, {
    label: "Regressed",
    value: formatHistoryCaseLabels(diff.regressedCases)
  });
  const traceDelta = diff.traceSummaryDelta;
  if (traceDelta) {
    rows.push({
      label: "Trace mode shift",
      value: traceDelta.modesChanged ? "changed" : "stable"
    }, {
      label: "Trace final delta",
      value: formatSignedDelta(traceDelta.averageFinalCount, 1)
    }, {
      label: "Trace vector delta",
      value: formatSignedDelta(traceDelta.averageVectorCount, 1)
    }, {
      label: "Trace lexical delta",
      value: formatSignedDelta(traceDelta.averageLexicalCount, 1)
    }, {
      label: "Trace transform delta",
      value: formatTraceCountDelta(traceDelta.transformedCases)
    }, {
      label: "Trace variant delta",
      value: formatTraceCountDelta(traceDelta.variantCases)
    });
    const stageDelta = Object.entries(traceDelta.stageCounts ?? {}).map(([stage, count]) => `${stage} ${formatTraceCountDelta(count)}`).join(", ");
    if (stageDelta) {
      rows.push({ label: "Trace stage delta", value: stageDelta });
    }
  }
  // Tolerate history objects that do not carry the snapshot list.
  const caseTraceSnapshots = history.caseTraceSnapshots ?? [];
  if (caseTraceSnapshots.length > 0) {
    const changedCases = caseTraceSnapshots.filter((entry) => entry.traceChange === "changed");
    rows.push({
      label: "Trace drift cases",
      // Only the first four drifting cases are listed.
      value: changedCases.length > 0 ? changedCases.map((entry) => entry.label ?? entry.caseId).slice(0, 4).join(", ") : "none"
    });
  }
  return rows;
};
2158
2417
  var buildRAGEvaluationRunDiff = ({
2159
2418
  current,
2160
2419
  previous
@@ -2342,6 +2601,10 @@ var loadRAGEvaluationHistory = async ({
2342
2601
  const latestRun = runs[0];
2343
2602
  const previousRun = runs[1];
2344
2603
  return {
2604
+ caseTraceSnapshots: buildEvaluationCaseTraceSnapshots({
2605
+ current: latestRun,
2606
+ previous: previousRun
2607
+ }),
2345
2608
  diff: latestRun && previousRun ? buildRAGEvaluationRunDiff({
2346
2609
  current: latestRun,
2347
2610
  previous: previousRun
@@ -2553,6 +2816,7 @@ var compareRAGRerankers = async ({
2553
2816
  });
2554
2817
  const response = buildRAGEvaluationResponse(evaluated.map((entry) => entry.caseResult));
2555
2818
  return {
2819
+ caseTraceSnapshots: buildEvaluationCaseTraceSnapshotsFromEvaluated(evaluated),
2556
2820
  label: candidate.label ?? candidate.id,
2557
2821
  providerName: typeof candidate.rerank === "function" ? undefined : candidate.rerank?.providerName,
2558
2822
  response,
@@ -2639,6 +2903,7 @@ var compareRAGRetrievalStrategies = async ({
2639
2903
  });
2640
2904
  const response = buildRAGEvaluationResponse(evaluated.map((entry) => entry.caseResult));
2641
2905
  return {
2906
+ caseTraceSnapshots: buildEvaluationCaseTraceSnapshotsFromEvaluated(evaluated),
2642
2907
  label: candidate.label ?? candidate.id,
2643
2908
  response,
2644
2909
  retrievalId: candidate.id,
@@ -2707,7 +2972,8 @@ var executeDryRunRAGEvaluation = (input, defaultTopK = DEFAULT_TOP_K) => input.c
2707
2972
  var runRAGEvaluationSuite = async ({
2708
2973
  suite,
2709
2974
  evaluate,
2710
- overrides
2975
+ overrides,
2976
+ artifacts
2711
2977
  }) => {
2712
2978
  const startedAt = Date.now();
2713
2979
  const response = await evaluate({
@@ -2717,6 +2983,7 @@ var runRAGEvaluationSuite = async ({
2717
2983
  });
2718
2984
  const finishedAt = Date.now();
2719
2985
  return {
2986
+ caseTraceSnapshots: artifacts?.caseTraceSnapshots,
2720
2987
  elapsedMs: finishedAt - startedAt,
2721
2988
  finishedAt,
2722
2989
  id: generateId(),
@@ -2724,7 +2991,8 @@ var runRAGEvaluationSuite = async ({
2724
2991
  metadata: suite.metadata,
2725
2992
  response,
2726
2993
  startedAt,
2727
- suiteId: suite.id
2994
+ suiteId: suite.id,
2995
+ traceSummary: artifacts?.traceSummary
2728
2996
  };
2729
2997
  };
2730
2998
  var summarizeRAGEvaluationCase = ({
@@ -3437,5 +3705,5 @@ export {
3437
3705
  AIStreamKey
3438
3706
  };
3439
3707
 
3440
- //# debugId=D6FF5517CF2D735064756E2164756E21
3708
+ //# debugId=E235F4588786F00E64756E2164756E21
3441
3709
  //# sourceMappingURL=index.js.map