@absolutejs/absolute 0.19.0-beta.531 → 0.19.0-beta.533

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1194,6 +1194,7 @@ export type RAGEvaluationSuiteRun = {
1194
1194
  finishedAt: number;
1195
1195
  elapsedMs: number;
1196
1196
  response: RAGEvaluationResponse;
1197
+ traceSummary?: RAGRetrievalTraceComparisonSummary;
1197
1198
  metadata?: Record<string, unknown>;
1198
1199
  };
1199
1200
  export type RAGEvaluationHistoryStore = {
@@ -1231,6 +1232,19 @@ export type RAGEvaluationRunDiff = {
1231
1232
  failedCases: number;
1232
1233
  partialCases: number;
1233
1234
  };
1235
+ traceSummaryDelta?: {
1236
+ modesChanged: boolean;
1237
+ vectorCases: number;
1238
+ lexicalCases: number;
1239
+ transformedCases: number;
1240
+ variantCases: number;
1241
+ averageFinalCount: number;
1242
+ averageVectorCount: number;
1243
+ averageLexicalCount: number;
1244
+ averageCandidateTopK: number;
1245
+ averageLexicalTopK: number;
1246
+ stageCounts: Partial<Record<RAGRetrievalTraceStage, number>>;
1247
+ };
1234
1248
  };
1235
1249
  export type RAGEvaluationHistory = {
1236
1250
  suiteId: string;
@@ -1256,6 +1270,20 @@ export type RAGRerankerCandidate = {
1256
1270
  label?: string;
1257
1271
  rerank?: RAGRerankerProviderLike;
1258
1272
  };
1273
+ export type RAGRetrievalTraceComparisonSummary = {
1274
+ totalCases: number;
1275
+ modes: RAGHybridRetrievalMode[];
1276
+ vectorCases: number;
1277
+ lexicalCases: number;
1278
+ transformedCases: number;
1279
+ variantCases: number;
1280
+ averageFinalCount: number;
1281
+ averageVectorCount: number;
1282
+ averageLexicalCount: number;
1283
+ averageCandidateTopK: number;
1284
+ averageLexicalTopK: number;
1285
+ stageCounts: Partial<Record<RAGRetrievalTraceStage, number>>;
1286
+ };
1259
1287
  export type RAGRetrievalCandidate = {
1260
1288
  id: string;
1261
1289
  label?: string;
@@ -1268,6 +1296,7 @@ export type RAGRerankerComparisonEntry = {
1268
1296
  label: string;
1269
1297
  providerName?: string;
1270
1298
  response: RAGEvaluationResponse;
1299
+ traceSummary?: RAGRetrievalTraceComparisonSummary;
1271
1300
  };
1272
1301
  export type RAGRerankerComparisonSummary = {
1273
1302
  bestByPassingRate?: string;
@@ -1286,6 +1315,7 @@ export type RAGRetrievalComparisonEntry = {
1286
1315
  label: string;
1287
1316
  retrievalMode: RAGHybridRetrievalMode;
1288
1317
  response: RAGEvaluationResponse;
1318
+ traceSummary?: RAGRetrievalTraceComparisonSummary;
1289
1319
  };
1290
1320
  export type RAGRetrievalComparisonSummary = {
1291
1321
  bestByPassingRate?: string;
@@ -1768,6 +1768,131 @@ var buildRAGAnswerGroundingEvaluationLeaderboard = (runs) => {
1768
1768
  totalCases: run.response.totalCases
1769
1769
  }));
1770
1770
  };
1771
+ var buildTraceStageCounts = (traces) => {
1772
+ const counts = {};
1773
+ for (const trace of traces) {
1774
+ for (const step of trace.steps) {
1775
+ counts[step.stage] = (counts[step.stage] ?? 0) + 1;
1776
+ }
1777
+ }
1778
+ return counts;
1779
+ };
1780
+ var diffTraceStageCounts = ({
1781
+ current,
1782
+ previous
1783
+ }) => {
1784
+ const next = {};
1785
+ const stages = new Set([
1786
+ ...Object.keys(current),
1787
+ ...Object.keys(previous)
1788
+ ]);
1789
+ for (const stage of stages) {
1790
+ const delta = (current[stage] ?? 0) - (previous[stage] ?? 0);
1791
+ if (delta !== 0) {
1792
+ next[stage] = delta;
1793
+ }
1794
+ }
1795
+ return next;
1796
+ };
1797
+ var roundTraceAverage = (value, total) => total > 0 ? Number((value / total).toFixed(2)) : 0;
1798
+ var summarizeRetrievalTraces = (traces) => {
1799
+ if (traces.length === 0) {
1800
+ return;
1801
+ }
1802
+ const totalCases = traces.length;
1803
+ const modeSet = new Set;
1804
+ let vectorCases = 0;
1805
+ let lexicalCases = 0;
1806
+ let transformedCases = 0;
1807
+ let variantCases = 0;
1808
+ let finalCountSum = 0;
1809
+ let vectorCountSum = 0;
1810
+ let lexicalCountSum = 0;
1811
+ let candidateTopKSum = 0;
1812
+ let lexicalTopKSum = 0;
1813
+ for (const trace of traces) {
1814
+ modeSet.add(trace.mode);
1815
+ if (trace.runVector) {
1816
+ vectorCases += 1;
1817
+ }
1818
+ if (trace.runLexical) {
1819
+ lexicalCases += 1;
1820
+ }
1821
+ if (trace.transformedQuery !== trace.query) {
1822
+ transformedCases += 1;
1823
+ }
1824
+ if (trace.variantQueries.length > 0) {
1825
+ variantCases += 1;
1826
+ }
1827
+ finalCountSum += trace.resultCounts.final;
1828
+ vectorCountSum += trace.resultCounts.vector;
1829
+ lexicalCountSum += trace.resultCounts.lexical;
1830
+ candidateTopKSum += trace.candidateTopK;
1831
+ lexicalTopKSum += trace.lexicalTopK;
1832
+ }
1833
+ return {
1834
+ averageCandidateTopK: roundTraceAverage(candidateTopKSum, totalCases),
1835
+ averageFinalCount: roundTraceAverage(finalCountSum, totalCases),
1836
+ averageLexicalCount: roundTraceAverage(lexicalCountSum, totalCases),
1837
+ averageLexicalTopK: roundTraceAverage(lexicalTopKSum, totalCases),
1838
+ averageVectorCount: roundTraceAverage(vectorCountSum, totalCases),
1839
+ lexicalCases,
1840
+ modes: Array.from(modeSet),
1841
+ stageCounts: buildTraceStageCounts(traces),
1842
+ totalCases,
1843
+ transformedCases,
1844
+ variantCases,
1845
+ vectorCases
1846
+ };
1847
+ };
1848
+ var evaluateRAGCollectionCases = async ({
1849
+ collection,
1850
+ input,
1851
+ defaultTopK = DEFAULT_TOP_K,
1852
+ rerank,
1853
+ includeTrace = false
1854
+ }) => {
1855
+ if (input.dryRun) {
1856
+ return executeDryRunRAGEvaluation(input, defaultTopK).map((caseResult) => ({
1857
+ caseResult,
1858
+ trace: undefined
1859
+ }));
1860
+ }
1861
+ return Promise.all(input.cases.map(async (caseInput, caseIndex) => {
1862
+ const startedAt = Date.now();
1863
+ const mode = resolveEvaluationMode(caseInput);
1864
+ const query = caseInput.query.trim();
1865
+ const expectedIds = normalizeExpectedIds(mode === "chunkId" ? caseInput.expectedChunkIds ?? [] : mode === "source" ? caseInput.expectedSources ?? [] : caseInput.expectedDocumentIds ?? []);
1866
+ const topK = typeof caseInput.topK === "number" ? caseInput.topK : typeof input.topK === "number" ? input.topK : defaultTopK;
1867
+ const searchInput = {
1868
+ filter: typeof caseInput.filter === "object" ? caseInput.filter : input.filter,
1869
+ model: caseInput.model ?? input.model,
1870
+ query,
1871
+ rerank,
1872
+ scoreThreshold: typeof caseInput.scoreThreshold === "number" ? caseInput.scoreThreshold : input.scoreThreshold,
1873
+ topK
1874
+ };
1875
+ const searchOutcome = includeTrace ? await collection.searchWithTrace(searchInput) : {
1876
+ results: await collection.search(searchInput),
1877
+ trace: undefined
1878
+ };
1879
+ const sources = buildSources(searchOutcome.results);
1880
+ const elapsedMs = Date.now() - startedAt;
1881
+ const retrievedIds = normalizeExpectedIds(sources.map((source) => extractExpectedId(source, mode)));
1882
+ return {
1883
+ caseResult: summarizeRAGEvaluationCase({
1884
+ caseIndex,
1885
+ caseInput: { ...caseInput, topK },
1886
+ elapsedMs,
1887
+ expectedIds,
1888
+ mode,
1889
+ query,
1890
+ retrievedIds
1891
+ }),
1892
+ trace: searchOutcome.trace
1893
+ };
1894
+ }));
1895
+ };
1771
1896
  var buildRAGAnswerGroundingCaseDifficultyLeaderboard = (entries) => {
1772
1897
  const grouped = new Map;
1773
1898
  for (const entry of entries) {
@@ -2053,6 +2178,22 @@ var buildRAGEvaluationRunDiff = ({
2053
2178
  passingRate: current.response.passingRate - (previous?.response.passingRate ?? 0),
2054
2179
  partialCases: current.response.summary.partialCases - (previous?.response.summary.partialCases ?? 0)
2055
2180
  },
2181
+ traceSummaryDelta: current.traceSummary || previous?.traceSummary ? {
2182
+ averageCandidateTopK: (current.traceSummary?.averageCandidateTopK ?? 0) - (previous?.traceSummary?.averageCandidateTopK ?? 0),
2183
+ averageFinalCount: (current.traceSummary?.averageFinalCount ?? 0) - (previous?.traceSummary?.averageFinalCount ?? 0),
2184
+ averageLexicalCount: (current.traceSummary?.averageLexicalCount ?? 0) - (previous?.traceSummary?.averageLexicalCount ?? 0),
2185
+ averageLexicalTopK: (current.traceSummary?.averageLexicalTopK ?? 0) - (previous?.traceSummary?.averageLexicalTopK ?? 0),
2186
+ averageVectorCount: (current.traceSummary?.averageVectorCount ?? 0) - (previous?.traceSummary?.averageVectorCount ?? 0),
2187
+ lexicalCases: (current.traceSummary?.lexicalCases ?? 0) - (previous?.traceSummary?.lexicalCases ?? 0),
2188
+ modesChanged: (current.traceSummary?.modes ?? []).join("|") !== (previous?.traceSummary?.modes ?? []).join("|"),
2189
+ stageCounts: diffTraceStageCounts({
2190
+ current: current.traceSummary?.stageCounts ?? {},
2191
+ previous: previous?.traceSummary?.stageCounts ?? {}
2192
+ }),
2193
+ transformedCases: (current.traceSummary?.transformedCases ?? 0) - (previous?.traceSummary?.transformedCases ?? 0),
2194
+ variantCases: (current.traceSummary?.variantCases ?? 0) - (previous?.traceSummary?.variantCases ?? 0),
2195
+ vectorCases: (current.traceSummary?.vectorCases ?? 0) - (previous?.traceSummary?.vectorCases ?? 0)
2196
+ } : undefined,
2056
2197
  unchangedCases
2057
2198
  };
2058
2199
  };
@@ -2403,17 +2544,20 @@ var compareRAGRerankers = async ({
2403
2544
  defaultTopK = DEFAULT_TOP_K
2404
2545
  }) => {
2405
2546
  const entries = await Promise.all(rerankers.map(async (candidate) => {
2406
- const response = await evaluateRAGCollection({
2547
+ const evaluated = await evaluateRAGCollectionCases({
2407
2548
  collection,
2408
2549
  defaultTopK,
2409
2550
  input: suite.input,
2551
+ includeTrace: true,
2410
2552
  rerank: candidate.rerank
2411
2553
  });
2554
+ const response = buildRAGEvaluationResponse(evaluated.map((entry) => entry.caseResult));
2412
2555
  return {
2413
2556
  label: candidate.label ?? candidate.id,
2414
2557
  providerName: typeof candidate.rerank === "function" ? undefined : candidate.rerank?.providerName,
2415
2558
  response,
2416
- rerankerId: candidate.id
2559
+ rerankerId: candidate.id,
2560
+ traceSummary: summarizeRetrievalTraces(evaluated.map((entry) => entry.trace).filter((trace) => Boolean(trace)))
2417
2561
  };
2418
2562
  }));
2419
2563
  const leaderboard = buildRAGEvaluationLeaderboard(entries.map((entry) => ({
@@ -2423,7 +2567,8 @@ var compareRAGRerankers = async ({
2423
2567
  label: entry.label,
2424
2568
  response: entry.response,
2425
2569
  startedAt: 0,
2426
- suiteId: suite.id
2570
+ suiteId: suite.id,
2571
+ traceSummary: entry.traceSummary
2427
2572
  })));
2428
2573
  return {
2429
2574
  entries,
@@ -2468,25 +2613,37 @@ var compareRAGRetrievalStrategies = async ({
2468
2613
  defaultTopK = DEFAULT_TOP_K
2469
2614
  }) => {
2470
2615
  const entries = await Promise.all(retrievals.map(async (candidate) => {
2471
- const response = await evaluateRAGCollection({
2616
+ const tracedCollection = {
2617
+ ...collection,
2618
+ search: (input) => collection.search({
2619
+ ...input,
2620
+ queryTransform: candidate.queryTransform ?? input.queryTransform,
2621
+ rerank: candidate.rerank ?? input.rerank,
2622
+ retrieval: candidate.retrieval ?? input.retrieval
2623
+ }),
2624
+ searchWithTrace: (input) => collection.searchWithTrace({
2625
+ ...input,
2626
+ queryTransform: candidate.queryTransform ?? input.queryTransform,
2627
+ rerank: candidate.rerank ?? input.rerank,
2628
+ retrieval: candidate.retrieval ?? input.retrieval
2629
+ })
2630
+ };
2631
+ const evaluated = await evaluateRAGCollectionCases({
2472
2632
  collection: {
2473
- ...collection,
2474
- search: (input) => collection.search({
2475
- ...input,
2476
- queryTransform: candidate.queryTransform ?? input.queryTransform,
2477
- rerank: candidate.rerank ?? input.rerank,
2478
- retrieval: candidate.retrieval ?? input.retrieval
2479
- })
2633
+ ...tracedCollection
2480
2634
  },
2481
2635
  defaultTopK,
2482
2636
  input: suite.input,
2637
+ includeTrace: true,
2483
2638
  rerank: candidate.rerank
2484
2639
  });
2640
+ const response = buildRAGEvaluationResponse(evaluated.map((entry) => entry.caseResult));
2485
2641
  return {
2486
2642
  label: candidate.label ?? candidate.id,
2487
2643
  response,
2488
2644
  retrievalId: candidate.id,
2489
- retrievalMode: resolveRetrievalMode(candidate)
2645
+ retrievalMode: resolveRetrievalMode(candidate),
2646
+ traceSummary: summarizeRetrievalTraces(evaluated.map((entry) => entry.trace).filter((trace) => Boolean(trace)))
2490
2647
  };
2491
2648
  }));
2492
2649
  const leaderboard = buildRAGEvaluationLeaderboard(entries.map((entry) => ({
@@ -2496,7 +2653,8 @@ var compareRAGRetrievalStrategies = async ({
2496
2653
  label: entry.label,
2497
2654
  response: entry.response,
2498
2655
  startedAt: 0,
2499
- suiteId: suite.id
2656
+ suiteId: suite.id,
2657
+ traceSummary: entry.traceSummary
2500
2658
  })));
2501
2659
  return {
2502
2660
  entries,
@@ -2513,37 +2671,14 @@ var evaluateRAGCollection = async ({
2513
2671
  defaultTopK = DEFAULT_TOP_K,
2514
2672
  rerank
2515
2673
  }) => {
2516
- if (input.dryRun) {
2517
- return buildRAGEvaluationResponse(executeDryRunRAGEvaluation(input, defaultTopK));
2518
- }
2519
- const evaluated = await Promise.all(input.cases.map(async (caseInput, caseIndex) => {
2520
- const startedAt = Date.now();
2521
- const mode = resolveEvaluationMode(caseInput);
2522
- const query = caseInput.query.trim();
2523
- const expectedIds = normalizeExpectedIds(mode === "chunkId" ? caseInput.expectedChunkIds ?? [] : mode === "source" ? caseInput.expectedSources ?? [] : caseInput.expectedDocumentIds ?? []);
2524
- const topK = typeof caseInput.topK === "number" ? caseInput.topK : typeof input.topK === "number" ? input.topK : defaultTopK;
2525
- const searchResults = await collection.search({
2526
- filter: typeof caseInput.filter === "object" ? caseInput.filter : input.filter,
2527
- model: caseInput.model ?? input.model,
2528
- query,
2529
- rerank,
2530
- scoreThreshold: typeof caseInput.scoreThreshold === "number" ? caseInput.scoreThreshold : input.scoreThreshold,
2531
- topK
2532
- });
2533
- const sources = buildSources(searchResults);
2534
- const elapsedMs = Date.now() - startedAt;
2535
- const retrievedIds = normalizeExpectedIds(sources.map((source) => extractExpectedId(source, mode)));
2536
- return summarizeRAGEvaluationCase({
2537
- caseIndex,
2538
- caseInput: { ...caseInput, topK },
2539
- elapsedMs,
2540
- expectedIds,
2541
- mode,
2542
- query,
2543
- retrievedIds
2544
- });
2545
- }));
2546
- return buildRAGEvaluationResponse(evaluated);
2674
+ const evaluated = await evaluateRAGCollectionCases({
2675
+ collection,
2676
+ defaultTopK,
2677
+ includeTrace: false,
2678
+ input,
2679
+ rerank
2680
+ });
2681
+ return buildRAGEvaluationResponse(evaluated.map((entry) => entry.caseResult));
2547
2682
  };
2548
2683
  var executeDryRunRAGEvaluation = (input, defaultTopK = DEFAULT_TOP_K) => input.cases.map((caseInput, caseIndex) => {
2549
2684
  const mode = resolveEvaluationMode(caseInput);
@@ -3302,5 +3437,5 @@ export {
3302
3437
  AIStreamKey
3303
3438
  };
3304
3439
 
3305
- //# debugId=116FD5B922628A2864756E2164756E21
3440
+ //# debugId=D6FF5517CF2D735064756E2164756E21
3306
3441
  //# sourceMappingURL=index.js.map