@absolutejs/absolute 0.19.0-beta.531 → 0.19.0-beta.533

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1744,6 +1744,131 @@ var buildRAGAnswerGroundingEvaluationLeaderboard = (runs) => {
1744
1744
  totalCases: run.response.totalCases
1745
1745
  }));
1746
1746
  };
1747
/**
 * Counts how many steps were recorded for each stage across all traces.
 *
 * @param {Array<{steps?: Array<{stage: string}>}>} traces - Traces whose steps are tallied.
 * @returns {Record<string, number>} Stage name mapped to the number of steps seen for it.
 */
var buildTraceStageCounts = (traces) => {
  // Tally in a Map rather than a plain object: a stage named like an
  // Object.prototype member ("constructor", "toString", ...) must start at 0,
  // not at the inherited value.
  const tally = new Map();
  for (const trace of traces) {
    // A trace without a steps list contributes nothing instead of throwing.
    for (const step of trace.steps ?? []) {
      tally.set(step.stage, (tally.get(step.stage) ?? 0) + 1);
    }
  }
  // Object.fromEntries defines own data properties, so every stage name
  // ends up as a real key on the returned object.
  return Object.fromEntries(tally);
};
1756
/**
 * Computes the per-stage difference between two stage-count maps.
 *
 * Stages whose count did not change are omitted from the result.
 *
 * @param {{current: Record<string, number>, previous: Record<string, number>}} params
 *   `current` and `previous` stage-count maps; a stage missing from one side counts as 0.
 * @returns {Record<string, number>} Stage name mapped to `current - previous`, non-zero entries only.
 */
var diffTraceStageCounts = ({
  current,
  previous
}) => {
  // Read own properties only: a stage present on one side (e.g. "constructor")
  // must not pick up an inherited Object.prototype member on the other side,
  // which would turn the delta into NaN.
  const readCount = (counts, stage) => Object.prototype.hasOwnProperty.call(counts, stage) ? counts[stage] ?? 0 : 0;
  const stages = new Set([
    ...Object.keys(current),
    ...Object.keys(previous)
  ]);
  const deltas = [];
  for (const stage of stages) {
    const delta = readCount(current, stage) - readCount(previous, stage);
    if (delta !== 0) {
      deltas.push([stage, delta]);
    }
  }
  return Object.fromEntries(deltas);
};
1773
/**
 * Divides `value` by `total` and rounds the quotient to two decimal places.
 *
 * @param {number} value - Sum to average.
 * @param {number} total - Number of items the sum was taken over.
 * @returns {number} The rounded quotient, or 0 when `total` is not greater than zero.
 */
var roundTraceAverage = (value, total) => {
  if (!(total > 0)) {
    return 0;
  }
  const quotient = value / total;
  return Number(quotient.toFixed(2));
};
1774
/**
 * Aggregates a list of retrieval traces into a single summary object.
 *
 * The summary holds: the number of traces, the distinct `mode` values in
 * first-seen order, how many traces had `runVector` / `runLexical` set, how
 * many had a `transformedQuery` different from `query`, how many had at least
 * one entry in `variantQueries`, two-decimal averages of the result counts and
 * top-K settings, and the per-stage step counts.
 *
 * @param {Array<object>} traces - Traces to summarize.
 * @returns {object | undefined} The summary, or `undefined` when `traces` is empty.
 */
var summarizeRetrievalTraces = (traces) => {
  const totalCases = traces.length;
  if (totalCases === 0) {
    return undefined;
  }
  // Number of traces that satisfy a condition.
  const tallyWhere = (matches) => traces.filter((trace) => matches(trace)).length;
  // Rounded mean of one numeric field over all traces.
  const meanOf = (pick) => roundTraceAverage(traces.reduce((sum, trace) => sum + pick(trace), 0), totalCases);
  const distinctModes = new Set(traces.map((trace) => trace.mode));
  return {
    averageCandidateTopK: meanOf((trace) => trace.candidateTopK),
    averageFinalCount: meanOf((trace) => trace.resultCounts.final),
    averageLexicalCount: meanOf((trace) => trace.resultCounts.lexical),
    averageLexicalTopK: meanOf((trace) => trace.lexicalTopK),
    averageVectorCount: meanOf((trace) => trace.resultCounts.vector),
    lexicalCases: tallyWhere((trace) => trace.runLexical),
    modes: [...distinctModes],
    stageCounts: buildTraceStageCounts(traces),
    totalCases,
    transformedCases: tallyWhere((trace) => trace.transformedQuery !== trace.query),
    variantCases: tallyWhere((trace) => trace.variantQueries.length > 0),
    vectorCases: tallyWhere((trace) => trace.runVector)
  };
};
1824
/**
 * Runs every evaluation case in `input` against `collection` and returns one
 * `{ caseResult, trace }` entry per case, in case order.
 *
 * When `input.dryRun` is set, no search is performed: the dry-run results are
 * wrapped with an undefined trace.
 *
 * @param {object} params
 * @param {object} params.collection - Object exposing `search(input)` and, optionally,
 *   `searchWithTrace(input)` resolving to `{ results, trace }`.
 * @param {object} params.input - Evaluation input; `cases`, `dryRun`, `topK`, `filter`,
 *   `model` and `scoreThreshold` are read from it.
 * @param {number} [params.defaultTopK=DEFAULT_TOP_K] - Top-K used when neither the case
 *   nor the input provides a numeric `topK`.
 * @param {*} [params.rerank] - Passed through unchanged on every search input.
 * @param {boolean} [params.includeTrace=false] - Request a trace for each case.
 * @returns {Promise<Array<{caseResult: *, trace: *}>>} Per-case results; `trace` is
 *   undefined when tracing was not requested or is not available.
 */
var evaluateRAGCollectionCases = async ({
  collection,
  input,
  defaultTopK = DEFAULT_TOP_K,
  rerank,
  includeTrace = false
}) => {
  if (input.dryRun) {
    return executeDryRunRAGEvaluation(input, defaultTopK).map((caseResult) => ({
      caseResult,
      trace: undefined
    }));
  }
  // Collections that only implement `search` were sufficient before tracing
  // existed; fall back to an untraced search instead of throwing a TypeError.
  const canTrace = includeTrace && typeof collection.searchWithTrace === "function";
  return Promise.all(input.cases.map(async (caseInput, caseIndex) => {
    const startedAt = Date.now();
    const mode = resolveEvaluationMode(caseInput);
    const query = caseInput.query.trim();
    const expectedIds = normalizeExpectedIds(mode === "chunkId" ? caseInput.expectedChunkIds ?? [] : mode === "source" ? caseInput.expectedSources ?? [] : caseInput.expectedDocumentIds ?? []);
    // Precedence: per-case topK, then suite-level topK, then the default.
    const topK = typeof caseInput.topK === "number" ? caseInput.topK : typeof input.topK === "number" ? input.topK : defaultTopK;
    const searchInput = {
      filter: typeof caseInput.filter === "object" ? caseInput.filter : input.filter,
      model: caseInput.model ?? input.model,
      query,
      rerank,
      scoreThreshold: typeof caseInput.scoreThreshold === "number" ? caseInput.scoreThreshold : input.scoreThreshold,
      topK
    };
    const searchOutcome = canTrace ? await collection.searchWithTrace(searchInput) : {
      results: await collection.search(searchInput),
      trace: undefined
    };
    const sources = buildSources(searchOutcome.results);
    // Elapsed time covers the search and source building, not the scoring below.
    const elapsedMs = Date.now() - startedAt;
    const retrievedIds = normalizeExpectedIds(sources.map((source) => extractExpectedId(source, mode)));
    return {
      caseResult: summarizeRAGEvaluationCase({
        caseIndex,
        caseInput: { ...caseInput, topK },
        elapsedMs,
        expectedIds,
        mode,
        query,
        retrievedIds
      }),
      trace: searchOutcome.trace
    };
  }));
};
1747
1872
  var buildRAGAnswerGroundingCaseDifficultyLeaderboard = (entries) => {
1748
1873
  const grouped = new Map;
1749
1874
  for (const entry of entries) {
@@ -2029,6 +2154,22 @@ var buildRAGEvaluationRunDiff = ({
2029
2154
  passingRate: current.response.passingRate - (previous?.response.passingRate ?? 0),
2030
2155
  partialCases: current.response.summary.partialCases - (previous?.response.summary.partialCases ?? 0)
2031
2156
  },
2157
+ traceSummaryDelta: current.traceSummary || previous?.traceSummary ? {
2158
+ averageCandidateTopK: (current.traceSummary?.averageCandidateTopK ?? 0) - (previous?.traceSummary?.averageCandidateTopK ?? 0),
2159
+ averageFinalCount: (current.traceSummary?.averageFinalCount ?? 0) - (previous?.traceSummary?.averageFinalCount ?? 0),
2160
+ averageLexicalCount: (current.traceSummary?.averageLexicalCount ?? 0) - (previous?.traceSummary?.averageLexicalCount ?? 0),
2161
+ averageLexicalTopK: (current.traceSummary?.averageLexicalTopK ?? 0) - (previous?.traceSummary?.averageLexicalTopK ?? 0),
2162
+ averageVectorCount: (current.traceSummary?.averageVectorCount ?? 0) - (previous?.traceSummary?.averageVectorCount ?? 0),
2163
+ lexicalCases: (current.traceSummary?.lexicalCases ?? 0) - (previous?.traceSummary?.lexicalCases ?? 0),
2164
+ modesChanged: (current.traceSummary?.modes ?? []).join("|") !== (previous?.traceSummary?.modes ?? []).join("|"),
2165
+ stageCounts: diffTraceStageCounts({
2166
+ current: current.traceSummary?.stageCounts ?? {},
2167
+ previous: previous?.traceSummary?.stageCounts ?? {}
2168
+ }),
2169
+ transformedCases: (current.traceSummary?.transformedCases ?? 0) - (previous?.traceSummary?.transformedCases ?? 0),
2170
+ variantCases: (current.traceSummary?.variantCases ?? 0) - (previous?.traceSummary?.variantCases ?? 0),
2171
+ vectorCases: (current.traceSummary?.vectorCases ?? 0) - (previous?.traceSummary?.vectorCases ?? 0)
2172
+ } : undefined,
2032
2173
  unchangedCases
2033
2174
  };
2034
2175
  };
@@ -2379,17 +2520,20 @@ var compareRAGRerankers = async ({
2379
2520
  defaultTopK = DEFAULT_TOP_K
2380
2521
  }) => {
2381
2522
  const entries = await Promise.all(rerankers.map(async (candidate) => {
2382
- const response = await evaluateRAGCollection({
2523
+ const evaluated = await evaluateRAGCollectionCases({
2383
2524
  collection,
2384
2525
  defaultTopK,
2385
2526
  input: suite.input,
2527
+ includeTrace: true,
2386
2528
  rerank: candidate.rerank
2387
2529
  });
2530
+ const response = buildRAGEvaluationResponse(evaluated.map((entry) => entry.caseResult));
2388
2531
  return {
2389
2532
  label: candidate.label ?? candidate.id,
2390
2533
  providerName: typeof candidate.rerank === "function" ? undefined : candidate.rerank?.providerName,
2391
2534
  response,
2392
- rerankerId: candidate.id
2535
+ rerankerId: candidate.id,
2536
+ traceSummary: summarizeRetrievalTraces(evaluated.map((entry) => entry.trace).filter((trace) => Boolean(trace)))
2393
2537
  };
2394
2538
  }));
2395
2539
  const leaderboard = buildRAGEvaluationLeaderboard(entries.map((entry) => ({
@@ -2399,7 +2543,8 @@ var compareRAGRerankers = async ({
2399
2543
  label: entry.label,
2400
2544
  response: entry.response,
2401
2545
  startedAt: 0,
2402
- suiteId: suite.id
2546
+ suiteId: suite.id,
2547
+ traceSummary: entry.traceSummary
2403
2548
  })));
2404
2549
  return {
2405
2550
  entries,
@@ -2444,25 +2589,37 @@ var compareRAGRetrievalStrategies = async ({
2444
2589
  defaultTopK = DEFAULT_TOP_K
2445
2590
  }) => {
2446
2591
  const entries = await Promise.all(retrievals.map(async (candidate) => {
2447
- const response = await evaluateRAGCollection({
2592
+ const tracedCollection = {
2593
+ ...collection,
2594
+ search: (input) => collection.search({
2595
+ ...input,
2596
+ queryTransform: candidate.queryTransform ?? input.queryTransform,
2597
+ rerank: candidate.rerank ?? input.rerank,
2598
+ retrieval: candidate.retrieval ?? input.retrieval
2599
+ }),
2600
+ searchWithTrace: (input) => collection.searchWithTrace({
2601
+ ...input,
2602
+ queryTransform: candidate.queryTransform ?? input.queryTransform,
2603
+ rerank: candidate.rerank ?? input.rerank,
2604
+ retrieval: candidate.retrieval ?? input.retrieval
2605
+ })
2606
+ };
2607
+ const evaluated = await evaluateRAGCollectionCases({
2448
2608
  collection: {
2449
- ...collection,
2450
- search: (input) => collection.search({
2451
- ...input,
2452
- queryTransform: candidate.queryTransform ?? input.queryTransform,
2453
- rerank: candidate.rerank ?? input.rerank,
2454
- retrieval: candidate.retrieval ?? input.retrieval
2455
- })
2609
+ ...tracedCollection
2456
2610
  },
2457
2611
  defaultTopK,
2458
2612
  input: suite.input,
2613
+ includeTrace: true,
2459
2614
  rerank: candidate.rerank
2460
2615
  });
2616
+ const response = buildRAGEvaluationResponse(evaluated.map((entry) => entry.caseResult));
2461
2617
  return {
2462
2618
  label: candidate.label ?? candidate.id,
2463
2619
  response,
2464
2620
  retrievalId: candidate.id,
2465
- retrievalMode: resolveRetrievalMode(candidate)
2621
+ retrievalMode: resolveRetrievalMode(candidate),
2622
+ traceSummary: summarizeRetrievalTraces(evaluated.map((entry) => entry.trace).filter((trace) => Boolean(trace)))
2466
2623
  };
2467
2624
  }));
2468
2625
  const leaderboard = buildRAGEvaluationLeaderboard(entries.map((entry) => ({
@@ -2472,7 +2629,8 @@ var compareRAGRetrievalStrategies = async ({
2472
2629
  label: entry.label,
2473
2630
  response: entry.response,
2474
2631
  startedAt: 0,
2475
- suiteId: suite.id
2632
+ suiteId: suite.id,
2633
+ traceSummary: entry.traceSummary
2476
2634
  })));
2477
2635
  return {
2478
2636
  entries,
@@ -2489,37 +2647,14 @@ var evaluateRAGCollection = async ({
2489
2647
  defaultTopK = DEFAULT_TOP_K,
2490
2648
  rerank
2491
2649
  }) => {
2492
- if (input.dryRun) {
2493
- return buildRAGEvaluationResponse(executeDryRunRAGEvaluation(input, defaultTopK));
2494
- }
2495
- const evaluated = await Promise.all(input.cases.map(async (caseInput, caseIndex) => {
2496
- const startedAt = Date.now();
2497
- const mode = resolveEvaluationMode(caseInput);
2498
- const query = caseInput.query.trim();
2499
- const expectedIds = normalizeExpectedIds(mode === "chunkId" ? caseInput.expectedChunkIds ?? [] : mode === "source" ? caseInput.expectedSources ?? [] : caseInput.expectedDocumentIds ?? []);
2500
- const topK = typeof caseInput.topK === "number" ? caseInput.topK : typeof input.topK === "number" ? input.topK : defaultTopK;
2501
- const searchResults = await collection.search({
2502
- filter: typeof caseInput.filter === "object" ? caseInput.filter : input.filter,
2503
- model: caseInput.model ?? input.model,
2504
- query,
2505
- rerank,
2506
- scoreThreshold: typeof caseInput.scoreThreshold === "number" ? caseInput.scoreThreshold : input.scoreThreshold,
2507
- topK
2508
- });
2509
- const sources = buildSources(searchResults);
2510
- const elapsedMs = Date.now() - startedAt;
2511
- const retrievedIds = normalizeExpectedIds(sources.map((source) => extractExpectedId(source, mode)));
2512
- return summarizeRAGEvaluationCase({
2513
- caseIndex,
2514
- caseInput: { ...caseInput, topK },
2515
- elapsedMs,
2516
- expectedIds,
2517
- mode,
2518
- query,
2519
- retrievedIds
2520
- });
2521
- }));
2522
- return buildRAGEvaluationResponse(evaluated);
2650
+ const evaluated = await evaluateRAGCollectionCases({
2651
+ collection,
2652
+ defaultTopK,
2653
+ includeTrace: false,
2654
+ input,
2655
+ rerank
2656
+ });
2657
+ return buildRAGEvaluationResponse(evaluated.map((entry) => entry.caseResult));
2523
2658
  };
2524
2659
  var executeDryRunRAGEvaluation = (input, defaultTopK = DEFAULT_TOP_K) => input.cases.map((caseInput, caseIndex) => {
2525
2660
  const mode = resolveEvaluationMode(caseInput);
@@ -2639,5 +2774,5 @@ export {
2639
2774
  buildRAGAnswerWorkflowState
2640
2775
  };
2641
2776
 
2642
- //# debugId=CBECE8C7471759AC64756E2164756E21
2777
+ //# debugId=BCBFBA9A1CF59F3A64756E2164756E21
2643
2778
  //# sourceMappingURL=index.js.map