@absolutejs/absolute 0.19.0-beta.533 → 0.19.0-beta.535

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/ai/index.js CHANGED
@@ -5677,7 +5677,266 @@ var buildGroundingCaseSnapshots = ({
5677
5677
  };
5678
5678
  });
5679
5679
  };
5680
/**
 * Reports whether two stage-count maps hold the same count for every stage.
 * A stage that is missing (or nullish) on one side is treated as a count of 0.
 *
 * @param {Record<string, number>} left - First stage-count map.
 * @param {Record<string, number>} right - Second stage-count map.
 * @returns {boolean} `true` when every stage seen on either side has equal counts.
 */
var areStageCountsEqual = (left, right) => {
  // Union of stage names so a stage present on only one side is still compared.
  const stageNames = new Set(Object.keys(left).concat(Object.keys(right)));
  return [...stageNames].every((stageName) => {
    const leftCount = left[stageName] ?? 0;
    const rightCount = right[stageName] ?? 0;
    return leftCount === rightCount;
  });
};
5692
/**
 * Builds the trace snapshot for one evaluation case and classifies how the
 * trace moved relative to the previous run's snapshot of the same case.
 *
 * `traceChange` is:
 * - "new" when there is a current trace but no previous snapshot,
 * - "changed" when a previous snapshot exists and any compared field differs
 *   (mode, transformed query, variant queries, final/vector/lexical counts,
 *   candidate/lexical topK, stage counts),
 * - "unchanged" otherwise (including when neither side exists).
 *
 * @param {object} input
 * @param {object} input.caseResult - Case result; `caseId`, `label`, `query`
 *   and `status` are copied into the snapshot.
 * @param {object} [input.currentTrace] - Trace for the current run, if any.
 * @param {object} [input.previousTrace] - Snapshot from the previous run, if any.
 * @returns {object} Snapshot holding current values, `previous*` values and `traceChange`.
 */
var buildEvaluationCaseTraceSnapshot = ({
  caseResult,
  currentTrace,
  previousTrace
}) => {
  const stageCounts = currentTrace ? buildTraceStageCounts([currentTrace]) : {};
  const previousStageCounts = previousTrace?.stageCounts ?? {};
  // A previous snapshot may lack `variantQueries`; fall back to an empty list
  // for the comparison as well as for the returned field, so it cannot throw.
  const previousVariantQueries = previousTrace?.variantQueries ?? [];

  const candidateTopK = currentTrace?.candidateTopK ?? 0;
  const finalCount = currentTrace?.resultCounts.final ?? 0;
  const lexicalCount = currentTrace?.resultCounts.lexical ?? 0;
  const lexicalTopK = currentTrace?.lexicalTopK ?? 0;
  const traceMode = currentTrace?.mode;
  // `||` (not `??`) on purpose: an empty transformed query is stored as undefined.
  const transformedQuery = currentTrace?.transformedQuery || undefined;
  const variantQueries = currentTrace?.variantQueries ?? [];
  const vectorCount = currentTrace?.resultCounts.vector ?? 0;

  // Element-wise comparison: joining with "|" would treat ["a|b"] and
  // ["a", "b"] as the same list.
  const sameVariantQueries = previousVariantQueries.length === variantQueries.length
    && previousVariantQueries.every((query, index) => query === variantQueries[index]);

  let traceChange;
  if (!previousTrace) {
    traceChange = currentTrace ? "new" : "unchanged";
  } else {
    const drifted = previousTrace.traceMode !== traceMode
      || previousTrace.transformedQuery !== transformedQuery
      || !sameVariantQueries
      || previousTrace.finalCount !== finalCount
      || previousTrace.vectorCount !== vectorCount
      || previousTrace.lexicalCount !== lexicalCount
      || previousTrace.candidateTopK !== candidateTopK
      || previousTrace.lexicalTopK !== lexicalTopK
      || !areStageCountsEqual(previousStageCounts, stageCounts);
    traceChange = drifted ? "changed" : "unchanged";
  }

  return {
    candidateTopK,
    caseId: caseResult.caseId,
    finalCount,
    label: caseResult.label,
    lexicalCount,
    lexicalTopK,
    previousCandidateTopK: previousTrace?.candidateTopK,
    previousFinalCount: previousTrace?.finalCount,
    previousLexicalCount: previousTrace?.lexicalCount,
    previousLexicalTopK: previousTrace?.lexicalTopK,
    previousStageCounts,
    previousTraceMode: previousTrace?.traceMode,
    previousTransformedQuery: previousTrace?.transformedQuery,
    previousVariantQueries,
    previousVectorCount: previousTrace?.vectorCount,
    query: caseResult.query,
    stageCounts,
    status: caseResult.status,
    traceChange,
    traceMode,
    transformedQuery,
    variantQueries,
    vectorCount
  };
};
5726
/**
 * Builds one trace snapshot per evaluated entry, with no previous-run baseline.
 *
 * @param {Array<{caseResult: object, trace?: object}>} evaluated - Evaluated
 *   entries; each entry's `trace` is passed on as the current trace.
 * @returns {object[]} Snapshots in the same order as `evaluated`.
 */
var buildEvaluationCaseTraceSnapshotsFromEvaluated = (evaluated) => evaluated.map((entry) => {
  const { caseResult, trace: currentTrace } = entry;
  return buildEvaluationCaseTraceSnapshot({ caseResult, currentTrace });
});
5730
/**
 * Builds per-case trace snapshots for the `current` run, using the matching
 * snapshot from `previous` (looked up by `caseId`) as the baseline.
 *
 * @param {object} input
 * @param {object} [input.current] - Run whose `response.cases` are iterated;
 *   its `caseTraceSnapshots` (if any) are turned back into trace-shaped objects.
 * @param {object} [input.previous] - Earlier run providing baseline snapshots.
 * @returns {object[]} One snapshot per case of `current`, or `[]` without `current`.
 */
var buildEvaluationCaseTraceSnapshots = ({
  current,
  previous
}) => {
  if (!current) {
    return [];
  }

  const indexByCaseId = (snapshots) => new Map((snapshots ?? []).map((entry) => [entry.caseId, entry]));
  const currentTraces = indexByCaseId(current.caseTraceSnapshots);
  const previousTraces = indexByCaseId(previous?.caseTraceSnapshots);

  // Re-creates a trace-shaped object from a stored snapshot so it can be fed
  // through buildEvaluationCaseTraceSnapshot. Returns undefined when the case
  // has no stored snapshot.
  // NOTE(review): the rebuilt trace has `steps: []` and defaults `mode` to
  // "vector"; whether that affects the stage-count / mode comparison depends
  // on buildTraceStageCounts, which is not visible here — confirm.
  const rebuildTrace = (caseResult) => {
    const stored = currentTraces.get(caseResult.caseId);
    if (!stored) {
      return undefined;
    }
    const { finalCount, lexicalCount, vectorCount } = stored;
    return {
      candidateTopK: stored.candidateTopK,
      lexicalTopK: stored.lexicalTopK,
      mode: stored.traceMode ?? "vector",
      query: caseResult.query,
      resultCounts: {
        final: finalCount,
        fused: finalCount,
        lexical: lexicalCount,
        reranked: finalCount,
        vector: vectorCount
      },
      runLexical: lexicalCount > 0,
      runVector: vectorCount > 0,
      steps: [],
      topK: caseResult.topK,
      transformedQuery: stored.transformedQuery ?? caseResult.query,
      variantQueries: stored.variantQueries
    };
  };

  return current.response.cases.map((caseResult) => buildEvaluationCaseTraceSnapshot({
    caseResult,
    currentTrace: rebuildTrace(caseResult),
    previousTrace: previousTraces.get(caseResult.caseId)
  }));
};
5680
5772
  var getStatusRank = (status) => status === "pass" ? 2 : status === "partial" ? 1 : 0;
5773
/**
 * Formats a number with a fixed number of decimals and an explicit "+" sign
 * for values that are >= 0 (negative values keep their own "-").
 *
 * @param {number} value - Delta to format.
 * @param {number} [decimals=0] - Digits after the decimal point.
 * @param {string} [suffix=""] - Text appended after the number (e.g. "%", "ms").
 * @returns {string} Signed, fixed-precision text.
 */
var formatSignedDelta = (value, decimals = 0, suffix = "") => {
  const sign = value >= 0 ? "+" : "";
  const magnitude = value.toFixed(decimals);
  return `${sign}${magnitude}${suffix}`;
};
5774
/**
 * Renders a one-line summary of an evaluation response: passed/total cases,
 * average F1 (3 decimals) and average latency in ms (1 decimal).
 *
 * @param {object} response - Reads `totalCases` and `summary.{passedCases, averageF1, averageLatencyMs}`.
 * @returns {string} Summary text with parts separated by a middle dot.
 */
var formatEvaluationSummary = (response) => {
  const { passedCases, averageF1, averageLatencyMs } = response.summary;
  const passPart = `${passedCases}/${response.totalCases} pass`;
  const f1Part = `f1 ${averageF1.toFixed(3)}`;
  const latencyPart = `latency ${averageLatencyMs.toFixed(1)}ms`;
  return `${passPart} \xB7 ${f1Part} \xB7 ${latencyPart}`;
};
5775
/**
 * Joins case labels into a comma-separated list, falling back to `caseId`
 * for cases without a label, or "none" for an empty list.
 *
 * @param {Array<{label?: string, caseId: string}>} cases
 * @returns {string}
 */
var formatHistoryCaseLabels = (cases) => {
  if (cases.length > 0) {
    const names = cases.map((entry) => entry.label ?? entry.caseId);
    return names.join(", ");
  }
  return "none";
};
5776
/**
 * Joins trace modes with " / ", or returns "n/a" when the list is empty.
 *
 * @param {string[]} modes
 * @returns {string}
 */
var formatTraceModes = (modes) => {
  if (modes.length > 0) {
    return modes.join(" / ");
  }
  return "n/a";
};
5777
/**
 * Summarizes the three stages with the highest counts as "stage count"
 * pairs joined by a middle dot, or "n/a" when there are no stages.
 *
 * @param {Record<string, number>} stageCounts
 * @returns {string}
 */
var formatTraceStageSummary = (stageCounts) => {
  // Object.entries returns a fresh array, so sorting it in place is safe.
  const ranked = Object.entries(stageCounts);
  ranked.sort(([, countA], [, countB]) => countB - countA);
  const parts = ranked.slice(0, 3).map((pair) => `${pair[0]} ${pair[1]}`);
  if (parts.length === 0) {
    return "n/a";
  }
  return parts.join(" \xB7 ");
};
5781
/**
 * Formats a count against a total as "count/total".
 *
 * @param {number} count
 * @param {number} total
 * @returns {string}
 */
var formatTraceRatio = (count, total) => {
  return [count, total].map((part) => `${part}`).join("/");
};
5782
/**
 * Formats a count delta with an explicit "+" for values >= 0; negative
 * values keep their own "-". No rounding is applied.
 *
 * @param {number} value
 * @returns {string}
 */
var formatTraceCountDelta = (value) => {
  const sign = value >= 0 ? "+" : "";
  return `${sign}${value}`;
};
5783
/**
 * Builds label/value rows describing a comparison entry's trace summary.
 *
 * @param {object} entry - Comparison entry; reads `entry.traceSummary`.
 * @returns {Array<{label: string, value: string}>} A single "Unavailable" row
 *   when there is no trace summary, otherwise eight summary rows.
 */
var buildRAGComparisonTraceSummaryRows = (entry) => {
  const summary = entry.traceSummary;
  if (!summary) {
    return [{ label: "Trace", value: "Unavailable" }];
  }

  const row = (label, value) => ({ label, value });
  const oneDecimal = (amount) => amount.toFixed(1);
  const { totalCases } = summary;

  return [
    row("Modes", formatTraceModes(summary.modes)),
    row("Avg final", oneDecimal(summary.averageFinalCount)),
    row("Avg vector", oneDecimal(summary.averageVectorCount)),
    row("Avg lexical", oneDecimal(summary.averageLexicalCount)),
    row("Transforms", formatTraceRatio(summary.transformedCases, totalCases)),
    row("Variants", formatTraceRatio(summary.variantCases, totalCases)),
    row("TopK", `${oneDecimal(summary.averageCandidateTopK)} / ${oneDecimal(summary.averageLexicalTopK)}`),
    row("Stages", formatTraceStageSummary(summary.stageCounts))
  ];
};
5811
/**
 * Builds label/value rows comparing an entry's trace summary to the leader's.
 *
 * Short-circuits with a single row when the entry has no trace summary, when
 * the leader has none, or when the entry is the leader itself.
 *
 * @param {object} entry - Comparison entry; reads `entry.traceSummary`.
 * @param {object} [leader] - Leading entry; reads `label` and `traceSummary`.
 * @returns {Array<{label: string, value: string}>}
 */
var buildRAGComparisonTraceDiffRows = (entry, leader) => {
  const ownTrace = entry.traceSummary;
  if (!ownTrace) {
    return [{ label: "Trace", value: "Unavailable for comparison" }];
  }
  const baselineTrace = leader?.traceSummary;
  if (!baselineTrace) {
    return [{ label: "Baseline", value: "Leader trace unavailable" }];
  }
  if (entry === leader) {
    return [{ label: "Baseline", value: "Leader strategy" }];
  }

  // Stage names in leader-first key order; only non-zero deltas are reported,
  // and at most the first three of them.
  const stageNames = Object.keys({
    ...baselineTrace.stageCounts,
    ...ownTrace.stageCounts
  });
  const stageParts = [];
  for (const stageName of stageNames) {
    const difference = (ownTrace.stageCounts[stageName] ?? 0) - (baselineTrace.stageCounts[stageName] ?? 0);
    if (difference !== 0) {
      stageParts.push(`${stageName} ${formatTraceCountDelta(difference)}`);
    }
  }
  const stageDelta = stageParts.slice(0, 3).join(" \xB7 ");

  const rows = [{ label: "Baseline", value: leader.label }];

  const ownModes = formatTraceModes(ownTrace.modes);
  const baselineModes = formatTraceModes(baselineTrace.modes);
  if (ownModes !== baselineModes) {
    rows.push({ label: "Modes vs leader", value: `${ownModes} vs ${baselineModes}` });
  }

  rows.push(
    {
      label: "Final delta",
      value: formatSignedDelta(ownTrace.averageFinalCount - baselineTrace.averageFinalCount, 1)
    },
    {
      label: "Vector delta",
      value: formatSignedDelta(ownTrace.averageVectorCount - baselineTrace.averageVectorCount, 1)
    },
    {
      label: "Lexical delta",
      value: formatSignedDelta(ownTrace.averageLexicalCount - baselineTrace.averageLexicalCount, 1)
    },
    {
      label: "Transform delta",
      value: formatTraceCountDelta(ownTrace.transformedCases - baselineTrace.transformedCases)
    }
  );

  if (stageDelta) {
    rows.push({ label: "Stage delta", value: stageDelta });
  }
  return rows;
};
5858
/**
 * Builds label/value rows summarizing evaluation history: run count, latest
 * and previous run summaries, the diff between them, and trace drift.
 *
 * @param {object} [history] - History object; reads `runs`, `latestRun`,
 *   `previousRun`, `diff` and `caseTraceSnapshots`.
 * @returns {Array<{label: string, value: string}>} A single placeholder row
 *   when there is no latest run. When there is no diff, the rows end with a
 *   "History diff" hint and no delta or drift rows are added.
 */
var buildRAGEvaluationHistoryRows = (history) => {
  if (!history?.latestRun) {
    return [
      { label: "History", value: "No persisted benchmark runs yet." }
    ];
  }

  const { latestRun, previousRun, diff } = history;
  const rows = [
    { label: "Runs recorded", value: String(history.runs.length) },
    {
      label: "Latest",
      value: `${latestRun.label} \xB7 ${formatEvaluationSummary(latestRun.response)}`
    }
  ];

  const latestTrace = latestRun.traceSummary;
  if (latestTrace) {
    rows.push({
      label: "Latest trace",
      value: `${formatTraceModes(latestTrace.modes)} \xB7 final ${latestTrace.averageFinalCount.toFixed(1)} \xB7 vector ${latestTrace.averageVectorCount.toFixed(1)} \xB7 lexical ${latestTrace.averageLexicalCount.toFixed(1)}`
    });
  }

  if (previousRun) {
    rows.push({
      label: "Previous",
      value: `${previousRun.label} \xB7 ${formatEvaluationSummary(previousRun.response)}`
    });
  }

  if (!diff) {
    rows.push({
      label: "History diff",
      value: "Run the benchmark again to diff regressions over time."
    });
    return rows;
  }

  const { summaryDelta, traceSummaryDelta } = diff;
  rows.push(
    { label: "Passing delta", value: formatSignedDelta(summaryDelta.passingRate, 1, "%") },
    { label: "Average F1 delta", value: formatSignedDelta(summaryDelta.averageF1, 3) },
    { label: "Latency delta", value: formatSignedDelta(summaryDelta.averageLatencyMs, 1, "ms") },
    { label: "Improved", value: formatHistoryCaseLabels(diff.improvedCases) },
    { label: "Regressed", value: formatHistoryCaseLabels(diff.regressedCases) }
  );

  if (traceSummaryDelta) {
    rows.push(
      { label: "Trace mode shift", value: traceSummaryDelta.modesChanged ? "changed" : "stable" },
      { label: "Trace final delta", value: formatSignedDelta(traceSummaryDelta.averageFinalCount, 1) },
      { label: "Trace vector delta", value: formatSignedDelta(traceSummaryDelta.averageVectorCount, 1) },
      { label: "Trace lexical delta", value: formatSignedDelta(traceSummaryDelta.averageLexicalCount, 1) },
      { label: "Trace transform delta", value: formatTraceCountDelta(traceSummaryDelta.transformedCases) },
      { label: "Trace variant delta", value: formatTraceCountDelta(traceSummaryDelta.variantCases) }
    );
    const stageDelta = Object.entries(traceSummaryDelta.stageCounts ?? {})
      .map(([stage, count]) => `${stage} ${formatTraceCountDelta(count)}`)
      .join(", ");
    if (stageDelta) {
      rows.push({ label: "Trace stage delta", value: stageDelta });
    }
  }

  // A caller-supplied history object may not carry `caseTraceSnapshots`;
  // treat a missing list as empty instead of throwing on `.length`.
  const caseTraceSnapshots = history.caseTraceSnapshots ?? [];
  if (caseTraceSnapshots.length > 0) {
    const driftedLabels = caseTraceSnapshots
      .filter((entry) => entry.traceChange === "changed")
      .map((entry) => entry.label ?? entry.caseId);
    rows.push({
      label: "Trace drift cases",
      // Only the first four drifted cases are listed.
      value: driftedLabels.length > 0 ? driftedLabels.slice(0, 4).join(", ") : "none"
    });
  }
  return rows;
};
5681
5940
  var buildRAGEvaluationRunDiff = ({
5682
5941
  current,
5683
5942
  previous
@@ -5865,6 +6124,10 @@ var loadRAGEvaluationHistory = async ({
5865
6124
  const latestRun = runs[0];
5866
6125
  const previousRun = runs[1];
5867
6126
  return {
6127
+ caseTraceSnapshots: buildEvaluationCaseTraceSnapshots({
6128
+ current: latestRun,
6129
+ previous: previousRun
6130
+ }),
5868
6131
  diff: latestRun && previousRun ? buildRAGEvaluationRunDiff({
5869
6132
  current: latestRun,
5870
6133
  previous: previousRun
@@ -6076,6 +6339,7 @@ var compareRAGRerankers = async ({
6076
6339
  });
6077
6340
  const response = buildRAGEvaluationResponse(evaluated.map((entry) => entry.caseResult));
6078
6341
  return {
6342
+ caseTraceSnapshots: buildEvaluationCaseTraceSnapshotsFromEvaluated(evaluated),
6079
6343
  label: candidate.label ?? candidate.id,
6080
6344
  providerName: typeof candidate.rerank === "function" ? undefined : candidate.rerank?.providerName,
6081
6345
  response,
@@ -6162,6 +6426,7 @@ var compareRAGRetrievalStrategies = async ({
6162
6426
  });
6163
6427
  const response = buildRAGEvaluationResponse(evaluated.map((entry) => entry.caseResult));
6164
6428
  return {
6429
+ caseTraceSnapshots: buildEvaluationCaseTraceSnapshotsFromEvaluated(evaluated),
6165
6430
  label: candidate.label ?? candidate.id,
6166
6431
  response,
6167
6432
  retrievalId: candidate.id,
@@ -6230,7 +6495,8 @@ var executeDryRunRAGEvaluation = (input, defaultTopK = DEFAULT_TOP_K2) => input.
6230
6495
  var runRAGEvaluationSuite = async ({
6231
6496
  suite,
6232
6497
  evaluate,
6233
- overrides
6498
+ overrides,
6499
+ artifacts
6234
6500
  }) => {
6235
6501
  const startedAt = Date.now();
6236
6502
  const response = await evaluate({
@@ -6240,6 +6506,7 @@ var runRAGEvaluationSuite = async ({
6240
6506
  });
6241
6507
  const finishedAt = Date.now();
6242
6508
  return {
6509
+ caseTraceSnapshots: artifacts?.caseTraceSnapshots,
6243
6510
  elapsedMs: finishedAt - startedAt,
6244
6511
  finishedAt,
6245
6512
  id: generateId(),
@@ -6247,7 +6514,8 @@ var runRAGEvaluationSuite = async ({
6247
6514
  metadata: suite.metadata,
6248
6515
  response,
6249
6516
  startedAt,
6250
- suiteId: suite.id
6517
+ suiteId: suite.id,
6518
+ traceSummary: artifacts?.traceSummary
6251
6519
  };
6252
6520
  };
6253
6521
  var summarizeRAGEvaluationCase = ({
@@ -11422,7 +11690,10 @@ export {
11422
11690
  buildRAGEvaluationRunDiff,
11423
11691
  buildRAGEvaluationResponse,
11424
11692
  buildRAGEvaluationLeaderboard,
11693
+ buildRAGEvaluationHistoryRows,
11425
11694
  buildRAGContext,
11695
+ buildRAGComparisonTraceSummaryRows,
11696
+ buildRAGComparisonTraceDiffRows,
11426
11697
  buildRAGCitations,
11427
11698
  buildRAGCitationReferenceMap,
11428
11699
  buildRAGAnswerGroundingEvaluationRunDiff,
@@ -11438,5 +11709,5 @@ export {
11438
11709
  aiChat
11439
11710
  };
11440
11711
 
11441
- //# debugId=5129E1811D01124064756E2164756E21
11712
+ //# debugId=3B5842716560251D64756E2164756E21
11442
11713
  //# sourceMappingURL=index.js.map