@absolutejs/absolute 0.19.0-beta.538 → 0.19.0-beta.539

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1311,6 +1311,16 @@ export type RAGEvaluationCaseTracePresentation = {
1311
1311
  traceChange: RAGEvaluationCaseTraceSnapshot['traceChange'];
1312
1312
  rows: RAGLabelValueRow[];
1313
1313
  };
1314
/**
 * Display-oriented view of an evaluation history.
 *
 * NOTE(review): presumably the shape returned by
 * `buildRAGEvaluationHistoryPresentation` — confirm against the implementation.
 */
export type RAGEvaluationHistoryPresentation = {
  /** Presentation entries for individual evaluation case traces. */
  caseTraces: RAGEvaluationCaseTracePresentation[];
  /** Label/value rows describing the history. */
  rows: RAGLabelValueRow[];
  /** Summary text for the history. */
  summary: string;
};
1319
/**
 * Display-oriented view of an answer-grounding history.
 *
 * NOTE(review): presumably the shape returned by
 * `buildRAGAnswerGroundingHistoryPresentation` — confirm against the
 * implementation.
 */
export type RAGAnswerGroundingHistoryPresentation = {
  /** Presentation entries for individual grounding case snapshots. */
  caseSnapshots: RAGAnswerGroundingCaseSnapshotPresentation[];
  /** Label/value rows describing the history. */
  rows: RAGLabelValueRow[];
  /** Summary text for the history. */
  summary: string;
};
1314
1324
  export type RAGEvaluationLeaderboardEntry = {
1315
1325
  runId: string;
1316
1326
  suiteId: string;
@@ -2311,8 +2311,11 @@ var buildEvaluationCaseTraceSnapshots = ({
2311
2311
  };
2312
2312
  var getStatusRank = (status) => status === "pass" ? 2 : status === "partial" ? 1 : 0;
2313
2313
  var formatSignedDelta = (value, decimals = 0, suffix = "") => `${value >= 0 ? "+" : ""}${value.toFixed(decimals)}${suffix}`;
2314
/**
 * Renders a number with one fraction digit followed by a percent sign.
 * The value is used as given; it is not multiplied by 100.
 */
var formatEvaluationPassingRate = (value) => {
  const rounded = value.toFixed(1);
  return `${rounded}%`;
};
2314
2315
  var formatEvaluationSummary = (response) => `${response.summary.passedCases}/${response.totalCases} pass \xB7 f1 ${response.summary.averageF1.toFixed(3)} \xB7 latency ${response.summary.averageLatencyMs.toFixed(1)}ms`;
2316
/**
 * Builds a one-line summary of a grounding response from `response.summary`:
 * passed/total counts, grounded / partial / ungrounded case counts, the
 * average resolved-citation rate (multiplied by 100 and shown as a percentage
 * with one fraction digit), and the average citation F1 (three fraction
 * digits). Segments are separated by middle dots.
 */
var formatGroundingHistorySummaryValue = (response) => {
  const segments = [
    `${response.summary.passedCases}/${response.summary.totalCases} pass`,
    `grounded ${response.summary.groundedCases}`,
    `partial ${response.summary.partialCases}`,
    `ungrounded ${response.summary.ungroundedCases}`,
    `resolved citations ${(response.summary.averageResolvedCitationRate * 100).toFixed(1)}%`,
    `citation f1 ${response.summary.averageCitationF1.toFixed(3)}`
  ];
  return segments.join(" \xB7 ");
};
2315
2317
  var formatHistoryCaseLabels = (cases) => cases.length > 0 ? cases.map((entry) => entry.label ?? entry.caseId).join(", ") : "none";
2318
/**
 * Joins grounding case entries into a comma-separated list. Each entry is
 * shown by its `label`, or by `caseId` when the label is null or undefined.
 * An empty list yields "none". Applies the same rule as
 * `formatHistoryCaseLabels`.
 */
var formatGroundingHistoryCaseLabels = (cases) => {
  const hasEntries = cases.length > 0;
  if (!hasEntries) {
    return "none";
  }
  const toDisplayName = (groundingCase) => groundingCase.label ?? groundingCase.caseId;
  return cases.map(toDisplayName).join(", ");
};
2316
2319
  var formatTraceModes = (modes) => modes.length > 0 ? modes.join(" / ") : "n/a";
2317
2320
  var formatTraceStageSummary = (stageCounts) => {
2318
2321
  const topStages = Object.entries(stageCounts).sort((left, right) => right[1] - left[1]).slice(0, 3);
@@ -2535,6 +2538,11 @@ var buildRAGEvaluationCaseTracePresentations = (history) => {
2535
2538
  };
2536
2539
  });
2537
2540
  };
2541
/**
 * Assembles the presentation object for an evaluation history.
 *
 * `caseTraces` comes from `buildRAGEvaluationCaseTracePresentations(history)`
 * and `rows` from `buildRAGEvaluationHistoryRows(history)`. `summary` is the
 * latest run's label, or a fixed placeholder when `history` is nullish or has
 * no latest run.
 *
 * @param history Evaluation history; may be null or undefined.
 * @returns Object with `caseTraces`, `rows` and `summary`.
 */
var buildRAGEvaluationHistoryPresentation = (history) => {
  const caseTraces = buildRAGEvaluationCaseTracePresentations(history);
  const rows = buildRAGEvaluationHistoryRows(history);
  const latestRun = history?.latestRun;
  const summary = latestRun ? latestRun.label : "No persisted benchmark runs yet.";
  return { caseTraces, rows, summary };
};
2538
2546
  var buildRAGEvaluationRunDiff = ({
2539
2547
  current,
2540
2548
  previous
@@ -2666,6 +2674,66 @@ var buildRAGAnswerGroundingCaseSnapshotPresentations = (history) => {
2666
2674
  };
2667
2675
  });
2668
2676
  };
2677
/**
 * Builds the label/value rows that summarize an answer-grounding history.
 *
 * Returns a single "History" placeholder row when `history` is nullish or has
 * no `latestRun`. Otherwise the rows are, in order: "Runs recorded", "Latest",
 * "Previous" (only with a previous run), "Best recorded" (only with a first
 * leaderboard entry), "Answer drift" (only with case snapshots), and then
 * either a "History diff" hint when there is no diff, or the five diff rows
 * "Passing delta", "Citation F1 delta", "Resolved citation delta", "Improved"
 * and "Regressed".
 *
 * @param history Grounding history exposing `runs`, `latestRun`,
 *   `previousRun`, `leaderboard`, `caseSnapshots` and `diff`; may be null or
 *   undefined.
 * @returns Array of `{ label, value }` rows whose values are strings.
 */
var buildRAGAnswerGroundingHistoryRows = (history) => {
  if (!history?.latestRun) {
    return [{ label: "History", value: "No persisted provider runs yet." }];
  }
  const rows = [
    { label: "Runs recorded", value: String(history.runs.length) },
    {
      label: "Latest",
      value: `${history.latestRun.label} \xB7 ${formatGroundingHistorySummaryValue(history.latestRun.response)}`
    }
  ];
  if (history.previousRun) {
    rows.push({
      label: "Previous",
      value: `${history.previousRun.label} \xB7 ${formatGroundingHistorySummaryValue(history.previousRun.response)}`
    });
  }
  const best = history.leaderboard[0];
  if (best) {
    // The resolved-citation rate is scaled by 100 before percent formatting,
    // the same way formatGroundingHistorySummaryValue and the "Resolved
    // citation delta" row below treat this field. passingRate is passed
    // through unscaled, like the "Passing delta" row.
    // NOTE(review): assumes leaderboard entries store the rate as a 0-1
    // fraction like the run summary does — confirm against the leaderboard
    // builder.
    const resolvedPercent = best.averageResolvedCitationRate * 100;
    rows.push({
      label: "Best recorded",
      value: `#${best.rank} \xB7 ${best.label} \xB7 passing ${formatEvaluationPassingRate(best.passingRate)} \xB7 citation f1 ${best.averageCitationF1.toFixed(3)} \xB7 resolved ${formatEvaluationPassingRate(resolvedPercent)}`
    });
  }
  if (history.caseSnapshots.length > 0) {
    const changedAnswers = history.caseSnapshots.filter((entry) => entry.answerChange === "changed").length;
    rows.push({
      label: "Answer drift",
      value: `${changedAnswers}/${history.caseSnapshots.length} changed`
    });
  }
  if (!history.diff) {
    // Without a diff there is nothing to compare; show a hint instead.
    rows.push({
      label: "History diff",
      value: "Run the provider comparison again to diff grounding regressions over time."
    });
    return rows;
  }
  rows.push({
    label: "Passing delta",
    value: formatSignedDelta(history.diff.summaryDelta.passingRate, 1, "%")
  }, {
    label: "Citation F1 delta",
    value: formatSignedDelta(history.diff.summaryDelta.averageCitationF1, 3)
  }, {
    label: "Resolved citation delta",
    value: formatSignedDelta(history.diff.summaryDelta.averageResolvedCitationRate * 100, 1, "%")
  }, {
    label: "Improved",
    value: formatGroundingHistoryCaseLabels(history.diff.improvedCases)
  }, {
    label: "Regressed",
    value: formatGroundingHistoryCaseLabels(history.diff.regressedCases)
  });
  return rows;
};
2732
/**
 * Assembles the presentation object for an answer-grounding history.
 *
 * `caseSnapshots` comes from
 * `buildRAGAnswerGroundingCaseSnapshotPresentations(history)` and `rows` from
 * `buildRAGAnswerGroundingHistoryRows(history)`. `summary` is the latest
 * run's label, or a fixed placeholder when `history` is nullish or has no
 * latest run.
 *
 * @param history Grounding history; may be null or undefined.
 * @returns Object with `caseSnapshots`, `rows` and `summary`.
 */
var buildRAGAnswerGroundingHistoryPresentation = (history) => {
  const caseSnapshots = buildRAGAnswerGroundingCaseSnapshotPresentations(history);
  const rows = buildRAGAnswerGroundingHistoryRows(history);
  const latestRun = history?.latestRun;
  const summary = latestRun ? latestRun.label : "No persisted provider runs yet.";
  return { caseSnapshots, rows, summary };
};
2669
2737
  var createRAGFileEvaluationHistoryStore = (path) => ({
2670
2738
  listRuns: async ({ limit, suiteId } = {}) => {
2671
2739
  let parsed = [];
@@ -3889,5 +3957,5 @@ export {
3889
3957
  AIStreamKey
3890
3958
  };
3891
3959
 
3892
- //# debugId=69A98548515264F064756E2164756E21
3960
+ //# debugId=557BB3208F1A1E8F64756E2164756E21
3893
3961
  //# sourceMappingURL=index.js.map