@absolutejs/absolute 0.19.0-beta.537 → 0.19.0-beta.539

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1133,6 +1133,13 @@ export type RAGAnswerGroundingEvaluationHistory = {
1133
1133
  caseSnapshots: RAGAnswerGroundingEvaluationCaseSnapshot[];
1134
1134
  diff?: RAGAnswerGroundingEvaluationRunDiff;
1135
1135
  };
1136
/**
 * Display-ready view of one answer-grounding case snapshot.
 *
 * Identity fields come first, followed by the headline text and the
 * detailed label/value rows rendered beneath it.
 */
export type RAGAnswerGroundingCaseSnapshotPresentation = {
  /** Identifier of the evaluated case. */
  caseId: string;
  /** Human-readable name shown for the case. */
  label: string;
  /** Answer-change marker carried over unchanged from the underlying snapshot. */
  answerChange: RAGAnswerGroundingEvaluationCaseSnapshot['answerChange'];
  /** Single-line headline describing the case. */
  summary: string;
  /** Detail rows (label/value pairs) for the case. */
  rows: RAGLabelValueRow[];
};
1136
1143
  export type RAGEvaluationInput = {
1137
1144
  cases: RAGEvaluationCase[];
1138
1145
  topK?: number;
@@ -1304,6 +1311,16 @@ export type RAGEvaluationCaseTracePresentation = {
1304
1311
  traceChange: RAGEvaluationCaseTraceSnapshot['traceChange'];
1305
1312
  rows: RAGLabelValueRow[];
1306
1313
  };
1314
/**
 * Display-ready view of a retrieval evaluation history: a headline,
 * history-level rows, and one presentation per case trace.
 */
export type RAGEvaluationHistoryPresentation = {
  /** Per-case trace presentations. */
  caseTraces: RAGEvaluationCaseTracePresentation[];
  /** History-level label/value rows. */
  rows: RAGLabelValueRow[];
  /** Single-line headline for the history. */
  summary: string;
};
1319
/**
 * Display-ready view of an answer-grounding evaluation history: a headline,
 * history-level rows, and one presentation per case snapshot.
 */
export type RAGAnswerGroundingHistoryPresentation = {
  /** Per-case snapshot presentations. */
  caseSnapshots: RAGAnswerGroundingCaseSnapshotPresentation[];
  /** History-level label/value rows. */
  rows: RAGLabelValueRow[];
  /** Single-line headline for the history. */
  summary: string;
};
1307
1324
  export type RAGEvaluationLeaderboardEntry = {
1308
1325
  runId: string;
1309
1326
  suiteId: string;
@@ -2311,8 +2311,11 @@ var buildEvaluationCaseTraceSnapshots = ({
2311
2311
  };
2312
2312
  var getStatusRank = (status) => status === "pass" ? 2 : status === "partial" ? 1 : 0;
2313
2313
  var formatSignedDelta = (value, decimals = 0, suffix = "") => `${value >= 0 ? "+" : ""}${value.toFixed(decimals)}${suffix}`;
2314
// Renders a number with one decimal place followed by "%".
// The value is used as given; no scaling is applied here.
var formatEvaluationPassingRate = (value) => {
  const fixed = value.toFixed(1);
  return `${fixed}%`;
};
2314
2315
  var formatEvaluationSummary = (response) => `${response.summary.passedCases}/${response.totalCases} pass \xB7 f1 ${response.summary.averageF1.toFixed(3)} \xB7 latency ${response.summary.averageLatencyMs.toFixed(1)}ms`;
2316
// One-line summary of a grounding evaluation response. The resolved-citation
// rate is multiplied by 100 before being shown as a percentage.
var formatGroundingHistorySummaryValue = (response) => {
  const segments = [
    `${response.summary.passedCases}/${response.summary.totalCases} pass`,
    `grounded ${response.summary.groundedCases}`,
    `partial ${response.summary.partialCases}`,
    `ungrounded ${response.summary.ungroundedCases}`,
    `resolved citations ${(response.summary.averageResolvedCitationRate * 100).toFixed(1)}%`,
    `citation f1 ${response.summary.averageCitationF1.toFixed(3)}`
  ];
  return segments.join(" \xB7 ");
};
2315
2317
  var formatHistoryCaseLabels = (cases) => cases.length > 0 ? cases.map((entry) => entry.label ?? entry.caseId).join(", ") : "none";
2318
// Comma-separated display names for grounding cases: each case contributes its
// label, or its caseId when the label is null/undefined. Empty input yields "none".
var formatGroundingHistoryCaseLabels = (cases) => {
  const isEmpty = !(cases.length > 0);
  if (isEmpty) {
    return "none";
  }
  const toDisplayName = (item) => item.label ?? item.caseId;
  return cases.map(toDisplayName).join(", ");
};
2316
2319
  var formatTraceModes = (modes) => modes.length > 0 ? modes.join(" / ") : "n/a";
2317
2320
  var formatTraceStageSummary = (stageCounts) => {
2318
2321
  const topStages = Object.entries(stageCounts).sort((left, right) => right[1] - left[1]).slice(0, 3);
@@ -2535,6 +2538,11 @@ var buildRAGEvaluationCaseTracePresentations = (history) => {
2535
2538
  };
2536
2539
  });
2537
2540
  };
2541
// Assembles the presentation object for an evaluation history: per-case trace
// presentations, history rows, and a headline. The headline is the latest
// run's label, or a fixed placeholder when there is no history / latest run.
var buildRAGEvaluationHistoryPresentation = (history) => {
  const caseTraces = buildRAGEvaluationCaseTracePresentations(history);
  const rows = buildRAGEvaluationHistoryRows(history);
  const latestRun = history?.latestRun;
  const summary = latestRun ? latestRun.label : "No persisted benchmark runs yet.";
  return { caseTraces, rows, summary };
};
2538
2546
  var buildRAGEvaluationRunDiff = ({
2539
2547
  current,
2540
2548
  previous
@@ -2603,6 +2611,129 @@ var buildRAGAnswerGroundingEvaluationRunDiff = ({
2603
2611
  unchangedCases
2604
2612
  };
2605
2613
  };
2614
// Turns each case snapshot of a grounding history into a presentation object
// (answerChange, caseId, label, rows, summary). Returns an empty array when
// the history is missing or has no case snapshots.
var buildRAGAnswerGroundingCaseSnapshotPresentations = (history) => {
  if (history == null || !history.caseSnapshots.length) {
    return [];
  }

  // True when the text still has characters after trimming whitespace.
  const hasText = (text) => text.trim().length > 0;
  // Comma-separated list, or "none" when the list is empty.
  const joinOrNone = (items) => (items.length > 0 ? items.join(", ") : "none");

  const toPresentation = (snapshot) => {
    const resolved = `${snapshot.resolvedCitationCount}/${snapshot.citationCount}`;

    // Label/value pairs in display order.
    const pairs = [
      ["Query", snapshot.query != null && hasText(snapshot.query) ? snapshot.query : "n/a"],
      ["Answer change", snapshot.answerChange],
      ["Coverage", snapshot.coverage],
      ["Resolved citations", resolved],
      ["Resolved citation rate", snapshot.resolvedCitationRate.toFixed(3)],
      ["Citation F1", snapshot.citationF1.toFixed(3)],
      ["Reference count", String(snapshot.referenceCount)],
      ["Cited IDs", joinOrNone(snapshot.citedIds)],
      ["Matched IDs", joinOrNone(snapshot.matchedIds)],
      ["Missing IDs", joinOrNone(snapshot.missingIds)],
      ["Extra IDs", joinOrNone(snapshot.extraIds)],
      ["Unresolved refs", joinOrNone(snapshot.ungroundedReferenceNumbers)],
      ["Answer", hasText(snapshot.answer) ? snapshot.answer : "n/a"],
      [
        "Previous answer",
        snapshot.previousAnswer && hasText(snapshot.previousAnswer) ? snapshot.previousAnswer : "n/a"
      ]
    ];

    return {
      answerChange: snapshot.answerChange,
      caseId: snapshot.caseId,
      label: snapshot.label ?? snapshot.caseId,
      rows: pairs.map(([label, value]) => ({ label, value })),
      summary: `${snapshot.answerChange} \xB7 ${snapshot.coverage} \xB7 resolved ${resolved} \xB7 refs ${snapshot.referenceCount}`
    };
  };

  return history.caseSnapshots.map((snapshot) => toPresentation(snapshot));
};
2677
/**
 * Builds the label/value rows that summarize an answer-grounding evaluation history.
 *
 * Rows are emitted in this order, each only when its data is present:
 * "Runs recorded", "Latest", "Previous", "Best recorded", "Answer drift",
 * then either a single "History diff" hint (no diff available) or the
 * delta rows plus "Improved" / "Regressed".
 *
 * @param history Grounding evaluation history; may be null/undefined.
 * @returns Array of `{ label, value }` rows. When there is no history or no
 *   latest run, a single placeholder "History" row is returned.
 */
var buildRAGAnswerGroundingHistoryRows = (history) => {
  if (!history?.latestRun) {
    return [{ label: "History", value: "No persisted provider runs yet." }];
  }
  const rows = [
    { label: "Runs recorded", value: String(history.runs.length) },
    {
      label: "Latest",
      value: `${history.latestRun.label} \xB7 ${formatGroundingHistorySummaryValue(history.latestRun.response)}`
    }
  ];
  if (history.previousRun) {
    rows.push({
      label: "Previous",
      value: `${history.previousRun.label} \xB7 ${formatGroundingHistorySummaryValue(history.previousRun.response)}`
    });
  }
  const best = history.leaderboard[0];
  if (best) {
    // The resolved-citation rate is scaled by 100 before formatting, matching
    // how the summary and delta rows display it; passingRate is used as given.
    // NOTE(review): assumes leaderboard entries store averageResolvedCitationRate
    // as a 0-1 fraction like the run summary does - confirm against the producer.
    rows.push({
      label: "Best recorded",
      value: `#${best.rank} \xB7 ${best.label} \xB7 passing ${formatEvaluationPassingRate(best.passingRate)} \xB7 citation f1 ${best.averageCitationF1.toFixed(3)} \xB7 resolved ${formatEvaluationPassingRate(best.averageResolvedCitationRate * 100)}`
    });
  }
  if (history.caseSnapshots.length > 0) {
    const changedAnswers = history.caseSnapshots.filter((entry) => entry.answerChange === "changed").length;
    rows.push({
      label: "Answer drift",
      value: `${changedAnswers}/${history.caseSnapshots.length} changed`
    });
  }
  if (!history.diff) {
    // Without a diff there is nothing to compare; emit a hint instead of delta rows.
    rows.push({
      label: "History diff",
      value: "Run the provider comparison again to diff grounding regressions over time."
    });
    return rows;
  }
  rows.push({
    label: "Passing delta",
    value: formatSignedDelta(history.diff.summaryDelta.passingRate, 1, "%")
  }, {
    label: "Citation F1 delta",
    value: formatSignedDelta(history.diff.summaryDelta.averageCitationF1, 3)
  }, {
    label: "Resolved citation delta",
    value: formatSignedDelta(history.diff.summaryDelta.averageResolvedCitationRate * 100, 1, "%")
  }, {
    label: "Improved",
    value: formatGroundingHistoryCaseLabels(history.diff.improvedCases)
  }, {
    label: "Regressed",
    value: formatGroundingHistoryCaseLabels(history.diff.regressedCases)
  });
  return rows;
};
2732
// Assembles the presentation object for a grounding history: per-case snapshot
// presentations, history rows, and a headline. The headline is the latest
// run's label, or a fixed placeholder when there is no history / latest run.
var buildRAGAnswerGroundingHistoryPresentation = (history) => {
  const caseSnapshots = buildRAGAnswerGroundingCaseSnapshotPresentations(history);
  const rows = buildRAGAnswerGroundingHistoryRows(history);
  const latestRun = history?.latestRun;
  const summary = latestRun ? latestRun.label : "No persisted provider runs yet.";
  return { caseSnapshots, rows, summary };
};
2606
2737
  var createRAGFileEvaluationHistoryStore = (path) => ({
2607
2738
  listRuns: async ({ limit, suiteId } = {}) => {
2608
2739
  let parsed = [];
@@ -3826,5 +3957,5 @@ export {
3826
3957
  AIStreamKey
3827
3958
  };
3828
3959
 
3829
- //# debugId=AF3F1B40777DBF8964756E2164756E21
3960
+ //# debugId=557BB3208F1A1E8F64756E2164756E21
3830
3961
  //# sourceMappingURL=index.js.map