@absolutejs/absolute 0.19.0-beta.537 → 0.19.0-beta.539

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -2287,8 +2287,11 @@ var buildEvaluationCaseTraceSnapshots = ({
2287
2287
  };
2288
2288
  var getStatusRank = (status) => status === "pass" ? 2 : status === "partial" ? 1 : 0;
2289
2289
  var formatSignedDelta = (value, decimals = 0, suffix = "") => `${value >= 0 ? "+" : ""}${value.toFixed(decimals)}${suffix}`;
2290
/**
 * Renders a number with one fraction digit followed by a percent sign.
 * The value is not scaled; callers pass it in the unit they want displayed.
 *
 * @param {number} value - Number to render.
 * @returns {string} For example "87.5%".
 */
var formatEvaluationPassingRate = (value) => {
  const oneDecimal = value.toFixed(1);
  return `${oneDecimal}%`;
};
2290
2291
  var formatEvaluationSummary = (response) => `${response.summary.passedCases}/${response.totalCases} pass \xB7 f1 ${response.summary.averageF1.toFixed(3)} \xB7 latency ${response.summary.averageLatencyMs.toFixed(1)}ms`;
2292
/**
 * Builds the one-line summary for an answer-grounding run response.
 *
 * Every figure is read from `response.summary`. The average resolved-citation
 * rate is multiplied by 100 before being printed as a percentage, and the
 * citation F1 is printed with three decimals.
 *
 * @param {object} response - Object exposing a `summary` with `passedCases`,
 *   `totalCases`, `groundedCases`, `partialCases`, `ungroundedCases`,
 *   `averageResolvedCitationRate` and `averageCitationF1`.
 * @returns {string} Segments joined by a middle dot.
 */
var formatGroundingHistorySummaryValue = (response) => {
  const { summary } = response;
  const resolvedPercent = (summary.averageResolvedCitationRate * 100).toFixed(1);
  const segments = [
    `${summary.passedCases}/${summary.totalCases} pass`,
    `grounded ${summary.groundedCases}`,
    `partial ${summary.partialCases}`,
    `ungrounded ${summary.ungroundedCases}`,
    `resolved citations ${resolvedPercent}%`,
    `citation f1 ${summary.averageCitationF1.toFixed(3)}`
  ];
  return segments.join(" \xB7 ");
};
2291
2293
  var formatHistoryCaseLabels = (cases) => cases.length > 0 ? cases.map((entry) => entry.label ?? entry.caseId).join(", ") : "none";
2294
/**
 * Lists grounding-history cases by name, separated by ", ".
 * A case is named by its `label` unless that is null or undefined, in which
 * case its `caseId` is used. Returns "none" when there are no cases.
 *
 * @param {Array<{label?: string, caseId: string}>} cases - Cases to list.
 * @returns {string} Comma-separated names, or "none".
 */
var formatGroundingHistoryCaseLabels = (cases) => {
  const nameOf = (entry) => entry.label ?? entry.caseId;
  if (cases.length > 0) {
    return cases.map(nameOf).join(", ");
  } else {
    return "none";
  }
};
2292
2295
  var formatTraceModes = (modes) => modes.length > 0 ? modes.join(" / ") : "n/a";
2293
2296
  var formatTraceStageSummary = (stageCounts) => {
2294
2297
  const topStages = Object.entries(stageCounts).sort((left, right) => right[1] - left[1]).slice(0, 3);
@@ -2511,6 +2514,11 @@ var buildRAGEvaluationCaseTracePresentations = (history) => {
2511
2514
  };
2512
2515
  });
2513
2516
  };
2517
/**
 * Assembles the presentation object for an evaluation history.
 *
 * `caseTraces` and `rows` are delegated to
 * `buildRAGEvaluationCaseTracePresentations` and `buildRAGEvaluationHistoryRows`.
 * `summary` is the latest run's label, or a placeholder sentence when the
 * history is missing or has no latest run.
 *
 * @param {object} [history] - Evaluation history; may be null or undefined.
 * @returns {{caseTraces: *, rows: *, summary: string}} The presentation object.
 */
var buildRAGEvaluationHistoryPresentation = (history) => {
  const caseTraces = buildRAGEvaluationCaseTracePresentations(history);
  const rows = buildRAGEvaluationHistoryRows(history);
  let summary = "No persisted benchmark runs yet.";
  if (history?.latestRun) {
    summary = history.latestRun.label;
  }
  return { caseTraces, rows, summary };
};
2514
2522
  var buildRAGEvaluationRunDiff = ({
2515
2523
  current,
2516
2524
  previous
@@ -2579,6 +2587,129 @@ var buildRAGAnswerGroundingEvaluationRunDiff = ({
2579
2587
  unchangedCases
2580
2588
  };
2581
2589
  };
2590
/**
 * Converts each grounding case snapshot in a history into a presentation
 * object made of label/value rows plus a one-line summary.
 *
 * Returns an empty array when the history is null/undefined or has no case
 * snapshots. List-valued fields are joined with ", " (or shown as "none" when
 * empty); blank text fields are shown as "n/a".
 *
 * @param {object} [history] - History exposing a `caseSnapshots` array.
 * @returns {Array<{answerChange: *, caseId: *, label: *, rows: Array<{label: string, value: *}>, summary: string}>}
 */
var buildRAGAnswerGroundingCaseSnapshotPresentations = (history) => {
  if (!history?.caseSnapshots.length) {
    return [];
  }
  // Small local builders keep the row list below readable.
  const row = (label, value) => ({ label, value });
  const joinOrNone = (items) => items.length > 0 ? items.join(", ") : "none";
  const toPresentation = (entry) => {
    const label = entry.label ?? entry.caseId;
    const resolvedRatio = `${entry.resolvedCitationCount}/${entry.citationCount}`;
    // `query` may be absent, hence the optional chain; `answer` is read unguarded.
    const queryText = entry.query?.trim().length ? entry.query : "n/a";
    const answerText = entry.answer.trim().length > 0 ? entry.answer : "n/a";
    const previousAnswerText = entry.previousAnswer && entry.previousAnswer.trim().length > 0 ? entry.previousAnswer : "n/a";
    const rows = [
      row("Query", queryText),
      row("Answer change", entry.answerChange),
      row("Coverage", entry.coverage),
      row("Resolved citations", resolvedRatio),
      row("Resolved citation rate", entry.resolvedCitationRate.toFixed(3)),
      row("Citation F1", entry.citationF1.toFixed(3)),
      row("Reference count", String(entry.referenceCount)),
      row("Cited IDs", joinOrNone(entry.citedIds)),
      row("Matched IDs", joinOrNone(entry.matchedIds)),
      row("Missing IDs", joinOrNone(entry.missingIds)),
      row("Extra IDs", joinOrNone(entry.extraIds)),
      row("Unresolved refs", joinOrNone(entry.ungroundedReferenceNumbers)),
      row("Answer", answerText),
      row("Previous answer", previousAnswerText)
    ];
    const summary = [
      entry.answerChange,
      entry.coverage,
      `resolved ${resolvedRatio}`,
      `refs ${entry.referenceCount}`
    ].join(" \xB7 ");
    return {
      answerChange: entry.answerChange,
      caseId: entry.caseId,
      label,
      rows,
      summary
    };
  };
  return history.caseSnapshots.map((entry) => toPresentation(entry));
};
2653
/**
 * Builds the label/value rows that summarise persisted answer-grounding runs.
 *
 * Rows are appended in this order, each only when its data is present:
 * run count and latest run, previous run, best leaderboard entry, answer
 * drift across case snapshots, then either a hint to run the comparison again
 * (no diff yet) or the delta / improved / regressed rows derived from the diff.
 *
 * @param {object} [history] - Grounding history exposing `runs`, `latestRun`,
 *   `previousRun`, `leaderboard`, `caseSnapshots` and `diff`.
 * @returns {Array<{label: string, value: string}>} Rows to display. A single
 *   placeholder row is returned when there is no history or no latest run.
 */
var buildRAGAnswerGroundingHistoryRows = (history) => {
  if (!history?.latestRun) {
    return [{ label: "History", value: "No persisted provider runs yet." }];
  }
  const rows = [
    { label: "Runs recorded", value: String(history.runs.length) },
    {
      label: "Latest",
      value: `${history.latestRun.label} \xB7 ${formatGroundingHistorySummaryValue(history.latestRun.response)}`
    }
  ];
  if (history.previousRun) {
    rows.push({
      label: "Previous",
      value: `${history.previousRun.label} \xB7 ${formatGroundingHistorySummaryValue(history.previousRun.response)}`
    });
  }
  const best = history.leaderboard[0];
  if (best) {
    // The resolved-citation rate is scaled by 100 before being printed as a
    // percentage, matching how the run summary and the "Resolved citation delta"
    // row treat the same field. `passingRate` is printed unscaled, as the
    // "Passing delta" row does.
    // NOTE(review): assumes leaderboard entries store the rate as a 0..1
    // fraction like the run summary does — confirm against the leaderboard builder.
    const resolvedPercent = formatEvaluationPassingRate(best.averageResolvedCitationRate * 100);
    rows.push({
      label: "Best recorded",
      value: `#${best.rank} \xB7 ${best.label} \xB7 passing ${formatEvaluationPassingRate(best.passingRate)} \xB7 citation f1 ${best.averageCitationF1.toFixed(3)} \xB7 resolved ${resolvedPercent}`
    });
  }
  if (history.caseSnapshots.length > 0) {
    const changedAnswers = history.caseSnapshots.filter((entry) => entry.answerChange === "changed").length;
    rows.push({
      label: "Answer drift",
      value: `${changedAnswers}/${history.caseSnapshots.length} changed`
    });
  }
  if (!history.diff) {
    // Without a diff there is nothing to compare yet; tell the reader how to get one.
    rows.push({
      label: "History diff",
      value: "Run the provider comparison again to diff grounding regressions over time."
    });
    return rows;
  }
  const { summaryDelta, improvedCases, regressedCases } = history.diff;
  rows.push({
    label: "Passing delta",
    value: formatSignedDelta(summaryDelta.passingRate, 1, "%")
  }, {
    label: "Citation F1 delta",
    value: formatSignedDelta(summaryDelta.averageCitationF1, 3)
  }, {
    label: "Resolved citation delta",
    value: formatSignedDelta(summaryDelta.averageResolvedCitationRate * 100, 1, "%")
  }, {
    label: "Improved",
    value: formatGroundingHistoryCaseLabels(improvedCases)
  }, {
    label: "Regressed",
    value: formatGroundingHistoryCaseLabels(regressedCases)
  });
  return rows;
};
2708
/**
 * Assembles the presentation object for an answer-grounding history.
 *
 * `caseSnapshots` and `rows` are delegated to
 * `buildRAGAnswerGroundingCaseSnapshotPresentations` and
 * `buildRAGAnswerGroundingHistoryRows`. `summary` is the latest run's label,
 * or a placeholder sentence when the history is missing or has no latest run.
 *
 * @param {object} [history] - Grounding history; may be null or undefined.
 * @returns {{caseSnapshots: *, rows: *, summary: string}} The presentation object.
 */
var buildRAGAnswerGroundingHistoryPresentation = (history) => {
  const caseSnapshots = buildRAGAnswerGroundingCaseSnapshotPresentations(history);
  const rows = buildRAGAnswerGroundingHistoryRows(history);
  let summary = "No persisted provider runs yet.";
  if (history?.latestRun) {
    summary = history.latestRun.label;
  }
  return { caseSnapshots, rows, summary };
};
2582
2713
  var createRAGFileEvaluationHistoryStore = (path) => ({
2583
2714
  listRuns: async ({ limit, suiteId } = {}) => {
2584
2715
  let parsed = [];
@@ -4050,5 +4181,5 @@ export {
4050
4181
  AIStreamProvider
4051
4182
  };
4052
4183
 
4053
- //# debugId=23034E6004312CB964756E2164756E21
4184
+ //# debugId=7E7C7A3FF0EFC10564756E2164756E21
4054
4185
  //# sourceMappingURL=index.js.map