@absolutejs/absolute 0.19.0-beta.517 → 0.19.0-beta.519

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1692,8 +1692,34 @@ var buildRAGEvaluationLeaderboard = (runs) => {
1692
1692
  totalCases: run.response.totalCases
1693
1693
  }));
1694
1694
  };
1695
/**
 * Builds a ranked leaderboard from answer-grounding evaluation runs.
 *
 * Ordering, most significant first:
 *   1. higher `response.passingRate`
 *   2. higher `response.summary.averageCitationF1`
 *   3. higher `response.summary.averageResolvedCitationRate`
 *   4. lower `elapsedMs`
 *
 * The input array is copied before sorting, so `runs` is never mutated.
 *
 * @param {Array<object>} runs - Runs exposing `id`, `label`, `suiteId`,
 *   `elapsedMs` and a `response` with `passingRate`, `totalCases` and `summary`.
 * @returns {Array<object>} One entry per run, in ranked order, with a 1-based `rank`.
 */
var buildRAGAnswerGroundingEvaluationLeaderboard = (runs) => {
  // Subtracting a missing (undefined) metric yields NaN, which `sort` treats
  // as "equal": the remaining tie-breakers are skipped and the comparator
  // becomes inconsistent. Substitute a fallback so such runs rank last.
  const orderable = (value, fallback) =>
    typeof value === "number" && !Number.isNaN(value) ? value : fallback;
  // Three-way comparison without subtraction, so equal infinities compare as 0.
  const compare = (first, second) => {
    if (first === second) {
      return 0;
    }
    return first < second ? -1 : 1;
  };
  const higherIsBetter = [
    (run) => run.response.passingRate,
    (run) => run.response.summary.averageCitationF1,
    (run) => run.response.summary.averageResolvedCitationRate
  ];
  const ranked = [...runs].sort((left, right) => {
    for (const read of higherIsBetter) {
      // Arguments are swapped (right, left) to sort in descending order.
      const order = compare(
        orderable(read(right), -Infinity),
        orderable(read(left), -Infinity)
      );
      if (order !== 0) {
        return order;
      }
    }
    // Final tie-breaker: the faster run wins; an unknown duration sorts last.
    return compare(
      orderable(left.elapsedMs, Infinity),
      orderable(right.elapsedMs, Infinity)
    );
  });
  return ranked.map((run, index) => ({
    averageCitationF1: run.response.summary.averageCitationF1,
    averageResolvedCitationRate: run.response.summary.averageResolvedCitationRate,
    label: run.label,
    passingRate: run.response.passingRate,
    rank: index + 1,
    runId: run.id,
    suiteId: run.suiteId,
    totalCases: run.response.totalCases
  }));
};
1695
1719
  var toHistorySortOrder = (left, right) => right.finishedAt - left.finishedAt;
1696
1720
  var normalizeHistoryRuns = (runs) => [...runs].sort(toHistorySortOrder);
1721
/**
 * Sort comparator for answer-grounding history runs: subtracts `finishedAt`
 * values so that a larger `finishedAt` sorts earlier.
 */
var toGroundingHistorySortOrder = (left, right) => {
  return right.finishedAt - left.finishedAt;
};
/**
 * Returns a copy of `runs` ordered with `toGroundingHistorySortOrder`; the
 * input array itself is left untouched.
 */
var normalizeGroundingHistoryRuns = (runs) => {
  const ordered = [...runs];
  ordered.sort(toGroundingHistorySortOrder);
  return ordered;
};
1697
1723
  var buildCaseDiff = (currentCase, previousCase) => ({
1698
1724
  caseId: currentCase.caseId,
1699
1725
  currentF1: currentCase.f1,
@@ -1707,6 +1733,19 @@ var buildCaseDiff = (currentCase, previousCase) => ({
1707
1733
  previousStatus: previousCase?.status,
1708
1734
  query: currentCase.query
1709
1735
  });
1736
/**
 * Pairs one grounding case from the current run with the same case from the
 * previous run (when there is one) in a flat comparison record.
 *
 * @param {object} currentCase - Case from the current run; read for `caseId`,
 *   `citationF1`, `matchedIds`, `missingIds`, `status`, `label` and `query`.
 * @param {object} [previousCase] - Matching case from the previous run, if any.
 * @returns {object} Record with `current*` and `previous*` fields. Without a
 *   previous case the previous id lists are empty arrays and the previous
 *   score and status are `undefined`.
 */
var buildGroundingCaseDiff = (currentCase, previousCase) => {
  const {
    caseId,
    citationF1,
    matchedIds,
    missingIds,
    status,
    label,
    query
  } = currentCase;
  // An empty stand-in lets the previous-side reads below stay unconditional.
  const prior = previousCase ?? {};
  return {
    caseId,
    currentCitationF1: citationF1,
    currentMatchedIds: matchedIds,
    currentMissingIds: missingIds,
    currentStatus: status,
    label,
    previousCitationF1: prior.citationF1,
    previousMatchedIds: prior.matchedIds ?? [],
    previousMissingIds: prior.missingIds ?? [],
    previousStatus: prior.status,
    query
  };
};
1710
1749
  var getStatusRank = (status) => status === "pass" ? 2 : status === "partial" ? 1 : 0;
1711
1750
  var buildRAGEvaluationRunDiff = ({
1712
1751
  current,
@@ -1734,6 +1773,32 @@ var buildRAGEvaluationRunDiff = ({
1734
1773
  unchangedCases
1735
1774
  };
1736
1775
  };
1776
/**
 * Compares the current answer-grounding evaluation run with an optional
 * previous run.
 *
 * Each case of the current run is paired by `caseId` with the previous run's
 * case and placed into exactly one of `regressedCases`, `improvedCases` or
 * `unchangedCases` according to `getStatusRank`. A case without a previous
 * counterpart is compared against the status "fail".
 *
 * @param {object} options
 * @param {object} options.current - Run being evaluated.
 * @param {object} [options.previous] - Run to compare against, if any.
 * @returns {object} Run ids, suite id, the three case buckets and
 *   `summaryDelta` (current minus previous; an absent previous value counts as 0).
 */
var buildRAGAnswerGroundingEvaluationRunDiff = ({
  current,
  previous
}) => {
  const previousById = new Map(
    (previous?.response.cases ?? []).map((entry) => [entry.caseId, entry])
  );
  const diffs = current.response.cases.map(
    (entry) => buildGroundingCaseDiff(entry, previousById.get(entry.caseId))
  );

  // Single pass that sorts every diff into one bucket, preserving case order.
  const regressedCases = [];
  const improvedCases = [];
  const unchangedCases = [];
  for (const diff of diffs) {
    const rankNow = getStatusRank(diff.currentStatus);
    const rankBefore = getStatusRank(diff.previousStatus ?? "fail");
    if (rankNow < rankBefore) {
      regressedCases.push(diff);
    } else if (rankNow > rankBefore) {
      improvedCases.push(diff);
    } else if (rankNow === rankBefore) {
      unchangedCases.push(diff);
    }
  }

  const currentResponse = current.response;
  const previousResponse = previous?.response;
  // Current summary value minus the previous one, with 0 standing in when absent.
  const summaryFieldDelta = (field) =>
    currentResponse.summary[field] - (previousResponse?.summary[field] ?? 0);

  return {
    currentRunId: current.id,
    improvedCases,
    previousRunId: previous?.id,
    regressedCases,
    suiteId: current.suiteId,
    summaryDelta: {
      averageCitationF1: summaryFieldDelta("averageCitationF1"),
      averageResolvedCitationRate: summaryFieldDelta("averageResolvedCitationRate"),
      failedCases: summaryFieldDelta("failedCases"),
      passedCases: summaryFieldDelta("passedCases"),
      passingRate: currentResponse.passingRate - (previousResponse?.passingRate ?? 0),
      partialCases: summaryFieldDelta("partialCases")
    },
    unchangedCases
  };
};
1737
1802
  var createRAGFileEvaluationHistoryStore = (path) => ({
1738
1803
  listRuns: async ({ limit, suiteId } = {}) => {
1739
1804
  let parsed = [];
@@ -1772,6 +1837,42 @@ var createRAGFileEvaluationHistoryStore = (path) => ({
1772
1837
  `, "utf8");
1773
1838
  }
1774
1839
  });
1840
/**
 * Creates a history store for answer-grounding evaluation runs backed by a
 * single JSON file shaped as `{ "runs": [...] }`.
 *
 * @param {string} path - Location of the JSON history file.
 * @returns {{ listRuns: Function, saveRun: Function }} Store with:
 *   - `listRuns(input?)`: resolves to the stored runs, optionally filtered by
 *     `input.suiteId`, ordered with `toGroundingHistorySortOrder` and cut to
 *     `input.limit` (falling back to `DEFAULT_HISTORY_LIMIT`).
 *   - `saveRun(run)`: replaces any stored run with the same `id`, re-sorts and
 *     rewrites the file, creating the parent directory when needed.
 *   Both reject on read errors other than a missing file and on invalid JSON.
 */
var createRAGFileAnswerGroundingEvaluationHistoryStore = (path) => {
  const isMissingFileError = (error) =>
    Boolean(error) && typeof error === "object" && "code" in error && error.code === "ENOENT";

  // Shared read path for both methods. A missing file means "no history yet".
  const readStoredRuns = async () => {
    let raw;
    try {
      raw = await readFile(path, "utf8");
    } catch (error) {
      if (isMissingFileError(error)) {
        return [];
      }
      throw error;
    }
    const data = JSON.parse(raw);
    // `data?.runs` tolerates a file whose top-level JSON value is `null`;
    // a plain `data.runs` would throw a TypeError there.
    return Array.isArray(data?.runs) ? data.runs : [];
  };

  return {
    async listRuns(input) {
      const runs = await readStoredRuns();
      const filtered = input?.suiteId
        ? runs.filter((run) => run.suiteId === input.suiteId)
        : runs;
      // In-place sort is safe: the array was freshly parsed for this call.
      return filtered
        .sort(toGroundingHistorySortOrder)
        .slice(0, input?.limit ?? DEFAULT_HISTORY_LIMIT);
    },
    async saveRun(run) {
      const runs = await readStoredRuns();
      const nextRuns = normalizeGroundingHistoryRuns([
        run,
        ...runs.filter((entry) => entry.id !== run.id)
      ]);
      await mkdir(dirname(path), { recursive: true });
      await writeFile(path, JSON.stringify({
        runs: nextRuns
      }, null, 2), "utf8");
    }
  };
};
1775
1876
  var loadRAGEvaluationHistory = async ({
1776
1877
  store,
1777
1878
  suite,
@@ -1793,6 +1894,30 @@ var loadRAGEvaluationHistory = async ({
1793
1894
  suiteLabel: suite.label ?? suite.id
1794
1895
  };
1795
1896
  };
1897
/**
 * Loads the answer-grounding evaluation history of one suite from a store and
 * derives the views built on top of it.
 *
 * @param {object} options
 * @param {object} options.store - Store whose `listRuns({ limit, suiteId })`
 *   returns the runs, either directly or as a promise.
 * @param {object} options.suite - Suite descriptor; `id` is required and
 *   `label` is optional.
 * @param {number} [options.limit=DEFAULT_HISTORY_LIMIT] - Limit passed to the store.
 * @returns {Promise<object>} The normalized `runs`, the first two of them as
 *   `latestRun` and `previousRun`, their `diff` (only when both exist), the
 *   `leaderboard`, and the suite id and label (the label falls back to the id).
 */
var loadRAGAnswerGroundingEvaluationHistory = async ({
  store,
  suite,
  limit = DEFAULT_HISTORY_LIMIT
}) => {
  const listed = await Promise.resolve(store.listRuns({
    limit,
    suiteId: suite.id
  }));
  const runs = normalizeGroundingHistoryRuns(listed);
  const [latestRun, previousRun] = runs;

  // A diff needs two runs; with fewer it stays undefined.
  let diff;
  if (latestRun && previousRun) {
    diff = buildRAGAnswerGroundingEvaluationRunDiff({
      current: latestRun,
      previous: previousRun
    });
  }

  return {
    diff,
    latestRun,
    leaderboard: buildRAGAnswerGroundingEvaluationLeaderboard(runs),
    previousRun,
    runs,
    suiteId: suite.id,
    suiteLabel: suite.label ?? suite.id
  };
};
1796
1921
  var persistRAGEvaluationSuiteRun = async ({
1797
1922
  store,
1798
1923
  run
@@ -1800,6 +1925,13 @@ var persistRAGEvaluationSuiteRun = async ({
1800
1925
  await Promise.resolve(store.saveRun(run));
1801
1926
  return run;
1802
1927
  };
1928
/**
 * Saves an answer-grounding evaluation run through the given store and hands
 * the same run object back.
 *
 * @param {object} options
 * @param {object} options.store - Store whose `saveRun(run)` may return a
 *   value or a promise.
 * @param {object} options.run - Run to persist.
 * @returns {Promise<object>} Resolves to `run` once `saveRun` has settled;
 *   rejects if `saveRun` throws or rejects.
 */
var persistRAGAnswerGroundingEvaluationRun = async ({
  run,
  store
}) => {
  // `await` accepts both plain return values and promises from the store.
  const pendingSave = store.saveRun(run);
  await pendingSave;
  return run;
};
1803
1935
  var buildRAGEvaluationResponse = (cases) => {
1804
1936
  const totalCases = cases.length;
1805
1937
  const passedCases = cases.filter((entry) => entry.status === "pass").length;
@@ -2177,5 +2309,5 @@ export {
2177
2309
  buildRAGAnswerWorkflowState
2178
2310
  };
2179
2311
 
2180
- //# debugId=C8A3C1E52C79B17864756E2164756E21
2312
+ //# debugId=D27F1DC10562733064756E2164756E21
2181
2313
  //# sourceMappingURL=index.js.map