@absolutejs/absolute 0.19.0-beta.516 → 0.19.0-beta.518

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -919,6 +919,60 @@ export type RAGAnswerGroundingEvaluationResponse = {
919
919
  totalCases: number;
920
920
  passingRate: number;
921
921
  };
922
/**
 * One persisted execution of an answer-grounding evaluation suite.
 */
export type RAGAnswerGroundingEvaluationRun = {
  /** Identifier of the run; the file-backed store replaces any stored run that has the same id. */
  id: string;
  /** Identifier of the suite this run belongs to; history stores filter on it. */
  suiteId: string;
  /** Label attached to the run. */
  label: string;
  /** Start timestamp. NOTE(review): presumably epoch milliseconds — confirm with the producer. */
  startedAt: number;
  /** Finish timestamp; history helpers sort runs by this value, newest first. */
  finishedAt: number;
  /** Duration of the run. NOTE(review): unit is presumably milliseconds, going by the name. */
  elapsedMs: number;
  /** The evaluation response produced by this run. */
  response: RAGAnswerGroundingEvaluationResponse;
  /** Optional free-form data stored alongside the run. */
  metadata?: Record<string, unknown>;
};
932
/**
 * Persistence contract for answer-grounding evaluation runs.
 *
 * Both operations may be synchronous or asynchronous, so callers should
 * await the result either way.
 */
export type RAGAnswerGroundingEvaluationHistoryStore = {
  /** Persists a single run. */
  saveRun: (run: RAGAnswerGroundingEvaluationRun) => Promise<void> | void;
  /**
   * Returns stored runs, optionally restricted to one suite (`suiteId`)
   * and/or capped at `limit` entries.
   */
  listRuns: (input?: {
    suiteId?: string;
    limit?: number;
  }) =>
    | Promise<Array<RAGAnswerGroundingEvaluationRun>>
    | Array<RAGAnswerGroundingEvaluationRun>;
};
939
/**
 * Comparison of a single evaluation case between the current run and the
 * previous run. The `previous*` scalar fields are optional because the case
 * may have no counterpart in the previous run.
 */
export type RAGAnswerGroundingEvaluationCaseDiff = {
  /** Identifier of the case; used to pair current and previous results. */
  caseId: string;
  /** Label copied from the current case result, if any. */
  label?: string;
  /** Query copied from the current case result, if any. */
  query?: string;
  /** Status of the case in the previous run; absent when there was no previous result. */
  previousStatus?: RAGAnswerGroundingEvaluationCaseResult['status'];
  /** Status of the case in the current run. */
  currentStatus: RAGAnswerGroundingEvaluationCaseResult['status'];
  /** Citation F1 of the case in the previous run; absent when there was no previous result. */
  previousCitationF1?: number;
  /** Citation F1 of the case in the current run. */
  currentCitationF1: number;
  /** Matched ids from the previous run (empty when there was no previous result). */
  previousMatchedIds: Array<string>;
  /** Matched ids from the current run. */
  currentMatchedIds: Array<string>;
  /** Missing ids from the previous run (empty when there was no previous result). */
  previousMissingIds: Array<string>;
  /** Missing ids from the current run. */
  currentMissingIds: Array<string>;
};
952
/**
 * Difference between two runs of the same answer-grounding suite.
 *
 * Every case of the current run lands in exactly one of the three case
 * lists, based on how its status ranks against the previous status
 * (pass > partial > fail). A case without a previous status is compared
 * against "fail".
 */
export type RAGAnswerGroundingEvaluationRunDiff = {
  /** Suite id taken from the current run. */
  suiteId: string;
  /** Id of the run being evaluated. */
  currentRunId: string;
  /** Id of the baseline run; absent when no baseline was supplied. */
  previousRunId?: string;
  /** Cases whose status ranks lower than before. */
  regressedCases: Array<RAGAnswerGroundingEvaluationCaseDiff>;
  /** Cases whose status ranks higher than before. */
  improvedCases: Array<RAGAnswerGroundingEvaluationCaseDiff>;
  /** Cases whose status rank did not change. */
  unchangedCases: Array<RAGAnswerGroundingEvaluationCaseDiff>;
  /**
   * Current value minus previous value for each aggregate metric; a missing
   * previous run contributes 0 as the baseline.
   */
  summaryDelta: {
    passingRate: number;
    averageCitationF1: number;
    averageResolvedCitationRate: number;
    passedCases: number;
    failedCases: number;
    partialCases: number;
  };
};
968
/**
 * History view of one answer-grounding suite, as assembled by
 * `loadRAGAnswerGroundingEvaluationHistory`.
 */
export type RAGAnswerGroundingEvaluationHistory = {
  /** Id of the suite the history was loaded for. */
  suiteId: string;
  /** Suite label; the loader falls back to the suite id when no label is set. */
  suiteLabel?: string;
  /** Runs of the suite, ordered by `finishedAt` with the newest first. */
  runs: Array<RAGAnswerGroundingEvaluationRun>;
  /** First entry of `runs`, if any. */
  latestRun?: RAGAnswerGroundingEvaluationRun;
  /** Second entry of `runs`, if any. */
  previousRun?: RAGAnswerGroundingEvaluationRun;
  /** Diff of `latestRun` against `previousRun`; only present when both exist. */
  diff?: RAGAnswerGroundingEvaluationRunDiff;
};
922
976
  export type RAGEvaluationInput = {
923
977
  cases: RAGEvaluationCase[];
924
978
  topK?: number;
@@ -1718,6 +1718,8 @@ var buildRAGEvaluationLeaderboard = (runs) => {
1718
1718
  };
1719
1719
  var toHistorySortOrder = (left, right) => right.finishedAt - left.finishedAt;
1720
1720
  var normalizeHistoryRuns = (runs) => [...runs].sort(toHistorySortOrder);
1721
// Comparator for grounding history runs: larger `finishedAt` sorts first (newest first).
var toGroundingHistorySortOrder = (a, b) => b.finishedAt - a.finishedAt;
// Returns a newest-first copy of `runs`; the input array is not reordered.
var normalizeGroundingHistoryRuns = (runs) => {
  const ordered = [...runs];
  ordered.sort(toGroundingHistorySortOrder);
  return ordered;
};
1721
1723
  var buildCaseDiff = (currentCase, previousCase) => ({
1722
1724
  caseId: currentCase.caseId,
1723
1725
  currentF1: currentCase.f1,
@@ -1731,6 +1733,19 @@ var buildCaseDiff = (currentCase, previousCase) => ({
1731
1733
  previousStatus: previousCase?.status,
1732
1734
  query: currentCase.query
1733
1735
  });
1736
// Builds the per-case diff entry for an answer-grounding case.
// `previousCase` may be missing: the previous scalar fields then come out
// as undefined and the previous id lists as empty arrays.
var buildGroundingCaseDiff = (currentCase, previousCase) => {
  // An empty stand-in yields undefined for every previous field.
  const prior = previousCase ?? {};
  return {
    caseId: currentCase.caseId,
    currentCitationF1: currentCase.citationF1,
    currentMatchedIds: currentCase.matchedIds,
    currentMissingIds: currentCase.missingIds,
    currentStatus: currentCase.status,
    label: currentCase.label,
    previousCitationF1: prior.citationF1,
    previousMatchedIds: prior.matchedIds ?? [],
    previousMissingIds: prior.missingIds ?? [],
    previousStatus: prior.status,
    query: currentCase.query
  };
};
1734
1749
  var getStatusRank = (status) => status === "pass" ? 2 : status === "partial" ? 1 : 0;
1735
1750
  var buildRAGEvaluationRunDiff = ({
1736
1751
  current,
@@ -1758,6 +1773,32 @@ var buildRAGEvaluationRunDiff = ({
1758
1773
  unchangedCases
1759
1774
  };
1760
1775
  };
1776
// Compares the current answer-grounding run with an optional previous run.
// Every case of the current run is paired with the previous case of the same
// caseId and classified as regressed, improved or unchanged by status rank;
// a case with no previous status is compared against "fail".
// summaryDelta holds current minus previous for each aggregate, with a
// missing previous run counting as 0.
var buildRAGAnswerGroundingEvaluationRunDiff = ({
  current,
  previous
}) => {
  const previousCases = new Map((previous?.response.cases ?? []).map((entry) => [entry.caseId, entry]));
  const diffs = current.response.cases.map((entry) => buildGroundingCaseDiff(entry, previousCases.get(entry.caseId)));
  // Negative means the case ranks lower than before, positive means higher.
  const rankChange = (entry) => getStatusRank(entry.currentStatus) - getStatusRank(entry.previousStatus ?? "fail");
  const regressedCases = diffs.filter((entry) => rankChange(entry) < 0);
  const improvedCases = diffs.filter((entry) => rankChange(entry) > 0);
  const unchangedCases = diffs.filter((entry) => rankChange(entry) === 0);
  // Change of one summary metric relative to the previous run.
  const summaryChange = (metric) => current.response.summary[metric] - (previous?.response.summary[metric] ?? 0);
  return {
    currentRunId: current.id,
    improvedCases,
    previousRunId: previous?.id,
    regressedCases,
    suiteId: current.suiteId,
    summaryDelta: {
      averageCitationF1: summaryChange("averageCitationF1"),
      averageResolvedCitationRate: summaryChange("averageResolvedCitationRate"),
      failedCases: summaryChange("failedCases"),
      passedCases: summaryChange("passedCases"),
      // passingRate is read from the response itself, not from its summary.
      passingRate: current.response.passingRate - (previous?.response.passingRate ?? 0),
      partialCases: summaryChange("partialCases")
    },
    unchangedCases
  };
};
1761
1802
  var createRAGFileEvaluationHistoryStore = (path) => ({
1762
1803
  listRuns: async ({ limit, suiteId } = {}) => {
1763
1804
  let parsed = [];
@@ -1796,6 +1837,42 @@ var createRAGFileEvaluationHistoryStore = (path) => ({
1796
1837
  `, "utf8");
1797
1838
  }
1798
1839
  });
1840
+ var createRAGFileAnswerGroundingEvaluationHistoryStore = (path) => ({
1841
+ async listRuns(input) {
1842
+ try {
1843
+ const raw = await readFile(path, "utf8");
1844
+ const data = JSON.parse(raw);
1845
+ const runs = Array.isArray(data.runs) ? data.runs : [];
1846
+ const filtered = input?.suiteId ? runs.filter((run) => run.suiteId === input.suiteId) : runs;
1847
+ return filtered.sort(toGroundingHistorySortOrder).slice(0, input?.limit ?? DEFAULT_HISTORY_LIMIT);
1848
+ } catch (error) {
1849
+ if (error && typeof error === "object" && "code" in error && error.code === "ENOENT") {
1850
+ return [];
1851
+ }
1852
+ throw error;
1853
+ }
1854
+ },
1855
+ async saveRun(run) {
1856
+ let runs = [];
1857
+ try {
1858
+ const raw = await readFile(path, "utf8");
1859
+ const data = JSON.parse(raw);
1860
+ runs = Array.isArray(data.runs) ? data.runs : [];
1861
+ } catch (error) {
1862
+ if (!error || typeof error !== "object" || !("code" in error) || error.code !== "ENOENT") {
1863
+ throw error;
1864
+ }
1865
+ }
1866
+ const nextRuns = normalizeGroundingHistoryRuns([
1867
+ run,
1868
+ ...runs.filter((entry) => entry.id !== run.id)
1869
+ ]);
1870
+ await mkdir(dirname(path), { recursive: true });
1871
+ await writeFile(path, JSON.stringify({
1872
+ runs: nextRuns
1873
+ }, null, 2));
1874
+ }
1875
+ });
1799
1876
  var loadRAGEvaluationHistory = async ({
1800
1877
  store,
1801
1878
  suite,
@@ -1817,6 +1894,29 @@ var loadRAGEvaluationHistory = async ({
1817
1894
  suiteLabel: suite.label ?? suite.id
1818
1895
  };
1819
1896
  };
1897
// Loads the answer-grounding history of one suite from `store`.
// Asks the store for at most `limit` runs of `suite.id`, orders them newest
// first, and reports the two most recent runs together with their diff.
// The diff is only built when both a latest and a previous run exist.
var loadRAGAnswerGroundingEvaluationHistory = async ({
  store,
  suite,
  limit = DEFAULT_HISTORY_LIMIT
}) => {
  // The store may answer synchronously or with a promise.
  const storedRuns = await Promise.resolve(store.listRuns({
    limit,
    suiteId: suite.id
  }));
  const runs = normalizeGroundingHistoryRuns(storedRuns);
  const [latestRun, previousRun] = runs;
  const diff = latestRun && previousRun ? buildRAGAnswerGroundingEvaluationRunDiff({
    current: latestRun,
    previous: previousRun
  }) : undefined;
  return {
    diff,
    latestRun,
    previousRun,
    runs,
    suiteId: suite.id,
    // Fall back to the suite id when the suite has no label.
    suiteLabel: suite.label ?? suite.id
  };
};
1820
1920
  var persistRAGEvaluationSuiteRun = async ({
1821
1921
  store,
1822
1922
  run
@@ -1824,6 +1924,13 @@ var persistRAGEvaluationSuiteRun = async ({
1824
1924
  await Promise.resolve(store.saveRun(run));
1825
1925
  return run;
1826
1926
  };
1927
+ var persistRAGAnswerGroundingEvaluationRun = async ({
1928
+ store,
1929
+ run
1930
+ }) => {
1931
+ await Promise.resolve(store.saveRun(run));
1932
+ return run;
1933
+ };
1827
1934
  var buildRAGEvaluationResponse = (cases) => {
1828
1935
  const totalCases = cases.length;
1829
1936
  const passedCases = cases.filter((entry) => entry.status === "pass").length;
@@ -2840,5 +2947,5 @@ export {
2840
2947
  AIStreamKey
2841
2948
  };
2842
2949
 
2843
- //# debugId=EEFE8418E86C288B64756E2164756E21
2950
+ //# debugId=9275DFD130B50DC164756E2164756E21
2844
2951
  //# sourceMappingURL=index.js.map