@absolutejs/absolute 0.19.0-beta.517 → 0.19.0-beta.519

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -919,6 +919,71 @@ export type RAGAnswerGroundingEvaluationResponse = {
919
919
  totalCases: number;
920
920
  passingRate: number;
921
921
  };
922
/**
 * A single recorded execution of an answer-grounding evaluation suite.
 *
 * History stores persist values of this shape; the history helpers order them
 * by descending `finishedAt`.
 */
export type RAGAnswerGroundingEvaluationRun = {
  /** Identifier of the run. The file-backed store replaces a stored run that has the same id. */
  id: string;
  /** Identifier of the suite this run belongs to; used to filter `listRuns`. */
  suiteId: string;
  /** Display label, copied into leaderboard entries. */
  label: string;
  /** Start timestamp (presumably epoch milliseconds — confirm with the code that creates runs). */
  startedAt: number;
  /** Finish timestamp; history is sorted in descending order of this value. */
  finishedAt: number;
  /** Duration of the run; used as the last leaderboard tie-breaker (lower ranks higher). */
  elapsedMs: number;
  /** Evaluation result produced by the run. */
  response: RAGAnswerGroundingEvaluationResponse;
  /** Optional free-form data attached to the run. */
  metadata?: Record<string, unknown>;
};
932
/**
 * Persistence contract for answer-grounding evaluation runs.
 *
 * Both methods may be synchronous or asynchronous: each return type is a
 * union of a plain value and a `Promise` of it.
 */
export type RAGAnswerGroundingEvaluationHistoryStore = {
  /** Persists one run. */
  saveRun: (run: RAGAnswerGroundingEvaluationRun) => Promise<void> | void;
  /**
   * Returns stored runs.
   *
   * `input.suiteId` optionally restricts the result to one suite and
   * `input.limit` optionally caps the number of runs returned.
   */
  listRuns: (input?: {
    suiteId?: string;
    limit?: number;
  }) => Promise<RAGAnswerGroundingEvaluationRun[]> | RAGAnswerGroundingEvaluationRun[];
};
939
/**
 * One row of the answer-grounding leaderboard, as produced by
 * `buildRAGAnswerGroundingEvaluationLeaderboard`.
 */
export type RAGAnswerGroundingEvaluationLeaderboardEntry = {
  /** `id` of the run this row describes. */
  runId: string;
  /** `suiteId` of that run. */
  suiteId: string;
  /** `label` of that run. */
  label: string;
  /** The run's `response.passingRate`; primary ranking key (higher is better). */
  passingRate: number;
  /** The run's `response.summary.averageCitationF1`; second ranking key. */
  averageCitationF1: number;
  /** The run's `response.summary.averageResolvedCitationRate`; third ranking key. */
  averageResolvedCitationRate: number;
  /** 1-based position of the run after sorting. */
  rank: number;
  /** The run's `response.totalCases`. */
  totalCases: number;
};
949
/**
 * Comparison of one evaluation case between the current run and the previous
 * run, as produced by `buildGroundingCaseDiff`.
 *
 * The optional `previous*` fields are `undefined` when the case has no
 * counterpart in the previous run; the `previous*Ids` arrays are then empty.
 */
export type RAGAnswerGroundingEvaluationCaseDiff = {
  /** Identifier of the case; used to pair current and previous results. */
  caseId: string;
  /** Label of the case, taken from the current run. */
  label?: string;
  /** Query of the case, taken from the current run. */
  query?: string;
  /** Status of the case in the previous run, if it existed there. */
  previousStatus?: RAGAnswerGroundingEvaluationCaseResult['status'];
  /** Status of the case in the current run. */
  currentStatus: RAGAnswerGroundingEvaluationCaseResult['status'];
  /** Citation F1 of the case in the previous run, if it existed there. */
  previousCitationF1?: number;
  /** Citation F1 of the case in the current run. */
  currentCitationF1: number;
  /** `matchedIds` of the case in the previous run (empty when there was none). */
  previousMatchedIds: string[];
  /** `matchedIds` of the case in the current run. */
  currentMatchedIds: string[];
  /** `missingIds` of the case in the previous run (empty when there was none). */
  previousMissingIds: string[];
  /** `missingIds` of the case in the current run. */
  currentMissingIds: string[];
};
962
/**
 * Comparison of a run against the run that preceded it, as produced by
 * `buildRAGAnswerGroundingEvaluationRunDiff`.
 */
export type RAGAnswerGroundingEvaluationRunDiff = {
  /** `suiteId` of the current run. */
  suiteId: string;
  /** `id` of the current run. */
  currentRunId: string;
  /** `id` of the run being compared against, when there is one. */
  previousRunId?: string;
  /** Cases whose status ranks lower than before. */
  regressedCases: RAGAnswerGroundingEvaluationCaseDiff[];
  /** Cases whose status ranks higher than before. */
  improvedCases: RAGAnswerGroundingEvaluationCaseDiff[];
  /** Cases whose status rank did not change. */
  unchangedCases: RAGAnswerGroundingEvaluationCaseDiff[];
  /**
   * Current value minus previous value for each aggregate metric. A missing
   * previous value counts as 0.
   */
  summaryDelta: {
    passingRate: number;
    averageCitationF1: number;
    averageResolvedCitationRate: number;
    passedCases: number;
    failedCases: number;
    partialCases: number;
  };
};
978
/**
 * Aggregated history view for one answer-grounding evaluation suite, as
 * produced by `loadRAGAnswerGroundingEvaluationHistory`.
 */
export type RAGAnswerGroundingEvaluationHistory = {
  /** Identifier of the suite. */
  suiteId: string;
  /** Label of the suite; the loader falls back to the suite id when no label is set. */
  suiteLabel?: string;
  /** Loaded runs, ordered by descending `finishedAt`. */
  runs: RAGAnswerGroundingEvaluationRun[];
  /** The same runs ranked by quality. */
  leaderboard: RAGAnswerGroundingEvaluationLeaderboardEntry[];
  /** First entry of `runs`, when any run exists. */
  latestRun?: RAGAnswerGroundingEvaluationRun;
  /** Second entry of `runs`, when at least two runs exist. */
  previousRun?: RAGAnswerGroundingEvaluationRun;
  /** Diff of `latestRun` against `previousRun`; present only when both exist. */
  diff?: RAGAnswerGroundingEvaluationRunDiff;
};
922
987
  export type RAGEvaluationInput = {
923
988
  cases: RAGEvaluationCase[];
924
989
  topK?: number;
@@ -1716,8 +1716,34 @@ var buildRAGEvaluationLeaderboard = (runs) => {
1716
1716
  totalCases: run.response.totalCases
1717
1717
  }));
1718
1718
  };
1719
/**
 * Ranks answer-grounding evaluation runs from best to worst.
 *
 * Runs are ordered by, in turn: higher `response.passingRate`, higher
 * `response.summary.averageCitationF1`, higher
 * `response.summary.averageResolvedCitationRate`, and finally lower
 * `elapsedMs`. The input array is copied before sorting, so it is never
 * reordered.
 *
 * @param runs Runs to rank.
 * @returns One leaderboard entry per run, with `rank` starting at 1.
 */
var buildRAGAnswerGroundingEvaluationLeaderboard = (runs) => {
  // Metrics where a larger value is better, in priority order.
  const higherIsBetter = [
    (run) => run.response.passingRate,
    (run) => run.response.summary.averageCitationF1,
    (run) => run.response.summary.averageResolvedCitationRate
  ];
  const compareRuns = (left, right) => {
    for (const metric of higherIsBetter) {
      const leftValue = metric(left);
      const rightValue = metric(right);
      if (rightValue !== leftValue) {
        return rightValue - leftValue;
      }
    }
    // Every quality metric ties: the faster run ranks higher.
    return left.elapsedMs - right.elapsedMs;
  };
  return [...runs].sort(compareRuns).map((run, index) => ({
    averageCitationF1: run.response.summary.averageCitationF1,
    averageResolvedCitationRate: run.response.summary.averageResolvedCitationRate,
    label: run.label,
    passingRate: run.response.passingRate,
    rank: index + 1,
    runId: run.id,
    suiteId: run.suiteId,
    totalCases: run.response.totalCases
  }));
};
1719
1743
  var toHistorySortOrder = (left, right) => right.finishedAt - left.finishedAt;
1720
1744
  var normalizeHistoryRuns = (runs) => [...runs].sort(toHistorySortOrder);
1745
/** Comparator that places the grounding run with the larger `finishedAt` first. */
var toGroundingHistorySortOrder = (left, right) => right.finishedAt - left.finishedAt;
/** Returns a copy of `runs` sorted by descending `finishedAt`; the input array is not mutated. */
var normalizeGroundingHistoryRuns = (runs) => [...runs].sort(toGroundingHistorySortOrder);
1721
1747
  var buildCaseDiff = (currentCase, previousCase) => ({
1722
1748
  caseId: currentCase.caseId,
1723
1749
  currentF1: currentCase.f1,
@@ -1731,6 +1757,19 @@ var buildCaseDiff = (currentCase, previousCase) => ({
1731
1757
  previousStatus: previousCase?.status,
1732
1758
  query: currentCase.query
1733
1759
  });
1760
/**
 * Builds the comparison record for one grounding case.
 *
 * `current*` values, `label` and `query` come from `currentCase`. `previous*`
 * values come from `previousCase`; when it is absent the previous id lists are
 * empty arrays and the previous status and citation F1 are `undefined`.
 *
 * @param currentCase Case result from the current run.
 * @param previousCase Result for the same case in the previous run, if any.
 * @returns The per-case diff. The `current*Ids` arrays are the same array
 *   instances held by `currentCase` (they are not copied).
 */
var buildGroundingCaseDiff = (currentCase, previousCase) => ({
  caseId: currentCase.caseId,
  currentCitationF1: currentCase.citationF1,
  currentMatchedIds: currentCase.matchedIds,
  currentMissingIds: currentCase.missingIds,
  currentStatus: currentCase.status,
  label: currentCase.label,
  previousCitationF1: previousCase?.citationF1,
  previousMatchedIds: previousCase?.matchedIds ?? [],
  previousMissingIds: previousCase?.missingIds ?? [],
  previousStatus: previousCase?.status,
  query: currentCase.query
});
1734
1773
  var getStatusRank = (status) => status === "pass" ? 2 : status === "partial" ? 1 : 0;
1735
1774
  var buildRAGEvaluationRunDiff = ({
1736
1775
  current,
@@ -1758,6 +1797,32 @@ var buildRAGEvaluationRunDiff = ({
1758
1797
  unchangedCases
1759
1798
  };
1760
1799
  };
1800
/**
 * Compares the current grounding run with the previous one.
 *
 * Only cases present in the current run are reported. Each is paired with the
 * previous result that has the same `caseId` and placed in exactly one bucket
 * by comparing status ranks. A case with no previous status is compared
 * against "fail", so a newly added failing case counts as unchanged and a
 * newly added passing or partial case counts as improved.
 *
 * @param input.current Run being reported on.
 * @param input.previous Run to compare against; may be omitted, in which case
 *   every previous aggregate counts as 0.
 * @returns The case buckets plus `summaryDelta` (current minus previous).
 */
var buildRAGAnswerGroundingEvaluationRunDiff = ({
  current,
  previous
}) => {
  const previousCases = new Map((previous?.response.cases ?? []).map((entry) => [entry.caseId, entry]));
  const regressedCases = [];
  const improvedCases = [];
  const unchangedCases = [];
  // Single pass: each case's ranks are computed once and the case lands in one bucket.
  for (const entry of current.response.cases) {
    const caseDiff = buildGroundingCaseDiff(entry, previousCases.get(entry.caseId));
    const currentRank = getStatusRank(caseDiff.currentStatus);
    const previousRank = getStatusRank(caseDiff.previousStatus ?? "fail");
    if (currentRank < previousRank) {
      regressedCases.push(caseDiff);
    } else if (currentRank > previousRank) {
      improvedCases.push(caseDiff);
    } else {
      unchangedCases.push(caseDiff);
    }
  }
  return {
    currentRunId: current.id,
    improvedCases,
    previousRunId: previous?.id,
    regressedCases,
    suiteId: current.suiteId,
    summaryDelta: {
      averageCitationF1: current.response.summary.averageCitationF1 - (previous?.response.summary.averageCitationF1 ?? 0),
      averageResolvedCitationRate: current.response.summary.averageResolvedCitationRate - (previous?.response.summary.averageResolvedCitationRate ?? 0),
      failedCases: current.response.summary.failedCases - (previous?.response.summary.failedCases ?? 0),
      passedCases: current.response.summary.passedCases - (previous?.response.summary.passedCases ?? 0),
      passingRate: current.response.passingRate - (previous?.response.passingRate ?? 0),
      partialCases: current.response.summary.partialCases - (previous?.response.summary.partialCases ?? 0)
    },
    unchangedCases
  };
};
1761
1826
  var createRAGFileEvaluationHistoryStore = (path) => ({
1762
1827
  listRuns: async ({ limit, suiteId } = {}) => {
1763
1828
  let parsed = [];
@@ -1796,6 +1861,42 @@ var createRAGFileEvaluationHistoryStore = (path) => ({
1796
1861
  `, "utf8");
1797
1862
  }
1798
1863
  });
1864
/**
 * Creates a history store that keeps grounding evaluation runs in a JSON file
 * shaped as `{ "runs": [...] }`.
 *
 * A missing file is treated as an empty history. Any other read error, and
 * any JSON parse error, is rethrown to the caller.
 *
 * @param path Location of the JSON file.
 * @returns An object with `listRuns` and `saveRun`.
 */
var createRAGFileAnswerGroundingEvaluationHistoryStore = (path) => {
  // Reads the stored runs once for both operations, so the ENOENT handling
  // lives in a single place.
  const readStoredRuns = async () => {
    let raw;
    try {
      raw = await readFile(path, "utf8");
    } catch (error) {
      if (error && typeof error === "object" && "code" in error && error.code === "ENOENT") {
        return [];
      }
      throw error;
    }
    const data = JSON.parse(raw);
    // `data` may legitimately parse to `null` or a primitive; treat anything
    // without a `runs` array as an empty history instead of throwing a TypeError.
    return Array.isArray(data?.runs) ? data.runs : [];
  };
  return {
    /**
     * Lists stored runs in descending `finishedAt` order, optionally filtered
     * by `input.suiteId` and capped at `input.limit`
     * (default `DEFAULT_HISTORY_LIMIT`).
     */
    async listRuns(input) {
      const runs = await readStoredRuns();
      const filtered = input?.suiteId ? runs.filter((run) => run.suiteId === input.suiteId) : runs;
      return normalizeGroundingHistoryRuns(filtered).slice(0, input?.limit ?? DEFAULT_HISTORY_LIMIT);
    },
    /**
     * Inserts `run`, replacing any stored run with the same `id`, and rewrites
     * the whole file. The parent directory is created when missing.
     */
    async saveRun(run) {
      const runs = await readStoredRuns();
      const nextRuns = normalizeGroundingHistoryRuns([
        run,
        ...runs.filter((entry) => entry.id !== run.id)
      ]);
      await mkdir(dirname(path), { recursive: true });
      await writeFile(path, JSON.stringify({
        runs: nextRuns
      }, null, 2));
    }
  };
};
1799
1900
  var loadRAGEvaluationHistory = async ({
1800
1901
  store,
1801
1902
  suite,
@@ -1817,6 +1918,30 @@ var loadRAGEvaluationHistory = async ({
1817
1918
  suiteLabel: suite.label ?? suite.id
1818
1919
  };
1819
1920
  };
1921
/**
 * Loads the grounding evaluation history of one suite from a store.
 *
 * The store may answer synchronously or with a promise. The returned runs are
 * re-sorted by descending `finishedAt`, so the result does not depend on the
 * order the store uses.
 *
 * @param input.store History store to read from.
 * @param input.suite Suite whose runs are loaded; `id` is required and `label`
 *   is optional.
 * @param input.limit Maximum number of runs requested from the store
 *   (default `DEFAULT_HISTORY_LIMIT`).
 * @returns The runs, their leaderboard, the two most recent runs, and a diff
 *   between them. `diff` is `undefined` unless at least two runs were loaded.
 */
var loadRAGAnswerGroundingEvaluationHistory = async ({
  store,
  suite,
  limit = DEFAULT_HISTORY_LIMIT
}) => {
  const storedRuns = await Promise.resolve(store.listRuns({
    limit,
    suiteId: suite.id
  }));
  const runs = normalizeGroundingHistoryRuns(storedRuns);
  // After sorting, index 0 is the most recently finished run.
  const [latestRun, previousRun] = runs;
  return {
    diff: latestRun && previousRun ? buildRAGAnswerGroundingEvaluationRunDiff({
      current: latestRun,
      previous: previousRun
    }) : undefined,
    latestRun,
    leaderboard: buildRAGAnswerGroundingEvaluationLeaderboard(runs),
    previousRun,
    runs,
    suiteId: suite.id,
    suiteLabel: suite.label ?? suite.id
  };
};
1820
1945
  var persistRAGEvaluationSuiteRun = async ({
1821
1946
  store,
1822
1947
  run
@@ -1824,6 +1949,13 @@ var persistRAGEvaluationSuiteRun = async ({
1824
1949
  await Promise.resolve(store.saveRun(run));
1825
1950
  return run;
1826
1951
  };
1952
/**
 * Saves a grounding evaluation run through the given store and hands the same
 * run back.
 *
 * `store.saveRun` may be synchronous or return a promise; either way the save
 * is awaited before this function resolves. Errors from the store propagate
 * to the caller.
 *
 * @param input.store Store whose `saveRun` receives the run.
 * @param input.run Run to persist.
 * @returns The `run` object that was passed in.
 */
var persistRAGAnswerGroundingEvaluationRun = async ({
  store,
  run
}) => {
  await Promise.resolve(store.saveRun(run));
  return run;
};
1827
1959
  var buildRAGEvaluationResponse = (cases) => {
1828
1960
  const totalCases = cases.length;
1829
1961
  const passedCases = cases.filter((entry) => entry.status === "pass").length;
@@ -2840,5 +2972,5 @@ export {
2840
2972
  AIStreamKey
2841
2973
  };
2842
2974
 
2843
- //# debugId=EEFE8418E86C288B64756E2164756E21
2975
+ //# debugId=CE780A331BCCEACE64756E2164756E21
2844
2976
  //# sourceMappingURL=index.js.map