@absolutejs/absolute 0.19.0-beta.533 → 0.19.0-beta.535

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -2130,7 +2130,266 @@ var buildGroundingCaseSnapshots = ({
2130
2130
  };
2131
2131
  });
2132
2132
  };
2133
/**
 * Compares two stage-count maps key by key.
 *
 * A stage that is missing (or nullish) on one side is treated as a count of 0,
 * so `{ rerank: 0 }` and `{}` are considered equal.
 *
 * @param {Object<string, number>} left - First stage-count map.
 * @param {Object<string, number>} right - Second stage-count map.
 * @returns {boolean} `true` when every stage found on either side has the same count.
 */
var areStageCountsEqual = (left, right) => {
  const countOf = (counts, stage) => counts[stage] ?? 0;
  // Union of stage names so a stage present on only one side is still checked.
  const stages = [...new Set(Object.keys(left).concat(Object.keys(right)))];
  return stages.every((stage) => countOf(left, stage) === countOf(right, stage));
};
2145
/**
 * Builds the trace snapshot for a single evaluation case and classifies how it
 * differs from the previously recorded snapshot for the same case.
 *
 * @param {object} args
 * @param {object} args.caseResult - Case result; `caseId`, `label`, `query` and
 *   `status` are copied onto the snapshot.
 * @param {object} [args.currentTrace] - Retrieval trace for this run. When absent,
 *   counts default to 0, `variantQueries` to `[]` and `stageCounts` to `{}`.
 * @param {object} [args.previousTrace] - Snapshot recorded for the previous run
 *   (same shape as the value returned here).
 * @returns {object} Snapshot with current values, `previous*` values and a
 *   `traceChange` of `"new"`, `"changed"` or `"unchanged"`.
 */
var buildEvaluationCaseTraceSnapshot = ({
  caseResult,
  currentTrace,
  previousTrace
}) => {
  const stageCounts = currentTrace ? buildTraceStageCounts([currentTrace]) : {};
  const previousStageCounts = previousTrace?.stageCounts ?? {};
  // A previous snapshot may lack `variantQueries` (e.g. hand-built or older
  // persisted data); treat that as "no variants" instead of throwing.
  const previousVariantQueries = previousTrace?.variantQueries ?? [];

  const candidateTopK = currentTrace?.candidateTopK ?? 0;
  const lexicalTopK = currentTrace?.lexicalTopK ?? 0;
  const finalCount = currentTrace?.resultCounts.final ?? 0;
  const lexicalCount = currentTrace?.resultCounts.lexical ?? 0;
  const vectorCount = currentTrace?.resultCounts.vector ?? 0;
  const traceMode = currentTrace?.mode;
  // An empty transformed query is normalised to `undefined`.
  const transformedQuery = currentTrace?.transformedQuery || undefined;
  const variantQueries = currentTrace?.variantQueries ?? [];

  // Element-wise comparison: joining with "|" would make ["a|b"] and
  // ["a", "b"] look identical.
  const hasSameVariants = previousVariantQueries.length === variantQueries.length
    && previousVariantQueries.every((query, index) => query === variantQueries[index]);

  let traceChange;
  if (!previousTrace) {
    traceChange = currentTrace ? "new" : "unchanged";
  } else {
    const differs = previousTrace.traceMode !== traceMode
      || previousTrace.transformedQuery !== transformedQuery
      || !hasSameVariants
      || previousTrace.finalCount !== finalCount
      || previousTrace.vectorCount !== vectorCount
      || previousTrace.lexicalCount !== lexicalCount
      || previousTrace.candidateTopK !== candidateTopK
      || previousTrace.lexicalTopK !== lexicalTopK
      || !areStageCountsEqual(previousStageCounts, stageCounts);
    traceChange = differs ? "changed" : "unchanged";
  }

  return {
    candidateTopK,
    caseId: caseResult.caseId,
    finalCount,
    label: caseResult.label,
    lexicalCount,
    lexicalTopK,
    previousCandidateTopK: previousTrace?.candidateTopK,
    previousFinalCount: previousTrace?.finalCount,
    previousLexicalCount: previousTrace?.lexicalCount,
    previousLexicalTopK: previousTrace?.lexicalTopK,
    previousStageCounts,
    previousTraceMode: previousTrace?.traceMode,
    previousTransformedQuery: previousTrace?.transformedQuery,
    previousVariantQueries,
    previousVectorCount: previousTrace?.vectorCount,
    query: caseResult.query,
    stageCounts,
    status: caseResult.status,
    traceChange,
    traceMode,
    transformedQuery,
    variantQueries,
    vectorCount
  };
};
2179
/**
 * Converts freshly evaluated cases into trace snapshots.
 *
 * No previous trace is supplied, so each snapshot is built from the case
 * result and its current trace only.
 *
 * @param {Array<{ caseResult: object, trace?: object }>} evaluated - Evaluated cases.
 * @returns {object[]} One snapshot per evaluated entry, in input order.
 */
var buildEvaluationCaseTraceSnapshotsFromEvaluated = (evaluated) => {
  const toSnapshot = (entry) => buildEvaluationCaseTraceSnapshot({
    caseResult: entry.caseResult,
    currentTrace: entry.trace
  });
  return evaluated.map((entry) => toSnapshot(entry));
};
2183
/**
 * Builds per-case trace snapshots for the current run, diffed against the
 * snapshots stored on the previous run.
 *
 * @param {object} args
 * @param {object} [args.current] - Current run; reads `caseTraceSnapshots` and
 *   `response.cases`. When falsy an empty list is returned.
 * @param {object} [args.previous] - Previous run; reads `caseTraceSnapshots`.
 * @returns {object[]} One snapshot per case in `current.response.cases`.
 */
var buildEvaluationCaseTraceSnapshots = ({
  current,
  previous
}) => {
  if (!current) {
    return [];
  }
  // Index stored snapshots by case id; a missing list is treated as empty.
  const indexByCaseId = (snapshots) => new Map((snapshots ?? []).map((entry) => [entry.caseId, entry]));
  const currentTraces = indexByCaseId(current.caseTraceSnapshots);
  const previousTraces = indexByCaseId(previous?.caseTraceSnapshots);

  // Rebuilds a trace-shaped object from the stored snapshot of a case.
  // NOTE(review): the rebuilt trace carries an empty `steps` list; if
  // buildTraceStageCounts derives its counts from `steps`, the stored
  // `stageCounts` of the current snapshot are not carried over — confirm.
  const rebuildTrace = (caseResult) => {
    const stored = currentTraces.get(caseResult.caseId);
    if (!stored) {
      return undefined;
    }
    const { finalCount, lexicalCount, vectorCount } = stored;
    return {
      candidateTopK: stored.candidateTopK,
      lexicalTopK: stored.lexicalTopK,
      mode: stored.traceMode ?? "vector",
      query: caseResult.query,
      resultCounts: {
        // Only the final count is stored, so fused/reranked reuse it.
        final: finalCount,
        fused: finalCount,
        lexical: lexicalCount,
        reranked: finalCount,
        vector: vectorCount
      },
      runLexical: lexicalCount > 0,
      runVector: vectorCount > 0,
      steps: [],
      topK: caseResult.topK,
      transformedQuery: stored.transformedQuery ?? caseResult.query,
      variantQueries: stored.variantQueries
    };
  };

  return current.response.cases.map((caseResult) => buildEvaluationCaseTraceSnapshot({
    caseResult,
    currentTrace: rebuildTrace(caseResult),
    previousTrace: previousTraces.get(caseResult.caseId)
  }));
};
2133
2225
  var getStatusRank = (status) => status === "pass" ? 2 : status === "partial" ? 1 : 0;
2226
/**
 * Formats a numeric delta with an explicit "+" for non-negative values.
 *
 * @param {number} value - Delta to format.
 * @param {number} [decimals=0] - Digits after the decimal point (passed to `toFixed`).
 * @param {string} [suffix=""] - Text appended after the number, e.g. "%" or "ms".
 * @returns {string} For example "+1.5ms" or "-2".
 */
var formatSignedDelta = (value, decimals = 0, suffix = "") => {
  // Negative numbers already carry their "-" from toFixed.
  const sign = value >= 0 ? "+" : "";
  const magnitude = value.toFixed(decimals);
  return `${sign}${magnitude}${suffix}`;
};
2227
/**
 * Renders a one-line summary of an evaluation response: pass ratio, average F1
 * (3 decimals) and average latency in milliseconds (1 decimal).
 *
 * @param {object} response - Reads `totalCases` and `summary.{passedCases, averageF1, averageLatencyMs}`.
 * @returns {string} For example "3/4 pass · f1 0.750 · latency 10.0ms".
 */
var formatEvaluationSummary = (response) => {
  const passRatio = `${response.summary.passedCases}/${response.totalCases} pass`;
  const f1 = `f1 ${response.summary.averageF1.toFixed(3)}`;
  const latency = `latency ${response.summary.averageLatencyMs.toFixed(1)}ms`;
  return [passRatio, f1, latency].join(" \xB7 ");
};
2228
/**
 * Joins case labels into a comma-separated list, falling back to the case id
 * when a case has no label.
 *
 * @param {Array<{ caseId: string, label?: string }>} cases - Cases to list.
 * @returns {string} Comma-separated labels, or "none" for an empty list.
 */
var formatHistoryCaseLabels = (cases) => {
  if (!(cases.length > 0)) {
    return "none";
  }
  const names = cases.map((entry) => entry.label ?? entry.caseId);
  return names.join(", ");
};
2229
/**
 * Formats the list of trace modes for display.
 *
 * @param {string[]} modes - Trace modes.
 * @returns {string} Modes joined with " / ", or "n/a" when the list is empty.
 */
var formatTraceModes = (modes) => {
  if (!(modes.length > 0)) {
    return "n/a";
  }
  return modes.join(" / ");
};
2230
/**
 * Summarises the three stages with the highest counts.
 *
 * @param {Object<string, number>} stageCounts - Count per stage name.
 * @returns {string} Up to three "stage count" pairs joined with " · ", highest
 *   count first, or "n/a" when there are no stages.
 */
var formatTraceStageSummary = (stageCounts) => {
  const ranked = Object.entries(stageCounts);
  // Descending by count; ties keep their original order.
  ranked.sort(([, leftCount], [, rightCount]) => rightCount - leftCount);
  const parts = [];
  for (const [stage, count] of ranked.slice(0, 3)) {
    parts.push(`${stage} ${count}`);
  }
  return parts.length > 0 ? parts.join(" \xB7 ") : "n/a";
};
2234
/**
 * Formats a "count out of total" pair.
 *
 * @param {number} count - Numerator.
 * @param {number} total - Denominator.
 * @returns {string} The pair rendered as "count/total".
 */
var formatTraceRatio = (count, total) => {
  const separator = "/";
  return `${count}${separator}${total}`;
};
2235
/**
 * Formats a count delta with an explicit "+" for non-negative values.
 *
 * @param {number} value - Delta to format.
 * @returns {string} For example "+2", "+0" or "-1".
 */
var formatTraceCountDelta = (value) => {
  const prefix = value >= 0 ? "+" : "";
  return `${prefix}${value}`;
};
2236
/**
 * Builds the label/value rows that summarise a comparison entry's trace.
 *
 * @param {object} entry - Comparison entry; reads `traceSummary`.
 * @returns {Array<{ label: string, value: string }>} A single "Unavailable" row
 *   when the entry has no trace summary, otherwise one row each for modes,
 *   average counts, transform/variant ratios, top-K averages and stages.
 */
var buildRAGComparisonTraceSummaryRows = (entry) => {
  const trace = entry.traceSummary;
  if (!trace) {
    return [{ label: "Trace", value: "Unavailable" }];
  }
  const row = (label, value) => ({ label, value });
  const oneDecimal = (amount) => amount.toFixed(1);
  return [
    row("Modes", formatTraceModes(trace.modes)),
    row("Avg final", oneDecimal(trace.averageFinalCount)),
    row("Avg vector", oneDecimal(trace.averageVectorCount)),
    row("Avg lexical", oneDecimal(trace.averageLexicalCount)),
    row("Transforms", formatTraceRatio(trace.transformedCases, trace.totalCases)),
    row("Variants", formatTraceRatio(trace.variantCases, trace.totalCases)),
    // Candidate top-K first, lexical top-K second.
    row("TopK", `${oneDecimal(trace.averageCandidateTopK)} / ${oneDecimal(trace.averageLexicalTopK)}`),
    row("Stages", formatTraceStageSummary(trace.stageCounts))
  ];
};
2264
/**
 * Builds the label/value rows describing how an entry's trace differs from the
 * leader's trace.
 *
 * @param {object} entry - Comparison entry; reads `traceSummary`.
 * @param {object} [leader] - Leading entry; reads `traceSummary` and `label`.
 * @returns {Array<{ label: string, value: string }>} A single explanatory row
 *   when a trace is missing or the entry is the leader itself; otherwise the
 *   baseline label followed by the deltas against the leader.
 */
var buildRAGComparisonTraceDiffRows = (entry, leader) => {
  const trace = entry.traceSummary;
  if (!trace) {
    return [{ label: "Trace", value: "Unavailable for comparison" }];
  }
  const leaderTrace = leader?.traceSummary;
  if (!leaderTrace) {
    return [{ label: "Baseline", value: "Leader trace unavailable" }];
  }
  if (entry === leader) {
    return [{ label: "Baseline", value: "Leader strategy" }];
  }

  // Collect non-zero per-stage deltas over the union of stage names
  // (leader's stages first), keeping at most the first three.
  const stageNames = Object.keys({
    ...leaderTrace.stageCounts,
    ...trace.stageCounts
  });
  const stageChanges = [];
  for (const stage of stageNames) {
    const delta = (trace.stageCounts[stage] ?? 0) - (leaderTrace.stageCounts[stage] ?? 0);
    if (delta !== 0) {
      stageChanges.push(`${stage} ${formatTraceCountDelta(delta)}`);
    }
  }
  const stageDelta = stageChanges.slice(0, 3).join(" \xB7 ");

  const rows = [
    { label: "Baseline", value: leader.label }
  ];

  const ownModes = formatTraceModes(trace.modes);
  const leaderModes = formatTraceModes(leaderTrace.modes);
  if (ownModes !== leaderModes) {
    rows.push({
      label: "Modes vs leader",
      value: `${ownModes} vs ${leaderModes}`
    });
  }

  rows.push({
    label: "Final delta",
    value: formatSignedDelta(trace.averageFinalCount - leaderTrace.averageFinalCount, 1)
  });
  rows.push({
    label: "Vector delta",
    value: formatSignedDelta(trace.averageVectorCount - leaderTrace.averageVectorCount, 1)
  });
  rows.push({
    label: "Lexical delta",
    value: formatSignedDelta(trace.averageLexicalCount - leaderTrace.averageLexicalCount, 1)
  });
  rows.push({
    label: "Transform delta",
    value: formatTraceCountDelta(trace.transformedCases - leaderTrace.transformedCases)
  });

  if (stageDelta) {
    rows.push({ label: "Stage delta", value: stageDelta });
  }
  return rows;
};
2311
/**
 * Builds the label/value rows that describe a benchmark's evaluation history.
 *
 * @param {object} [history] - Evaluation history; reads `runs`, `latestRun`,
 *   `previousRun`, `diff` and `caseTraceSnapshots`.
 * @returns {Array<{ label: string, value: string }>} Rows in display order:
 *   - a single placeholder row when there is no latest run;
 *   - run count, latest run (plus its trace summary when present) and previous run;
 *   - a hint row (and nothing more) when no diff is available;
 *   - otherwise summary deltas, trace-summary deltas (when present) and the
 *     cases whose trace changed (first four).
 */
var buildRAGEvaluationHistoryRows = (history) => {
  if (!history?.latestRun) {
    return [
      { label: "History", value: "No persisted benchmark runs yet." }
    ];
  }
  const rows = [
    { label: "Runs recorded", value: String(history.runs.length) },
    {
      label: "Latest",
      value: `${history.latestRun.label} \xB7 ${formatEvaluationSummary(history.latestRun.response)}`
    }
  ];
  if (history.latestRun.traceSummary) {
    rows.push({
      label: "Latest trace",
      value: `${formatTraceModes(history.latestRun.traceSummary.modes)} \xB7 final ${history.latestRun.traceSummary.averageFinalCount.toFixed(1)} \xB7 vector ${history.latestRun.traceSummary.averageVectorCount.toFixed(1)} \xB7 lexical ${history.latestRun.traceSummary.averageLexicalCount.toFixed(1)}`
    });
  }
  if (history.previousRun) {
    rows.push({
      label: "Previous",
      value: `${history.previousRun.label} \xB7 ${formatEvaluationSummary(history.previousRun.response)}`
    });
  }
  if (!history.diff) {
    // Without a diff there is nothing to compare; stop after the hint row.
    rows.push({
      label: "History diff",
      value: "Run the benchmark again to diff regressions over time."
    });
    return rows;
  }
  rows.push({
    label: "Passing delta",
    value: formatSignedDelta(history.diff.summaryDelta.passingRate, 1, "%")
  }, {
    label: "Average F1 delta",
    value: formatSignedDelta(history.diff.summaryDelta.averageF1, 3)
  }, {
    label: "Latency delta",
    value: formatSignedDelta(history.diff.summaryDelta.averageLatencyMs, 1, "ms")
  }, {
    label: "Improved",
    value: formatHistoryCaseLabels(history.diff.improvedCases)
  }, {
    label: "Regressed",
    value: formatHistoryCaseLabels(history.diff.regressedCases)
  });
  if (history.diff.traceSummaryDelta) {
    rows.push({
      label: "Trace mode shift",
      value: history.diff.traceSummaryDelta.modesChanged ? "changed" : "stable"
    }, {
      label: "Trace final delta",
      value: formatSignedDelta(history.diff.traceSummaryDelta.averageFinalCount, 1)
    }, {
      label: "Trace vector delta",
      value: formatSignedDelta(history.diff.traceSummaryDelta.averageVectorCount, 1)
    }, {
      label: "Trace lexical delta",
      value: formatSignedDelta(history.diff.traceSummaryDelta.averageLexicalCount, 1)
    }, {
      label: "Trace transform delta",
      value: formatTraceCountDelta(history.diff.traceSummaryDelta.transformedCases)
    }, {
      label: "Trace variant delta",
      value: formatTraceCountDelta(history.diff.traceSummaryDelta.variantCases)
    });
    const stageDelta = Object.entries(history.diff.traceSummaryDelta.stageCounts ?? {}).map(([stage, count]) => `${stage} ${formatTraceCountDelta(count)}`).join(", ");
    if (stageDelta) {
      rows.push({ label: "Trace stage delta", value: stageDelta });
    }
  }
  // History objects that lack `caseTraceSnapshots` are treated as having no
  // snapshots rather than raising a TypeError on `.length`.
  const caseTraceSnapshots = history.caseTraceSnapshots ?? [];
  if (caseTraceSnapshots.length > 0) {
    const changedCases = caseTraceSnapshots.filter((entry) => entry.traceChange === "changed");
    rows.push({
      label: "Trace drift cases",
      value: changedCases.length > 0 ? changedCases.map((entry) => entry.label ?? entry.caseId).slice(0, 4).join(", ") : "none"
    });
  }
  return rows;
};
2134
2393
  var buildRAGEvaluationRunDiff = ({
2135
2394
  current,
2136
2395
  previous
@@ -2318,6 +2577,10 @@ var loadRAGEvaluationHistory = async ({
2318
2577
  const latestRun = runs[0];
2319
2578
  const previousRun = runs[1];
2320
2579
  return {
2580
+ caseTraceSnapshots: buildEvaluationCaseTraceSnapshots({
2581
+ current: latestRun,
2582
+ previous: previousRun
2583
+ }),
2321
2584
  diff: latestRun && previousRun ? buildRAGEvaluationRunDiff({
2322
2585
  current: latestRun,
2323
2586
  previous: previousRun
@@ -2529,6 +2792,7 @@ var compareRAGRerankers = async ({
2529
2792
  });
2530
2793
  const response = buildRAGEvaluationResponse(evaluated.map((entry) => entry.caseResult));
2531
2794
  return {
2795
+ caseTraceSnapshots: buildEvaluationCaseTraceSnapshotsFromEvaluated(evaluated),
2532
2796
  label: candidate.label ?? candidate.id,
2533
2797
  providerName: typeof candidate.rerank === "function" ? undefined : candidate.rerank?.providerName,
2534
2798
  response,
@@ -2615,6 +2879,7 @@ var compareRAGRetrievalStrategies = async ({
2615
2879
  });
2616
2880
  const response = buildRAGEvaluationResponse(evaluated.map((entry) => entry.caseResult));
2617
2881
  return {
2882
+ caseTraceSnapshots: buildEvaluationCaseTraceSnapshotsFromEvaluated(evaluated),
2618
2883
  label: candidate.label ?? candidate.id,
2619
2884
  response,
2620
2885
  retrievalId: candidate.id,
@@ -2683,7 +2948,8 @@ var executeDryRunRAGEvaluation = (input, defaultTopK = DEFAULT_TOP_K) => input.c
2683
2948
  var runRAGEvaluationSuite = async ({
2684
2949
  suite,
2685
2950
  evaluate,
2686
- overrides
2951
+ overrides,
2952
+ artifacts
2687
2953
  }) => {
2688
2954
  const startedAt = Date.now();
2689
2955
  const response = await evaluate({
@@ -2693,6 +2959,7 @@ var runRAGEvaluationSuite = async ({
2693
2959
  });
2694
2960
  const finishedAt = Date.now();
2695
2961
  return {
2962
+ caseTraceSnapshots: artifacts?.caseTraceSnapshots,
2696
2963
  elapsedMs: finishedAt - startedAt,
2697
2964
  finishedAt,
2698
2965
  id: generateId(),
@@ -2700,7 +2967,8 @@ var runRAGEvaluationSuite = async ({
2700
2967
  metadata: suite.metadata,
2701
2968
  response,
2702
2969
  startedAt,
2703
- suiteId: suite.id
2970
+ suiteId: suite.id,
2971
+ traceSummary: artifacts?.traceSummary
2704
2972
  };
2705
2973
  };
2706
2974
  var summarizeRAGEvaluationCase = ({
@@ -2774,5 +3042,5 @@ export {
2774
3042
  buildRAGAnswerWorkflowState
2775
3043
  };
2776
3044
 
2777
- //# debugId=BCBFBA9A1CF59F3A64756E2164756E21
3045
+ //# debugId=EF37DD99ACE1DD0D64756E2164756E21
2778
3046
  //# sourceMappingURL=index.js.map