@absolutejs/absolute 0.19.0-beta.533 → 0.19.0-beta.535

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -2157,7 +2157,266 @@ var buildGroundingCaseSnapshots = ({
2157
2157
  };
2158
2158
  });
2159
2159
  };
2160
+ var areStageCountsEqual = (left, right) => {
2161
+ const keys = new Set([
2162
+ ...Object.keys(left),
2163
+ ...Object.keys(right)
2164
+ ]);
2165
+ for (const key of keys) {
2166
+ if ((left[key] ?? 0) !== (right[key] ?? 0)) {
2167
+ return false;
2168
+ }
2169
+ }
2170
+ return true;
2171
+ };
2172
+ var buildEvaluationCaseTraceSnapshot = ({
2173
+ caseResult,
2174
+ currentTrace,
2175
+ previousTrace
2176
+ }) => {
2177
+ const stageCounts = currentTrace ? buildTraceStageCounts([currentTrace]) : {};
2178
+ const previousStageCounts = previousTrace?.stageCounts ?? {};
2179
+ const traceChange = !previousTrace ? currentTrace ? "new" : "unchanged" : previousTrace.traceMode !== currentTrace?.mode || previousTrace.transformedQuery !== (currentTrace?.transformedQuery || undefined) || previousTrace.variantQueries.join("|") !== (currentTrace?.variantQueries ?? []).join("|") || previousTrace.finalCount !== (currentTrace?.resultCounts.final ?? 0) || previousTrace.vectorCount !== (currentTrace?.resultCounts.vector ?? 0) || previousTrace.lexicalCount !== (currentTrace?.resultCounts.lexical ?? 0) || previousTrace.candidateTopK !== (currentTrace?.candidateTopK ?? 0) || previousTrace.lexicalTopK !== (currentTrace?.lexicalTopK ?? 0) || !areStageCountsEqual(previousStageCounts, stageCounts) ? "changed" : "unchanged";
2180
+ return {
2181
+ candidateTopK: currentTrace?.candidateTopK ?? 0,
2182
+ caseId: caseResult.caseId,
2183
+ finalCount: currentTrace?.resultCounts.final ?? 0,
2184
+ label: caseResult.label,
2185
+ lexicalCount: currentTrace?.resultCounts.lexical ?? 0,
2186
+ lexicalTopK: currentTrace?.lexicalTopK ?? 0,
2187
+ previousCandidateTopK: previousTrace?.candidateTopK,
2188
+ previousFinalCount: previousTrace?.finalCount,
2189
+ previousLexicalCount: previousTrace?.lexicalCount,
2190
+ previousLexicalTopK: previousTrace?.lexicalTopK,
2191
+ previousStageCounts,
2192
+ previousTraceMode: previousTrace?.traceMode,
2193
+ previousTransformedQuery: previousTrace?.transformedQuery,
2194
+ previousVariantQueries: previousTrace?.variantQueries ?? [],
2195
+ previousVectorCount: previousTrace?.vectorCount,
2196
+ query: caseResult.query,
2197
+ stageCounts,
2198
+ status: caseResult.status,
2199
+ traceChange,
2200
+ traceMode: currentTrace?.mode,
2201
+ transformedQuery: currentTrace?.transformedQuery || undefined,
2202
+ variantQueries: currentTrace?.variantQueries ?? [],
2203
+ vectorCount: currentTrace?.resultCounts.vector ?? 0
2204
+ };
2205
+ };
2206
+ var buildEvaluationCaseTraceSnapshotsFromEvaluated = (evaluated) => evaluated.map(({ caseResult, trace }) => buildEvaluationCaseTraceSnapshot({
2207
+ caseResult,
2208
+ currentTrace: trace
2209
+ }));
2210
+ var buildEvaluationCaseTraceSnapshots = ({
2211
+ current,
2212
+ previous
2213
+ }) => {
2214
+ if (!current) {
2215
+ return [];
2216
+ }
2217
+ const currentTraces = new Map((current.caseTraceSnapshots ?? []).map((entry) => [entry.caseId, entry]));
2218
+ const previousTraces = new Map((previous?.caseTraceSnapshots ?? []).map((entry) => [
2219
+ entry.caseId,
2220
+ entry
2221
+ ]));
2222
+ return current.response.cases.map((caseResult) => buildEvaluationCaseTraceSnapshot({
2223
+ caseResult,
2224
+ currentTrace: (() => {
2225
+ const currentSnapshot = currentTraces.get(caseResult.caseId);
2226
+ if (!currentSnapshot) {
2227
+ return;
2228
+ }
2229
+ return {
2230
+ candidateTopK: currentSnapshot.candidateTopK,
2231
+ lexicalTopK: currentSnapshot.lexicalTopK,
2232
+ mode: currentSnapshot.traceMode ?? "vector",
2233
+ query: caseResult.query,
2234
+ resultCounts: {
2235
+ final: currentSnapshot.finalCount,
2236
+ fused: currentSnapshot.finalCount,
2237
+ lexical: currentSnapshot.lexicalCount,
2238
+ reranked: currentSnapshot.finalCount,
2239
+ vector: currentSnapshot.vectorCount
2240
+ },
2241
+ runLexical: currentSnapshot.lexicalCount > 0,
2242
+ runVector: currentSnapshot.vectorCount > 0,
2243
+ steps: [],
2244
+ topK: caseResult.topK,
2245
+ transformedQuery: currentSnapshot.transformedQuery ?? caseResult.query,
2246
+ variantQueries: currentSnapshot.variantQueries
2247
+ };
2248
+ })(),
2249
+ previousTrace: previousTraces.get(caseResult.caseId)
2250
+ }));
2251
+ };
2160
2252
  var getStatusRank = (status) => status === "pass" ? 2 : status === "partial" ? 1 : 0;
2253
+ var formatSignedDelta = (value, decimals = 0, suffix = "") => `${value >= 0 ? "+" : ""}${value.toFixed(decimals)}${suffix}`;
2254
+ var formatEvaluationSummary = (response) => `${response.summary.passedCases}/${response.totalCases} pass \xB7 f1 ${response.summary.averageF1.toFixed(3)} \xB7 latency ${response.summary.averageLatencyMs.toFixed(1)}ms`;
2255
+ var formatHistoryCaseLabels = (cases) => cases.length > 0 ? cases.map((entry) => entry.label ?? entry.caseId).join(", ") : "none";
2256
+ var formatTraceModes = (modes) => modes.length > 0 ? modes.join(" / ") : "n/a";
2257
+ var formatTraceStageSummary = (stageCounts) => {
2258
+ const topStages = Object.entries(stageCounts).sort((left, right) => right[1] - left[1]).slice(0, 3);
2259
+ return topStages.length > 0 ? topStages.map(([stage, count]) => `${stage} ${count}`).join(" \xB7 ") : "n/a";
2260
+ };
2261
+ var formatTraceRatio = (count, total) => `${count}/${total}`;
2262
+ var formatTraceCountDelta = (value) => `${value >= 0 ? "+" : ""}${value}`;
2263
+ var buildRAGComparisonTraceSummaryRows = (entry) => {
2264
+ const trace = entry.traceSummary;
2265
+ if (!trace) {
2266
+ return [{ label: "Trace", value: "Unavailable" }];
2267
+ }
2268
+ return [
2269
+ { label: "Modes", value: formatTraceModes(trace.modes) },
2270
+ { label: "Avg final", value: trace.averageFinalCount.toFixed(1) },
2271
+ { label: "Avg vector", value: trace.averageVectorCount.toFixed(1) },
2272
+ { label: "Avg lexical", value: trace.averageLexicalCount.toFixed(1) },
2273
+ {
2274
+ label: "Transforms",
2275
+ value: formatTraceRatio(trace.transformedCases, trace.totalCases)
2276
+ },
2277
+ {
2278
+ label: "Variants",
2279
+ value: formatTraceRatio(trace.variantCases, trace.totalCases)
2280
+ },
2281
+ {
2282
+ label: "TopK",
2283
+ value: `${trace.averageCandidateTopK.toFixed(1)} / ${trace.averageLexicalTopK.toFixed(1)}`
2284
+ },
2285
+ {
2286
+ label: "Stages",
2287
+ value: formatTraceStageSummary(trace.stageCounts)
2288
+ }
2289
+ ];
2290
+ };
2291
+ var buildRAGComparisonTraceDiffRows = (entry, leader) => {
2292
+ const trace = entry.traceSummary;
2293
+ if (!trace) {
2294
+ return [{ label: "Trace", value: "Unavailable for comparison" }];
2295
+ }
2296
+ const leaderTrace = leader?.traceSummary;
2297
+ if (!leaderTrace) {
2298
+ return [{ label: "Baseline", value: "Leader trace unavailable" }];
2299
+ }
2300
+ if (entry === leader) {
2301
+ return [{ label: "Baseline", value: "Leader strategy" }];
2302
+ }
2303
+ const stageDelta = Object.keys({
2304
+ ...leaderTrace.stageCounts,
2305
+ ...trace.stageCounts
2306
+ }).map((stage) => {
2307
+ const typedStage = stage;
2308
+ const delta = (trace.stageCounts[typedStage] ?? 0) - (leaderTrace.stageCounts[typedStage] ?? 0);
2309
+ return delta === 0 ? null : `${typedStage} ${formatTraceCountDelta(delta)}`;
2310
+ }).filter((value) => Boolean(value)).slice(0, 3).join(" \xB7 ");
2311
+ const rows = [
2312
+ { label: "Baseline", value: leader.label }
2313
+ ];
2314
+ if (formatTraceModes(trace.modes) !== formatTraceModes(leaderTrace.modes)) {
2315
+ rows.push({
2316
+ label: "Modes vs leader",
2317
+ value: `${formatTraceModes(trace.modes)} vs ${formatTraceModes(leaderTrace.modes)}`
2318
+ });
2319
+ }
2320
+ rows.push({
2321
+ label: "Final delta",
2322
+ value: formatSignedDelta(trace.averageFinalCount - leaderTrace.averageFinalCount, 1)
2323
+ }, {
2324
+ label: "Vector delta",
2325
+ value: formatSignedDelta(trace.averageVectorCount - leaderTrace.averageVectorCount, 1)
2326
+ }, {
2327
+ label: "Lexical delta",
2328
+ value: formatSignedDelta(trace.averageLexicalCount - leaderTrace.averageLexicalCount, 1)
2329
+ }, {
2330
+ label: "Transform delta",
2331
+ value: formatTraceCountDelta(trace.transformedCases - leaderTrace.transformedCases)
2332
+ });
2333
+ if (stageDelta) {
2334
+ rows.push({ label: "Stage delta", value: stageDelta });
2335
+ }
2336
+ return rows;
2337
+ };
2338
+ var buildRAGEvaluationHistoryRows = (history) => {
2339
+ if (!history?.latestRun) {
2340
+ return [
2341
+ { label: "History", value: "No persisted benchmark runs yet." }
2342
+ ];
2343
+ }
2344
+ const rows = [
2345
+ { label: "Runs recorded", value: String(history.runs.length) },
2346
+ {
2347
+ label: "Latest",
2348
+ value: `${history.latestRun.label} \xB7 ${formatEvaluationSummary(history.latestRun.response)}`
2349
+ }
2350
+ ];
2351
+ if (history.latestRun.traceSummary) {
2352
+ rows.push({
2353
+ label: "Latest trace",
2354
+ value: `${formatTraceModes(history.latestRun.traceSummary.modes)} \xB7 final ${history.latestRun.traceSummary.averageFinalCount.toFixed(1)} \xB7 vector ${history.latestRun.traceSummary.averageVectorCount.toFixed(1)} \xB7 lexical ${history.latestRun.traceSummary.averageLexicalCount.toFixed(1)}`
2355
+ });
2356
+ }
2357
+ if (history.previousRun) {
2358
+ rows.push({
2359
+ label: "Previous",
2360
+ value: `${history.previousRun.label} \xB7 ${formatEvaluationSummary(history.previousRun.response)}`
2361
+ });
2362
+ }
2363
+ if (!history.diff) {
2364
+ rows.push({
2365
+ label: "History diff",
2366
+ value: "Run the benchmark again to diff regressions over time."
2367
+ });
2368
+ return rows;
2369
+ }
2370
+ rows.push({
2371
+ label: "Passing delta",
2372
+ value: formatSignedDelta(history.diff.summaryDelta.passingRate, 1, "%")
2373
+ }, {
2374
+ label: "Average F1 delta",
2375
+ value: formatSignedDelta(history.diff.summaryDelta.averageF1, 3)
2376
+ }, {
2377
+ label: "Latency delta",
2378
+ value: formatSignedDelta(history.diff.summaryDelta.averageLatencyMs, 1, "ms")
2379
+ }, {
2380
+ label: "Improved",
2381
+ value: formatHistoryCaseLabels(history.diff.improvedCases)
2382
+ }, {
2383
+ label: "Regressed",
2384
+ value: formatHistoryCaseLabels(history.diff.regressedCases)
2385
+ });
2386
+ if (history.diff.traceSummaryDelta) {
2387
+ rows.push({
2388
+ label: "Trace mode shift",
2389
+ value: history.diff.traceSummaryDelta.modesChanged ? "changed" : "stable"
2390
+ }, {
2391
+ label: "Trace final delta",
2392
+ value: formatSignedDelta(history.diff.traceSummaryDelta.averageFinalCount, 1)
2393
+ }, {
2394
+ label: "Trace vector delta",
2395
+ value: formatSignedDelta(history.diff.traceSummaryDelta.averageVectorCount, 1)
2396
+ }, {
2397
+ label: "Trace lexical delta",
2398
+ value: formatSignedDelta(history.diff.traceSummaryDelta.averageLexicalCount, 1)
2399
+ }, {
2400
+ label: "Trace transform delta",
2401
+ value: formatTraceCountDelta(history.diff.traceSummaryDelta.transformedCases)
2402
+ }, {
2403
+ label: "Trace variant delta",
2404
+ value: formatTraceCountDelta(history.diff.traceSummaryDelta.variantCases)
2405
+ });
2406
+ const stageDelta = Object.entries(history.diff.traceSummaryDelta.stageCounts ?? {}).map(([stage, count]) => `${stage} ${formatTraceCountDelta(count)}`).join(", ");
2407
+ if (stageDelta) {
2408
+ rows.push({ label: "Trace stage delta", value: stageDelta });
2409
+ }
2410
+ }
2411
+ if (history.caseTraceSnapshots.length > 0) {
2412
+ const changedCases = history.caseTraceSnapshots.filter((entry) => entry.traceChange === "changed");
2413
+ rows.push({
2414
+ label: "Trace drift cases",
2415
+ value: changedCases.length > 0 ? changedCases.map((entry) => entry.label ?? entry.caseId).slice(0, 4).join(", ") : "none"
2416
+ });
2417
+ }
2418
+ return rows;
2419
+ };
2161
2420
  var buildRAGEvaluationRunDiff = ({
2162
2421
  current,
2163
2422
  previous
@@ -2345,6 +2604,10 @@ var loadRAGEvaluationHistory = async ({
2345
2604
  const latestRun = runs[0];
2346
2605
  const previousRun = runs[1];
2347
2606
  return {
2607
+ caseTraceSnapshots: buildEvaluationCaseTraceSnapshots({
2608
+ current: latestRun,
2609
+ previous: previousRun
2610
+ }),
2348
2611
  diff: latestRun && previousRun ? buildRAGEvaluationRunDiff({
2349
2612
  current: latestRun,
2350
2613
  previous: previousRun
@@ -2556,6 +2819,7 @@ var compareRAGRerankers = async ({
2556
2819
  });
2557
2820
  const response = buildRAGEvaluationResponse(evaluated.map((entry) => entry.caseResult));
2558
2821
  return {
2822
+ caseTraceSnapshots: buildEvaluationCaseTraceSnapshotsFromEvaluated(evaluated),
2559
2823
  label: candidate.label ?? candidate.id,
2560
2824
  providerName: typeof candidate.rerank === "function" ? undefined : candidate.rerank?.providerName,
2561
2825
  response,
@@ -2642,6 +2906,7 @@ var compareRAGRetrievalStrategies = async ({
2642
2906
  });
2643
2907
  const response = buildRAGEvaluationResponse(evaluated.map((entry) => entry.caseResult));
2644
2908
  return {
2909
+ caseTraceSnapshots: buildEvaluationCaseTraceSnapshotsFromEvaluated(evaluated),
2645
2910
  label: candidate.label ?? candidate.id,
2646
2911
  response,
2647
2912
  retrievalId: candidate.id,
@@ -2710,7 +2975,8 @@ var executeDryRunRAGEvaluation = (input, defaultTopK = DEFAULT_TOP_K) => input.c
2710
2975
  var runRAGEvaluationSuite = async ({
2711
2976
  suite,
2712
2977
  evaluate,
2713
- overrides
2978
+ overrides,
2979
+ artifacts
2714
2980
  }) => {
2715
2981
  const startedAt = Date.now();
2716
2982
  const response = await evaluate({
@@ -2720,6 +2986,7 @@ var runRAGEvaluationSuite = async ({
2720
2986
  });
2721
2987
  const finishedAt = Date.now();
2722
2988
  return {
2989
+ caseTraceSnapshots: artifacts?.caseTraceSnapshots,
2723
2990
  elapsedMs: finishedAt - startedAt,
2724
2991
  finishedAt,
2725
2992
  id: generateId(),
@@ -2727,7 +2994,8 @@ var runRAGEvaluationSuite = async ({
2727
2994
  metadata: suite.metadata,
2728
2995
  response,
2729
2996
  startedAt,
2730
- suiteId: suite.id
2997
+ suiteId: suite.id,
2998
+ traceSummary: artifacts?.traceSummary
2731
2999
  };
2732
3000
  };
2733
3001
  var summarizeRAGEvaluationCase = ({
@@ -3454,5 +3722,5 @@ export {
3454
3722
  createAIStream
3455
3723
  };
3456
3724
 
3457
- //# debugId=4CDEE3E3E09A9FC864756E2164756E21
3725
+ //# debugId=609C67383E3D7B6464756E2164756E21
3458
3726
  //# sourceMappingURL=index.js.map