@absolutejs/absolute 0.19.0-beta.533 → 0.19.0-beta.535
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/ai/client/index.js +271 -3
- package/dist/ai/client/index.js.map +3 -3
- package/dist/ai/index.js +274 -3
- package/dist/ai/index.js.map +3 -3
- package/dist/ai-client/react/ai/index.js +5 -2
- package/dist/ai-client/vue/ai/index.js +5 -2
- package/dist/angular/index.js +2 -2
- package/dist/angular/index.js.map +1 -1
- package/dist/angular/server.js +2 -2
- package/dist/angular/server.js.map +1 -1
- package/dist/build.js +2 -2
- package/dist/build.js.map +1 -1
- package/dist/index.js +2 -2
- package/dist/index.js.map +1 -1
- package/dist/react/ai/index.js +271 -3
- package/dist/react/ai/index.js.map +3 -3
- package/dist/src/ai/index.d.ts +1 -1
- package/dist/src/ai/rag/index.d.ts +1 -1
- package/dist/src/ai/rag/quality.d.ts +8 -2
- package/dist/src/react/ai/useRAG.d.ts +2 -0
- package/dist/src/react/ai/useRAGEvaluate.d.ts +2 -0
- package/dist/src/svelte/ai/createRAG.d.ts +2 -0
- package/dist/src/svelte/ai/createRAGEvaluate.d.ts +2 -0
- package/dist/src/vue/ai/useRAG.d.ts +92 -0
- package/dist/src/vue/ai/useRAGEvaluate.d.ts +92 -0
- package/dist/svelte/ai/index.js +271 -3
- package/dist/svelte/ai/index.js.map +3 -3
- package/dist/types/ai.d.ts +33 -0
- package/dist/vue/ai/index.js +271 -3
- package/dist/vue/ai/index.js.map +3 -3
- package/package.json +7 -7
package/dist/types/ai.d.ts
CHANGED
|
@@ -1195,6 +1195,7 @@ export type RAGEvaluationSuiteRun = {
|
|
|
1195
1195
|
elapsedMs: number;
|
|
1196
1196
|
response: RAGEvaluationResponse;
|
|
1197
1197
|
traceSummary?: RAGRetrievalTraceComparisonSummary;
|
|
1198
|
+
caseTraceSnapshots?: RAGEvaluationCaseTraceSnapshot[];
|
|
1198
1199
|
metadata?: Record<string, unknown>;
|
|
1199
1200
|
};
|
|
1200
1201
|
export type RAGEvaluationHistoryStore = {
|
|
@@ -1246,15 +1247,45 @@ export type RAGEvaluationRunDiff = {
|
|
|
1246
1247
|
stageCounts: Partial<Record<RAGRetrievalTraceStage, number>>;
|
|
1247
1248
|
};
|
|
1248
1249
|
};
|
|
1250
|
+
/**
 * Retrieval-trace figures recorded for a single evaluation case.
 *
 * Most figures come in pairs: the value for the current trace and a
 * `previous*` counterpart holding the value from the snapshot it is compared
 * against.
 */
export type RAGEvaluationCaseTraceSnapshot = {
    // --- Case identity and outcome ---
    caseId: string;
    label?: string;
    query: string;
    status: RAGEvaluationCaseResult['status'];

    // --- Retrieval mode ---
    traceMode?: RAGHybridRetrievalMode;
    previousTraceMode?: RAGHybridRetrievalMode;

    // --- Query rewriting ---
    transformedQuery?: string;
    previousTransformedQuery?: string;
    variantQueries: string[];
    previousVariantQueries: string[];

    // --- Result counts ---
    finalCount: number;
    previousFinalCount?: number;
    vectorCount: number;
    previousVectorCount?: number;
    lexicalCount: number;
    previousLexicalCount?: number;

    // --- Top-K settings ---
    candidateTopK: number;
    previousCandidateTopK?: number;
    lexicalTopK: number;
    previousLexicalTopK?: number;

    // --- Stage counts, keyed by retrieval trace stage ---
    stageCounts: Partial<Record<RAGRetrievalTraceStage, number>>;
    previousStageCounts: Partial<Record<RAGRetrievalTraceStage, number>>;

    /** How the current figures relate to the previous snapshot. */
    traceChange: 'new' | 'changed' | 'unchanged';
};
|
|
1249
1275
|
/**
 * Recorded runs of one evaluation suite plus views derived from them.
 */
export type RAGEvaluationHistory = {
    // --- Suite identity ---
    suiteId: string;
    suiteLabel?: string;

    // --- Recorded runs ---
    runs: RAGEvaluationSuiteRun[];
    /** Most recent recorded run, when at least one exists. */
    latestRun?: RAGEvaluationSuiteRun;
    /** Run recorded immediately before `latestRun`, when one exists. */
    previousRun?: RAGEvaluationSuiteRun;

    // --- Derived views ---
    leaderboard: RAGEvaluationLeaderboardEntry[];
    /** Per-case trace snapshots built from the latest run and the previous run. */
    caseTraceSnapshots: RAGEvaluationCaseTraceSnapshot[];
    /** Present only when both a latest and a previous run are available. */
    diff?: RAGEvaluationRunDiff;
};
|
|
1285
|
+
/**
 * One display row: a caption and its already-formatted value.
 */
export type RAGLabelValueRow = {
    /** Caption shown for the row. */
    label: string;
    /** Value rendered as text. */
    value: string;
};
|
|
1258
1289
|
export type RAGEvaluationLeaderboardEntry = {
|
|
1259
1290
|
runId: string;
|
|
1260
1291
|
suiteId: string;
|
|
@@ -1297,6 +1328,7 @@ export type RAGRerankerComparisonEntry = {
|
|
|
1297
1328
|
providerName?: string;
|
|
1298
1329
|
response: RAGEvaluationResponse;
|
|
1299
1330
|
traceSummary?: RAGRetrievalTraceComparisonSummary;
|
|
1331
|
+
caseTraceSnapshots?: RAGEvaluationCaseTraceSnapshot[];
|
|
1300
1332
|
};
|
|
1301
1333
|
export type RAGRerankerComparisonSummary = {
|
|
1302
1334
|
bestByPassingRate?: string;
|
|
@@ -1316,6 +1348,7 @@ export type RAGRetrievalComparisonEntry = {
|
|
|
1316
1348
|
retrievalMode: RAGHybridRetrievalMode;
|
|
1317
1349
|
response: RAGEvaluationResponse;
|
|
1318
1350
|
traceSummary?: RAGRetrievalTraceComparisonSummary;
|
|
1351
|
+
caseTraceSnapshots?: RAGEvaluationCaseTraceSnapshot[];
|
|
1319
1352
|
};
|
|
1320
1353
|
export type RAGRetrievalComparisonSummary = {
|
|
1321
1354
|
bestByPassingRate?: string;
|
package/dist/vue/ai/index.js
CHANGED
|
@@ -2154,7 +2154,266 @@ var buildGroundingCaseSnapshots = ({
|
|
|
2154
2154
|
};
|
|
2155
2155
|
});
|
|
2156
2156
|
};
|
|
2157
|
+
/**
 * Reports whether two stage-count maps hold the same count for every stage.
 *
 * A stage that is absent (or null/undefined) on one side counts as 0, so
 * `{ rerank: 0 }` and `{}` are considered equal.
 *
 * @param left First stage-count map.
 * @param right Second stage-count map.
 * @returns `true` when every stage found on either side has matching counts.
 */
var areStageCountsEqual = (left, right) => {
  const stages = new Set(Object.keys(left).concat(Object.keys(right)));
  return Array.from(stages).every((stage) => (left[stage] ?? 0) === (right[stage] ?? 0));
};
|
|
2169
|
+
/**
 * Builds the trace snapshot for one evaluated case and classifies how it
 * differs from the snapshot previously recorded for the same case.
 *
 * @param caseResult Evaluated case; `caseId`, `label`, `query` and `status`
 *   are copied into the snapshot.
 * @param currentTrace Retrieval trace for this case, if one is available.
 *   Missing figures default to 0 / empty.
 * @param previousTrace Snapshot recorded earlier for the same case, if any.
 * @param currentStageCounts Optional stage counts already known for the
 *   current trace. When omitted they are derived from `currentTrace` with
 *   `buildTraceStageCounts`.
 * @returns The snapshot. `traceChange` is `"new"` when there is a current
 *   trace but no previous snapshot, `"changed"` when any compared figure
 *   differs from the previous snapshot, and `"unchanged"` otherwise.
 */
var buildEvaluationCaseTraceSnapshot = ({
  caseResult,
  currentTrace,
  previousTrace,
  currentStageCounts
}) => {
  const stageCounts = currentStageCounts ?? (currentTrace ? buildTraceStageCounts([currentTrace]) : {});
  const previousStageCounts = previousTrace?.stageCounts ?? {};
  const traceMode = currentTrace?.mode;
  const transformedQuery = currentTrace?.transformedQuery || undefined;
  const variantQueries = currentTrace?.variantQueries ?? [];
  const finalCount = currentTrace?.resultCounts.final ?? 0;
  const vectorCount = currentTrace?.resultCounts.vector ?? 0;
  const lexicalCount = currentTrace?.resultCounts.lexical ?? 0;
  const candidateTopK = currentTrace?.candidateTopK ?? 0;
  const lexicalTopK = currentTrace?.lexicalTopK ?? 0;
  // Compare element by element: joining on a separator would make
  // ["a|b"] and ["a", "b"] indistinguishable.
  const hasSameVariantQueries = (previousQueries) => previousQueries.length === variantQueries.length && previousQueries.every((query, index) => query === variantQueries[index]);
  let traceChange;
  if (!previousTrace) {
    traceChange = currentTrace ? "new" : "unchanged";
  } else {
    const differs = previousTrace.traceMode !== traceMode || previousTrace.transformedQuery !== transformedQuery || !hasSameVariantQueries(previousTrace.variantQueries) || previousTrace.finalCount !== finalCount || previousTrace.vectorCount !== vectorCount || previousTrace.lexicalCount !== lexicalCount || previousTrace.candidateTopK !== candidateTopK || previousTrace.lexicalTopK !== lexicalTopK || !areStageCountsEqual(previousStageCounts, stageCounts);
    traceChange = differs ? "changed" : "unchanged";
  }
  return {
    candidateTopK,
    caseId: caseResult.caseId,
    finalCount,
    label: caseResult.label,
    lexicalCount,
    lexicalTopK,
    previousCandidateTopK: previousTrace?.candidateTopK,
    previousFinalCount: previousTrace?.finalCount,
    previousLexicalCount: previousTrace?.lexicalCount,
    previousLexicalTopK: previousTrace?.lexicalTopK,
    previousStageCounts,
    previousTraceMode: previousTrace?.traceMode,
    previousTransformedQuery: previousTrace?.transformedQuery,
    previousVariantQueries: previousTrace?.variantQueries ?? [],
    previousVectorCount: previousTrace?.vectorCount,
    query: caseResult.query,
    stageCounts,
    status: caseResult.status,
    traceChange,
    traceMode,
    transformedQuery,
    variantQueries,
    vectorCount
  };
};
/**
 * Builds one snapshot per `{ caseResult, trace }` pair, with no previous
 * snapshot to compare against.
 *
 * @param evaluated Evaluated cases, each carrying its `caseResult` and `trace`.
 * @returns Snapshots in the same order as `evaluated`.
 */
var buildEvaluationCaseTraceSnapshotsFromEvaluated = (evaluated) => evaluated.map(({ caseResult, trace }) => buildEvaluationCaseTraceSnapshot({
  caseResult,
  currentTrace: trace
}));
/**
 * Builds per-case snapshots for a recorded run, compared against the
 * snapshots stored on the previous run.
 *
 * @param current Run whose `response.cases` drive the output and whose stored
 *   `caseTraceSnapshots` supply the current figures. Returns `[]` when absent.
 * @param previous Run whose stored `caseTraceSnapshots` act as the baseline.
 * @returns One snapshot per case in `current.response.cases`, in that order.
 */
var buildEvaluationCaseTraceSnapshots = ({
  current,
  previous
}) => {
  if (!current) {
    return [];
  }
  const indexByCaseId = (snapshots) => new Map((snapshots ?? []).map((entry) => [entry.caseId, entry]));
  const currentTraces = indexByCaseId(current.caseTraceSnapshots);
  const previousTraces = indexByCaseId(previous?.caseTraceSnapshots);
  // Re-creates a trace-shaped object from a stored snapshot so it can be fed
  // back through buildEvaluationCaseTraceSnapshot.
  const rebuildTrace = (snapshot, caseResult) => ({
    candidateTopK: snapshot.candidateTopK,
    lexicalTopK: snapshot.lexicalTopK,
    // NOTE(review): a snapshot stored without a trace has no traceMode and is
    // rebuilt here as "vector" — confirm that is intended.
    mode: snapshot.traceMode ?? "vector",
    query: caseResult.query,
    resultCounts: {
      final: snapshot.finalCount,
      fused: snapshot.finalCount,
      lexical: snapshot.lexicalCount,
      reranked: snapshot.finalCount,
      vector: snapshot.vectorCount
    },
    runLexical: snapshot.lexicalCount > 0,
    runVector: snapshot.vectorCount > 0,
    steps: [],
    topK: caseResult.topK,
    // Keep "no transformed query" as empty so it maps back to undefined
    // and still matches a previous snapshot that also had none.
    transformedQuery: snapshot.transformedQuery ?? "",
    variantQueries: snapshot.variantQueries
  });
  return current.response.cases.map((caseResult) => {
    const currentSnapshot = currentTraces.get(caseResult.caseId);
    return buildEvaluationCaseTraceSnapshot({
      caseResult,
      // The rebuilt trace has no steps, so hand over the stage counts stored on
      // the snapshot instead of re-deriving them from an empty trace.
      currentStageCounts: currentSnapshot?.stageCounts,
      currentTrace: currentSnapshot ? rebuildTrace(currentSnapshot, caseResult) : undefined,
      previousTrace: previousTraces.get(caseResult.caseId)
    });
  });
};
|
|
2157
2249
|
/**
 * Maps a case status to a numeric rank: "pass" is 2, "partial" is 1 and any
 * other value is 0.
 */
var getStatusRank = (status) => {
  if (status === "pass") {
    return 2;
  }
  if (status === "partial") {
    return 1;
  }
  return 0;
};
|
|
2250
|
+
/**
 * Formats a number with an explicit sign and fixed decimals.
 *
 * @param value Number to format; zero and positive values get a leading "+".
 * @param decimals Digits after the decimal point (default 0).
 * @param suffix Text appended after the number (default empty).
 * @returns For example `"+1.2"`, `"-2%"` or `"+0.500ms"`.
 */
var formatSignedDelta = (value, decimals = 0, suffix = "") => {
  const sign = value >= 0 ? "+" : "";
  const magnitude = value.toFixed(decimals);
  return `${sign}${magnitude}${suffix}`;
};
|
|
2251
|
+
/**
 * Renders a one-line summary of an evaluation response: passed/total cases,
 * average F1 to three decimals and average latency to one decimal.
 */
var formatEvaluationSummary = (response) => {
  const { passedCases, averageF1, averageLatencyMs } = response.summary;
  const segments = [
    `${passedCases}/${response.totalCases} pass`,
    `f1 ${averageF1.toFixed(3)}`,
    `latency ${averageLatencyMs.toFixed(1)}ms`
  ];
  return segments.join(" \xB7 ");
};
|
|
2252
|
+
/**
 * Joins case names with ", ", using each case's `label` when present and its
 * `caseId` otherwise. Returns "none" when there are no cases.
 */
var formatHistoryCaseLabels = (cases) => {
  if (!(cases.length > 0)) {
    return "none";
  }
  const names = cases.map((entry) => entry.label ?? entry.caseId);
  return names.join(", ");
};
|
|
2253
|
+
/**
 * Joins retrieval modes with " / ", or returns "n/a" when the list is empty.
 */
var formatTraceModes = (modes) => {
  if (modes.length > 0) {
    return modes.join(" / ");
  }
  return "n/a";
};
|
|
2254
|
+
/**
 * Summarises the three stages with the highest counts as "stage count" pairs
 * separated by a middle dot, or returns "n/a" when there are no stages.
 */
var formatTraceStageSummary = (stageCounts) => {
  const ranked = Object.entries(stageCounts);
  ranked.sort(([, leftCount], [, rightCount]) => rightCount - leftCount);
  const parts = ranked.slice(0, 3).map((pair) => `${pair[0]} ${pair[1]}`);
  if (parts.length > 0) {
    return parts.join(" \xB7 ");
  }
  return "n/a";
};
|
|
2258
|
+
/**
 * Renders a count against its total as "count/total".
 */
var formatTraceRatio = (count, total) => {
  return `${count}/${total}`;
};
|
|
2259
|
+
/**
 * Renders a count difference with an explicit "+" for zero and positive
 * values; negative values keep their own "-".
 */
var formatTraceCountDelta = (value) => {
  const sign = value >= 0 ? "+" : "";
  return `${sign}${value}`;
};
|
|
2260
|
+
/**
 * Turns a comparison entry's trace summary into label/value display rows.
 *
 * @param entry Comparison entry; only `traceSummary` is read.
 * @returns A single "Trace: Unavailable" row when the entry has no trace
 *   summary, otherwise eight rows: modes, average final/vector/lexical counts,
 *   transform and variant ratios, average top-K values and top stages.
 */
var buildRAGComparisonTraceSummaryRows = (entry) => {
  const summary = entry.traceSummary;
  if (!summary) {
    return [{ label: "Trace", value: "Unavailable" }];
  }
  const row = (label, value) => ({ label, value });
  const oneDecimal = (amount) => amount.toFixed(1);
  return [
    row("Modes", formatTraceModes(summary.modes)),
    row("Avg final", oneDecimal(summary.averageFinalCount)),
    row("Avg vector", oneDecimal(summary.averageVectorCount)),
    row("Avg lexical", oneDecimal(summary.averageLexicalCount)),
    row("Transforms", formatTraceRatio(summary.transformedCases, summary.totalCases)),
    row("Variants", formatTraceRatio(summary.variantCases, summary.totalCases)),
    row("TopK", `${oneDecimal(summary.averageCandidateTopK)} / ${oneDecimal(summary.averageLexicalTopK)}`),
    row("Stages", formatTraceStageSummary(summary.stageCounts))
  ];
};
|
|
2288
|
+
/**
 * Builds label/value rows describing how an entry's trace summary differs
 * from the leading entry's trace summary.
 *
 * @param entry Comparison entry being described.
 * @param leader Entry used as the baseline; may be absent.
 * @returns A single explanatory row when the entry has no trace summary, the
 *   leader has none, or the entry is the leader itself. Otherwise a baseline
 *   row, an optional modes row (only when the formatted modes differ), four
 *   delta rows, and an optional stage-delta row listing up to three stages
 *   whose counts differ.
 */
var buildRAGComparisonTraceDiffRows = (entry, leader) => {
  const own = entry.traceSummary;
  if (!own) {
    return [{ label: "Trace", value: "Unavailable for comparison" }];
  }
  const baseline = leader?.traceSummary;
  if (!baseline) {
    return [{ label: "Baseline", value: "Leader trace unavailable" }];
  }
  if (entry === leader) {
    return [{ label: "Baseline", value: "Leader strategy" }];
  }
  // Collect a "stage +n" fragment for every stage whose count differs.
  const stageFragments = [];
  const allStages = Object.keys({ ...baseline.stageCounts, ...own.stageCounts });
  for (const stage of allStages) {
    const delta = (own.stageCounts[stage] ?? 0) - (baseline.stageCounts[stage] ?? 0);
    if (delta !== 0) {
      stageFragments.push(`${stage} ${formatTraceCountDelta(delta)}`);
    }
  }
  const stageDelta = stageFragments.slice(0, 3).join(" \xB7 ");
  const rows = [{ label: "Baseline", value: leader.label }];
  const ownModes = formatTraceModes(own.modes);
  const baselineModes = formatTraceModes(baseline.modes);
  if (ownModes !== baselineModes) {
    rows.push({ label: "Modes vs leader", value: `${ownModes} vs ${baselineModes}` });
  }
  rows.push({
    label: "Final delta",
    value: formatSignedDelta(own.averageFinalCount - baseline.averageFinalCount, 1)
  });
  rows.push({
    label: "Vector delta",
    value: formatSignedDelta(own.averageVectorCount - baseline.averageVectorCount, 1)
  });
  rows.push({
    label: "Lexical delta",
    value: formatSignedDelta(own.averageLexicalCount - baseline.averageLexicalCount, 1)
  });
  rows.push({
    label: "Transform delta",
    value: formatTraceCountDelta(own.transformedCases - baseline.transformedCases)
  });
  if (stageDelta) {
    rows.push({ label: "Stage delta", value: stageDelta });
  }
  return rows;
};
|
|
2335
|
+
/**
 * Builds label/value rows summarising an evaluation history.
 *
 * @param history Evaluation history; may be absent.
 * @returns
 *   - A single "History" row when there is no history or no latest run.
 *   - Otherwise rows for the run count, the latest run, its trace summary
 *     (when present) and the previous run (when present).
 *   - Without a diff, a hint row is appended and the list ends there.
 *   - With a diff, the summary deltas and improved/regressed cases follow,
 *     then trace deltas (when present) and the cases whose trace changed.
 *
 * A history without `caseTraceSnapshots` is treated as having none, so a
 * payload that lacks the field does not throw.
 */
var buildRAGEvaluationHistoryRows = (history) => {
  const latestRun = history?.latestRun;
  if (!latestRun) {
    return [
      { label: "History", value: "No persisted benchmark runs yet." }
    ];
  }
  const rows = [
    { label: "Runs recorded", value: String(history.runs.length) },
    {
      label: "Latest",
      value: `${latestRun.label} \xB7 ${formatEvaluationSummary(latestRun.response)}`
    }
  ];
  const latestTrace = latestRun.traceSummary;
  if (latestTrace) {
    rows.push({
      label: "Latest trace",
      value: `${formatTraceModes(latestTrace.modes)} \xB7 final ${latestTrace.averageFinalCount.toFixed(1)} \xB7 vector ${latestTrace.averageVectorCount.toFixed(1)} \xB7 lexical ${latestTrace.averageLexicalCount.toFixed(1)}`
    });
  }
  const previousRun = history.previousRun;
  if (previousRun) {
    rows.push({
      label: "Previous",
      value: `${previousRun.label} \xB7 ${formatEvaluationSummary(previousRun.response)}`
    });
  }
  const diff = history.diff;
  if (!diff) {
    // Nothing to compare against yet; the remaining rows all describe the diff.
    rows.push({
      label: "History diff",
      value: "Run the benchmark again to diff regressions over time."
    });
    return rows;
  }
  rows.push({
    label: "Passing delta",
    value: formatSignedDelta(diff.summaryDelta.passingRate, 1, "%")
  }, {
    label: "Average F1 delta",
    value: formatSignedDelta(diff.summaryDelta.averageF1, 3)
  }, {
    label: "Latency delta",
    value: formatSignedDelta(diff.summaryDelta.averageLatencyMs, 1, "ms")
  }, {
    label: "Improved",
    value: formatHistoryCaseLabels(diff.improvedCases)
  }, {
    label: "Regressed",
    value: formatHistoryCaseLabels(diff.regressedCases)
  });
  const traceDelta = diff.traceSummaryDelta;
  if (traceDelta) {
    rows.push({
      label: "Trace mode shift",
      value: traceDelta.modesChanged ? "changed" : "stable"
    }, {
      label: "Trace final delta",
      value: formatSignedDelta(traceDelta.averageFinalCount, 1)
    }, {
      label: "Trace vector delta",
      value: formatSignedDelta(traceDelta.averageVectorCount, 1)
    }, {
      label: "Trace lexical delta",
      value: formatSignedDelta(traceDelta.averageLexicalCount, 1)
    }, {
      label: "Trace transform delta",
      value: formatTraceCountDelta(traceDelta.transformedCases)
    }, {
      label: "Trace variant delta",
      value: formatTraceCountDelta(traceDelta.variantCases)
    });
    const stageDelta = Object.entries(traceDelta.stageCounts ?? {}).map(([stage, count]) => `${stage} ${formatTraceCountDelta(count)}`).join(", ");
    if (stageDelta) {
      rows.push({ label: "Trace stage delta", value: stageDelta });
    }
  }
  // Tolerate a history payload that does not carry caseTraceSnapshots.
  const caseTraceSnapshots = history.caseTraceSnapshots ?? [];
  if (caseTraceSnapshots.length > 0) {
    const changedCases = caseTraceSnapshots.filter((entry) => entry.traceChange === "changed");
    rows.push({
      label: "Trace drift cases",
      // Only the first four changed cases are listed.
      value: changedCases.length > 0 ? changedCases.map((entry) => entry.label ?? entry.caseId).slice(0, 4).join(", ") : "none"
    });
  }
  return rows;
};
|
|
2158
2417
|
var buildRAGEvaluationRunDiff = ({
|
|
2159
2418
|
current,
|
|
2160
2419
|
previous
|
|
@@ -2342,6 +2601,10 @@ var loadRAGEvaluationHistory = async ({
|
|
|
2342
2601
|
const latestRun = runs[0];
|
|
2343
2602
|
const previousRun = runs[1];
|
|
2344
2603
|
return {
|
|
2604
|
+
caseTraceSnapshots: buildEvaluationCaseTraceSnapshots({
|
|
2605
|
+
current: latestRun,
|
|
2606
|
+
previous: previousRun
|
|
2607
|
+
}),
|
|
2345
2608
|
diff: latestRun && previousRun ? buildRAGEvaluationRunDiff({
|
|
2346
2609
|
current: latestRun,
|
|
2347
2610
|
previous: previousRun
|
|
@@ -2553,6 +2816,7 @@ var compareRAGRerankers = async ({
|
|
|
2553
2816
|
});
|
|
2554
2817
|
const response = buildRAGEvaluationResponse(evaluated.map((entry) => entry.caseResult));
|
|
2555
2818
|
return {
|
|
2819
|
+
caseTraceSnapshots: buildEvaluationCaseTraceSnapshotsFromEvaluated(evaluated),
|
|
2556
2820
|
label: candidate.label ?? candidate.id,
|
|
2557
2821
|
providerName: typeof candidate.rerank === "function" ? undefined : candidate.rerank?.providerName,
|
|
2558
2822
|
response,
|
|
@@ -2639,6 +2903,7 @@ var compareRAGRetrievalStrategies = async ({
|
|
|
2639
2903
|
});
|
|
2640
2904
|
const response = buildRAGEvaluationResponse(evaluated.map((entry) => entry.caseResult));
|
|
2641
2905
|
return {
|
|
2906
|
+
caseTraceSnapshots: buildEvaluationCaseTraceSnapshotsFromEvaluated(evaluated),
|
|
2642
2907
|
label: candidate.label ?? candidate.id,
|
|
2643
2908
|
response,
|
|
2644
2909
|
retrievalId: candidate.id,
|
|
@@ -2707,7 +2972,8 @@ var executeDryRunRAGEvaluation = (input, defaultTopK = DEFAULT_TOP_K) => input.c
|
|
|
2707
2972
|
var runRAGEvaluationSuite = async ({
|
|
2708
2973
|
suite,
|
|
2709
2974
|
evaluate,
|
|
2710
|
-
overrides
|
|
2975
|
+
overrides,
|
|
2976
|
+
artifacts
|
|
2711
2977
|
}) => {
|
|
2712
2978
|
const startedAt = Date.now();
|
|
2713
2979
|
const response = await evaluate({
|
|
@@ -2717,6 +2983,7 @@ var runRAGEvaluationSuite = async ({
|
|
|
2717
2983
|
});
|
|
2718
2984
|
const finishedAt = Date.now();
|
|
2719
2985
|
return {
|
|
2986
|
+
caseTraceSnapshots: artifacts?.caseTraceSnapshots,
|
|
2720
2987
|
elapsedMs: finishedAt - startedAt,
|
|
2721
2988
|
finishedAt,
|
|
2722
2989
|
id: generateId(),
|
|
@@ -2724,7 +2991,8 @@ var runRAGEvaluationSuite = async ({
|
|
|
2724
2991
|
metadata: suite.metadata,
|
|
2725
2992
|
response,
|
|
2726
2993
|
startedAt,
|
|
2727
|
-
suiteId: suite.id
|
|
2994
|
+
suiteId: suite.id,
|
|
2995
|
+
traceSummary: artifacts?.traceSummary
|
|
2728
2996
|
};
|
|
2729
2997
|
};
|
|
2730
2998
|
var summarizeRAGEvaluationCase = ({
|
|
@@ -3437,5 +3705,5 @@ export {
|
|
|
3437
3705
|
AIStreamKey
|
|
3438
3706
|
};
|
|
3439
3707
|
|
|
3440
|
-
//# debugId=
|
|
3708
|
+
//# debugId=E235F4588786F00E64756E2164756E21
|
|
3441
3709
|
//# sourceMappingURL=index.js.map
|