@absolutejs/absolute 0.19.0-beta.643 → 0.19.0-beta.644
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/ai/client/index.js +283 -5
- package/dist/ai/client/index.js.map +5 -5
- package/dist/ai/client/ui.js +235 -5
- package/dist/ai/client/ui.js.map +4 -4
- package/dist/ai/index.js +658 -76
- package/dist/ai/index.js.map +11 -11
- package/dist/ai/rag/quality.js +237 -5
- package/dist/ai/rag/quality.js.map +4 -4
- package/dist/ai/rag/ui.js +235 -5
- package/dist/ai/rag/ui.js.map +4 -4
- package/dist/ai-client/angular/ai/index.js +48 -0
- package/dist/ai-client/react/ai/index.js +48 -0
- package/dist/ai-client/vue/ai/index.js +48 -0
- package/dist/angular/ai/index.js +283 -5
- package/dist/angular/ai/index.js.map +5 -5
- package/dist/react/ai/index.js +283 -5
- package/dist/react/ai/index.js.map +5 -5
- package/dist/src/ai/client/ragClient.d.ts +16 -1
- package/dist/src/ai/index.d.ts +1 -1
- package/dist/src/ai/rag/adapters/queryPlanning.d.ts +8 -0
- package/dist/src/ai/rag/chat.d.ts +49 -7
- package/dist/src/ai/rag/index.d.ts +1 -1
- package/dist/src/ai/rag/presentation.d.ts +5 -1
- package/dist/src/ai/rag/quality.d.ts +14 -0
- package/dist/src/vue/ai/useRAG.d.ts +4 -0
- package/dist/src/vue/ai/useRAGEvaluate.d.ts +4 -0
- package/dist/svelte/ai/index.js +283 -5
- package/dist/svelte/ai/index.js.map +5 -5
- package/dist/types/ai.d.ts +64 -5
- package/dist/vue/ai/index.js +283 -5
- package/dist/vue/ai/index.js.map +5 -5
- package/package.json +7 -7
package/dist/ai/rag/quality.js
CHANGED
|
@@ -2957,6 +2957,18 @@ var buildComparisonOverviewPresentation = (input) => {
|
|
|
2957
2957
|
value: input.resolveLabel(input.summary.bestByMultivectorVectorHitCases)
|
|
2958
2958
|
});
|
|
2959
2959
|
}
|
|
2960
|
+
if (input.summary.bestByLowestRuntimeCandidateBudgetExhaustedCases) {
|
|
2961
|
+
rows.push({
|
|
2962
|
+
label: "Lowest runtime budget exhaustion",
|
|
2963
|
+
value: input.resolveLabel(input.summary.bestByLowestRuntimeCandidateBudgetExhaustedCases)
|
|
2964
|
+
});
|
|
2965
|
+
}
|
|
2966
|
+
if (input.summary.bestByLowestRuntimeUnderfilledTopKCases) {
|
|
2967
|
+
rows.push({
|
|
2968
|
+
label: "Lowest runtime underfilled TopK",
|
|
2969
|
+
value: input.resolveLabel(input.summary.bestByLowestRuntimeUnderfilledTopKCases)
|
|
2970
|
+
});
|
|
2971
|
+
}
|
|
2960
2972
|
return {
|
|
2961
2973
|
rows,
|
|
2962
2974
|
winnerLabel,
|
|
@@ -3009,6 +3021,9 @@ var buildRAGComparisonTraceSummaryRows = (entry) => {
|
|
|
3009
3021
|
}, {
|
|
3010
3022
|
label: "Multivector",
|
|
3011
3023
|
value: `${formatTraceRatio(trace.multiVectorCases, trace.totalCases)} \xB7 collapse ${formatTraceRatio(trace.multiVectorCollapsedCases, trace.totalCases)} \xB7 lexical ${formatTraceRatio(trace.multiVectorLexicalHitCases, trace.totalCases)} \xB7 vector ${formatTraceRatio(trace.multiVectorVectorHitCases, trace.totalCases)}`
|
|
3024
|
+
}, {
|
|
3025
|
+
label: "Runtime",
|
|
3026
|
+
value: `budget ${formatTraceRatio(trace.runtimeCandidateBudgetExhaustedCases, trace.totalCases)} \xB7 underfilled ${formatTraceRatio(trace.runtimeUnderfilledTopKCases, trace.totalCases)}`
|
|
3012
3027
|
}, {
|
|
3013
3028
|
label: "TopK",
|
|
3014
3029
|
value: `${trace.averageCandidateTopK.toFixed(1)} / ${trace.averageLexicalTopK.toFixed(1)}`
|
|
@@ -3113,6 +3128,12 @@ var buildRAGComparisonTraceDiffRows = (entry, leader) => {
|
|
|
3113
3128
|
}, {
|
|
3114
3129
|
label: "Round robin delta",
|
|
3115
3130
|
value: formatTraceCountDelta(trace.roundRobinCases - leaderTrace.roundRobinCases)
|
|
3131
|
+
}, {
|
|
3132
|
+
label: "Runtime budget delta",
|
|
3133
|
+
value: formatTraceCountDelta(trace.runtimeCandidateBudgetExhaustedCases - leaderTrace.runtimeCandidateBudgetExhaustedCases)
|
|
3134
|
+
}, {
|
|
3135
|
+
label: "Runtime underfilled delta",
|
|
3136
|
+
value: formatTraceCountDelta(trace.runtimeUnderfilledTopKCases - leaderTrace.runtimeUnderfilledTopKCases)
|
|
3116
3137
|
});
|
|
3117
3138
|
if (stageDelta) {
|
|
3118
3139
|
rows.push({ label: "Stage delta", value: stageDelta });
|
|
@@ -3564,6 +3585,77 @@ var buildRAGEvaluationSuiteSnapshotHistoryPresentation = (history) => ({
|
|
|
3564
3585
|
snapshots: buildRAGEvaluationSuiteSnapshotPresentations(history),
|
|
3565
3586
|
summary: history?.latestSnapshot ? `v${history.latestSnapshot.version}` : "No saved suite snapshots yet."
|
|
3566
3587
|
});
|
|
3588
|
+
/**
 * Tells whether a gate failure reason refers to a runtime signal.
 *
 * A reason qualifies when it contains "runtime", "candidate-budget" or
 * "underfilled" anywhere in its text, ignoring letter case.
 *
 * @param {string} reason - Gate failure reason text.
 * @returns {boolean} True when the reason matches one of the runtime keywords.
 */
var isRuntimeGateReason = (reason) => {
  const runtimeKeywords = /runtime|candidate-budget|underfilled/i;
  return runtimeKeywords.test(reason);
};
|
|
3589
|
+
/**
 * Builds the presentation model for one release-history run.
 *
 * Gate reasons and gate status are each read from `run.decisionSummary.gate`
 * first and fall back to `run.releaseVerdict.gate`; only reasons accepted by
 * `isRuntimeGateReason` are reported as runtime gate failures.
 *
 * @param {object} run - Release-history run; `run.comparison.summary` must exist.
 * @returns {{label: *, rows: Array<{label: string, value: *}>, runId: *, summary: string}}
 *   Label/value rows plus a one-line summary for the run.
 */
var buildRAGRetrievalReleaseHistoryRunPresentation = (run) => {
  const decisionGate = run.decisionSummary?.gate;
  const verdictGate = run.releaseVerdict?.gate;
  const gateReasons = decisionGate?.reasons ?? verdictGate?.reasons ?? [];
  const runtimeGateReasons = gateReasons.filter(isRuntimeGateReason);
  const hasRuntimeGateFailures = runtimeGateReasons.length > 0;

  const rows = [{ label: "Finished", value: formatDateLabel(run.finishedAt) }];

  const comparisonSummary = run.comparison.summary;
  const passingRateWinner = comparisonSummary.bestByPassingRate ?? "n/a";
  rows.push(
    { label: "Passing-rate winner", value: passingRateWinner },
    { label: "Average F1 winner", value: comparisonSummary.bestByAverageF1 ?? "n/a" }
  );

  // The runtime winners are optional: a row is only emitted when the summary names one.
  const optionalWinners = [
    ["Lowest runtime budget exhaustion", comparisonSummary.bestByLowestRuntimeCandidateBudgetExhaustedCases],
    ["Lowest runtime underfilled TopK", comparisonSummary.bestByLowestRuntimeUnderfilledTopKCases]
  ];
  for (const [label, value] of optionalWinners) {
    if (value) {
      rows.push({ label, value });
    }
  }

  rows.push(
    { label: "Gate status", value: decisionGate?.status ?? verdictGate?.status ?? "n/a" },
    { label: "Runtime gate failures", value: hasRuntimeGateFailures ? runtimeGateReasons.join("; ") : "none" }
  );

  let summaryText;
  if (hasRuntimeGateFailures) {
    summaryText = `${run.label} \xB7 runtime gate blocked`;
  } else {
    summaryText = `${run.label} \xB7 ${passingRateWinner} leads passing rate`;
  }

  return {
    label: run.label,
    rows,
    runId: run.id,
    summary: summaryText
  };
};
|
|
3628
|
+
/**
 * Builds the presentation model for a release group's history.
 *
 * Every run in `input.runs` (none when the list is missing) is converted with
 * `buildRAGRetrievalReleaseHistoryRunPresentation`. A run counts as
 * runtime-blocked when its "Runtime gate failures" row holds anything other
 * than "none".
 *
 * @param {object} input - Group history; `input.runs` and `input.timeline` are optional.
 * @returns {{recentRuns: Array<object>, rows: Array<{label: string, value: *}>, summary: string}}
 *   Per-run presentations, timeline rows and a one-line summary.
 */
var buildRAGRetrievalReleaseGroupHistoryPresentation = (input) => {
  const runs = input.runs ?? [];
  const recentRuns = runs.map((run) => buildRAGRetrievalReleaseHistoryRunPresentation(run));

  const isRuntimeBlocked = (entry) =>
    entry.rows.some(({ label, value }) => label === "Runtime gate failures" && value !== "none");
  const runtimeBlockedRuns = recentRuns.reduce(
    (count, entry) => (isRuntimeBlocked(entry) ? count + 1 : count),
    0
  );

  const timeline = input.timeline;
  const latestDecisionKind = timeline?.latestDecisionKind;

  const rows = [
    { label: "Latest decision", value: latestDecisionKind ?? "none" },
    { label: "Latest decision at", value: formatDateLabel(timeline?.latestDecisionAt) },
    { label: "Last promoted", value: formatDateLabel(timeline?.lastPromotedAt) },
    { label: "Last reverted", value: formatDateLabel(timeline?.lastRevertedAt) },
    { label: "Recent runtime-blocked runs", value: String(runtimeBlockedRuns) }
  ];

  let summary;
  if (latestDecisionKind) {
    summary = `${latestDecisionKind} \xB7 ${recentRuns.length} recent runs`;
  } else if (recentRuns.length > 0) {
    summary = `${recentRuns.length} recent runs`;
  } else {
    summary = "No release history yet.";
  }

  return {
    recentRuns,
    rows,
    summary
  };
};
|
|
3567
3659
|
var buildRAGAnswerGroundingCaseSnapshotPresentations = (history) => {
|
|
3568
3660
|
if (!history?.caseSnapshots.length) {
|
|
3569
3661
|
return [];
|
|
@@ -4074,6 +4166,12 @@ var evaluateRetrievalComparisonGate = ({
|
|
|
4074
4166
|
if (typeof policy.minMultiVectorVectorHitCasesDelta === "number" && (delta.multiVectorVectorHitCasesDelta ?? 0) < policy.minMultiVectorVectorHitCasesDelta) {
|
|
4075
4167
|
reasons.push(`multivector vector-hit delta ${delta.multiVectorVectorHitCasesDelta ?? 0} is below ${policy.minMultiVectorVectorHitCasesDelta}`);
|
|
4076
4168
|
}
|
|
4169
|
+
if (typeof policy.maxRuntimeCandidateBudgetExhaustedCasesDelta === "number" && (delta.runtimeCandidateBudgetExhaustedCasesDelta ?? 0) > policy.maxRuntimeCandidateBudgetExhaustedCasesDelta) {
|
|
4170
|
+
reasons.push(`runtime candidate-budget-exhausted delta ${delta.runtimeCandidateBudgetExhaustedCasesDelta ?? 0} exceeds ${policy.maxRuntimeCandidateBudgetExhaustedCasesDelta}`);
|
|
4171
|
+
}
|
|
4172
|
+
if (typeof policy.maxRuntimeUnderfilledTopKCasesDelta === "number" && (delta.runtimeUnderfilledTopKCasesDelta ?? 0) > policy.maxRuntimeUnderfilledTopKCasesDelta) {
|
|
4173
|
+
reasons.push(`runtime underfilled-topk delta ${delta.runtimeUnderfilledTopKCasesDelta ?? 0} exceeds ${policy.maxRuntimeUnderfilledTopKCasesDelta}`);
|
|
4174
|
+
}
|
|
4077
4175
|
if (reasons.length === 0) {
|
|
4078
4176
|
return {
|
|
4079
4177
|
policy,
|
|
@@ -4604,6 +4702,20 @@ var buildTraceSummaryAggregate = ({
|
|
|
4604
4702
|
direction: "flat",
|
|
4605
4703
|
metric: "multiVectorCollapsedCases",
|
|
4606
4704
|
previous: 0
|
|
4705
|
+
},
|
|
4706
|
+
{
|
|
4707
|
+
current: 0,
|
|
4708
|
+
delta: 0,
|
|
4709
|
+
direction: "flat",
|
|
4710
|
+
metric: "runtimeCandidateBudgetExhaustedCases",
|
|
4711
|
+
previous: 0
|
|
4712
|
+
},
|
|
4713
|
+
{
|
|
4714
|
+
current: 0,
|
|
4715
|
+
delta: 0,
|
|
4716
|
+
direction: "flat",
|
|
4717
|
+
metric: "runtimeUnderfilledTopKCases",
|
|
4718
|
+
previous: 0
|
|
4607
4719
|
}
|
|
4608
4720
|
];
|
|
4609
4721
|
return {
|
|
@@ -4726,6 +4838,20 @@ var buildTraceSummaryAggregate = ({
|
|
|
4726
4838
|
direction: buildTraceSummaryDirection(latest.multiVectorCollapsedCases - previous.multiVectorCollapsedCases),
|
|
4727
4839
|
metric: "multiVectorCollapsedCases",
|
|
4728
4840
|
previous: previous.multiVectorCollapsedCases
|
|
4841
|
+
},
|
|
4842
|
+
{
|
|
4843
|
+
current: latest.runtimeCandidateBudgetExhaustedCases,
|
|
4844
|
+
delta: latest.runtimeCandidateBudgetExhaustedCases - previous.runtimeCandidateBudgetExhaustedCases,
|
|
4845
|
+
direction: buildTraceSummaryDirection(latest.runtimeCandidateBudgetExhaustedCases - previous.runtimeCandidateBudgetExhaustedCases),
|
|
4846
|
+
metric: "runtimeCandidateBudgetExhaustedCases",
|
|
4847
|
+
previous: previous.runtimeCandidateBudgetExhaustedCases
|
|
4848
|
+
},
|
|
4849
|
+
{
|
|
4850
|
+
current: latest.runtimeUnderfilledTopKCases,
|
|
4851
|
+
delta: latest.runtimeUnderfilledTopKCases - previous.runtimeUnderfilledTopKCases,
|
|
4852
|
+
direction: buildTraceSummaryDirection(latest.runtimeUnderfilledTopKCases - previous.runtimeUnderfilledTopKCases),
|
|
4853
|
+
metric: "runtimeUnderfilledTopKCases",
|
|
4854
|
+
previous: previous.runtimeUnderfilledTopKCases
|
|
4729
4855
|
}
|
|
4730
4856
|
];
|
|
4731
4857
|
const absoluteSorted = [...aggregate].sort((left, right) => Math.abs(right.delta) - Math.abs(left.delta) || left.metric.localeCompare(right.metric));
|
|
@@ -4780,12 +4906,15 @@ var summarizeRetrievalTraces = (traces) => {
|
|
|
4780
4906
|
let multiVectorVectorHitCases = 0;
|
|
4781
4907
|
let multiVectorLexicalHitCases = 0;
|
|
4782
4908
|
let multiVectorCollapsedCases = 0;
|
|
4909
|
+
let runtimeCandidateBudgetExhaustedCases = 0;
|
|
4910
|
+
let runtimeUnderfilledTopKCases = 0;
|
|
4783
4911
|
let finalCountSum = 0;
|
|
4784
4912
|
let vectorCountSum = 0;
|
|
4785
4913
|
let lexicalCountSum = 0;
|
|
4786
4914
|
let candidateTopKSum = 0;
|
|
4787
4915
|
let lexicalTopKSum = 0;
|
|
4788
4916
|
for (const trace of traces) {
|
|
4917
|
+
const vectorSearchMetadata = trace.steps.find((step) => step.stage === "vector_search")?.metadata;
|
|
4789
4918
|
modeSet.add(trace.mode);
|
|
4790
4919
|
sourceBalanceStrategySet.add(trace.sourceBalanceStrategy ?? "cap");
|
|
4791
4920
|
if (trace.runVector) {
|
|
@@ -4818,6 +4947,18 @@ var summarizeRetrievalTraces = (traces) => {
|
|
|
4818
4947
|
if ((trace.multiVector?.collapsedParents ?? 0) > 0) {
|
|
4819
4948
|
multiVectorCollapsedCases += 1;
|
|
4820
4949
|
}
|
|
4950
|
+
if (vectorSearchMetadata?.sqliteQueryCandidateBudgetExhausted) {
|
|
4951
|
+
runtimeCandidateBudgetExhaustedCases += 1;
|
|
4952
|
+
}
|
|
4953
|
+
if (vectorSearchMetadata?.postgresQueryCandidateBudgetExhausted) {
|
|
4954
|
+
runtimeCandidateBudgetExhaustedCases += 1;
|
|
4955
|
+
}
|
|
4956
|
+
if (vectorSearchMetadata?.sqliteQueryUnderfilledTopK) {
|
|
4957
|
+
runtimeUnderfilledTopKCases += 1;
|
|
4958
|
+
}
|
|
4959
|
+
if (vectorSearchMetadata?.postgresQueryUnderfilledTopK) {
|
|
4960
|
+
runtimeUnderfilledTopKCases += 1;
|
|
4961
|
+
}
|
|
4821
4962
|
finalCountSum += trace.resultCounts.final;
|
|
4822
4963
|
vectorCountSum += trace.resultCounts.vector;
|
|
4823
4964
|
lexicalCountSum += trace.resultCounts.lexical;
|
|
@@ -4843,6 +4984,8 @@ var summarizeRetrievalTraces = (traces) => {
|
|
|
4843
4984
|
multiVectorVectorHitCases,
|
|
4844
4985
|
multiVectorLexicalHitCases,
|
|
4845
4986
|
multiVectorCollapsedCases,
|
|
4987
|
+
runtimeCandidateBudgetExhaustedCases,
|
|
4988
|
+
runtimeUnderfilledTopKCases,
|
|
4846
4989
|
vectorCases
|
|
4847
4990
|
};
|
|
4848
4991
|
};
|
|
@@ -8428,7 +8571,9 @@ var buildRAGRetrievalComparisonDecisionSummary = ({
|
|
|
8428
8571
|
passingRateDelta: candidateEntry.response.passingRate - baselineEntry.response.passingRate,
|
|
8429
8572
|
multiVectorCollapsedCasesDelta: (candidateEntry.traceSummary?.multiVectorCollapsedCases ?? 0) - (baselineEntry.traceSummary?.multiVectorCollapsedCases ?? 0),
|
|
8430
8573
|
multiVectorLexicalHitCasesDelta: (candidateEntry.traceSummary?.multiVectorLexicalHitCases ?? 0) - (baselineEntry.traceSummary?.multiVectorLexicalHitCases ?? 0),
|
|
8431
|
-
multiVectorVectorHitCasesDelta: (candidateEntry.traceSummary?.multiVectorVectorHitCases ?? 0) - (baselineEntry.traceSummary?.multiVectorVectorHitCases ?? 0)
|
|
8574
|
+
multiVectorVectorHitCasesDelta: (candidateEntry.traceSummary?.multiVectorVectorHitCases ?? 0) - (baselineEntry.traceSummary?.multiVectorVectorHitCases ?? 0),
|
|
8575
|
+
runtimeCandidateBudgetExhaustedCasesDelta: (candidateEntry.traceSummary?.runtimeCandidateBudgetExhaustedCases ?? 0) - (baselineEntry.traceSummary?.runtimeCandidateBudgetExhaustedCases ?? 0),
|
|
8576
|
+
runtimeUnderfilledTopKCasesDelta: (candidateEntry.traceSummary?.runtimeUnderfilledTopKCases ?? 0) - (baselineEntry.traceSummary?.runtimeUnderfilledTopKCases ?? 0)
|
|
8432
8577
|
} : undefined;
|
|
8433
8578
|
return {
|
|
8434
8579
|
baseline: baselineEntry ? {
|
|
@@ -8438,6 +8583,8 @@ var buildRAGRetrievalComparisonDecisionSummary = ({
|
|
|
8438
8583
|
multiVectorCollapsedCases: baselineEntry.traceSummary?.multiVectorCollapsedCases,
|
|
8439
8584
|
multiVectorLexicalHitCases: baselineEntry.traceSummary?.multiVectorLexicalHitCases,
|
|
8440
8585
|
multiVectorVectorHitCases: baselineEntry.traceSummary?.multiVectorVectorHitCases,
|
|
8586
|
+
runtimeCandidateBudgetExhaustedCases: baselineEntry.traceSummary?.runtimeCandidateBudgetExhaustedCases,
|
|
8587
|
+
runtimeUnderfilledTopKCases: baselineEntry.traceSummary?.runtimeUnderfilledTopKCases,
|
|
8441
8588
|
passingRate: baselineEntry.response.passingRate,
|
|
8442
8589
|
retrievalId: baselineEntry.retrievalId
|
|
8443
8590
|
} : undefined,
|
|
@@ -8449,6 +8596,8 @@ var buildRAGRetrievalComparisonDecisionSummary = ({
|
|
|
8449
8596
|
multiVectorCollapsedCases: candidateEntry.traceSummary?.multiVectorCollapsedCases,
|
|
8450
8597
|
multiVectorLexicalHitCases: candidateEntry.traceSummary?.multiVectorLexicalHitCases,
|
|
8451
8598
|
multiVectorVectorHitCases: candidateEntry.traceSummary?.multiVectorVectorHitCases,
|
|
8599
|
+
runtimeCandidateBudgetExhaustedCases: candidateEntry.traceSummary?.runtimeCandidateBudgetExhaustedCases,
|
|
8600
|
+
runtimeUnderfilledTopKCases: candidateEntry.traceSummary?.runtimeUnderfilledTopKCases,
|
|
8452
8601
|
passingRate: candidateEntry.response.passingRate,
|
|
8453
8602
|
retrievalId: candidateEntry.retrievalId
|
|
8454
8603
|
} : undefined,
|
|
@@ -8460,7 +8609,9 @@ var buildRAGRetrievalComparisonDecisionSummary = ({
|
|
|
8460
8609
|
winnerByPassingRate: comparison.summary.bestByPassingRate,
|
|
8461
8610
|
winnerByMultivectorCollapsedCases: comparison.summary.bestByMultivectorCollapsedCases,
|
|
8462
8611
|
winnerByMultivectorLexicalHitCases: comparison.summary.bestByMultivectorLexicalHitCases,
|
|
8463
|
-
winnerByMultivectorVectorHitCases: comparison.summary.bestByMultivectorVectorHitCases
|
|
8612
|
+
winnerByMultivectorVectorHitCases: comparison.summary.bestByMultivectorVectorHitCases,
|
|
8613
|
+
winnerByLowestRuntimeCandidateBudgetExhaustedCases: comparison.summary.bestByLowestRuntimeCandidateBudgetExhaustedCases,
|
|
8614
|
+
winnerByLowestRuntimeUnderfilledTopKCases: comparison.summary.bestByLowestRuntimeUnderfilledTopKCases
|
|
8464
8615
|
};
|
|
8465
8616
|
};
|
|
8466
8617
|
var loadRAGSearchTracePruneHistory = async ({
|
|
@@ -9148,6 +9299,24 @@ var selectComparisonEntryByTraceMetric = (entries, idKey, metric) => {
|
|
|
9148
9299
|
}
|
|
9149
9300
|
return typeof winner[idKey] === "string" ? winner[idKey] : undefined;
|
|
9150
9301
|
};
|
|
9302
|
+
/**
 * Picks the comparison entry with the LOWEST value of a trace-summary metric
 * and returns its identifier.
 *
 * Ranking, in order: lower `traceSummary[metric]` (a missing value counts as 0),
 * then higher `response.passingRate`, then higher `response.summary.averageF1`,
 * then lower `response.summary.averageLatencyMs`. When entries tie on all four,
 * the one that appears first in `entries` wins.
 *
 * The winner is found in a single pass instead of copying and sorting the whole
 * list, and a nullish `entries` is treated as an empty list rather than throwing.
 *
 * @param {Iterable<object>|null|undefined} entries - Comparison entries to rank.
 * @param {string} idKey - Property on each entry that holds its identifier.
 * @param {string} metric - Name of the trace-summary metric to minimise.
 * @returns {string|undefined} The winner's `idKey` value when it is a string,
 *   otherwise `undefined` (including when there are no entries).
 */
var selectComparisonEntryByLowestTraceMetric = (entries, idKey, metric) => {
  // Negative result means `left` ranks ahead of `right`.
  const compareEntries = (left, right) => {
    const leftMetric = left.traceSummary?.[metric] ?? 0;
    const rightMetric = right.traceSummary?.[metric] ?? 0;
    if (leftMetric !== rightMetric) {
      return leftMetric - rightMetric;
    }
    if (right.response.passingRate !== left.response.passingRate) {
      return right.response.passingRate - left.response.passingRate;
    }
    if (right.response.summary.averageF1 !== left.response.summary.averageF1) {
      return right.response.summary.averageF1 - left.response.summary.averageF1;
    }
    return left.response.summary.averageLatencyMs - right.response.summary.averageLatencyMs;
  };

  let winner;
  for (const entry of entries ?? []) {
    // Array sorting never ranks `undefined` slots ahead of real entries; skip them here too.
    if (entry === undefined) {
      continue;
    }
    // Strict "<" keeps the earliest entry on ties, matching a stable sort's first element.
    if (winner === undefined || compareEntries(entry, winner) < 0) {
      winner = entry;
    }
  }
  return typeof winner?.[idKey] === "string" ? winner[idKey] : undefined;
};
|
|
9151
9320
|
var resolveRetrievalMode = (candidate) => {
|
|
9152
9321
|
if (!candidate.retrieval) {
|
|
9153
9322
|
return "vector";
|
|
@@ -9241,7 +9410,9 @@ var compareRAGRetrievalTraceSummaries = (current, previous) => ({
|
|
|
9241
9410
|
multiVectorCasesDelta: current.multiVectorCases - previous.multiVectorCases,
|
|
9242
9411
|
multiVectorVectorHitCasesDelta: current.multiVectorVectorHitCases - previous.multiVectorVectorHitCases,
|
|
9243
9412
|
multiVectorLexicalHitCasesDelta: current.multiVectorLexicalHitCases - previous.multiVectorLexicalHitCases,
|
|
9244
|
-
multiVectorCollapsedCasesDelta: current.multiVectorCollapsedCases - previous.multiVectorCollapsedCases
|
|
9413
|
+
multiVectorCollapsedCasesDelta: current.multiVectorCollapsedCases - previous.multiVectorCollapsedCases,
|
|
9414
|
+
runtimeCandidateBudgetExhaustedCasesDelta: current.runtimeCandidateBudgetExhaustedCases - previous.runtimeCandidateBudgetExhaustedCases,
|
|
9415
|
+
runtimeUnderfilledTopKCasesDelta: current.runtimeUnderfilledTopKCases - previous.runtimeUnderfilledTopKCases
|
|
9245
9416
|
});
|
|
9246
9417
|
var buildSearchTraceResultSnapshots = (results) => results.map((result) => ({
|
|
9247
9418
|
chunkId: result.chunkId,
|
|
@@ -9603,6 +9774,63 @@ var generateRAGEvaluationSuiteFromDocuments = ({
|
|
|
9603
9774
|
metadata
|
|
9604
9775
|
});
|
|
9605
9776
|
};
|
|
9777
|
+
// Suite id used by createRAGAdaptiveNativePlannerBenchmarkSuite when the caller passes no `id`.
var DEFAULT_NATIVE_PLANNER_BENCHMARK_SUITE_ID = "rag-native-planner-larger-corpus";
|
|
9778
|
+
// Suite label used by createRAGAdaptiveNativePlannerBenchmarkSuite when the caller passes no `label`.
var DEFAULT_NATIVE_PLANNER_BENCHMARK_LABEL = "Adaptive Native Planner Benchmark";
|
|
9779
|
+
// Query text of the single case in the adaptive native planner benchmark suite.
var DEFAULT_NATIVE_PLANNER_BENCHMARK_QUERY = "Which launch checklist phrase is exact wording?";
|
|
9780
|
+
// Filter used by the adaptive native planner benchmark suite; the suite factory spreads a
// copy of it into the `filter` of the suite input and of its single case.
var DEFAULT_NATIVE_PLANNER_BENCHMARK_FILTER = {
|
|
9781
|
+
lane: "focus"
|
|
9782
|
+
};
|
|
9783
|
+
/**
 * Creates the adaptive native planner benchmark evaluation suite.
 *
 * The suite holds one vector-retrieval case built from the module's default
 * benchmark query and filter. `input.topK` (default 1) is applied to both the
 * suite input and the case. Caller metadata is spread after the built-in
 * metadata, so caller keys override the defaults.
 *
 * @param {object} [input] - Optional overrides: `description`, `id`, `label`, `metadata`, `topK`.
 * @returns {*} Whatever `createRAGEvaluationSuite` returns for the assembled definition.
 */
var createRAGAdaptiveNativePlannerBenchmarkSuite = (input) => {
  const topK = input?.topK ?? 1;

  const exactPhraseCase = {
    expectedDocumentIds: ["focus-target"],
    filter: { ...DEFAULT_NATIVE_PLANNER_BENCHMARK_FILTER },
    hardNegativeDocumentIds: [
      "focus-distractor-0",
      "focus-distractor-1",
      "focus-distractor-2"
    ],
    id: "planner-pressure-exact-phrase",
    label: "Exact phrase survives larger-corpus native pressure",
    query: DEFAULT_NATIVE_PLANNER_BENCHMARK_QUERY,
    topK
  };

  const suiteInput = {
    cases: [exactPhraseCase],
    filter: { ...DEFAULT_NATIVE_PLANNER_BENCHMARK_FILTER },
    retrieval: "vector",
    topK
  };

  const metadata = {
    benchmarkKind: "adaptive_native_planner",
    benchmarkScope: "larger_corpus",
    expectedSignals: [
      "selected native planner profile",
      "candidate-budget exhaustion",
      "underfilled topk"
    ],
    recommendedGroupKey: "runtime-native-planner",
    recommendedTags: ["runtime", "native", "planner"],
    ...input?.metadata
  };

  return createRAGEvaluationSuite({
    description: input?.description ?? "Stress-tests larger-corpus native planner selection, candidate-budget pressure, and transformed-query recovery on filtered retrieval.",
    id: input?.id ?? DEFAULT_NATIVE_PLANNER_BENCHMARK_SUITE_ID,
    input: suiteInput,
    label: input?.label ?? DEFAULT_NATIVE_PLANNER_BENCHMARK_LABEL,
    metadata
  });
};
|
|
9820
|
+
/**
 * Creates a suite snapshot for the adaptive native planner benchmark.
 *
 * Uses `input.suite` when given, otherwise a freshly created default benchmark
 * suite. The snapshot metadata is tagged with `artifactKind` and
 * `persistForReleaseHistory`; caller metadata is spread afterwards and can
 * override both.
 *
 * @param {object} [input] - Optional `suite`, `createdAt`, `id`, `metadata`, `version`.
 * @returns {*} Whatever `createRAGEvaluationSuiteSnapshot` returns.
 */
var createRAGAdaptiveNativePlannerBenchmarkSnapshot = (input) => {
  let suite = input?.suite;
  if (suite === undefined || suite === null) {
    suite = createRAGAdaptiveNativePlannerBenchmarkSuite();
  }

  const metadata = {
    artifactKind: "adaptive_native_planner_benchmark",
    persistForReleaseHistory: true,
    ...input?.metadata
  };

  return createRAGEvaluationSuiteSnapshot({
    createdAt: input?.createdAt,
    id: input?.id,
    metadata,
    suite,
    version: input?.version
  });
};
|
|
9606
9834
|
var createRAGEvaluationSuiteSnapshot = ({
|
|
9607
9835
|
suite,
|
|
9608
9836
|
id,
|
|
@@ -9810,7 +10038,9 @@ var summarizeRAGRetrievalComparison = (entries) => ({
|
|
|
9810
10038
|
...summarizeEvaluationResponseComparison(entries, "retrievalId"),
|
|
9811
10039
|
bestByMultivectorCollapsedCases: selectComparisonEntryByTraceMetric(entries, "retrievalId", "multiVectorCollapsedCases"),
|
|
9812
10040
|
bestByMultivectorLexicalHitCases: selectComparisonEntryByTraceMetric(entries, "retrievalId", "multiVectorLexicalHitCases"),
|
|
9813
|
-
bestByMultivectorVectorHitCases: selectComparisonEntryByTraceMetric(entries, "retrievalId", "multiVectorVectorHitCases")
|
|
10041
|
+
bestByMultivectorVectorHitCases: selectComparisonEntryByTraceMetric(entries, "retrievalId", "multiVectorVectorHitCases"),
|
|
10042
|
+
bestByLowestRuntimeCandidateBudgetExhaustedCases: selectComparisonEntryByLowestTraceMetric(entries, "retrievalId", "runtimeCandidateBudgetExhaustedCases"),
|
|
10043
|
+
bestByLowestRuntimeUnderfilledTopKCases: selectComparisonEntryByLowestTraceMetric(entries, "retrievalId", "runtimeUnderfilledTopKCases")
|
|
9814
10044
|
});
|
|
9815
10045
|
export {
|
|
9816
10046
|
updateRAGEvaluationSuiteCase,
|
|
@@ -9911,6 +10141,8 @@ export {
|
|
|
9911
10141
|
createRAGFileAnswerGroundingCaseDifficultyHistoryStore,
|
|
9912
10142
|
createRAGEvaluationSuiteSnapshot,
|
|
9913
10143
|
createRAGEvaluationSuite,
|
|
10144
|
+
createRAGAdaptiveNativePlannerBenchmarkSuite,
|
|
10145
|
+
createRAGAdaptiveNativePlannerBenchmarkSnapshot,
|
|
9914
10146
|
compareRAGRetrievalTraceSummaries,
|
|
9915
10147
|
compareRAGRetrievalStrategies,
|
|
9916
10148
|
compareRAGRerankers,
|
|
@@ -9935,5 +10167,5 @@ export {
|
|
|
9935
10167
|
addRAGEvaluationSuiteCase
|
|
9936
10168
|
};
|
|
9937
10169
|
|
|
9938
|
-
//# debugId=
|
|
10170
|
+
//# debugId=31297D55AFFF65BE64756E2164756E21
|
|
9939
10171
|
//# sourceMappingURL=quality.js.map
|