@absolutejs/absolute 0.19.0-beta.643 → 0.19.0-beta.644
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/ai/client/index.js +283 -5
- package/dist/ai/client/index.js.map +5 -5
- package/dist/ai/client/ui.js +235 -5
- package/dist/ai/client/ui.js.map +4 -4
- package/dist/ai/index.js +658 -76
- package/dist/ai/index.js.map +11 -11
- package/dist/ai/rag/quality.js +237 -5
- package/dist/ai/rag/quality.js.map +4 -4
- package/dist/ai/rag/ui.js +235 -5
- package/dist/ai/rag/ui.js.map +4 -4
- package/dist/ai-client/angular/ai/index.js +48 -0
- package/dist/ai-client/react/ai/index.js +48 -0
- package/dist/ai-client/vue/ai/index.js +48 -0
- package/dist/angular/ai/index.js +283 -5
- package/dist/angular/ai/index.js.map +5 -5
- package/dist/react/ai/index.js +283 -5
- package/dist/react/ai/index.js.map +5 -5
- package/dist/src/ai/client/ragClient.d.ts +16 -1
- package/dist/src/ai/index.d.ts +1 -1
- package/dist/src/ai/rag/adapters/queryPlanning.d.ts +8 -0
- package/dist/src/ai/rag/chat.d.ts +49 -7
- package/dist/src/ai/rag/index.d.ts +1 -1
- package/dist/src/ai/rag/presentation.d.ts +5 -1
- package/dist/src/ai/rag/quality.d.ts +14 -0
- package/dist/src/vue/ai/useRAG.d.ts +4 -0
- package/dist/src/vue/ai/useRAGEvaluate.d.ts +4 -0
- package/dist/svelte/ai/index.js +283 -5
- package/dist/svelte/ai/index.js.map +5 -5
- package/dist/types/ai.d.ts +64 -5
- package/dist/vue/ai/index.js +283 -5
- package/dist/vue/ai/index.js.map +5 -5
- package/package.json +7 -7
package/dist/ai/client/index.js
CHANGED
|
@@ -2965,6 +2965,18 @@ var buildComparisonOverviewPresentation = (input) => {
|
|
|
2965
2965
|
value: input.resolveLabel(input.summary.bestByMultivectorVectorHitCases)
|
|
2966
2966
|
});
|
|
2967
2967
|
}
|
|
2968
|
+
if (input.summary.bestByLowestRuntimeCandidateBudgetExhaustedCases) {
|
|
2969
|
+
rows.push({
|
|
2970
|
+
label: "Lowest runtime budget exhaustion",
|
|
2971
|
+
value: input.resolveLabel(input.summary.bestByLowestRuntimeCandidateBudgetExhaustedCases)
|
|
2972
|
+
});
|
|
2973
|
+
}
|
|
2974
|
+
if (input.summary.bestByLowestRuntimeUnderfilledTopKCases) {
|
|
2975
|
+
rows.push({
|
|
2976
|
+
label: "Lowest runtime underfilled TopK",
|
|
2977
|
+
value: input.resolveLabel(input.summary.bestByLowestRuntimeUnderfilledTopKCases)
|
|
2978
|
+
});
|
|
2979
|
+
}
|
|
2968
2980
|
return {
|
|
2969
2981
|
rows,
|
|
2970
2982
|
winnerLabel,
|
|
@@ -3017,6 +3029,9 @@ var buildRAGComparisonTraceSummaryRows = (entry) => {
|
|
|
3017
3029
|
}, {
|
|
3018
3030
|
label: "Multivector",
|
|
3019
3031
|
value: `${formatTraceRatio(trace.multiVectorCases, trace.totalCases)} \xB7 collapse ${formatTraceRatio(trace.multiVectorCollapsedCases, trace.totalCases)} \xB7 lexical ${formatTraceRatio(trace.multiVectorLexicalHitCases, trace.totalCases)} \xB7 vector ${formatTraceRatio(trace.multiVectorVectorHitCases, trace.totalCases)}`
|
|
3032
|
+
}, {
|
|
3033
|
+
label: "Runtime",
|
|
3034
|
+
value: `budget ${formatTraceRatio(trace.runtimeCandidateBudgetExhaustedCases, trace.totalCases)} \xB7 underfilled ${formatTraceRatio(trace.runtimeUnderfilledTopKCases, trace.totalCases)}`
|
|
3020
3035
|
}, {
|
|
3021
3036
|
label: "TopK",
|
|
3022
3037
|
value: `${trace.averageCandidateTopK.toFixed(1)} / ${trace.averageLexicalTopK.toFixed(1)}`
|
|
@@ -3121,6 +3136,12 @@ var buildRAGComparisonTraceDiffRows = (entry, leader) => {
|
|
|
3121
3136
|
}, {
|
|
3122
3137
|
label: "Round robin delta",
|
|
3123
3138
|
value: formatTraceCountDelta(trace.roundRobinCases - leaderTrace.roundRobinCases)
|
|
3139
|
+
}, {
|
|
3140
|
+
label: "Runtime budget delta",
|
|
3141
|
+
value: formatTraceCountDelta(trace.runtimeCandidateBudgetExhaustedCases - leaderTrace.runtimeCandidateBudgetExhaustedCases)
|
|
3142
|
+
}, {
|
|
3143
|
+
label: "Runtime underfilled delta",
|
|
3144
|
+
value: formatTraceCountDelta(trace.runtimeUnderfilledTopKCases - leaderTrace.runtimeUnderfilledTopKCases)
|
|
3124
3145
|
});
|
|
3125
3146
|
if (stageDelta) {
|
|
3126
3147
|
rows.push({ label: "Stage delta", value: stageDelta });
|
|
@@ -3572,6 +3593,77 @@ var buildRAGEvaluationSuiteSnapshotHistoryPresentation = (history) => ({
|
|
|
3572
3593
|
snapshots: buildRAGEvaluationSuiteSnapshotPresentations(history),
|
|
3573
3594
|
summary: history?.latestSnapshot ? `v${history.latestSnapshot.version}` : "No saved suite snapshots yet."
|
|
3574
3595
|
});
|
|
3596
|
+
var isRuntimeGateReason = (reason) => /runtime|candidate-budget|underfilled/i.test(reason);
|
|
3597
|
+
var buildRAGRetrievalReleaseHistoryRunPresentation = (run) => {
|
|
3598
|
+
const runtimeGateReasons = (run.decisionSummary?.gate?.reasons ?? run.releaseVerdict?.gate?.reasons ?? []).filter(isRuntimeGateReason);
|
|
3599
|
+
const rows = [
|
|
3600
|
+
{ label: "Finished", value: formatDateLabel(run.finishedAt) },
|
|
3601
|
+
{
|
|
3602
|
+
label: "Passing-rate winner",
|
|
3603
|
+
value: run.comparison.summary.bestByPassingRate ?? "n/a"
|
|
3604
|
+
},
|
|
3605
|
+
{
|
|
3606
|
+
label: "Average F1 winner",
|
|
3607
|
+
value: run.comparison.summary.bestByAverageF1 ?? "n/a"
|
|
3608
|
+
}
|
|
3609
|
+
];
|
|
3610
|
+
if (run.comparison.summary.bestByLowestRuntimeCandidateBudgetExhaustedCases) {
|
|
3611
|
+
rows.push({
|
|
3612
|
+
label: "Lowest runtime budget exhaustion",
|
|
3613
|
+
value: run.comparison.summary.bestByLowestRuntimeCandidateBudgetExhaustedCases
|
|
3614
|
+
});
|
|
3615
|
+
}
|
|
3616
|
+
if (run.comparison.summary.bestByLowestRuntimeUnderfilledTopKCases) {
|
|
3617
|
+
rows.push({
|
|
3618
|
+
label: "Lowest runtime underfilled TopK",
|
|
3619
|
+
value: run.comparison.summary.bestByLowestRuntimeUnderfilledTopKCases
|
|
3620
|
+
});
|
|
3621
|
+
}
|
|
3622
|
+
rows.push({
|
|
3623
|
+
label: "Gate status",
|
|
3624
|
+
value: run.decisionSummary?.gate?.status ?? run.releaseVerdict?.gate?.status ?? "n/a"
|
|
3625
|
+
}, {
|
|
3626
|
+
label: "Runtime gate failures",
|
|
3627
|
+
value: runtimeGateReasons.length > 0 ? runtimeGateReasons.join("; ") : "none"
|
|
3628
|
+
});
|
|
3629
|
+
return {
|
|
3630
|
+
label: run.label,
|
|
3631
|
+
rows,
|
|
3632
|
+
runId: run.id,
|
|
3633
|
+
summary: runtimeGateReasons.length > 0 ? `${run.label} \xB7 runtime gate blocked` : `${run.label} \xB7 ${run.comparison.summary.bestByPassingRate ?? "n/a"} leads passing rate`
|
|
3634
|
+
};
|
|
3635
|
+
};
|
|
3636
|
+
var buildRAGRetrievalReleaseGroupHistoryPresentation = (input) => {
|
|
3637
|
+
const recentRuns = (input.runs ?? []).map(buildRAGRetrievalReleaseHistoryRunPresentation);
|
|
3638
|
+
const runtimeBlockedRuns = recentRuns.filter((entry) => entry.rows.some((row) => row.label === "Runtime gate failures" && row.value !== "none")).length;
|
|
3639
|
+
const rows = [
|
|
3640
|
+
{
|
|
3641
|
+
label: "Latest decision",
|
|
3642
|
+
value: input.timeline?.latestDecisionKind ?? "none"
|
|
3643
|
+
},
|
|
3644
|
+
{
|
|
3645
|
+
label: "Latest decision at",
|
|
3646
|
+
value: formatDateLabel(input.timeline?.latestDecisionAt)
|
|
3647
|
+
},
|
|
3648
|
+
{
|
|
3649
|
+
label: "Last promoted",
|
|
3650
|
+
value: formatDateLabel(input.timeline?.lastPromotedAt)
|
|
3651
|
+
},
|
|
3652
|
+
{
|
|
3653
|
+
label: "Last reverted",
|
|
3654
|
+
value: formatDateLabel(input.timeline?.lastRevertedAt)
|
|
3655
|
+
},
|
|
3656
|
+
{
|
|
3657
|
+
label: "Recent runtime-blocked runs",
|
|
3658
|
+
value: String(runtimeBlockedRuns)
|
|
3659
|
+
}
|
|
3660
|
+
];
|
|
3661
|
+
return {
|
|
3662
|
+
recentRuns,
|
|
3663
|
+
rows,
|
|
3664
|
+
summary: input.timeline?.latestDecisionKind ? `${input.timeline.latestDecisionKind} \xB7 ${recentRuns.length} recent runs` : recentRuns.length > 0 ? `${recentRuns.length} recent runs` : "No release history yet."
|
|
3665
|
+
};
|
|
3666
|
+
};
|
|
3575
3667
|
var buildRAGAnswerGroundingCaseSnapshotPresentations = (history) => {
|
|
3576
3668
|
if (!history?.caseSnapshots.length) {
|
|
3577
3669
|
return [];
|
|
@@ -4082,6 +4174,12 @@ var evaluateRetrievalComparisonGate = ({
|
|
|
4082
4174
|
if (typeof policy.minMultiVectorVectorHitCasesDelta === "number" && (delta.multiVectorVectorHitCasesDelta ?? 0) < policy.minMultiVectorVectorHitCasesDelta) {
|
|
4083
4175
|
reasons.push(`multivector vector-hit delta ${delta.multiVectorVectorHitCasesDelta ?? 0} is below ${policy.minMultiVectorVectorHitCasesDelta}`);
|
|
4084
4176
|
}
|
|
4177
|
+
if (typeof policy.maxRuntimeCandidateBudgetExhaustedCasesDelta === "number" && (delta.runtimeCandidateBudgetExhaustedCasesDelta ?? 0) > policy.maxRuntimeCandidateBudgetExhaustedCasesDelta) {
|
|
4178
|
+
reasons.push(`runtime candidate-budget-exhausted delta ${delta.runtimeCandidateBudgetExhaustedCasesDelta ?? 0} exceeds ${policy.maxRuntimeCandidateBudgetExhaustedCasesDelta}`);
|
|
4179
|
+
}
|
|
4180
|
+
if (typeof policy.maxRuntimeUnderfilledTopKCasesDelta === "number" && (delta.runtimeUnderfilledTopKCasesDelta ?? 0) > policy.maxRuntimeUnderfilledTopKCasesDelta) {
|
|
4181
|
+
reasons.push(`runtime underfilled-topk delta ${delta.runtimeUnderfilledTopKCasesDelta ?? 0} exceeds ${policy.maxRuntimeUnderfilledTopKCasesDelta}`);
|
|
4182
|
+
}
|
|
4085
4183
|
if (reasons.length === 0) {
|
|
4086
4184
|
return {
|
|
4087
4185
|
policy,
|
|
@@ -4612,6 +4710,20 @@ var buildTraceSummaryAggregate = ({
|
|
|
4612
4710
|
direction: "flat",
|
|
4613
4711
|
metric: "multiVectorCollapsedCases",
|
|
4614
4712
|
previous: 0
|
|
4713
|
+
},
|
|
4714
|
+
{
|
|
4715
|
+
current: 0,
|
|
4716
|
+
delta: 0,
|
|
4717
|
+
direction: "flat",
|
|
4718
|
+
metric: "runtimeCandidateBudgetExhaustedCases",
|
|
4719
|
+
previous: 0
|
|
4720
|
+
},
|
|
4721
|
+
{
|
|
4722
|
+
current: 0,
|
|
4723
|
+
delta: 0,
|
|
4724
|
+
direction: "flat",
|
|
4725
|
+
metric: "runtimeUnderfilledTopKCases",
|
|
4726
|
+
previous: 0
|
|
4615
4727
|
}
|
|
4616
4728
|
];
|
|
4617
4729
|
return {
|
|
@@ -4734,6 +4846,20 @@ var buildTraceSummaryAggregate = ({
|
|
|
4734
4846
|
direction: buildTraceSummaryDirection(latest.multiVectorCollapsedCases - previous.multiVectorCollapsedCases),
|
|
4735
4847
|
metric: "multiVectorCollapsedCases",
|
|
4736
4848
|
previous: previous.multiVectorCollapsedCases
|
|
4849
|
+
},
|
|
4850
|
+
{
|
|
4851
|
+
current: latest.runtimeCandidateBudgetExhaustedCases,
|
|
4852
|
+
delta: latest.runtimeCandidateBudgetExhaustedCases - previous.runtimeCandidateBudgetExhaustedCases,
|
|
4853
|
+
direction: buildTraceSummaryDirection(latest.runtimeCandidateBudgetExhaustedCases - previous.runtimeCandidateBudgetExhaustedCases),
|
|
4854
|
+
metric: "runtimeCandidateBudgetExhaustedCases",
|
|
4855
|
+
previous: previous.runtimeCandidateBudgetExhaustedCases
|
|
4856
|
+
},
|
|
4857
|
+
{
|
|
4858
|
+
current: latest.runtimeUnderfilledTopKCases,
|
|
4859
|
+
delta: latest.runtimeUnderfilledTopKCases - previous.runtimeUnderfilledTopKCases,
|
|
4860
|
+
direction: buildTraceSummaryDirection(latest.runtimeUnderfilledTopKCases - previous.runtimeUnderfilledTopKCases),
|
|
4861
|
+
metric: "runtimeUnderfilledTopKCases",
|
|
4862
|
+
previous: previous.runtimeUnderfilledTopKCases
|
|
4737
4863
|
}
|
|
4738
4864
|
];
|
|
4739
4865
|
const absoluteSorted = [...aggregate].sort((left, right) => Math.abs(right.delta) - Math.abs(left.delta) || left.metric.localeCompare(right.metric));
|
|
@@ -4788,12 +4914,15 @@ var summarizeRetrievalTraces = (traces) => {
|
|
|
4788
4914
|
let multiVectorVectorHitCases = 0;
|
|
4789
4915
|
let multiVectorLexicalHitCases = 0;
|
|
4790
4916
|
let multiVectorCollapsedCases = 0;
|
|
4917
|
+
let runtimeCandidateBudgetExhaustedCases = 0;
|
|
4918
|
+
let runtimeUnderfilledTopKCases = 0;
|
|
4791
4919
|
let finalCountSum = 0;
|
|
4792
4920
|
let vectorCountSum = 0;
|
|
4793
4921
|
let lexicalCountSum = 0;
|
|
4794
4922
|
let candidateTopKSum = 0;
|
|
4795
4923
|
let lexicalTopKSum = 0;
|
|
4796
4924
|
for (const trace of traces) {
|
|
4925
|
+
const vectorSearchMetadata = trace.steps.find((step) => step.stage === "vector_search")?.metadata;
|
|
4797
4926
|
modeSet.add(trace.mode);
|
|
4798
4927
|
sourceBalanceStrategySet.add(trace.sourceBalanceStrategy ?? "cap");
|
|
4799
4928
|
if (trace.runVector) {
|
|
@@ -4826,6 +4955,18 @@ var summarizeRetrievalTraces = (traces) => {
|
|
|
4826
4955
|
if ((trace.multiVector?.collapsedParents ?? 0) > 0) {
|
|
4827
4956
|
multiVectorCollapsedCases += 1;
|
|
4828
4957
|
}
|
|
4958
|
+
if (vectorSearchMetadata?.sqliteQueryCandidateBudgetExhausted) {
|
|
4959
|
+
runtimeCandidateBudgetExhaustedCases += 1;
|
|
4960
|
+
}
|
|
4961
|
+
if (vectorSearchMetadata?.postgresQueryCandidateBudgetExhausted) {
|
|
4962
|
+
runtimeCandidateBudgetExhaustedCases += 1;
|
|
4963
|
+
}
|
|
4964
|
+
if (vectorSearchMetadata?.sqliteQueryUnderfilledTopK) {
|
|
4965
|
+
runtimeUnderfilledTopKCases += 1;
|
|
4966
|
+
}
|
|
4967
|
+
if (vectorSearchMetadata?.postgresQueryUnderfilledTopK) {
|
|
4968
|
+
runtimeUnderfilledTopKCases += 1;
|
|
4969
|
+
}
|
|
4829
4970
|
finalCountSum += trace.resultCounts.final;
|
|
4830
4971
|
vectorCountSum += trace.resultCounts.vector;
|
|
4831
4972
|
lexicalCountSum += trace.resultCounts.lexical;
|
|
@@ -4851,6 +4992,8 @@ var summarizeRetrievalTraces = (traces) => {
|
|
|
4851
4992
|
multiVectorVectorHitCases,
|
|
4852
4993
|
multiVectorLexicalHitCases,
|
|
4853
4994
|
multiVectorCollapsedCases,
|
|
4995
|
+
runtimeCandidateBudgetExhaustedCases,
|
|
4996
|
+
runtimeUnderfilledTopKCases,
|
|
4854
4997
|
vectorCases
|
|
4855
4998
|
};
|
|
4856
4999
|
};
|
|
@@ -8436,7 +8579,9 @@ var buildRAGRetrievalComparisonDecisionSummary = ({
|
|
|
8436
8579
|
passingRateDelta: candidateEntry.response.passingRate - baselineEntry.response.passingRate,
|
|
8437
8580
|
multiVectorCollapsedCasesDelta: (candidateEntry.traceSummary?.multiVectorCollapsedCases ?? 0) - (baselineEntry.traceSummary?.multiVectorCollapsedCases ?? 0),
|
|
8438
8581
|
multiVectorLexicalHitCasesDelta: (candidateEntry.traceSummary?.multiVectorLexicalHitCases ?? 0) - (baselineEntry.traceSummary?.multiVectorLexicalHitCases ?? 0),
|
|
8439
|
-
multiVectorVectorHitCasesDelta: (candidateEntry.traceSummary?.multiVectorVectorHitCases ?? 0) - (baselineEntry.traceSummary?.multiVectorVectorHitCases ?? 0)
|
|
8582
|
+
multiVectorVectorHitCasesDelta: (candidateEntry.traceSummary?.multiVectorVectorHitCases ?? 0) - (baselineEntry.traceSummary?.multiVectorVectorHitCases ?? 0),
|
|
8583
|
+
runtimeCandidateBudgetExhaustedCasesDelta: (candidateEntry.traceSummary?.runtimeCandidateBudgetExhaustedCases ?? 0) - (baselineEntry.traceSummary?.runtimeCandidateBudgetExhaustedCases ?? 0),
|
|
8584
|
+
runtimeUnderfilledTopKCasesDelta: (candidateEntry.traceSummary?.runtimeUnderfilledTopKCases ?? 0) - (baselineEntry.traceSummary?.runtimeUnderfilledTopKCases ?? 0)
|
|
8440
8585
|
} : undefined;
|
|
8441
8586
|
return {
|
|
8442
8587
|
baseline: baselineEntry ? {
|
|
@@ -8446,6 +8591,8 @@ var buildRAGRetrievalComparisonDecisionSummary = ({
|
|
|
8446
8591
|
multiVectorCollapsedCases: baselineEntry.traceSummary?.multiVectorCollapsedCases,
|
|
8447
8592
|
multiVectorLexicalHitCases: baselineEntry.traceSummary?.multiVectorLexicalHitCases,
|
|
8448
8593
|
multiVectorVectorHitCases: baselineEntry.traceSummary?.multiVectorVectorHitCases,
|
|
8594
|
+
runtimeCandidateBudgetExhaustedCases: baselineEntry.traceSummary?.runtimeCandidateBudgetExhaustedCases,
|
|
8595
|
+
runtimeUnderfilledTopKCases: baselineEntry.traceSummary?.runtimeUnderfilledTopKCases,
|
|
8449
8596
|
passingRate: baselineEntry.response.passingRate,
|
|
8450
8597
|
retrievalId: baselineEntry.retrievalId
|
|
8451
8598
|
} : undefined,
|
|
@@ -8457,6 +8604,8 @@ var buildRAGRetrievalComparisonDecisionSummary = ({
|
|
|
8457
8604
|
multiVectorCollapsedCases: candidateEntry.traceSummary?.multiVectorCollapsedCases,
|
|
8458
8605
|
multiVectorLexicalHitCases: candidateEntry.traceSummary?.multiVectorLexicalHitCases,
|
|
8459
8606
|
multiVectorVectorHitCases: candidateEntry.traceSummary?.multiVectorVectorHitCases,
|
|
8607
|
+
runtimeCandidateBudgetExhaustedCases: candidateEntry.traceSummary?.runtimeCandidateBudgetExhaustedCases,
|
|
8608
|
+
runtimeUnderfilledTopKCases: candidateEntry.traceSummary?.runtimeUnderfilledTopKCases,
|
|
8460
8609
|
passingRate: candidateEntry.response.passingRate,
|
|
8461
8610
|
retrievalId: candidateEntry.retrievalId
|
|
8462
8611
|
} : undefined,
|
|
@@ -8468,7 +8617,9 @@ var buildRAGRetrievalComparisonDecisionSummary = ({
|
|
|
8468
8617
|
winnerByPassingRate: comparison.summary.bestByPassingRate,
|
|
8469
8618
|
winnerByMultivectorCollapsedCases: comparison.summary.bestByMultivectorCollapsedCases,
|
|
8470
8619
|
winnerByMultivectorLexicalHitCases: comparison.summary.bestByMultivectorLexicalHitCases,
|
|
8471
|
-
winnerByMultivectorVectorHitCases: comparison.summary.bestByMultivectorVectorHitCases
|
|
8620
|
+
winnerByMultivectorVectorHitCases: comparison.summary.bestByMultivectorVectorHitCases,
|
|
8621
|
+
winnerByLowestRuntimeCandidateBudgetExhaustedCases: comparison.summary.bestByLowestRuntimeCandidateBudgetExhaustedCases,
|
|
8622
|
+
winnerByLowestRuntimeUnderfilledTopKCases: comparison.summary.bestByLowestRuntimeUnderfilledTopKCases
|
|
8472
8623
|
};
|
|
8473
8624
|
};
|
|
8474
8625
|
var loadRAGSearchTracePruneHistory = async ({
|
|
@@ -9156,6 +9307,24 @@ var selectComparisonEntryByTraceMetric = (entries, idKey, metric) => {
|
|
|
9156
9307
|
}
|
|
9157
9308
|
return typeof winner[idKey] === "string" ? winner[idKey] : undefined;
|
|
9158
9309
|
};
|
|
9310
|
+
var selectComparisonEntryByLowestTraceMetric = (entries, idKey, metric) => {
|
|
9311
|
+
const ranked = [...entries].sort((left, right) => {
|
|
9312
|
+
const leftMetric = left.traceSummary?.[metric] ?? 0;
|
|
9313
|
+
const rightMetric = right.traceSummary?.[metric] ?? 0;
|
|
9314
|
+
if (leftMetric !== rightMetric) {
|
|
9315
|
+
return leftMetric - rightMetric;
|
|
9316
|
+
}
|
|
9317
|
+
if (right.response.passingRate !== left.response.passingRate) {
|
|
9318
|
+
return right.response.passingRate - left.response.passingRate;
|
|
9319
|
+
}
|
|
9320
|
+
if (right.response.summary.averageF1 !== left.response.summary.averageF1) {
|
|
9321
|
+
return right.response.summary.averageF1 - left.response.summary.averageF1;
|
|
9322
|
+
}
|
|
9323
|
+
return left.response.summary.averageLatencyMs - right.response.summary.averageLatencyMs;
|
|
9324
|
+
});
|
|
9325
|
+
const winner = ranked[0];
|
|
9326
|
+
return typeof winner?.[idKey] === "string" ? winner[idKey] : undefined;
|
|
9327
|
+
};
|
|
9159
9328
|
var resolveRetrievalMode = (candidate) => {
|
|
9160
9329
|
if (!candidate.retrieval) {
|
|
9161
9330
|
return "vector";
|
|
@@ -9249,7 +9418,9 @@ var compareRAGRetrievalTraceSummaries = (current, previous) => ({
|
|
|
9249
9418
|
multiVectorCasesDelta: current.multiVectorCases - previous.multiVectorCases,
|
|
9250
9419
|
multiVectorVectorHitCasesDelta: current.multiVectorVectorHitCases - previous.multiVectorVectorHitCases,
|
|
9251
9420
|
multiVectorLexicalHitCasesDelta: current.multiVectorLexicalHitCases - previous.multiVectorLexicalHitCases,
|
|
9252
|
-
multiVectorCollapsedCasesDelta: current.multiVectorCollapsedCases - previous.multiVectorCollapsedCases
|
|
9421
|
+
multiVectorCollapsedCasesDelta: current.multiVectorCollapsedCases - previous.multiVectorCollapsedCases,
|
|
9422
|
+
runtimeCandidateBudgetExhaustedCasesDelta: current.runtimeCandidateBudgetExhaustedCases - previous.runtimeCandidateBudgetExhaustedCases,
|
|
9423
|
+
runtimeUnderfilledTopKCasesDelta: current.runtimeUnderfilledTopKCases - previous.runtimeUnderfilledTopKCases
|
|
9253
9424
|
});
|
|
9254
9425
|
var buildSearchTraceResultSnapshots = (results) => results.map((result) => ({
|
|
9255
9426
|
chunkId: result.chunkId,
|
|
@@ -9611,6 +9782,63 @@ var generateRAGEvaluationSuiteFromDocuments = ({
|
|
|
9611
9782
|
metadata
|
|
9612
9783
|
});
|
|
9613
9784
|
};
|
|
9785
|
+
var DEFAULT_NATIVE_PLANNER_BENCHMARK_SUITE_ID = "rag-native-planner-larger-corpus";
|
|
9786
|
+
var DEFAULT_NATIVE_PLANNER_BENCHMARK_LABEL = "Adaptive Native Planner Benchmark";
|
|
9787
|
+
var DEFAULT_NATIVE_PLANNER_BENCHMARK_QUERY = "Which launch checklist phrase is exact wording?";
|
|
9788
|
+
var DEFAULT_NATIVE_PLANNER_BENCHMARK_FILTER = {
|
|
9789
|
+
lane: "focus"
|
|
9790
|
+
};
|
|
9791
|
+
var createRAGAdaptiveNativePlannerBenchmarkSuite = (input) => createRAGEvaluationSuite({
|
|
9792
|
+
description: input?.description ?? "Stress-tests larger-corpus native planner selection, candidate-budget pressure, and transformed-query recovery on filtered retrieval.",
|
|
9793
|
+
id: input?.id ?? DEFAULT_NATIVE_PLANNER_BENCHMARK_SUITE_ID,
|
|
9794
|
+
input: {
|
|
9795
|
+
cases: [
|
|
9796
|
+
{
|
|
9797
|
+
expectedDocumentIds: ["focus-target"],
|
|
9798
|
+
filter: { ...DEFAULT_NATIVE_PLANNER_BENCHMARK_FILTER },
|
|
9799
|
+
hardNegativeDocumentIds: [
|
|
9800
|
+
"focus-distractor-0",
|
|
9801
|
+
"focus-distractor-1",
|
|
9802
|
+
"focus-distractor-2"
|
|
9803
|
+
],
|
|
9804
|
+
id: "planner-pressure-exact-phrase",
|
|
9805
|
+
label: "Exact phrase survives larger-corpus native pressure",
|
|
9806
|
+
query: DEFAULT_NATIVE_PLANNER_BENCHMARK_QUERY,
|
|
9807
|
+
topK: input?.topK ?? 1
|
|
9808
|
+
}
|
|
9809
|
+
],
|
|
9810
|
+
filter: { ...DEFAULT_NATIVE_PLANNER_BENCHMARK_FILTER },
|
|
9811
|
+
retrieval: "vector",
|
|
9812
|
+
topK: input?.topK ?? 1
|
|
9813
|
+
},
|
|
9814
|
+
label: input?.label ?? DEFAULT_NATIVE_PLANNER_BENCHMARK_LABEL,
|
|
9815
|
+
metadata: {
|
|
9816
|
+
benchmarkKind: "adaptive_native_planner",
|
|
9817
|
+
benchmarkScope: "larger_corpus",
|
|
9818
|
+
expectedSignals: [
|
|
9819
|
+
"selected native planner profile",
|
|
9820
|
+
"candidate-budget exhaustion",
|
|
9821
|
+
"underfilled topk"
|
|
9822
|
+
],
|
|
9823
|
+
recommendedGroupKey: "runtime-native-planner",
|
|
9824
|
+
recommendedTags: ["runtime", "native", "planner"],
|
|
9825
|
+
...input?.metadata
|
|
9826
|
+
}
|
|
9827
|
+
});
|
|
9828
|
+
var createRAGAdaptiveNativePlannerBenchmarkSnapshot = (input) => {
|
|
9829
|
+
const suite = input?.suite ?? createRAGAdaptiveNativePlannerBenchmarkSuite();
|
|
9830
|
+
return createRAGEvaluationSuiteSnapshot({
|
|
9831
|
+
createdAt: input?.createdAt,
|
|
9832
|
+
id: input?.id,
|
|
9833
|
+
metadata: {
|
|
9834
|
+
artifactKind: "adaptive_native_planner_benchmark",
|
|
9835
|
+
persistForReleaseHistory: true,
|
|
9836
|
+
...input?.metadata
|
|
9837
|
+
},
|
|
9838
|
+
suite,
|
|
9839
|
+
version: input?.version
|
|
9840
|
+
});
|
|
9841
|
+
};
|
|
9614
9842
|
var createRAGEvaluationSuiteSnapshot = ({
|
|
9615
9843
|
suite,
|
|
9616
9844
|
id,
|
|
@@ -9818,7 +10046,9 @@ var summarizeRAGRetrievalComparison = (entries) => ({
|
|
|
9818
10046
|
...summarizeEvaluationResponseComparison(entries, "retrievalId"),
|
|
9819
10047
|
bestByMultivectorCollapsedCases: selectComparisonEntryByTraceMetric(entries, "retrievalId", "multiVectorCollapsedCases"),
|
|
9820
10048
|
bestByMultivectorLexicalHitCases: selectComparisonEntryByTraceMetric(entries, "retrievalId", "multiVectorLexicalHitCases"),
|
|
9821
|
-
bestByMultivectorVectorHitCases: selectComparisonEntryByTraceMetric(entries, "retrievalId", "multiVectorVectorHitCases")
|
|
10049
|
+
bestByMultivectorVectorHitCases: selectComparisonEntryByTraceMetric(entries, "retrievalId", "multiVectorVectorHitCases"),
|
|
10050
|
+
bestByLowestRuntimeCandidateBudgetExhaustedCases: selectComparisonEntryByLowestTraceMetric(entries, "retrievalId", "runtimeCandidateBudgetExhaustedCases"),
|
|
10051
|
+
bestByLowestRuntimeUnderfilledTopKCases: selectComparisonEntryByLowestTraceMetric(entries, "retrievalId", "runtimeUnderfilledTopKCases")
|
|
9822
10052
|
});
|
|
9823
10053
|
|
|
9824
10054
|
// src/ai/client/actions.ts
|
|
@@ -10843,6 +11073,9 @@ var createRAGClient = (options) => {
|
|
|
10843
11073
|
if (typeof input.runLimit === "number") {
|
|
10844
11074
|
searchParams.set("runLimit", String(input.runLimit));
|
|
10845
11075
|
}
|
|
11076
|
+
if (typeof input.benchmarkLimit === "number") {
|
|
11077
|
+
searchParams.set("benchmarkLimit", String(input.benchmarkLimit));
|
|
11078
|
+
}
|
|
10846
11079
|
if (input.targetRolloutLabel) {
|
|
10847
11080
|
searchParams.set("targetRolloutLabel", input.targetRolloutLabel);
|
|
10848
11081
|
}
|
|
@@ -10856,6 +11089,51 @@ var createRAGClient = (options) => {
|
|
|
10856
11089
|
}
|
|
10857
11090
|
return payload;
|
|
10858
11091
|
},
|
|
11092
|
+
async adaptiveNativePlannerBenchmark(input) {
|
|
11093
|
+
const searchParams = new URLSearchParams;
|
|
11094
|
+
if (typeof input?.limit === "number") {
|
|
11095
|
+
searchParams.set("limit", String(input.limit));
|
|
11096
|
+
}
|
|
11097
|
+
if (input?.label) {
|
|
11098
|
+
searchParams.set("label", input.label);
|
|
11099
|
+
}
|
|
11100
|
+
if (input?.description) {
|
|
11101
|
+
searchParams.set("description", input.description);
|
|
11102
|
+
}
|
|
11103
|
+
const suffix = searchParams.size ? `?${searchParams}` : "";
|
|
11104
|
+
const response = await fetchImpl(`${basePath}/compare/retrieval/benchmarks/adaptive-native-planner${suffix}`);
|
|
11105
|
+
if (!response.ok) {
|
|
11106
|
+
throw new Error(await toErrorMessage(response));
|
|
11107
|
+
}
|
|
11108
|
+
const payload = await parseJson(response);
|
|
11109
|
+
if (!payload.ok) {
|
|
11110
|
+
throw new Error(payload.error ?? "Adaptive native planner benchmark history failed");
|
|
11111
|
+
}
|
|
11112
|
+
return payload;
|
|
11113
|
+
},
|
|
11114
|
+
async saveAdaptiveNativePlannerBenchmarkSnapshot(input) {
|
|
11115
|
+
const response = await fetchImpl(`${basePath}/compare/retrieval/benchmarks/adaptive-native-planner/snapshots`, {
|
|
11116
|
+
body: JSON.stringify({
|
|
11117
|
+
createdAt: input?.createdAt,
|
|
11118
|
+
description: input?.description,
|
|
11119
|
+
label: input?.label,
|
|
11120
|
+
limit: input?.limit,
|
|
11121
|
+
metadata: input?.metadata,
|
|
11122
|
+
snapshotMetadata: input?.snapshotMetadata,
|
|
11123
|
+
version: input?.version
|
|
11124
|
+
}),
|
|
11125
|
+
headers: jsonHeaders,
|
|
11126
|
+
method: "POST"
|
|
11127
|
+
});
|
|
11128
|
+
if (!response.ok) {
|
|
11129
|
+
throw new Error(await toErrorMessage(response));
|
|
11130
|
+
}
|
|
11131
|
+
const payload = await parseJson(response);
|
|
11132
|
+
if (!payload.ok) {
|
|
11133
|
+
throw new Error(payload.error ?? "Adaptive native planner benchmark snapshot failed");
|
|
11134
|
+
}
|
|
11135
|
+
return payload;
|
|
11136
|
+
},
|
|
10859
11137
|
async retrievalLaneHandoffs(input) {
|
|
10860
11138
|
const searchParams = new URLSearchParams;
|
|
10861
11139
|
if (input?.groupKey) {
|
|
@@ -11650,5 +11928,5 @@ export {
|
|
|
11650
11928
|
buildRAGEvaluationLeaderboard
|
|
11651
11929
|
};
|
|
11652
11930
|
|
|
11653
|
-
//# debugId=
|
|
11931
|
+
//# debugId=8A7101CB12F8D56B64756E2164756E21
|
|
11654
11932
|
//# sourceMappingURL=index.js.map
|