@absolutejs/absolute 0.19.0-beta.644 → 0.19.0-beta.645
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/ai/client/index.js +694 -11
- package/dist/ai/client/index.js.map +6 -6
- package/dist/ai/client/ui.js +573 -11
- package/dist/ai/client/ui.js.map +5 -5
- package/dist/ai/index.js +2150 -136
- package/dist/ai/index.js.map +10 -10
- package/dist/ai/rag/quality.js +577 -11
- package/dist/ai/rag/quality.js.map +5 -5
- package/dist/ai/rag/ui.js +573 -11
- package/dist/ai/rag/ui.js.map +5 -5
- package/dist/ai-client/angular/ai/index.js +388 -8
- package/dist/ai-client/react/ai/index.js +388 -8
- package/dist/ai-client/vue/ai/index.js +388 -8
- package/dist/angular/ai/index.js +694 -11
- package/dist/angular/ai/index.js.map +6 -6
- package/dist/index.js +6 -6
- package/dist/index.js.map +2 -2
- package/dist/react/ai/index.js +694 -11
- package/dist/react/ai/index.js.map +6 -6
- package/dist/src/ai/client/ragClient.d.ts +58 -0
- package/dist/src/ai/index.d.ts +2 -1
- package/dist/src/ai/rag/chat.d.ts +90 -4
- package/dist/src/ai/rag/index.d.ts +1 -1
- package/dist/src/ai/rag/quality.d.ts +20 -1
- package/dist/src/vue/ai/useRAG.d.ts +80 -0
- package/dist/src/vue/ai/useRAGEvaluate.d.ts +70 -0
- package/dist/src/vue/ai/useRAGSearch.d.ts +10 -0
- package/dist/svelte/ai/index.js +694 -11
- package/dist/svelte/ai/index.js.map +6 -6
- package/dist/types/ai.d.ts +56 -13
- package/dist/types/index.d.ts +1 -0
- package/dist/types/session.d.ts +16 -0
- package/dist/vue/ai/index.js +694 -11
- package/dist/vue/ai/index.js.map +6 -6
- package/package.json +2 -1
package/dist/ai/index.js
CHANGED
|
@@ -208,6 +208,7 @@ var buildContextLabel = (metadata) => {
|
|
|
208
208
|
return;
|
|
209
209
|
}
|
|
210
210
|
const emailKind = getContextString(metadata.emailKind);
|
|
211
|
+
const officeBlockKind = getContextString(metadata.officeBlockKind);
|
|
211
212
|
if (emailKind === "attachment") {
|
|
212
213
|
return "Attachment evidence";
|
|
213
214
|
}
|
|
@@ -245,6 +246,16 @@ var buildContextLabel = (metadata) => {
|
|
|
245
246
|
}
|
|
246
247
|
const sectionPath = Array.isArray(metadata.sectionPath) ? metadata.sectionPath.map((value) => getContextString(value)).filter((value) => typeof value === "string") : [];
|
|
247
248
|
const sectionTitle = getContextString(metadata.sectionTitle) ?? sectionPath.at(-1);
|
|
249
|
+
const officeSectionLabel = sectionPath.length > 0 ? sectionPath.join(" > ") : sectionTitle;
|
|
250
|
+
if (officeBlockKind === "table" && officeSectionLabel) {
|
|
251
|
+
return `Office table block ${officeSectionLabel}`;
|
|
252
|
+
}
|
|
253
|
+
if (officeBlockKind === "list" && officeSectionLabel) {
|
|
254
|
+
return `Office list block ${officeSectionLabel}`;
|
|
255
|
+
}
|
|
256
|
+
if (officeBlockKind === "paragraph" && officeSectionLabel) {
|
|
257
|
+
return `Office paragraph block ${officeSectionLabel}`;
|
|
258
|
+
}
|
|
248
259
|
if (sectionTitle) {
|
|
249
260
|
return `Section ${sectionTitle}`;
|
|
250
261
|
}
|
|
@@ -266,6 +277,46 @@ var formatMediaDurationLabel = (value) => {
|
|
|
266
277
|
}
|
|
267
278
|
return formatMediaTimestamp(value);
|
|
268
279
|
};
|
|
280
|
+
var formatOfficeListLevelsLabel = (value) => {
|
|
281
|
+
if (!Array.isArray(value) || value.length === 0) {
|
|
282
|
+
return;
|
|
283
|
+
}
|
|
284
|
+
const levels = value.map((entry) => getContextNumber(entry)).filter((entry) => typeof entry === "number").sort((left, right) => left - right);
|
|
285
|
+
if (levels.length === 0) {
|
|
286
|
+
return;
|
|
287
|
+
}
|
|
288
|
+
const minLevel = levels[0];
|
|
289
|
+
const maxLevel = levels[levels.length - 1];
|
|
290
|
+
return minLevel === maxLevel ? `Office list level ${minLevel}` : `Office list levels ${minLevel}-${maxLevel}`;
|
|
291
|
+
};
|
|
292
|
+
var getOfficeTableCitationScope = (metadata) => {
|
|
293
|
+
if (!metadata) {
|
|
294
|
+
return;
|
|
295
|
+
}
|
|
296
|
+
const officeBlockKind = getContextString(metadata.officeBlockKind);
|
|
297
|
+
if (officeBlockKind !== "table" && officeBlockKind !== "list") {
|
|
298
|
+
return;
|
|
299
|
+
}
|
|
300
|
+
const sectionPath = Array.isArray(metadata.sectionPath) ? metadata.sectionPath.map((value) => getContextString(value)).filter((value) => typeof value === "string") : [];
|
|
301
|
+
const sectionTitle = getContextString(metadata.sectionTitle) ?? sectionPath.at(-1);
|
|
302
|
+
const officeContextText = officeBlockKind === "table" ? getContextString(metadata.officeTableContextText) : getContextString(metadata.officeListContextText);
|
|
303
|
+
if (!sectionTitle) {
|
|
304
|
+
return;
|
|
305
|
+
}
|
|
306
|
+
return {
|
|
307
|
+
blockKind: officeBlockKind,
|
|
308
|
+
pathDepth: sectionPath.length,
|
|
309
|
+
sectionTitle,
|
|
310
|
+
hasContext: typeof officeContextText === "string"
|
|
311
|
+
};
|
|
312
|
+
};
|
|
313
|
+
var getOfficeTableCitationPreference = (metadata) => {
|
|
314
|
+
const scope = getOfficeTableCitationScope(metadata);
|
|
315
|
+
if (!scope) {
|
|
316
|
+
return 0;
|
|
317
|
+
}
|
|
318
|
+
return scope.pathDepth * 10 + (scope.hasContext ? 1 : 0) + (scope.blockKind === "list" && typeof metadata?.officeListGroupItemCount === "number" && metadata.officeListGroupItemCount > 1 ? 1 : 0);
|
|
319
|
+
};
|
|
269
320
|
var buildLocatorLabel = (metadata, source, title) => {
|
|
270
321
|
if (!metadata) {
|
|
271
322
|
return;
|
|
@@ -291,6 +342,10 @@ var buildLocatorLabel = (metadata, source, title) => {
|
|
|
291
342
|
return `Archive entry ${archiveEntry}`;
|
|
292
343
|
}
|
|
293
344
|
const emailKind = getContextString(metadata.emailKind);
|
|
345
|
+
const officeBlockKind = getContextString(metadata.officeBlockKind);
|
|
346
|
+
const officeBlockNumber = getContextNumber(metadata.officeBlockNumber);
|
|
347
|
+
const officeTableBodyRowStart = getContextNumber(metadata.officeTableBodyRowStart);
|
|
348
|
+
const officeTableBodyRowEnd = getContextNumber(metadata.officeTableBodyRowEnd);
|
|
294
349
|
if (emailKind === "attachment") {
|
|
295
350
|
const attachmentName = getContextString(metadata.attachmentName) ?? getAttachmentName(source, title);
|
|
296
351
|
return attachmentName ? `Attachment ${attachmentName}` : "Attachment";
|
|
@@ -303,6 +358,18 @@ var buildLocatorLabel = (metadata, source, title) => {
|
|
|
303
358
|
if (mediaStart) {
|
|
304
359
|
return `Timestamp ${mediaStart}`;
|
|
305
360
|
}
|
|
361
|
+
if (officeBlockNumber && officeBlockKind === "table") {
|
|
362
|
+
if (typeof officeTableBodyRowStart === "number" && typeof officeTableBodyRowEnd === "number") {
|
|
363
|
+
return officeTableBodyRowStart === officeTableBodyRowEnd ? `Office table block ${officeBlockNumber} \xB7 Row ${officeTableBodyRowStart}` : `Office table block ${officeBlockNumber} \xB7 Rows ${officeTableBodyRowStart}-${officeTableBodyRowEnd}`;
|
|
364
|
+
}
|
|
365
|
+
return `Office table block ${officeBlockNumber}`;
|
|
366
|
+
}
|
|
367
|
+
if (officeBlockNumber && officeBlockKind === "list") {
|
|
368
|
+
return `Office list block ${officeBlockNumber}`;
|
|
369
|
+
}
|
|
370
|
+
if (officeBlockNumber && officeBlockKind === "paragraph") {
|
|
371
|
+
return `Office paragraph block ${officeBlockNumber}`;
|
|
372
|
+
}
|
|
306
373
|
const sectionPath = Array.isArray(metadata.sectionPath) ? metadata.sectionPath.map((value) => getContextString(value)).filter((value) => typeof value === "string") : [];
|
|
307
374
|
if (sectionPath.length > 0) {
|
|
308
375
|
return `Section ${sectionPath.join(" > ")}`;
|
|
@@ -336,10 +403,31 @@ var buildProvenanceLabel = (metadata) => {
|
|
|
336
403
|
const mediaDurationLabel = formatMediaDurationLabel(metadata.mediaDurationMs);
|
|
337
404
|
const transcriptSource = getContextString(metadata.transcriptSource);
|
|
338
405
|
const pdfTextMode = getContextString(metadata.pdfTextMode);
|
|
406
|
+
const officeBlockKind = getContextString(metadata.officeBlockKind);
|
|
407
|
+
const officeListContextText = getContextString(metadata.officeListContextText);
|
|
408
|
+
const officeListGroupItemCount = getContextNumber(metadata.officeListGroupItemCount);
|
|
409
|
+
const officeListLevelsLabel = formatOfficeListLevelsLabel(metadata.officeListLevels);
|
|
410
|
+
const officeTableHeaders = Array.isArray(metadata.officeTableHeaders) ? metadata.officeTableHeaders.map((value) => getContextString(value)).filter((value) => typeof value === "string") : [];
|
|
411
|
+
const officeTableColumnCount = getContextNumber(metadata.officeTableColumnCount);
|
|
412
|
+
const officeTableBodyRowCount = getContextNumber(metadata.officeTableBodyRowCount);
|
|
413
|
+
const officeTableBodyRowStart = getContextNumber(metadata.officeTableBodyRowStart);
|
|
414
|
+
const officeTableBodyRowEnd = getContextNumber(metadata.officeTableBodyRowEnd);
|
|
415
|
+
const officeTableContextText = getContextString(metadata.officeTableContextText);
|
|
416
|
+
const sectionPath = Array.isArray(metadata.sectionPath) ? metadata.sectionPath.map((value) => getContextString(value)).filter((value) => typeof value === "string") : [];
|
|
339
417
|
const ocrEngine = getContextString(metadata.ocrEngine);
|
|
340
418
|
const ocrConfidence = getContextNumber(metadata.ocrRegionConfidence) ?? getContextNumber(metadata.ocrConfidence);
|
|
341
419
|
const labels = [
|
|
342
420
|
pdfTextMode ? `PDF ${pdfTextMode}` : "",
|
|
421
|
+
officeBlockKind ? `Office ${officeBlockKind}` : "",
|
|
422
|
+
typeof officeListGroupItemCount === "number" ? `Office list ${officeListGroupItemCount} items` : "",
|
|
423
|
+
officeListLevelsLabel ?? "",
|
|
424
|
+
sectionPath.length > 0 && officeBlockKind ? `Source-aware office ${officeBlockKind} block ${sectionPath.join(" > ")}` : "",
|
|
425
|
+
officeListContextText ? `Office list context ${officeListContextText}` : "",
|
|
426
|
+
officeTableHeaders.length > 0 ? `Office table ${officeTableHeaders.join(", ")}` : "",
|
|
427
|
+
typeof officeTableColumnCount === "number" ? `Office table ${officeTableColumnCount} cols` : "",
|
|
428
|
+
typeof officeTableBodyRowCount === "number" ? `Office table ${officeTableBodyRowCount} body rows` : "",
|
|
429
|
+
typeof officeTableBodyRowStart === "number" && typeof officeTableBodyRowEnd === "number" ? officeTableBodyRowStart === officeTableBodyRowEnd ? `Office table row ${officeTableBodyRowStart}` : `Office table rows ${officeTableBodyRowStart}-${officeTableBodyRowEnd}` : "",
|
|
430
|
+
officeTableContextText ? `Office table context ${officeTableContextText}` : "",
|
|
343
431
|
ocrEngine ? `OCR ${ocrEngine}` : "",
|
|
344
432
|
typeof ocrConfidence === "number" ? `Confidence ${ocrConfidence.toFixed(2)}` : "",
|
|
345
433
|
mediaKind ? `Media ${mediaKind}` : "",
|
|
@@ -503,6 +591,15 @@ var buildRAGCitations = (sources) => {
|
|
|
503
591
|
});
|
|
504
592
|
}
|
|
505
593
|
return [...unique.values()].sort((left, right) => {
|
|
594
|
+
const leftOfficeScope = getOfficeTableCitationScope(left.metadata);
|
|
595
|
+
const rightOfficeScope = getOfficeTableCitationScope(right.metadata);
|
|
596
|
+
if (left.source === right.source && leftOfficeScope && rightOfficeScope && leftOfficeScope.blockKind === rightOfficeScope.blockKind && leftOfficeScope.sectionTitle === rightOfficeScope.sectionTitle) {
|
|
597
|
+
const leftOfficePreference = getOfficeTableCitationPreference(left.metadata);
|
|
598
|
+
const rightOfficePreference = getOfficeTableCitationPreference(right.metadata);
|
|
599
|
+
if (rightOfficePreference !== leftOfficePreference) {
|
|
600
|
+
return rightOfficePreference - leftOfficePreference;
|
|
601
|
+
}
|
|
602
|
+
}
|
|
506
603
|
if (right.score !== left.score) {
|
|
507
604
|
return right.score - left.score;
|
|
508
605
|
}
|
|
@@ -916,6 +1013,7 @@ var buildSourceAwareUnitScopeLabel = (metadata) => {
|
|
|
916
1013
|
const sectionKind = getContextString2(metadata.sectionKind);
|
|
917
1014
|
const sectionPath = Array.isArray(metadata.sectionPath) ? metadata.sectionPath.map((value) => getContextString2(value)).filter((value) => typeof value === "string") : [];
|
|
918
1015
|
const sectionTitle = getContextString2(metadata.sectionTitle) ?? sectionPath.at(-1);
|
|
1016
|
+
const pdfSemanticRole = getContextString2(metadata.pdfSemanticRole);
|
|
919
1017
|
const pdfTextKind = getContextString2(metadata.pdfTextKind);
|
|
920
1018
|
const officeBlockKind = getContextString2(metadata.officeBlockKind);
|
|
921
1019
|
const sheetName = getContextString2(metadata.sheetName);
|
|
@@ -926,6 +1024,12 @@ var buildSourceAwareUnitScopeLabel = (metadata) => {
|
|
|
926
1024
|
return `Source-aware section ${sectionPath.join(" > ")}`;
|
|
927
1025
|
}
|
|
928
1026
|
if (sectionKind === "pdf_block") {
|
|
1027
|
+
if (pdfSemanticRole === "figure_caption" && sectionTitle) {
|
|
1028
|
+
return `Source-aware PDF figure caption ${sectionTitle}`;
|
|
1029
|
+
}
|
|
1030
|
+
if (pdfSemanticRole === "figure_body" && sectionTitle) {
|
|
1031
|
+
return `Source-aware PDF figure body ${sectionTitle}`;
|
|
1032
|
+
}
|
|
929
1033
|
if (pdfTextKind === "table_like" && sectionTitle) {
|
|
930
1034
|
return `Source-aware PDF table block ${sectionTitle}`;
|
|
931
1035
|
}
|
|
@@ -935,11 +1039,12 @@ var buildSourceAwareUnitScopeLabel = (metadata) => {
|
|
|
935
1039
|
return "Source-aware PDF block";
|
|
936
1040
|
}
|
|
937
1041
|
if (sectionKind === "office_block") {
|
|
938
|
-
|
|
939
|
-
|
|
1042
|
+
const officeSectionLabel = sectionPath.length > 0 ? sectionPath.join(" > ") : sectionTitle;
|
|
1043
|
+
if (officeBlockKind && officeSectionLabel) {
|
|
1044
|
+
return `Source-aware office ${officeBlockKind} block ${officeSectionLabel}`;
|
|
940
1045
|
}
|
|
941
|
-
if (
|
|
942
|
-
return `Source-aware office block ${
|
|
1046
|
+
if (officeSectionLabel) {
|
|
1047
|
+
return `Source-aware office block ${officeSectionLabel}`;
|
|
943
1048
|
}
|
|
944
1049
|
return "Source-aware office block";
|
|
945
1050
|
}
|
|
@@ -1327,6 +1432,18 @@ var formatSpreadsheetTableLabel = (tableIndex, tableCount) => {
|
|
|
1327
1432
|
}
|
|
1328
1433
|
return `Table ${tableIndex}`;
|
|
1329
1434
|
};
|
|
1435
|
+
var formatOfficeListLevelsLabel2 = (value) => {
|
|
1436
|
+
if (!Array.isArray(value) || value.length === 0) {
|
|
1437
|
+
return;
|
|
1438
|
+
}
|
|
1439
|
+
const levels = value.map((entry) => getContextNumber2(entry)).filter((entry) => typeof entry === "number").sort((left, right) => left - right);
|
|
1440
|
+
if (levels.length === 0) {
|
|
1441
|
+
return;
|
|
1442
|
+
}
|
|
1443
|
+
const minLevel = levels[0];
|
|
1444
|
+
const maxLevel = levels[levels.length - 1];
|
|
1445
|
+
return minLevel === maxLevel ? `Office list level ${minLevel}` : `Office list levels ${minLevel}-${maxLevel}`;
|
|
1446
|
+
};
|
|
1330
1447
|
var formatMediaDurationLabel2 = (value) => {
|
|
1331
1448
|
if (typeof value !== "number" || !Number.isFinite(value) || value < 0) {
|
|
1332
1449
|
return;
|
|
@@ -1338,9 +1455,18 @@ var buildContextLabel2 = (metadata) => {
|
|
|
1338
1455
|
return;
|
|
1339
1456
|
}
|
|
1340
1457
|
const pdfTextKind = getContextString2(metadata.pdfTextKind);
|
|
1458
|
+
const pdfSemanticRole = getContextString2(metadata.pdfSemanticRole);
|
|
1459
|
+
const pdfTableBodyRowStart = getContextNumber2(metadata.pdfTableBodyRowStart);
|
|
1460
|
+
const pdfTableBodyRowEnd = getContextNumber2(metadata.pdfTableBodyRowEnd);
|
|
1341
1461
|
const officeBlockKind = getContextString2(metadata.officeBlockKind);
|
|
1342
1462
|
const sectionPath = Array.isArray(metadata.sectionPath) ? metadata.sectionPath.map((value) => getContextString2(value)).filter((value) => typeof value === "string") : [];
|
|
1343
1463
|
const sectionTitle = getContextString2(metadata.sectionTitle) ?? sectionPath.at(-1);
|
|
1464
|
+
if (pdfSemanticRole === "figure_caption" && sectionTitle) {
|
|
1465
|
+
return `PDF figure caption ${sectionTitle}`;
|
|
1466
|
+
}
|
|
1467
|
+
if (pdfSemanticRole === "figure_body" && sectionTitle) {
|
|
1468
|
+
return `PDF figure body ${sectionTitle}`;
|
|
1469
|
+
}
|
|
1344
1470
|
if (pdfTextKind === "table_like" && sectionTitle) {
|
|
1345
1471
|
return `PDF table block ${sectionTitle}`;
|
|
1346
1472
|
}
|
|
@@ -1348,13 +1474,13 @@ var buildContextLabel2 = (metadata) => {
|
|
|
1348
1474
|
return `PDF text block ${sectionTitle}`;
|
|
1349
1475
|
}
|
|
1350
1476
|
if (officeBlockKind === "table" && sectionTitle) {
|
|
1351
|
-
return `Office table block ${sectionTitle}`;
|
|
1477
|
+
return `Office table block ${sectionPath.join(" > ") || sectionTitle}`;
|
|
1352
1478
|
}
|
|
1353
1479
|
if (officeBlockKind === "list" && sectionTitle) {
|
|
1354
|
-
return `Office list block ${sectionTitle}`;
|
|
1480
|
+
return `Office list block ${sectionPath.join(" > ") || sectionTitle}`;
|
|
1355
1481
|
}
|
|
1356
1482
|
if (officeBlockKind === "paragraph" && sectionTitle) {
|
|
1357
|
-
return `Office paragraph block ${sectionTitle}`;
|
|
1483
|
+
return `Office paragraph block ${sectionPath.join(" > ") || sectionTitle}`;
|
|
1358
1484
|
}
|
|
1359
1485
|
const emailKind = getContextString2(metadata.emailKind);
|
|
1360
1486
|
if (emailKind === "attachment") {
|
|
@@ -1452,9 +1578,14 @@ var buildLocatorLabel2 = (metadata, source, title) => {
|
|
|
1452
1578
|
return;
|
|
1453
1579
|
}
|
|
1454
1580
|
const pdfTextKind = getContextString2(metadata.pdfTextKind);
|
|
1581
|
+
const pdfSemanticRole = getContextString2(metadata.pdfSemanticRole);
|
|
1455
1582
|
const officeBlockKind = getContextString2(metadata.officeBlockKind);
|
|
1456
1583
|
const pdfBlockNumber = getContextNumber2(metadata.pdfBlockNumber);
|
|
1584
|
+
const pdfTableBodyRowStart = getContextNumber2(metadata.pdfTableBodyRowStart);
|
|
1585
|
+
const pdfTableBodyRowEnd = getContextNumber2(metadata.pdfTableBodyRowEnd);
|
|
1457
1586
|
const officeBlockNumber = getContextNumber2(metadata.officeBlockNumber);
|
|
1587
|
+
const officeTableBodyRowStart = getContextNumber2(metadata.officeTableBodyRowStart);
|
|
1588
|
+
const officeTableBodyRowEnd = getContextNumber2(metadata.officeTableBodyRowEnd);
|
|
1458
1589
|
const spreadsheetRowStart = getContextNumber2(metadata.spreadsheetRowStart);
|
|
1459
1590
|
const spreadsheetRowEnd = getContextNumber2(metadata.spreadsheetRowEnd);
|
|
1460
1591
|
const slideTitle = getContextString2(metadata.slideTitle);
|
|
@@ -1465,7 +1596,16 @@ var buildLocatorLabel2 = (metadata, source, title) => {
|
|
|
1465
1596
|
if (page && region) {
|
|
1466
1597
|
return `Page ${page} \xB7 Region ${region}`;
|
|
1467
1598
|
}
|
|
1599
|
+
if (page && pdfBlockNumber && pdfSemanticRole === "figure_caption") {
|
|
1600
|
+
return `Page ${page} \xB7 Figure Caption ${pdfBlockNumber}`;
|
|
1601
|
+
}
|
|
1602
|
+
if (page && pdfBlockNumber && pdfSemanticRole === "figure_body") {
|
|
1603
|
+
return `Page ${page} \xB7 Figure Body ${pdfBlockNumber}`;
|
|
1604
|
+
}
|
|
1468
1605
|
if (page && pdfBlockNumber && pdfTextKind === "table_like") {
|
|
1606
|
+
if (typeof pdfTableBodyRowStart === "number" && typeof pdfTableBodyRowEnd === "number") {
|
|
1607
|
+
return pdfTableBodyRowStart === pdfTableBodyRowEnd ? `Page ${page} \xB7 Table Block ${pdfBlockNumber} \xB7 Row ${pdfTableBodyRowStart}` : `Page ${page} \xB7 Table Block ${pdfBlockNumber} \xB7 Rows ${pdfTableBodyRowStart}-${pdfTableBodyRowEnd}`;
|
|
1608
|
+
}
|
|
1469
1609
|
return `Page ${page} \xB7 Table Block ${pdfBlockNumber}`;
|
|
1470
1610
|
}
|
|
1471
1611
|
if (page && pdfBlockNumber) {
|
|
@@ -1528,6 +1668,9 @@ var buildLocatorLabel2 = (metadata, source, title) => {
|
|
|
1528
1668
|
return `Timestamp ${mediaStart}`;
|
|
1529
1669
|
}
|
|
1530
1670
|
if (officeBlockNumber && officeBlockKind === "table") {
|
|
1671
|
+
if (typeof officeTableBodyRowStart === "number" && typeof officeTableBodyRowEnd === "number") {
|
|
1672
|
+
return officeTableBodyRowStart === officeTableBodyRowEnd ? `Office table block ${officeBlockNumber} \xB7 Row ${officeTableBodyRowStart}` : `Office table block ${officeBlockNumber} \xB7 Rows ${officeTableBodyRowStart}-${officeTableBodyRowEnd}`;
|
|
1673
|
+
}
|
|
1531
1674
|
return `Office table block ${officeBlockNumber}`;
|
|
1532
1675
|
}
|
|
1533
1676
|
if (officeBlockNumber && officeBlockKind === "list") {
|
|
@@ -1564,11 +1707,27 @@ var buildProvenanceLabel2 = (metadata) => {
|
|
|
1564
1707
|
const mediaSegmentWindowDurationLabel = formatMediaDurationLabel2(metadata.mediaSegmentGroupDurationMs);
|
|
1565
1708
|
const mediaSegmentGapLabel = formatMediaDurationLabel2(metadata.mediaSegmentGapFromPreviousMs);
|
|
1566
1709
|
const spreadsheetHeaders = getSpreadsheetHeaders(metadata);
|
|
1710
|
+
const pdfTableHeaders = Array.isArray(metadata.pdfTableHeaders) ? metadata.pdfTableHeaders.map((value) => getContextString2(value)).filter((value) => typeof value === "string") : [];
|
|
1711
|
+
const pdfTableColumnCount = getContextNumber2(metadata.pdfTableColumnCount);
|
|
1712
|
+
const pdfTableBodyRowCount = getContextNumber2(metadata.pdfTableBodyRowCount);
|
|
1567
1713
|
const spreadsheetColumnRange = formatSpreadsheetColumnRange(getContextString2(metadata.spreadsheetColumnStart), getContextString2(metadata.spreadsheetColumnEnd));
|
|
1568
1714
|
const slideNotesText = getContextString2(metadata.slideNotesText);
|
|
1569
1715
|
const pdfTextMode = getContextString2(metadata.pdfTextMode);
|
|
1716
|
+
const pdfEvidenceMode = getContextString2(metadata.pdfEvidenceMode);
|
|
1717
|
+
const pdfEvidenceOrigin = getContextString2(metadata.pdfEvidenceOrigin);
|
|
1718
|
+
const pdfEvidenceSupplement = getContextString2(metadata.pdfEvidenceSupplement);
|
|
1570
1719
|
const pdfTextKind = getContextString2(metadata.pdfTextKind);
|
|
1720
|
+
const pdfSemanticRole = getContextString2(metadata.pdfSemanticRole);
|
|
1571
1721
|
const officeBlockKind = getContextString2(metadata.officeBlockKind);
|
|
1722
|
+
const officeListContextText = getContextString2(metadata.officeListContextText);
|
|
1723
|
+
const officeListGroupItemCount = getContextNumber2(metadata.officeListGroupItemCount);
|
|
1724
|
+
const officeListLevelsLabel = formatOfficeListLevelsLabel2(metadata.officeListLevels);
|
|
1725
|
+
const officeTableHeaders = Array.isArray(metadata.officeTableHeaders) ? metadata.officeTableHeaders.map((value) => getContextString2(value)).filter((value) => typeof value === "string") : [];
|
|
1726
|
+
const officeTableColumnCount = getContextNumber2(metadata.officeTableColumnCount);
|
|
1727
|
+
const officeTableBodyRowCount = getContextNumber2(metadata.officeTableBodyRowCount);
|
|
1728
|
+
const officeTableBodyRowStart = getContextNumber2(metadata.officeTableBodyRowStart);
|
|
1729
|
+
const officeTableBodyRowEnd = getContextNumber2(metadata.officeTableBodyRowEnd);
|
|
1730
|
+
const officeTableContextText = getContextString2(metadata.officeTableContextText);
|
|
1572
1731
|
const ocrEngine = getContextString2(metadata.ocrEngine);
|
|
1573
1732
|
const extractorRegistryMatch = getContextString2(metadata.extractorRegistryMatch);
|
|
1574
1733
|
const chunkingProfile = getContextString2(metadata.chunkingProfile);
|
|
@@ -1584,10 +1743,19 @@ var buildProvenanceLabel2 = (metadata) => {
|
|
|
1584
1743
|
const ocrMinConfidence = getContextNumber2(metadata.ocrPageMinConfidence) ?? getContextNumber2(metadata.ocrMinConfidence);
|
|
1585
1744
|
const ocrMaxConfidence = getContextNumber2(metadata.ocrPageMaxConfidence) ?? getContextNumber2(metadata.ocrMaxConfidence);
|
|
1586
1745
|
const ocrRegionCount = getContextNumber2(metadata.ocrRegionCount);
|
|
1746
|
+
const pdfTableBodyRowStart = getContextNumber2(metadata.pdfTableBodyRowStart);
|
|
1747
|
+
const pdfTableBodyRowEnd = getContextNumber2(metadata.pdfTableBodyRowEnd);
|
|
1587
1748
|
const labels = [
|
|
1588
1749
|
pdfTextMode ? `PDF ${pdfTextMode}` : "",
|
|
1589
|
-
|
|
1750
|
+
pdfEvidenceMode ? `PDF evidence ${pdfEvidenceMode}` : "",
|
|
1751
|
+
pdfEvidenceOrigin ? `PDF origin ${pdfEvidenceOrigin}` : "",
|
|
1752
|
+
pdfEvidenceSupplement ? `PDF supplement ${pdfEvidenceSupplement}` : "",
|
|
1753
|
+
pdfSemanticRole === "figure_caption" ? "PDF figure caption" : "",
|
|
1754
|
+
pdfSemanticRole === "figure_body" ? "PDF figure body" : "",
|
|
1755
|
+
pdfSemanticRole === "figure_caption" ? "" : pdfSemanticRole === "figure_body" ? "" : pdfTextKind === "table_like" ? "PDF table block" : pdfTextKind === "paragraph" ? "PDF text block" : "",
|
|
1590
1756
|
officeBlockKind ? `Office ${officeBlockKind}` : "",
|
|
1757
|
+
typeof officeListGroupItemCount === "number" ? `Office list ${officeListGroupItemCount} items` : "",
|
|
1758
|
+
officeListLevelsLabel ?? "",
|
|
1591
1759
|
ocrEngine ? `OCR ${ocrEngine}` : "",
|
|
1592
1760
|
extractorRegistryMatch ? `Extractor ${extractorRegistryMatch}` : "",
|
|
1593
1761
|
chunkingProfile ? `Chunking ${chunkingProfile}` : "",
|
|
@@ -1597,6 +1765,16 @@ var buildProvenanceLabel2 = (metadata) => {
|
|
|
1597
1765
|
typeof ocrAverageConfidence === "number" && ocrAverageConfidence !== ocrConfidence ? `Average ${ocrAverageConfidence.toFixed(2)}` : "",
|
|
1598
1766
|
typeof ocrMinConfidence === "number" && typeof ocrMaxConfidence === "number" && ocrMinConfidence !== ocrMaxConfidence ? `Range ${ocrMinConfidence.toFixed(2)}-${ocrMaxConfidence.toFixed(2)}` : "",
|
|
1599
1767
|
typeof ocrRegionCount === "number" ? `${ocrRegionCount} regions` : "",
|
|
1768
|
+
pdfTableHeaders.length > 0 ? `PDF table ${pdfTableHeaders.join(", ")}` : "",
|
|
1769
|
+
typeof pdfTableColumnCount === "number" ? `PDF table ${pdfTableColumnCount} cols` : "",
|
|
1770
|
+
typeof pdfTableBodyRowCount === "number" ? `PDF table ${pdfTableBodyRowCount} body rows` : "",
|
|
1771
|
+
typeof pdfTableBodyRowStart === "number" && typeof pdfTableBodyRowEnd === "number" ? pdfTableBodyRowStart === pdfTableBodyRowEnd ? `PDF table row ${pdfTableBodyRowStart}` : `PDF table rows ${pdfTableBodyRowStart}-${pdfTableBodyRowEnd}` : "",
|
|
1772
|
+
officeListContextText ? `Office list context ${officeListContextText}` : "",
|
|
1773
|
+
officeTableHeaders.length > 0 ? `Office table ${officeTableHeaders.join(", ")}` : "",
|
|
1774
|
+
typeof officeTableColumnCount === "number" ? `Office table ${officeTableColumnCount} cols` : "",
|
|
1775
|
+
typeof officeTableBodyRowCount === "number" ? `Office table ${officeTableBodyRowCount} body rows` : "",
|
|
1776
|
+
typeof officeTableBodyRowStart === "number" && typeof officeTableBodyRowEnd === "number" ? officeTableBodyRowStart === officeTableBodyRowEnd ? `Office table row ${officeTableBodyRowStart}` : `Office table rows ${officeTableBodyRowStart}-${officeTableBodyRowEnd}` : "",
|
|
1777
|
+
officeTableContextText ? `Office table context ${officeTableContextText}` : "",
|
|
1600
1778
|
spreadsheetHeaders.length > 0 ? `Spreadsheet ${spreadsheetHeaders.join(", ")}` : "",
|
|
1601
1779
|
spreadsheetColumnRange ? `Spreadsheet ${spreadsheetColumnRange}` : "",
|
|
1602
1780
|
spreadsheetTableLabel ? `Spreadsheet ${spreadsheetTableLabel}` : "",
|
|
@@ -2028,12 +2206,92 @@ var getStructuredSectionScoreWeight = (metadata) => {
|
|
|
2028
2206
|
return 1;
|
|
2029
2207
|
};
|
|
2030
2208
|
var getStructuredSourceLeadScore = (source) => source.score * getStructuredSectionScoreWeight(source.metadata);
|
|
2209
|
+
var getPDFLeadEvidencePreference = (metadata) => {
|
|
2210
|
+
if (!metadata) {
|
|
2211
|
+
return 0;
|
|
2212
|
+
}
|
|
2213
|
+
const pdfEvidenceMode = getContextString2(metadata.pdfEvidenceMode);
|
|
2214
|
+
const pdfEvidenceOrigin = getContextString2(metadata.pdfEvidenceOrigin);
|
|
2215
|
+
const pdfEvidenceSupplement = getContextString2(metadata.pdfEvidenceSupplement);
|
|
2216
|
+
if (pdfEvidenceMode === "hybrid" && pdfEvidenceOrigin === "native" && pdfEvidenceSupplement === "ocr") {
|
|
2217
|
+
return 3;
|
|
2218
|
+
}
|
|
2219
|
+
if (pdfEvidenceMode === "native" && pdfEvidenceOrigin === "native") {
|
|
2220
|
+
return 2;
|
|
2221
|
+
}
|
|
2222
|
+
if (pdfEvidenceMode === "ocr" && pdfEvidenceOrigin === "ocr") {
|
|
2223
|
+
return 1;
|
|
2224
|
+
}
|
|
2225
|
+
return 0;
|
|
2226
|
+
};
|
|
2227
|
+
var getPDFLeadScope = (metadata) => {
|
|
2228
|
+
if (!metadata) {
|
|
2229
|
+
return;
|
|
2230
|
+
}
|
|
2231
|
+
const pageNumber = getContextNumber2(metadata.pageNumber) ?? getContextNumber2(metadata.page) ?? (typeof metadata.pageIndex === "number" ? metadata.pageIndex + 1 : undefined);
|
|
2232
|
+
const sectionTitle = getContextString2(metadata.sectionTitle);
|
|
2233
|
+
const sourceNativeKind = getContextString2(metadata.sourceNativeKind);
|
|
2234
|
+
if (typeof pageNumber !== "number" && !sectionTitle && !sourceNativeKind) {
|
|
2235
|
+
return;
|
|
2236
|
+
}
|
|
2237
|
+
return {
|
|
2238
|
+
pageNumber,
|
|
2239
|
+
sectionTitle,
|
|
2240
|
+
sourceNativeKind
|
|
2241
|
+
};
|
|
2242
|
+
};
|
|
2243
|
+
var getOfficeLeadScope = (metadata) => {
|
|
2244
|
+
if (!metadata) {
|
|
2245
|
+
return;
|
|
2246
|
+
}
|
|
2247
|
+
const officeBlockKind = getContextString2(metadata.officeBlockKind);
|
|
2248
|
+
if (officeBlockKind !== "table" && officeBlockKind !== "list") {
|
|
2249
|
+
return;
|
|
2250
|
+
}
|
|
2251
|
+
const sectionPath = Array.isArray(metadata.sectionPath) ? metadata.sectionPath.map((value) => getContextString2(value)).filter((value) => typeof value === "string") : [];
|
|
2252
|
+
const sectionTitle = getContextString2(metadata.sectionTitle) ?? sectionPath.at(-1);
|
|
2253
|
+
const officeContextText = officeBlockKind === "table" ? getContextString2(metadata.officeTableContextText) : getContextString2(metadata.officeListContextText);
|
|
2254
|
+
if (!sectionTitle) {
|
|
2255
|
+
return;
|
|
2256
|
+
}
|
|
2257
|
+
return {
|
|
2258
|
+
blockKind: officeBlockKind,
|
|
2259
|
+
pathDepth: sectionPath.length,
|
|
2260
|
+
sectionTitle,
|
|
2261
|
+
hasContext: typeof officeContextText === "string"
|
|
2262
|
+
};
|
|
2263
|
+
};
|
|
2264
|
+
var getOfficeLeadEvidencePreference = (metadata) => {
|
|
2265
|
+
const scope = getOfficeLeadScope(metadata);
|
|
2266
|
+
if (!scope) {
|
|
2267
|
+
return 0;
|
|
2268
|
+
}
|
|
2269
|
+
return scope.pathDepth * 10 + (scope.hasContext ? 1 : 0) + (scope.blockKind === "list" && typeof metadata?.officeListGroupItemCount === "number" && metadata.officeListGroupItemCount > 1 ? 1 : 0);
|
|
2270
|
+
};
|
|
2031
2271
|
var getPreferredSourceLeadChunk = (chunks) => chunks.slice().sort((left, right) => {
|
|
2272
|
+
const leftOfficeScope = getOfficeLeadScope(left.metadata);
|
|
2273
|
+
const rightOfficeScope = getOfficeLeadScope(right.metadata);
|
|
2274
|
+
if (left.source === right.source && leftOfficeScope && rightOfficeScope && leftOfficeScope.blockKind === rightOfficeScope.blockKind && leftOfficeScope.sectionTitle === rightOfficeScope.sectionTitle) {
|
|
2275
|
+
const leftOfficePreference = getOfficeLeadEvidencePreference(left.metadata);
|
|
2276
|
+
const rightOfficePreference = getOfficeLeadEvidencePreference(right.metadata);
|
|
2277
|
+
if (rightOfficePreference !== leftOfficePreference) {
|
|
2278
|
+
return rightOfficePreference - leftOfficePreference;
|
|
2279
|
+
}
|
|
2280
|
+
}
|
|
2032
2281
|
const leftWeightedScore = getStructuredSourceLeadScore(left);
|
|
2033
2282
|
const rightWeightedScore = getStructuredSourceLeadScore(right);
|
|
2034
2283
|
if (rightWeightedScore !== leftWeightedScore) {
|
|
2035
2284
|
return rightWeightedScore - leftWeightedScore;
|
|
2036
2285
|
}
|
|
2286
|
+
const leftScope = getPDFLeadScope(left.metadata);
|
|
2287
|
+
const rightScope = getPDFLeadScope(right.metadata);
|
|
2288
|
+
if (left.source === right.source && leftScope && rightScope && (leftScope.sectionTitle && rightScope.sectionTitle && leftScope.sectionTitle === rightScope.sectionTitle || typeof leftScope.pageNumber === "number" && typeof rightScope.pageNumber === "number" && leftScope.pageNumber === rightScope.pageNumber)) {
|
|
2289
|
+
const leftEvidencePreference = getPDFLeadEvidencePreference(left.metadata);
|
|
2290
|
+
const rightEvidencePreference = getPDFLeadEvidencePreference(right.metadata);
|
|
2291
|
+
if (rightEvidencePreference !== leftEvidencePreference) {
|
|
2292
|
+
return rightEvidencePreference - leftEvidencePreference;
|
|
2293
|
+
}
|
|
2294
|
+
}
|
|
2037
2295
|
if (right.score !== left.score) {
|
|
2038
2296
|
return right.score - left.score;
|
|
2039
2297
|
}
|
|
@@ -2287,6 +2545,7 @@ var buildRAGSectionRetrievalDiagnostics = (sources, trace) => {
|
|
|
2287
2545
|
queryTransformProvider: trace?.queryTransformProvider,
|
|
2288
2546
|
queryTransformReason: trace?.queryTransformReason,
|
|
2289
2547
|
reasons,
|
|
2548
|
+
evidenceReconcileApplied: trace?.steps.some((step) => step.stage === "evidence_reconcile"),
|
|
2290
2549
|
rerankApplied: trace?.steps.some((step) => step.stage === "rerank" && step.metadata?.applied === true),
|
|
2291
2550
|
scoreShare,
|
|
2292
2551
|
scoreThresholdApplied: trace?.steps.some((step) => step.stage === "score_filter"),
|
|
@@ -2965,6 +3224,12 @@ var buildComparisonOverviewPresentation = (input) => {
|
|
|
2965
3224
|
value: input.resolveLabel(input.summary.bestByMultivectorVectorHitCases)
|
|
2966
3225
|
});
|
|
2967
3226
|
}
|
|
3227
|
+
if (input.summary.bestByEvidenceReconcileCases) {
|
|
3228
|
+
rows.push({
|
|
3229
|
+
label: "Best evidence reconcile",
|
|
3230
|
+
value: input.resolveLabel(input.summary.bestByEvidenceReconcileCases)
|
|
3231
|
+
});
|
|
3232
|
+
}
|
|
2968
3233
|
if (input.summary.bestByLowestRuntimeCandidateBudgetExhaustedCases) {
|
|
2969
3234
|
rows.push({
|
|
2970
3235
|
label: "Lowest runtime budget exhaustion",
|
|
@@ -3032,6 +3297,9 @@ var buildRAGComparisonTraceSummaryRows = (entry) => {
|
|
|
3032
3297
|
}, {
|
|
3033
3298
|
label: "Runtime",
|
|
3034
3299
|
value: `budget ${formatTraceRatio(trace.runtimeCandidateBudgetExhaustedCases, trace.totalCases)} \xB7 underfilled ${formatTraceRatio(trace.runtimeUnderfilledTopKCases, trace.totalCases)}`
|
|
3300
|
+
}, {
|
|
3301
|
+
label: "Evidence reconcile",
|
|
3302
|
+
value: `all ${formatTraceRatio(trace.stageCounts.evidence_reconcile ?? 0, trace.totalCases)} \xB7 office ${formatTraceRatio(trace.officeEvidenceReconcileCases, trace.totalCases)} \xB7 pdf ${formatTraceRatio(trace.pdfEvidenceReconcileCases, trace.totalCases)}`
|
|
3035
3303
|
}, {
|
|
3036
3304
|
label: "TopK",
|
|
3037
3305
|
value: `${trace.averageCandidateTopK.toFixed(1)} / ${trace.averageLexicalTopK.toFixed(1)}`
|
|
@@ -3389,6 +3657,25 @@ var buildRAGEvaluationHistoryRows = (history) => {
|
|
|
3389
3657
|
label: "Trace variant delta",
|
|
3390
3658
|
value: formatTraceCountDelta(history.diff.traceSummaryDelta.variantCases)
|
|
3391
3659
|
});
|
|
3660
|
+
const evidenceReconcileDelta = history.diff.traceSummaryDelta.stageCounts?.evidence_reconcile;
|
|
3661
|
+
if (typeof evidenceReconcileDelta === "number") {
|
|
3662
|
+
rows.push({
|
|
3663
|
+
label: "Trace evidence reconcile delta",
|
|
3664
|
+
value: formatTraceCountDelta(evidenceReconcileDelta)
|
|
3665
|
+
});
|
|
3666
|
+
}
|
|
3667
|
+
if (typeof history.diff.traceSummaryDelta.officeEvidenceReconcileCasesDelta === "number") {
|
|
3668
|
+
rows.push({
|
|
3669
|
+
label: "Trace office evidence reconcile delta",
|
|
3670
|
+
value: formatTraceCountDelta(history.diff.traceSummaryDelta.officeEvidenceReconcileCasesDelta)
|
|
3671
|
+
});
|
|
3672
|
+
}
|
|
3673
|
+
if (typeof history.diff.traceSummaryDelta.pdfEvidenceReconcileCasesDelta === "number") {
|
|
3674
|
+
rows.push({
|
|
3675
|
+
label: "Trace PDF evidence reconcile delta",
|
|
3676
|
+
value: formatTraceCountDelta(history.diff.traceSummaryDelta.pdfEvidenceReconcileCasesDelta)
|
|
3677
|
+
});
|
|
3678
|
+
}
|
|
3392
3679
|
const stageDelta = Object.entries(history.diff.traceSummaryDelta.stageCounts ?? {}).map(([stage, count]) => `${stage} ${formatTraceCountDelta(count)}`).join(", ");
|
|
3393
3680
|
if (stageDelta) {
|
|
3394
3681
|
rows.push({ label: "Trace stage delta", value: stageDelta });
|
|
@@ -3594,6 +3881,7 @@ var buildRAGEvaluationSuiteSnapshotHistoryPresentation = (history) => ({
|
|
|
3594
3881
|
summary: history?.latestSnapshot ? `v${history.latestSnapshot.version}` : "No saved suite snapshots yet."
|
|
3595
3882
|
});
|
|
3596
3883
|
/**
 * Tells whether a gate reason mentions a runtime concern.
 *
 * @param {string} reason - Gate failure reason text.
 * @returns {boolean} True when the text contains "runtime", "candidate-budget"
 *   or "underfilled" (case-insensitive).
 */
var isRuntimeGateReason = (reason) => {
  const runtimeGatePattern = /runtime|candidate-budget|underfilled/i;
  return runtimeGatePattern.test(reason);
};
|
|
3884
|
+
/**
 * Extracts the distinct fixture variant names encoded in run tags.
 *
 * A tag of the form "fixture:<name>" contributes "<name>"; tags without the
 * prefix and tags with an empty name are ignored. Variants keep the order of
 * their first appearance.
 *
 * @param {string[] | null | undefined} tags - Run tags; nullish is treated as empty.
 * @returns {string[]} Unique, non-empty fixture variant names.
 */
var getFixtureVariantsFromRunTags = (tags) => {
  const fixturePrefix = "fixture:";
  const uniqueVariants = (tags ?? []).reduce((collected, tag) => {
    if (tag.startsWith(fixturePrefix)) {
      const variant = tag.slice(fixturePrefix.length);
      if (variant.length > 0) {
        // A Set keeps first-insertion order, matching a first-occurrence dedupe.
        collected.add(variant);
      }
    }
    return collected;
  }, new Set());
  return Array.from(uniqueVariants);
};
|
|
3597
3885
|
var buildRAGRetrievalReleaseHistoryRunPresentation = (run) => {
|
|
3598
3886
|
const runtimeGateReasons = (run.decisionSummary?.gate?.reasons ?? run.releaseVerdict?.gate?.reasons ?? []).filter(isRuntimeGateReason);
|
|
3599
3887
|
const rows = [
|
|
@@ -3607,6 +3895,13 @@ var buildRAGRetrievalReleaseHistoryRunPresentation = (run) => {
|
|
|
3607
3895
|
value: run.comparison.summary.bestByAverageF1 ?? "n/a"
|
|
3608
3896
|
}
|
|
3609
3897
|
];
|
|
3898
|
+
const fixtureVariants = getFixtureVariantsFromRunTags(run.tags);
|
|
3899
|
+
if (fixtureVariants.length > 0) {
|
|
3900
|
+
rows.push({
|
|
3901
|
+
label: "Fixture variant",
|
|
3902
|
+
value: fixtureVariants.join(", ")
|
|
3903
|
+
});
|
|
3904
|
+
}
|
|
3610
3905
|
if (run.comparison.summary.bestByLowestRuntimeCandidateBudgetExhaustedCases) {
|
|
3611
3906
|
rows.push({
|
|
3612
3907
|
label: "Lowest runtime budget exhaustion",
|
|
@@ -3635,6 +3930,7 @@ var buildRAGRetrievalReleaseHistoryRunPresentation = (run) => {
|
|
|
3635
3930
|
};
|
|
3636
3931
|
var buildRAGRetrievalReleaseGroupHistoryPresentation = (input) => {
|
|
3637
3932
|
const recentRuns = (input.runs ?? []).map(buildRAGRetrievalReleaseHistoryRunPresentation);
|
|
3933
|
+
const fixtureVariants = (input.runs ?? []).flatMap((run) => getFixtureVariantsFromRunTags(run.tags)).filter((tag, index, all) => all.indexOf(tag) === index);
|
|
3638
3934
|
const runtimeBlockedRuns = recentRuns.filter((entry) => entry.rows.some((row) => row.label === "Runtime gate failures" && row.value !== "none")).length;
|
|
3639
3935
|
const rows = [
|
|
3640
3936
|
{
|
|
@@ -3658,6 +3954,12 @@ var buildRAGRetrievalReleaseGroupHistoryPresentation = (input) => {
|
|
|
3658
3954
|
value: String(runtimeBlockedRuns)
|
|
3659
3955
|
}
|
|
3660
3956
|
];
|
|
3957
|
+
if (fixtureVariants.length > 0) {
|
|
3958
|
+
rows.push({
|
|
3959
|
+
label: "Fixture variants",
|
|
3960
|
+
value: fixtureVariants.join(", ")
|
|
3961
|
+
});
|
|
3962
|
+
}
|
|
3661
3963
|
return {
|
|
3662
3964
|
recentRuns,
|
|
3663
3965
|
rows,
|
|
@@ -4174,6 +4476,9 @@ var evaluateRetrievalComparisonGate = ({
|
|
|
4174
4476
|
if (typeof policy.minMultiVectorVectorHitCasesDelta === "number" && (delta.multiVectorVectorHitCasesDelta ?? 0) < policy.minMultiVectorVectorHitCasesDelta) {
|
|
4175
4477
|
reasons.push(`multivector vector-hit delta ${delta.multiVectorVectorHitCasesDelta ?? 0} is below ${policy.minMultiVectorVectorHitCasesDelta}`);
|
|
4176
4478
|
}
|
|
4479
|
+
if (typeof policy.minEvidenceReconcileCasesDelta === "number" && (delta.evidenceReconcileCasesDelta ?? 0) < policy.minEvidenceReconcileCasesDelta) {
|
|
4480
|
+
reasons.push(`evidence reconcile delta ${delta.evidenceReconcileCasesDelta ?? 0} is below ${policy.minEvidenceReconcileCasesDelta}`);
|
|
4481
|
+
}
|
|
4177
4482
|
if (typeof policy.maxRuntimeCandidateBudgetExhaustedCasesDelta === "number" && (delta.runtimeCandidateBudgetExhaustedCasesDelta ?? 0) > policy.maxRuntimeCandidateBudgetExhaustedCasesDelta) {
|
|
4178
4483
|
reasons.push(`runtime candidate-budget-exhausted delta ${delta.runtimeCandidateBudgetExhaustedCasesDelta ?? 0} exceeds ${policy.maxRuntimeCandidateBudgetExhaustedCasesDelta}`);
|
|
4179
4484
|
}
|
|
@@ -4227,13 +4532,14 @@ var buildRAGRetrievalReleaseVerdict = ({
|
|
|
4227
4532
|
};
|
|
4228
4533
|
}
|
|
4229
4534
|
if (delta) {
|
|
4535
|
+
const requiresReview = delta.passingRateDelta < 0 || delta.averageF1Delta < 0 || (delta.evidenceReconcileCasesDelta ?? 0) < 0;
|
|
4230
4536
|
return {
|
|
4231
4537
|
baselineGroupKey: groupKey,
|
|
4232
4538
|
baselineRetrievalId,
|
|
4233
4539
|
candidateRetrievalId,
|
|
4234
4540
|
delta,
|
|
4235
|
-
status:
|
|
4236
|
-
summary:
|
|
4541
|
+
status: requiresReview ? "needs_review" : "pass",
|
|
4542
|
+
summary: requiresReview ? "Candidate should be reviewed before promotion." : "Candidate improved or matched the baseline."
|
|
4237
4543
|
};
|
|
4238
4544
|
}
|
|
4239
4545
|
return {
|
|
@@ -4914,6 +5220,8 @@ var summarizeRetrievalTraces = (traces) => {
|
|
|
4914
5220
|
let multiVectorVectorHitCases = 0;
|
|
4915
5221
|
let multiVectorLexicalHitCases = 0;
|
|
4916
5222
|
let multiVectorCollapsedCases = 0;
|
|
5223
|
+
let officeEvidenceReconcileCases = 0;
|
|
5224
|
+
let pdfEvidenceReconcileCases = 0;
|
|
4917
5225
|
let runtimeCandidateBudgetExhaustedCases = 0;
|
|
4918
5226
|
let runtimeUnderfilledTopKCases = 0;
|
|
4919
5227
|
let finalCountSum = 0;
|
|
@@ -4955,6 +5263,13 @@ var summarizeRetrievalTraces = (traces) => {
|
|
|
4955
5263
|
if ((trace.multiVector?.collapsedParents ?? 0) > 0) {
|
|
4956
5264
|
multiVectorCollapsedCases += 1;
|
|
4957
5265
|
}
|
|
5266
|
+
const evidenceReconcileMetadata = trace.steps.find((step) => step.stage === "evidence_reconcile")?.metadata;
|
|
5267
|
+
if (typeof evidenceReconcileMetadata?.officeAffectedScopes === "number" && evidenceReconcileMetadata.officeAffectedScopes > 0) {
|
|
5268
|
+
officeEvidenceReconcileCases += 1;
|
|
5269
|
+
}
|
|
5270
|
+
if (typeof evidenceReconcileMetadata?.pdfAffectedScopes === "number" && evidenceReconcileMetadata.pdfAffectedScopes > 0) {
|
|
5271
|
+
pdfEvidenceReconcileCases += 1;
|
|
5272
|
+
}
|
|
4958
5273
|
if (vectorSearchMetadata?.sqliteQueryCandidateBudgetExhausted) {
|
|
4959
5274
|
runtimeCandidateBudgetExhaustedCases += 1;
|
|
4960
5275
|
}
|
|
@@ -4992,6 +5307,8 @@ var summarizeRetrievalTraces = (traces) => {
|
|
|
4992
5307
|
multiVectorVectorHitCases,
|
|
4993
5308
|
multiVectorLexicalHitCases,
|
|
4994
5309
|
multiVectorCollapsedCases,
|
|
5310
|
+
officeEvidenceReconcileCases,
|
|
5311
|
+
pdfEvidenceReconcileCases,
|
|
4995
5312
|
runtimeCandidateBudgetExhaustedCases,
|
|
4996
5313
|
runtimeUnderfilledTopKCases,
|
|
4997
5314
|
vectorCases
|
|
@@ -5796,6 +6113,8 @@ var buildRAGEvaluationRunDiff = ({
|
|
|
5796
6113
|
averageLexicalTopK: (current.traceSummary?.averageLexicalTopK ?? 0) - (previous?.traceSummary?.averageLexicalTopK ?? 0),
|
|
5797
6114
|
averageVectorCount: (current.traceSummary?.averageVectorCount ?? 0) - (previous?.traceSummary?.averageVectorCount ?? 0),
|
|
5798
6115
|
balancedCases: (current.traceSummary?.balancedCases ?? 0) - (previous?.traceSummary?.balancedCases ?? 0),
|
|
6116
|
+
officeEvidenceReconcileCasesDelta: (current.traceSummary?.officeEvidenceReconcileCases ?? 0) - (previous?.traceSummary?.officeEvidenceReconcileCases ?? 0),
|
|
6117
|
+
pdfEvidenceReconcileCasesDelta: (current.traceSummary?.pdfEvidenceReconcileCases ?? 0) - (previous?.traceSummary?.pdfEvidenceReconcileCases ?? 0),
|
|
5799
6118
|
lexicalCases: (current.traceSummary?.lexicalCases ?? 0) - (previous?.traceSummary?.lexicalCases ?? 0),
|
|
5800
6119
|
modesChanged: (current.traceSummary?.modes ?? []).join("|") !== (previous?.traceSummary?.modes ?? []).join("|"),
|
|
5801
6120
|
roundRobinCases: (current.traceSummary?.roundRobinCases ?? 0) - (previous?.traceSummary?.roundRobinCases ?? 0),
|
|
@@ -8580,6 +8899,7 @@ var buildRAGRetrievalComparisonDecisionSummary = ({
|
|
|
8580
8899
|
multiVectorCollapsedCasesDelta: (candidateEntry.traceSummary?.multiVectorCollapsedCases ?? 0) - (baselineEntry.traceSummary?.multiVectorCollapsedCases ?? 0),
|
|
8581
8900
|
multiVectorLexicalHitCasesDelta: (candidateEntry.traceSummary?.multiVectorLexicalHitCases ?? 0) - (baselineEntry.traceSummary?.multiVectorLexicalHitCases ?? 0),
|
|
8582
8901
|
multiVectorVectorHitCasesDelta: (candidateEntry.traceSummary?.multiVectorVectorHitCases ?? 0) - (baselineEntry.traceSummary?.multiVectorVectorHitCases ?? 0),
|
|
8902
|
+
evidenceReconcileCasesDelta: (candidateEntry.traceSummary?.stageCounts?.evidence_reconcile ?? 0) - (baselineEntry.traceSummary?.stageCounts?.evidence_reconcile ?? 0),
|
|
8583
8903
|
runtimeCandidateBudgetExhaustedCasesDelta: (candidateEntry.traceSummary?.runtimeCandidateBudgetExhaustedCases ?? 0) - (baselineEntry.traceSummary?.runtimeCandidateBudgetExhaustedCases ?? 0),
|
|
8584
8904
|
runtimeUnderfilledTopKCasesDelta: (candidateEntry.traceSummary?.runtimeUnderfilledTopKCases ?? 0) - (baselineEntry.traceSummary?.runtimeUnderfilledTopKCases ?? 0)
|
|
8585
8905
|
} : undefined;
|
|
@@ -8591,6 +8911,7 @@ var buildRAGRetrievalComparisonDecisionSummary = ({
|
|
|
8591
8911
|
multiVectorCollapsedCases: baselineEntry.traceSummary?.multiVectorCollapsedCases,
|
|
8592
8912
|
multiVectorLexicalHitCases: baselineEntry.traceSummary?.multiVectorLexicalHitCases,
|
|
8593
8913
|
multiVectorVectorHitCases: baselineEntry.traceSummary?.multiVectorVectorHitCases,
|
|
8914
|
+
evidenceReconcileCases: baselineEntry.traceSummary?.stageCounts?.evidence_reconcile,
|
|
8594
8915
|
runtimeCandidateBudgetExhaustedCases: baselineEntry.traceSummary?.runtimeCandidateBudgetExhaustedCases,
|
|
8595
8916
|
runtimeUnderfilledTopKCases: baselineEntry.traceSummary?.runtimeUnderfilledTopKCases,
|
|
8596
8917
|
passingRate: baselineEntry.response.passingRate,
|
|
@@ -8604,6 +8925,7 @@ var buildRAGRetrievalComparisonDecisionSummary = ({
|
|
|
8604
8925
|
multiVectorCollapsedCases: candidateEntry.traceSummary?.multiVectorCollapsedCases,
|
|
8605
8926
|
multiVectorLexicalHitCases: candidateEntry.traceSummary?.multiVectorLexicalHitCases,
|
|
8606
8927
|
multiVectorVectorHitCases: candidateEntry.traceSummary?.multiVectorVectorHitCases,
|
|
8928
|
+
evidenceReconcileCases: candidateEntry.traceSummary?.stageCounts?.evidence_reconcile,
|
|
8607
8929
|
runtimeCandidateBudgetExhaustedCases: candidateEntry.traceSummary?.runtimeCandidateBudgetExhaustedCases,
|
|
8608
8930
|
runtimeUnderfilledTopKCases: candidateEntry.traceSummary?.runtimeUnderfilledTopKCases,
|
|
8609
8931
|
passingRate: candidateEntry.response.passingRate,
|
|
@@ -8618,6 +8940,7 @@ var buildRAGRetrievalComparisonDecisionSummary = ({
|
|
|
8618
8940
|
winnerByMultivectorCollapsedCases: comparison.summary.bestByMultivectorCollapsedCases,
|
|
8619
8941
|
winnerByMultivectorLexicalHitCases: comparison.summary.bestByMultivectorLexicalHitCases,
|
|
8620
8942
|
winnerByMultivectorVectorHitCases: comparison.summary.bestByMultivectorVectorHitCases,
|
|
8943
|
+
winnerByEvidenceReconcileCases: comparison.summary.bestByEvidenceReconcileCases,
|
|
8621
8944
|
winnerByLowestRuntimeCandidateBudgetExhaustedCases: comparison.summary.bestByLowestRuntimeCandidateBudgetExhaustedCases,
|
|
8622
8945
|
winnerByLowestRuntimeUnderfilledTopKCases: comparison.summary.bestByLowestRuntimeUnderfilledTopKCases
|
|
8623
8946
|
};
|
|
@@ -9325,6 +9648,27 @@ var selectComparisonEntryByLowestTraceMetric = (entries, idKey, metric) => {
|
|
|
9325
9648
|
const winner = ranked[0];
|
|
9326
9649
|
return typeof winner?.[idKey] === "string" ? winner[idKey] : undefined;
|
|
9327
9650
|
};
|
|
9651
|
+
/**
 * Picks the comparison entry with the highest trace count for a given stage.
 *
 * Entries are ranked by stage count (descending), then passing rate
 * (descending), then average F1 (descending), then average latency
 * (ascending). The input collection is copied before sorting.
 *
 * @param {Iterable<object>} entries - Comparison entries carrying `traceSummary` and `response`.
 * @param {string} idKey - Property on the entry that holds its identifier.
 * @param {string} stage - Key looked up in `traceSummary.stageCounts`.
 * @returns {string | undefined} The winner's identifier, or undefined when there
 *   are no entries, the best stage count is 0, or the identifier is not a string.
 */
var selectComparisonEntryByTraceStageCount = (entries, idKey, stage) => {
  // A missing trace summary or stage entry counts as zero occurrences.
  const stageCountOf = (entry) => entry.traceSummary?.stageCounts?.[stage] ?? 0;
  const compareByStageThenQuality = (left, right) => {
    const leftCount = stageCountOf(left);
    const rightCount = stageCountOf(right);
    if (leftCount !== rightCount) {
      return rightCount - leftCount;
    }
    const rightResponse = right.response;
    const leftResponse = left.response;
    if (rightResponse.passingRate !== leftResponse.passingRate) {
      return rightResponse.passingRate - leftResponse.passingRate;
    }
    const rightSummary = rightResponse.summary;
    const leftSummary = leftResponse.summary;
    if (rightSummary.averageF1 !== leftSummary.averageF1) {
      return rightSummary.averageF1 - leftSummary.averageF1;
    }
    // Final tie-breaker: the faster entry ranks first.
    return leftSummary.averageLatencyMs - rightSummary.averageLatencyMs;
  };
  const [winner] = Array.from(entries).sort(compareByStageThenQuality);
  if (!winner) {
    return;
  }
  if (stageCountOf(winner) === 0) {
    // Nobody ran the stage, so there is no meaningful "best" entry.
    return;
  }
  const winnerId = winner[idKey];
  return typeof winnerId === "string" ? winnerId : undefined;
};
|
|
9328
9672
|
var resolveRetrievalMode = (candidate) => {
|
|
9329
9673
|
if (!candidate.retrieval) {
|
|
9330
9674
|
return "vector";
|
|
@@ -9419,6 +9763,8 @@ var compareRAGRetrievalTraceSummaries = (current, previous) => ({
|
|
|
9419
9763
|
multiVectorVectorHitCasesDelta: current.multiVectorVectorHitCases - previous.multiVectorVectorHitCases,
|
|
9420
9764
|
multiVectorLexicalHitCasesDelta: current.multiVectorLexicalHitCases - previous.multiVectorLexicalHitCases,
|
|
9421
9765
|
multiVectorCollapsedCasesDelta: current.multiVectorCollapsedCases - previous.multiVectorCollapsedCases,
|
|
9766
|
+
officeEvidenceReconcileCasesDelta: current.officeEvidenceReconcileCases - previous.officeEvidenceReconcileCases,
|
|
9767
|
+
pdfEvidenceReconcileCasesDelta: current.pdfEvidenceReconcileCases - previous.pdfEvidenceReconcileCases,
|
|
9422
9768
|
runtimeCandidateBudgetExhaustedCasesDelta: current.runtimeCandidateBudgetExhaustedCases - previous.runtimeCandidateBudgetExhaustedCases,
|
|
9423
9769
|
runtimeUnderfilledTopKCasesDelta: current.runtimeUnderfilledTopKCases - previous.runtimeUnderfilledTopKCases
|
|
9424
9770
|
});
|
|
@@ -9784,10 +10130,129 @@ var generateRAGEvaluationSuiteFromDocuments = ({
|
|
|
9784
10130
|
};
|
|
9785
10131
|
var DEFAULT_NATIVE_PLANNER_BENCHMARK_SUITE_ID = "rag-native-planner-larger-corpus";
|
|
9786
10132
|
var DEFAULT_NATIVE_PLANNER_BENCHMARK_LABEL = "Adaptive Native Planner Benchmark";
|
|
10133
|
+
var DEFAULT_NATIVE_BACKEND_COMPARISON_BENCHMARK_SUITE_ID = "rag-native-backend-larger-corpus";
|
|
10134
|
+
var DEFAULT_NATIVE_BACKEND_COMPARISON_BENCHMARK_LABEL = "Native Backend Comparison Benchmark";
|
|
9787
10135
|
var DEFAULT_NATIVE_PLANNER_BENCHMARK_QUERY = "Which launch checklist phrase is exact wording?";
|
|
10136
|
+
var DEFAULT_NATIVE_BACKEND_HYBRID_QUERY = "aurora promotion checklist wording";
|
|
10137
|
+
var DEFAULT_NATIVE_BACKEND_FILTERED_QUERY = "focus lane launch checklist wording";
|
|
10138
|
+
var DEFAULT_NATIVE_BACKEND_REORDERED_QUERY = "exact aurora focus lane checklist wording";
|
|
10139
|
+
var DEFAULT_NATIVE_BACKEND_GUIDE_QUERY = "which focus lane guide contains exact aurora promotion wording";
|
|
9788
10140
|
var DEFAULT_NATIVE_PLANNER_BENCHMARK_FILTER = {
|
|
9789
10141
|
lane: "focus"
|
|
9790
10142
|
};
|
|
10143
|
+
var DEFAULT_NATIVE_PLANNER_HARD_NEGATIVE_DOCUMENT_IDS = [
|
|
10144
|
+
"focus-distractor-0",
|
|
10145
|
+
"focus-distractor-1",
|
|
10146
|
+
"focus-distractor-2"
|
|
10147
|
+
];
|
|
10148
|
+
/**
 * Deterministic two-dimensional mock embedding for the native backend benchmark.
 *
 * Matching is done on the lower-cased text:
 * - text containing the exact launch-checklist phrase maps to [0.995, 0.005];
 * - text mentioning any focus keyword maps to [1, 0];
 * - everything else maps to [0, 1].
 *
 * @param {string} text - Text to embed.
 * @returns {Promise<number[]>} A freshly allocated two-element vector.
 */
var createRAGNativeBackendBenchmarkMockEmbedding = async (text) => {
  const haystack = text.toLowerCase();
  const mentionsAny = (phrases) => phrases.some((phrase) => haystack.includes(phrase));
  const exactPhrases = [
    "launch checklist exact wording for aurora promotion",
    "launch checklist exact wording"
  ];
  if (mentionsAny(exactPhrases)) {
    return [0.995, 0.005];
  }
  const focusKeywords = ["aurora", "checklist", "focus lane", "exact wording", "guide"];
  if (mentionsAny(focusKeywords)) {
    return [1, 0];
  }
  return [0, 1];
};
|
|
10158
|
+
/**
 * Builds the in-memory chunk corpus used by the native backend benchmark.
 *
 * The corpus is, in order: `noiseCount` noise chunks, three focus-lane
 * distractors, the single focus target chunk, and then two extra focus chunks
 * when `backend` is "sqlite-native" or "postgres" (none for any other value).
 *
 * @param {{ noiseCount?: number, backend?: string } | undefined} input -
 *   Optional settings; `noiseCount` defaults to 5001 and `backend` to "generic".
 * @returns {object[]} Chunk records with `chunkId`, `corpusKey`, `embedding`,
 *   `metadata`, `source` and `text`.
 */
var createRAGNativeBackendBenchmarkCorpus = (input) => {
  const noiseCount = input?.noiseCount ?? 5001;
  const backend = input?.backend ?? "generic";

  // Every focus-lane chunk shares corpus key and lane; only identity, text and
  // (for the target) the embedding differ. A new embedding array is created per chunk.
  const focusChunk = ({ backendFixture, chunkId, documentId, embedding = [1, 0], source, text }) => ({
    chunkId,
    corpusKey: "focus",
    embedding,
    metadata: {
      ...(backendFixture === undefined ? {} : { backendFixture }),
      corpusKey: "focus",
      documentId,
      lane: "focus"
    },
    source,
    text
  });

  const noiseChunks = Array.from({ length: noiseCount }, (_, index) => ({
    chunkId: `noise:${index}`,
    corpusKey: "noise",
    embedding: [0, 1],
    metadata: {
      corpusKey: "noise",
      documentId: `noise-${index}`,
      lane: "noise"
    },
    source: `noise/${index}.md`,
    text: `Background operations note ${index}.`
  }));

  const distractorTexts = [
    "aurora promotion checklist overview",
    "launch checklist wording draft",
    "focus lane promotion runbook notes"
  ];
  const distractorChunks = distractorTexts.map((text, index) => focusChunk({
    chunkId: `focus:distractor:${index}`,
    documentId: `focus-distractor-${index}`,
    source: `focus/distractor-${index}.md`,
    text
  }));

  const targetChunk = focusChunk({
    chunkId: "focus:target",
    documentId: "focus-target",
    embedding: [0.995, 0.005],
    source: "guide/planner-depth.md",
    text: "launch checklist exact wording for aurora promotion in the focus lane"
  });

  let backendSpecificChunks;
  switch (backend) {
    case "sqlite-native":
      backendSpecificChunks = [
        focusChunk({
          backendFixture: "sqlite-native",
          chunkId: "focus:sqlite:phrase-matrix",
          documentId: "focus-sqlite-phrase-matrix",
          source: "guide/sqlite-phrase-matrix.md",
          text: "exact aurora focus lane checklist wording matrix for sqlite validation"
        }),
        focusChunk({
          backendFixture: "sqlite-native",
          chunkId: "focus:sqlite:guide-table",
          documentId: "focus-sqlite-guide-table",
          source: "guide/sqlite-guide-table.md",
          text: "which focus lane guide contains aurora promotion wording draft table for sqlite operators"
        })
      ];
      break;
    case "postgres":
      backendSpecificChunks = [
        focusChunk({
          backendFixture: "postgres",
          chunkId: "focus:postgres:appendix",
          documentId: "focus-postgres-appendix",
          source: "guide/postgres-appendix.md",
          text: "which focus lane guide contains exact aurora promotion wording appendix for postgres release review"
        }),
        focusChunk({
          backendFixture: "postgres",
          chunkId: "focus:postgres:alternatives",
          documentId: "focus-postgres-alternatives",
          source: "guide/postgres-alternatives.md",
          text: "aurora promotion checklist wording alternatives and exact focus lane phrasing for postgres audits"
        })
      ];
      break;
    default:
      backendSpecificChunks = [];
  }

  return noiseChunks.concat(distractorChunks, [targetChunk], backendSpecificChunks);
};
|
|
9791
10256
|
var createRAGAdaptiveNativePlannerBenchmarkSuite = (input) => createRAGEvaluationSuite({
|
|
9792
10257
|
description: input?.description ?? "Stress-tests larger-corpus native planner selection, candidate-budget pressure, and transformed-query recovery on filtered retrieval.",
|
|
9793
10258
|
id: input?.id ?? DEFAULT_NATIVE_PLANNER_BENCHMARK_SUITE_ID,
|
|
@@ -9839,6 +10304,102 @@ var createRAGAdaptiveNativePlannerBenchmarkSnapshot = (input) => {
|
|
|
9839
10304
|
version: input?.version
|
|
9840
10305
|
});
|
|
9841
10306
|
};
|
|
10307
|
+
/**
 * Creates the evaluation suite for the native backend comparison benchmark.
 *
 * The suite holds five cases that all expect the "focus-target" document under
 * the default planner filter and hard negatives; the cases differ only in id,
 * label and query. Caller-supplied `metadata` is spread last, so it overrides
 * the built-in metadata keys.
 *
 * @param {{ description?: string, id?: string, label?: string, topK?: number, metadata?: object } | undefined} input -
 *   Optional overrides; `topK` defaults to 1 for the suite and every case.
 * @returns {*} Whatever `createRAGEvaluationSuite` returns for the assembled definition.
 */
var createRAGNativeBackendComparisonBenchmarkSuite = (input) => {
  // [id, label, query] triples; everything else is identical across cases.
  const caseDefinitions = [
    ["planner-pressure-exact-phrase", "Exact phrase survives larger-corpus native pressure", DEFAULT_NATIVE_PLANNER_BENCHMARK_QUERY],
    ["planner-pressure-hybrid-phrase", "Hybrid retrieval survives filtered lexical pressure", DEFAULT_NATIVE_BACKEND_HYBRID_QUERY],
    ["planner-pressure-filtered-lane-query", "Filtered lane query survives broader corpus noise", DEFAULT_NATIVE_BACKEND_FILTERED_QUERY],
    ["planner-pressure-reordered-phrase", "Reordered phrase survives transform pressure", DEFAULT_NATIVE_BACKEND_REORDERED_QUERY],
    ["planner-pressure-guide-query", "Guide attribution survives filtered corpus pressure", DEFAULT_NATIVE_BACKEND_GUIDE_QUERY]
  ];
  const cases = caseDefinitions.map(([id, label, query]) => ({
    expectedDocumentIds: ["focus-target"],
    // Copies keep each case independent of the shared default objects.
    filter: { ...DEFAULT_NATIVE_PLANNER_BENCHMARK_FILTER },
    hardNegativeDocumentIds: [
      ...DEFAULT_NATIVE_PLANNER_HARD_NEGATIVE_DOCUMENT_IDS
    ],
    id,
    label,
    query,
    topK: input?.topK ?? 1
  }));
  return createRAGEvaluationSuite({
    description: input?.description ?? "Captures larger-corpus native backend parity with filtered vector pressure and harder hybrid retrieval cases so sqlite-native and postgres runs can be compared over time.",
    id: input?.id ?? DEFAULT_NATIVE_BACKEND_COMPARISON_BENCHMARK_SUITE_ID,
    input: {
      cases,
      filter: { ...DEFAULT_NATIVE_PLANNER_BENCHMARK_FILTER },
      retrieval: "vector",
      topK: input?.topK ?? 1
    },
    label: input?.label ?? DEFAULT_NATIVE_BACKEND_COMPARISON_BENCHMARK_LABEL,
    metadata: {
      benchmarkKind: "native_backend_comparison",
      benchmarkScope: "larger_corpus",
      expectedSignals: [
        "backend-tagged runtime artifacts",
        "selected native planner profile",
        "hybrid filtered retrieval",
        "candidate-budget exhaustion",
        "underfilled topk",
        "query transform pressure"
      ],
      recommendedGroupKey: "runtime-native-backend-parity",
      recommendedTags: ["runtime", "backend", "native"],
      ...input?.metadata
    }
  });
};
|
|
10389
|
+
/**
 * Creates a suite snapshot for the native backend comparison benchmark.
 *
 * Uses `input.suite` when given, otherwise builds the default comparison
 * benchmark suite. Caller-supplied `metadata` is spread last, so it overrides
 * the built-in `artifactKind` / `persistForReleaseHistory` keys.
 *
 * @param {{ suite?: object, createdAt?: *, id?: *, version?: *, metadata?: object } | undefined} input -
 *   Optional snapshot settings, forwarded to `createRAGEvaluationSuiteSnapshot`.
 * @returns {*} Whatever `createRAGEvaluationSuiteSnapshot` returns.
 */
var createRAGNativeBackendComparisonBenchmarkSnapshot = (input) => {
  const benchmarkSuite = input?.suite ?? createRAGNativeBackendComparisonBenchmarkSuite();
  const snapshotMetadata = {
    artifactKind: "native_backend_comparison_benchmark",
    persistForReleaseHistory: true,
    ...input?.metadata
  };
  const snapshotInput = {
    createdAt: input?.createdAt,
    id: input?.id,
    metadata: snapshotMetadata,
    suite: benchmarkSuite,
    version: input?.version
  };
  return createRAGEvaluationSuiteSnapshot(snapshotInput);
};
|
|
9842
10403
|
var createRAGEvaluationSuiteSnapshot = ({
|
|
9843
10404
|
suite,
|
|
9844
10405
|
id,
|
|
@@ -10047,6 +10608,7 @@ var summarizeRAGRetrievalComparison = (entries) => ({
|
|
|
10047
10608
|
bestByMultivectorCollapsedCases: selectComparisonEntryByTraceMetric(entries, "retrievalId", "multiVectorCollapsedCases"),
|
|
10048
10609
|
bestByMultivectorLexicalHitCases: selectComparisonEntryByTraceMetric(entries, "retrievalId", "multiVectorLexicalHitCases"),
|
|
10049
10610
|
bestByMultivectorVectorHitCases: selectComparisonEntryByTraceMetric(entries, "retrievalId", "multiVectorVectorHitCases"),
|
|
10611
|
+
bestByEvidenceReconcileCases: selectComparisonEntryByTraceStageCount(entries, "retrievalId", "evidence_reconcile"),
|
|
10050
10612
|
bestByLowestRuntimeCandidateBudgetExhaustedCases: selectComparisonEntryByLowestTraceMetric(entries, "retrievalId", "runtimeCandidateBudgetExhaustedCases"),
|
|
10051
10613
|
bestByLowestRuntimeUnderfilledTopKCases: selectComparisonEntryByLowestTraceMetric(entries, "retrievalId", "runtimeUnderfilledTopKCases")
|
|
10052
10614
|
});
|
|
@@ -13803,32 +14365,126 @@ var splitMarkdownPreferredChunkUnits = (value) => {
|
|
|
13803
14365
|
flushFence();
|
|
13804
14366
|
return units;
|
|
13805
14367
|
};
|
|
14368
|
+
/**
 * Finds the closest preceding heading on the same PDF page.
 *
 * Walks `blockEntries` from the end towards the start. Entries on another page
 * are skipped, and the walk stops at the first entry whose numeric page number
 * is lower than `pageNumber`. Only same-page entries that have no
 * `pdfSemanticRole` and whose `pdfTextKind` is "paragraph" are passed to
 * `inferPDFBlockHeading`; the first truthy result is returned.
 *
 * @param {Array<object | undefined>} blockEntries - Previously collected block entries.
 * @param {number | undefined} pageNumber - Page to search; non-numbers yield undefined.
 * @returns {*} The inferred heading, or undefined when none is found.
 */
var findNearestPDFContextHeading = (blockEntries, pageNumber) => {
  if (typeof pageNumber !== "number") {
    return undefined;
  }
  let cursor = blockEntries.length;
  while (cursor > 0) {
    cursor -= 1;
    const candidate = blockEntries[cursor];
    if (!candidate) {
      continue;
    }
    if (candidate.pageNumber !== pageNumber) {
      const isEarlierPage = typeof candidate.pageNumber === "number" && candidate.pageNumber < pageNumber;
      if (isEarlierPage) {
        // Reached an earlier page: stop scanning.
        break;
      }
      continue;
    }
    const isPlainParagraph = !candidate.pdfSemanticRole && candidate.pdfTextKind === "paragraph";
    if (!isPlainParagraph) {
      continue;
    }
    const heading = inferPDFBlockHeading(candidate.text);
    if (heading) {
      return heading;
    }
  }
  return undefined;
};
|
|
14390
|
+
/**
 * Extracts column headers from the first non-empty line of table-like text.
 *
 * Lines are split on newlines and passed through `normalizeWhitespace`; the
 * first remaining line is treated as the header row and split on " | ".
 *
 * @param {string} text - Table text with one row per line.
 * @returns {string[] | undefined} The header cells, or undefined when the first
 *   line has no " | " separator or yields fewer than two non-empty cells.
 */
var getPDFTableHeaders = (text) => {
  const [headerLine] = text.split("\n").map((line) => normalizeWhitespace(line)).filter(Boolean);
  if (!headerLine || !headerLine.includes(" | ")) {
    return undefined;
  }
  const headerCells = headerLine.split(" | ").map((cell) => normalizeWhitespace(cell)).filter(Boolean);
  if (headerCells.length < 2) {
    // A single surviving cell is not a usable table header.
    return undefined;
  }
  return headerCells;
};
|
|
13806
14400
|
var pdfNativeStructureUnits = (metadata) => {
|
|
13807
14401
|
const blocks = Array.isArray(metadata?.pdfTextBlocks) ? metadata.pdfTextBlocks : [];
|
|
13808
|
-
const
|
|
14402
|
+
const blockEntries = [];
|
|
13809
14403
|
for (const block of blocks) {
|
|
13810
14404
|
if (!block || typeof block !== "object") {
|
|
13811
14405
|
continue;
|
|
13812
14406
|
}
|
|
13813
|
-
const
|
|
14407
|
+
const rawText = typeof block.text === "string" ? block.text : "";
|
|
14408
|
+
const pdfTextKind = block.textKind === "table_like" ? "table_like" : "paragraph";
|
|
14409
|
+
const text = pdfTextKind === "table_like" ? rawText.split(`
|
|
14410
|
+
`).map((line) => normalizeWhitespace(line)).filter(Boolean).join(`
|
|
14411
|
+
`) : normalizeWhitespace(rawText);
|
|
13814
14412
|
if (!text) {
|
|
13815
14413
|
continue;
|
|
13816
14414
|
}
|
|
13817
14415
|
const pageNumber = typeof block.pageNumber === "number" && Number.isFinite(block.pageNumber) ? block.pageNumber : undefined;
|
|
13818
14416
|
const pdfBlockNumber = typeof block.blockNumber === "number" && Number.isFinite(block.blockNumber) ? block.blockNumber : undefined;
|
|
13819
|
-
const
|
|
13820
|
-
const
|
|
13821
|
-
|
|
14417
|
+
const previousBlock = blockEntries.at(-1);
|
|
14418
|
+
const previousFigureCaption = previousBlock && previousBlock.pageNumber === pageNumber && previousBlock.pdfSemanticRole === "figure_caption" ? previousBlock : undefined;
|
|
14419
|
+
const pdfSemanticRole = block.semanticRole === "figure_caption" ? "figure_caption" : block.semanticRole === "figure_body" ? "figure_body" : pdfTextKind === "paragraph" && previousFigureCaption && !inferPDFBlockHeading(text) ? "figure_body" : undefined;
|
|
14420
|
+
const currentBlockHeading = pdfTextKind === "paragraph" && !pdfSemanticRole ? inferPDFBlockHeading(text) : undefined;
|
|
14421
|
+
const contextualHeading = pdfTextKind === "table_like" ? findNearestPDFContextHeading(blockEntries, pageNumber) : undefined;
|
|
14422
|
+
const contextualTableTitle = contextualHeading && pdfTextKind === "table_like" ? /\btable\b/i.test(contextualHeading) ? contextualHeading : `${contextualHeading} Table` : undefined;
|
|
14423
|
+
const pdfTableHeaders = pdfTextKind === "table_like" ? getPDFTableHeaders(text) : undefined;
|
|
14424
|
+
const pdfTableHeaderText = pdfTextKind === "table_like" ? text.split(`
|
|
14425
|
+
`).map((line) => normalizeWhitespace(line)).filter(Boolean)[0] : undefined;
|
|
14426
|
+
const pdfTableRowCount = pdfTextKind === "table_like" ? text.split(`
|
|
14427
|
+
`).map((line) => normalizeWhitespace(line)).filter(Boolean).length : undefined;
|
|
14428
|
+
const pdfTableBodyRowCount = typeof pdfTableRowCount === "number" ? Math.max(0, pdfTableRowCount - 1) : undefined;
|
|
14429
|
+
const pdfTableBodyRowStart = typeof pdfTableBodyRowCount === "number" && pdfTableBodyRowCount > 0 ? 1 : undefined;
|
|
14430
|
+
const pdfTableBodyRowEnd = typeof pdfTableBodyRowCount === "number" && pdfTableBodyRowCount > 0 ? pdfTableBodyRowCount : undefined;
|
|
14431
|
+
const pdfTableColumnCount = Array.isArray(pdfTableHeaders) ? pdfTableHeaders.length : undefined;
|
|
14432
|
+
const pdfTableSignature = Array.isArray(pdfTableHeaders) && pdfTableHeaders.length > 0 ? pdfTableHeaders.join(" | ") : undefined;
|
|
14433
|
+
const pdfFigureLabel = pdfSemanticRole === "figure_caption" ? extractPDFFigureLabel(text) : previousFigureCaption?.pdfFigureLabel;
|
|
14434
|
+
const pdfFigureCaptionBlockNumber = pdfSemanticRole === "figure_caption" ? pdfBlockNumber : previousFigureCaption?.pdfFigureCaptionBlockNumber;
|
|
14435
|
+
const baseSectionTitle = pdfSemanticRole === "figure_caption" ? pageNumber ? `Page ${pageNumber} Figure Caption` : "Figure Caption" : pdfSemanticRole === "figure_body" ? pdfFigureLabel ? `${pdfFigureLabel} Body` : pageNumber ? `Page ${pageNumber} Figure Body` : "Figure Body" : currentBlockHeading ? currentBlockHeading : contextualTableTitle ? contextualTableTitle : pageNumber ? pdfTextKind === "table_like" ? `Page ${pageNumber} Table Block` : `Page ${pageNumber} Text Block` : pdfTextKind === "table_like" ? "Table Block" : "Text Block";
|
|
14436
|
+
blockEntries.push({
|
|
14437
|
+
baseSectionTitle,
|
|
13822
14438
|
pageNumber,
|
|
13823
14439
|
pdfBlockNumber,
|
|
14440
|
+
...typeof pdfFigureCaptionBlockNumber === "number" ? { pdfFigureCaptionBlockNumber } : {},
|
|
14441
|
+
...pdfFigureLabel ? { pdfFigureLabel } : {},
|
|
14442
|
+
...pdfSemanticRole ? { pdfSemanticRole } : {},
|
|
14443
|
+
...typeof pdfTableBodyRowEnd === "number" ? { pdfTableBodyRowEnd } : {},
|
|
14444
|
+
...typeof pdfTableBodyRowCount === "number" ? { pdfTableBodyRowCount } : {},
|
|
14445
|
+
...typeof pdfTableBodyRowStart === "number" ? { pdfTableBodyRowStart } : {},
|
|
14446
|
+
...pdfTextKind === "table_like" ? { pdfTableChunkKind: "full_table" } : {},
|
|
14447
|
+
...typeof pdfTableColumnCount === "number" ? { pdfTableColumnCount } : {},
|
|
14448
|
+
...typeof pdfTableHeaderText === "string" ? { pdfTableHeaderText } : {},
|
|
14449
|
+
...Array.isArray(pdfTableHeaders) && pdfTableHeaders.length > 0 ? { pdfTableHeaders } : {},
|
|
14450
|
+
...typeof pdfTableRowCount === "number" ? { pdfTableRowCount } : {},
|
|
14451
|
+
...pdfTableSignature ? { pdfTableSignature } : {},
|
|
13824
14452
|
pdfTextKind,
|
|
13825
|
-
|
|
14453
|
+
text
|
|
14454
|
+
});
|
|
14455
|
+
}
|
|
14456
|
+
const titleCounts = new Map;
|
|
14457
|
+
for (const block of blockEntries) {
|
|
14458
|
+
titleCounts.set(block.baseSectionTitle, (titleCounts.get(block.baseSectionTitle) ?? 0) + 1);
|
|
14459
|
+
}
|
|
14460
|
+
const units = [];
|
|
14461
|
+
for (const block of blockEntries) {
|
|
14462
|
+
const sectionTitle = (titleCounts.get(block.baseSectionTitle) ?? 0) > 1 && typeof block.pdfBlockNumber === "number" ? `${block.baseSectionTitle} ${block.pdfBlockNumber}` : block.baseSectionTitle;
|
|
14463
|
+
units.push({
|
|
14464
|
+
pageNumber: block.pageNumber,
|
|
14465
|
+
pdfBlockNumber: block.pdfBlockNumber,
|
|
14466
|
+
...typeof block.pdfFigureCaptionBlockNumber === "number" ? {
|
|
14467
|
+
pdfFigureCaptionBlockNumber: block.pdfFigureCaptionBlockNumber
|
|
14468
|
+
} : {},
|
|
14469
|
+
...block.pdfFigureLabel ? { pdfFigureLabel: block.pdfFigureLabel } : {},
|
|
14470
|
+
...block.pdfSemanticRole ? { pdfSemanticRole: block.pdfSemanticRole } : {},
|
|
14471
|
+
...typeof block.pdfTableBodyRowEnd === "number" ? { pdfTableBodyRowEnd: block.pdfTableBodyRowEnd } : {},
|
|
14472
|
+
...typeof block.pdfTableBodyRowCount === "number" ? { pdfTableBodyRowCount: block.pdfTableBodyRowCount } : {},
|
|
14473
|
+
...typeof block.pdfTableBodyRowStart === "number" ? { pdfTableBodyRowStart: block.pdfTableBodyRowStart } : {},
|
|
14474
|
+
...block.pdfTableChunkKind ? { pdfTableChunkKind: block.pdfTableChunkKind } : {},
|
|
14475
|
+
...typeof block.pdfTableColumnCount === "number" ? { pdfTableColumnCount: block.pdfTableColumnCount } : {},
|
|
14476
|
+
...typeof block.pdfTableHeaderText === "string" ? { pdfTableHeaderText: block.pdfTableHeaderText } : {},
|
|
14477
|
+
...Array.isArray(block.pdfTableHeaders) && block.pdfTableHeaders.length > 0 ? { pdfTableHeaders: block.pdfTableHeaders } : {},
|
|
14478
|
+
...typeof block.pdfTableRowCount === "number" ? { pdfTableRowCount: block.pdfTableRowCount } : {},
|
|
14479
|
+
...block.pdfTableSignature ? { pdfTableSignature: block.pdfTableSignature } : {},
|
|
14480
|
+
pdfTextKind: block.pdfTextKind,
|
|
14481
|
+
preferredChunkUnits: block.pdfTextKind === "table_like" ? block.text.split(`
|
|
13826
14482
|
`).filter(Boolean) : undefined,
|
|
13827
14483
|
sectionDepth: 1,
|
|
13828
14484
|
sectionKind: "pdf_block",
|
|
13829
14485
|
sectionPath: [sectionTitle],
|
|
13830
14486
|
sectionTitle,
|
|
13831
|
-
text
|
|
14487
|
+
text: block.text
|
|
13832
14488
|
});
|
|
13833
14489
|
}
|
|
13834
14490
|
return units;
|
|
@@ -13837,6 +14493,10 @@ var officeNativeStructureUnits = (metadata) => {
|
|
|
13837
14493
|
const blocks = Array.isArray(metadata?.officeBlocks) ? metadata.officeBlocks : [];
|
|
13838
14494
|
const units = [];
|
|
13839
14495
|
const headingStack = [];
|
|
14496
|
+
const headingSiblingCounts = new Map;
|
|
14497
|
+
let pendingListContextText;
|
|
14498
|
+
let pendingTableContextText;
|
|
14499
|
+
let consumedOfficeListUntil = -1;
|
|
13840
14500
|
const decorateOfficeSectionText = (text, sectionTitle) => {
|
|
13841
14501
|
if (!sectionTitle || text.includes(sectionTitle)) {
|
|
13842
14502
|
return text;
|
|
@@ -13845,6 +14505,9 @@ var officeNativeStructureUnits = (metadata) => {
|
|
|
13845
14505
|
${text}`);
|
|
13846
14506
|
};
|
|
13847
14507
|
for (const [index, block] of blocks.entries()) {
|
|
14508
|
+
if (index <= consumedOfficeListUntil) {
|
|
14509
|
+
continue;
|
|
14510
|
+
}
|
|
13848
14511
|
if (!block || typeof block !== "object") {
|
|
13849
14512
|
continue;
|
|
13850
14513
|
}
|
|
@@ -13855,20 +14518,39 @@ ${text}`);
|
|
|
13855
14518
|
const officeBlockNumber = typeof block.blockNumber === "number" && Number.isFinite(block.blockNumber) ? block.blockNumber : undefined;
|
|
13856
14519
|
const officeBlockKind = block.blockKind === "title" || block.blockKind === "heading" || block.blockKind === "list" || block.blockKind === "table" ? block.blockKind : "paragraph";
|
|
13857
14520
|
const headingLevel = typeof block.headingLevel === "number" && Number.isFinite(block.headingLevel) ? block.headingLevel : undefined;
|
|
14521
|
+
const officeListLevel = typeof block.listLevel === "number" && Number.isFinite(block.listLevel) ? block.listLevel : undefined;
|
|
14522
|
+
const officeTableBodyRowCount = typeof block.tableBodyRowCount === "number" && Number.isFinite(block.tableBodyRowCount) ? block.tableBodyRowCount : undefined;
|
|
14523
|
+
const officeTableColumnCount = typeof block.tableColumnCount === "number" && Number.isFinite(block.tableColumnCount) ? block.tableColumnCount : undefined;
|
|
14524
|
+
const officeTableHeaderText = typeof block.tableHeaderText === "string" && block.tableHeaderText.length > 0 ? block.tableHeaderText : undefined;
|
|
14525
|
+
const officeTableHeaders = Array.isArray(block.tableHeaders) && block.tableHeaders.length > 0 ? block.tableHeaders.filter((value) => typeof value === "string" && value.length > 0) : undefined;
|
|
14526
|
+
const officeTableRowCount = typeof block.tableRowCount === "number" && Number.isFinite(block.tableRowCount) ? block.tableRowCount : undefined;
|
|
14527
|
+
const officeTableSignature = typeof block.tableSignature === "string" && block.tableSignature.length > 0 ? block.tableSignature : undefined;
|
|
13858
14528
|
if (officeBlockKind === "title" || officeBlockKind === "heading") {
|
|
13859
14529
|
const level = officeBlockKind === "title" ? 1 : headingLevel ?? 1;
|
|
13860
|
-
headingStack
|
|
14530
|
+
const parentScope = headingStack.slice(0, Math.max(0, level - 1)).join(" > ");
|
|
14531
|
+
const headingKey = `${level}:${parentScope}:${text}`;
|
|
14532
|
+
const headingCount = (headingSiblingCounts.get(headingKey) ?? 0) + 1;
|
|
14533
|
+
headingSiblingCounts.set(headingKey, headingCount);
|
|
14534
|
+
const resolvedHeadingText = headingCount > 1 ? `${text} (${headingCount})` : text;
|
|
14535
|
+
headingStack[level - 1] = resolvedHeadingText;
|
|
13861
14536
|
headingStack.length = level;
|
|
13862
|
-
const
|
|
13863
|
-
const
|
|
13864
|
-
if (
|
|
14537
|
+
const nextBlock2 = blocks[index + 1];
|
|
14538
|
+
const nextKind2 = nextBlock2 && typeof nextBlock2 === "object" ? nextBlock2.blockKind : undefined;
|
|
14539
|
+
if (nextKind2 === "title" || nextKind2 === "heading" || nextKind2 === "list" || nextKind2 === "table" || !nextBlock2) {
|
|
13865
14540
|
units.push({
|
|
13866
14541
|
officeBlockKind,
|
|
14542
|
+
officeListLevel,
|
|
13867
14543
|
officeBlockNumber,
|
|
14544
|
+
...typeof officeTableBodyRowCount === "number" ? { officeTableBodyRowCount } : {},
|
|
14545
|
+
...typeof officeTableColumnCount === "number" ? { officeTableColumnCount } : {},
|
|
14546
|
+
...typeof officeTableHeaderText === "string" ? { officeTableHeaderText } : {},
|
|
14547
|
+
...Array.isArray(officeTableHeaders) && officeTableHeaders.length > 0 ? { officeTableHeaders } : {},
|
|
14548
|
+
...typeof officeTableRowCount === "number" ? { officeTableRowCount } : {},
|
|
14549
|
+
...typeof officeTableSignature === "string" ? { officeTableSignature } : {},
|
|
13868
14550
|
sectionDepth: headingStack.length,
|
|
13869
14551
|
sectionKind: "office_heading",
|
|
13870
14552
|
sectionPath: [...headingStack],
|
|
13871
|
-
sectionTitle:
|
|
14553
|
+
sectionTitle: resolvedHeadingText,
|
|
13872
14554
|
text
|
|
13873
14555
|
});
|
|
13874
14556
|
}
|
|
@@ -13876,16 +14558,143 @@ ${text}`);
|
|
|
13876
14558
|
}
|
|
13877
14559
|
const sectionPath = headingStack.length > 0 ? [...headingStack] : undefined;
|
|
13878
14560
|
const sectionTitle = sectionPath?.at(-1);
|
|
14561
|
+
if (officeBlockKind === "list") {
|
|
14562
|
+
const runTexts = [];
|
|
14563
|
+
const runLevels = [];
|
|
14564
|
+
let runEnd = index;
|
|
14565
|
+
for (;runEnd < blocks.length; runEnd += 1) {
|
|
14566
|
+
const runBlock = blocks[runEnd];
|
|
14567
|
+
if (!runBlock || typeof runBlock !== "object") {
|
|
14568
|
+
break;
|
|
14569
|
+
}
|
|
14570
|
+
if (runBlock.blockKind !== "list") {
|
|
14571
|
+
break;
|
|
14572
|
+
}
|
|
14573
|
+
const runText = typeof runBlock.text === "string" ? normalizeWhitespace(runBlock.text) : "";
|
|
14574
|
+
if (!runText) {
|
|
14575
|
+
break;
|
|
14576
|
+
}
|
|
14577
|
+
runTexts.push(runText);
|
|
14578
|
+
if (typeof runBlock.listLevel === "number" && Number.isFinite(runBlock.listLevel)) {
|
|
14579
|
+
runLevels.push(runBlock.listLevel);
|
|
14580
|
+
}
|
|
14581
|
+
}
|
|
14582
|
+
runEnd -= 1;
|
|
14583
|
+
const nextAfterRun = blocks[runEnd + 1];
|
|
14584
|
+
const nextAfterRunKind = nextAfterRun && typeof nextAfterRun === "object" ? nextAfterRun.blockKind : undefined;
|
|
14585
|
+
const nextAfterRunText = nextAfterRun && typeof nextAfterRun === "object" && typeof nextAfterRun.text === "string" ? normalizeWhitespace(nextAfterRun.text) : undefined;
|
|
14586
|
+
const nextAfterRunNext = blocks[runEnd + 2];
|
|
14587
|
+
const nextAfterRunNextKind = nextAfterRunNext && typeof nextAfterRunNext === "object" ? nextAfterRunNext.blockKind : undefined;
|
|
14588
|
+
const nextAfterRunNextText = nextAfterRunNext && typeof nextAfterRunNext === "object" && typeof nextAfterRunNext.text === "string" ? nextAfterRunNext.text : undefined;
|
|
14589
|
+
const nextAfterRunNextNext = blocks[runEnd + 3];
|
|
14590
|
+
const nextAfterRunNextNextKind = nextAfterRunNextNext && typeof nextAfterRunNextNext === "object" ? nextAfterRunNextNext.blockKind : undefined;
|
|
14591
|
+
const nextAfterRunNextNextNext = blocks[runEnd + 4];
|
|
14592
|
+
const nextAfterRunNextNextNextKind = nextAfterRunNextNextNext && typeof nextAfterRunNextNextNext === "object" ? nextAfterRunNextNextNext.blockKind : undefined;
|
|
14593
|
+
const trailingTableBridgeText = nextAfterRunKind === "paragraph" && typeof nextAfterRunText === "string" && nextAfterRunText.length <= 200 && (nextAfterRunNextKind === "table" || nextAfterRunNextKind === "heading" && typeof nextAfterRunNextText === "string" && /\btable\b/i.test(nextAfterRunNextText) && (nextAfterRunNextNextKind === "table" || nextAfterRunNextNextKind === "paragraph" && nextAfterRunNextNextNextKind === "table")) ? nextAfterRunText : undefined;
|
|
14594
|
+
const officeListContextText2 = normalizeWhitespace([
|
|
14595
|
+
...typeof pendingListContextText === "string" ? [pendingListContextText] : [],
|
|
14596
|
+
...typeof trailingTableBridgeText === "string" ? [trailingTableBridgeText] : []
|
|
14597
|
+
].join(`
|
|
14598
|
+
|
|
14599
|
+
`));
|
|
14600
|
+
const distinctLevels = [...new Set(runLevels)];
|
|
14601
|
+
const officeListLevel2 = distinctLevels.length === 1 ? distinctLevels[0] : undefined;
|
|
14602
|
+
const groupedListText = normalizeWhitespace([
|
|
14603
|
+
...officeListContextText2 ? [officeListContextText2] : [],
|
|
14604
|
+
...runTexts
|
|
14605
|
+
].join(`
|
|
14606
|
+
|
|
14607
|
+
`));
|
|
14608
|
+
if (typeof trailingTableBridgeText === "string") {
|
|
14609
|
+
pendingTableContextText = trailingTableBridgeText;
|
|
14610
|
+
consumedOfficeListUntil = runEnd + 1;
|
|
14611
|
+
} else {
|
|
14612
|
+
consumedOfficeListUntil = runEnd;
|
|
14613
|
+
}
|
|
14614
|
+
pendingListContextText = undefined;
|
|
14615
|
+
units.push({
|
|
14616
|
+
officeBlockKind,
|
|
14617
|
+
...officeListContextText2 ? { officeListContextText: officeListContextText2 } : {},
|
|
14618
|
+
officeListGroupItemCount: runTexts.length,
|
|
14619
|
+
...typeof officeListLevel2 === "number" ? { officeListLevel: officeListLevel2 } : {},
|
|
14620
|
+
...distinctLevels.length > 0 ? { officeListLevels: distinctLevels } : {},
|
|
14621
|
+
officeBlockNumber,
|
|
14622
|
+
preferredChunkUnits: [
|
|
14623
|
+
...officeListContextText2 ? [officeListContextText2] : [],
|
|
14624
|
+
...runTexts
|
|
14625
|
+
],
|
|
14626
|
+
sectionDepth: sectionPath?.length,
|
|
14627
|
+
sectionKind: "office_block",
|
|
14628
|
+
sectionPath,
|
|
14629
|
+
sectionTitle,
|
|
14630
|
+
text: groupedListText
|
|
14631
|
+
});
|
|
14632
|
+
continue;
|
|
14633
|
+
}
|
|
14634
|
+
const nextBlock = blocks[index + 1];
|
|
14635
|
+
const nextKind = nextBlock && typeof nextBlock === "object" ? nextBlock.blockKind : undefined;
|
|
14636
|
+
const nextText = nextBlock && typeof nextBlock === "object" && typeof nextBlock.text === "string" ? nextBlock.text : undefined;
|
|
14637
|
+
const nextNextBlock = blocks[index + 2];
|
|
14638
|
+
const nextNextKind = nextNextBlock && typeof nextNextBlock === "object" ? nextNextBlock.blockKind : undefined;
|
|
14639
|
+
const nextNextText = nextNextBlock && typeof nextNextBlock === "object" && typeof nextNextBlock.text === "string" ? nextNextBlock.text : undefined;
|
|
14640
|
+
const nextNextNextBlock = blocks[index + 3];
|
|
14641
|
+
const nextNextNextKind = nextNextNextBlock && typeof nextNextNextBlock === "object" ? nextNextNextBlock.blockKind : undefined;
|
|
14642
|
+
const nextNextNextText = nextNextNextBlock && typeof nextNextNextBlock === "object" && typeof nextNextNextBlock.text === "string" ? nextNextNextBlock.text : undefined;
|
|
14643
|
+
const nextNextNextNextBlock = blocks[index + 4];
|
|
14644
|
+
const nextNextNextNextKind = nextNextNextNextBlock && typeof nextNextNextNextBlock === "object" ? nextNextNextNextBlock.blockKind : undefined;
|
|
14645
|
+
if (officeBlockKind === "paragraph" && (nextKind === "list" || nextKind === "paragraph" && nextNextKind === "list") && text.length <= 200) {
|
|
14646
|
+
pendingListContextText = normalizeWhitespace([
|
|
14647
|
+
...typeof pendingListContextText === "string" ? [pendingListContextText] : [],
|
|
14648
|
+
text
|
|
14649
|
+
].join(`
|
|
14650
|
+
|
|
14651
|
+
`));
|
|
14652
|
+
continue;
|
|
14653
|
+
}
|
|
14654
|
+
if (officeBlockKind === "paragraph" && (nextKind === "table" || nextKind === "paragraph" && nextNextKind === "table" || nextKind === "heading" && typeof nextText === "string" && /\btable\b/i.test(nextText) && (nextNextKind === "table" || nextNextKind === "paragraph" && nextNextNextKind === "table") || nextKind === "paragraph" && nextNextKind === "heading" && typeof nextNextText === "string" && /\btable\b/i.test(nextNextText) && (nextNextNextKind === "table" || nextNextNextKind === "paragraph" && nextNextNextNextKind === "table") || nextKind === "paragraph" && nextNextKind === "paragraph" && nextNextNextKind === "heading" && typeof nextNextNextText === "string" && /\btable\b/i.test(nextNextNextText) && (nextNextNextNextKind === "table" || nextNextNextNextKind === "paragraph" && blocks[index + 5]?.blockKind === "table")) && text.length <= 200) {
|
|
14655
|
+
pendingTableContextText = normalizeWhitespace([
|
|
14656
|
+
...typeof pendingTableContextText === "string" ? [pendingTableContextText] : [],
|
|
14657
|
+
text
|
|
14658
|
+
].join(`
|
|
14659
|
+
|
|
14660
|
+
`));
|
|
14661
|
+
continue;
|
|
14662
|
+
}
|
|
14663
|
+
const officeListContextText = officeBlockKind === "list" ? pendingListContextText : undefined;
|
|
14664
|
+
const officeTableContextText = officeBlockKind === "table" ? pendingTableContextText : undefined;
|
|
14665
|
+
if (officeBlockKind !== "list" || nextKind !== "list") {
|
|
14666
|
+
pendingListContextText = undefined;
|
|
14667
|
+
}
|
|
14668
|
+
pendingTableContextText = undefined;
|
|
13879
14669
|
units.push({
|
|
13880
14670
|
officeBlockKind,
|
|
14671
|
+
officeListLevel,
|
|
13881
14672
|
officeBlockNumber,
|
|
13882
|
-
|
|
13883
|
-
|
|
14673
|
+
...typeof officeListContextText === "string" ? { officeListContextText } : {},
|
|
14674
|
+
...typeof officeTableContextText === "string" ? { officeTableContextText } : {},
|
|
14675
|
+
...typeof officeTableBodyRowCount === "number" ? { officeTableBodyRowCount } : {},
|
|
14676
|
+
...typeof officeTableColumnCount === "number" ? { officeTableColumnCount } : {},
|
|
14677
|
+
...typeof officeTableHeaderText === "string" ? { officeTableHeaderText } : {},
|
|
14678
|
+
...Array.isArray(officeTableHeaders) && officeTableHeaders.length > 0 ? { officeTableHeaders } : {},
|
|
14679
|
+
...typeof officeTableRowCount === "number" ? { officeTableRowCount } : {},
|
|
14680
|
+
...typeof officeTableSignature === "string" ? { officeTableSignature } : {},
|
|
14681
|
+
preferredChunkUnits: officeBlockKind === "table" ? [
|
|
14682
|
+
...typeof officeTableContextText === "string" ? [officeTableContextText] : [],
|
|
14683
|
+
...text.split(`
|
|
14684
|
+
`).filter(Boolean)
|
|
14685
|
+
] : officeBlockKind === "list" ? [
|
|
14686
|
+
...typeof officeListContextText === "string" ? [officeListContextText] : [],
|
|
14687
|
+
text
|
|
14688
|
+
] : undefined,
|
|
13884
14689
|
sectionDepth: sectionPath?.length,
|
|
13885
14690
|
sectionKind: officeBlockKind === "paragraph" ? "office_heading" : "office_block",
|
|
13886
14691
|
sectionPath,
|
|
13887
14692
|
sectionTitle,
|
|
13888
|
-
text: officeBlockKind === "
|
|
14693
|
+
text: officeBlockKind === "table" && typeof officeTableContextText === "string" ? normalizeWhitespace(`${officeTableContextText}
|
|
14694
|
+
|
|
14695
|
+
${text}`) : officeBlockKind === "list" && typeof officeListContextText === "string" ? normalizeWhitespace(`${officeListContextText}
|
|
14696
|
+
|
|
14697
|
+
${text}`) : officeBlockKind === "paragraph" ? decorateOfficeSectionText(text, sectionTitle) : text
|
|
13889
14698
|
});
|
|
13890
14699
|
}
|
|
13891
14700
|
return units;
|
|
@@ -14823,6 +15632,13 @@ var appendPdfLineBreak = (parts) => {
|
|
|
14823
15632
|
`);
|
|
14824
15633
|
};
|
|
14825
15634
|
// Length cap (in characters) associated with PDF "chrome" lines.
// NOTE(review): its use site is not visible in this part of the file — confirm.
var PDF_CHROME_LINE_MAX_LENGTH = 80;
// Lines longer than this are never treated as link lines (isLikelyPDFLinkLine)
// or promo body lines (isLikelyPDFPromoBlock).
var PDF_LINK_CLUSTER_LINE_MAX_LENGTH = 120;
// Matches a line that starts with "Figure <n>" / "Fig. <n>" (optional letter
// suffix), followed by a ":", "." or "-" separator, whitespace, or end of line.
var PDF_FIGURE_LABEL_PATTERN = /^(?:figure|fig\.)\s*\d+[A-Za-z]?(?:\s*[:.-]\s*|\s+|$)/i;
// Matches a first line that introduces a list of links ("Related ...", "See also ...").
var PDF_LINK_CLUSTER_HEADING_PATTERN = /^(?:related|quick|useful|reference|references|resources|links|see also)\b/i;
// Matches a first line that starts with a call-to-action phrase.
var PDF_PROMO_HEADING_PATTERN = /^(?:start|free trial|upgrade|subscribe|newsletter|contact sales|book demo|try|learn more)\b/i;
// Matches promotional wording anywhere inside a line.
var PDF_PROMO_BODY_PATTERN = /\b(?:free trial|upgrade|subscribe|newsletter|contact sales|book demo|learn more|pricing|enterprise|demo)\b/i;
// OCR summary thresholds.
// NOTE(review): consumers are outside this part of the file — confirm semantics.
var OCR_SUMMARY_CONFIDENCE_THRESHOLD = 0.75;
var OCR_SUMMARY_MIN_STRONG_TEXT_RATIO = 0.6;
// Global pattern with one alternative per PDF content-stream token it
// recognises: `[...] TJ`, `(...) Tj`, `<num> <num> (...) "`, `(...) '`,
// two-operand `Td`/`TD`, `T*`, and six-operand `Tm`.
// It carries the /g flag, so `lastIndex` is stateful across exec()/test() calls.
var PDF_TEXT_OPERATOR_PATTERN = /(\[((?:\\.|[^\]])*)\]\s*TJ)|(\(((?:\\.|[^\\)])*)\)\s*Tj)|([-+]?\d*\.?\d+\s+[-+]?\d*\.?\d+\s+\(((?:\\.|[^\\)])*)\)\s*")|(\(((?:\\.|[^\\)])*)\)\s*')|((?:[-+]?\d*\.?\d+\s+){2}(?:Td|TD))|(T\*)|((?:[-+]?\d*\.?\d+\s+){6}Tm)/g;
|
|
14827
15643
|
var extractTextFromPDFTextObject = (value) => {
|
|
14828
15644
|
const parts = [];
|
|
@@ -14851,23 +15667,144 @@ var extractTextFromPDFTextObject = (value) => {
|
|
|
14851
15667
|
}
|
|
14852
15668
|
return parts.join("");
|
|
14853
15669
|
};
|
|
14854
|
-
var
|
|
14855
|
-
const
|
|
14856
|
-
if (
|
|
15670
|
+
/**
 * Builds a text-block seed object from a group of raw lines.
 *
 * Lines are passed through `normalizeWhitespace` and empty results dropped.
 * The block is tagged "figure_caption" only when it has at least two lines
 * and the first one matches PDF_FIGURE_LABEL_PATTERN. It is "table_like" as
 * soon as any line contains " | ", otherwise "paragraph".
 *
 * @param {string[]} lines Raw lines belonging to the block.
 * @param {number|undefined} pageNumber Page the block came from; copied as-is.
 * @returns {{lineCount: number, pageNumber: (number|undefined), semanticRole?: string, text: string, textKind: string}|undefined}
 *   The seed, or undefined when no non-empty line remains.
 */
var buildPDFNativeTextBlockSeed = (lines, pageNumber) => {
  const cleaned = lines.map((line) => normalizeWhitespace(line)).filter(Boolean);
  if (cleaned.length === 0) {
    return;
  }
  const [firstLine] = cleaned;
  const isCaption = cleaned.length >= 2 && PDF_FIGURE_LABEL_PATTERN.test(firstLine);
  const hasTableRow = cleaned.some((line) => line.includes(" | "));
  // Properties are assigned in the same order the object literal used:
  // lineCount, pageNumber, [semanticRole], text, textKind.
  const seed = { lineCount: cleaned.length, pageNumber };
  if (isCaption) {
    seed.semanticRole = "figure_caption";
  }
  seed.text = cleaned.join("\n");
  seed.textKind = hasTableRow ? "table_like" : "paragraph";
  return seed;
};
|
|
15686
|
+
/**
 * Returns the first non-empty line of a block when it looks like a heading.
 *
 * A line is rejected when it is longer than 80 characters, contains the
 * " | " cell separator, or ends with ".", "!" or "?".
 *
 * @param {string} text Newline-separated block text.
 * @returns {string|undefined} The heading line, or undefined.
 */
var inferPDFBlockHeading = (text) => {
  const firstLine = text.split("\n").map((line) => normalizeWhitespace(line)).find(Boolean);
  if (!firstLine) {
    return;
  }
  const tooLong = firstLine.length > 80;
  const looksLikeTableRow = firstLine.includes(" | ");
  const endsLikeSentence = /[.!?]$/.test(firstLine);
  if (tooLong || looksLikeTableRow || endsLikeSentence) {
    return;
  }
  return firstLine;
};
|
|
15695
|
+
/**
 * Returns the first non-empty line of a block when it matches
 * PDF_FIGURE_LABEL_PATTERN.
 *
 * @param {string} text Newline-separated block text.
 * @returns {string|undefined} The whole first line (not just the matched
 *   prefix), or undefined when it is missing or does not match.
 */
var extractPDFFigureLabel = (text) => {
  const firstLine = text.split("\n").map((line) => normalizeWhitespace(line)).find(Boolean);
  if (firstLine && PDF_FIGURE_LABEL_PATTERN.test(firstLine)) {
    return firstLine;
  }
  return undefined;
};
|
|
15701
|
+
/**
 * Splits the text of one PDF page into block seeds.
 *
 * Lines are passed through `normalizeWhitespace` and empty results dropped.
 * A new block starts whenever:
 *   - a line matches PDF_FIGURE_LABEL_PATTERN (it opens a caption group),
 *   - a caption group already holds two lines, or the next line is table-like,
 *   - the line kind flips between "paragraph" and "table_like".
 * Each finished group is turned into a seed by `buildPDFNativeTextBlockSeed`.
 *
 * @param {string} text Newline-separated page text.
 * @param {number|undefined} pageNumber Page number forwarded to every seed.
 * @returns {object[]} Block seeds in source order; empty when there is no text.
 */
var splitPDFNativeTextBlocks = (text, pageNumber) => {
  const lines = text.split("\n").map((line) => normalizeWhitespace(line)).filter(Boolean);
  if (lines.length === 0) {
    return [];
  }
  const blocks = [];
  let currentLines = [];
  let currentKind;
  let currentSemanticRole;
  // Emits the pending group (if any) and resets the grouping state.
  const flush = () => {
    if (currentLines.length === 0) {
      return;
    }
    const block = buildPDFNativeTextBlockSeed(currentLines, pageNumber);
    if (block) {
      blocks.push(block);
    }
    currentLines = [];
    currentKind = undefined;
    currentSemanticRole = undefined;
  };
  // The line index was never used, so iterate the lines directly.
  for (const line of lines) {
    const lineKind = line.includes(" | ") ? "table_like" : "paragraph";
    if (PDF_FIGURE_LABEL_PATTERN.test(line)) {
      // A figure label always opens a fresh caption group.
      flush();
      currentKind = "paragraph";
      currentSemanticRole = "figure_caption";
      currentLines.push(line);
      continue;
    }
    if (currentSemanticRole === "figure_caption") {
      // A caption group takes at most one continuation paragraph line.
      if (lineKind === "paragraph" && currentLines.length < 2) {
        currentLines.push(line);
        continue;
      }
      flush();
    }
    if (currentKind && lineKind !== currentKind) {
      flush();
    }
    currentKind = lineKind;
    currentLines.push(line);
  }
  flush();
  return blocks;
};
|
|
15749
|
+
/**
 * Returns shallow copies of the blocks, each with a 1-based `blockNumber`
 * reflecting its position. An existing `blockNumber` is overwritten; the
 * input objects are not mutated.
 *
 * @param {object[]} blocks Blocks in document order.
 * @returns {object[]} New array of numbered copies.
 */
var assignPDFBlockNumbers = (blocks) => {
  const withNumber = (block, position) => {
    const blockNumber = position + 1;
    return { ...block, blockNumber };
  };
  return blocks.map(withNumber);
};
|
|
14870
15753
|
/**
 * Tells whether a line is just a page label such as "Page 3" or
 * "page 3 of 12" (case-insensitive, surrounding whitespace ignored).
 *
 * @param {string} value Line to test.
 * @returns {boolean} True when the whole trimmed line is a page label.
 */
var isLikelyPDFPageLabel = (value) => {
  const trimmed = value.trim();
  return /^page\s+\d+(?:\s+of\s+\d+)?$/i.test(trimmed);
};
|
|
15754
|
+
/**
 * Tells whether a line looks like page chrome: either a page label
 * (see isLikelyPDFPageLabel) or a line whose last word is "header" or
 * "footer" (case-insensitive).
 *
 * @param {string} value Line to test.
 * @returns {boolean} False for empty or whitespace-only input.
 */
var isLikelyPDFChromeLine = (value) => {
  const trimmed = value.trim();
  if (trimmed.length === 0) {
    return false;
  }
  if (isLikelyPDFPageLabel(trimmed)) {
    return true;
  }
  return /\b(?:header|footer)\s*$/i.test(trimmed);
};
|
|
15761
|
+
/**
 * Tells whether a line is essentially a link.
 *
 * Accepted shapes: starts with http:// or https://, starts with "www.",
 * is a single root-relative path, or contains a parenthesised URL/path
 * such as "(https://...)" or "(/docs)". Lines longer than
 * PDF_LINK_CLUSTER_LINE_MAX_LENGTH never qualify.
 *
 * @param {string} value Line to test.
 * @returns {boolean} False for blank or over-long input.
 */
var isLikelyPDFLinkLine = (value) => {
  const trimmed = value.trim();
  if (trimmed === "" || trimmed.length > PDF_LINK_CLUSTER_LINE_MAX_LENGTH) {
    return false;
  }
  const linkShapes = [
    /^https?:\/\//i,
    /^www\./i,
    /^\/[A-Za-z0-9/_#?&=%.-]+$/,
    /\((?:https?:\/\/|\/)[^)]+\)/i
  ];
  return linkShapes.some((shape) => shape.test(trimmed));
};
|
|
15768
|
+
/**
 * Tells whether a paragraph block is just a cluster of links.
 *
 * Blocks with a semantic role, or that are not "paragraph" blocks, never
 * qualify. Otherwise the block qualifies when:
 *   - it is a single link line, or
 *   - its first line matches PDF_LINK_CLUSTER_HEADING_PATTERN and every
 *     following line is a link line, or
 *   - at least two of its lines are link lines and they make up at least
 *     60% of all lines.
 *
 * @param {{semanticRole?: string, textKind: string, text: string}} block
 * @returns {boolean}
 */
var isLikelyPDFLinkClusterBlock = (block) => {
  if (block.semanticRole || block.textKind !== "paragraph") {
    return false;
  }
  const lines = block.text.split("\n").map((line) => normalizeWhitespace(line)).filter(Boolean);
  if (lines.length === 0) {
    return false;
  }
  if (lines.length === 1) {
    return isLikelyPDFLinkLine(lines[0]);
  }
  const [heading, ...bodyLines] = lines;
  const bodyLinkCount = bodyLines.filter((line) => isLikelyPDFLinkLine(line)).length;
  if (bodyLinkCount === bodyLines.length && PDF_LINK_CLUSTER_HEADING_PATTERN.test(heading)) {
    return true;
  }
  // The ratio is measured against every line, so the first line has to be
  // counted as well. Counting only the body let a two-line block of pure
  // links through, while one-line and three-line pure-link blocks were caught.
  const totalLinkCount = bodyLinkCount + (isLikelyPDFLinkLine(heading) ? 1 : 0);
  return totalLinkCount >= 2 && totalLinkCount >= Math.ceil(lines.length * 0.6);
};
|
|
15788
|
+
/**
 * Tells whether a paragraph block looks like a promotional call-to-action.
 *
 * Blocks with a semantic role, or that are not "paragraph" blocks, never
 * qualify. A one-line block qualifies when it matches
 * PDF_PROMO_HEADING_PATTERN. A block of two to four lines qualifies when its
 * first line matches that pattern and all but at most one of the remaining
 * lines (and at least one) are short promo or link lines.
 *
 * @param {{semanticRole?: string, textKind: string, text: string}} block
 * @returns {boolean}
 */
var isLikelyPDFPromoBlock = (block) => {
  if (block.semanticRole || block.textKind !== "paragraph") {
    return false;
  }
  const lines = block.text.split("\n").map((line) => normalizeWhitespace(line)).filter(Boolean);
  const [heading = "", ...bodyLines] = lines;
  if (lines.length === 1) {
    return PDF_PROMO_HEADING_PATTERN.test(heading);
  }
  if (lines.length < 2 || lines.length > 4) {
    return false;
  }
  if (!PDF_PROMO_HEADING_PATTERN.test(heading)) {
    return false;
  }
  let promoLikeCount = 0;
  for (const line of bodyLines) {
    if (line.length > PDF_LINK_CLUSTER_LINE_MAX_LENGTH) {
      continue;
    }
    if (PDF_PROMO_BODY_PATTERN.test(line) || isLikelyPDFLinkLine(line)) {
      promoLikeCount += 1;
    }
  }
  return promoLikeCount >= Math.max(1, bodyLines.length - 1);
};
|
|
14871
15808
|
var suppressRepeatedPDFChrome = (blocks) => {
|
|
14872
15809
|
const linePages = new Map;
|
|
14873
15810
|
for (const block of blocks) {
|
|
@@ -14888,7 +15825,7 @@ var suppressRepeatedPDFChrome = (blocks) => {
|
|
|
14888
15825
|
if (!line) {
|
|
14889
15826
|
return false;
|
|
14890
15827
|
}
|
|
14891
|
-
if (
|
|
15828
|
+
if (isLikelyPDFChromeLine(line)) {
|
|
14892
15829
|
return false;
|
|
14893
15830
|
}
|
|
14894
15831
|
const repeatedPages = linePages.get(line);
|
|
@@ -14897,27 +15834,76 @@ var suppressRepeatedPDFChrome = (blocks) => {
|
|
|
14897
15834
|
}
|
|
14898
15835
|
return true;
|
|
14899
15836
|
});
|
|
14900
|
-
const text =
|
|
14901
|
-
`)
|
|
15837
|
+
const text = keptLines.join(`
|
|
15838
|
+
`);
|
|
14902
15839
|
if (!text) {
|
|
14903
15840
|
return;
|
|
14904
15841
|
}
|
|
14905
|
-
return
|
|
15842
|
+
return {
|
|
15843
|
+
...block,
|
|
15844
|
+
lineCount: text.split(`
|
|
15845
|
+
`).filter(Boolean).length,
|
|
15846
|
+
text,
|
|
15847
|
+
textKind: text.includes(" | ") ? "table_like" : "paragraph"
|
|
15848
|
+
};
|
|
14906
15849
|
}).filter((value) => Boolean(value));
|
|
14907
15850
|
};
|
|
15851
|
+
/**
 * Drops blocks classified as link clusters or promotional blocks.
 *
 * @param {object[]} blocks Blocks to filter.
 * @returns {object[]} New array with the remaining blocks, order preserved.
 */
var suppressNonContentPDFBlocks = (blocks) => {
  const isNonContent = (block) => isLikelyPDFLinkClusterBlock(block) || isLikelyPDFPromoBlock(block);
  return blocks.filter((block) => !isNonContent(block));
};
|
|
15852
|
+
/**
 * Folds a heading-only block into the paragraph block that follows it.
 *
 * A block is heading-only when it has no semantic role, is a "paragraph",
 * has exactly one line, and `inferPDFBlockHeading` returns that line. It is
 * merged with the next block when that block is on the same page, has no
 * semantic role, is a "paragraph", and has no inferred heading of its own.
 * The merged block keeps the heading block's other properties and gets the
 * combined `text` and a recomputed `lineCount`.
 *
 * @param {object[]} blocks Blocks in document order.
 * @returns {object[]} New array; falsy entries are dropped.
 */
var mergePDFHeadingContinuationBlocks = (blocks) => {
  const result = [];
  let position = 0;
  while (position < blocks.length) {
    const current = blocks[position];
    const following = blocks[position + 1];
    position += 1;
    if (!current) {
      continue;
    }
    const currentLines = current.text.split("\n").map((line) => normalizeWhitespace(line)).filter(Boolean);
    const isHeadingOnly = !current.semanticRole && current.textKind === "paragraph" && currentLines.length === 1 && inferPDFBlockHeading(current.text) === currentLines[0];
    const absorbsFollowing = isHeadingOnly && following && following.pageNumber === current.pageNumber && !following.semanticRole && following.textKind === "paragraph" && inferPDFBlockHeading(following.text) === undefined;
    if (!absorbsFollowing) {
      result.push(current);
      continue;
    }
    const text = [current.text, following.text]
      .flatMap((value) => value.split("\n"))
      .map((line) => normalizeWhitespace(line))
      .filter(Boolean)
      .join("\n");
    const lineCount = text.split("\n").filter(Boolean).length;
    result.push({ ...current, lineCount, text });
    // The following block has been absorbed, so step over it.
    position += 1;
  }
  return result;
};
|
|
15881
|
+
/**
 * Tags the paragraph that immediately follows a figure caption as its body.
 *
 * A block is retagged with `semanticRole: "figure_body"` when it has no
 * `semanticRole`, its `textKind` is "paragraph", `inferPDFBlockHeading` returns
 * a falsy value for its text, and the previous entry of the input array is on
 * the same page with `semanticRole === "figure_caption"`. The previous entry is
 * always read from the input array, so a newly tagged body never causes the
 * block after it to be tagged as well.
 *
 * @param {Array<object>} blocks PDF blocks in reading order.
 * @returns {Array<object>} New array; untouched blocks are returned by reference.
 */
var associatePDFNativeFigureBodies = (blocks) => blocks.map((current, position) => {
  const isPlainParagraph = !current.semanticRole && current.textKind === "paragraph" && !inferPDFBlockHeading(current.text);
  if (!isPlainParagraph) {
    return current;
  }
  const preceding = position > 0 ? blocks[position - 1] : undefined;
  const followsCaption = Boolean(preceding) && preceding.pageNumber === current.pageNumber && preceding.semanticRole === "figure_caption";
  return followsCaption ? { ...current, semanticRole: "figure_body" } : current;
});
|
|
14908
15894
|
var extractNativePDFText = (data) => {
|
|
14909
15895
|
const raw = Buffer.from(data).toString("latin1");
|
|
14910
15896
|
const count = [...raw.matchAll(/\/Type\s*\/Page\b/g)].length;
|
|
14911
15897
|
const pageCount = count > 0 ? count : 1;
|
|
14912
15898
|
const pageMarkers = [...raw.matchAll(/\/Type\s*\/Page\b/g)].map((match) => match.index ?? raw.length);
|
|
14913
|
-
const blocks = [...raw.matchAll(/BT([\s\S]*?)ET/g)].
|
|
15899
|
+
const blocks = assignPDFBlockNumbers([...raw.matchAll(/BT([\s\S]*?)ET/g)].flatMap((match) => {
|
|
14914
15900
|
const blockText = extractTextFromPDFTextObject(match[1] ?? "");
|
|
14915
15901
|
const objectEnd = (match.index ?? 0) + (match[0]?.length ?? 0);
|
|
14916
15902
|
const pageIndex = pageMarkers.findIndex((marker) => marker >= objectEnd);
|
|
14917
15903
|
const pageNumber = pageIndex >= 0 ? pageIndex + 1 : pageCount;
|
|
14918
|
-
return
|
|
14919
|
-
})
|
|
14920
|
-
const visibleBlocks = suppressRepeatedPDFChrome(blocks);
|
|
15904
|
+
return splitPDFNativeTextBlocks(blockText, pageNumber);
|
|
15905
|
+
}));
|
|
15906
|
+
const visibleBlocks = assignPDFBlockNumbers(associatePDFNativeFigureBodies(mergePDFHeadingContinuationBlocks(suppressNonContentPDFBlocks(suppressRepeatedPDFChrome(blocks)))));
|
|
14921
15907
|
const fallbackText = [...raw.matchAll(/\(((?:\\.|[^\\)])*)\)\s*Tj/g)].map((match) => decodePdfLiteral(match[1] ?? "")).join(`
|
|
14922
15908
|
`);
|
|
14923
15909
|
const text = visibleBlocks.length > 0 ? normalizeWhitespace(visibleBlocks.map((block) => block.text).join(`
|
|
@@ -15028,13 +16014,10 @@ var officeDocumentBlocks = (entries) => {
|
|
|
15028
16014
|
for (const match of body.matchAll(blockPattern)) {
|
|
15029
16015
|
const blockXml = match[0] ?? "";
|
|
15030
16016
|
if (blockXml.startsWith("<w:tbl")) {
|
|
15031
|
-
const
|
|
15032
|
-
|
|
15033
|
-
|
|
15034
|
-
|
|
15035
|
-
if (cells.length === 0) {
|
|
15036
|
-
return "";
|
|
15037
|
-
}
|
|
16017
|
+
const tableRows = [...blockXml.matchAll(/<w:tr\b[\s\S]*?<\/w:tr>/g)].map((rowMatch) => [
|
|
16018
|
+
...(rowMatch[0] ?? "").matchAll(/<w:tc\b[\s\S]*?<\/w:tc>/g)
|
|
16019
|
+
].map((cellMatch) => extractOfficeParagraphText(cellMatch[0] ?? "")).filter(Boolean)).filter((cells) => cells.length > 0);
|
|
16020
|
+
const rows = tableRows.map((cells, rowIndex) => {
|
|
15038
16021
|
return `Row ${rowIndex + 1}. ${cells.map((cell, cellIndex) => `${String.fromCharCode(65 + cellIndex)}: ${cell}`).join(" | ")}`;
|
|
15039
16022
|
}).filter(Boolean);
|
|
15040
16023
|
const text2 = normalizeWhitespace(rows.join(`
|
|
@@ -15042,9 +16025,21 @@ var officeDocumentBlocks = (entries) => {
|
|
|
15042
16025
|
if (!text2) {
|
|
15043
16026
|
continue;
|
|
15044
16027
|
}
|
|
16028
|
+
const tableHeaders = tableRows[0];
|
|
16029
|
+
const tableRowCount = tableRows.length;
|
|
16030
|
+
const tableBodyRowCount = tableRowCount > 0 ? Math.max(0, tableRowCount - 1) : undefined;
|
|
16031
|
+
const tableColumnCount = Array.isArray(tableHeaders) && tableHeaders.length > 0 ? tableHeaders.length : tableRows.reduce((max, row) => Math.max(max, row.length), 0) || undefined;
|
|
16032
|
+
const tableHeaderText = Array.isArray(tableHeaders) && tableHeaders.length > 0 ? tableHeaders.join(" | ") : undefined;
|
|
16033
|
+
const tableSignature = Array.isArray(tableHeaders) && tableHeaders.length > 0 ? tableHeaders.join(" | ") : undefined;
|
|
15045
16034
|
blocks.push({
|
|
15046
16035
|
blockKind: "table",
|
|
15047
16036
|
blockNumber: blocks.length + 1,
|
|
16037
|
+
...typeof tableBodyRowCount === "number" ? { tableBodyRowCount } : {},
|
|
16038
|
+
...typeof tableColumnCount === "number" ? { tableColumnCount } : {},
|
|
16039
|
+
...typeof tableHeaderText === "string" ? { tableHeaderText } : {},
|
|
16040
|
+
...Array.isArray(tableHeaders) && tableHeaders.length > 0 ? { tableHeaders } : {},
|
|
16041
|
+
...typeof tableRowCount === "number" ? { tableRowCount } : {},
|
|
16042
|
+
...typeof tableSignature === "string" ? { tableSignature } : {},
|
|
15048
16043
|
text: text2
|
|
15049
16044
|
});
|
|
15050
16045
|
continue;
|
|
@@ -15057,12 +16052,16 @@ var officeDocumentBlocks = (entries) => {
|
|
|
15057
16052
|
const style = (styleMatch?.[1] ?? "").toLowerCase();
|
|
15058
16053
|
const headingMatch = style.match(/^heading([1-6])$/);
|
|
15059
16054
|
const isListParagraph = /<w:numPr\b/i.test(blockXml) || style.includes("list") || style.includes("bullet");
|
|
16055
|
+
const listLevelMatch = blockXml.match(/<w:ilvl\b[^>]*w:val="(\d+)"[^>]*\/?>/i);
|
|
16056
|
+
const listLevel = listLevelMatch ? Number.parseInt(listLevelMatch[1] ?? "0", 10) : undefined;
|
|
15060
16057
|
const blockKind = style === "title" ? "title" : headingMatch ? "heading" : isListParagraph ? "list" : "paragraph";
|
|
15061
|
-
const
|
|
16058
|
+
const listPrefix = blockKind === "list" ? `${" ".repeat(Math.max(0, listLevel ?? 0))}- ` : "";
|
|
16059
|
+
const decoratedText = blockKind === "list" && !/^[-*]\s/.test(text) ? `${listPrefix}${text}` : text;
|
|
15062
16060
|
blocks.push({
|
|
15063
16061
|
blockKind,
|
|
15064
16062
|
blockNumber: blocks.length + 1,
|
|
15065
16063
|
headingLevel: headingMatch ? Number.parseInt(headingMatch[1] ?? "1", 10) : undefined,
|
|
16064
|
+
listLevel: blockKind === "list" && Number.isFinite(listLevel ?? NaN) ? listLevel : undefined,
|
|
15066
16065
|
style: style || undefined,
|
|
15067
16066
|
text: decoratedText
|
|
15068
16067
|
});
|
|
@@ -15678,10 +16677,32 @@ var splitOCRColumns = (regions) => {
|
|
|
15678
16677
|
var buildOCRReadingText = (regions) => normalizeWhitespace(splitOCRColumns(regions).map((column) => buildOCRReadingLinesText(column)).filter(Boolean).join(`
|
|
15679
16678
|
|
|
15680
16679
|
`));
|
|
15681
|
-
var
|
|
15682
|
-
const regions = result.regions?.filter((region) => normalizeWhitespace(region.text ?? "").length > 0);
|
|
15683
|
-
|
|
15684
|
-
|
|
16680
|
+
/**
 * Builds the summary text for an OCR result together with confidence statistics.
 *
 * Regions whose normalized text is empty are ignored. A region counts as
 * "strong" when it has no numeric `confidence` or its confidence is at least
 * `OCR_SUMMARY_CONFIDENCE_THRESHOLD`. The summary is rebuilt from strong
 * regions only when there is at least one strong and one low-confidence region
 * and the strong regions hold at least `OCR_SUMMARY_MIN_STRONG_TEXT_RATIO` of
 * the total normalized text length; otherwise it is rebuilt from all regions.
 * If the rebuilt text is empty, or there are no usable regions, `result.text`
 * is used.
 *
 * Only the reconstruction that is returned is computed, so
 * `buildOCRReadingText` runs once per call instead of twice.
 *
 * @param {{ regions?: Array<{ text?: string, confidence?: number }>, text: string }} result
 * @returns {{
 *   lowConfidenceRegionCount: number,
 *   strongRegionCount: number,
 *   summaryConfidenceThreshold: number,
 *   text: string,
 *   usedStrongRegionsOnly: boolean
 * }}
 */
var buildOCRSummaryText = (result) => {
  const regions = result.regions?.filter((region) => normalizeWhitespace(region.text ?? "").length > 0) ?? [];
  if (regions.length === 0) {
    return {
      lowConfidenceRegionCount: 0,
      strongRegionCount: 0,
      summaryConfidenceThreshold: OCR_SUMMARY_CONFIDENCE_THRESHOLD,
      text: result.text,
      usedStrongRegionsOnly: false
    };
  }
  // Regions without a numeric confidence are treated as strong.
  const strongRegions = regions.filter((region) => typeof region.confidence !== "number" || region.confidence >= OCR_SUMMARY_CONFIDENCE_THRESHOLD);
  const lowConfidenceRegionCount = regions.length - strongRegions.length;
  const normalizedLength = (list) => list.reduce((sum, region) => sum + normalizeWhitespace(region.text ?? "").length, 0);
  const strongTextLength = normalizedLength(strongRegions);
  const totalTextLength = normalizedLength(regions);
  const strongCoverageRatio = totalTextLength > 0 ? strongTextLength / totalTextLength : 0;
  const useStrongRegionsOnly = strongRegions.length > 0 && lowConfidenceRegionCount > 0 && strongCoverageRatio >= OCR_SUMMARY_MIN_STRONG_TEXT_RATIO;
  // Rebuild only the variant that is actually returned.
  const reconstructed = buildOCRReadingText(useStrongRegionsOnly ? strongRegions : regions);
  return {
    lowConfidenceRegionCount,
    strongRegionCount: strongRegions.length,
    summaryConfidenceThreshold: OCR_SUMMARY_CONFIDENCE_THRESHOLD,
    text: reconstructed || result.text,
    usedStrongRegionsOnly: useStrongRegionsOnly
  };
};
|
|
15686
16707
|
var ocrPageDocuments = (result, input, baseMetadata) => {
|
|
15687
16708
|
const grouped = new Map;
|
|
@@ -16065,6 +17086,7 @@ var createRAGImageOCRExtractor = (provider) => ({
|
|
|
16065
17086
|
supports: imageExtractorSupports,
|
|
16066
17087
|
extract: async (input) => {
|
|
16067
17088
|
const result = await provider.extractText(input);
|
|
17089
|
+
const summary = buildOCRSummaryText(result);
|
|
16068
17090
|
return {
|
|
16069
17091
|
chunking: input.chunking,
|
|
16070
17092
|
contentType: input.contentType,
|
|
@@ -16072,11 +17094,15 @@ var createRAGImageOCRExtractor = (provider) => ({
|
|
|
16072
17094
|
metadata: {
|
|
16073
17095
|
...input.metadata ?? {},
|
|
16074
17096
|
...ocrMetadata(result),
|
|
17097
|
+
ocrLowConfidenceRegionCount: summary.lowConfidenceRegionCount,
|
|
17098
|
+
ocrStrongRegionCount: summary.strongRegionCount,
|
|
17099
|
+
ocrSummaryConfidenceThreshold: summary.summaryConfidenceThreshold,
|
|
17100
|
+
ocrSummaryUsedStrongRegionsOnly: summary.usedStrongRegionsOnly,
|
|
16075
17101
|
fileKind: "image",
|
|
16076
17102
|
sourceNativeKind: "image_ocr"
|
|
16077
17103
|
},
|
|
16078
17104
|
source: input.source ?? input.path ?? input.name ?? `${slugify(input.title ?? DEFAULT_BINARY_NAME)}.image.txt`,
|
|
16079
|
-
text:
|
|
17105
|
+
text: summary.text,
|
|
16080
17106
|
title: result.title ?? input.title
|
|
16081
17107
|
};
|
|
16082
17108
|
}
|
|
@@ -16260,6 +17286,8 @@ var createPDFFileExtractor = () => ({
|
|
|
16260
17286
|
...input.metadata ?? {},
|
|
16261
17287
|
fileKind: "pdf",
|
|
16262
17288
|
pageCount: extracted.pageCount,
|
|
17289
|
+
pdfEvidenceMode: "native",
|
|
17290
|
+
pdfEvidenceOrigin: "native",
|
|
16263
17291
|
pdfTextBlockCount: extracted.textBlockCount,
|
|
16264
17292
|
pdfTextBlocks: extracted.textBlocks
|
|
16265
17293
|
},
|
|
@@ -16293,6 +17321,7 @@ var createRAGPDFOCRExtractor = (options) => ({
|
|
|
16293
17321
|
const nativeText = extracted.text;
|
|
16294
17322
|
const minLength = options.minExtractedTextLength ?? 80;
|
|
16295
17323
|
const shouldUseNativeText = !options.alwaysOCR && nativeText.length >= minLength;
|
|
17324
|
+
const shouldUseHybridText = !options.alwaysOCR && nativeText.length > 0 && nativeText.length < minLength;
|
|
16296
17325
|
if (shouldUseNativeText) {
|
|
16297
17326
|
return {
|
|
16298
17327
|
chunking: input.chunking,
|
|
@@ -16302,6 +17331,8 @@ var createRAGPDFOCRExtractor = (options) => ({
|
|
|
16302
17331
|
...input.metadata ?? {},
|
|
16303
17332
|
fileKind: "pdf",
|
|
16304
17333
|
pageCount: extracted.pageCount,
|
|
17334
|
+
pdfEvidenceMode: "native",
|
|
17335
|
+
pdfEvidenceOrigin: "native",
|
|
16305
17336
|
pdfTextBlockCount: extracted.textBlockCount,
|
|
16306
17337
|
pdfTextBlocks: extracted.textBlocks,
|
|
16307
17338
|
pdfTextMode: "native"
|
|
@@ -16315,12 +17346,49 @@ var createRAGPDFOCRExtractor = (options) => ({
|
|
|
16315
17346
|
...input,
|
|
16316
17347
|
contentType: input.contentType ?? "application/pdf"
|
|
16317
17348
|
});
|
|
17349
|
+
const summary = buildOCRSummaryText(ocr);
|
|
16318
17350
|
const baseMetadata = {
|
|
16319
17351
|
...ocrMetadata(ocr),
|
|
17352
|
+
ocrLowConfidenceRegionCount: summary.lowConfidenceRegionCount,
|
|
17353
|
+
ocrStrongRegionCount: summary.strongRegionCount,
|
|
17354
|
+
ocrSummaryConfidenceThreshold: summary.summaryConfidenceThreshold,
|
|
17355
|
+
ocrSummaryUsedStrongRegionsOnly: summary.usedStrongRegionsOnly,
|
|
16320
17356
|
fileKind: "pdf",
|
|
16321
17357
|
pageCount: extracted.pageCount,
|
|
17358
|
+
pdfEvidenceMode: "ocr",
|
|
17359
|
+
pdfEvidenceOrigin: "ocr",
|
|
16322
17360
|
pdfTextMode: "ocr"
|
|
16323
17361
|
};
|
|
17362
|
+
if (shouldUseHybridText) {
|
|
17363
|
+
const hybridMetadata = {
|
|
17364
|
+
...input.metadata ?? {},
|
|
17365
|
+
...baseMetadata,
|
|
17366
|
+
pageCount: extracted.pageCount,
|
|
17367
|
+
pdfEvidenceMode: "hybrid",
|
|
17368
|
+
pdfEvidenceOrigin: "native",
|
|
17369
|
+
pdfEvidenceSupplement: "ocr",
|
|
17370
|
+
pdfHybridOCRSupplement: true,
|
|
17371
|
+
pdfNativeTextBlockCount: extracted.textBlockCount,
|
|
17372
|
+
pdfNativeTextLength: nativeText.length,
|
|
17373
|
+
pdfOCRFallbackReason: "native_below_min_length",
|
|
17374
|
+
pdfOCRTextLength: summary.text.length,
|
|
17375
|
+
pdfTextBlockCount: extracted.textBlockCount,
|
|
17376
|
+
pdfTextBlocks: extracted.textBlocks,
|
|
17377
|
+
pdfTextMode: "hybrid"
|
|
17378
|
+
};
|
|
17379
|
+
const hybridDocument = {
|
|
17380
|
+
chunking: input.chunking,
|
|
17381
|
+
contentType: input.contentType ?? "application/pdf",
|
|
17382
|
+
format: "text",
|
|
17383
|
+
metadata: hybridMetadata,
|
|
17384
|
+
source: input.source ?? input.path ?? input.name ?? `${slugify(input.title ?? DEFAULT_BINARY_NAME)}.pdf`,
|
|
17385
|
+
text: nativeText,
|
|
17386
|
+
title: input.title
|
|
17387
|
+
};
|
|
17388
|
+
const pageDocuments2 = ocrPageDocuments(ocr, input, baseMetadata);
|
|
17389
|
+
const regionDocuments2 = ocrRegionDocuments(ocr, input, baseMetadata);
|
|
17390
|
+
return [hybridDocument, ...pageDocuments2, ...regionDocuments2];
|
|
17391
|
+
}
|
|
16324
17392
|
const summaryDocument = {
|
|
16325
17393
|
chunking: input.chunking,
|
|
16326
17394
|
contentType: input.contentType ?? "application/pdf",
|
|
@@ -16330,7 +17398,7 @@ var createRAGPDFOCRExtractor = (options) => ({
|
|
|
16330
17398
|
...baseMetadata
|
|
16331
17399
|
},
|
|
16332
17400
|
source: input.source ?? input.path ?? input.name ?? `${slugify(input.title ?? DEFAULT_BINARY_NAME)}.pdf`,
|
|
16333
|
-
text:
|
|
17401
|
+
text: summary.text,
|
|
16334
17402
|
title: ocr.title ?? input.title
|
|
16335
17403
|
};
|
|
16336
17404
|
const pageDocuments = ocrPageDocuments(ocr, input, baseMetadata);
|
|
@@ -16600,6 +17668,101 @@ var chunkFromUnits = (units, maxChunkLength, chunkOverlap, minChunkLength) => {
|
|
|
16600
17668
|
};
|
|
16601
17669
|
var chunkSourceAwareUnit = (unit, options) => {
|
|
16602
17670
|
const defaultSourceAwareChunkReason = unit.sectionKind === "markdown_heading" || unit.sectionKind === "html_heading" || unit.sectionKind === "office_heading" ? "section_boundary" : unit.sectionKind ? "source_native_unit" : unit.sourceAwareChunkReason;
|
|
17671
|
+
if (unit.officeBlockKind === "table" && typeof unit.officeTableHeaderText === "string" && typeof unit.officeTableBodyRowCount === "number" && unit.officeTableBodyRowCount > 0 && unit.text.length > options.maxChunkLength) {
|
|
17672
|
+
const headerLine = unit.officeTableHeaderText;
|
|
17673
|
+
const contextText = typeof unit.officeTableContextText === "string" ? unit.officeTableContextText : undefined;
|
|
17674
|
+
const bodyRows = unit.text.split(`
|
|
17675
|
+
`).map((line) => normalizeWhitespace(line)).filter((line) => /^Row \d+\./.test(line)).slice(1);
|
|
17676
|
+
const slices = [];
|
|
17677
|
+
let currentRows = [];
|
|
17678
|
+
let currentStart = 1;
|
|
17679
|
+
const pushSlice = () => {
|
|
17680
|
+
if (currentRows.length === 0) {
|
|
17681
|
+
return;
|
|
17682
|
+
}
|
|
17683
|
+
slices.push({
|
|
17684
|
+
bodyRowEnd: currentStart + currentRows.length - 1,
|
|
17685
|
+
bodyRowStart: currentStart,
|
|
17686
|
+
text: normalizeWhitespace([
|
|
17687
|
+
...typeof contextText === "string" ? [contextText] : [],
|
|
17688
|
+
headerLine,
|
|
17689
|
+
...currentRows
|
|
17690
|
+
].join(`
|
|
17691
|
+
`))
|
|
17692
|
+
});
|
|
17693
|
+
currentStart += currentRows.length;
|
|
17694
|
+
currentRows = [];
|
|
17695
|
+
};
|
|
17696
|
+
for (const row of bodyRows) {
|
|
17697
|
+
const candidateRows = [...currentRows, row];
|
|
17698
|
+
const candidateText = normalizeWhitespace([
|
|
17699
|
+
...typeof contextText === "string" ? [contextText] : [],
|
|
17700
|
+
headerLine,
|
|
17701
|
+
...candidateRows
|
|
17702
|
+
].join(`
|
|
17703
|
+
`));
|
|
17704
|
+
if (currentRows.length > 0 && candidateText.length > options.maxChunkLength) {
|
|
17705
|
+
pushSlice();
|
|
17706
|
+
}
|
|
17707
|
+
currentRows.push(row);
|
|
17708
|
+
}
|
|
17709
|
+
pushSlice();
|
|
17710
|
+
if (slices.length > 0) {
|
|
17711
|
+
return slices.map((slice) => ({
|
|
17712
|
+
...unit,
|
|
17713
|
+
officeTableBodyRowCount: slice.bodyRowEnd - slice.bodyRowStart + 1,
|
|
17714
|
+
officeTableBodyRowEnd: slice.bodyRowEnd,
|
|
17715
|
+
officeTableBodyRowStart: slice.bodyRowStart,
|
|
17716
|
+
officeTableChunkKind: slices.length > 1 ? "table_slice" : "full_table",
|
|
17717
|
+
officeTableRowCount: slice.bodyRowEnd - slice.bodyRowStart + 2,
|
|
17718
|
+
sourceAwareChunkReason: slices.length > 1 ? "size_limit" : defaultSourceAwareChunkReason,
|
|
17719
|
+
text: slice.text
|
|
17720
|
+
}));
|
|
17721
|
+
}
|
|
17722
|
+
}
|
|
17723
|
+
if (unit.pdfTextKind === "table_like" && typeof unit.pdfTableHeaderText === "string" && typeof unit.pdfTableBodyRowCount === "number" && unit.pdfTableBodyRowCount > 0 && unit.text.length > options.maxChunkLength) {
|
|
17724
|
+
const headerLine = unit.pdfTableHeaderText;
|
|
17725
|
+
const bodyRows = unit.text.split(`
|
|
17726
|
+
`).map((line) => normalizeWhitespace(line)).filter(Boolean).slice(1);
|
|
17727
|
+
const slices = [];
|
|
17728
|
+
let currentRows = [];
|
|
17729
|
+
let currentStart = 1;
|
|
17730
|
+
const pushSlice = () => {
|
|
17731
|
+
if (currentRows.length === 0) {
|
|
17732
|
+
return;
|
|
17733
|
+
}
|
|
17734
|
+
slices.push({
|
|
17735
|
+
bodyRowEnd: currentStart + currentRows.length - 1,
|
|
17736
|
+
bodyRowStart: currentStart,
|
|
17737
|
+
text: normalizeWhitespace([headerLine, ...currentRows].join(`
|
|
17738
|
+
`))
|
|
17739
|
+
});
|
|
17740
|
+
currentStart += currentRows.length;
|
|
17741
|
+
currentRows = [];
|
|
17742
|
+
};
|
|
17743
|
+
for (const row of bodyRows) {
|
|
17744
|
+
const candidateRows = [...currentRows, row];
|
|
17745
|
+
const candidateText = normalizeWhitespace([headerLine, ...candidateRows].join(`
|
|
17746
|
+
`));
|
|
17747
|
+
if (currentRows.length > 0 && candidateText.length > options.maxChunkLength) {
|
|
17748
|
+
pushSlice();
|
|
17749
|
+
}
|
|
17750
|
+
currentRows.push(row);
|
|
17751
|
+
}
|
|
17752
|
+
pushSlice();
|
|
17753
|
+
if (slices.length > 0) {
|
|
17754
|
+
return slices.map((slice) => ({
|
|
17755
|
+
...unit,
|
|
17756
|
+
pdfTableBodyRowCount: slice.bodyRowEnd - slice.bodyRowStart + 1,
|
|
17757
|
+
pdfTableBodyRowEnd: slice.bodyRowEnd,
|
|
17758
|
+
pdfTableBodyRowStart: slice.bodyRowStart,
|
|
17759
|
+
pdfTableChunkKind: slices.length > 1 ? "table_slice" : "full_table",
|
|
17760
|
+
pdfTableRowCount: slice.bodyRowEnd - slice.bodyRowStart + 2,
|
|
17761
|
+
sourceAwareChunkReason: slices.length > 1 ? "size_limit" : defaultSourceAwareChunkReason,
|
|
17762
|
+
text: slice.text
|
|
17763
|
+
}));
|
|
17764
|
+
}
|
|
17765
|
+
}
|
|
16603
17766
|
if (unit.text.length <= options.maxChunkLength) {
|
|
16604
17767
|
return [
|
|
16605
17768
|
{
|
|
@@ -16896,7 +18059,37 @@ var prepareRAGDocument = (document, defaultChunking, chunkingRegistry) => {
|
|
|
16896
18059
|
...typeof entry.pageNumber === "number" ? { pageNumber: entry.pageNumber } : {},
|
|
16897
18060
|
...typeof entry.officeBlockNumber === "number" ? { officeBlockNumber: entry.officeBlockNumber } : {},
|
|
16898
18061
|
...entry.officeBlockKind ? { officeBlockKind: entry.officeBlockKind } : {},
|
|
18062
|
+
...typeof entry.officeListContextText === "string" ? { officeListContextText: entry.officeListContextText } : {},
|
|
18063
|
+
...typeof entry.officeListGroupItemCount === "number" ? {
|
|
18064
|
+
officeListGroupItemCount: entry.officeListGroupItemCount
|
|
18065
|
+
} : {},
|
|
18066
|
+
...typeof entry.officeListLevel === "number" ? { officeListLevel: entry.officeListLevel } : {},
|
|
18067
|
+
...Array.isArray(entry.officeListLevels) && entry.officeListLevels.length > 0 ? { officeListLevels: entry.officeListLevels } : {},
|
|
18068
|
+
...typeof entry.officeTableBodyRowCount === "number" ? { officeTableBodyRowCount: entry.officeTableBodyRowCount } : {},
|
|
18069
|
+
...typeof entry.officeTableBodyRowEnd === "number" ? { officeTableBodyRowEnd: entry.officeTableBodyRowEnd } : {},
|
|
18070
|
+
...typeof entry.officeTableBodyRowStart === "number" ? { officeTableBodyRowStart: entry.officeTableBodyRowStart } : {},
|
|
18071
|
+
...entry.officeTableChunkKind ? { officeTableChunkKind: entry.officeTableChunkKind } : {},
|
|
18072
|
+
...typeof entry.officeTableColumnCount === "number" ? { officeTableColumnCount: entry.officeTableColumnCount } : {},
|
|
18073
|
+
...typeof entry.officeTableContextText === "string" ? { officeTableContextText: entry.officeTableContextText } : {},
|
|
18074
|
+
...typeof entry.officeTableHeaderText === "string" ? { officeTableHeaderText: entry.officeTableHeaderText } : {},
|
|
18075
|
+
...Array.isArray(entry.officeTableHeaders) && entry.officeTableHeaders.length > 0 ? { officeTableHeaders: entry.officeTableHeaders } : {},
|
|
18076
|
+
...typeof entry.officeTableRowCount === "number" ? { officeTableRowCount: entry.officeTableRowCount } : {},
|
|
18077
|
+
...typeof entry.officeTableSignature === "string" ? { officeTableSignature: entry.officeTableSignature } : {},
|
|
16899
18078
|
...typeof entry.pdfBlockNumber === "number" ? { pdfBlockNumber: entry.pdfBlockNumber } : {},
|
|
18079
|
+
...typeof entry.pdfFigureCaptionBlockNumber === "number" ? {
|
|
18080
|
+
pdfFigureCaptionBlockNumber: entry.pdfFigureCaptionBlockNumber
|
|
18081
|
+
} : {},
|
|
18082
|
+
...typeof entry.pdfFigureLabel === "string" ? { pdfFigureLabel: entry.pdfFigureLabel } : {},
|
|
18083
|
+
...entry.pdfSemanticRole ? { pdfSemanticRole: entry.pdfSemanticRole } : {},
|
|
18084
|
+
...typeof entry.pdfTableBodyRowEnd === "number" ? { pdfTableBodyRowEnd: entry.pdfTableBodyRowEnd } : {},
|
|
18085
|
+
...typeof entry.pdfTableBodyRowCount === "number" ? { pdfTableBodyRowCount: entry.pdfTableBodyRowCount } : {},
|
|
18086
|
+
...typeof entry.pdfTableBodyRowStart === "number" ? { pdfTableBodyRowStart: entry.pdfTableBodyRowStart } : {},
|
|
18087
|
+
...entry.pdfTableChunkKind ? { pdfTableChunkKind: entry.pdfTableChunkKind } : {},
|
|
18088
|
+
...typeof entry.pdfTableColumnCount === "number" ? { pdfTableColumnCount: entry.pdfTableColumnCount } : {},
|
|
18089
|
+
...typeof entry.pdfTableHeaderText === "string" ? { pdfTableHeaderText: entry.pdfTableHeaderText } : {},
|
|
18090
|
+
...Array.isArray(entry.pdfTableHeaders) && entry.pdfTableHeaders.length > 0 ? { pdfTableHeaders: entry.pdfTableHeaders } : {},
|
|
18091
|
+
...typeof entry.pdfTableRowCount === "number" ? { pdfTableRowCount: entry.pdfTableRowCount } : {},
|
|
18092
|
+
...typeof entry.pdfTableSignature === "string" ? { pdfTableSignature: entry.pdfTableSignature } : {},
|
|
16900
18093
|
...entry.pdfTextKind ? { pdfTextKind: entry.pdfTextKind } : {},
|
|
16901
18094
|
...entry.sectionKind ? { sectionKind: entry.sectionKind } : {},
|
|
16902
18095
|
...entry.sourceAwareChunkReason ? { sourceAwareChunkReason: entry.sourceAwareChunkReason } : {},
|
|
@@ -17616,6 +18809,166 @@ var annotateRetrievalChannels = (input) => {
|
|
|
17616
18809
|
};
|
|
17617
18810
|
});
|
|
17618
18811
|
};
|
|
18812
|
+
/**
 * Ranks PDF evidence metadata; a higher number is preferred.
 *
 * - 3: `pdfEvidenceMode` "hybrid", `pdfEvidenceOrigin` "native",
 *      `pdfEvidenceSupplement` "ocr"
 * - 2: mode "native" with origin "native"
 * - 1: mode "ocr" with origin "ocr"
 * - 0: anything else, including missing metadata or non-string fields
 *
 * @param {object | undefined} metadata Chunk metadata.
 * @returns {0 | 1 | 2 | 3}
 */
var getPDFRetrievalEvidencePreference = (metadata) => {
  if (!metadata) {
    return 0;
  }
  const asString = (value) => typeof value === "string" ? value : undefined;
  const mode = asString(metadata.pdfEvidenceMode);
  const origin = asString(metadata.pdfEvidenceOrigin);
  const supplement = asString(metadata.pdfEvidenceSupplement);
  switch (mode) {
    case "hybrid":
      return origin === "native" && supplement === "ocr" ? 3 : 0;
    case "native":
      return origin === "native" ? 2 : 0;
    case "ocr":
      return origin === "ocr" ? 1 : 0;
    default:
      return 0;
  }
};
|
|
18830
|
+
/**
 * Extracts the scope (source, page, section) used to decide whether two
 * retrieval results can be compared.
 *
 * The page number is taken from the first numeric field among
 * `metadata.pageNumber`, `metadata.page`, and `metadata.pageIndex + 1`.
 * The section title is `metadata.sectionTitle` when it is a non-empty string.
 *
 * @param {{ source?: string, metadata?: object }} result Retrieval result.
 * @returns {{ pageNumber: number | undefined, sectionTitle: string | undefined, source: string } | undefined}
 *   `undefined` when the result has no metadata or no non-empty string source.
 */
var getPDFRetrievalScope = (result) => {
  const { metadata } = result;
  if (!metadata) {
    return undefined;
  }
  const hasSource = typeof result.source === "string" && result.source.length > 0;
  if (!hasSource) {
    return undefined;
  }
  let pageNumber;
  if (typeof metadata.pageNumber === "number") {
    pageNumber = metadata.pageNumber;
  } else if (typeof metadata.page === "number") {
    pageNumber = metadata.page;
  } else if (typeof metadata.pageIndex === "number") {
    // pageIndex is converted to a page number by adding one.
    pageNumber = metadata.pageIndex + 1;
  }
  const hasSectionTitle = typeof metadata.sectionTitle === "string" && metadata.sectionTitle.length > 0;
  return {
    pageNumber,
    sectionTitle: hasSectionTitle ? metadata.sectionTitle : undefined,
    source: result.source
  };
};
|
|
18847
|
+
/**
 * Builds a string key for a PDF retrieval scope.
 *
 * A numeric page number takes precedence and gives `<source>::page:<n>`;
 * otherwise a truthy section title gives `<source>::section:<title>`.
 *
 * @param {{ pageNumber?: number, sectionTitle?: string, source: string } | undefined} scope
 * @returns {string | undefined} `undefined` when the scope is missing or has
 *   neither a numeric page nor a section title.
 */
var getPDFRetrievalComparableScopeKey = (scope) => {
  if (!scope) {
    return undefined;
  }
  const { pageNumber, sectionTitle, source } = scope;
  if (typeof pageNumber === "number") {
    return `${source}::page:${pageNumber}`;
  }
  return sectionTitle ? `${source}::section:${sectionTitle}` : undefined;
};
|
|
18859
|
+
/**
 * Extracts the comparison scope for an office "table" or "list" result.
 *
 * The section title comes from `metadata.sectionTitle` when it is a non-blank
 * string, otherwise from the last non-blank entry of `metadata.sectionPath`.
 * Both are trimmed, so the same section yields the same title (and therefore
 * the same scope key) regardless of which field supplied it.
 *
 * @param {{ source?: string, metadata?: object }} result Retrieval result.
 * @returns {{
 *   blockKind: "table" | "list",
 *   hasContext: boolean,
 *   pathDepth: number,
 *   sectionTitle: string,
 *   source: string
 * } | undefined} `undefined` when metadata is missing, the block kind is not
 *   "table"/"list", the source is not a non-empty string, or no section title
 *   can be resolved.
 */
var getOfficeRetrievalScope = (result) => {
  const metadata = result.metadata;
  if (!metadata) {
    return undefined;
  }
  const officeBlockKind = metadata.officeBlockKind;
  if (officeBlockKind !== "table" && officeBlockKind !== "list") {
    return undefined;
  }
  const source = typeof result.source === "string" && result.source.length > 0 ? result.source : undefined;
  if (!source) {
    return undefined;
  }
  const isNonBlankString = (value) => typeof value === "string" && value.trim().length > 0;
  const sectionPath = Array.isArray(metadata.sectionPath) ? metadata.sectionPath.filter(isNonBlankString) : [];
  const explicitTitle = isNonBlankString(metadata.sectionTitle) ? metadata.sectionTitle.trim() : undefined;
  // Trim the path-derived fallback too, matching the explicit title.
  const sectionTitle = explicitTitle ?? sectionPath.at(-1)?.trim();
  if (!sectionTitle) {
    return undefined;
  }
  // Tables and lists store their surrounding context text under different keys.
  const contextText = officeBlockKind === "table" ? metadata.officeTableContextText : metadata.officeListContextText;
  return {
    blockKind: officeBlockKind,
    hasContext: isNonBlankString(contextText),
    pathDepth: sectionPath.length,
    sectionTitle,
    source
  };
};
|
|
18885
|
+
/**
 * Builds the string key `<source>::office_section:<blockKind>:<sectionTitle>`
 * for an office retrieval scope.
 *
 * @param {{ source: string, blockKind: string, sectionTitle: string } | undefined} scope
 * @returns {string | undefined} `undefined` when no scope is given.
 */
var getOfficeRetrievalComparableScopeKey = (scope) => {
  if (!scope) {
    return undefined;
  }
  const { source, blockKind, sectionTitle } = scope;
  return `${source}::office_section:${blockKind}:${sectionTitle}`;
};
|
|
18891
|
+
/**
 * Scores office "table"/"list" metadata; a higher score is preferred.
 *
 * Score = 10 x (number of non-blank string entries in `sectionPath`)
 *       + 1 if the kind-specific context text (`officeTableContextText` for
 *           tables, `officeListContextText` for lists) is a non-blank string
 *       + 1 if the block is a list whose `officeListGroupItemCount` is a
 *           number greater than 1.
 *
 * @param {object | undefined} metadata Chunk metadata.
 * @returns {number} 0 when metadata is missing or the block kind is neither
 *   "table" nor "list".
 */
var getOfficeRetrievalEvidencePreference = (metadata) => {
  if (!metadata) {
    return 0;
  }
  const isTable = metadata.officeBlockKind === "table";
  const isList = metadata.officeBlockKind === "list";
  if (!isTable && !isList) {
    return 0;
  }
  const pathDepth = Array.isArray(metadata.sectionPath) ? metadata.sectionPath.filter((value) => typeof value === "string" && value.trim().length > 0).length : 0;
  const contextText = isTable ? metadata.officeTableContextText : metadata.officeListContextText;
  const contextBonus = typeof contextText === "string" && contextText.trim().length > 0 ? 1 : 0;
  const groupBonus = isList && typeof metadata.officeListGroupItemCount === "number" && metadata.officeListGroupItemCount > 1 ? 1 : 0;
  return pathDepth * 10 + contextBonus + groupBonus;
};
|
|
18902
|
+
/**
 * Picks the label describing a structured-evidence reordering.
 *
 * @param {{ officeAffectedScopeCount: number, pdfAffectedScopeCount: number }} input
 * @returns {string} The office-specific label when only office scopes were
 *   affected, the PDF-specific label when only PDF scopes were affected, and
 *   the generic label in every other case.
 */
var buildStructuredEvidenceReconcileLabel = (input) => {
  const { officeAffectedScopeCount, pdfAffectedScopeCount } = input;
  const officeOnly = officeAffectedScopeCount > 0 && pdfAffectedScopeCount === 0;
  if (officeOnly) {
    return "Preferred deeper office-structure evidence within matching sections";
  }
  const pdfOnly = pdfAffectedScopeCount > 0 && officeAffectedScopeCount === 0;
  return pdfOnly ? "Preferred native-layout PDF evidence within matching sections" : "Preferred stronger structured evidence within matching sections";
};
|
|
18911
|
+
/**
 * Reorders retrieval results so that, within a matching scope, stronger
 * structured evidence comes first, and reports what moved.
 *
 * Two results are compared by office preference when both have an office scope
 * with the same source, block kind and section title. Otherwise (or on a tie)
 * they are compared by PDF preference when both have a PDF scope with the same
 * source and either the same section title or the same page number. Every other
 * pair keeps its original relative order.
 *
 * Movement is detected by comparing each entry's original index with its new
 * position rather than comparing `chunkId`s, so results with missing or
 * duplicate chunk ids are still counted when they move.
 *
 * NOTE(review): preference is only applied to pairs that share a scope, and all
 * other pairs fall back to original index, so the comparator does not look
 * transitive when scopes interleave; confirm the resulting order is acceptable
 * in that case.
 *
 * @param {Array<{ chunkId?: string, source?: string, metadata?: object }>} results
 * @returns {{
 *   affectedScopeCount: number,
 *   label: string,
 *   applied: boolean,
 *   officeAffectedScopeCount: number,
 *   pdfAffectedScopeCount: number,
 *   results: Array<object>,
 *   reorderedResults: number
 * }} `results` is a new array; the input array is not mutated.
 */
var reconcileStructuredEvidenceOrdering = (results) => {
  const indexed = results.map((result, index) => ({ index, result }));
  const sorted = [...indexed].sort((leftEntry, rightEntry) => {
    const left = leftEntry.result;
    const right = rightEntry.result;
    const leftOfficeScope = getOfficeRetrievalScope(left);
    const rightOfficeScope = getOfficeRetrievalScope(right);
    if (leftOfficeScope && rightOfficeScope && leftOfficeScope.source === rightOfficeScope.source && leftOfficeScope.blockKind === rightOfficeScope.blockKind && leftOfficeScope.sectionTitle === rightOfficeScope.sectionTitle) {
      const leftPreference = getOfficeRetrievalEvidencePreference(left.metadata);
      const rightPreference = getOfficeRetrievalEvidencePreference(right.metadata);
      if (rightPreference !== leftPreference) {
        return rightPreference - leftPreference;
      }
    }
    const leftScope = getPDFRetrievalScope(left);
    const rightScope = getPDFRetrievalScope(right);
    if (leftScope && rightScope && leftScope.source === rightScope.source && (leftScope.sectionTitle && rightScope.sectionTitle && leftScope.sectionTitle === rightScope.sectionTitle || typeof leftScope.pageNumber === "number" && typeof rightScope.pageNumber === "number" && leftScope.pageNumber === rightScope.pageNumber)) {
      const leftPreference = getPDFRetrievalEvidencePreference(left.metadata);
      const rightPreference = getPDFRetrievalEvidencePreference(right.metadata);
      if (rightPreference !== leftPreference) {
        return rightPreference - leftPreference;
      }
    }
    // No applicable preference: keep the incoming order.
    return leftEntry.index - rightEntry.index;
  });
  const orderedResults = sorted.map((entry) => entry.result);
  const officeAffectedScopes = new Set();
  const pdfAffectedScopes = new Set();
  let reorderedResults = 0;
  for (const [position, entry] of sorted.entries()) {
    // An entry moved exactly when its original index differs from its new slot.
    if (entry.index === position) {
      continue;
    }
    reorderedResults += 1;
    const officeScope = getOfficeRetrievalScope(entry.result);
    if (officeScope) {
      const officeScopeKey = getOfficeRetrievalComparableScopeKey(officeScope);
      if (officeScopeKey) {
        officeAffectedScopes.add(officeScopeKey);
      }
      // A moved office result is attributed to its office scope only.
      continue;
    }
    const pdfScopeKey = getPDFRetrievalComparableScopeKey(getPDFRetrievalScope(entry.result));
    if (pdfScopeKey) {
      pdfAffectedScopes.add(pdfScopeKey);
    }
  }
  return {
    affectedScopeCount: officeAffectedScopes.size + pdfAffectedScopes.size,
    label: buildStructuredEvidenceReconcileLabel({
      officeAffectedScopeCount: officeAffectedScopes.size,
      pdfAffectedScopeCount: pdfAffectedScopes.size
    }),
    applied: reorderedResults > 0,
    officeAffectedScopeCount: officeAffectedScopes.size,
    pdfAffectedScopeCount: pdfAffectedScopes.size,
    results: orderedResults,
    reorderedResults
  };
};
|
|
17619
18972
|
var getStructuredSectionScoreWeight2 = (metadata) => {
|
|
17620
18973
|
const pdfTextKind = typeof metadata?.pdfTextKind === "string" ? metadata.pdfTextKind : undefined;
|
|
17621
18974
|
const officeBlockKind = typeof metadata?.officeBlockKind === "string" ? metadata.officeBlockKind : undefined;
|
|
@@ -18084,9 +19437,25 @@ var createRAGCollection = (options) => {
|
|
|
18084
19437
|
stage: "source_balance"
|
|
18085
19438
|
});
|
|
18086
19439
|
}
|
|
19440
|
+
const evidenceReconciled = reconcileStructuredEvidenceOrdering(diversified);
|
|
19441
|
+
if (evidenceReconciled.applied) {
|
|
19442
|
+
steps.push({
|
|
19443
|
+
count: evidenceReconciled.results.length,
|
|
19444
|
+
label: evidenceReconciled.label,
|
|
19445
|
+
metadata: {
|
|
19446
|
+
affectedScopes: evidenceReconciled.affectedScopeCount,
|
|
19447
|
+
officeAffectedScopes: evidenceReconciled.officeAffectedScopeCount,
|
|
19448
|
+
pdfAffectedScopes: evidenceReconciled.pdfAffectedScopeCount,
|
|
19449
|
+
reorderedResults: evidenceReconciled.reorderedResults
|
|
19450
|
+
},
|
|
19451
|
+
sectionCounts: buildTraceSectionCounts(evidenceReconciled.results),
|
|
19452
|
+
sectionScores: buildTraceSectionScores(evidenceReconciled.results),
|
|
19453
|
+
stage: "evidence_reconcile"
|
|
19454
|
+
});
|
|
19455
|
+
}
|
|
18087
19456
|
const limited = annotateRetrievalChannels({
|
|
18088
19457
|
lexicalResults,
|
|
18089
|
-
results:
|
|
19458
|
+
results: evidenceReconciled.results.slice(0, topK),
|
|
18090
19459
|
vectorResults
|
|
18091
19460
|
});
|
|
18092
19461
|
if (typeof input.scoreThreshold !== "number") {
|
|
@@ -18252,6 +19621,37 @@ var searchDocuments = async (collection, input) => collection.search(input);
|
|
|
18252
19621
|
// src/ai/rag/htmxWorkflowRenderers.ts
|
|
18253
19622
|
init_constants();
|
|
18254
19623
|
// Escape the four HTML-significant characters (&, <, >, ") so the text can be
// embedded in element content and double-quoted attribute values. The ampersand
// is replaced first so the entities produced by later steps are not re-escaped.
var escapeHtml2 = (text) => text.replace(/&/g, "&amp;").replace(/</g, "&lt;").replace(/>/g, "&gt;").replace(/"/g, "&quot;");
|
|
19624
|
+
// Render `{ label, value }` rows as a `<dl class="rag-status">` definition list.
// Both fields are passed through escapeHtml2; an empty row list renders as "".
var renderLabelValueRows = (rows) => {
  if (!(rows.length > 0)) {
    return "";
  }
  const entries = rows.map((row) => {
    const term = escapeHtml2(row.label);
    const detail = escapeHtml2(row.value);
    return `<div><dt>${term}</dt><dd>${detail}</dd></div>`;
  });
  return `<dl class="rag-status">${entries.join("")}</dl>`;
};
|
|
19625
|
+
/**
 * Render the status panel for a benchmark runtime response.
 *
 * Reads `input.title` and, from `input.response`: the suite label (falling back
 * to the suite id, then "n/a"), the optional `groupKey` / `corpusGroupKey`,
 * `historyPresentation` (`rows`, `recentRuns`) and
 * `snapshotHistoryPresentation` (`rows`, `snapshots`). At most three recent
 * runs and three snapshots are listed; an empty list renders a placeholder.
 *
 * @returns The panel as an HTML string.
 */
var renderBenchmarkRuntimePanel = (input) => {
  const { response } = input;
  const rows = [
    {
      label: "Suite",
      value: response.suite?.label ?? response.suite?.id ?? "n/a"
    }
  ];
  if (response.groupKey) {
    rows.push({ label: "Group", value: response.groupKey });
  }
  if (response.corpusGroupKey) {
    rows.push({ label: "Corpus group", value: response.corpusGroupKey });
  }
  const history = response.historyPresentation;
  const snapshotHistory = response.snapshotHistoryPresentation;
  // Shared markup for the run and snapshot lists: first three entries, or a placeholder.
  const renderSummaryList = (entries, emptyMessage) => {
    if (!(entries.length > 0)) {
      return `<p class="rag-empty">${emptyMessage}</p>`;
    }
    const items = entries.slice(0, 3).map((entry) => `<li><strong>${escapeHtml2(entry.label)}</strong> ${escapeHtml2(entry.summary)}</li>`);
    return `<ul class="rag-status-capabilities">${items.join("")}</ul>`;
  };
  const sections = [
    `<section class="rag-status-governance"><h3>${escapeHtml2(input.title)}</h3>`,
    renderLabelValueRows(rows),
    `<h4>Run history</h4>`,
    renderLabelValueRows(history?.rows ?? []),
    renderSummaryList(history?.recentRuns ?? [], "No persisted benchmark runs yet."),
    `<h4>Snapshot history</h4>`,
    renderLabelValueRows(snapshotHistory?.rows ?? []),
    renderSummaryList(snapshotHistory?.snapshots ?? [], "No saved suite snapshots yet."),
    `</section>`
  ];
  return sections.join("");
};
|
|
19640
|
+
/**
 * Render the status panel for a benchmark snapshot response.
 *
 * Reads `input.title` and, from `input.response`: the suite label (falling back
 * to the suite id, then "n/a"), the optional saved `snapshot` (label or suite
 * id plus version) and `snapshotHistoryPresentation` (`rows`, `snapshots`).
 * At most three snapshots are listed; an empty list renders a placeholder.
 *
 * @returns The panel as an HTML string.
 */
var renderBenchmarkSnapshotPanel = (input) => {
  const { response } = input;
  const summaryRows = [
    {
      label: "Suite",
      value: response.suite?.label ?? response.suite?.id ?? "n/a"
    }
  ];
  if (response.snapshot) {
    summaryRows.push({
      label: "Saved snapshot",
      value: `${response.snapshot.label ?? response.snapshot.suiteId} \xB7 v${response.snapshot.version}`
    });
  }
  const presentation = response.snapshotHistoryPresentation;
  const snapshots = presentation?.snapshots ?? [];
  let snapshotList;
  if (snapshots.length > 0) {
    const items = snapshots.slice(0, 3).map((snapshot) => `<li><strong>${escapeHtml2(snapshot.label)}</strong> ${escapeHtml2(snapshot.summary)}</li>`);
    snapshotList = `<ul class="rag-status-capabilities">${items.join("")}</ul>`;
  } else {
    snapshotList = `<p class="rag-empty">No saved suite snapshots yet.</p>`;
  }
  const heading = `<section class="rag-status-governance"><h3>${escapeHtml2(input.title)}</h3>`;
  return [
    heading,
    renderLabelValueRows(summaryRows),
    renderLabelValueRows(presentation?.rows ?? []),
    snapshotList,
    `</section>`
  ].join("");
};
|
|
18255
19655
|
var renderSourceLabels = (input) => {
|
|
18256
19656
|
if (!input) {
|
|
18257
19657
|
return "";
|
|
@@ -18448,7 +19848,7 @@ var renderRetrievalGovernancePanel = (retrievalComparisons) => {
|
|
|
18448
19848
|
const latest = retrievalComparisons.latest;
|
|
18449
19849
|
const alerts = (retrievalComparisons.alerts ?? []).slice(0, 3);
|
|
18450
19850
|
const releaseGroups = (retrievalComparisons.releaseGroups ?? []).slice(0, 2);
|
|
18451
|
-
const formatClassification = (classification) => classification === "multivector" ? "multivector regression" : classification === "runtime" ? "runtime regression" : classification === "general" ? "general regression" : undefined;
|
|
19851
|
+
// Map a regression classification to its display label; any value other than
// the four known classifications yields undefined.
const formatClassification = (classification) => {
  switch (classification) {
    case "multivector":
      return "multivector regression";
    case "evidence":
      return "evidence regression";
    case "runtime":
      return "runtime regression";
    case "general":
      return "general regression";
    default:
      return undefined;
  }
};
|
|
18452
19852
|
return `<section class="rag-status-governance"><h3>Retrieval governance</h3>` + (latest ? `<dl class="rag-status">` + `<div><dt>Latest comparison</dt><dd>${escapeHtml2(latest.label)}</dd></div>` + (latest.bestByPassingRate ? `<div><dt>Best passing rate</dt><dd>${escapeHtml2(latest.bestByPassingRate)}</dd></div>` : "") + (latest.bestByAverageF1 ? `<div><dt>Best average F1</dt><dd>${escapeHtml2(latest.bestByAverageF1)}</dd></div>` : "") + (latest.bestByMultivectorCollapsedCases ? `<div><dt>Best multivector collapse</dt><dd>${escapeHtml2(latest.bestByMultivectorCollapsedCases)}</dd></div>` : "") + (latest.bestByMultivectorLexicalHitCases ? `<div><dt>Best multivector lexical hits</dt><dd>${escapeHtml2(latest.bestByMultivectorLexicalHitCases)}</dd></div>` : "") + (latest.bestByMultivectorVectorHitCases ? `<div><dt>Best multivector vector hits</dt><dd>${escapeHtml2(latest.bestByMultivectorVectorHitCases)}</dd></div>` : "") + (latest.decisionSummary?.gate?.status ? `<div><dt>Gate</dt><dd>${escapeHtml2(latest.decisionSummary.gate.status)}</dd></div>` : "") + (latest.releaseVerdict?.status ? `<div><dt>Verdict</dt><dd>${escapeHtml2(latest.releaseVerdict.status)}</dd></div>` : "") + `</dl>` : "") + `<h4>Active alerts</h4>` + (alerts.length > 0 ? `<ul class="rag-status-capabilities">${alerts.map((alert) => `<li><strong>${escapeHtml2(alert.kind)}</strong>${formatClassification(alert.classification) ? ` <span>${escapeHtml2(formatClassification(alert.classification) ?? "")}</span>` : ""} ${escapeHtml2(alert.message)}</li>`).join("")}</ul>` : `<p class="rag-empty">No active retrieval comparison alerts.</p>`) + (releaseGroups.length > 0 ? `<h4>Release groups</h4><ul class="rag-status-capabilities">${releaseGroups.map((group) => {
|
|
18453
19853
|
const reasons = group.recommendedActionReasons?.slice(0, 2).join("; ") ?? "No recommended action.";
|
|
18454
19854
|
return `<li><strong>${escapeHtml2(group.groupKey)}</strong>${formatClassification(group.classification) ? ` <span>${escapeHtml2(formatClassification(group.classification) ?? "")}</span>` : ""} ${escapeHtml2(group.recommendedAction ?? "monitor")} \xB7 ${escapeHtml2(reasons)}</li>`;
|
|
@@ -18512,6 +19912,22 @@ var defaultSearchResults = ({
|
|
|
18512
19912
|
return defaultSearchResultItem(result, index, sectionJumps);
|
|
18513
19913
|
}).join("")}</section>`;
|
|
18514
19914
|
})();
|
|
19915
|
+
// Default renderer for the adaptive native planner benchmark: the runtime
// panel with a fixed title and the given response.
var defaultAdaptiveNativePlannerBenchmark = (input) => {
  const panelInput = {
    response: input,
    title: "Adaptive native planner benchmark"
  };
  return renderBenchmarkRuntimePanel(panelInput);
};
|
|
19919
|
+
// Default renderer for the native backend comparison benchmark: the runtime
// panel with a fixed title and the given response.
var defaultNativeBackendComparisonBenchmark = (input) => {
  const panelInput = {
    response: input,
    title: "Native backend comparison benchmark"
  };
  return renderBenchmarkRuntimePanel(panelInput);
};
|
|
19923
|
+
// Default renderer for adaptive native planner snapshots: the snapshot panel
// with a fixed title and the given response.
var defaultAdaptiveNativePlannerBenchmarkSnapshot = (input) => {
  const panelInput = {
    response: input,
    title: "Adaptive native planner snapshots"
  };
  return renderBenchmarkSnapshotPanel(panelInput);
};
|
|
19927
|
+
// Default renderer for native backend comparison snapshots: the snapshot panel
// with a fixed title and the given response.
var defaultNativeBackendComparisonBenchmarkSnapshot = (input) => {
  const panelInput = {
    response: input,
    title: "Native backend comparison snapshots"
  };
  return renderBenchmarkSnapshotPanel(panelInput);
};
|
|
18515
19931
|
// Render one document as an <article class="rag-document"> card.
// Fallbacks: title -> "Document <index + 1>", format -> "text",
// chunkStrategy -> "paragraphs", chunkCount -> 0.
var defaultDocumentItem = (document, index) => {
  const heading = escapeHtml2(document.title || `Document ${index + 1}`);
  const id = escapeHtml2(document.id);
  const source = escapeHtml2(document.source);
  const labels = renderSourceLabels(document.labels);
  const format = escapeHtml2(document.format ?? "text");
  const strategy = escapeHtml2(document.chunkStrategy ?? "paragraphs");
  const chunkCount = document.chunkCount ?? 0;
  return `<article class="rag-document"><h3>${heading}</h3><p class="rag-document-id">${id}</p><p class="rag-document-source">${source}</p>${labels}<p class="rag-document-meta">${format} \xB7 ${strategy} \xB7 ${chunkCount} chunks</p></article>`;
};
|
|
18516
19932
|
var defaultDocuments = ({
|
|
18517
19933
|
documents
|
|
@@ -18584,6 +20000,8 @@ var defaultEvaluateResult = ({
|
|
|
18584
20000
|
// Wrap an error message, escaped via escapeHtml2, in a <div class="rag-error">.
var defaultError2 = (message) => {
  const safeMessage = escapeHtml2(message);
  return `<div class="rag-error">${safeMessage}</div>`;
};
|
|
18585
20001
|
// Default maintenance renderer: delegates to renderMaintenancePanel unchanged.
var defaultMaintenance = (input) => {
  const html = renderMaintenancePanel(input);
  return html;
};
|
|
18586
20002
|
var resolveRAGWorkflowRenderers = (custom) => ({
|
|
20003
|
+
adaptiveNativePlannerBenchmark: custom?.adaptiveNativePlannerBenchmark ?? defaultAdaptiveNativePlannerBenchmark,
|
|
20004
|
+
adaptiveNativePlannerBenchmarkSnapshot: custom?.adaptiveNativePlannerBenchmarkSnapshot ?? defaultAdaptiveNativePlannerBenchmarkSnapshot,
|
|
18587
20005
|
chunkPreview: custom?.chunkPreview ?? defaultChunkPreview,
|
|
18588
20006
|
documentItem: custom?.documentItem ?? defaultDocumentItem,
|
|
18589
20007
|
documents: custom?.documents ?? defaultDocuments,
|
|
@@ -18591,6 +20009,8 @@ var resolveRAGWorkflowRenderers = (custom) => ({
|
|
|
18591
20009
|
error: custom?.error ?? defaultError2,
|
|
18592
20010
|
maintenance: custom?.maintenance ?? defaultMaintenance,
|
|
18593
20011
|
mutationResult: custom?.mutationResult ?? defaultMutationResult,
|
|
20012
|
+
nativeBackendComparisonBenchmark: custom?.nativeBackendComparisonBenchmark ?? defaultNativeBackendComparisonBenchmark,
|
|
20013
|
+
nativeBackendComparisonBenchmarkSnapshot: custom?.nativeBackendComparisonBenchmarkSnapshot ?? defaultNativeBackendComparisonBenchmarkSnapshot,
|
|
18594
20014
|
evaluateResult: custom?.evaluateResult ?? defaultEvaluateResult,
|
|
18595
20015
|
searchResultItem: custom?.searchResultItem ?? defaultSearchResultItem,
|
|
18596
20016
|
searchResults: custom?.searchResults ?? defaultSearchResults,
|
|
@@ -18885,7 +20305,12 @@ var parseRAGRetrieval = (value) => {
|
|
|
18885
20305
|
"fusion",
|
|
18886
20306
|
"fusionConstant",
|
|
18887
20307
|
"lexicalWeight",
|
|
18888
|
-
"vectorWeight"
|
|
20308
|
+
"vectorWeight",
|
|
20309
|
+
"nativeQueryProfile",
|
|
20310
|
+
"nativeCandidateLimit",
|
|
20311
|
+
"nativeMaxBackfills",
|
|
20312
|
+
"nativeMinResults",
|
|
20313
|
+
"nativeFillPolicy"
|
|
18889
20314
|
]);
|
|
18890
20315
|
for (const key of Object.keys(value)) {
|
|
18891
20316
|
if (!allowedFields.has(key)) {
|
|
@@ -18962,6 +20387,36 @@ var parseRAGRetrieval = (value) => {
|
|
|
18962
20387
|
}
|
|
18963
20388
|
retrieval.vectorWeight = value.vectorWeight;
|
|
18964
20389
|
}
|
|
20390
|
+
if (getOwnProperty(value, "nativeQueryProfile")) {
|
|
20391
|
+
if (value.nativeQueryProfile !== "latency" && value.nativeQueryProfile !== "balanced" && value.nativeQueryProfile !== "recall") {
|
|
20392
|
+
return null;
|
|
20393
|
+
}
|
|
20394
|
+
retrieval.nativeQueryProfile = value.nativeQueryProfile;
|
|
20395
|
+
}
|
|
20396
|
+
if (getOwnProperty(value, "nativeCandidateLimit")) {
|
|
20397
|
+
if (typeof value.nativeCandidateLimit !== "number") {
|
|
20398
|
+
return null;
|
|
20399
|
+
}
|
|
20400
|
+
retrieval.nativeCandidateLimit = value.nativeCandidateLimit;
|
|
20401
|
+
}
|
|
20402
|
+
if (getOwnProperty(value, "nativeMaxBackfills")) {
|
|
20403
|
+
if (typeof value.nativeMaxBackfills !== "number") {
|
|
20404
|
+
return null;
|
|
20405
|
+
}
|
|
20406
|
+
retrieval.nativeMaxBackfills = value.nativeMaxBackfills;
|
|
20407
|
+
}
|
|
20408
|
+
if (getOwnProperty(value, "nativeMinResults")) {
|
|
20409
|
+
if (typeof value.nativeMinResults !== "number") {
|
|
20410
|
+
return null;
|
|
20411
|
+
}
|
|
20412
|
+
retrieval.nativeMinResults = value.nativeMinResults;
|
|
20413
|
+
}
|
|
20414
|
+
if (getOwnProperty(value, "nativeFillPolicy")) {
|
|
20415
|
+
if (value.nativeFillPolicy !== "strict_topk" && value.nativeFillPolicy !== "satisfy_min_results") {
|
|
20416
|
+
return null;
|
|
20417
|
+
}
|
|
20418
|
+
retrieval.nativeFillPolicy = value.nativeFillPolicy;
|
|
20419
|
+
}
|
|
18965
20420
|
return retrieval;
|
|
18966
20421
|
};
|
|
18967
20422
|
// Return `status` when it is a number; otherwise fall back to HTTP_STATUS_OK.
var getNumericStatus = (status) => {
  if (typeof status === "number") {
    return status;
  }
  return HTTP_STATUS_OK;
};
|
|
@@ -18973,9 +20428,12 @@ var classifyGovernanceReasons = (reasons) => {
|
|
|
18973
20428
|
if (normalized.some((reason) => reason.includes("runtime ") || reason.includes("planner") || reason.includes("candidate-budget-exhausted") || reason.includes("underfilled-topk"))) {
|
|
18974
20429
|
return "runtime";
|
|
18975
20430
|
}
|
|
20431
|
+
if (normalized.some((reason) => reason.includes("evidence reconcile") || reason.includes("hybrid evidence") || reason.includes("ocr supplement"))) {
|
|
20432
|
+
return "evidence";
|
|
20433
|
+
}
|
|
18976
20434
|
return "general";
|
|
18977
20435
|
};
|
|
18978
|
-
var buildRegressionRemediationLabel = (classification) => classification === "multivector" ? "Inspect multivector coverage deltas, variant-hit traces, and collapsed-parent recovery before promotion." : classification === "runtime" ? "Inspect planner-profile shifts, candidate-budget exhaustion, and underfilled native retrieval before promotion." : "Inspect the latest retrieval comparison deltas and resolve the gate failure before promotion.";
|
|
20436
|
+
// Pick the remediation hint for a regression classification. "multivector",
// "evidence" and "runtime" have dedicated hints; every other value gets the
// generic gate-failure hint.
var buildRegressionRemediationLabel = (classification) => {
  switch (classification) {
    case "multivector":
      return "Inspect multivector coverage deltas, variant-hit traces, and collapsed-parent recovery before promotion.";
    case "evidence":
      return "Inspect hybrid evidence reconciliation, native-vs-OCR passage selection, and PDF evidence provenance before promotion.";
    case "runtime":
      return "Inspect planner-profile shifts, candidate-budget exhaustion, and underfilled native retrieval before promotion.";
    default:
      return "Inspect the latest retrieval comparison deltas and resolve the gate failure before promotion.";
  }
};
|
|
18979
20437
|
var summarizeIncidentClassifications = (incidents) => {
|
|
18980
20438
|
const allIncidents = incidents ?? [];
|
|
18981
20439
|
const countBy = (status, classification) => allIncidents.filter((entry) => entry.status === status && (entry.classification ?? "general") === classification).length;
|
|
@@ -18983,12 +20441,15 @@ var summarizeIncidentClassifications = (incidents) => {
|
|
|
18983
20441
|
openGeneralCount: countBy("open", "general"),
|
|
18984
20442
|
openMultiVectorCount: countBy("open", "multivector"),
|
|
18985
20443
|
openRuntimeCount: countBy("open", "runtime"),
|
|
20444
|
+
openEvidenceCount: countBy("open", "evidence"),
|
|
18986
20445
|
resolvedGeneralCount: countBy("resolved", "general"),
|
|
18987
20446
|
resolvedMultiVectorCount: countBy("resolved", "multivector"),
|
|
18988
20447
|
resolvedRuntimeCount: countBy("resolved", "runtime"),
|
|
20448
|
+
resolvedEvidenceCount: countBy("resolved", "evidence"),
|
|
18989
20449
|
totalGeneralCount: allIncidents.filter((entry) => (entry.classification ?? "general") === "general").length,
|
|
18990
20450
|
totalMultiVectorCount: allIncidents.filter((entry) => (entry.classification ?? "general") === "multivector").length,
|
|
18991
|
-
totalRuntimeCount: allIncidents.filter((entry) => (entry.classification ?? "general") === "runtime").length
|
|
20451
|
+
totalRuntimeCount: allIncidents.filter((entry) => (entry.classification ?? "general") === "runtime").length,
|
|
20452
|
+
totalEvidenceCount: allIncidents.filter((entry) => (entry.classification ?? "general") === "evidence").length
|
|
18992
20453
|
};
|
|
18993
20454
|
};
|
|
18994
20455
|
var getBooleanProperty = (value, key) => {
|
|
@@ -19190,6 +20651,7 @@ var ragChat = (config) => {
|
|
|
19190
20651
|
const { retrievalReleasePolicies } = config;
|
|
19191
20652
|
const { retrievalReleasePoliciesByRolloutLabel } = config;
|
|
19192
20653
|
const { retrievalReleasePoliciesByGroupAndRolloutLabel } = config;
|
|
20654
|
+
const { retrievalBaselineGatePoliciesByGroup } = config;
|
|
19193
20655
|
const { retrievalBaselineGatePoliciesByRolloutLabel } = config;
|
|
19194
20656
|
const { retrievalBaselineGatePoliciesByGroupAndRolloutLabel } = config;
|
|
19195
20657
|
const workflowRenderConfig = typeof config.htmx === "object" ? config.htmx.workflowRender ?? config.htmx.workflow?.render : undefined;
|
|
@@ -19803,6 +21265,7 @@ var ragChat = (config) => {
|
|
|
19803
21265
|
groupKey: getStringProperty(body, "groupKey"),
|
|
19804
21266
|
label: getStringProperty(body, "label"),
|
|
19805
21267
|
persistRun: getBooleanProperty(body, "persistRun") === true,
|
|
21268
|
+
suiteId: getStringProperty(body, "suiteId"),
|
|
19806
21269
|
tags: normalizeStringArray2(body.tags),
|
|
19807
21270
|
retrievals
|
|
19808
21271
|
};
|
|
@@ -20310,7 +21773,7 @@ var ragChat = (config) => {
|
|
|
20310
21773
|
const baselineRetrievalId = input.baselineRetrievalId ?? activeBaseline?.retrievalId;
|
|
20311
21774
|
const candidateRetrievalId = input.candidateRetrievalId ?? input.retrievals.find((entry) => entry.id !== baselineRetrievalId)?.id;
|
|
20312
21775
|
const startedAt = Date.now();
|
|
20313
|
-
const suiteId = generateId();
|
|
21776
|
+
const suiteId = input.suiteId ?? generateId();
|
|
20314
21777
|
const suiteLabel = input.label ?? "Retrieval comparison";
|
|
20315
21778
|
const comparison = await compareRAGRetrievalStrategies({
|
|
20316
21779
|
collection,
|
|
@@ -20349,7 +21812,12 @@ var ragChat = (config) => {
|
|
|
20349
21812
|
baselineRetrievalId,
|
|
20350
21813
|
candidateRetrievalId,
|
|
20351
21814
|
comparison,
|
|
20352
|
-
policy:
|
|
21815
|
+
policy: getEffectiveRetrievalBaselineGatePolicy({
|
|
21816
|
+
baselinePolicy: activeBaseline?.policy,
|
|
21817
|
+
groupKey: input.groupKey,
|
|
21818
|
+
rolloutLabel: activeBaseline?.rolloutLabel,
|
|
21819
|
+
suiteId
|
|
21820
|
+
})
|
|
20353
21821
|
});
|
|
20354
21822
|
await persistRAGRetrievalComparisonRun({
|
|
20355
21823
|
run: {
|
|
@@ -20485,9 +21953,42 @@ var ragChat = (config) => {
|
|
|
20485
21953
|
});
|
|
20486
21954
|
// Look up the configured auto-complete policy for a (group, target rollout
// label) pair. Returns an empty object when either key is missing or nothing
// is configured for the pair.
const getRetrievalLaneHandoffAutoCompletePolicy = (groupKey, targetRolloutLabel) => {
  if (!groupKey || !targetRolloutLabel) {
    return {};
  }
  const policiesByGroup = config.retrievalLaneHandoffAutoCompletePoliciesByGroupAndTargetRolloutLabel;
  return policiesByGroup?.[groupKey]?.[targetRolloutLabel] ?? {};
};
|
|
20487
21955
|
// Merge the configured baseline gate policies for a group and rollout label.
// Layers are applied in increasing precedence: per-group, per-rollout-label,
// then per-group-and-rollout-label; a missing layer contributes nothing.
const getDefaultRetrievalBaselineGatePolicy = (groupKey, rolloutLabel) => {
  const groupPolicy = groupKey ? retrievalBaselineGatePoliciesByGroup?.[groupKey] : undefined;
  const rolloutPolicy = rolloutLabel ? retrievalBaselineGatePoliciesByRolloutLabel?.[rolloutLabel] : undefined;
  const groupRolloutPolicy = groupKey && rolloutLabel ? retrievalBaselineGatePoliciesByGroupAndRolloutLabel?.[groupKey]?.[rolloutLabel] : undefined;
  return {
    ...groupPolicy ?? {},
    ...rolloutPolicy ?? {},
    ...groupRolloutPolicy ?? {}
  };
};
|
|
21960
|
+
// Build a fresh copy of the recommended gate policy for the runtime retrieval
// benchmarks: every delta threshold is 0 and the severity is "fail".
const buildRuntimeRetrievalBenchmarkRecommendedGatePolicy = () => {
  const zeroDelta = 0;
  const policy = {
    minEvidenceReconcileCasesDelta: zeroDelta,
    maxRuntimeCandidateBudgetExhaustedCasesDelta: zeroDelta,
    maxRuntimeUnderfilledTopKCasesDelta: zeroDelta,
    minAverageF1Delta: zeroDelta,
    minPassingRateDelta: zeroDelta,
    severity: "fail"
  };
  return policy;
};
|
|
21968
|
+
/**
 * Return the recommended baseline gate policy when the comparison belongs to
 * one of the built-in benchmark suites (adaptive native planner or native
 * backend comparison), matched by the suite's recommended group key or by
 * suite id.
 *
 * A key only matches when it is actually present: a missing `input.groupKey`
 * no longer matches a suite that has no string `recommendedGroupKey`, and a
 * missing `input.suiteId` no longer matches a suite without an id.
 *
 * @param input Object with optional `groupKey` and `suiteId`.
 * @returns The recommended gate policy, or undefined when no suite matches.
 */
const getRecommendedBenchmarkBaselineGatePolicy = (input) => {
  const benchmarkSuites = [
    createRAGAdaptiveNativePlannerBenchmarkSuite(),
    createRAGNativeBackendComparisonBenchmarkSuite()
  ];
  const isBenchmarkComparison = benchmarkSuites.some((suite) => {
    const recommendedGroupKey = suite.metadata?.recommendedGroupKey;
    const matchesGroup = typeof recommendedGroupKey === "string" && input.groupKey === recommendedGroupKey;
    const matchesSuite = input.suiteId !== undefined && input.suiteId === suite.id;
    return matchesGroup || matchesSuite;
  });
  return isBenchmarkComparison ? buildRuntimeRetrievalBenchmarkRecommendedGatePolicy() : undefined;
};
|
|
21979
|
+
/**
 * Resolve the baseline gate policy to apply, by precedence:
 *   1. `input.baselinePolicy`, when it has at least one own key;
 *   2. the configured default for `groupKey` / `rolloutLabel`, when non-empty;
 *   3. the recommended benchmark policy for `groupKey` / `suiteId`, which may
 *      be undefined.
 */
const getEffectiveRetrievalBaselineGatePolicy = (input) => {
  const { baselinePolicy, groupKey, rolloutLabel, suiteId } = input;
  const hasEntries = (policy) => Object.keys(policy).length > 0;
  if (baselinePolicy && hasEntries(baselinePolicy)) {
    return baselinePolicy;
  }
  const configuredPolicy = getDefaultRetrievalBaselineGatePolicy(groupKey, rolloutLabel);
  return hasEntries(configuredPolicy) ? configuredPolicy : getRecommendedBenchmarkBaselineGatePolicy({ groupKey, suiteId });
};
|
|
20491
21992
|
// Incidents on the "stable" rollout label are "critical"; all others are "warning".
const getRetrievalReleaseIncidentSeverity = (rolloutLabel) => {
  if (rolloutLabel === "stable") {
    return "critical";
  }
  return "warning";
};
|
|
20492
21993
|
// Return the first entry of `input.decisions` that matches the group key and
// the source/target rollout labels, and also `input.kind` when one is given.
// Returns undefined when `decisions` is missing or nothing matches.
const getLatestLaneHandoffDecision = (input) => {
  const matchesLane = (entry) => {
    if (entry.groupKey !== input.groupKey) {
      return false;
    }
    if (entry.sourceRolloutLabel !== input.sourceRolloutLabel) {
      return false;
    }
    if (entry.targetRolloutLabel !== input.targetRolloutLabel) {
      return false;
    }
    return !input.kind || entry.kind === input.kind;
  };
  return input.decisions?.find(matchesLane);
};
|
|
20493
21994
|
const getLaneHandoffFreshnessWindow = (input) => {
|
|
@@ -20843,7 +22344,11 @@ var ragChat = (config) => {
|
|
|
20843
22344
|
const gate = decision?.gate;
|
|
20844
22345
|
const reasons = gate?.status && gate.status !== "pass" ? gate.reasons.length > 0 ? [...gate.reasons] : [`gate status is ${gate.status}`] : [];
|
|
20845
22346
|
const effectiveReleasePolicy = getRetrievalReleasePolicy(input.run.groupKey, input.targetRolloutLabel);
|
|
20846
|
-
const effectiveBaselineGatePolicy =
|
|
22347
|
+
const effectiveBaselineGatePolicy = getEffectiveRetrievalBaselineGatePolicy({
|
|
22348
|
+
groupKey: input.run.groupKey,
|
|
22349
|
+
rolloutLabel: input.targetRolloutLabel,
|
|
22350
|
+
suiteId: input.run.suiteId
|
|
22351
|
+
}) ?? {};
|
|
20847
22352
|
const requiresApproval = Boolean(effectiveReleasePolicy.requireApprovalBeforePromotion);
|
|
20848
22353
|
const approvalFreshness = latestDecision ? getDecisionFreshness({
|
|
20849
22354
|
now: input.now,
|
|
@@ -20894,7 +22399,7 @@ var ragChat = (config) => {
|
|
|
20894
22399
|
if (reason.includes("approval")) {
|
|
20895
22400
|
actions.add("Renew or record the required approval for this rollout lane.");
|
|
20896
22401
|
}
|
|
20897
|
-
if (reason.includes("gate") || reason.includes("passing rate") || reason.includes("average")) {
|
|
22402
|
+
if (reason.includes("gate") || reason.includes("passing rate") || reason.includes("average") || reason.includes("evidence reconcile") || reason.includes("ocr supplement") || reason.includes("hybrid evidence")) {
|
|
20898
22403
|
actions.add(buildRegressionRemediationLabel(classifyGovernanceReasons([reason])));
|
|
20899
22404
|
}
|
|
20900
22405
|
if (reason.includes("source comparison run was not found")) {
|
|
@@ -20925,7 +22430,7 @@ var ragChat = (config) => {
|
|
|
20925
22430
|
})
|
|
20926
22431
|
});
|
|
20927
22432
|
}
|
|
20928
|
-
if (reason.includes("gate") || reason.includes("passing rate") || reason.includes("average")) {
|
|
22433
|
+
if (reason.includes("gate") || reason.includes("passing rate") || reason.includes("average") || reason.includes("evidence reconcile") || reason.includes("ocr supplement") || reason.includes("hybrid evidence")) {
|
|
20929
22434
|
steps.push({
|
|
20930
22435
|
kind: "inspect_gate",
|
|
20931
22436
|
label: buildRegressionRemediationLabel(classifyGovernanceReasons([reason])),
|
|
@@ -20986,7 +22491,10 @@ var ragChat = (config) => {
|
|
|
20986
22491
|
baselineRetrievalId,
|
|
20987
22492
|
candidateRetrievalId: input.retrievalId,
|
|
20988
22493
|
classification: input.baseline ? "general" : undefined,
|
|
20989
|
-
effectiveBaselineGatePolicy: targetRolloutLabel || input.groupKey ?
|
|
22494
|
+
effectiveBaselineGatePolicy: targetRolloutLabel || input.groupKey ? getEffectiveRetrievalBaselineGatePolicy({
|
|
22495
|
+
groupKey: input.groupKey,
|
|
22496
|
+
rolloutLabel: targetRolloutLabel
|
|
22497
|
+
}) : undefined,
|
|
20990
22498
|
effectiveReleasePolicy: getRetrievalReleasePolicy(input.groupKey, targetRolloutLabel),
|
|
20991
22499
|
groupKey: input.groupKey,
|
|
20992
22500
|
gateStatus: undefined,
|
|
@@ -21711,32 +23219,20 @@ var ragChat = (config) => {
|
|
|
21711
23219
|
store: retrievalComparisonHistoryStore
|
|
21712
23220
|
}) : undefined;
|
|
21713
23221
|
const latest = decisions?.[0];
|
|
21714
|
-
const adaptiveNativePlannerBenchmark = await (
|
|
21715
|
-
|
|
21716
|
-
|
|
21717
|
-
|
|
21718
|
-
|
|
21719
|
-
|
|
21720
|
-
|
|
21721
|
-
|
|
21722
|
-
|
|
21723
|
-
|
|
21724
|
-
|
|
21725
|
-
|
|
21726
|
-
|
|
21727
|
-
|
|
21728
|
-
store: config.evaluationSuiteSnapshotHistoryStore,
|
|
21729
|
-
suite
|
|
21730
|
-
});
|
|
21731
|
-
return {
|
|
21732
|
-
recommendedGroupKey,
|
|
21733
|
-
recommendedTags,
|
|
21734
|
-
snapshotHistory,
|
|
21735
|
-
snapshotHistoryPresentation: buildRAGEvaluationSuiteSnapshotHistoryPresentation(snapshotHistory),
|
|
21736
|
-
suiteId: suite.id,
|
|
21737
|
-
suiteLabel: suite.label ?? suite.id
|
|
21738
|
-
};
|
|
21739
|
-
})();
|
|
23222
|
+
const adaptiveNativePlannerBenchmark = await loadAdaptiveNativePlannerBenchmarkRuntime({
|
|
23223
|
+
corpusGroupKey: getStringProperty(queryInput, "benchmarkCorpusGroupKey"),
|
|
23224
|
+
groupKey: getStringProperty(queryInput, "benchmarkGroupKey"),
|
|
23225
|
+
historyLimit: getIntegerLikeProperty(queryInput, "benchmarkRunLimit") ?? getIntegerLikeProperty(queryInput, "benchmarkLimit") ?? 5,
|
|
23226
|
+
queryInput,
|
|
23227
|
+
snapshotLimit: getIntegerLikeProperty(queryInput, "benchmarkLimit") ?? 5
|
|
23228
|
+
});
|
|
23229
|
+
const nativeBackendComparisonBenchmark = await loadNativeBackendComparisonBenchmarkRuntime({
|
|
23230
|
+
corpusGroupKey: getStringProperty(queryInput, "backendBenchmarkCorpusGroupKey"),
|
|
23231
|
+
groupKey: getStringProperty(queryInput, "backendBenchmarkGroupKey"),
|
|
23232
|
+
historyLimit: getIntegerLikeProperty(queryInput, "backendBenchmarkRunLimit") ?? getIntegerLikeProperty(queryInput, "backendBenchmarkLimit") ?? 5,
|
|
23233
|
+
queryInput,
|
|
23234
|
+
snapshotLimit: getIntegerLikeProperty(queryInput, "backendBenchmarkLimit") ?? 5
|
|
23235
|
+
});
|
|
21740
23236
|
const presentation = buildRAGRetrievalReleaseGroupHistoryPresentation({
|
|
21741
23237
|
runs,
|
|
21742
23238
|
timeline: {
|
|
@@ -21770,23 +23266,338 @@ var ragChat = (config) => {
|
|
|
21770
23266
|
latestDecisionAt: latest?.decidedAt,
|
|
21771
23267
|
latestDecisionFreshnessStatus: latest?.freshnessStatus,
|
|
21772
23268
|
latestDecisionKind: latest?.kind
|
|
21773
|
-
}
|
|
23269
|
+
},
|
|
23270
|
+
nativeBackendComparisonBenchmark
|
|
21774
23271
|
};
|
|
21775
23272
|
};
|
|
21776
|
-
const
|
|
21777
|
-
const suite = createRAGAdaptiveNativePlannerBenchmarkSuite(
|
|
21778
|
-
|
|
21779
|
-
|
|
21780
|
-
|
|
23273
|
+
/**
 * Assemble the runtime view of the adaptive native planner benchmark.
 *
 * Uses `input.suite` when given, otherwise a freshly created suite. Group keys
 * resolve from the explicit input first, then from `input.queryInput`
 * ("benchmarkGroupKey" / "benchmarkCorpusGroupKey"), and the group key finally
 * from the suite's recommended group key. Run history is loaded only when a
 * comparison history store is available, and snapshot history only when
 * `config.evaluationSuiteSnapshotHistoryStore` is set; otherwise the matching
 * fields are undefined. Both limits default to 5.
 *
 * @param input Optional `{ suite, groupKey, corpusGroupKey, historyLimit, snapshotLimit, queryInput }`.
 * @returns A promise for the keys, recent runs, fixture variants, history and
 *   snapshot presentations, and suite id/label.
 */
const loadAdaptiveNativePlannerBenchmarkRuntime = async (input) => {
  const suite = input?.suite ?? createRAGAdaptiveNativePlannerBenchmarkSuite();
  const suiteMetadata = suite.metadata;
  const recommendedGroupKey = typeof suiteMetadata?.recommendedGroupKey === "string" ? suiteMetadata.recommendedGroupKey : undefined;
  const recommendedTags = Array.isArray(suiteMetadata?.recommendedTags) ? suiteMetadata.recommendedTags.filter((entry) => typeof entry === "string") : undefined;
  const groupKey = input?.groupKey ?? getStringProperty(input?.queryInput, "benchmarkGroupKey") ?? recommendedGroupKey;
  const corpusGroupKey = input?.corpusGroupKey ?? getStringProperty(input?.queryInput, "benchmarkCorpusGroupKey");

  let recentRuns;
  if (retrievalComparisonHistoryStore) {
    recentRuns = await loadRAGRetrievalComparisonHistory({
      corpusGroupKey,
      groupKey,
      limit: input?.historyLimit ?? 5,
      store: retrievalComparisonHistoryStore,
      suiteId: suite.id
    });
  }

  let historyPresentation;
  if (recentRuns && recentRuns.length > 0) {
    // Without an explicit group key, the timeline falls back to the first loaded run's keys.
    const timelineGroupKey = groupKey ?? recentRuns[0]?.groupKey;
    historyPresentation = buildRAGRetrievalReleaseGroupHistoryPresentation({
      runs: recentRuns,
      timeline: timelineGroupKey ? {
        corpusGroupKey: corpusGroupKey ?? recentRuns[0]?.corpusGroupKey,
        groupKey: timelineGroupKey
      } : undefined
    });
  }

  let snapshotHistory;
  if (config.evaluationSuiteSnapshotHistoryStore) {
    snapshotHistory = await loadRAGEvaluationSuiteSnapshotHistory({
      limit: input?.snapshotLimit ?? 5,
      store: config.evaluationSuiteSnapshotHistoryStore,
      suite
    });
  }

  const fixtureVariants = getRetrievalBenchmarkFixtureVariants(recentRuns);
  return {
    corpusGroupKey,
    fixtureVariants,
    groupKey,
    historyPresentation,
    latestFixtureVariant: fixtureVariants[0],
    latestRun: recentRuns?.[0],
    recentRuns,
    recommendedGroupKey,
    recommendedTags,
    snapshotHistory,
    snapshotHistoryPresentation: buildRAGEvaluationSuiteSnapshotHistoryPresentation(snapshotHistory),
    suiteId: suite.id,
    suiteLabel: suite.label ?? suite.id
  };
};
|
|
23316
|
+
/**
 * Builds the tag list describing the collection a benchmark run executes
 * against. The list always starts with `fixture:current-collection`; when the
 * resolved collection reports a status, `backend:` and `vector-mode:` tags are
 * appended, plus `native-mode:` when the status carries a native section with
 * a `mode` key.
 *
 * @returns {string[]} Tags for the current collection.
 */
const buildRetrievalBenchmarkBackendTags = () => {
  const fixtureTag = "fixture:current-collection";
  const collectionStatus = resolveCollection()?.getStatus?.();
  // No collection (or no status reporter): only the fixture tag is known.
  if (!collectionStatus) {
    return [fixtureTag];
  }
  const { backend, vectorMode, native } = collectionStatus;
  const backendTags = [fixtureTag, `backend:${backend}`, `vector-mode:${vectorMode}`];
  if (native && "mode" in native) {
    backendTags.push(`native-mode:${native.mode}`);
  }
  return backendTags;
};
|
|
23332
|
+
/**
 * Extracts the distinct fixture variants referenced by a list of runs.
 *
 * A variant is the text after the `fixture:` prefix of a run tag. Variants are
 * returned in first-seen order; blank (whitespace-only) variants are dropped.
 * Missing runs, missing `tags`, and non-string tags are ignored rather than
 * throwing, so partially populated history records cannot break the caller.
 *
 * @param {Array<{tags?: unknown}>|null|undefined} runs Runs to scan.
 * @returns {string[]} Unique fixture variant names.
 */
const getRetrievalBenchmarkFixtureVariants = (runs) => {
  const prefix = "fixture:";
  // A Set keeps insertion order and makes de-duplication linear.
  const variants = new Set();
  for (const run of runs ?? []) {
    const rawTags = run?.tags;
    // A lone non-array value is treated as a single tag.
    const tagList = Array.isArray(rawTags) ? rawTags : rawTags == null ? [] : [rawTags];
    for (const tag of tagList) {
      if (typeof tag !== "string" || !tag.startsWith(prefix)) {
        continue;
      }
      const variant = tag.slice(prefix.length);
      if (variant.trim().length > 0) {
        variants.add(variant);
      }
    }
  }
  return [...variants];
};
|
|
23333
|
+
/**
 * Guarantees that a tag list carries at least one `fixture:` tag.
 *
 * When the list already has one it is returned untouched (same array). When it
 * has none, the `fixture:` tags produced by
 * `buildRetrievalBenchmarkBackendTags()` are appended and duplicates removed.
 *
 * @param {string[]} tags Tags supplied for a benchmark run.
 * @returns {string[]} Tags including a fixture tag.
 */
const ensureRetrievalBenchmarkFixtureTag = (tags) => {
  const isFixtureTag = (tag) => tag.startsWith("fixture:");
  if (tags.some(isFixtureTag)) {
    return tags;
  }
  const combined = [...tags];
  for (const candidate of buildRetrievalBenchmarkBackendTags()) {
    if (isFixtureTag(candidate)) {
      combined.push(candidate);
    }
  }
  // Keep the first occurrence of each tag.
  return [...new Set(combined)];
};
|
|
23340
|
+
/**
 * Assembles the read-only runtime view of the native backend comparison
 * benchmark: recent comparison runs, their history presentation, the suite
 * snapshot history, and the fixture variants found in the run tags.
 *
 * @param {object} [input] Optional settings.
 * @param {object} [input.suite] Suite to report on; a default suite is created when omitted.
 * @param {string} [input.groupKey] Group key; falls back to `queryInput.benchmarkGroupKey`,
 *   then to the suite's recommended group key.
 * @param {string} [input.corpusGroupKey] Corpus group key; falls back to
 *   `queryInput.benchmarkCorpusGroupKey`.
 * @param {object} [input.queryInput] Raw query input consulted for the fallbacks above.
 * @param {number} [input.historyLimit=5] Limit passed to the comparison-run history load.
 * @param {number} [input.snapshotLimit=5] Limit passed to the snapshot history load.
 * @returns {Promise<object>} Runtime summary; `recentRuns` and `snapshotHistory` are
 *   `undefined` when the corresponding store is not configured.
 */
const loadNativeBackendComparisonBenchmarkRuntime = async (input) => {
  const suite = input?.suite ?? createRAGNativeBackendComparisonBenchmarkSuite();
  const suiteMetadata = suite.metadata;
  const recommendedGroupKey = typeof suiteMetadata?.recommendedGroupKey === "string" ? suiteMetadata.recommendedGroupKey : undefined;
  const recommendedTags = Array.isArray(suiteMetadata?.recommendedTags) ? suiteMetadata.recommendedTags.filter((entry) => typeof entry === "string") : undefined;
  const groupKey = input?.groupKey ?? getStringProperty(input?.queryInput, "benchmarkGroupKey") ?? recommendedGroupKey;
  const corpusGroupKey = input?.corpusGroupKey ?? getStringProperty(input?.queryInput, "benchmarkCorpusGroupKey");
  const snapshotStore = config.evaluationSuiteSnapshotHistoryStore;
  // The two history reads do not depend on each other, so issue them together
  // instead of waiting for the first before starting the second.
  const [recentRuns, snapshotHistory] = await Promise.all([
    retrievalComparisonHistoryStore ? loadRAGRetrievalComparisonHistory({
      corpusGroupKey,
      groupKey,
      limit: input?.historyLimit ?? 5,
      store: retrievalComparisonHistoryStore,
      suiteId: suite.id
    }) : undefined,
    snapshotStore ? loadRAGEvaluationSuiteSnapshotHistory({
      limit: input?.snapshotLimit ?? 5,
      store: snapshotStore,
      suite
    }) : undefined
  ]);
  // Without an explicit group key, the timeline is keyed by the first loaded run.
  const historyTimelineGroupKey = groupKey ?? recentRuns?.[0]?.groupKey;
  const historyPresentation = recentRuns && recentRuns.length > 0 ? buildRAGRetrievalReleaseGroupHistoryPresentation({
    runs: recentRuns,
    timeline: historyTimelineGroupKey ? {
      corpusGroupKey: corpusGroupKey ?? recentRuns[0]?.corpusGroupKey,
      groupKey: historyTimelineGroupKey
    } : undefined
  }) : undefined;
  const fixtureVariants = getRetrievalBenchmarkFixtureVariants(recentRuns);
  return {
    corpusGroupKey,
    fixtureVariants,
    groupKey,
    historyPresentation,
    latestFixtureVariant: fixtureVariants[0],
    latestRun: recentRuns?.[0],
    recentRuns,
    recommendedGroupKey,
    recommendedTags,
    snapshotHistory,
    snapshotHistoryPresentation: buildRAGEvaluationSuiteSnapshotHistoryPresentation(snapshotHistory),
    suiteId: suite.id,
    suiteLabel: suite.label ?? suite.id
  };
};
|
|
23383
|
+
/**
 * Read handler for the adaptive native planner benchmark.
 *
 * Builds the suite from the optional `description`, `label`, `metadata` and
 * `topK` entries of `queryInput`, loads the benchmark runtime (`runLimit`
 * bounds the run history, `limit` bounds the snapshot history, both default
 * to 5) and returns the runtime fields together with the suite.
 *
 * @param {object} queryInput Raw query input.
 * @returns {Promise<object>} Payload with `ok: true`, the runtime fields and `suite`.
 */
const handleAdaptiveNativePlannerBenchmark = async (queryInput) => {
  const suiteOverrides = {
    description: getStringProperty(queryInput, "description"),
    label: getStringProperty(queryInput, "label"),
    metadata: getObjectProperty(queryInput, "metadata"),
    topK: getIntegerLikeProperty(queryInput, "topK") ?? undefined
  };
  const suite = createRAGAdaptiveNativePlannerBenchmarkSuite(suiteOverrides);
  const historyLimit = getIntegerLikeProperty(queryInput, "runLimit") ?? 5;
  const snapshotLimit = getIntegerLikeProperty(queryInput, "limit") ?? 5;
  const {
    corpusGroupKey,
    fixtureVariants,
    groupKey,
    historyPresentation,
    latestFixtureVariant,
    latestRun,
    recentRuns,
    snapshotHistory,
    snapshotHistoryPresentation
  } = await loadAdaptiveNativePlannerBenchmarkRuntime({
    historyLimit,
    queryInput,
    snapshotLimit,
    suite
  });
  return {
    corpusGroupKey,
    fixtureVariants,
    groupKey,
    historyPresentation,
    latestFixtureVariant,
    latestRun,
    ok: true,
    recentRuns,
    snapshotHistory,
    snapshotHistoryPresentation,
    suite
  };
};
|
|
23410
|
+
/**
 * Read handler for the native backend comparison benchmark.
 *
 * Builds the suite from the optional `description`, `label`, `metadata` and
 * `topK` entries of `queryInput`, loads the benchmark runtime (`runLimit`
 * bounds the run history, `limit` bounds the snapshot history, both default
 * to 5) and returns the runtime fields together with the suite.
 *
 * @param {object} queryInput Raw query input.
 * @returns {Promise<object>} Payload with `ok: true`, the runtime fields and `suite`.
 */
const handleNativeBackendComparisonBenchmark = async (queryInput) => {
  const suiteOverrides = {
    description: getStringProperty(queryInput, "description"),
    label: getStringProperty(queryInput, "label"),
    metadata: getObjectProperty(queryInput, "metadata"),
    topK: getIntegerLikeProperty(queryInput, "topK") ?? undefined
  };
  const suite = createRAGNativeBackendComparisonBenchmarkSuite(suiteOverrides);
  const historyLimit = getIntegerLikeProperty(queryInput, "runLimit") ?? 5;
  const snapshotLimit = getIntegerLikeProperty(queryInput, "limit") ?? 5;
  const {
    corpusGroupKey,
    fixtureVariants,
    groupKey,
    historyPresentation,
    latestFixtureVariant,
    latestRun,
    recentRuns,
    snapshotHistory,
    snapshotHistoryPresentation
  } = await loadNativeBackendComparisonBenchmarkRuntime({
    historyLimit,
    queryInput,
    snapshotLimit,
    suite
  });
  return {
    corpusGroupKey,
    fixtureVariants,
    groupKey,
    historyPresentation,
    latestFixtureVariant,
    latestRun,
    ok: true,
    recentRuns,
    snapshotHistory,
    snapshotHistoryPresentation,
    suite
  };
};
|
|
23437
|
+
/**
 * Runs the adaptive native planner benchmark and returns the fresh comparison
 * alongside the refreshed benchmark runtime.
 *
 * The comparison request starts from `suite.input` and is completed from
 * `bodyInput`: baseline/candidate ids default to `"native-latency"` and
 * `"native-adaptive"`, the group key defaults to the suite's recommended one,
 * `persistRun` is on unless explicitly `false`, and a built-in list of four
 * retrievals is used when `bodyInput.retrievals` is not an array. Tags come
 * from `bodyInput.tags` when any are given, otherwise from the suite's
 * recommended tags; either way a `fixture:` tag is ensured.
 *
 * @param {object} bodyInput Raw request body.
 * @param {Request} [request] Incoming request, forwarded to `handleEvaluateRetrievals`.
 * @returns {Promise<object>} `{ ok: false, error }` when the comparison fails,
 *   otherwise `ok: true` with `comparison`, the runtime fields and `suite`.
 */
const handleRunAdaptiveNativePlannerBenchmark = async (bodyInput, request) => {
  const suite = createRAGAdaptiveNativePlannerBenchmarkSuite({
    description: getStringProperty(bodyInput, "description"),
    label: getStringProperty(bodyInput, "label"),
    metadata: getObjectProperty(bodyInput, "metadata"),
    topK: getIntegerLikeProperty(bodyInput, "topK") ?? undefined
  });
  const suiteMetadata = suite.metadata;
  const recommendedGroupKey = typeof suiteMetadata?.recommendedGroupKey === "string" ? suiteMetadata.recommendedGroupKey : undefined;
  let recommendedTags = [];
  if (Array.isArray(suiteMetadata?.recommendedTags)) {
    recommendedTags = suiteMetadata.recommendedTags.filter((entry) => typeof entry === "string");
  }
  const explicitTags = normalizeStringArray2(bodyInput?.tags);
  const corpusGroupKey = getStringProperty(bodyInput, "corpusGroupKey");
  const groupKey = getStringProperty(bodyInput, "groupKey") ?? recommendedGroupKey;
  // Built lazily so the query transform is only created when the defaults are used.
  const buildDefaultRetrievals = () => [
    {
      id: "native-latency",
      label: "Native latency",
      retrieval: { mode: "vector", nativeQueryProfile: "latency" }
    },
    {
      id: "native-adaptive",
      label: "Adaptive native planner",
      retrieval: { mode: "vector" }
    },
    {
      id: "hybrid-adaptive",
      label: "Hybrid adaptive",
      retrieval: { mode: "hybrid" }
    },
    {
      id: "hybrid-transform",
      label: "Hybrid transform",
      queryTransform: createHeuristicRAGQueryTransform(),
      retrieval: { mode: "hybrid" }
    }
  ];
  const retrievals = Array.isArray(bodyInput?.retrievals) ? bodyInput.retrievals : buildDefaultRetrievals();
  const tags = ensureRetrievalBenchmarkFixtureTag(explicitTags.length > 0 ? explicitTags : recommendedTags);
  const comparisonBody = {
    ...suite.input,
    baselineRetrievalId: getStringProperty(bodyInput, "baselineRetrievalId") ?? "native-latency",
    candidateRetrievalId: getStringProperty(bodyInput, "candidateRetrievalId") ?? "native-adaptive",
    corpusGroupKey,
    groupKey,
    label: suite.label,
    persistRun: getBooleanProperty(bodyInput, "persistRun") !== false,
    suiteId: suite.id,
    retrievals,
    tags
  };
  const comparisonResult = await handleEvaluateRetrievals(comparisonBody, request);
  if (!comparisonResult.ok) {
    return { error: comparisonResult.error, ok: false };
  }
  // Reload after the comparison so the history views are read post-run.
  const runtime = await loadAdaptiveNativePlannerBenchmarkRuntime({
    corpusGroupKey,
    groupKey,
    historyLimit: getIntegerLikeProperty(bodyInput, "runLimit") ?? 5,
    snapshotLimit: getIntegerLikeProperty(bodyInput, "limit") ?? 5,
    suite
  });
  return {
    comparison: comparisonResult.comparison,
    corpusGroupKey: runtime.corpusGroupKey,
    fixtureVariants: runtime.fixtureVariants,
    groupKey: runtime.groupKey,
    historyPresentation: runtime.historyPresentation,
    latestFixtureVariant: runtime.latestFixtureVariant,
    latestRun: runtime.latestRun,
    ok: true,
    recentRuns: runtime.recentRuns,
    snapshotHistory: runtime.snapshotHistory,
    snapshotHistoryPresentation: runtime.snapshotHistoryPresentation,
    suite
  };
};
|
|
23519
|
+
/**
 * Runs the native backend comparison benchmark and returns the fresh
 * comparison alongside the refreshed benchmark runtime.
 *
 * The comparison request starts from `suite.input` and is completed from
 * `bodyInput`: baseline/candidate ids default to `"native-latency"` and
 * `"native-adaptive"`, the group key defaults to the suite's recommended one,
 * `persistRun` is on unless explicitly `false`, and a built-in list of four
 * retrievals is used when `bodyInput.retrievals` is not an array. Tags come
 * from `bodyInput.tags` when any are given; otherwise the suite's recommended
 * tags are combined with the current backend tags. Either way a `fixture:`
 * tag is ensured.
 *
 * @param {object} bodyInput Raw request body.
 * @param {Request} [request] Incoming request, forwarded to `handleEvaluateRetrievals`.
 * @returns {Promise<object>} `{ ok: false, error }` when the comparison fails,
 *   otherwise `ok: true` with `comparison`, the runtime fields and `suite`.
 */
const handleRunNativeBackendComparisonBenchmark = async (bodyInput, request) => {
  const suite = createRAGNativeBackendComparisonBenchmarkSuite({
    description: getStringProperty(bodyInput, "description"),
    label: getStringProperty(bodyInput, "label"),
    metadata: getObjectProperty(bodyInput, "metadata"),
    topK: getIntegerLikeProperty(bodyInput, "topK") ?? undefined
  });
  const suiteMetadata = suite.metadata;
  const recommendedGroupKey = typeof suiteMetadata?.recommendedGroupKey === "string" ? suiteMetadata.recommendedGroupKey : undefined;
  let recommendedTags = [];
  if (Array.isArray(suiteMetadata?.recommendedTags)) {
    recommendedTags = suiteMetadata.recommendedTags.filter((entry) => typeof entry === "string");
  }
  const explicitTags = normalizeStringArray2(bodyInput?.tags);
  const corpusGroupKey = getStringProperty(bodyInput, "corpusGroupKey");
  const groupKey = getStringProperty(bodyInput, "groupKey") ?? recommendedGroupKey;
  // Built lazily so the query transform is only created when the defaults are used.
  const buildDefaultRetrievals = () => [
    {
      id: "native-latency",
      label: "Native latency",
      retrieval: { mode: "vector", nativeQueryProfile: "latency" }
    },
    {
      id: "native-adaptive",
      label: "Adaptive native planner",
      retrieval: { mode: "vector" }
    },
    {
      id: "hybrid-adaptive",
      label: "Hybrid adaptive",
      retrieval: { mode: "hybrid" }
    },
    {
      id: "hybrid-transform",
      label: "Hybrid transform",
      queryTransform: createHeuristicRAGQueryTransform(),
      retrieval: { mode: "hybrid" }
    }
  ];
  const retrievals = Array.isArray(bodyInput?.retrievals) ? bodyInput.retrievals : buildDefaultRetrievals();
  // Backend tags are only added when the caller did not supply tags.
  const tags = ensureRetrievalBenchmarkFixtureTag(
    explicitTags.length > 0 ? explicitTags : [...recommendedTags, ...buildRetrievalBenchmarkBackendTags()]
  );
  const comparisonBody = {
    ...suite.input,
    baselineRetrievalId: getStringProperty(bodyInput, "baselineRetrievalId") ?? "native-latency",
    candidateRetrievalId: getStringProperty(bodyInput, "candidateRetrievalId") ?? "native-adaptive",
    corpusGroupKey,
    groupKey,
    label: suite.label,
    persistRun: getBooleanProperty(bodyInput, "persistRun") !== false,
    suiteId: suite.id,
    retrievals,
    tags
  };
  const comparisonResult = await handleEvaluateRetrievals(comparisonBody, request);
  if (!comparisonResult.ok) {
    return { error: comparisonResult.error, ok: false };
  }
  // Reload after the comparison so the history views are read post-run.
  const runtime = await loadNativeBackendComparisonBenchmarkRuntime({
    corpusGroupKey,
    groupKey,
    historyLimit: getIntegerLikeProperty(bodyInput, "runLimit") ?? 5,
    snapshotLimit: getIntegerLikeProperty(bodyInput, "limit") ?? 5,
    suite
  });
  return {
    comparison: comparisonResult.comparison,
    corpusGroupKey: runtime.corpusGroupKey,
    fixtureVariants: runtime.fixtureVariants,
    groupKey: runtime.groupKey,
    historyPresentation: runtime.historyPresentation,
    latestFixtureVariant: runtime.latestFixtureVariant,
    latestRun: runtime.latestRun,
    ok: true,
    recentRuns: runtime.recentRuns,
    snapshotHistory: runtime.snapshotHistory,
    snapshotHistoryPresentation: runtime.snapshotHistoryPresentation,
    suite
  };
};
|
|
@@ -21836,6 +23647,52 @@ var ragChat = (config) => {
|
|
|
21836
23647
|
suite
|
|
21837
23648
|
};
|
|
21838
23649
|
};
|
|
23650
|
+
/**
 * Persists a new snapshot of the native backend comparison benchmark suite.
 *
 * When a request is supplied it must pass the `manage_retrieval_admin`
 * authorization check. The snapshot version comes from `bodyInput.version`
 * or, failing that, is one above the latest stored snapshot's version
 * (starting at 1). After saving, the snapshot history is reloaded with
 * `bodyInput.limit` entries (default 5).
 *
 * @param {object} bodyInput Raw request body.
 * @param {Request} [request] Incoming request; authorization is skipped when absent.
 * @returns {Promise<object>} `{ ok: false, error }` when forbidden or when no snapshot
 *   store is configured, otherwise `ok: true` with `snapshot`, `snapshotHistory`,
 *   `snapshotHistoryPresentation` and `suite`.
 */
const handlePersistNativeBackendComparisonBenchmarkSnapshot = async (bodyInput, request) => {
  if (request) {
    const { allowed, reason } = await checkAuthorization(request, "manage_retrieval_admin");
    if (!allowed) {
      return { error: reason ?? "Forbidden", ok: false };
    }
  }
  const snapshotStore = config.evaluationSuiteSnapshotHistoryStore;
  if (!snapshotStore) {
    return {
      error: "Evaluation suite snapshot history store is not configured",
      ok: false
    };
  }
  const suite = createRAGNativeBackendComparisonBenchmarkSuite({
    description: getStringProperty(bodyInput, "description"),
    label: getStringProperty(bodyInput, "label"),
    metadata: getObjectProperty(bodyInput, "metadata")
  });
  // Only the newest snapshot is needed to derive the next version number.
  const previousHistory = await loadRAGEvaluationSuiteSnapshotHistory({
    limit: 1,
    store: snapshotStore,
    suite
  });
  const snapshot = createRAGNativeBackendComparisonBenchmarkSnapshot({
    createdAt: getNumberProperty(bodyInput, "createdAt"),
    metadata: getObjectProperty(bodyInput, "snapshotMetadata"),
    suite,
    version: getIntegerLikeProperty(bodyInput, "version") ?? (previousHistory.latestSnapshot?.version ?? 0) + 1
  });
  await snapshotStore.saveSnapshot(snapshot);
  const historyLimit = getIntegerLikeProperty(bodyInput, "limit") ?? 5;
  const snapshotHistory = await loadRAGEvaluationSuiteSnapshotHistory({
    limit: historyLimit,
    store: snapshotStore,
    suite
  });
  const snapshotHistoryPresentation = buildRAGEvaluationSuiteSnapshotHistoryPresentation(snapshotHistory);
  return {
    ok: true,
    snapshot,
    snapshotHistory,
    snapshotHistoryPresentation,
    suite
  };
};
|
|
21839
23696
|
const handleRetrievalLaneHandoffList = async (queryInput, request) => {
|
|
21840
23697
|
const result = await buildOperationsPayload();
|
|
21841
23698
|
const accessScope = await loadAccessScope(request);
|
|
@@ -23829,29 +25686,14 @@ var ragChat = (config) => {
|
|
|
23829
25686
|
});
|
|
23830
25687
|
const latestRejectedCandidate = enrichedRecentRetrievalReleaseDecisions?.find((entry) => entry.kind === "reject");
|
|
23831
25688
|
const latestRetrievalComparisonRun = recentRetrievalComparisonRuns?.[0];
|
|
23832
|
-
const
|
|
23833
|
-
|
|
23834
|
-
|
|
23835
|
-
|
|
23836
|
-
|
|
23837
|
-
|
|
23838
|
-
|
|
23839
|
-
|
|
23840
|
-
}),
|
|
23841
|
-
snapshotHistoryPresentation: undefined,
|
|
23842
|
-
suiteId: adaptiveNativePlannerBenchmarkSuite.id,
|
|
23843
|
-
suiteLabel: adaptiveNativePlannerBenchmarkSuite.label ?? adaptiveNativePlannerBenchmarkSuite.id
|
|
23844
|
-
} : {
|
|
23845
|
-
recommendedGroupKey: typeof adaptiveNativePlannerBenchmarkSuite.metadata?.recommendedGroupKey === "string" ? adaptiveNativePlannerBenchmarkSuite.metadata.recommendedGroupKey : undefined,
|
|
23846
|
-
recommendedTags: Array.isArray(adaptiveNativePlannerBenchmarkSuite.metadata?.recommendedTags) ? adaptiveNativePlannerBenchmarkSuite.metadata.recommendedTags.filter((entry) => typeof entry === "string") : undefined,
|
|
23847
|
-
snapshotHistory: undefined,
|
|
23848
|
-
snapshotHistoryPresentation: undefined,
|
|
23849
|
-
suiteId: adaptiveNativePlannerBenchmarkSuite.id,
|
|
23850
|
-
suiteLabel: adaptiveNativePlannerBenchmarkSuite.label ?? adaptiveNativePlannerBenchmarkSuite.id
|
|
23851
|
-
};
|
|
23852
|
-
if (adaptiveNativePlannerBenchmark.snapshotHistory) {
|
|
23853
|
-
adaptiveNativePlannerBenchmark.snapshotHistoryPresentation = buildRAGEvaluationSuiteSnapshotHistoryPresentation(adaptiveNativePlannerBenchmark.snapshotHistory);
|
|
23854
|
-
}
|
|
25689
|
+
const adaptiveNativePlannerBenchmark = await loadAdaptiveNativePlannerBenchmarkRuntime({
|
|
25690
|
+
historyLimit: 5,
|
|
25691
|
+
snapshotLimit: 5
|
|
25692
|
+
});
|
|
25693
|
+
const nativeBackendComparisonBenchmark = await loadNativeBackendComparisonBenchmarkRuntime({
|
|
25694
|
+
historyLimit: 5,
|
|
25695
|
+
snapshotLimit: 5
|
|
25696
|
+
});
|
|
23855
25697
|
const latestPromotionReadiness = latestRetrievalComparisonRun ? (() => {
|
|
23856
25698
|
const activeTargetRolloutLabel = activeRetrievalBaselines?.find((entry) => entry.groupKey === latestRetrievalComparisonRun.groupKey)?.rolloutLabel;
|
|
23857
25699
|
const state = getPromotionCandidateState({
|
|
@@ -23990,7 +25832,7 @@ var ragChat = (config) => {
|
|
|
23990
25832
|
return {
|
|
23991
25833
|
...group,
|
|
23992
25834
|
acknowledgedOpenIncidentCount,
|
|
23993
|
-
classification: groupOpenIncidents.some((entry) => entry.classification === "runtime") ? "runtime" : groupOpenIncidents.some((entry) => entry.classification === "multivector") ? "multivector" : group.classification,
|
|
25835
|
+
classification: groupOpenIncidents.some((entry) => entry.classification === "runtime") ? "runtime" : groupOpenIncidents.some((entry) => entry.classification === "evidence") ? "evidence" : groupOpenIncidents.some((entry) => entry.classification === "multivector") ? "multivector" : group.classification,
|
|
23994
25836
|
openIncidentCount: groupOpenIncidents.length,
|
|
23995
25837
|
unacknowledgedOpenIncidentCount: groupOpenIncidents.length - acknowledgedOpenIncidentCount
|
|
23996
25838
|
};
|
|
@@ -24198,7 +26040,7 @@ var ragChat = (config) => {
|
|
|
24198
26040
|
] : candidate?.ready ? [
|
|
24199
26041
|
"latest candidate is ready to promote"
|
|
24200
26042
|
] : ["continue monitoring release state"];
|
|
24201
|
-
const classification = candidate?.reasons?.length ? classifyGovernanceReasons(candidate.reasons) : (recentIncidents ?? []).some((entry) => entry.groupKey === group.groupKey && entry.targetRolloutLabel === targetRolloutLabel && entry.classification === "runtime") ? "runtime" : (recentIncidents ?? []).some((entry) => entry.groupKey === group.groupKey && entry.targetRolloutLabel === targetRolloutLabel && entry.classification === "multivector") ? "multivector" : "general";
|
|
26043
|
+
const classification = candidate?.reasons?.length ? classifyGovernanceReasons(candidate.reasons) : (recentIncidents ?? []).some((entry) => entry.groupKey === group.groupKey && entry.targetRolloutLabel === targetRolloutLabel && entry.classification === "runtime") ? "runtime" : (recentIncidents ?? []).some((entry) => entry.groupKey === group.groupKey && entry.targetRolloutLabel === targetRolloutLabel && entry.classification === "evidence") ? "evidence" : (recentIncidents ?? []).some((entry) => entry.groupKey === group.groupKey && entry.targetRolloutLabel === targetRolloutLabel && entry.classification === "multivector") ? "multivector" : "general";
|
|
24202
26044
|
summaries.push({
|
|
24203
26045
|
baselineRetrievalId: candidate?.baselineRetrievalId,
|
|
24204
26046
|
candidateRetrievalId: candidate?.candidateRetrievalId,
|
|
@@ -24674,6 +26516,9 @@ var ragChat = (config) => {
|
|
|
24674
26516
|
] : [],
|
|
24675
26517
|
...(input.delta?.runtimeUnderfilledTopKCasesDelta ?? 0) > 0 ? [
|
|
24676
26518
|
`runtime underfilled-topk delta ${input.delta?.runtimeUnderfilledTopKCasesDelta ?? 0}`
|
|
26519
|
+
] : [],
|
|
26520
|
+
...(input.delta?.evidenceReconcileCasesDelta ?? 0) < 0 ? [
|
|
26521
|
+
`evidence reconcile delta ${input.delta?.evidenceReconcileCasesDelta ?? 0}`
|
|
24677
26522
|
] : []
|
|
24678
26523
|
]);
|
|
24679
26524
|
const latestWinner = latestRetrievalComparisonRun.comparison.summary.bestByPassingRate;
|
|
@@ -24853,12 +26698,14 @@ var ragChat = (config) => {
|
|
|
24853
26698
|
readiness: buildReadiness(),
|
|
24854
26699
|
retrievalComparisons: {
|
|
24855
26700
|
adaptiveNativePlannerBenchmark,
|
|
26701
|
+
nativeBackendComparisonBenchmark,
|
|
24856
26702
|
configured: Boolean(retrievalComparisonHistoryStore),
|
|
24857
26703
|
latest: latestRetrievalComparisonRun ? {
|
|
24858
26704
|
bestByAverageF1: latestRetrievalComparisonRun.comparison.summary.bestByAverageF1,
|
|
24859
26705
|
bestByMultivectorCollapsedCases: latestRetrievalComparisonRun.comparison.summary.bestByMultivectorCollapsedCases,
|
|
24860
26706
|
bestByMultivectorLexicalHitCases: latestRetrievalComparisonRun.comparison.summary.bestByMultivectorLexicalHitCases,
|
|
24861
26707
|
bestByMultivectorVectorHitCases: latestRetrievalComparisonRun.comparison.summary.bestByMultivectorVectorHitCases,
|
|
26708
|
+
bestByEvidenceReconcileCases: latestRetrievalComparisonRun.comparison.summary.bestByEvidenceReconcileCases,
|
|
24862
26709
|
bestByLowestRuntimeCandidateBudgetExhaustedCases: latestRetrievalComparisonRun.comparison.summary.bestByLowestRuntimeCandidateBudgetExhaustedCases,
|
|
24863
26710
|
bestByLowestRuntimeUnderfilledTopKCases: latestRetrievalComparisonRun.comparison.summary.bestByLowestRuntimeUnderfilledTopKCases,
|
|
24864
26711
|
bestByPassingRate: latestRetrievalComparisonRun.comparison.summary.bestByPassingRate,
|
|
@@ -25785,10 +27632,43 @@ var ragChat = (config) => {
|
|
|
25785
27632
|
if (!result.ok) {
|
|
25786
27633
|
return toHTMXResponse(workflowRenderers.error(result.error ?? "Adaptive native planner benchmark failed"), getNumericStatus(set.status));
|
|
25787
27634
|
}
|
|
25788
|
-
return
|
|
25789
|
-
|
|
25790
|
-
|
|
25791
|
-
|
|
27635
|
+
return toHTMXResponse(workflowRenderers.adaptiveNativePlannerBenchmark(result), getNumericStatus(set.status));
|
|
27636
|
+
}
|
|
27637
|
+
return result;
|
|
27638
|
+
}).get(`${path}/compare/retrieval/benchmarks/native-backend-comparison`, async ({ query, request, set }) => {
|
|
27639
|
+
const result = await handleNativeBackendComparisonBenchmark(query);
|
|
27640
|
+
if (!result.ok) {
|
|
27641
|
+
set.status = HTTP_STATUS_BAD_REQUEST;
|
|
27642
|
+
}
|
|
27643
|
+
if (config.htmx && isHTMXRequest(request)) {
|
|
27644
|
+
if (!result.ok) {
|
|
27645
|
+
return toHTMXResponse(workflowRenderers.error(result.error ?? "Native backend comparison benchmark failed"), getNumericStatus(set.status));
|
|
27646
|
+
}
|
|
27647
|
+
return toHTMXResponse(workflowRenderers.nativeBackendComparisonBenchmark(result), getNumericStatus(set.status));
|
|
27648
|
+
}
|
|
27649
|
+
return result;
|
|
27650
|
+
}).post(`${path}/compare/retrieval/benchmarks/adaptive-native-planner/run`, async ({ body, request, set }) => {
|
|
27651
|
+
const result = await handleRunAdaptiveNativePlannerBenchmark(body, request);
|
|
27652
|
+
if (!result.ok) {
|
|
27653
|
+
set.status = HTTP_STATUS_BAD_REQUEST;
|
|
27654
|
+
}
|
|
27655
|
+
if (config.htmx && isHTMXRequest(request)) {
|
|
27656
|
+
if (!result.ok) {
|
|
27657
|
+
return toHTMXResponse(workflowRenderers.error(result.error ?? "Adaptive native planner benchmark run failed"), getNumericStatus(set.status));
|
|
27658
|
+
}
|
|
27659
|
+
return toHTMXResponse(workflowRenderers.adaptiveNativePlannerBenchmark(result), getNumericStatus(set.status));
|
|
27660
|
+
}
|
|
27661
|
+
return result;
|
|
27662
|
+
}).post(`${path}/compare/retrieval/benchmarks/native-backend-comparison/run`, async ({ body, request, set }) => {
|
|
27663
|
+
const result = await handleRunNativeBackendComparisonBenchmark(body, request);
|
|
27664
|
+
if (!result.ok) {
|
|
27665
|
+
set.status = HTTP_STATUS_BAD_REQUEST;
|
|
27666
|
+
}
|
|
27667
|
+
if (config.htmx && isHTMXRequest(request)) {
|
|
27668
|
+
if (!result.ok) {
|
|
27669
|
+
return toHTMXResponse(workflowRenderers.error(result.error ?? "Native backend comparison benchmark run failed"), getNumericStatus(set.status));
|
|
27670
|
+
}
|
|
27671
|
+
return toHTMXResponse(workflowRenderers.nativeBackendComparisonBenchmark(result), getNumericStatus(set.status));
|
|
25792
27672
|
}
|
|
25793
27673
|
return result;
|
|
25794
27674
|
}).post(`${path}/compare/retrieval/benchmarks/adaptive-native-planner/snapshots`, async ({ body, request, set }) => {
|
|
@@ -25800,10 +27680,19 @@ var ragChat = (config) => {
|
|
|
25800
27680
|
if (!result.ok) {
|
|
25801
27681
|
return toHTMXResponse(workflowRenderers.error(result.error ?? "Adaptive native planner benchmark snapshot failed"), getNumericStatus(set.status));
|
|
25802
27682
|
}
|
|
25803
|
-
return
|
|
25804
|
-
|
|
25805
|
-
|
|
25806
|
-
|
|
27683
|
+
return toHTMXResponse(workflowRenderers.adaptiveNativePlannerBenchmarkSnapshot(result), getNumericStatus(set.status));
|
|
27684
|
+
}
|
|
27685
|
+
return result;
|
|
27686
|
+
}).post(`${path}/compare/retrieval/benchmarks/native-backend-comparison/snapshots`, async ({ body, request, set }) => {
|
|
27687
|
+
const result = await handlePersistNativeBackendComparisonBenchmarkSnapshot(body, request);
|
|
27688
|
+
if (!result.ok) {
|
|
27689
|
+
set.status = HTTP_STATUS_BAD_REQUEST;
|
|
27690
|
+
}
|
|
27691
|
+
if (config.htmx && isHTMXRequest(request)) {
|
|
27692
|
+
if (!result.ok) {
|
|
27693
|
+
return toHTMXResponse(workflowRenderers.error(result.error ?? "Native backend comparison benchmark snapshot failed"), getNumericStatus(set.status));
|
|
27694
|
+
}
|
|
27695
|
+
return toHTMXResponse(workflowRenderers.nativeBackendComparisonBenchmarkSnapshot(result), getNumericStatus(set.status));
|
|
25807
27696
|
}
|
|
25808
27697
|
return result;
|
|
25809
27698
|
}).get(`${path}/compare/retrieval/baselines`, async ({ query, request, set }) => {
|
|
@@ -33900,12 +35789,21 @@ var createRAGClient = (options) => {
|
|
|
33900
35789
|
if (typeof input?.limit === "number") {
|
|
33901
35790
|
searchParams.set("limit", String(input.limit));
|
|
33902
35791
|
}
|
|
35792
|
+
if (typeof input?.runLimit === "number") {
|
|
35793
|
+
searchParams.set("runLimit", String(input.runLimit));
|
|
35794
|
+
}
|
|
33903
35795
|
if (input?.label) {
|
|
33904
35796
|
searchParams.set("label", input.label);
|
|
33905
35797
|
}
|
|
33906
35798
|
if (input?.description) {
|
|
33907
35799
|
searchParams.set("description", input.description);
|
|
33908
35800
|
}
|
|
35801
|
+
if (input?.groupKey) {
|
|
35802
|
+
searchParams.set("benchmarkGroupKey", input.groupKey);
|
|
35803
|
+
}
|
|
35804
|
+
if (input?.corpusGroupKey) {
|
|
35805
|
+
searchParams.set("benchmarkCorpusGroupKey", input.corpusGroupKey);
|
|
35806
|
+
}
|
|
33909
35807
|
const suffix = searchParams.size ? `?${searchParams}` : "";
|
|
33910
35808
|
const response = await fetchImpl(`${basePath}/compare/retrieval/benchmarks/adaptive-native-planner${suffix}`);
|
|
33911
35809
|
if (!response.ok) {
|
|
@@ -33917,6 +35815,35 @@ var createRAGClient = (options) => {
|
|
|
33917
35815
|
}
|
|
33918
35816
|
return payload;
|
|
33919
35817
|
},
|
|
35818
|
+
async runAdaptiveNativePlannerBenchmark(input) {
|
|
35819
|
+
const response = await fetchImpl(`${basePath}/compare/retrieval/benchmarks/adaptive-native-planner/run`, {
|
|
35820
|
+
body: JSON.stringify({
|
|
35821
|
+
baselineRetrievalId: input?.baselineRetrievalId,
|
|
35822
|
+
candidateRetrievalId: input?.candidateRetrievalId,
|
|
35823
|
+
corpusGroupKey: input?.corpusGroupKey,
|
|
35824
|
+
description: input?.description,
|
|
35825
|
+
groupKey: input?.groupKey,
|
|
35826
|
+
label: input?.label,
|
|
35827
|
+
limit: input?.limit,
|
|
35828
|
+
metadata: input?.metadata,
|
|
35829
|
+
persistRun: input?.persistRun,
|
|
35830
|
+
retrievals: input?.retrievals,
|
|
35831
|
+
runLimit: input?.runLimit,
|
|
35832
|
+
tags: input?.tags,
|
|
35833
|
+
topK: input?.topK
|
|
35834
|
+
}),
|
|
35835
|
+
headers: jsonHeaders,
|
|
35836
|
+
method: "POST"
|
|
35837
|
+
});
|
|
35838
|
+
if (!response.ok) {
|
|
35839
|
+
throw new Error(await toErrorMessage3(response));
|
|
35840
|
+
}
|
|
35841
|
+
const payload = await parseJson(response);
|
|
35842
|
+
if (!payload.ok) {
|
|
35843
|
+
throw new Error(payload.error ?? "Adaptive native planner benchmark run failed");
|
|
35844
|
+
}
|
|
35845
|
+
return payload;
|
|
35846
|
+
},
|
|
33920
35847
|
async saveAdaptiveNativePlannerBenchmarkSnapshot(input) {
|
|
33921
35848
|
const response = await fetchImpl(`${basePath}/compare/retrieval/benchmarks/adaptive-native-planner/snapshots`, {
|
|
33922
35849
|
body: JSON.stringify({
|
|
@@ -33940,6 +35867,89 @@ var createRAGClient = (options) => {
|
|
|
33940
35867
|
}
|
|
33941
35868
|
return payload;
|
|
33942
35869
|
},
|
|
35870
|
+
async nativeBackendComparisonBenchmark(input) {
|
|
35871
|
+
const searchParams = new URLSearchParams;
|
|
35872
|
+
if (typeof input?.limit === "number") {
|
|
35873
|
+
searchParams.set("limit", String(input.limit));
|
|
35874
|
+
}
|
|
35875
|
+
if (typeof input?.runLimit === "number") {
|
|
35876
|
+
searchParams.set("runLimit", String(input.runLimit));
|
|
35877
|
+
}
|
|
35878
|
+
if (input?.label) {
|
|
35879
|
+
searchParams.set("label", input.label);
|
|
35880
|
+
}
|
|
35881
|
+
if (input?.description) {
|
|
35882
|
+
searchParams.set("description", input.description);
|
|
35883
|
+
}
|
|
35884
|
+
if (input?.groupKey) {
|
|
35885
|
+
searchParams.set("benchmarkGroupKey", input.groupKey);
|
|
35886
|
+
}
|
|
35887
|
+
if (input?.corpusGroupKey) {
|
|
35888
|
+
searchParams.set("benchmarkCorpusGroupKey", input.corpusGroupKey);
|
|
35889
|
+
}
|
|
35890
|
+
const suffix = searchParams.size ? `?${searchParams}` : "";
|
|
35891
|
+
const response = await fetchImpl(`${basePath}/compare/retrieval/benchmarks/native-backend-comparison${suffix}`);
|
|
35892
|
+
if (!response.ok) {
|
|
35893
|
+
throw new Error(await toErrorMessage3(response));
|
|
35894
|
+
}
|
|
35895
|
+
const payload = await parseJson(response);
|
|
35896
|
+
if (!payload.ok) {
|
|
35897
|
+
throw new Error(payload.error ?? "Native backend comparison benchmark history failed");
|
|
35898
|
+
}
|
|
35899
|
+
return payload;
|
|
35900
|
+
},
|
|
35901
|
+
async runNativeBackendComparisonBenchmark(input) {
|
|
35902
|
+
const response = await fetchImpl(`${basePath}/compare/retrieval/benchmarks/native-backend-comparison/run`, {
|
|
35903
|
+
body: JSON.stringify({
|
|
35904
|
+
baselineRetrievalId: input?.baselineRetrievalId,
|
|
35905
|
+
candidateRetrievalId: input?.candidateRetrievalId,
|
|
35906
|
+
corpusGroupKey: input?.corpusGroupKey,
|
|
35907
|
+
description: input?.description,
|
|
35908
|
+
groupKey: input?.groupKey,
|
|
35909
|
+
label: input?.label,
|
|
35910
|
+
limit: input?.limit,
|
|
35911
|
+
metadata: input?.metadata,
|
|
35912
|
+
persistRun: input?.persistRun,
|
|
35913
|
+
retrievals: input?.retrievals,
|
|
35914
|
+
runLimit: input?.runLimit,
|
|
35915
|
+
tags: input?.tags,
|
|
35916
|
+
topK: input?.topK
|
|
35917
|
+
}),
|
|
35918
|
+
headers: jsonHeaders,
|
|
35919
|
+
method: "POST"
|
|
35920
|
+
});
|
|
35921
|
+
if (!response.ok) {
|
|
35922
|
+
throw new Error(await toErrorMessage3(response));
|
|
35923
|
+
}
|
|
35924
|
+
const payload = await parseJson(response);
|
|
35925
|
+
if (!payload.ok) {
|
|
35926
|
+
throw new Error(payload.error ?? "Native backend comparison benchmark run failed");
|
|
35927
|
+
}
|
|
35928
|
+
return payload;
|
|
35929
|
+
},
|
|
35930
|
+
async saveNativeBackendComparisonBenchmarkSnapshot(input) {
|
|
35931
|
+
const response = await fetchImpl(`${basePath}/compare/retrieval/benchmarks/native-backend-comparison/snapshots`, {
|
|
35932
|
+
body: JSON.stringify({
|
|
35933
|
+
createdAt: input?.createdAt,
|
|
35934
|
+
description: input?.description,
|
|
35935
|
+
label: input?.label,
|
|
35936
|
+
limit: input?.limit,
|
|
35937
|
+
metadata: input?.metadata,
|
|
35938
|
+
snapshotMetadata: input?.snapshotMetadata,
|
|
35939
|
+
version: input?.version
|
|
35940
|
+
}),
|
|
35941
|
+
headers: jsonHeaders,
|
|
35942
|
+
method: "POST"
|
|
35943
|
+
});
|
|
35944
|
+
if (!response.ok) {
|
|
35945
|
+
throw new Error(await toErrorMessage3(response));
|
|
35946
|
+
}
|
|
35947
|
+
const payload = await parseJson(response);
|
|
35948
|
+
if (!payload.ok) {
|
|
35949
|
+
throw new Error(payload.error ?? "Native backend comparison benchmark snapshot failed");
|
|
35950
|
+
}
|
|
35951
|
+
return payload;
|
|
35952
|
+
},
|
|
33943
35953
|
async retrievalLaneHandoffs(input) {
|
|
33944
35954
|
const searchParams = new URLSearchParams;
|
|
33945
35955
|
if (input?.groupKey) {
|
|
@@ -34876,6 +36886,10 @@ export {
|
|
|
34876
36886
|
createRAGQueryTransform,
|
|
34877
36887
|
createRAGPDFOCRExtractor,
|
|
34878
36888
|
createRAGOCRProvider,
|
|
36889
|
+
createRAGNativeBackendComparisonBenchmarkSuite,
|
|
36890
|
+
createRAGNativeBackendComparisonBenchmarkSnapshot,
|
|
36891
|
+
createRAGNativeBackendBenchmarkMockEmbedding,
|
|
36892
|
+
createRAGNativeBackendBenchmarkCorpus,
|
|
34879
36893
|
createRAGMediaTranscriber,
|
|
34880
36894
|
createRAGMediaFileExtractor,
|
|
34881
36895
|
createRAGImageOCRExtractor,
|
|
@@ -34973,5 +36987,5 @@ export {
|
|
|
34973
36987
|
addRAGEvaluationSuiteCase
|
|
34974
36988
|
};
|
|
34975
36989
|
|
|
34976
|
-
//# debugId=
|
|
36990
|
+
//# debugId=EA75EA5E660B29F864756E2164756E21
|
|
34977
36991
|
//# sourceMappingURL=index.js.map
|