@absolutejs/absolute 0.19.0-beta.643 → 0.19.0-beta.645
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/ai/client/index.js +976 -15
- package/dist/ai/client/index.js.map +6 -6
- package/dist/ai/client/ui.js +807 -15
- package/dist/ai/client/ui.js.map +5 -5
- package/dist/ai/index.js +2737 -141
- package/dist/ai/index.js.map +13 -13
- package/dist/ai/rag/quality.js +813 -15
- package/dist/ai/rag/quality.js.map +5 -5
- package/dist/ai/rag/ui.js +807 -15
- package/dist/ai/rag/ui.js.map +5 -5
- package/dist/ai-client/angular/ai/index.js +436 -8
- package/dist/ai-client/react/ai/index.js +436 -8
- package/dist/ai-client/vue/ai/index.js +436 -8
- package/dist/angular/ai/index.js +976 -15
- package/dist/angular/ai/index.js.map +6 -6
- package/dist/index.js +6 -6
- package/dist/index.js.map +2 -2
- package/dist/react/ai/index.js +976 -15
- package/dist/react/ai/index.js.map +6 -6
- package/dist/src/ai/client/ragClient.d.ts +74 -1
- package/dist/src/ai/index.d.ts +2 -1
- package/dist/src/ai/rag/adapters/queryPlanning.d.ts +8 -0
- package/dist/src/ai/rag/chat.d.ts +135 -7
- package/dist/src/ai/rag/index.d.ts +1 -1
- package/dist/src/ai/rag/presentation.d.ts +5 -1
- package/dist/src/ai/rag/quality.d.ts +34 -1
- package/dist/src/vue/ai/useRAG.d.ts +84 -0
- package/dist/src/vue/ai/useRAGEvaluate.d.ts +74 -0
- package/dist/src/vue/ai/useRAGSearch.d.ts +10 -0
- package/dist/svelte/ai/index.js +976 -15
- package/dist/svelte/ai/index.js.map +6 -6
- package/dist/types/ai.d.ts +115 -13
- package/dist/types/index.d.ts +1 -0
- package/dist/types/session.d.ts +16 -0
- package/dist/vue/ai/index.js +976 -15
- package/dist/vue/ai/index.js.map +6 -6
- package/package.json +8 -7
package/dist/ai/index.js
CHANGED
|
@@ -208,6 +208,7 @@ var buildContextLabel = (metadata) => {
|
|
|
208
208
|
return;
|
|
209
209
|
}
|
|
210
210
|
const emailKind = getContextString(metadata.emailKind);
|
|
211
|
+
const officeBlockKind = getContextString(metadata.officeBlockKind);
|
|
211
212
|
if (emailKind === "attachment") {
|
|
212
213
|
return "Attachment evidence";
|
|
213
214
|
}
|
|
@@ -245,6 +246,16 @@ var buildContextLabel = (metadata) => {
|
|
|
245
246
|
}
|
|
246
247
|
const sectionPath = Array.isArray(metadata.sectionPath) ? metadata.sectionPath.map((value) => getContextString(value)).filter((value) => typeof value === "string") : [];
|
|
247
248
|
const sectionTitle = getContextString(metadata.sectionTitle) ?? sectionPath.at(-1);
|
|
249
|
+
const officeSectionLabel = sectionPath.length > 0 ? sectionPath.join(" > ") : sectionTitle;
|
|
250
|
+
if (officeBlockKind === "table" && officeSectionLabel) {
|
|
251
|
+
return `Office table block ${officeSectionLabel}`;
|
|
252
|
+
}
|
|
253
|
+
if (officeBlockKind === "list" && officeSectionLabel) {
|
|
254
|
+
return `Office list block ${officeSectionLabel}`;
|
|
255
|
+
}
|
|
256
|
+
if (officeBlockKind === "paragraph" && officeSectionLabel) {
|
|
257
|
+
return `Office paragraph block ${officeSectionLabel}`;
|
|
258
|
+
}
|
|
248
259
|
if (sectionTitle) {
|
|
249
260
|
return `Section ${sectionTitle}`;
|
|
250
261
|
}
|
|
@@ -266,6 +277,46 @@ var formatMediaDurationLabel = (value) => {
|
|
|
266
277
|
}
|
|
267
278
|
return formatMediaTimestamp(value);
|
|
268
279
|
};
|
|
280
|
+
/**
 * Builds a human-readable label describing the span of list levels.
 *
 * Each entry is passed through `getContextNumber` (defined elsewhere in this
 * file); entries for which it does not yield a number are dropped.
 *
 * @param {unknown} value - Expected to be an array of list-level values.
 * @returns {string | undefined} `Office list level N` when the lowest and
 *   highest levels coincide, `Office list levels A-B` otherwise, or
 *   `undefined` when `value` is not a non-empty array or no entry is numeric.
 */
var formatOfficeListLevelsLabel = (value) => {
  const isNonEmptyArray = Array.isArray(value) && value.length > 0;
  if (!isNonEmptyArray) {
    return;
  }

  // forEach skips holes in sparse arrays, matching a map-then-filter pipeline.
  const numericLevels = [];
  value.forEach((candidate) => {
    const level = getContextNumber(candidate);
    if (typeof level === "number") {
      numericLevels.push(level);
    }
  });
  if (numericLevels.length === 0) {
    return;
  }

  // Ascending numeric order puts the lowest level first and the highest last.
  numericLevels.sort((a, b) => a - b);
  const lowest = numericLevels[0];
  const highest = numericLevels[numericLevels.length - 1];

  if (lowest === highest) {
    return `Office list level ${lowest}`;
  }
  return `Office list levels ${lowest}-${highest}`;
};
|
|
292
|
+
/**
 * Derives a scope descriptor for an office "table" or "list" block.
 *
 * String fields are read through `getContextString` (defined elsewhere in
 * this file). Despite the name, both "table" and "list" block kinds are
 * accepted; any other kind yields `undefined`.
 *
 * @param {object | null | undefined} metadata - Chunk/citation metadata.
 * @returns {{ blockKind: string, pathDepth: number, sectionTitle: string, hasContext: boolean } | undefined}
 *   The scope descriptor, or `undefined` when metadata is missing, the block
 *   kind is neither "table" nor "list", or no section title can be resolved.
 */
var getOfficeTableCitationScope = (metadata) => {
  if (!metadata) {
    return;
  }

  const blockKind = getContextString(metadata.officeBlockKind);
  const isTable = blockKind === "table";
  if (!isTable && blockKind !== "list") {
    return;
  }

  // Keep only the section path segments that resolve to strings.
  const pathSegments = [];
  const rawPath = metadata.sectionPath;
  if (Array.isArray(rawPath)) {
    rawPath.forEach((segment) => {
      const text = getContextString(segment);
      if (typeof text === "string") {
        pathSegments.push(text);
      }
    });
  }

  // An explicit section title wins; otherwise fall back to the last path segment.
  const sectionTitle = getContextString(metadata.sectionTitle) ?? pathSegments.at(-1);

  // Tables and lists carry their surrounding context text under different keys.
  const contextKey = isTable ? "officeTableContextText" : "officeListContextText";
  const contextText = getContextString(metadata[contextKey]);

  if (!sectionTitle) {
    return;
  }

  return {
    blockKind,
    pathDepth: pathSegments.length,
    sectionTitle,
    hasContext: typeof contextText === "string"
  };
};
|
|
313
|
+
/**
 * Scores how strongly an office table/list citation should be preferred.
 *
 * The score is `pathDepth * 10`, plus 1 when the block has context text,
 * plus 1 when it is a list block whose `officeListGroupItemCount` is a
 * number greater than 1.
 *
 * @param {object | null | undefined} metadata - Citation metadata.
 * @returns {number} The preference score; 0 when no office scope can be
 *   derived via `getOfficeTableCitationScope`.
 */
var getOfficeTableCitationPreference = (metadata) => {
  const scope = getOfficeTableCitationScope(metadata);
  if (!scope) {
    return 0;
  }

  // Section path depth dominates; the remaining signals add at most 2.
  let preference = scope.pathDepth * 10;
  if (scope.hasContext) {
    preference += 1;
  }
  if (scope.blockKind === "list") {
    const groupItemCount = metadata?.officeListGroupItemCount;
    if (typeof groupItemCount === "number" && groupItemCount > 1) {
      preference += 1;
    }
  }
  return preference;
};
|
|
269
320
|
var buildLocatorLabel = (metadata, source, title) => {
|
|
270
321
|
if (!metadata) {
|
|
271
322
|
return;
|
|
@@ -291,6 +342,10 @@ var buildLocatorLabel = (metadata, source, title) => {
|
|
|
291
342
|
return `Archive entry ${archiveEntry}`;
|
|
292
343
|
}
|
|
293
344
|
const emailKind = getContextString(metadata.emailKind);
|
|
345
|
+
const officeBlockKind = getContextString(metadata.officeBlockKind);
|
|
346
|
+
const officeBlockNumber = getContextNumber(metadata.officeBlockNumber);
|
|
347
|
+
const officeTableBodyRowStart = getContextNumber(metadata.officeTableBodyRowStart);
|
|
348
|
+
const officeTableBodyRowEnd = getContextNumber(metadata.officeTableBodyRowEnd);
|
|
294
349
|
if (emailKind === "attachment") {
|
|
295
350
|
const attachmentName = getContextString(metadata.attachmentName) ?? getAttachmentName(source, title);
|
|
296
351
|
return attachmentName ? `Attachment ${attachmentName}` : "Attachment";
|
|
@@ -303,6 +358,18 @@ var buildLocatorLabel = (metadata, source, title) => {
|
|
|
303
358
|
if (mediaStart) {
|
|
304
359
|
return `Timestamp ${mediaStart}`;
|
|
305
360
|
}
|
|
361
|
+
if (officeBlockNumber && officeBlockKind === "table") {
|
|
362
|
+
if (typeof officeTableBodyRowStart === "number" && typeof officeTableBodyRowEnd === "number") {
|
|
363
|
+
return officeTableBodyRowStart === officeTableBodyRowEnd ? `Office table block ${officeBlockNumber} \xB7 Row ${officeTableBodyRowStart}` : `Office table block ${officeBlockNumber} \xB7 Rows ${officeTableBodyRowStart}-${officeTableBodyRowEnd}`;
|
|
364
|
+
}
|
|
365
|
+
return `Office table block ${officeBlockNumber}`;
|
|
366
|
+
}
|
|
367
|
+
if (officeBlockNumber && officeBlockKind === "list") {
|
|
368
|
+
return `Office list block ${officeBlockNumber}`;
|
|
369
|
+
}
|
|
370
|
+
if (officeBlockNumber && officeBlockKind === "paragraph") {
|
|
371
|
+
return `Office paragraph block ${officeBlockNumber}`;
|
|
372
|
+
}
|
|
306
373
|
const sectionPath = Array.isArray(metadata.sectionPath) ? metadata.sectionPath.map((value) => getContextString(value)).filter((value) => typeof value === "string") : [];
|
|
307
374
|
if (sectionPath.length > 0) {
|
|
308
375
|
return `Section ${sectionPath.join(" > ")}`;
|
|
@@ -336,10 +403,31 @@ var buildProvenanceLabel = (metadata) => {
|
|
|
336
403
|
const mediaDurationLabel = formatMediaDurationLabel(metadata.mediaDurationMs);
|
|
337
404
|
const transcriptSource = getContextString(metadata.transcriptSource);
|
|
338
405
|
const pdfTextMode = getContextString(metadata.pdfTextMode);
|
|
406
|
+
const officeBlockKind = getContextString(metadata.officeBlockKind);
|
|
407
|
+
const officeListContextText = getContextString(metadata.officeListContextText);
|
|
408
|
+
const officeListGroupItemCount = getContextNumber(metadata.officeListGroupItemCount);
|
|
409
|
+
const officeListLevelsLabel = formatOfficeListLevelsLabel(metadata.officeListLevels);
|
|
410
|
+
const officeTableHeaders = Array.isArray(metadata.officeTableHeaders) ? metadata.officeTableHeaders.map((value) => getContextString(value)).filter((value) => typeof value === "string") : [];
|
|
411
|
+
const officeTableColumnCount = getContextNumber(metadata.officeTableColumnCount);
|
|
412
|
+
const officeTableBodyRowCount = getContextNumber(metadata.officeTableBodyRowCount);
|
|
413
|
+
const officeTableBodyRowStart = getContextNumber(metadata.officeTableBodyRowStart);
|
|
414
|
+
const officeTableBodyRowEnd = getContextNumber(metadata.officeTableBodyRowEnd);
|
|
415
|
+
const officeTableContextText = getContextString(metadata.officeTableContextText);
|
|
416
|
+
const sectionPath = Array.isArray(metadata.sectionPath) ? metadata.sectionPath.map((value) => getContextString(value)).filter((value) => typeof value === "string") : [];
|
|
339
417
|
const ocrEngine = getContextString(metadata.ocrEngine);
|
|
340
418
|
const ocrConfidence = getContextNumber(metadata.ocrRegionConfidence) ?? getContextNumber(metadata.ocrConfidence);
|
|
341
419
|
const labels = [
|
|
342
420
|
pdfTextMode ? `PDF ${pdfTextMode}` : "",
|
|
421
|
+
officeBlockKind ? `Office ${officeBlockKind}` : "",
|
|
422
|
+
typeof officeListGroupItemCount === "number" ? `Office list ${officeListGroupItemCount} items` : "",
|
|
423
|
+
officeListLevelsLabel ?? "",
|
|
424
|
+
sectionPath.length > 0 && officeBlockKind ? `Source-aware office ${officeBlockKind} block ${sectionPath.join(" > ")}` : "",
|
|
425
|
+
officeListContextText ? `Office list context ${officeListContextText}` : "",
|
|
426
|
+
officeTableHeaders.length > 0 ? `Office table ${officeTableHeaders.join(", ")}` : "",
|
|
427
|
+
typeof officeTableColumnCount === "number" ? `Office table ${officeTableColumnCount} cols` : "",
|
|
428
|
+
typeof officeTableBodyRowCount === "number" ? `Office table ${officeTableBodyRowCount} body rows` : "",
|
|
429
|
+
typeof officeTableBodyRowStart === "number" && typeof officeTableBodyRowEnd === "number" ? officeTableBodyRowStart === officeTableBodyRowEnd ? `Office table row ${officeTableBodyRowStart}` : `Office table rows ${officeTableBodyRowStart}-${officeTableBodyRowEnd}` : "",
|
|
430
|
+
officeTableContextText ? `Office table context ${officeTableContextText}` : "",
|
|
343
431
|
ocrEngine ? `OCR ${ocrEngine}` : "",
|
|
344
432
|
typeof ocrConfidence === "number" ? `Confidence ${ocrConfidence.toFixed(2)}` : "",
|
|
345
433
|
mediaKind ? `Media ${mediaKind}` : "",
|
|
@@ -503,6 +591,15 @@ var buildRAGCitations = (sources) => {
|
|
|
503
591
|
});
|
|
504
592
|
}
|
|
505
593
|
return [...unique.values()].sort((left, right) => {
|
|
594
|
+
const leftOfficeScope = getOfficeTableCitationScope(left.metadata);
|
|
595
|
+
const rightOfficeScope = getOfficeTableCitationScope(right.metadata);
|
|
596
|
+
if (left.source === right.source && leftOfficeScope && rightOfficeScope && leftOfficeScope.blockKind === rightOfficeScope.blockKind && leftOfficeScope.sectionTitle === rightOfficeScope.sectionTitle) {
|
|
597
|
+
const leftOfficePreference = getOfficeTableCitationPreference(left.metadata);
|
|
598
|
+
const rightOfficePreference = getOfficeTableCitationPreference(right.metadata);
|
|
599
|
+
if (rightOfficePreference !== leftOfficePreference) {
|
|
600
|
+
return rightOfficePreference - leftOfficePreference;
|
|
601
|
+
}
|
|
602
|
+
}
|
|
506
603
|
if (right.score !== left.score) {
|
|
507
604
|
return right.score - left.score;
|
|
508
605
|
}
|
|
@@ -916,6 +1013,7 @@ var buildSourceAwareUnitScopeLabel = (metadata) => {
|
|
|
916
1013
|
const sectionKind = getContextString2(metadata.sectionKind);
|
|
917
1014
|
const sectionPath = Array.isArray(metadata.sectionPath) ? metadata.sectionPath.map((value) => getContextString2(value)).filter((value) => typeof value === "string") : [];
|
|
918
1015
|
const sectionTitle = getContextString2(metadata.sectionTitle) ?? sectionPath.at(-1);
|
|
1016
|
+
const pdfSemanticRole = getContextString2(metadata.pdfSemanticRole);
|
|
919
1017
|
const pdfTextKind = getContextString2(metadata.pdfTextKind);
|
|
920
1018
|
const officeBlockKind = getContextString2(metadata.officeBlockKind);
|
|
921
1019
|
const sheetName = getContextString2(metadata.sheetName);
|
|
@@ -926,6 +1024,12 @@ var buildSourceAwareUnitScopeLabel = (metadata) => {
|
|
|
926
1024
|
return `Source-aware section ${sectionPath.join(" > ")}`;
|
|
927
1025
|
}
|
|
928
1026
|
if (sectionKind === "pdf_block") {
|
|
1027
|
+
if (pdfSemanticRole === "figure_caption" && sectionTitle) {
|
|
1028
|
+
return `Source-aware PDF figure caption ${sectionTitle}`;
|
|
1029
|
+
}
|
|
1030
|
+
if (pdfSemanticRole === "figure_body" && sectionTitle) {
|
|
1031
|
+
return `Source-aware PDF figure body ${sectionTitle}`;
|
|
1032
|
+
}
|
|
929
1033
|
if (pdfTextKind === "table_like" && sectionTitle) {
|
|
930
1034
|
return `Source-aware PDF table block ${sectionTitle}`;
|
|
931
1035
|
}
|
|
@@ -935,11 +1039,12 @@ var buildSourceAwareUnitScopeLabel = (metadata) => {
|
|
|
935
1039
|
return "Source-aware PDF block";
|
|
936
1040
|
}
|
|
937
1041
|
if (sectionKind === "office_block") {
|
|
938
|
-
|
|
939
|
-
|
|
1042
|
+
const officeSectionLabel = sectionPath.length > 0 ? sectionPath.join(" > ") : sectionTitle;
|
|
1043
|
+
if (officeBlockKind && officeSectionLabel) {
|
|
1044
|
+
return `Source-aware office ${officeBlockKind} block ${officeSectionLabel}`;
|
|
940
1045
|
}
|
|
941
|
-
if (
|
|
942
|
-
return `Source-aware office block ${
|
|
1046
|
+
if (officeSectionLabel) {
|
|
1047
|
+
return `Source-aware office block ${officeSectionLabel}`;
|
|
943
1048
|
}
|
|
944
1049
|
return "Source-aware office block";
|
|
945
1050
|
}
|
|
@@ -1327,6 +1432,18 @@ var formatSpreadsheetTableLabel = (tableIndex, tableCount) => {
|
|
|
1327
1432
|
}
|
|
1328
1433
|
return `Table ${tableIndex}`;
|
|
1329
1434
|
};
|
|
1435
|
+
/**
 * Builds a human-readable label describing the span of list levels.
 *
 * Each entry is passed through `getContextNumber2` (defined elsewhere in this
 * file); entries for which it does not yield a number are dropped.
 *
 * @param {unknown} value - Expected to be an array of list-level values.
 * @returns {string | undefined} `Office list level N` when the lowest and
 *   highest levels coincide, `Office list levels A-B` otherwise, or
 *   `undefined` when `value` is not a non-empty array or no entry is numeric.
 */
var formatOfficeListLevelsLabel2 = (value) => {
  if (!(Array.isArray(value) && value.length > 0)) {
    return;
  }

  // reduce skips holes in sparse arrays, matching a map-then-filter pipeline.
  const sortedLevels = value.reduce((collected, candidate) => {
    const level = getContextNumber2(candidate);
    if (typeof level === "number") {
      collected.push(level);
    }
    return collected;
  }, []);
  if (sortedLevels.length === 0) {
    return;
  }

  // Ascending numeric order puts the lowest level first and the highest last.
  sortedLevels.sort((a, b) => a - b);
  const first = sortedLevels[0];
  const last = sortedLevels[sortedLevels.length - 1];

  if (first === last) {
    return `Office list level ${first}`;
  }
  return `Office list levels ${first}-${last}`;
};
|
|
1330
1447
|
var formatMediaDurationLabel2 = (value) => {
|
|
1331
1448
|
if (typeof value !== "number" || !Number.isFinite(value) || value < 0) {
|
|
1332
1449
|
return;
|
|
@@ -1338,9 +1455,18 @@ var buildContextLabel2 = (metadata) => {
|
|
|
1338
1455
|
return;
|
|
1339
1456
|
}
|
|
1340
1457
|
const pdfTextKind = getContextString2(metadata.pdfTextKind);
|
|
1458
|
+
const pdfSemanticRole = getContextString2(metadata.pdfSemanticRole);
|
|
1459
|
+
const pdfTableBodyRowStart = getContextNumber2(metadata.pdfTableBodyRowStart);
|
|
1460
|
+
const pdfTableBodyRowEnd = getContextNumber2(metadata.pdfTableBodyRowEnd);
|
|
1341
1461
|
const officeBlockKind = getContextString2(metadata.officeBlockKind);
|
|
1342
1462
|
const sectionPath = Array.isArray(metadata.sectionPath) ? metadata.sectionPath.map((value) => getContextString2(value)).filter((value) => typeof value === "string") : [];
|
|
1343
1463
|
const sectionTitle = getContextString2(metadata.sectionTitle) ?? sectionPath.at(-1);
|
|
1464
|
+
if (pdfSemanticRole === "figure_caption" && sectionTitle) {
|
|
1465
|
+
return `PDF figure caption ${sectionTitle}`;
|
|
1466
|
+
}
|
|
1467
|
+
if (pdfSemanticRole === "figure_body" && sectionTitle) {
|
|
1468
|
+
return `PDF figure body ${sectionTitle}`;
|
|
1469
|
+
}
|
|
1344
1470
|
if (pdfTextKind === "table_like" && sectionTitle) {
|
|
1345
1471
|
return `PDF table block ${sectionTitle}`;
|
|
1346
1472
|
}
|
|
@@ -1348,13 +1474,13 @@ var buildContextLabel2 = (metadata) => {
|
|
|
1348
1474
|
return `PDF text block ${sectionTitle}`;
|
|
1349
1475
|
}
|
|
1350
1476
|
if (officeBlockKind === "table" && sectionTitle) {
|
|
1351
|
-
return `Office table block ${sectionTitle}`;
|
|
1477
|
+
return `Office table block ${sectionPath.join(" > ") || sectionTitle}`;
|
|
1352
1478
|
}
|
|
1353
1479
|
if (officeBlockKind === "list" && sectionTitle) {
|
|
1354
|
-
return `Office list block ${sectionTitle}`;
|
|
1480
|
+
return `Office list block ${sectionPath.join(" > ") || sectionTitle}`;
|
|
1355
1481
|
}
|
|
1356
1482
|
if (officeBlockKind === "paragraph" && sectionTitle) {
|
|
1357
|
-
return `Office paragraph block ${sectionTitle}`;
|
|
1483
|
+
return `Office paragraph block ${sectionPath.join(" > ") || sectionTitle}`;
|
|
1358
1484
|
}
|
|
1359
1485
|
const emailKind = getContextString2(metadata.emailKind);
|
|
1360
1486
|
if (emailKind === "attachment") {
|
|
@@ -1452,9 +1578,14 @@ var buildLocatorLabel2 = (metadata, source, title) => {
|
|
|
1452
1578
|
return;
|
|
1453
1579
|
}
|
|
1454
1580
|
const pdfTextKind = getContextString2(metadata.pdfTextKind);
|
|
1581
|
+
const pdfSemanticRole = getContextString2(metadata.pdfSemanticRole);
|
|
1455
1582
|
const officeBlockKind = getContextString2(metadata.officeBlockKind);
|
|
1456
1583
|
const pdfBlockNumber = getContextNumber2(metadata.pdfBlockNumber);
|
|
1584
|
+
const pdfTableBodyRowStart = getContextNumber2(metadata.pdfTableBodyRowStart);
|
|
1585
|
+
const pdfTableBodyRowEnd = getContextNumber2(metadata.pdfTableBodyRowEnd);
|
|
1457
1586
|
const officeBlockNumber = getContextNumber2(metadata.officeBlockNumber);
|
|
1587
|
+
const officeTableBodyRowStart = getContextNumber2(metadata.officeTableBodyRowStart);
|
|
1588
|
+
const officeTableBodyRowEnd = getContextNumber2(metadata.officeTableBodyRowEnd);
|
|
1458
1589
|
const spreadsheetRowStart = getContextNumber2(metadata.spreadsheetRowStart);
|
|
1459
1590
|
const spreadsheetRowEnd = getContextNumber2(metadata.spreadsheetRowEnd);
|
|
1460
1591
|
const slideTitle = getContextString2(metadata.slideTitle);
|
|
@@ -1465,7 +1596,16 @@ var buildLocatorLabel2 = (metadata, source, title) => {
|
|
|
1465
1596
|
if (page && region) {
|
|
1466
1597
|
return `Page ${page} \xB7 Region ${region}`;
|
|
1467
1598
|
}
|
|
1599
|
+
if (page && pdfBlockNumber && pdfSemanticRole === "figure_caption") {
|
|
1600
|
+
return `Page ${page} \xB7 Figure Caption ${pdfBlockNumber}`;
|
|
1601
|
+
}
|
|
1602
|
+
if (page && pdfBlockNumber && pdfSemanticRole === "figure_body") {
|
|
1603
|
+
return `Page ${page} \xB7 Figure Body ${pdfBlockNumber}`;
|
|
1604
|
+
}
|
|
1468
1605
|
if (page && pdfBlockNumber && pdfTextKind === "table_like") {
|
|
1606
|
+
if (typeof pdfTableBodyRowStart === "number" && typeof pdfTableBodyRowEnd === "number") {
|
|
1607
|
+
return pdfTableBodyRowStart === pdfTableBodyRowEnd ? `Page ${page} \xB7 Table Block ${pdfBlockNumber} \xB7 Row ${pdfTableBodyRowStart}` : `Page ${page} \xB7 Table Block ${pdfBlockNumber} \xB7 Rows ${pdfTableBodyRowStart}-${pdfTableBodyRowEnd}`;
|
|
1608
|
+
}
|
|
1469
1609
|
return `Page ${page} \xB7 Table Block ${pdfBlockNumber}`;
|
|
1470
1610
|
}
|
|
1471
1611
|
if (page && pdfBlockNumber) {
|
|
@@ -1528,6 +1668,9 @@ var buildLocatorLabel2 = (metadata, source, title) => {
|
|
|
1528
1668
|
return `Timestamp ${mediaStart}`;
|
|
1529
1669
|
}
|
|
1530
1670
|
if (officeBlockNumber && officeBlockKind === "table") {
|
|
1671
|
+
if (typeof officeTableBodyRowStart === "number" && typeof officeTableBodyRowEnd === "number") {
|
|
1672
|
+
return officeTableBodyRowStart === officeTableBodyRowEnd ? `Office table block ${officeBlockNumber} \xB7 Row ${officeTableBodyRowStart}` : `Office table block ${officeBlockNumber} \xB7 Rows ${officeTableBodyRowStart}-${officeTableBodyRowEnd}`;
|
|
1673
|
+
}
|
|
1531
1674
|
return `Office table block ${officeBlockNumber}`;
|
|
1532
1675
|
}
|
|
1533
1676
|
if (officeBlockNumber && officeBlockKind === "list") {
|
|
@@ -1564,11 +1707,27 @@ var buildProvenanceLabel2 = (metadata) => {
|
|
|
1564
1707
|
const mediaSegmentWindowDurationLabel = formatMediaDurationLabel2(metadata.mediaSegmentGroupDurationMs);
|
|
1565
1708
|
const mediaSegmentGapLabel = formatMediaDurationLabel2(metadata.mediaSegmentGapFromPreviousMs);
|
|
1566
1709
|
const spreadsheetHeaders = getSpreadsheetHeaders(metadata);
|
|
1710
|
+
const pdfTableHeaders = Array.isArray(metadata.pdfTableHeaders) ? metadata.pdfTableHeaders.map((value) => getContextString2(value)).filter((value) => typeof value === "string") : [];
|
|
1711
|
+
const pdfTableColumnCount = getContextNumber2(metadata.pdfTableColumnCount);
|
|
1712
|
+
const pdfTableBodyRowCount = getContextNumber2(metadata.pdfTableBodyRowCount);
|
|
1567
1713
|
const spreadsheetColumnRange = formatSpreadsheetColumnRange(getContextString2(metadata.spreadsheetColumnStart), getContextString2(metadata.spreadsheetColumnEnd));
|
|
1568
1714
|
const slideNotesText = getContextString2(metadata.slideNotesText);
|
|
1569
1715
|
const pdfTextMode = getContextString2(metadata.pdfTextMode);
|
|
1716
|
+
const pdfEvidenceMode = getContextString2(metadata.pdfEvidenceMode);
|
|
1717
|
+
const pdfEvidenceOrigin = getContextString2(metadata.pdfEvidenceOrigin);
|
|
1718
|
+
const pdfEvidenceSupplement = getContextString2(metadata.pdfEvidenceSupplement);
|
|
1570
1719
|
const pdfTextKind = getContextString2(metadata.pdfTextKind);
|
|
1720
|
+
const pdfSemanticRole = getContextString2(metadata.pdfSemanticRole);
|
|
1571
1721
|
const officeBlockKind = getContextString2(metadata.officeBlockKind);
|
|
1722
|
+
const officeListContextText = getContextString2(metadata.officeListContextText);
|
|
1723
|
+
const officeListGroupItemCount = getContextNumber2(metadata.officeListGroupItemCount);
|
|
1724
|
+
const officeListLevelsLabel = formatOfficeListLevelsLabel2(metadata.officeListLevels);
|
|
1725
|
+
const officeTableHeaders = Array.isArray(metadata.officeTableHeaders) ? metadata.officeTableHeaders.map((value) => getContextString2(value)).filter((value) => typeof value === "string") : [];
|
|
1726
|
+
const officeTableColumnCount = getContextNumber2(metadata.officeTableColumnCount);
|
|
1727
|
+
const officeTableBodyRowCount = getContextNumber2(metadata.officeTableBodyRowCount);
|
|
1728
|
+
const officeTableBodyRowStart = getContextNumber2(metadata.officeTableBodyRowStart);
|
|
1729
|
+
const officeTableBodyRowEnd = getContextNumber2(metadata.officeTableBodyRowEnd);
|
|
1730
|
+
const officeTableContextText = getContextString2(metadata.officeTableContextText);
|
|
1572
1731
|
const ocrEngine = getContextString2(metadata.ocrEngine);
|
|
1573
1732
|
const extractorRegistryMatch = getContextString2(metadata.extractorRegistryMatch);
|
|
1574
1733
|
const chunkingProfile = getContextString2(metadata.chunkingProfile);
|
|
@@ -1584,10 +1743,19 @@ var buildProvenanceLabel2 = (metadata) => {
|
|
|
1584
1743
|
const ocrMinConfidence = getContextNumber2(metadata.ocrPageMinConfidence) ?? getContextNumber2(metadata.ocrMinConfidence);
|
|
1585
1744
|
const ocrMaxConfidence = getContextNumber2(metadata.ocrPageMaxConfidence) ?? getContextNumber2(metadata.ocrMaxConfidence);
|
|
1586
1745
|
const ocrRegionCount = getContextNumber2(metadata.ocrRegionCount);
|
|
1746
|
+
const pdfTableBodyRowStart = getContextNumber2(metadata.pdfTableBodyRowStart);
|
|
1747
|
+
const pdfTableBodyRowEnd = getContextNumber2(metadata.pdfTableBodyRowEnd);
|
|
1587
1748
|
const labels = [
|
|
1588
1749
|
pdfTextMode ? `PDF ${pdfTextMode}` : "",
|
|
1589
|
-
|
|
1750
|
+
pdfEvidenceMode ? `PDF evidence ${pdfEvidenceMode}` : "",
|
|
1751
|
+
pdfEvidenceOrigin ? `PDF origin ${pdfEvidenceOrigin}` : "",
|
|
1752
|
+
pdfEvidenceSupplement ? `PDF supplement ${pdfEvidenceSupplement}` : "",
|
|
1753
|
+
pdfSemanticRole === "figure_caption" ? "PDF figure caption" : "",
|
|
1754
|
+
pdfSemanticRole === "figure_body" ? "PDF figure body" : "",
|
|
1755
|
+
pdfSemanticRole === "figure_caption" ? "" : pdfSemanticRole === "figure_body" ? "" : pdfTextKind === "table_like" ? "PDF table block" : pdfTextKind === "paragraph" ? "PDF text block" : "",
|
|
1590
1756
|
officeBlockKind ? `Office ${officeBlockKind}` : "",
|
|
1757
|
+
typeof officeListGroupItemCount === "number" ? `Office list ${officeListGroupItemCount} items` : "",
|
|
1758
|
+
officeListLevelsLabel ?? "",
|
|
1591
1759
|
ocrEngine ? `OCR ${ocrEngine}` : "",
|
|
1592
1760
|
extractorRegistryMatch ? `Extractor ${extractorRegistryMatch}` : "",
|
|
1593
1761
|
chunkingProfile ? `Chunking ${chunkingProfile}` : "",
|
|
@@ -1597,6 +1765,16 @@ var buildProvenanceLabel2 = (metadata) => {
|
|
|
1597
1765
|
typeof ocrAverageConfidence === "number" && ocrAverageConfidence !== ocrConfidence ? `Average ${ocrAverageConfidence.toFixed(2)}` : "",
|
|
1598
1766
|
typeof ocrMinConfidence === "number" && typeof ocrMaxConfidence === "number" && ocrMinConfidence !== ocrMaxConfidence ? `Range ${ocrMinConfidence.toFixed(2)}-${ocrMaxConfidence.toFixed(2)}` : "",
|
|
1599
1767
|
typeof ocrRegionCount === "number" ? `${ocrRegionCount} regions` : "",
|
|
1768
|
+
pdfTableHeaders.length > 0 ? `PDF table ${pdfTableHeaders.join(", ")}` : "",
|
|
1769
|
+
typeof pdfTableColumnCount === "number" ? `PDF table ${pdfTableColumnCount} cols` : "",
|
|
1770
|
+
typeof pdfTableBodyRowCount === "number" ? `PDF table ${pdfTableBodyRowCount} body rows` : "",
|
|
1771
|
+
typeof pdfTableBodyRowStart === "number" && typeof pdfTableBodyRowEnd === "number" ? pdfTableBodyRowStart === pdfTableBodyRowEnd ? `PDF table row ${pdfTableBodyRowStart}` : `PDF table rows ${pdfTableBodyRowStart}-${pdfTableBodyRowEnd}` : "",
|
|
1772
|
+
officeListContextText ? `Office list context ${officeListContextText}` : "",
|
|
1773
|
+
officeTableHeaders.length > 0 ? `Office table ${officeTableHeaders.join(", ")}` : "",
|
|
1774
|
+
typeof officeTableColumnCount === "number" ? `Office table ${officeTableColumnCount} cols` : "",
|
|
1775
|
+
typeof officeTableBodyRowCount === "number" ? `Office table ${officeTableBodyRowCount} body rows` : "",
|
|
1776
|
+
typeof officeTableBodyRowStart === "number" && typeof officeTableBodyRowEnd === "number" ? officeTableBodyRowStart === officeTableBodyRowEnd ? `Office table row ${officeTableBodyRowStart}` : `Office table rows ${officeTableBodyRowStart}-${officeTableBodyRowEnd}` : "",
|
|
1777
|
+
officeTableContextText ? `Office table context ${officeTableContextText}` : "",
|
|
1600
1778
|
spreadsheetHeaders.length > 0 ? `Spreadsheet ${spreadsheetHeaders.join(", ")}` : "",
|
|
1601
1779
|
spreadsheetColumnRange ? `Spreadsheet ${spreadsheetColumnRange}` : "",
|
|
1602
1780
|
spreadsheetTableLabel ? `Spreadsheet ${spreadsheetTableLabel}` : "",
|
|
@@ -2028,12 +2206,92 @@ var getStructuredSectionScoreWeight = (metadata) => {
|
|
|
2028
2206
|
return 1;
|
|
2029
2207
|
};
|
|
2030
2208
|
var getStructuredSourceLeadScore = (source) => source.score * getStructuredSectionScoreWeight(source.metadata);
|
|
2209
|
+
/**
 * Ranks PDF evidence by its recorded extraction mode, origin and supplement.
 *
 * Fields are read through `getContextString2` (defined elsewhere in this
 * file). The ranking is:
 *   3 - mode "hybrid", origin "native", supplement "ocr"
 *   2 - mode "native", origin "native"
 *   1 - mode "ocr", origin "ocr"
 *   0 - anything else, including missing metadata
 *
 * @param {object | null | undefined} metadata - Chunk metadata.
 * @returns {number} The preference rank, 0 through 3.
 */
var getPDFLeadEvidencePreference = (metadata) => {
  if (!metadata) {
    return 0;
  }

  const mode = getContextString2(metadata.pdfEvidenceMode);
  const origin = getContextString2(metadata.pdfEvidenceOrigin);
  const supplement = getContextString2(metadata.pdfEvidenceSupplement);

  switch (mode) {
    case "hybrid":
      return origin === "native" && supplement === "ocr" ? 3 : 0;
    case "native":
      return origin === "native" ? 2 : 0;
    case "ocr":
      return origin === "ocr" ? 1 : 0;
    default:
      return 0;
  }
};
|
|
2227
|
+
/**
 * Collects the page / section / native-kind fields used to decide whether
 * two chunks share a scope.
 *
 * The page number is taken from `metadata.pageNumber`, then `metadata.page`
 * (both via `getContextNumber2`), and finally from a numeric
 * `metadata.pageIndex` incremented by one.
 *
 * @param {object | null | undefined} metadata - Chunk metadata.
 * @returns {{ pageNumber: (number | undefined), sectionTitle: (string | undefined), sourceNativeKind: (string | undefined) } | undefined}
 *   The scope fields, or `undefined` when metadata is missing or none of the
 *   three fields is available.
 */
var getPDFLeadScope = (metadata) => {
  if (!metadata) {
    return;
  }

  let pageNumber = getContextNumber2(metadata.pageNumber);
  if (pageNumber === null || pageNumber === undefined) {
    pageNumber = getContextNumber2(metadata.page);
  }
  if (pageNumber === null || pageNumber === undefined) {
    // pageIndex is treated as zero-based here, hence the +1.
    pageNumber = typeof metadata.pageIndex === "number" ? metadata.pageIndex + 1 : undefined;
  }

  const sectionTitle = getContextString2(metadata.sectionTitle);
  const sourceNativeKind = getContextString2(metadata.sourceNativeKind);

  const hasAnyScopeField = typeof pageNumber === "number" || Boolean(sectionTitle) || Boolean(sourceNativeKind);
  if (!hasAnyScopeField) {
    return;
  }

  return {
    pageNumber,
    sectionTitle,
    sourceNativeKind
  };
};
|
|
2243
|
+
/**
 * Derives a scope descriptor for an office "table" or "list" block.
 *
 * String fields are read through `getContextString2` (defined elsewhere in
 * this file). Any block kind other than "table" or "list" yields `undefined`.
 *
 * @param {object | null | undefined} metadata - Chunk metadata.
 * @returns {{ blockKind: string, pathDepth: number, sectionTitle: string, hasContext: boolean } | undefined}
 *   The scope descriptor, or `undefined` when metadata is missing, the block
 *   kind is neither "table" nor "list", or no section title can be resolved.
 */
var getOfficeLeadScope = (metadata) => {
  if (!metadata) {
    return;
  }

  const blockKind = getContextString2(metadata.officeBlockKind);
  const isTable = blockKind === "table";
  if (!isTable && blockKind !== "list") {
    return;
  }

  // Keep only the section path segments that resolve to strings.
  const pathSegments = [];
  const rawPath = metadata.sectionPath;
  if (Array.isArray(rawPath)) {
    rawPath.forEach((segment) => {
      const text = getContextString2(segment);
      if (typeof text === "string") {
        pathSegments.push(text);
      }
    });
  }

  // An explicit section title wins; otherwise fall back to the last path segment.
  const sectionTitle = getContextString2(metadata.sectionTitle) ?? pathSegments.at(-1);

  // Tables and lists carry their surrounding context text under different keys.
  const contextKey = isTable ? "officeTableContextText" : "officeListContextText";
  const contextText = getContextString2(metadata[contextKey]);

  if (!sectionTitle) {
    return;
  }

  return {
    blockKind,
    pathDepth: pathSegments.length,
    sectionTitle,
    hasContext: typeof contextText === "string"
  };
};
|
|
2264
|
+
/**
 * Scores how strongly an office table/list chunk should be preferred as the
 * lead chunk.
 *
 * The score is `pathDepth * 10`, plus 1 when the block has context text,
 * plus 1 when it is a list block whose `officeListGroupItemCount` is a
 * number greater than 1.
 *
 * @param {object | null | undefined} metadata - Chunk metadata.
 * @returns {number} The preference score; 0 when no office scope can be
 *   derived via `getOfficeLeadScope`.
 */
var getOfficeLeadEvidencePreference = (metadata) => {
  const scope = getOfficeLeadScope(metadata);
  if (!scope) {
    return 0;
  }

  const depthScore = scope.pathDepth * 10;
  const contextBonus = scope.hasContext ? 1 : 0;

  // Only list blocks that group more than one item earn the extra point.
  let groupedListBonus = 0;
  if (scope.blockKind === "list") {
    const groupItemCount = metadata?.officeListGroupItemCount;
    if (typeof groupItemCount === "number" && groupItemCount > 1) {
      groupedListBonus = 1;
    }
  }

  return depthScore + contextBonus + groupedListBonus;
};
|
|
2031
2271
|
var getPreferredSourceLeadChunk = (chunks) => chunks.slice().sort((left, right) => {
|
|
2272
|
+
const leftOfficeScope = getOfficeLeadScope(left.metadata);
|
|
2273
|
+
const rightOfficeScope = getOfficeLeadScope(right.metadata);
|
|
2274
|
+
if (left.source === right.source && leftOfficeScope && rightOfficeScope && leftOfficeScope.blockKind === rightOfficeScope.blockKind && leftOfficeScope.sectionTitle === rightOfficeScope.sectionTitle) {
|
|
2275
|
+
const leftOfficePreference = getOfficeLeadEvidencePreference(left.metadata);
|
|
2276
|
+
const rightOfficePreference = getOfficeLeadEvidencePreference(right.metadata);
|
|
2277
|
+
if (rightOfficePreference !== leftOfficePreference) {
|
|
2278
|
+
return rightOfficePreference - leftOfficePreference;
|
|
2279
|
+
}
|
|
2280
|
+
}
|
|
2032
2281
|
const leftWeightedScore = getStructuredSourceLeadScore(left);
|
|
2033
2282
|
const rightWeightedScore = getStructuredSourceLeadScore(right);
|
|
2034
2283
|
if (rightWeightedScore !== leftWeightedScore) {
|
|
2035
2284
|
return rightWeightedScore - leftWeightedScore;
|
|
2036
2285
|
}
|
|
2286
|
+
const leftScope = getPDFLeadScope(left.metadata);
|
|
2287
|
+
const rightScope = getPDFLeadScope(right.metadata);
|
|
2288
|
+
if (left.source === right.source && leftScope && rightScope && (leftScope.sectionTitle && rightScope.sectionTitle && leftScope.sectionTitle === rightScope.sectionTitle || typeof leftScope.pageNumber === "number" && typeof rightScope.pageNumber === "number" && leftScope.pageNumber === rightScope.pageNumber)) {
|
|
2289
|
+
const leftEvidencePreference = getPDFLeadEvidencePreference(left.metadata);
|
|
2290
|
+
const rightEvidencePreference = getPDFLeadEvidencePreference(right.metadata);
|
|
2291
|
+
if (rightEvidencePreference !== leftEvidencePreference) {
|
|
2292
|
+
return rightEvidencePreference - leftEvidencePreference;
|
|
2293
|
+
}
|
|
2294
|
+
}
|
|
2037
2295
|
if (right.score !== left.score) {
|
|
2038
2296
|
return right.score - left.score;
|
|
2039
2297
|
}
|
|
@@ -2287,6 +2545,7 @@ var buildRAGSectionRetrievalDiagnostics = (sources, trace) => {
|
|
|
2287
2545
|
queryTransformProvider: trace?.queryTransformProvider,
|
|
2288
2546
|
queryTransformReason: trace?.queryTransformReason,
|
|
2289
2547
|
reasons,
|
|
2548
|
+
evidenceReconcileApplied: trace?.steps.some((step) => step.stage === "evidence_reconcile"),
|
|
2290
2549
|
rerankApplied: trace?.steps.some((step) => step.stage === "rerank" && step.metadata?.applied === true),
|
|
2291
2550
|
scoreShare,
|
|
2292
2551
|
scoreThresholdApplied: trace?.steps.some((step) => step.stage === "score_filter"),
|
|
@@ -2965,6 +3224,24 @@ var buildComparisonOverviewPresentation = (input) => {
|
|
|
2965
3224
|
value: input.resolveLabel(input.summary.bestByMultivectorVectorHitCases)
|
|
2966
3225
|
});
|
|
2967
3226
|
}
|
|
3227
|
+
if (input.summary.bestByEvidenceReconcileCases) {
|
|
3228
|
+
rows.push({
|
|
3229
|
+
label: "Best evidence reconcile",
|
|
3230
|
+
value: input.resolveLabel(input.summary.bestByEvidenceReconcileCases)
|
|
3231
|
+
});
|
|
3232
|
+
}
|
|
3233
|
+
if (input.summary.bestByLowestRuntimeCandidateBudgetExhaustedCases) {
|
|
3234
|
+
rows.push({
|
|
3235
|
+
label: "Lowest runtime budget exhaustion",
|
|
3236
|
+
value: input.resolveLabel(input.summary.bestByLowestRuntimeCandidateBudgetExhaustedCases)
|
|
3237
|
+
});
|
|
3238
|
+
}
|
|
3239
|
+
if (input.summary.bestByLowestRuntimeUnderfilledTopKCases) {
|
|
3240
|
+
rows.push({
|
|
3241
|
+
label: "Lowest runtime underfilled TopK",
|
|
3242
|
+
value: input.resolveLabel(input.summary.bestByLowestRuntimeUnderfilledTopKCases)
|
|
3243
|
+
});
|
|
3244
|
+
}
|
|
2968
3245
|
return {
|
|
2969
3246
|
rows,
|
|
2970
3247
|
winnerLabel,
|
|
@@ -3017,6 +3294,12 @@ var buildRAGComparisonTraceSummaryRows = (entry) => {
|
|
|
3017
3294
|
}, {
|
|
3018
3295
|
label: "Multivector",
|
|
3019
3296
|
value: `${formatTraceRatio(trace.multiVectorCases, trace.totalCases)} \xB7 collapse ${formatTraceRatio(trace.multiVectorCollapsedCases, trace.totalCases)} \xB7 lexical ${formatTraceRatio(trace.multiVectorLexicalHitCases, trace.totalCases)} \xB7 vector ${formatTraceRatio(trace.multiVectorVectorHitCases, trace.totalCases)}`
|
|
3297
|
+
}, {
|
|
3298
|
+
label: "Runtime",
|
|
3299
|
+
value: `budget ${formatTraceRatio(trace.runtimeCandidateBudgetExhaustedCases, trace.totalCases)} \xB7 underfilled ${formatTraceRatio(trace.runtimeUnderfilledTopKCases, trace.totalCases)}`
|
|
3300
|
+
}, {
|
|
3301
|
+
label: "Evidence reconcile",
|
|
3302
|
+
value: `all ${formatTraceRatio(trace.stageCounts.evidence_reconcile ?? 0, trace.totalCases)} \xB7 office ${formatTraceRatio(trace.officeEvidenceReconcileCases, trace.totalCases)} \xB7 pdf ${formatTraceRatio(trace.pdfEvidenceReconcileCases, trace.totalCases)}`
|
|
3020
3303
|
}, {
|
|
3021
3304
|
label: "TopK",
|
|
3022
3305
|
value: `${trace.averageCandidateTopK.toFixed(1)} / ${trace.averageLexicalTopK.toFixed(1)}`
|
|
@@ -3121,6 +3404,12 @@ var buildRAGComparisonTraceDiffRows = (entry, leader) => {
|
|
|
3121
3404
|
}, {
|
|
3122
3405
|
label: "Round robin delta",
|
|
3123
3406
|
value: formatTraceCountDelta(trace.roundRobinCases - leaderTrace.roundRobinCases)
|
|
3407
|
+
}, {
|
|
3408
|
+
label: "Runtime budget delta",
|
|
3409
|
+
value: formatTraceCountDelta(trace.runtimeCandidateBudgetExhaustedCases - leaderTrace.runtimeCandidateBudgetExhaustedCases)
|
|
3410
|
+
}, {
|
|
3411
|
+
label: "Runtime underfilled delta",
|
|
3412
|
+
value: formatTraceCountDelta(trace.runtimeUnderfilledTopKCases - leaderTrace.runtimeUnderfilledTopKCases)
|
|
3124
3413
|
});
|
|
3125
3414
|
if (stageDelta) {
|
|
3126
3415
|
rows.push({ label: "Stage delta", value: stageDelta });
|
|
@@ -3368,6 +3657,25 @@ var buildRAGEvaluationHistoryRows = (history) => {
|
|
|
3368
3657
|
label: "Trace variant delta",
|
|
3369
3658
|
value: formatTraceCountDelta(history.diff.traceSummaryDelta.variantCases)
|
|
3370
3659
|
});
|
|
3660
|
+
const evidenceReconcileDelta = history.diff.traceSummaryDelta.stageCounts?.evidence_reconcile;
|
|
3661
|
+
if (typeof evidenceReconcileDelta === "number") {
|
|
3662
|
+
rows.push({
|
|
3663
|
+
label: "Trace evidence reconcile delta",
|
|
3664
|
+
value: formatTraceCountDelta(evidenceReconcileDelta)
|
|
3665
|
+
});
|
|
3666
|
+
}
|
|
3667
|
+
if (typeof history.diff.traceSummaryDelta.officeEvidenceReconcileCasesDelta === "number") {
|
|
3668
|
+
rows.push({
|
|
3669
|
+
label: "Trace office evidence reconcile delta",
|
|
3670
|
+
value: formatTraceCountDelta(history.diff.traceSummaryDelta.officeEvidenceReconcileCasesDelta)
|
|
3671
|
+
});
|
|
3672
|
+
}
|
|
3673
|
+
if (typeof history.diff.traceSummaryDelta.pdfEvidenceReconcileCasesDelta === "number") {
|
|
3674
|
+
rows.push({
|
|
3675
|
+
label: "Trace PDF evidence reconcile delta",
|
|
3676
|
+
value: formatTraceCountDelta(history.diff.traceSummaryDelta.pdfEvidenceReconcileCasesDelta)
|
|
3677
|
+
});
|
|
3678
|
+
}
|
|
3371
3679
|
const stageDelta = Object.entries(history.diff.traceSummaryDelta.stageCounts ?? {}).map(([stage, count]) => `${stage} ${formatTraceCountDelta(count)}`).join(", ");
|
|
3372
3680
|
if (stageDelta) {
|
|
3373
3681
|
rows.push({ label: "Trace stage delta", value: stageDelta });
|
|
@@ -3572,6 +3880,92 @@ var buildRAGEvaluationSuiteSnapshotHistoryPresentation = (history) => ({
|
|
|
3572
3880
|
snapshots: buildRAGEvaluationSuiteSnapshotPresentations(history),
|
|
3573
3881
|
summary: history?.latestSnapshot ? `v${history.latestSnapshot.version}` : "No saved suite snapshots yet."
|
|
3574
3882
|
});
|
|
3883
|
+
/**
 * Tells whether a gate failure reason concerns runtime pressure.
 *
 * A reason qualifies when it mentions "runtime", "candidate-budget" or
 * "underfilled" (case-insensitive).
 *
 * @param {string} reason - Gate failure reason text.
 * @returns {boolean} True when the reason matches one of the runtime keywords.
 */
var isRuntimeGateReason = (reason) => {
  const runtimeKeywordPattern = /runtime|candidate-budget|underfilled/i;
  return runtimeKeywordPattern.test(reason);
};
|
|
3884
|
+
/**
 * Extracts the distinct fixture variant names encoded in run tags.
 *
 * A tag of the form `fixture:<name>` contributes `<name>`. Tags without the
 * prefix, tags with an empty name, and non-string entries are ignored.
 * Variants are returned in first-seen order without duplicates.
 *
 * @param {Iterable<unknown> | null | undefined} tags - Run tags; nullish means no tags.
 * @returns {string[]} Unique fixture variant names.
 */
var getFixtureVariantsFromRunTags = (tags) => {
  const fixturePrefix = "fixture:";
  // A Set keeps insertion order, so first-seen ordering is preserved while
  // de-duplicating in linear time.
  const variants = new Set();
  for (const tag of tags ?? []) {
    // Skip malformed (non-string) entries instead of throwing on startsWith.
    if (typeof tag !== "string" || !tag.startsWith(fixturePrefix)) {
      continue;
    }
    const variant = tag.slice(fixturePrefix.length);
    if (variant.length > 0) {
      variants.add(variant);
    }
  }
  return [...variants];
};
|
|
3885
|
+
/**
 * Builds the display model for one retrieval release history run.
 *
 * Rows always include the finish date, the passing-rate and average-F1
 * winners, the gate status and the runtime gate failures. Fixture variant and
 * lowest-runtime winner rows are added only when the run has that data. Gate
 * data is read from `decisionSummary` first and `releaseVerdict` second.
 *
 * @param {object} run - Release history run record.
 * @returns {{ label: string, rows: Array<{ label: string, value: string }>, runId: string, summary: string }}
 */
var buildRAGRetrievalReleaseHistoryRunPresentation = (run) => {
  const gateReasons = run.decisionSummary?.gate?.reasons ?? run.releaseVerdict?.gate?.reasons ?? [];
  const runtimeFailures = gateReasons.filter(isRuntimeGateReason);
  const hasRuntimeFailures = runtimeFailures.length > 0;

  const finishedRow = { label: "Finished", value: formatDateLabel(run.finishedAt) };
  const comparisonSummary = run.comparison.summary;
  const passingRateWinner = comparisonSummary.bestByPassingRate ?? "n/a";
  const variantNames = getFixtureVariantsFromRunTags(run.tags);
  const budgetWinner = comparisonSummary.bestByLowestRuntimeCandidateBudgetExhaustedCases;
  const underfilledWinner = comparisonSummary.bestByLowestRuntimeUnderfilledTopKCases;

  const rows = [
    finishedRow,
    { label: "Passing-rate winner", value: passingRateWinner },
    { label: "Average F1 winner", value: comparisonSummary.bestByAverageF1 ?? "n/a" },
    // Optional rows: present only when the run carries the underlying data.
    ...(variantNames.length > 0 ? [{ label: "Fixture variant", value: variantNames.join(", ") }] : []),
    ...(budgetWinner ? [{ label: "Lowest runtime budget exhaustion", value: budgetWinner }] : []),
    ...(underfilledWinner ? [{ label: "Lowest runtime underfilled TopK", value: underfilledWinner }] : []),
    {
      label: "Gate status",
      value: run.decisionSummary?.gate?.status ?? run.releaseVerdict?.gate?.status ?? "n/a"
    },
    {
      label: "Runtime gate failures",
      value: hasRuntimeFailures ? runtimeFailures.join("; ") : "none"
    }
  ];

  let summary;
  if (hasRuntimeFailures) {
    summary = `${run.label} \xB7 runtime gate blocked`;
  } else {
    summary = `${run.label} \xB7 ${passingRateWinner} leads passing rate`;
  }

  return {
    label: run.label,
    rows,
    runId: run.id,
    summary
  };
};
|
|
3931
|
+
/**
 * Builds the display model for a release group's history: timeline rows, a
 * count of recent runs blocked by runtime gates, the fixture variants seen
 * across runs, and a one-line summary.
 *
 * A run counts as runtime-blocked when its presentation has a
 * "Runtime gate failures" row whose value is not "none".
 *
 * @param {{ runs?: object[], timeline?: object }} input - Group runs and decision timeline.
 * @returns {{ recentRuns: object[], rows: Array<{ label: string, value: string }>, summary: string }}
 */
var buildRAGRetrievalReleaseGroupHistoryPresentation = (input) => {
  const recentRuns = (input.runs ?? []).map(buildRAGRetrievalReleaseHistoryRunPresentation);

  // Collect variants across all runs; a Set keeps first-seen order.
  const fixtureVariants = [
    ...new Set((input.runs ?? []).flatMap((run) => getFixtureVariantsFromRunTags(run.tags)))
  ];

  let runtimeBlockedRuns = 0;
  for (const entry of recentRuns) {
    const isBlocked = entry.rows.some(
      ({ label, value }) => label === "Runtime gate failures" && value !== "none"
    );
    if (isBlocked) {
      runtimeBlockedRuns += 1;
    }
  }

  const rows = [
    { label: "Latest decision", value: input.timeline?.latestDecisionKind ?? "none" },
    { label: "Latest decision at", value: formatDateLabel(input.timeline?.latestDecisionAt) },
    { label: "Last promoted", value: formatDateLabel(input.timeline?.lastPromotedAt) },
    { label: "Last reverted", value: formatDateLabel(input.timeline?.lastRevertedAt) },
    { label: "Recent runtime-blocked runs", value: String(runtimeBlockedRuns) }
  ];
  if (fixtureVariants.length > 0) {
    rows.push({ label: "Fixture variants", value: fixtureVariants.join(", ") });
  }

  let summary;
  if (input.timeline?.latestDecisionKind) {
    summary = `${input.timeline.latestDecisionKind} \xB7 ${recentRuns.length} recent runs`;
  } else if (recentRuns.length > 0) {
    summary = `${recentRuns.length} recent runs`;
  } else {
    summary = "No release history yet.";
  }

  return {
    recentRuns,
    rows,
    summary
  };
};
|
|
3575
3969
|
var buildRAGAnswerGroundingCaseSnapshotPresentations = (history) => {
|
|
3576
3970
|
if (!history?.caseSnapshots.length) {
|
|
3577
3971
|
return [];
|
|
@@ -4082,6 +4476,15 @@ var evaluateRetrievalComparisonGate = ({
|
|
|
4082
4476
|
if (typeof policy.minMultiVectorVectorHitCasesDelta === "number" && (delta.multiVectorVectorHitCasesDelta ?? 0) < policy.minMultiVectorVectorHitCasesDelta) {
|
|
4083
4477
|
reasons.push(`multivector vector-hit delta ${delta.multiVectorVectorHitCasesDelta ?? 0} is below ${policy.minMultiVectorVectorHitCasesDelta}`);
|
|
4084
4478
|
}
|
|
4479
|
+
if (typeof policy.minEvidenceReconcileCasesDelta === "number" && (delta.evidenceReconcileCasesDelta ?? 0) < policy.minEvidenceReconcileCasesDelta) {
|
|
4480
|
+
reasons.push(`evidence reconcile delta ${delta.evidenceReconcileCasesDelta ?? 0} is below ${policy.minEvidenceReconcileCasesDelta}`);
|
|
4481
|
+
}
|
|
4482
|
+
if (typeof policy.maxRuntimeCandidateBudgetExhaustedCasesDelta === "number" && (delta.runtimeCandidateBudgetExhaustedCasesDelta ?? 0) > policy.maxRuntimeCandidateBudgetExhaustedCasesDelta) {
|
|
4483
|
+
reasons.push(`runtime candidate-budget-exhausted delta ${delta.runtimeCandidateBudgetExhaustedCasesDelta ?? 0} exceeds ${policy.maxRuntimeCandidateBudgetExhaustedCasesDelta}`);
|
|
4484
|
+
}
|
|
4485
|
+
if (typeof policy.maxRuntimeUnderfilledTopKCasesDelta === "number" && (delta.runtimeUnderfilledTopKCasesDelta ?? 0) > policy.maxRuntimeUnderfilledTopKCasesDelta) {
|
|
4486
|
+
reasons.push(`runtime underfilled-topk delta ${delta.runtimeUnderfilledTopKCasesDelta ?? 0} exceeds ${policy.maxRuntimeUnderfilledTopKCasesDelta}`);
|
|
4487
|
+
}
|
|
4085
4488
|
if (reasons.length === 0) {
|
|
4086
4489
|
return {
|
|
4087
4490
|
policy,
|
|
@@ -4129,13 +4532,14 @@ var buildRAGRetrievalReleaseVerdict = ({
|
|
|
4129
4532
|
};
|
|
4130
4533
|
}
|
|
4131
4534
|
if (delta) {
|
|
4535
|
+
const requiresReview = delta.passingRateDelta < 0 || delta.averageF1Delta < 0 || (delta.evidenceReconcileCasesDelta ?? 0) < 0;
|
|
4132
4536
|
return {
|
|
4133
4537
|
baselineGroupKey: groupKey,
|
|
4134
4538
|
baselineRetrievalId,
|
|
4135
4539
|
candidateRetrievalId,
|
|
4136
4540
|
delta,
|
|
4137
|
-
status:
|
|
4138
|
-
summary:
|
|
4541
|
+
status: requiresReview ? "needs_review" : "pass",
|
|
4542
|
+
summary: requiresReview ? "Candidate should be reviewed before promotion." : "Candidate improved or matched the baseline."
|
|
4139
4543
|
};
|
|
4140
4544
|
}
|
|
4141
4545
|
return {
|
|
@@ -4612,6 +5016,20 @@ var buildTraceSummaryAggregate = ({
|
|
|
4612
5016
|
direction: "flat",
|
|
4613
5017
|
metric: "multiVectorCollapsedCases",
|
|
4614
5018
|
previous: 0
|
|
5019
|
+
},
|
|
5020
|
+
{
|
|
5021
|
+
current: 0,
|
|
5022
|
+
delta: 0,
|
|
5023
|
+
direction: "flat",
|
|
5024
|
+
metric: "runtimeCandidateBudgetExhaustedCases",
|
|
5025
|
+
previous: 0
|
|
5026
|
+
},
|
|
5027
|
+
{
|
|
5028
|
+
current: 0,
|
|
5029
|
+
delta: 0,
|
|
5030
|
+
direction: "flat",
|
|
5031
|
+
metric: "runtimeUnderfilledTopKCases",
|
|
5032
|
+
previous: 0
|
|
4615
5033
|
}
|
|
4616
5034
|
];
|
|
4617
5035
|
return {
|
|
@@ -4734,6 +5152,20 @@ var buildTraceSummaryAggregate = ({
|
|
|
4734
5152
|
direction: buildTraceSummaryDirection(latest.multiVectorCollapsedCases - previous.multiVectorCollapsedCases),
|
|
4735
5153
|
metric: "multiVectorCollapsedCases",
|
|
4736
5154
|
previous: previous.multiVectorCollapsedCases
|
|
5155
|
+
},
|
|
5156
|
+
{
|
|
5157
|
+
current: latest.runtimeCandidateBudgetExhaustedCases,
|
|
5158
|
+
delta: latest.runtimeCandidateBudgetExhaustedCases - previous.runtimeCandidateBudgetExhaustedCases,
|
|
5159
|
+
direction: buildTraceSummaryDirection(latest.runtimeCandidateBudgetExhaustedCases - previous.runtimeCandidateBudgetExhaustedCases),
|
|
5160
|
+
metric: "runtimeCandidateBudgetExhaustedCases",
|
|
5161
|
+
previous: previous.runtimeCandidateBudgetExhaustedCases
|
|
5162
|
+
},
|
|
5163
|
+
{
|
|
5164
|
+
current: latest.runtimeUnderfilledTopKCases,
|
|
5165
|
+
delta: latest.runtimeUnderfilledTopKCases - previous.runtimeUnderfilledTopKCases,
|
|
5166
|
+
direction: buildTraceSummaryDirection(latest.runtimeUnderfilledTopKCases - previous.runtimeUnderfilledTopKCases),
|
|
5167
|
+
metric: "runtimeUnderfilledTopKCases",
|
|
5168
|
+
previous: previous.runtimeUnderfilledTopKCases
|
|
4737
5169
|
}
|
|
4738
5170
|
];
|
|
4739
5171
|
const absoluteSorted = [...aggregate].sort((left, right) => Math.abs(right.delta) - Math.abs(left.delta) || left.metric.localeCompare(right.metric));
|
|
@@ -4788,12 +5220,17 @@ var summarizeRetrievalTraces = (traces) => {
|
|
|
4788
5220
|
let multiVectorVectorHitCases = 0;
|
|
4789
5221
|
let multiVectorLexicalHitCases = 0;
|
|
4790
5222
|
let multiVectorCollapsedCases = 0;
|
|
5223
|
+
let officeEvidenceReconcileCases = 0;
|
|
5224
|
+
let pdfEvidenceReconcileCases = 0;
|
|
5225
|
+
let runtimeCandidateBudgetExhaustedCases = 0;
|
|
5226
|
+
let runtimeUnderfilledTopKCases = 0;
|
|
4791
5227
|
let finalCountSum = 0;
|
|
4792
5228
|
let vectorCountSum = 0;
|
|
4793
5229
|
let lexicalCountSum = 0;
|
|
4794
5230
|
let candidateTopKSum = 0;
|
|
4795
5231
|
let lexicalTopKSum = 0;
|
|
4796
5232
|
for (const trace of traces) {
|
|
5233
|
+
const vectorSearchMetadata = trace.steps.find((step) => step.stage === "vector_search")?.metadata;
|
|
4797
5234
|
modeSet.add(trace.mode);
|
|
4798
5235
|
sourceBalanceStrategySet.add(trace.sourceBalanceStrategy ?? "cap");
|
|
4799
5236
|
if (trace.runVector) {
|
|
@@ -4826,6 +5263,25 @@ var summarizeRetrievalTraces = (traces) => {
|
|
|
4826
5263
|
if ((trace.multiVector?.collapsedParents ?? 0) > 0) {
|
|
4827
5264
|
multiVectorCollapsedCases += 1;
|
|
4828
5265
|
}
|
|
5266
|
+
const evidenceReconcileMetadata = trace.steps.find((step) => step.stage === "evidence_reconcile")?.metadata;
|
|
5267
|
+
if (typeof evidenceReconcileMetadata?.officeAffectedScopes === "number" && evidenceReconcileMetadata.officeAffectedScopes > 0) {
|
|
5268
|
+
officeEvidenceReconcileCases += 1;
|
|
5269
|
+
}
|
|
5270
|
+
if (typeof evidenceReconcileMetadata?.pdfAffectedScopes === "number" && evidenceReconcileMetadata.pdfAffectedScopes > 0) {
|
|
5271
|
+
pdfEvidenceReconcileCases += 1;
|
|
5272
|
+
}
|
|
5273
|
+
if (vectorSearchMetadata?.sqliteQueryCandidateBudgetExhausted) {
|
|
5274
|
+
runtimeCandidateBudgetExhaustedCases += 1;
|
|
5275
|
+
}
|
|
5276
|
+
if (vectorSearchMetadata?.postgresQueryCandidateBudgetExhausted) {
|
|
5277
|
+
runtimeCandidateBudgetExhaustedCases += 1;
|
|
5278
|
+
}
|
|
5279
|
+
if (vectorSearchMetadata?.sqliteQueryUnderfilledTopK) {
|
|
5280
|
+
runtimeUnderfilledTopKCases += 1;
|
|
5281
|
+
}
|
|
5282
|
+
if (vectorSearchMetadata?.postgresQueryUnderfilledTopK) {
|
|
5283
|
+
runtimeUnderfilledTopKCases += 1;
|
|
5284
|
+
}
|
|
4829
5285
|
finalCountSum += trace.resultCounts.final;
|
|
4830
5286
|
vectorCountSum += trace.resultCounts.vector;
|
|
4831
5287
|
lexicalCountSum += trace.resultCounts.lexical;
|
|
@@ -4851,6 +5307,10 @@ var summarizeRetrievalTraces = (traces) => {
|
|
|
4851
5307
|
multiVectorVectorHitCases,
|
|
4852
5308
|
multiVectorLexicalHitCases,
|
|
4853
5309
|
multiVectorCollapsedCases,
|
|
5310
|
+
officeEvidenceReconcileCases,
|
|
5311
|
+
pdfEvidenceReconcileCases,
|
|
5312
|
+
runtimeCandidateBudgetExhaustedCases,
|
|
5313
|
+
runtimeUnderfilledTopKCases,
|
|
4854
5314
|
vectorCases
|
|
4855
5315
|
};
|
|
4856
5316
|
};
|
|
@@ -5653,6 +6113,8 @@ var buildRAGEvaluationRunDiff = ({
|
|
|
5653
6113
|
averageLexicalTopK: (current.traceSummary?.averageLexicalTopK ?? 0) - (previous?.traceSummary?.averageLexicalTopK ?? 0),
|
|
5654
6114
|
averageVectorCount: (current.traceSummary?.averageVectorCount ?? 0) - (previous?.traceSummary?.averageVectorCount ?? 0),
|
|
5655
6115
|
balancedCases: (current.traceSummary?.balancedCases ?? 0) - (previous?.traceSummary?.balancedCases ?? 0),
|
|
6116
|
+
officeEvidenceReconcileCasesDelta: (current.traceSummary?.officeEvidenceReconcileCases ?? 0) - (previous?.traceSummary?.officeEvidenceReconcileCases ?? 0),
|
|
6117
|
+
pdfEvidenceReconcileCasesDelta: (current.traceSummary?.pdfEvidenceReconcileCases ?? 0) - (previous?.traceSummary?.pdfEvidenceReconcileCases ?? 0),
|
|
5656
6118
|
lexicalCases: (current.traceSummary?.lexicalCases ?? 0) - (previous?.traceSummary?.lexicalCases ?? 0),
|
|
5657
6119
|
modesChanged: (current.traceSummary?.modes ?? []).join("|") !== (previous?.traceSummary?.modes ?? []).join("|"),
|
|
5658
6120
|
roundRobinCases: (current.traceSummary?.roundRobinCases ?? 0) - (previous?.traceSummary?.roundRobinCases ?? 0),
|
|
@@ -8436,7 +8898,10 @@ var buildRAGRetrievalComparisonDecisionSummary = ({
|
|
|
8436
8898
|
passingRateDelta: candidateEntry.response.passingRate - baselineEntry.response.passingRate,
|
|
8437
8899
|
multiVectorCollapsedCasesDelta: (candidateEntry.traceSummary?.multiVectorCollapsedCases ?? 0) - (baselineEntry.traceSummary?.multiVectorCollapsedCases ?? 0),
|
|
8438
8900
|
multiVectorLexicalHitCasesDelta: (candidateEntry.traceSummary?.multiVectorLexicalHitCases ?? 0) - (baselineEntry.traceSummary?.multiVectorLexicalHitCases ?? 0),
|
|
8439
|
-
multiVectorVectorHitCasesDelta: (candidateEntry.traceSummary?.multiVectorVectorHitCases ?? 0) - (baselineEntry.traceSummary?.multiVectorVectorHitCases ?? 0)
|
|
8901
|
+
multiVectorVectorHitCasesDelta: (candidateEntry.traceSummary?.multiVectorVectorHitCases ?? 0) - (baselineEntry.traceSummary?.multiVectorVectorHitCases ?? 0),
|
|
8902
|
+
evidenceReconcileCasesDelta: (candidateEntry.traceSummary?.stageCounts?.evidence_reconcile ?? 0) - (baselineEntry.traceSummary?.stageCounts?.evidence_reconcile ?? 0),
|
|
8903
|
+
runtimeCandidateBudgetExhaustedCasesDelta: (candidateEntry.traceSummary?.runtimeCandidateBudgetExhaustedCases ?? 0) - (baselineEntry.traceSummary?.runtimeCandidateBudgetExhaustedCases ?? 0),
|
|
8904
|
+
runtimeUnderfilledTopKCasesDelta: (candidateEntry.traceSummary?.runtimeUnderfilledTopKCases ?? 0) - (baselineEntry.traceSummary?.runtimeUnderfilledTopKCases ?? 0)
|
|
8440
8905
|
} : undefined;
|
|
8441
8906
|
return {
|
|
8442
8907
|
baseline: baselineEntry ? {
|
|
@@ -8446,6 +8911,9 @@ var buildRAGRetrievalComparisonDecisionSummary = ({
|
|
|
8446
8911
|
multiVectorCollapsedCases: baselineEntry.traceSummary?.multiVectorCollapsedCases,
|
|
8447
8912
|
multiVectorLexicalHitCases: baselineEntry.traceSummary?.multiVectorLexicalHitCases,
|
|
8448
8913
|
multiVectorVectorHitCases: baselineEntry.traceSummary?.multiVectorVectorHitCases,
|
|
8914
|
+
evidenceReconcileCases: baselineEntry.traceSummary?.stageCounts?.evidence_reconcile,
|
|
8915
|
+
runtimeCandidateBudgetExhaustedCases: baselineEntry.traceSummary?.runtimeCandidateBudgetExhaustedCases,
|
|
8916
|
+
runtimeUnderfilledTopKCases: baselineEntry.traceSummary?.runtimeUnderfilledTopKCases,
|
|
8449
8917
|
passingRate: baselineEntry.response.passingRate,
|
|
8450
8918
|
retrievalId: baselineEntry.retrievalId
|
|
8451
8919
|
} : undefined,
|
|
@@ -8457,6 +8925,9 @@ var buildRAGRetrievalComparisonDecisionSummary = ({
|
|
|
8457
8925
|
multiVectorCollapsedCases: candidateEntry.traceSummary?.multiVectorCollapsedCases,
|
|
8458
8926
|
multiVectorLexicalHitCases: candidateEntry.traceSummary?.multiVectorLexicalHitCases,
|
|
8459
8927
|
multiVectorVectorHitCases: candidateEntry.traceSummary?.multiVectorVectorHitCases,
|
|
8928
|
+
evidenceReconcileCases: candidateEntry.traceSummary?.stageCounts?.evidence_reconcile,
|
|
8929
|
+
runtimeCandidateBudgetExhaustedCases: candidateEntry.traceSummary?.runtimeCandidateBudgetExhaustedCases,
|
|
8930
|
+
runtimeUnderfilledTopKCases: candidateEntry.traceSummary?.runtimeUnderfilledTopKCases,
|
|
8460
8931
|
passingRate: candidateEntry.response.passingRate,
|
|
8461
8932
|
retrievalId: candidateEntry.retrievalId
|
|
8462
8933
|
} : undefined,
|
|
@@ -8468,7 +8939,10 @@ var buildRAGRetrievalComparisonDecisionSummary = ({
|
|
|
8468
8939
|
winnerByPassingRate: comparison.summary.bestByPassingRate,
|
|
8469
8940
|
winnerByMultivectorCollapsedCases: comparison.summary.bestByMultivectorCollapsedCases,
|
|
8470
8941
|
winnerByMultivectorLexicalHitCases: comparison.summary.bestByMultivectorLexicalHitCases,
|
|
8471
|
-
winnerByMultivectorVectorHitCases: comparison.summary.bestByMultivectorVectorHitCases
|
|
8942
|
+
winnerByMultivectorVectorHitCases: comparison.summary.bestByMultivectorVectorHitCases,
|
|
8943
|
+
winnerByEvidenceReconcileCases: comparison.summary.bestByEvidenceReconcileCases,
|
|
8944
|
+
winnerByLowestRuntimeCandidateBudgetExhaustedCases: comparison.summary.bestByLowestRuntimeCandidateBudgetExhaustedCases,
|
|
8945
|
+
winnerByLowestRuntimeUnderfilledTopKCases: comparison.summary.bestByLowestRuntimeUnderfilledTopKCases
|
|
8472
8946
|
};
|
|
8473
8947
|
};
|
|
8474
8948
|
var loadRAGSearchTracePruneHistory = async ({
|
|
@@ -9156,6 +9630,45 @@ var selectComparisonEntryByTraceMetric = (entries, idKey, metric) => {
|
|
|
9156
9630
|
}
|
|
9157
9631
|
return typeof winner[idKey] === "string" ? winner[idKey] : undefined;
|
|
9158
9632
|
};
|
|
9633
|
+
/**
 * Picks the comparison entry with the lowest value for a trace-summary metric.
 *
 * Ties are broken by higher passing rate, then higher average F1, then lower
 * average latency. A missing metric (or missing trace summary) is read as 0.
 * NOTE(review): an entry with no trace summary therefore ranks as "lowest" —
 * confirm that is the intended outcome.
 *
 * @param {Iterable<object>} entries - Comparison entries to rank.
 * @param {string} idKey - Property on the entry that holds its identifier.
 * @param {string} metric - Trace-summary property to minimize.
 * @returns {string | undefined} The winner's id, or undefined when there are
 *   no entries or the id is not a string.
 */
var selectComparisonEntryByLowestTraceMetric = (entries, idKey, metric) => {
  const metricOf = (entry) => entry.traceSummary?.[metric] ?? 0;

  const byLowestMetric = (left, right) => {
    const leftMetric = metricOf(left);
    const rightMetric = metricOf(right);
    if (leftMetric !== rightMetric) {
      // Ascending: smaller metric sorts first.
      return leftMetric - rightMetric;
    }
    if (right.response.passingRate !== left.response.passingRate) {
      return right.response.passingRate - left.response.passingRate;
    }
    const rightSummary = right.response.summary;
    const leftSummary = left.response.summary;
    if (rightSummary.averageF1 !== leftSummary.averageF1) {
      return rightSummary.averageF1 - leftSummary.averageF1;
    }
    return leftSummary.averageLatencyMs - rightSummary.averageLatencyMs;
  };

  const [best] = [...entries].sort(byLowestMetric);
  const bestId = best?.[idKey];
  return typeof bestId === "string" ? bestId : undefined;
};
|
|
9651
|
+
/**
 * Picks the comparison entry whose trace summary recorded the given stage most
 * often.
 *
 * Ties are broken by higher passing rate, then higher average F1, then lower
 * average latency. A missing stage count is read as 0, and no winner is
 * reported when the best count is 0.
 *
 * @param {Iterable<object>} entries - Comparison entries to rank.
 * @param {string} idKey - Property on the entry that holds its identifier.
 * @param {string} stage - Stage name looked up in `traceSummary.stageCounts`.
 * @returns {string | undefined} The winner's id, or undefined when there are
 *   no entries, no entry recorded the stage, or the id is not a string.
 */
var selectComparisonEntryByTraceStageCount = (entries, idKey, stage) => {
  const stageCountOf = (entry) => entry.traceSummary?.stageCounts?.[stage] ?? 0;

  const byHighestStageCount = (left, right) => {
    const leftCount = stageCountOf(left);
    const rightCount = stageCountOf(right);
    if (rightCount !== leftCount) {
      // Descending: larger stage count sorts first.
      return rightCount - leftCount;
    }
    if (right.response.passingRate !== left.response.passingRate) {
      return right.response.passingRate - left.response.passingRate;
    }
    const rightSummary = right.response.summary;
    const leftSummary = left.response.summary;
    if (rightSummary.averageF1 !== leftSummary.averageF1) {
      return rightSummary.averageF1 - leftSummary.averageF1;
    }
    return leftSummary.averageLatencyMs - rightSummary.averageLatencyMs;
  };

  const [best] = [...entries].sort(byHighestStageCount);
  if (!best || stageCountOf(best) === 0) {
    return undefined;
  }
  const bestId = best[idKey];
  return typeof bestId === "string" ? bestId : undefined;
};
|
|
9159
9672
|
var resolveRetrievalMode = (candidate) => {
|
|
9160
9673
|
if (!candidate.retrieval) {
|
|
9161
9674
|
return "vector";
|
|
@@ -9249,7 +9762,11 @@ var compareRAGRetrievalTraceSummaries = (current, previous) => ({
|
|
|
9249
9762
|
multiVectorCasesDelta: current.multiVectorCases - previous.multiVectorCases,
|
|
9250
9763
|
multiVectorVectorHitCasesDelta: current.multiVectorVectorHitCases - previous.multiVectorVectorHitCases,
|
|
9251
9764
|
multiVectorLexicalHitCasesDelta: current.multiVectorLexicalHitCases - previous.multiVectorLexicalHitCases,
|
|
9252
|
-
multiVectorCollapsedCasesDelta: current.multiVectorCollapsedCases - previous.multiVectorCollapsedCases
|
|
9765
|
+
multiVectorCollapsedCasesDelta: current.multiVectorCollapsedCases - previous.multiVectorCollapsedCases,
|
|
9766
|
+
officeEvidenceReconcileCasesDelta: current.officeEvidenceReconcileCases - previous.officeEvidenceReconcileCases,
|
|
9767
|
+
pdfEvidenceReconcileCasesDelta: current.pdfEvidenceReconcileCases - previous.pdfEvidenceReconcileCases,
|
|
9768
|
+
runtimeCandidateBudgetExhaustedCasesDelta: current.runtimeCandidateBudgetExhaustedCases - previous.runtimeCandidateBudgetExhaustedCases,
|
|
9769
|
+
runtimeUnderfilledTopKCasesDelta: current.runtimeUnderfilledTopKCases - previous.runtimeUnderfilledTopKCases
|
|
9253
9770
|
});
|
|
9254
9771
|
var buildSearchTraceResultSnapshots = (results) => results.map((result) => ({
|
|
9255
9772
|
chunkId: result.chunkId,
|
|
@@ -9611,6 +10128,278 @@ var generateRAGEvaluationSuiteFromDocuments = ({
|
|
|
9611
10128
|
metadata
|
|
9612
10129
|
});
|
|
9613
10130
|
};
|
|
10131
|
+
// Identity (suite id and label) of the adaptive native planner benchmark.
var DEFAULT_NATIVE_PLANNER_BENCHMARK_SUITE_ID = "rag-native-planner-larger-corpus";
var DEFAULT_NATIVE_PLANNER_BENCHMARK_LABEL = "Adaptive Native Planner Benchmark";

// Identity (suite id and label) of the native backend comparison benchmark.
var DEFAULT_NATIVE_BACKEND_COMPARISON_BENCHMARK_SUITE_ID = "rag-native-backend-larger-corpus";
var DEFAULT_NATIVE_BACKEND_COMPARISON_BENCHMARK_LABEL = "Native Backend Comparison Benchmark";

// Default benchmark queries.
var DEFAULT_NATIVE_PLANNER_BENCHMARK_QUERY = "Which launch checklist phrase is exact wording?";
var DEFAULT_NATIVE_BACKEND_HYBRID_QUERY = "aurora promotion checklist wording";
var DEFAULT_NATIVE_BACKEND_FILTERED_QUERY = "focus lane launch checklist wording";
var DEFAULT_NATIVE_BACKEND_REORDERED_QUERY = "exact aurora focus lane checklist wording";
var DEFAULT_NATIVE_BACKEND_GUIDE_QUERY = "which focus lane guide contains exact aurora promotion wording";

// Metadata filter that restricts retrieval to the "focus" lane.
var DEFAULT_NATIVE_PLANNER_BENCHMARK_FILTER = { lane: "focus" };

// Document ids of the three focus-lane distractors used as hard negatives.
var DEFAULT_NATIVE_PLANNER_HARD_NEGATIVE_DOCUMENT_IDS = ["focus-distractor-0", "focus-distractor-1", "focus-distractor-2"];
|
|
10148
|
+
/**
 * Deterministic two-dimensional mock embedding for the native backend
 * benchmark.
 *
 * Matching is case-insensitive:
 * - text containing "launch checklist exact wording" maps to `[0.995, 0.005]`;
 * - text mentioning any benchmark topic keyword maps to `[1, 0]`;
 * - everything else maps to `[0, 1]`.
 *
 * @param {string} text - Text to embed.
 * @returns {Promise<number[]>} A fresh two-element vector.
 */
var createRAGNativeBackendBenchmarkMockEmbedding = async (text) => {
  const normalized = text.toLowerCase();
  // The longer phrase "launch checklist exact wording for aurora promotion"
  // always contains this one, so a single check covers both.
  if (normalized.includes("launch checklist exact wording")) {
    return [0.995, 0.005];
  }
  const topicKeywords = ["aurora", "checklist", "focus lane", "exact wording", "guide"];
  if (topicKeywords.some((keyword) => normalized.includes(keyword))) {
    return [1, 0];
  }
  return [0, 1];
};
|
|
10158
|
+
/**
 * Builds the synthetic chunk corpus used by the native backend benchmarks.
 *
 * The corpus is, in order:
 * 1. `noiseCount` noise-lane chunks with embedding `[0, 1]`;
 * 2. three focus-lane distractors with embedding `[1, 0]`;
 * 3. the focus-lane target chunk with embedding `[0.995, 0.005]`;
 * 4. two extra focus-lane fixtures when `backend` is "sqlite-native" or
 *    "postgres" (none for any other backend value).
 *
 * @param {{ noiseCount?: number, backend?: string }} [input] - Corpus options;
 *   `noiseCount` defaults to 5001 and `backend` to "generic".
 * @returns {object[]} Newly created chunk records.
 */
var createRAGNativeBackendBenchmarkCorpus = (input) => {
  const noiseCount = input?.noiseCount ?? 5001;
  const backend = input?.backend ?? "generic";

  // Single place that defines the shape shared by every focus-lane chunk.
  // `backendFixture` is emitted first in metadata, and only for backend fixtures.
  const createFocusChunk = ({ backendFixture, chunkId, documentId, embedding, source, text }) => ({
    chunkId,
    corpusKey: "focus",
    embedding: embedding ?? [1, 0],
    metadata: {
      ...(backendFixture === undefined ? {} : { backendFixture }),
      corpusKey: "focus",
      documentId,
      lane: "focus"
    },
    source,
    text
  });

  // Backend fixtures share one naming scheme across chunk id, document id and source.
  const createBackendFixtureChunks = (backendFixture, slugPrefix, fixtures) =>
    fixtures.map(([slug, text]) =>
      createFocusChunk({
        backendFixture,
        chunkId: `focus:${slugPrefix}:${slug}`,
        documentId: `focus-${slugPrefix}-${slug}`,
        source: `guide/${slugPrefix}-${slug}.md`,
        text
      })
    );

  const noiseChunks = Array.from({ length: noiseCount }, (_, index) => ({
    chunkId: `noise:${index}`,
    corpusKey: "noise",
    embedding: [0, 1],
    metadata: {
      corpusKey: "noise",
      documentId: `noise-${index}`,
      lane: "noise"
    },
    source: `noise/${index}.md`,
    text: `Background operations note ${index}.`
  }));

  const distractorTexts = [
    "aurora promotion checklist overview",
    "launch checklist wording draft",
    "focus lane promotion runbook notes"
  ];
  const distractorChunks = distractorTexts.map((text, index) =>
    createFocusChunk({
      chunkId: `focus:distractor:${index}`,
      documentId: `focus-distractor-${index}`,
      source: `focus/distractor-${index}.md`,
      text
    })
  );

  const targetChunk = createFocusChunk({
    chunkId: "focus:target",
    documentId: "focus-target",
    embedding: [0.995, 0.005],
    source: "guide/planner-depth.md",
    text: "launch checklist exact wording for aurora promotion in the focus lane"
  });

  let backendSpecificChunks = [];
  if (backend === "sqlite-native") {
    backendSpecificChunks = createBackendFixtureChunks("sqlite-native", "sqlite", [
      ["phrase-matrix", "exact aurora focus lane checklist wording matrix for sqlite validation"],
      ["guide-table", "which focus lane guide contains aurora promotion wording draft table for sqlite operators"]
    ]);
  } else if (backend === "postgres") {
    backendSpecificChunks = createBackendFixtureChunks("postgres", "postgres", [
      ["appendix", "which focus lane guide contains exact aurora promotion wording appendix for postgres release review"],
      ["alternatives", "aurora promotion checklist wording alternatives and exact focus lane phrasing for postgres audits"]
    ]);
  }

  return [...noiseChunks, ...distractorChunks, targetChunk, ...backendSpecificChunks];
};
|
|
10256
|
+
/**
 * Builds the adaptive native planner benchmark suite.
 *
 * The suite holds a single filtered vector-retrieval case
 * ("planner-pressure-exact-phrase") and is handed to
 * `createRAGEvaluationSuite`, whose return value is returned unchanged.
 *
 * @param {object} [input] Optional overrides.
 * @param {string} [input.description] Replaces the default description.
 * @param {string} [input.id] Replaces the default suite id.
 * @param {string} [input.label] Replaces the default suite label.
 * @param {object} [input.metadata] Spread last into the suite metadata, so its keys win.
 * @param {number} [input.topK] Applied to both the suite and its case; defaults to 1.
 * @returns Whatever `createRAGEvaluationSuite` returns for the assembled definition.
 */
var createRAGAdaptiveNativePlannerBenchmarkSuite = (input) => {
  const resultLimit = input?.topK ?? 1;

  // The case and the suite each receive their own copy of the default filter.
  const exactPhraseCase = {
    expectedDocumentIds: ["focus-target"],
    filter: { ...DEFAULT_NATIVE_PLANNER_BENCHMARK_FILTER },
    hardNegativeDocumentIds: [
      "focus-distractor-0",
      "focus-distractor-1",
      "focus-distractor-2"
    ],
    id: "planner-pressure-exact-phrase",
    label: "Exact phrase survives larger-corpus native pressure",
    query: DEFAULT_NATIVE_PLANNER_BENCHMARK_QUERY,
    topK: resultLimit
  };

  const suiteMetadata = {
    benchmarkKind: "adaptive_native_planner",
    benchmarkScope: "larger_corpus",
    expectedSignals: [
      "selected native planner profile",
      "candidate-budget exhaustion",
      "underfilled topk"
    ],
    recommendedGroupKey: "runtime-native-planner",
    recommendedTags: ["runtime", "native", "planner"],
    ...input?.metadata
  };

  return createRAGEvaluationSuite({
    description: input?.description ?? "Stress-tests larger-corpus native planner selection, candidate-budget pressure, and transformed-query recovery on filtered retrieval.",
    id: input?.id ?? DEFAULT_NATIVE_PLANNER_BENCHMARK_SUITE_ID,
    input: {
      cases: [exactPhraseCase],
      filter: { ...DEFAULT_NATIVE_PLANNER_BENCHMARK_FILTER },
      retrieval: "vector",
      topK: resultLimit
    },
    label: input?.label ?? DEFAULT_NATIVE_PLANNER_BENCHMARK_LABEL,
    metadata: suiteMetadata
  });
};
|
|
10293
|
+
/**
 * Wraps an adaptive native planner benchmark suite in a suite snapshot.
 *
 * @param {object} [input] Optional snapshot fields.
 * @param {object} [input.suite] Suite to snapshot; when nullish, a default suite
 *   is built with `createRAGAdaptiveNativePlannerBenchmarkSuite()`.
 * @param {*} [input.createdAt] Forwarded as-is.
 * @param {*} [input.id] Forwarded as-is.
 * @param {*} [input.version] Forwarded as-is.
 * @param {object} [input.metadata] Spread after the built-in metadata keys, so its keys win.
 * @returns Whatever `createRAGEvaluationSuiteSnapshot` returns.
 */
var createRAGAdaptiveNativePlannerBenchmarkSnapshot = (input) => {
  const { createdAt, id, metadata, suite: providedSuite, version } = input ?? {};
  const suite = providedSuite ?? createRAGAdaptiveNativePlannerBenchmarkSuite();
  const snapshotMetadata = {
    artifactKind: "adaptive_native_planner_benchmark",
    persistForReleaseHistory: true,
    ...metadata
  };
  return createRAGEvaluationSuiteSnapshot({
    createdAt,
    id,
    metadata: snapshotMetadata,
    suite,
    version
  });
};
|
|
10307
|
+
/**
 * Builds the native backend comparison benchmark suite.
 *
 * Five cases share the same expected document, filter, hard negatives and
 * topK; they differ only in id, label and query. The assembled definition is
 * handed to `createRAGEvaluationSuite`, whose return value is returned unchanged.
 *
 * @param {object} [input] Optional overrides.
 * @param {string} [input.description] Replaces the default description.
 * @param {string} [input.id] Replaces the default suite id.
 * @param {string} [input.label] Replaces the default suite label.
 * @param {object} [input.metadata] Spread last into the suite metadata, so its keys win.
 * @param {number} [input.topK] Applied to the suite and every case; defaults to 1.
 * @returns Whatever `createRAGEvaluationSuite` returns for the assembled definition.
 */
var createRAGNativeBackendComparisonBenchmarkSuite = (input) => {
  const resultLimit = input?.topK ?? 1;

  // [id, label, query] for each pressure case, in suite order.
  const caseDescriptors = [
    [
      "planner-pressure-exact-phrase",
      "Exact phrase survives larger-corpus native pressure",
      DEFAULT_NATIVE_PLANNER_BENCHMARK_QUERY
    ],
    [
      "planner-pressure-hybrid-phrase",
      "Hybrid retrieval survives filtered lexical pressure",
      DEFAULT_NATIVE_BACKEND_HYBRID_QUERY
    ],
    [
      "planner-pressure-filtered-lane-query",
      "Filtered lane query survives broader corpus noise",
      DEFAULT_NATIVE_BACKEND_FILTERED_QUERY
    ],
    [
      "planner-pressure-reordered-phrase",
      "Reordered phrase survives transform pressure",
      DEFAULT_NATIVE_BACKEND_REORDERED_QUERY
    ],
    [
      "planner-pressure-guide-query",
      "Guide attribution survives filtered corpus pressure",
      DEFAULT_NATIVE_BACKEND_GUIDE_QUERY
    ]
  ];

  // Every case gets its own filter object and its own hard-negative array.
  const cases = caseDescriptors.map(([id, label, query]) => ({
    expectedDocumentIds: ["focus-target"],
    filter: { ...DEFAULT_NATIVE_PLANNER_BENCHMARK_FILTER },
    hardNegativeDocumentIds: [
      ...DEFAULT_NATIVE_PLANNER_HARD_NEGATIVE_DOCUMENT_IDS
    ],
    id,
    label,
    query,
    topK: resultLimit
  }));

  const suiteMetadata = {
    benchmarkKind: "native_backend_comparison",
    benchmarkScope: "larger_corpus",
    expectedSignals: [
      "backend-tagged runtime artifacts",
      "selected native planner profile",
      "hybrid filtered retrieval",
      "candidate-budget exhaustion",
      "underfilled topk",
      "query transform pressure"
    ],
    recommendedGroupKey: "runtime-native-backend-parity",
    recommendedTags: ["runtime", "backend", "native"],
    ...input?.metadata
  };

  return createRAGEvaluationSuite({
    description: input?.description ?? "Captures larger-corpus native backend parity with filtered vector pressure and harder hybrid retrieval cases so sqlite-native and postgres runs can be compared over time.",
    id: input?.id ?? DEFAULT_NATIVE_BACKEND_COMPARISON_BENCHMARK_SUITE_ID,
    input: {
      cases,
      filter: { ...DEFAULT_NATIVE_PLANNER_BENCHMARK_FILTER },
      retrieval: "vector",
      topK: resultLimit
    },
    label: input?.label ?? DEFAULT_NATIVE_BACKEND_COMPARISON_BENCHMARK_LABEL,
    metadata: suiteMetadata
  });
};
|
|
10389
|
+
/**
 * Wraps a native backend comparison benchmark suite in a suite snapshot.
 *
 * @param {object} [input] Optional snapshot fields.
 * @param {object} [input.suite] Suite to snapshot; when nullish, a default suite
 *   is built with `createRAGNativeBackendComparisonBenchmarkSuite()`.
 * @param {*} [input.createdAt] Forwarded as-is.
 * @param {*} [input.id] Forwarded as-is.
 * @param {*} [input.version] Forwarded as-is.
 * @param {object} [input.metadata] Spread after the built-in metadata keys, so its keys win.
 * @returns Whatever `createRAGEvaluationSuiteSnapshot` returns.
 */
var createRAGNativeBackendComparisonBenchmarkSnapshot = (input) => {
  const { createdAt, id, metadata, suite: providedSuite, version } = input ?? {};
  const suite = providedSuite ?? createRAGNativeBackendComparisonBenchmarkSuite();
  const snapshotMetadata = {
    artifactKind: "native_backend_comparison_benchmark",
    persistForReleaseHistory: true,
    ...metadata
  };
  return createRAGEvaluationSuiteSnapshot({
    createdAt,
    id,
    metadata: snapshotMetadata,
    suite,
    version
  });
};
|
|
9614
10403
|
var createRAGEvaluationSuiteSnapshot = ({
|
|
9615
10404
|
suite,
|
|
9616
10405
|
id,
|
|
@@ -9818,7 +10607,10 @@ var summarizeRAGRetrievalComparison = (entries) => ({
|
|
|
9818
10607
|
...summarizeEvaluationResponseComparison(entries, "retrievalId"),
|
|
9819
10608
|
bestByMultivectorCollapsedCases: selectComparisonEntryByTraceMetric(entries, "retrievalId", "multiVectorCollapsedCases"),
|
|
9820
10609
|
bestByMultivectorLexicalHitCases: selectComparisonEntryByTraceMetric(entries, "retrievalId", "multiVectorLexicalHitCases"),
|
|
9821
|
-
bestByMultivectorVectorHitCases: selectComparisonEntryByTraceMetric(entries, "retrievalId", "multiVectorVectorHitCases")
|
|
10610
|
+
bestByMultivectorVectorHitCases: selectComparisonEntryByTraceMetric(entries, "retrievalId", "multiVectorVectorHitCases"),
|
|
10611
|
+
bestByEvidenceReconcileCases: selectComparisonEntryByTraceStageCount(entries, "retrievalId", "evidence_reconcile"),
|
|
10612
|
+
bestByLowestRuntimeCandidateBudgetExhaustedCases: selectComparisonEntryByLowestTraceMetric(entries, "retrievalId", "runtimeCandidateBudgetExhaustedCases"),
|
|
10613
|
+
bestByLowestRuntimeUnderfilledTopKCases: selectComparisonEntryByLowestTraceMetric(entries, "retrievalId", "runtimeUnderfilledTopKCases")
|
|
9822
10614
|
});
|
|
9823
10615
|
// src/ai/providers/openai.ts
|
|
9824
10616
|
var DEFAULT_BASE_URL = "https://api.openai.com";
|
|
@@ -13573,32 +14365,126 @@ var splitMarkdownPreferredChunkUnits = (value) => {
|
|
|
13573
14365
|
flushFence();
|
|
13574
14366
|
return units;
|
|
13575
14367
|
};
|
|
14368
|
+
/**
 * Scans already-collected PDF block entries from last to first and returns
 * the first heading inferred from a plain paragraph on the given page.
 *
 * Entries on other pages are skipped; the scan stops as soon as an entry with
 * a numeric page number lower than `pageNumber` is met. Entries that carry a
 * `pdfSemanticRole` or whose `pdfTextKind` is not "paragraph" are never used
 * as heading candidates.
 *
 * @param {Array<object|undefined>} blockEntries Entries collected so far.
 * @param {number|undefined} pageNumber Page to search on.
 * @returns {string|undefined} The truthy result of `inferPDFBlockHeading`, or
 *   undefined when `pageNumber` is not a number or nothing qualifies.
 */
var findNearestPDFContextHeading = (blockEntries, pageNumber) => {
  if (typeof pageNumber !== "number") {
    return undefined;
  }
  let cursor = blockEntries.length;
  while (cursor > 0) {
    cursor -= 1;
    const candidate = blockEntries[cursor];
    if (!candidate) {
      continue;
    }
    if (candidate.pageNumber !== pageNumber) {
      const isOnEarlierPage = typeof candidate.pageNumber === "number" && candidate.pageNumber < pageNumber;
      if (isOnEarlierPage) {
        // Walked past the requested page; nothing further back can match.
        return undefined;
      }
      continue;
    }
    const isPlainParagraph = !candidate.pdfSemanticRole && candidate.pdfTextKind === "paragraph";
    if (!isPlainParagraph) {
      continue;
    }
    const inferredHeading = inferPDFBlockHeading(candidate.text);
    if (inferredHeading) {
      return inferredHeading;
    }
  }
  return undefined;
};
|
|
14390
|
+
/**
 * Extracts column headers from the first non-empty line of table-like text.
 *
 * Each line is passed through `normalizeWhitespace`; the first line that is
 * still non-empty is treated as the header row and split on " | ".
 *
 * @param {string} text Table text with one row per line.
 * @returns {string[]|undefined} The non-empty header cells when the header row
 *   contains " | " and yields at least two cells; otherwise undefined.
 */
var getPDFTableHeaders = (text) => {
  const headerRow = text
    .split("\n")
    .map((row) => normalizeWhitespace(row))
    .find(Boolean);
  const hasColumnSeparator = Boolean(headerRow) && headerRow.includes(" | ");
  if (!hasColumnSeparator) {
    return undefined;
  }
  const cells = headerRow
    .split(" | ")
    .map((cell) => normalizeWhitespace(cell))
    .filter(Boolean);
  if (cells.length < 2) {
    return undefined;
  }
  return cells;
};
|
|
13576
14400
|
var pdfNativeStructureUnits = (metadata) => {
|
|
13577
14401
|
const blocks = Array.isArray(metadata?.pdfTextBlocks) ? metadata.pdfTextBlocks : [];
|
|
13578
|
-
const
|
|
14402
|
+
const blockEntries = [];
|
|
13579
14403
|
for (const block of blocks) {
|
|
13580
14404
|
if (!block || typeof block !== "object") {
|
|
13581
14405
|
continue;
|
|
13582
14406
|
}
|
|
13583
|
-
const
|
|
14407
|
+
const rawText = typeof block.text === "string" ? block.text : "";
|
|
14408
|
+
const pdfTextKind = block.textKind === "table_like" ? "table_like" : "paragraph";
|
|
14409
|
+
const text = pdfTextKind === "table_like" ? rawText.split(`
|
|
14410
|
+
`).map((line) => normalizeWhitespace(line)).filter(Boolean).join(`
|
|
14411
|
+
`) : normalizeWhitespace(rawText);
|
|
13584
14412
|
if (!text) {
|
|
13585
14413
|
continue;
|
|
13586
14414
|
}
|
|
13587
14415
|
const pageNumber = typeof block.pageNumber === "number" && Number.isFinite(block.pageNumber) ? block.pageNumber : undefined;
|
|
13588
14416
|
const pdfBlockNumber = typeof block.blockNumber === "number" && Number.isFinite(block.blockNumber) ? block.blockNumber : undefined;
|
|
13589
|
-
const
|
|
13590
|
-
const
|
|
13591
|
-
|
|
14417
|
+
const previousBlock = blockEntries.at(-1);
|
|
14418
|
+
const previousFigureCaption = previousBlock && previousBlock.pageNumber === pageNumber && previousBlock.pdfSemanticRole === "figure_caption" ? previousBlock : undefined;
|
|
14419
|
+
const pdfSemanticRole = block.semanticRole === "figure_caption" ? "figure_caption" : block.semanticRole === "figure_body" ? "figure_body" : pdfTextKind === "paragraph" && previousFigureCaption && !inferPDFBlockHeading(text) ? "figure_body" : undefined;
|
|
14420
|
+
const currentBlockHeading = pdfTextKind === "paragraph" && !pdfSemanticRole ? inferPDFBlockHeading(text) : undefined;
|
|
14421
|
+
const contextualHeading = pdfTextKind === "table_like" ? findNearestPDFContextHeading(blockEntries, pageNumber) : undefined;
|
|
14422
|
+
const contextualTableTitle = contextualHeading && pdfTextKind === "table_like" ? /\btable\b/i.test(contextualHeading) ? contextualHeading : `${contextualHeading} Table` : undefined;
|
|
14423
|
+
const pdfTableHeaders = pdfTextKind === "table_like" ? getPDFTableHeaders(text) : undefined;
|
|
14424
|
+
const pdfTableHeaderText = pdfTextKind === "table_like" ? text.split(`
|
|
14425
|
+
`).map((line) => normalizeWhitespace(line)).filter(Boolean)[0] : undefined;
|
|
14426
|
+
const pdfTableRowCount = pdfTextKind === "table_like" ? text.split(`
|
|
14427
|
+
`).map((line) => normalizeWhitespace(line)).filter(Boolean).length : undefined;
|
|
14428
|
+
const pdfTableBodyRowCount = typeof pdfTableRowCount === "number" ? Math.max(0, pdfTableRowCount - 1) : undefined;
|
|
14429
|
+
const pdfTableBodyRowStart = typeof pdfTableBodyRowCount === "number" && pdfTableBodyRowCount > 0 ? 1 : undefined;
|
|
14430
|
+
const pdfTableBodyRowEnd = typeof pdfTableBodyRowCount === "number" && pdfTableBodyRowCount > 0 ? pdfTableBodyRowCount : undefined;
|
|
14431
|
+
const pdfTableColumnCount = Array.isArray(pdfTableHeaders) ? pdfTableHeaders.length : undefined;
|
|
14432
|
+
const pdfTableSignature = Array.isArray(pdfTableHeaders) && pdfTableHeaders.length > 0 ? pdfTableHeaders.join(" | ") : undefined;
|
|
14433
|
+
const pdfFigureLabel = pdfSemanticRole === "figure_caption" ? extractPDFFigureLabel(text) : previousFigureCaption?.pdfFigureLabel;
|
|
14434
|
+
const pdfFigureCaptionBlockNumber = pdfSemanticRole === "figure_caption" ? pdfBlockNumber : previousFigureCaption?.pdfFigureCaptionBlockNumber;
|
|
14435
|
+
const baseSectionTitle = pdfSemanticRole === "figure_caption" ? pageNumber ? `Page ${pageNumber} Figure Caption` : "Figure Caption" : pdfSemanticRole === "figure_body" ? pdfFigureLabel ? `${pdfFigureLabel} Body` : pageNumber ? `Page ${pageNumber} Figure Body` : "Figure Body" : currentBlockHeading ? currentBlockHeading : contextualTableTitle ? contextualTableTitle : pageNumber ? pdfTextKind === "table_like" ? `Page ${pageNumber} Table Block` : `Page ${pageNumber} Text Block` : pdfTextKind === "table_like" ? "Table Block" : "Text Block";
|
|
14436
|
+
blockEntries.push({
|
|
14437
|
+
baseSectionTitle,
|
|
13592
14438
|
pageNumber,
|
|
13593
14439
|
pdfBlockNumber,
|
|
14440
|
+
...typeof pdfFigureCaptionBlockNumber === "number" ? { pdfFigureCaptionBlockNumber } : {},
|
|
14441
|
+
...pdfFigureLabel ? { pdfFigureLabel } : {},
|
|
14442
|
+
...pdfSemanticRole ? { pdfSemanticRole } : {},
|
|
14443
|
+
...typeof pdfTableBodyRowEnd === "number" ? { pdfTableBodyRowEnd } : {},
|
|
14444
|
+
...typeof pdfTableBodyRowCount === "number" ? { pdfTableBodyRowCount } : {},
|
|
14445
|
+
...typeof pdfTableBodyRowStart === "number" ? { pdfTableBodyRowStart } : {},
|
|
14446
|
+
...pdfTextKind === "table_like" ? { pdfTableChunkKind: "full_table" } : {},
|
|
14447
|
+
...typeof pdfTableColumnCount === "number" ? { pdfTableColumnCount } : {},
|
|
14448
|
+
...typeof pdfTableHeaderText === "string" ? { pdfTableHeaderText } : {},
|
|
14449
|
+
...Array.isArray(pdfTableHeaders) && pdfTableHeaders.length > 0 ? { pdfTableHeaders } : {},
|
|
14450
|
+
...typeof pdfTableRowCount === "number" ? { pdfTableRowCount } : {},
|
|
14451
|
+
...pdfTableSignature ? { pdfTableSignature } : {},
|
|
13594
14452
|
pdfTextKind,
|
|
13595
|
-
|
|
14453
|
+
text
|
|
14454
|
+
});
|
|
14455
|
+
}
|
|
14456
|
+
const titleCounts = new Map;
|
|
14457
|
+
for (const block of blockEntries) {
|
|
14458
|
+
titleCounts.set(block.baseSectionTitle, (titleCounts.get(block.baseSectionTitle) ?? 0) + 1);
|
|
14459
|
+
}
|
|
14460
|
+
const units = [];
|
|
14461
|
+
for (const block of blockEntries) {
|
|
14462
|
+
const sectionTitle = (titleCounts.get(block.baseSectionTitle) ?? 0) > 1 && typeof block.pdfBlockNumber === "number" ? `${block.baseSectionTitle} ${block.pdfBlockNumber}` : block.baseSectionTitle;
|
|
14463
|
+
units.push({
|
|
14464
|
+
pageNumber: block.pageNumber,
|
|
14465
|
+
pdfBlockNumber: block.pdfBlockNumber,
|
|
14466
|
+
...typeof block.pdfFigureCaptionBlockNumber === "number" ? {
|
|
14467
|
+
pdfFigureCaptionBlockNumber: block.pdfFigureCaptionBlockNumber
|
|
14468
|
+
} : {},
|
|
14469
|
+
...block.pdfFigureLabel ? { pdfFigureLabel: block.pdfFigureLabel } : {},
|
|
14470
|
+
...block.pdfSemanticRole ? { pdfSemanticRole: block.pdfSemanticRole } : {},
|
|
14471
|
+
...typeof block.pdfTableBodyRowEnd === "number" ? { pdfTableBodyRowEnd: block.pdfTableBodyRowEnd } : {},
|
|
14472
|
+
...typeof block.pdfTableBodyRowCount === "number" ? { pdfTableBodyRowCount: block.pdfTableBodyRowCount } : {},
|
|
14473
|
+
...typeof block.pdfTableBodyRowStart === "number" ? { pdfTableBodyRowStart: block.pdfTableBodyRowStart } : {},
|
|
14474
|
+
...block.pdfTableChunkKind ? { pdfTableChunkKind: block.pdfTableChunkKind } : {},
|
|
14475
|
+
...typeof block.pdfTableColumnCount === "number" ? { pdfTableColumnCount: block.pdfTableColumnCount } : {},
|
|
14476
|
+
...typeof block.pdfTableHeaderText === "string" ? { pdfTableHeaderText: block.pdfTableHeaderText } : {},
|
|
14477
|
+
...Array.isArray(block.pdfTableHeaders) && block.pdfTableHeaders.length > 0 ? { pdfTableHeaders: block.pdfTableHeaders } : {},
|
|
14478
|
+
...typeof block.pdfTableRowCount === "number" ? { pdfTableRowCount: block.pdfTableRowCount } : {},
|
|
14479
|
+
...block.pdfTableSignature ? { pdfTableSignature: block.pdfTableSignature } : {},
|
|
14480
|
+
pdfTextKind: block.pdfTextKind,
|
|
14481
|
+
preferredChunkUnits: block.pdfTextKind === "table_like" ? block.text.split(`
|
|
13596
14482
|
`).filter(Boolean) : undefined,
|
|
13597
14483
|
sectionDepth: 1,
|
|
13598
14484
|
sectionKind: "pdf_block",
|
|
13599
14485
|
sectionPath: [sectionTitle],
|
|
13600
14486
|
sectionTitle,
|
|
13601
|
-
text
|
|
14487
|
+
text: block.text
|
|
13602
14488
|
});
|
|
13603
14489
|
}
|
|
13604
14490
|
return units;
|
|
@@ -13607,6 +14493,10 @@ var officeNativeStructureUnits = (metadata) => {
|
|
|
13607
14493
|
const blocks = Array.isArray(metadata?.officeBlocks) ? metadata.officeBlocks : [];
|
|
13608
14494
|
const units = [];
|
|
13609
14495
|
const headingStack = [];
|
|
14496
|
+
const headingSiblingCounts = new Map;
|
|
14497
|
+
let pendingListContextText;
|
|
14498
|
+
let pendingTableContextText;
|
|
14499
|
+
let consumedOfficeListUntil = -1;
|
|
13610
14500
|
const decorateOfficeSectionText = (text, sectionTitle) => {
|
|
13611
14501
|
if (!sectionTitle || text.includes(sectionTitle)) {
|
|
13612
14502
|
return text;
|
|
@@ -13615,6 +14505,9 @@ var officeNativeStructureUnits = (metadata) => {
|
|
|
13615
14505
|
${text}`);
|
|
13616
14506
|
};
|
|
13617
14507
|
for (const [index, block] of blocks.entries()) {
|
|
14508
|
+
if (index <= consumedOfficeListUntil) {
|
|
14509
|
+
continue;
|
|
14510
|
+
}
|
|
13618
14511
|
if (!block || typeof block !== "object") {
|
|
13619
14512
|
continue;
|
|
13620
14513
|
}
|
|
@@ -13625,20 +14518,39 @@ ${text}`);
|
|
|
13625
14518
|
const officeBlockNumber = typeof block.blockNumber === "number" && Number.isFinite(block.blockNumber) ? block.blockNumber : undefined;
|
|
13626
14519
|
const officeBlockKind = block.blockKind === "title" || block.blockKind === "heading" || block.blockKind === "list" || block.blockKind === "table" ? block.blockKind : "paragraph";
|
|
13627
14520
|
const headingLevel = typeof block.headingLevel === "number" && Number.isFinite(block.headingLevel) ? block.headingLevel : undefined;
|
|
14521
|
+
const officeListLevel = typeof block.listLevel === "number" && Number.isFinite(block.listLevel) ? block.listLevel : undefined;
|
|
14522
|
+
const officeTableBodyRowCount = typeof block.tableBodyRowCount === "number" && Number.isFinite(block.tableBodyRowCount) ? block.tableBodyRowCount : undefined;
|
|
14523
|
+
const officeTableColumnCount = typeof block.tableColumnCount === "number" && Number.isFinite(block.tableColumnCount) ? block.tableColumnCount : undefined;
|
|
14524
|
+
const officeTableHeaderText = typeof block.tableHeaderText === "string" && block.tableHeaderText.length > 0 ? block.tableHeaderText : undefined;
|
|
14525
|
+
const officeTableHeaders = Array.isArray(block.tableHeaders) && block.tableHeaders.length > 0 ? block.tableHeaders.filter((value) => typeof value === "string" && value.length > 0) : undefined;
|
|
14526
|
+
const officeTableRowCount = typeof block.tableRowCount === "number" && Number.isFinite(block.tableRowCount) ? block.tableRowCount : undefined;
|
|
14527
|
+
const officeTableSignature = typeof block.tableSignature === "string" && block.tableSignature.length > 0 ? block.tableSignature : undefined;
|
|
13628
14528
|
if (officeBlockKind === "title" || officeBlockKind === "heading") {
|
|
13629
14529
|
const level = officeBlockKind === "title" ? 1 : headingLevel ?? 1;
|
|
13630
|
-
headingStack
|
|
14530
|
+
const parentScope = headingStack.slice(0, Math.max(0, level - 1)).join(" > ");
|
|
14531
|
+
const headingKey = `${level}:${parentScope}:${text}`;
|
|
14532
|
+
const headingCount = (headingSiblingCounts.get(headingKey) ?? 0) + 1;
|
|
14533
|
+
headingSiblingCounts.set(headingKey, headingCount);
|
|
14534
|
+
const resolvedHeadingText = headingCount > 1 ? `${text} (${headingCount})` : text;
|
|
14535
|
+
headingStack[level - 1] = resolvedHeadingText;
|
|
13631
14536
|
headingStack.length = level;
|
|
13632
|
-
const
|
|
13633
|
-
const
|
|
13634
|
-
if (
|
|
14537
|
+
const nextBlock2 = blocks[index + 1];
|
|
14538
|
+
const nextKind2 = nextBlock2 && typeof nextBlock2 === "object" ? nextBlock2.blockKind : undefined;
|
|
14539
|
+
if (nextKind2 === "title" || nextKind2 === "heading" || nextKind2 === "list" || nextKind2 === "table" || !nextBlock2) {
|
|
13635
14540
|
units.push({
|
|
13636
14541
|
officeBlockKind,
|
|
14542
|
+
officeListLevel,
|
|
13637
14543
|
officeBlockNumber,
|
|
14544
|
+
...typeof officeTableBodyRowCount === "number" ? { officeTableBodyRowCount } : {},
|
|
14545
|
+
...typeof officeTableColumnCount === "number" ? { officeTableColumnCount } : {},
|
|
14546
|
+
...typeof officeTableHeaderText === "string" ? { officeTableHeaderText } : {},
|
|
14547
|
+
...Array.isArray(officeTableHeaders) && officeTableHeaders.length > 0 ? { officeTableHeaders } : {},
|
|
14548
|
+
...typeof officeTableRowCount === "number" ? { officeTableRowCount } : {},
|
|
14549
|
+
...typeof officeTableSignature === "string" ? { officeTableSignature } : {},
|
|
13638
14550
|
sectionDepth: headingStack.length,
|
|
13639
14551
|
sectionKind: "office_heading",
|
|
13640
14552
|
sectionPath: [...headingStack],
|
|
13641
|
-
sectionTitle:
|
|
14553
|
+
sectionTitle: resolvedHeadingText,
|
|
13642
14554
|
text
|
|
13643
14555
|
});
|
|
13644
14556
|
}
|
|
@@ -13646,16 +14558,143 @@ ${text}`);
|
|
|
13646
14558
|
}
|
|
13647
14559
|
const sectionPath = headingStack.length > 0 ? [...headingStack] : undefined;
|
|
13648
14560
|
const sectionTitle = sectionPath?.at(-1);
|
|
14561
|
+
if (officeBlockKind === "list") {
|
|
14562
|
+
const runTexts = [];
|
|
14563
|
+
const runLevels = [];
|
|
14564
|
+
let runEnd = index;
|
|
14565
|
+
for (;runEnd < blocks.length; runEnd += 1) {
|
|
14566
|
+
const runBlock = blocks[runEnd];
|
|
14567
|
+
if (!runBlock || typeof runBlock !== "object") {
|
|
14568
|
+
break;
|
|
14569
|
+
}
|
|
14570
|
+
if (runBlock.blockKind !== "list") {
|
|
14571
|
+
break;
|
|
14572
|
+
}
|
|
14573
|
+
const runText = typeof runBlock.text === "string" ? normalizeWhitespace(runBlock.text) : "";
|
|
14574
|
+
if (!runText) {
|
|
14575
|
+
break;
|
|
14576
|
+
}
|
|
14577
|
+
runTexts.push(runText);
|
|
14578
|
+
if (typeof runBlock.listLevel === "number" && Number.isFinite(runBlock.listLevel)) {
|
|
14579
|
+
runLevels.push(runBlock.listLevel);
|
|
14580
|
+
}
|
|
14581
|
+
}
|
|
14582
|
+
runEnd -= 1;
|
|
14583
|
+
const nextAfterRun = blocks[runEnd + 1];
|
|
14584
|
+
const nextAfterRunKind = nextAfterRun && typeof nextAfterRun === "object" ? nextAfterRun.blockKind : undefined;
|
|
14585
|
+
const nextAfterRunText = nextAfterRun && typeof nextAfterRun === "object" && typeof nextAfterRun.text === "string" ? normalizeWhitespace(nextAfterRun.text) : undefined;
|
|
14586
|
+
const nextAfterRunNext = blocks[runEnd + 2];
|
|
14587
|
+
const nextAfterRunNextKind = nextAfterRunNext && typeof nextAfterRunNext === "object" ? nextAfterRunNext.blockKind : undefined;
|
|
14588
|
+
const nextAfterRunNextText = nextAfterRunNext && typeof nextAfterRunNext === "object" && typeof nextAfterRunNext.text === "string" ? nextAfterRunNext.text : undefined;
|
|
14589
|
+
const nextAfterRunNextNext = blocks[runEnd + 3];
|
|
14590
|
+
const nextAfterRunNextNextKind = nextAfterRunNextNext && typeof nextAfterRunNextNext === "object" ? nextAfterRunNextNext.blockKind : undefined;
|
|
14591
|
+
const nextAfterRunNextNextNext = blocks[runEnd + 4];
|
|
14592
|
+
const nextAfterRunNextNextNextKind = nextAfterRunNextNextNext && typeof nextAfterRunNextNextNext === "object" ? nextAfterRunNextNextNext.blockKind : undefined;
|
|
14593
|
+
const trailingTableBridgeText = nextAfterRunKind === "paragraph" && typeof nextAfterRunText === "string" && nextAfterRunText.length <= 200 && (nextAfterRunNextKind === "table" || nextAfterRunNextKind === "heading" && typeof nextAfterRunNextText === "string" && /\btable\b/i.test(nextAfterRunNextText) && (nextAfterRunNextNextKind === "table" || nextAfterRunNextNextKind === "paragraph" && nextAfterRunNextNextNextKind === "table")) ? nextAfterRunText : undefined;
|
|
14594
|
+
const officeListContextText2 = normalizeWhitespace([
|
|
14595
|
+
...typeof pendingListContextText === "string" ? [pendingListContextText] : [],
|
|
14596
|
+
...typeof trailingTableBridgeText === "string" ? [trailingTableBridgeText] : []
|
|
14597
|
+
].join(`
|
|
14598
|
+
|
|
14599
|
+
`));
|
|
14600
|
+
const distinctLevels = [...new Set(runLevels)];
|
|
14601
|
+
const officeListLevel2 = distinctLevels.length === 1 ? distinctLevels[0] : undefined;
|
|
14602
|
+
const groupedListText = normalizeWhitespace([
|
|
14603
|
+
...officeListContextText2 ? [officeListContextText2] : [],
|
|
14604
|
+
...runTexts
|
|
14605
|
+
].join(`
|
|
14606
|
+
|
|
14607
|
+
`));
|
|
14608
|
+
if (typeof trailingTableBridgeText === "string") {
|
|
14609
|
+
pendingTableContextText = trailingTableBridgeText;
|
|
14610
|
+
consumedOfficeListUntil = runEnd + 1;
|
|
14611
|
+
} else {
|
|
14612
|
+
consumedOfficeListUntil = runEnd;
|
|
14613
|
+
}
|
|
14614
|
+
pendingListContextText = undefined;
|
|
14615
|
+
units.push({
|
|
14616
|
+
officeBlockKind,
|
|
14617
|
+
...officeListContextText2 ? { officeListContextText: officeListContextText2 } : {},
|
|
14618
|
+
officeListGroupItemCount: runTexts.length,
|
|
14619
|
+
...typeof officeListLevel2 === "number" ? { officeListLevel: officeListLevel2 } : {},
|
|
14620
|
+
...distinctLevels.length > 0 ? { officeListLevels: distinctLevels } : {},
|
|
14621
|
+
officeBlockNumber,
|
|
14622
|
+
preferredChunkUnits: [
|
|
14623
|
+
...officeListContextText2 ? [officeListContextText2] : [],
|
|
14624
|
+
...runTexts
|
|
14625
|
+
],
|
|
14626
|
+
sectionDepth: sectionPath?.length,
|
|
14627
|
+
sectionKind: "office_block",
|
|
14628
|
+
sectionPath,
|
|
14629
|
+
sectionTitle,
|
|
14630
|
+
text: groupedListText
|
|
14631
|
+
});
|
|
14632
|
+
continue;
|
|
14633
|
+
}
|
|
14634
|
+
const nextBlock = blocks[index + 1];
|
|
14635
|
+
const nextKind = nextBlock && typeof nextBlock === "object" ? nextBlock.blockKind : undefined;
|
|
14636
|
+
const nextText = nextBlock && typeof nextBlock === "object" && typeof nextBlock.text === "string" ? nextBlock.text : undefined;
|
|
14637
|
+
const nextNextBlock = blocks[index + 2];
|
|
14638
|
+
const nextNextKind = nextNextBlock && typeof nextNextBlock === "object" ? nextNextBlock.blockKind : undefined;
|
|
14639
|
+
const nextNextText = nextNextBlock && typeof nextNextBlock === "object" && typeof nextNextBlock.text === "string" ? nextNextBlock.text : undefined;
|
|
14640
|
+
const nextNextNextBlock = blocks[index + 3];
|
|
14641
|
+
const nextNextNextKind = nextNextNextBlock && typeof nextNextNextBlock === "object" ? nextNextNextBlock.blockKind : undefined;
|
|
14642
|
+
const nextNextNextText = nextNextNextBlock && typeof nextNextNextBlock === "object" && typeof nextNextNextBlock.text === "string" ? nextNextNextBlock.text : undefined;
|
|
14643
|
+
const nextNextNextNextBlock = blocks[index + 4];
|
|
14644
|
+
const nextNextNextNextKind = nextNextNextNextBlock && typeof nextNextNextNextBlock === "object" ? nextNextNextNextBlock.blockKind : undefined;
|
|
14645
|
+
if (officeBlockKind === "paragraph" && (nextKind === "list" || nextKind === "paragraph" && nextNextKind === "list") && text.length <= 200) {
|
|
14646
|
+
pendingListContextText = normalizeWhitespace([
|
|
14647
|
+
...typeof pendingListContextText === "string" ? [pendingListContextText] : [],
|
|
14648
|
+
text
|
|
14649
|
+
].join(`
|
|
14650
|
+
|
|
14651
|
+
`));
|
|
14652
|
+
continue;
|
|
14653
|
+
}
|
|
14654
|
+
if (officeBlockKind === "paragraph" && (nextKind === "table" || nextKind === "paragraph" && nextNextKind === "table" || nextKind === "heading" && typeof nextText === "string" && /\btable\b/i.test(nextText) && (nextNextKind === "table" || nextNextKind === "paragraph" && nextNextNextKind === "table") || nextKind === "paragraph" && nextNextKind === "heading" && typeof nextNextText === "string" && /\btable\b/i.test(nextNextText) && (nextNextNextKind === "table" || nextNextNextKind === "paragraph" && nextNextNextNextKind === "table") || nextKind === "paragraph" && nextNextKind === "paragraph" && nextNextNextKind === "heading" && typeof nextNextNextText === "string" && /\btable\b/i.test(nextNextNextText) && (nextNextNextNextKind === "table" || nextNextNextNextKind === "paragraph" && blocks[index + 5]?.blockKind === "table")) && text.length <= 200) {
|
|
14655
|
+
pendingTableContextText = normalizeWhitespace([
|
|
14656
|
+
...typeof pendingTableContextText === "string" ? [pendingTableContextText] : [],
|
|
14657
|
+
text
|
|
14658
|
+
].join(`
|
|
14659
|
+
|
|
14660
|
+
`));
|
|
14661
|
+
continue;
|
|
14662
|
+
}
|
|
14663
|
+
const officeListContextText = officeBlockKind === "list" ? pendingListContextText : undefined;
|
|
14664
|
+
const officeTableContextText = officeBlockKind === "table" ? pendingTableContextText : undefined;
|
|
14665
|
+
if (officeBlockKind !== "list" || nextKind !== "list") {
|
|
14666
|
+
pendingListContextText = undefined;
|
|
14667
|
+
}
|
|
14668
|
+
pendingTableContextText = undefined;
|
|
13649
14669
|
units.push({
|
|
13650
14670
|
officeBlockKind,
|
|
14671
|
+
officeListLevel,
|
|
13651
14672
|
officeBlockNumber,
|
|
13652
|
-
|
|
13653
|
-
|
|
14673
|
+
...typeof officeListContextText === "string" ? { officeListContextText } : {},
|
|
14674
|
+
...typeof officeTableContextText === "string" ? { officeTableContextText } : {},
|
|
14675
|
+
...typeof officeTableBodyRowCount === "number" ? { officeTableBodyRowCount } : {},
|
|
14676
|
+
...typeof officeTableColumnCount === "number" ? { officeTableColumnCount } : {},
|
|
14677
|
+
...typeof officeTableHeaderText === "string" ? { officeTableHeaderText } : {},
|
|
14678
|
+
...Array.isArray(officeTableHeaders) && officeTableHeaders.length > 0 ? { officeTableHeaders } : {},
|
|
14679
|
+
...typeof officeTableRowCount === "number" ? { officeTableRowCount } : {},
|
|
14680
|
+
...typeof officeTableSignature === "string" ? { officeTableSignature } : {},
|
|
14681
|
+
preferredChunkUnits: officeBlockKind === "table" ? [
|
|
14682
|
+
...typeof officeTableContextText === "string" ? [officeTableContextText] : [],
|
|
14683
|
+
...text.split(`
|
|
14684
|
+
`).filter(Boolean)
|
|
14685
|
+
] : officeBlockKind === "list" ? [
|
|
14686
|
+
...typeof officeListContextText === "string" ? [officeListContextText] : [],
|
|
14687
|
+
text
|
|
14688
|
+
] : undefined,
|
|
13654
14689
|
sectionDepth: sectionPath?.length,
|
|
13655
14690
|
sectionKind: officeBlockKind === "paragraph" ? "office_heading" : "office_block",
|
|
13656
14691
|
sectionPath,
|
|
13657
14692
|
sectionTitle,
|
|
13658
|
-
text: officeBlockKind === "
|
|
14693
|
+
text: officeBlockKind === "table" && typeof officeTableContextText === "string" ? normalizeWhitespace(`${officeTableContextText}
|
|
14694
|
+
|
|
14695
|
+
${text}`) : officeBlockKind === "list" && typeof officeListContextText === "string" ? normalizeWhitespace(`${officeListContextText}
|
|
14696
|
+
|
|
14697
|
+
${text}`) : officeBlockKind === "paragraph" ? decorateOfficeSectionText(text, sectionTitle) : text
|
|
13659
14698
|
});
|
|
13660
14699
|
}
|
|
13661
14700
|
return units;
|
|
@@ -14593,6 +15632,13 @@ var appendPdfLineBreak = (parts) => {
|
|
|
14593
15632
|
`);
|
|
14594
15633
|
};
|
|
14595
15634
|
var PDF_CHROME_LINE_MAX_LENGTH = 80;
|
|
15635
|
+
var PDF_LINK_CLUSTER_LINE_MAX_LENGTH = 120;
|
|
15636
|
+
var PDF_FIGURE_LABEL_PATTERN = /^(?:figure|fig\.)\s*\d+[A-Za-z]?(?:\s*[:.-]\s*|\s+|$)/i;
|
|
15637
|
+
var PDF_LINK_CLUSTER_HEADING_PATTERN = /^(?:related|quick|useful|reference|references|resources|links|see also)\b/i;
|
|
15638
|
+
var PDF_PROMO_HEADING_PATTERN = /^(?:start|free trial|upgrade|subscribe|newsletter|contact sales|book demo|try|learn more)\b/i;
|
|
15639
|
+
var PDF_PROMO_BODY_PATTERN = /\b(?:free trial|upgrade|subscribe|newsletter|contact sales|book demo|learn more|pricing|enterprise|demo)\b/i;
|
|
15640
|
+
var OCR_SUMMARY_CONFIDENCE_THRESHOLD = 0.75;
|
|
15641
|
+
var OCR_SUMMARY_MIN_STRONG_TEXT_RATIO = 0.6;
|
|
14596
15642
|
var PDF_TEXT_OPERATOR_PATTERN = /(\[((?:\\.|[^\]])*)\]\s*TJ)|(\(((?:\\.|[^\\)])*)\)\s*Tj)|([-+]?\d*\.?\d+\s+[-+]?\d*\.?\d+\s+\(((?:\\.|[^\\)])*)\)\s*")|(\(((?:\\.|[^\\)])*)\)\s*')|((?:[-+]?\d*\.?\d+\s+){2}(?:Td|TD))|(T\*)|((?:[-+]?\d*\.?\d+\s+){6}Tm)/g;
|
|
14597
15643
|
var extractTextFromPDFTextObject = (value) => {
|
|
14598
15644
|
const parts = [];
|
|
@@ -14621,23 +15667,144 @@ var extractTextFromPDFTextObject = (value) => {
|
|
|
14621
15667
|
}
|
|
14622
15668
|
return parts.join("");
|
|
14623
15669
|
};
|
|
14624
|
-
var
|
|
14625
|
-
const
|
|
14626
|
-
if (
|
|
15670
|
+
var buildPDFNativeTextBlockSeed = (lines, pageNumber) => {
|
|
15671
|
+
const normalizedLines = lines.map((line) => normalizeWhitespace(line)).filter(Boolean);
|
|
15672
|
+
if (normalizedLines.length === 0) {
|
|
14627
15673
|
return;
|
|
14628
15674
|
}
|
|
14629
|
-
const
|
|
14630
|
-
`)
|
|
14631
|
-
const
|
|
15675
|
+
const text = normalizedLines.join(`
|
|
15676
|
+
`);
|
|
15677
|
+
const semanticRole = normalizedLines.length >= 2 && PDF_FIGURE_LABEL_PATTERN.test(normalizedLines[0] ?? "") ? "figure_caption" : undefined;
|
|
14632
15678
|
return {
|
|
14633
|
-
|
|
14634
|
-
lineCount,
|
|
15679
|
+
lineCount: normalizedLines.length,
|
|
14635
15680
|
pageNumber,
|
|
14636
|
-
|
|
14637
|
-
|
|
15681
|
+
...semanticRole ? { semanticRole } : {},
|
|
15682
|
+
text,
|
|
15683
|
+
textKind: normalizedLines.some((line) => line.includes(" | ")) ? "table_like" : "paragraph"
|
|
14638
15684
|
};
|
|
14639
15685
|
};
|
|
15686
|
+
var inferPDFBlockHeading = (text) => {
|
|
15687
|
+
const lines = text.split(`
|
|
15688
|
+
`).map((line) => normalizeWhitespace(line)).filter(Boolean);
|
|
15689
|
+
const candidate = lines[0];
|
|
15690
|
+
if (!candidate || candidate.length > 80 || candidate.includes(" | ") || /[.!?]$/.test(candidate)) {
|
|
15691
|
+
return;
|
|
15692
|
+
}
|
|
15693
|
+
return candidate;
|
|
15694
|
+
};
|
|
15695
|
+
var extractPDFFigureLabel = (text) => {
|
|
15696
|
+
const lines = text.split(`
|
|
15697
|
+
`).map((line) => normalizeWhitespace(line)).filter(Boolean);
|
|
15698
|
+
const candidate = lines[0];
|
|
15699
|
+
return candidate && PDF_FIGURE_LABEL_PATTERN.test(candidate) ? candidate : undefined;
|
|
15700
|
+
};
|
|
15701
|
+
var splitPDFNativeTextBlocks = (text, pageNumber) => {
|
|
15702
|
+
const lines = text.split(`
|
|
15703
|
+
`).map((line) => normalizeWhitespace(line)).filter(Boolean);
|
|
15704
|
+
if (lines.length === 0) {
|
|
15705
|
+
return [];
|
|
15706
|
+
}
|
|
15707
|
+
const blocks = [];
|
|
15708
|
+
let currentLines = [];
|
|
15709
|
+
let currentKind;
|
|
15710
|
+
let currentSemanticRole;
|
|
15711
|
+
const flush = () => {
|
|
15712
|
+
if (currentLines.length === 0) {
|
|
15713
|
+
return;
|
|
15714
|
+
}
|
|
15715
|
+
const block = buildPDFNativeTextBlockSeed(currentLines, pageNumber);
|
|
15716
|
+
if (block) {
|
|
15717
|
+
blocks.push(block);
|
|
15718
|
+
}
|
|
15719
|
+
currentLines = [];
|
|
15720
|
+
currentKind = undefined;
|
|
15721
|
+
currentSemanticRole = undefined;
|
|
15722
|
+
};
|
|
15723
|
+
for (const [index, line] of lines.entries()) {
|
|
15724
|
+
const lineKind = line.includes(" | ") ? "table_like" : "paragraph";
|
|
15725
|
+
const isFigureLabel = PDF_FIGURE_LABEL_PATTERN.test(line);
|
|
15726
|
+
if (isFigureLabel) {
|
|
15727
|
+
flush();
|
|
15728
|
+
currentKind = "paragraph";
|
|
15729
|
+
currentSemanticRole = "figure_caption";
|
|
15730
|
+
currentLines.push(line);
|
|
15731
|
+
continue;
|
|
15732
|
+
}
|
|
15733
|
+
if (currentSemanticRole === "figure_caption") {
|
|
15734
|
+
if (lineKind === "paragraph" && currentLines.length < 2) {
|
|
15735
|
+
currentLines.push(line);
|
|
15736
|
+
continue;
|
|
15737
|
+
}
|
|
15738
|
+
flush();
|
|
15739
|
+
}
|
|
15740
|
+
if (currentKind && lineKind !== currentKind) {
|
|
15741
|
+
flush();
|
|
15742
|
+
}
|
|
15743
|
+
currentKind = lineKind;
|
|
15744
|
+
currentLines.push(line);
|
|
15745
|
+
}
|
|
15746
|
+
flush();
|
|
15747
|
+
return blocks;
|
|
15748
|
+
};
|
|
15749
|
+
var assignPDFBlockNumbers = (blocks) => blocks.map((block, index) => ({
|
|
15750
|
+
...block,
|
|
15751
|
+
blockNumber: index + 1
|
|
15752
|
+
}));
|
|
14640
15753
|
var isLikelyPDFPageLabel = (value) => /^page\s+\d+(?:\s+of\s+\d+)?$/i.test(value.trim());
|
|
15754
|
+
var isLikelyPDFChromeLine = (value) => {
|
|
15755
|
+
const normalized = value.trim();
|
|
15756
|
+
if (!normalized) {
|
|
15757
|
+
return false;
|
|
15758
|
+
}
|
|
15759
|
+
return isLikelyPDFPageLabel(normalized) || /\b(?:header|footer)\s*$/i.test(normalized);
|
|
15760
|
+
};
|
|
15761
|
+
var isLikelyPDFLinkLine = (value) => {
|
|
15762
|
+
const normalized = value.trim();
|
|
15763
|
+
if (!normalized || normalized.length > PDF_LINK_CLUSTER_LINE_MAX_LENGTH) {
|
|
15764
|
+
return false;
|
|
15765
|
+
}
|
|
15766
|
+
return /^https?:\/\//i.test(normalized) || /^www\./i.test(normalized) || /^\/[A-Za-z0-9/_#?&=%.-]+$/.test(normalized) || /\((?:https?:\/\/|\/)[^)]+\)/i.test(normalized);
|
|
15767
|
+
};
|
|
15768
|
+
var isLikelyPDFLinkClusterBlock = (block) => {
|
|
15769
|
+
if (block.semanticRole || block.textKind !== "paragraph") {
|
|
15770
|
+
return false;
|
|
15771
|
+
}
|
|
15772
|
+
const lines = block.text.split(`
|
|
15773
|
+
`).map((line) => normalizeWhitespace(line)).filter(Boolean);
|
|
15774
|
+
if (lines.length === 1) {
|
|
15775
|
+
return isLikelyPDFLinkLine(lines[0] ?? "");
|
|
15776
|
+
}
|
|
15777
|
+
if (lines.length < 2) {
|
|
15778
|
+
return false;
|
|
15779
|
+
}
|
|
15780
|
+
const heading = lines[0] ?? "";
|
|
15781
|
+
const bodyLines = lines.slice(1);
|
|
15782
|
+
const linkLikeCount = bodyLines.filter((line) => isLikelyPDFLinkLine(line)).length;
|
|
15783
|
+
if (bodyLines.length > 0 && linkLikeCount === bodyLines.length && PDF_LINK_CLUSTER_HEADING_PATTERN.test(heading)) {
|
|
15784
|
+
return true;
|
|
15785
|
+
}
|
|
15786
|
+
return linkLikeCount >= 2 && linkLikeCount >= Math.ceil(lines.length * 0.6);
|
|
15787
|
+
};
|
|
15788
|
+
var isLikelyPDFPromoBlock = (block) => {
|
|
15789
|
+
if (block.semanticRole || block.textKind !== "paragraph") {
|
|
15790
|
+
return false;
|
|
15791
|
+
}
|
|
15792
|
+
const lines = block.text.split(`
|
|
15793
|
+
`).map((line) => normalizeWhitespace(line)).filter(Boolean);
|
|
15794
|
+
if (lines.length === 1) {
|
|
15795
|
+
return PDF_PROMO_HEADING_PATTERN.test(lines[0] ?? "");
|
|
15796
|
+
}
|
|
15797
|
+
if (lines.length < 2 || lines.length > 4) {
|
|
15798
|
+
return false;
|
|
15799
|
+
}
|
|
15800
|
+
const heading = lines[0] ?? "";
|
|
15801
|
+
const bodyLines = lines.slice(1);
|
|
15802
|
+
const promoLikeCount = bodyLines.filter((line) => line.length <= PDF_LINK_CLUSTER_LINE_MAX_LENGTH && (PDF_PROMO_BODY_PATTERN.test(line) || isLikelyPDFLinkLine(line))).length;
|
|
15803
|
+
if (PDF_PROMO_HEADING_PATTERN.test(heading) && promoLikeCount >= Math.max(1, bodyLines.length - 1)) {
|
|
15804
|
+
return true;
|
|
15805
|
+
}
|
|
15806
|
+
return false;
|
|
15807
|
+
};
|
|
14641
15808
|
var suppressRepeatedPDFChrome = (blocks) => {
|
|
14642
15809
|
const linePages = new Map;
|
|
14643
15810
|
for (const block of blocks) {
|
|
@@ -14658,7 +15825,7 @@ var suppressRepeatedPDFChrome = (blocks) => {
|
|
|
14658
15825
|
if (!line) {
|
|
14659
15826
|
return false;
|
|
14660
15827
|
}
|
|
14661
|
-
if (
|
|
15828
|
+
if (isLikelyPDFChromeLine(line)) {
|
|
14662
15829
|
return false;
|
|
14663
15830
|
}
|
|
14664
15831
|
const repeatedPages = linePages.get(line);
|
|
@@ -14667,27 +15834,76 @@ var suppressRepeatedPDFChrome = (blocks) => {
|
|
|
14667
15834
|
}
|
|
14668
15835
|
return true;
|
|
14669
15836
|
});
|
|
14670
|
-
const text =
|
|
14671
|
-
`)
|
|
15837
|
+
const text = keptLines.join(`
|
|
15838
|
+
`);
|
|
14672
15839
|
if (!text) {
|
|
14673
15840
|
return;
|
|
14674
15841
|
}
|
|
14675
|
-
return
|
|
15842
|
+
return {
|
|
15843
|
+
...block,
|
|
15844
|
+
lineCount: text.split(`
|
|
15845
|
+
`).filter(Boolean).length,
|
|
15846
|
+
text,
|
|
15847
|
+
textKind: text.includes(" | ") ? "table_like" : "paragraph"
|
|
15848
|
+
};
|
|
14676
15849
|
}).filter((value) => Boolean(value));
|
|
14677
15850
|
};
|
|
15851
|
+
var suppressNonContentPDFBlocks = (blocks) => blocks.filter((block) => !isLikelyPDFLinkClusterBlock(block) && !isLikelyPDFPromoBlock(block));
|
|
15852
|
+
var mergePDFHeadingContinuationBlocks = (blocks) => {
|
|
15853
|
+
const merged = [];
|
|
15854
|
+
for (let index = 0;index < blocks.length; index += 1) {
|
|
15855
|
+
const block = blocks[index];
|
|
15856
|
+
if (!block) {
|
|
15857
|
+
continue;
|
|
15858
|
+
}
|
|
15859
|
+
const lines = block.text.split(`
|
|
15860
|
+
`).map((line) => normalizeWhitespace(line)).filter(Boolean);
|
|
15861
|
+
const nextBlock = blocks[index + 1];
|
|
15862
|
+
const isHeadingOnlyBlock = !block.semanticRole && block.textKind === "paragraph" && lines.length === 1 && inferPDFBlockHeading(block.text) === lines[0];
|
|
15863
|
+
const canMergeWithNext = isHeadingOnlyBlock && nextBlock && nextBlock.pageNumber === block.pageNumber && !nextBlock.semanticRole && nextBlock.textKind === "paragraph" && inferPDFBlockHeading(nextBlock.text) === undefined;
|
|
15864
|
+
if (canMergeWithNext) {
|
|
15865
|
+
const text = [block.text, nextBlock.text].flatMap((value) => value.split(`
|
|
15866
|
+
`)).map((line) => normalizeWhitespace(line)).filter(Boolean).join(`
|
|
15867
|
+
`);
|
|
15868
|
+
merged.push({
|
|
15869
|
+
...block,
|
|
15870
|
+
lineCount: text.split(`
|
|
15871
|
+
`).filter(Boolean).length,
|
|
15872
|
+
text
|
|
15873
|
+
});
|
|
15874
|
+
index += 1;
|
|
15875
|
+
continue;
|
|
15876
|
+
}
|
|
15877
|
+
merged.push(block);
|
|
15878
|
+
}
|
|
15879
|
+
return merged;
|
|
15880
|
+
};
|
|
15881
|
+
var associatePDFNativeFigureBodies = (blocks) => blocks.map((block, index) => {
|
|
15882
|
+
if (block.semanticRole || block.textKind !== "paragraph" || inferPDFBlockHeading(block.text)) {
|
|
15883
|
+
return block;
|
|
15884
|
+
}
|
|
15885
|
+
const previousBlock = index > 0 ? blocks[index - 1] : undefined;
|
|
15886
|
+
if (!previousBlock || previousBlock.pageNumber !== block.pageNumber || previousBlock.semanticRole !== "figure_caption") {
|
|
15887
|
+
return block;
|
|
15888
|
+
}
|
|
15889
|
+
return {
|
|
15890
|
+
...block,
|
|
15891
|
+
semanticRole: "figure_body"
|
|
15892
|
+
};
|
|
15893
|
+
});
|
|
14678
15894
|
var extractNativePDFText = (data) => {
|
|
14679
15895
|
const raw = Buffer.from(data).toString("latin1");
|
|
14680
15896
|
const count = [...raw.matchAll(/\/Type\s*\/Page\b/g)].length;
|
|
14681
15897
|
const pageCount = count > 0 ? count : 1;
|
|
14682
15898
|
const pageMarkers = [...raw.matchAll(/\/Type\s*\/Page\b/g)].map((match) => match.index ?? raw.length);
|
|
14683
|
-
const blocks = [...raw.matchAll(/BT([\s\S]*?)ET/g)].
|
|
15899
|
+
const blocks = assignPDFBlockNumbers([...raw.matchAll(/BT([\s\S]*?)ET/g)].flatMap((match) => {
|
|
14684
15900
|
const blockText = extractTextFromPDFTextObject(match[1] ?? "");
|
|
14685
15901
|
const objectEnd = (match.index ?? 0) + (match[0]?.length ?? 0);
|
|
14686
15902
|
const pageIndex = pageMarkers.findIndex((marker) => marker >= objectEnd);
|
|
14687
15903
|
const pageNumber = pageIndex >= 0 ? pageIndex + 1 : pageCount;
|
|
14688
|
-
return
|
|
14689
|
-
})
|
|
14690
|
-
const visibleBlocks = suppressRepeatedPDFChrome(blocks);
|
|
15904
|
+
return splitPDFNativeTextBlocks(blockText, pageNumber);
|
|
15905
|
+
}));
|
|
15906
|
+
const visibleBlocks = assignPDFBlockNumbers(associatePDFNativeFigureBodies(mergePDFHeadingContinuationBlocks(suppressNonContentPDFBlocks(suppressRepeatedPDFChrome(blocks)))));
|
|
14691
15907
|
const fallbackText = [...raw.matchAll(/\(((?:\\.|[^\\)])*)\)\s*Tj/g)].map((match) => decodePdfLiteral(match[1] ?? "")).join(`
|
|
14692
15908
|
`);
|
|
14693
15909
|
const text = visibleBlocks.length > 0 ? normalizeWhitespace(visibleBlocks.map((block) => block.text).join(`
|
|
@@ -14798,13 +16014,10 @@ var officeDocumentBlocks = (entries) => {
|
|
|
14798
16014
|
for (const match of body.matchAll(blockPattern)) {
|
|
14799
16015
|
const blockXml = match[0] ?? "";
|
|
14800
16016
|
if (blockXml.startsWith("<w:tbl")) {
|
|
14801
|
-
const
|
|
14802
|
-
|
|
14803
|
-
|
|
14804
|
-
|
|
14805
|
-
if (cells.length === 0) {
|
|
14806
|
-
return "";
|
|
14807
|
-
}
|
|
16017
|
+
const tableRows = [...blockXml.matchAll(/<w:tr\b[\s\S]*?<\/w:tr>/g)].map((rowMatch) => [
|
|
16018
|
+
...(rowMatch[0] ?? "").matchAll(/<w:tc\b[\s\S]*?<\/w:tc>/g)
|
|
16019
|
+
].map((cellMatch) => extractOfficeParagraphText(cellMatch[0] ?? "")).filter(Boolean)).filter((cells) => cells.length > 0);
|
|
16020
|
+
const rows = tableRows.map((cells, rowIndex) => {
|
|
14808
16021
|
return `Row ${rowIndex + 1}. ${cells.map((cell, cellIndex) => `${String.fromCharCode(65 + cellIndex)}: ${cell}`).join(" | ")}`;
|
|
14809
16022
|
}).filter(Boolean);
|
|
14810
16023
|
const text2 = normalizeWhitespace(rows.join(`
|
|
@@ -14812,9 +16025,21 @@ var officeDocumentBlocks = (entries) => {
|
|
|
14812
16025
|
if (!text2) {
|
|
14813
16026
|
continue;
|
|
14814
16027
|
}
|
|
16028
|
+
const tableHeaders = tableRows[0];
|
|
16029
|
+
const tableRowCount = tableRows.length;
|
|
16030
|
+
const tableBodyRowCount = tableRowCount > 0 ? Math.max(0, tableRowCount - 1) : undefined;
|
|
16031
|
+
const tableColumnCount = Array.isArray(tableHeaders) && tableHeaders.length > 0 ? tableHeaders.length : tableRows.reduce((max, row) => Math.max(max, row.length), 0) || undefined;
|
|
16032
|
+
const tableHeaderText = Array.isArray(tableHeaders) && tableHeaders.length > 0 ? tableHeaders.join(" | ") : undefined;
|
|
16033
|
+
const tableSignature = Array.isArray(tableHeaders) && tableHeaders.length > 0 ? tableHeaders.join(" | ") : undefined;
|
|
14815
16034
|
blocks.push({
|
|
14816
16035
|
blockKind: "table",
|
|
14817
16036
|
blockNumber: blocks.length + 1,
|
|
16037
|
+
...typeof tableBodyRowCount === "number" ? { tableBodyRowCount } : {},
|
|
16038
|
+
...typeof tableColumnCount === "number" ? { tableColumnCount } : {},
|
|
16039
|
+
...typeof tableHeaderText === "string" ? { tableHeaderText } : {},
|
|
16040
|
+
...Array.isArray(tableHeaders) && tableHeaders.length > 0 ? { tableHeaders } : {},
|
|
16041
|
+
...typeof tableRowCount === "number" ? { tableRowCount } : {},
|
|
16042
|
+
...typeof tableSignature === "string" ? { tableSignature } : {},
|
|
14818
16043
|
text: text2
|
|
14819
16044
|
});
|
|
14820
16045
|
continue;
|
|
@@ -14827,12 +16052,16 @@ var officeDocumentBlocks = (entries) => {
|
|
|
14827
16052
|
const style = (styleMatch?.[1] ?? "").toLowerCase();
|
|
14828
16053
|
const headingMatch = style.match(/^heading([1-6])$/);
|
|
14829
16054
|
const isListParagraph = /<w:numPr\b/i.test(blockXml) || style.includes("list") || style.includes("bullet");
|
|
16055
|
+
const listLevelMatch = blockXml.match(/<w:ilvl\b[^>]*w:val="(\d+)"[^>]*\/?>/i);
|
|
16056
|
+
const listLevel = listLevelMatch ? Number.parseInt(listLevelMatch[1] ?? "0", 10) : undefined;
|
|
14830
16057
|
const blockKind = style === "title" ? "title" : headingMatch ? "heading" : isListParagraph ? "list" : "paragraph";
|
|
14831
|
-
const
|
|
16058
|
+
const listPrefix = blockKind === "list" ? `${" ".repeat(Math.max(0, listLevel ?? 0))}- ` : "";
|
|
16059
|
+
const decoratedText = blockKind === "list" && !/^[-*]\s/.test(text) ? `${listPrefix}${text}` : text;
|
|
14832
16060
|
blocks.push({
|
|
14833
16061
|
blockKind,
|
|
14834
16062
|
blockNumber: blocks.length + 1,
|
|
14835
16063
|
headingLevel: headingMatch ? Number.parseInt(headingMatch[1] ?? "1", 10) : undefined,
|
|
16064
|
+
listLevel: blockKind === "list" && Number.isFinite(listLevel ?? NaN) ? listLevel : undefined,
|
|
14836
16065
|
style: style || undefined,
|
|
14837
16066
|
text: decoratedText
|
|
14838
16067
|
});
|
|
@@ -15448,10 +16677,32 @@ var splitOCRColumns = (regions) => {
|
|
|
15448
16677
|
var buildOCRReadingText = (regions) => normalizeWhitespace(splitOCRColumns(regions).map((column) => buildOCRReadingLinesText(column)).filter(Boolean).join(`
|
|
15449
16678
|
|
|
15450
16679
|
`));
|
|
15451
|
-
var
|
|
15452
|
-
const regions = result.regions?.filter((region) => normalizeWhitespace(region.text ?? "").length > 0);
|
|
15453
|
-
|
|
15454
|
-
|
|
16680
|
+
var buildOCRSummaryText = (result) => {
|
|
16681
|
+
const regions = result.regions?.filter((region) => normalizeWhitespace(region.text ?? "").length > 0) ?? [];
|
|
16682
|
+
if (regions.length === 0) {
|
|
16683
|
+
return {
|
|
16684
|
+
lowConfidenceRegionCount: 0,
|
|
16685
|
+
strongRegionCount: 0,
|
|
16686
|
+
summaryConfidenceThreshold: OCR_SUMMARY_CONFIDENCE_THRESHOLD,
|
|
16687
|
+
text: result.text,
|
|
16688
|
+
usedStrongRegionsOnly: false
|
|
16689
|
+
};
|
|
16690
|
+
}
|
|
16691
|
+
const strongRegions = regions.filter((region) => typeof region.confidence !== "number" || region.confidence >= OCR_SUMMARY_CONFIDENCE_THRESHOLD);
|
|
16692
|
+
const lowConfidenceRegionCount = regions.length - strongRegions.length;
|
|
16693
|
+
const strongTextLength = strongRegions.reduce((sum, region) => sum + normalizeWhitespace(region.text ?? "").length, 0);
|
|
16694
|
+
const totalTextLength = regions.reduce((sum, region) => sum + normalizeWhitespace(region.text ?? "").length, 0);
|
|
16695
|
+
const strongCoverageRatio = totalTextLength > 0 ? strongTextLength / totalTextLength : 0;
|
|
16696
|
+
const useStrongRegionsOnly = strongRegions.length > 0 && lowConfidenceRegionCount > 0 && strongCoverageRatio >= OCR_SUMMARY_MIN_STRONG_TEXT_RATIO;
|
|
16697
|
+
const strongReconstructed = buildOCRReadingText(strongRegions);
|
|
16698
|
+
const allReconstructed = buildOCRReadingText(regions);
|
|
16699
|
+
return {
|
|
16700
|
+
lowConfidenceRegionCount,
|
|
16701
|
+
strongRegionCount: strongRegions.length,
|
|
16702
|
+
summaryConfidenceThreshold: OCR_SUMMARY_CONFIDENCE_THRESHOLD,
|
|
16703
|
+
text: (useStrongRegionsOnly ? strongReconstructed : allReconstructed) || result.text,
|
|
16704
|
+
usedStrongRegionsOnly: useStrongRegionsOnly
|
|
16705
|
+
};
|
|
15455
16706
|
};
|
|
15456
16707
|
var ocrPageDocuments = (result, input, baseMetadata) => {
|
|
15457
16708
|
const grouped = new Map;
|
|
@@ -15835,6 +17086,7 @@ var createRAGImageOCRExtractor = (provider) => ({
|
|
|
15835
17086
|
supports: imageExtractorSupports,
|
|
15836
17087
|
extract: async (input) => {
|
|
15837
17088
|
const result = await provider.extractText(input);
|
|
17089
|
+
const summary = buildOCRSummaryText(result);
|
|
15838
17090
|
return {
|
|
15839
17091
|
chunking: input.chunking,
|
|
15840
17092
|
contentType: input.contentType,
|
|
@@ -15842,11 +17094,15 @@ var createRAGImageOCRExtractor = (provider) => ({
|
|
|
15842
17094
|
metadata: {
|
|
15843
17095
|
...input.metadata ?? {},
|
|
15844
17096
|
...ocrMetadata(result),
|
|
17097
|
+
ocrLowConfidenceRegionCount: summary.lowConfidenceRegionCount,
|
|
17098
|
+
ocrStrongRegionCount: summary.strongRegionCount,
|
|
17099
|
+
ocrSummaryConfidenceThreshold: summary.summaryConfidenceThreshold,
|
|
17100
|
+
ocrSummaryUsedStrongRegionsOnly: summary.usedStrongRegionsOnly,
|
|
15845
17101
|
fileKind: "image",
|
|
15846
17102
|
sourceNativeKind: "image_ocr"
|
|
15847
17103
|
},
|
|
15848
17104
|
source: input.source ?? input.path ?? input.name ?? `${slugify(input.title ?? DEFAULT_BINARY_NAME)}.image.txt`,
|
|
15849
|
-
text:
|
|
17105
|
+
text: summary.text,
|
|
15850
17106
|
title: result.title ?? input.title
|
|
15851
17107
|
};
|
|
15852
17108
|
}
|
|
@@ -16030,6 +17286,8 @@ var createPDFFileExtractor = () => ({
|
|
|
16030
17286
|
...input.metadata ?? {},
|
|
16031
17287
|
fileKind: "pdf",
|
|
16032
17288
|
pageCount: extracted.pageCount,
|
|
17289
|
+
pdfEvidenceMode: "native",
|
|
17290
|
+
pdfEvidenceOrigin: "native",
|
|
16033
17291
|
pdfTextBlockCount: extracted.textBlockCount,
|
|
16034
17292
|
pdfTextBlocks: extracted.textBlocks
|
|
16035
17293
|
},
|
|
@@ -16063,6 +17321,7 @@ var createRAGPDFOCRExtractor = (options) => ({
|
|
|
16063
17321
|
const nativeText = extracted.text;
|
|
16064
17322
|
const minLength = options.minExtractedTextLength ?? 80;
|
|
16065
17323
|
const shouldUseNativeText = !options.alwaysOCR && nativeText.length >= minLength;
|
|
17324
|
+
const shouldUseHybridText = !options.alwaysOCR && nativeText.length > 0 && nativeText.length < minLength;
|
|
16066
17325
|
if (shouldUseNativeText) {
|
|
16067
17326
|
return {
|
|
16068
17327
|
chunking: input.chunking,
|
|
@@ -16072,6 +17331,8 @@ var createRAGPDFOCRExtractor = (options) => ({
|
|
|
16072
17331
|
...input.metadata ?? {},
|
|
16073
17332
|
fileKind: "pdf",
|
|
16074
17333
|
pageCount: extracted.pageCount,
|
|
17334
|
+
pdfEvidenceMode: "native",
|
|
17335
|
+
pdfEvidenceOrigin: "native",
|
|
16075
17336
|
pdfTextBlockCount: extracted.textBlockCount,
|
|
16076
17337
|
pdfTextBlocks: extracted.textBlocks,
|
|
16077
17338
|
pdfTextMode: "native"
|
|
@@ -16085,12 +17346,49 @@ var createRAGPDFOCRExtractor = (options) => ({
|
|
|
16085
17346
|
...input,
|
|
16086
17347
|
contentType: input.contentType ?? "application/pdf"
|
|
16087
17348
|
});
|
|
17349
|
+
const summary = buildOCRSummaryText(ocr);
|
|
16088
17350
|
const baseMetadata = {
|
|
16089
17351
|
...ocrMetadata(ocr),
|
|
17352
|
+
ocrLowConfidenceRegionCount: summary.lowConfidenceRegionCount,
|
|
17353
|
+
ocrStrongRegionCount: summary.strongRegionCount,
|
|
17354
|
+
ocrSummaryConfidenceThreshold: summary.summaryConfidenceThreshold,
|
|
17355
|
+
ocrSummaryUsedStrongRegionsOnly: summary.usedStrongRegionsOnly,
|
|
16090
17356
|
fileKind: "pdf",
|
|
16091
17357
|
pageCount: extracted.pageCount,
|
|
17358
|
+
pdfEvidenceMode: "ocr",
|
|
17359
|
+
pdfEvidenceOrigin: "ocr",
|
|
16092
17360
|
pdfTextMode: "ocr"
|
|
16093
17361
|
};
|
|
17362
|
+
if (shouldUseHybridText) {
|
|
17363
|
+
const hybridMetadata = {
|
|
17364
|
+
...input.metadata ?? {},
|
|
17365
|
+
...baseMetadata,
|
|
17366
|
+
pageCount: extracted.pageCount,
|
|
17367
|
+
pdfEvidenceMode: "hybrid",
|
|
17368
|
+
pdfEvidenceOrigin: "native",
|
|
17369
|
+
pdfEvidenceSupplement: "ocr",
|
|
17370
|
+
pdfHybridOCRSupplement: true,
|
|
17371
|
+
pdfNativeTextBlockCount: extracted.textBlockCount,
|
|
17372
|
+
pdfNativeTextLength: nativeText.length,
|
|
17373
|
+
pdfOCRFallbackReason: "native_below_min_length",
|
|
17374
|
+
pdfOCRTextLength: summary.text.length,
|
|
17375
|
+
pdfTextBlockCount: extracted.textBlockCount,
|
|
17376
|
+
pdfTextBlocks: extracted.textBlocks,
|
|
17377
|
+
pdfTextMode: "hybrid"
|
|
17378
|
+
};
|
|
17379
|
+
const hybridDocument = {
|
|
17380
|
+
chunking: input.chunking,
|
|
17381
|
+
contentType: input.contentType ?? "application/pdf",
|
|
17382
|
+
format: "text",
|
|
17383
|
+
metadata: hybridMetadata,
|
|
17384
|
+
source: input.source ?? input.path ?? input.name ?? `${slugify(input.title ?? DEFAULT_BINARY_NAME)}.pdf`,
|
|
17385
|
+
text: nativeText,
|
|
17386
|
+
title: input.title
|
|
17387
|
+
};
|
|
17388
|
+
const pageDocuments2 = ocrPageDocuments(ocr, input, baseMetadata);
|
|
17389
|
+
const regionDocuments2 = ocrRegionDocuments(ocr, input, baseMetadata);
|
|
17390
|
+
return [hybridDocument, ...pageDocuments2, ...regionDocuments2];
|
|
17391
|
+
}
|
|
16094
17392
|
const summaryDocument = {
|
|
16095
17393
|
chunking: input.chunking,
|
|
16096
17394
|
contentType: input.contentType ?? "application/pdf",
|
|
@@ -16100,7 +17398,7 @@ var createRAGPDFOCRExtractor = (options) => ({
|
|
|
16100
17398
|
...baseMetadata
|
|
16101
17399
|
},
|
|
16102
17400
|
source: input.source ?? input.path ?? input.name ?? `${slugify(input.title ?? DEFAULT_BINARY_NAME)}.pdf`,
|
|
16103
|
-
text:
|
|
17401
|
+
text: summary.text,
|
|
16104
17402
|
title: ocr.title ?? input.title
|
|
16105
17403
|
};
|
|
16106
17404
|
const pageDocuments = ocrPageDocuments(ocr, input, baseMetadata);
|
|
@@ -16370,6 +17668,101 @@ var chunkFromUnits = (units, maxChunkLength, chunkOverlap, minChunkLength) => {
|
|
|
16370
17668
|
};
|
|
16371
17669
|
var chunkSourceAwareUnit = (unit, options) => {
|
|
16372
17670
|
const defaultSourceAwareChunkReason = unit.sectionKind === "markdown_heading" || unit.sectionKind === "html_heading" || unit.sectionKind === "office_heading" ? "section_boundary" : unit.sectionKind ? "source_native_unit" : unit.sourceAwareChunkReason;
|
|
17671
|
+
if (unit.officeBlockKind === "table" && typeof unit.officeTableHeaderText === "string" && typeof unit.officeTableBodyRowCount === "number" && unit.officeTableBodyRowCount > 0 && unit.text.length > options.maxChunkLength) {
|
|
17672
|
+
const headerLine = unit.officeTableHeaderText;
|
|
17673
|
+
const contextText = typeof unit.officeTableContextText === "string" ? unit.officeTableContextText : undefined;
|
|
17674
|
+
const bodyRows = unit.text.split(`
|
|
17675
|
+
`).map((line) => normalizeWhitespace(line)).filter((line) => /^Row \d+\./.test(line)).slice(1);
|
|
17676
|
+
const slices = [];
|
|
17677
|
+
let currentRows = [];
|
|
17678
|
+
let currentStart = 1;
|
|
17679
|
+
const pushSlice = () => {
|
|
17680
|
+
if (currentRows.length === 0) {
|
|
17681
|
+
return;
|
|
17682
|
+
}
|
|
17683
|
+
slices.push({
|
|
17684
|
+
bodyRowEnd: currentStart + currentRows.length - 1,
|
|
17685
|
+
bodyRowStart: currentStart,
|
|
17686
|
+
text: normalizeWhitespace([
|
|
17687
|
+
...typeof contextText === "string" ? [contextText] : [],
|
|
17688
|
+
headerLine,
|
|
17689
|
+
...currentRows
|
|
17690
|
+
].join(`
|
|
17691
|
+
`))
|
|
17692
|
+
});
|
|
17693
|
+
currentStart += currentRows.length;
|
|
17694
|
+
currentRows = [];
|
|
17695
|
+
};
|
|
17696
|
+
for (const row of bodyRows) {
|
|
17697
|
+
const candidateRows = [...currentRows, row];
|
|
17698
|
+
const candidateText = normalizeWhitespace([
|
|
17699
|
+
...typeof contextText === "string" ? [contextText] : [],
|
|
17700
|
+
headerLine,
|
|
17701
|
+
...candidateRows
|
|
17702
|
+
].join(`
|
|
17703
|
+
`));
|
|
17704
|
+
if (currentRows.length > 0 && candidateText.length > options.maxChunkLength) {
|
|
17705
|
+
pushSlice();
|
|
17706
|
+
}
|
|
17707
|
+
currentRows.push(row);
|
|
17708
|
+
}
|
|
17709
|
+
pushSlice();
|
|
17710
|
+
if (slices.length > 0) {
|
|
17711
|
+
return slices.map((slice) => ({
|
|
17712
|
+
...unit,
|
|
17713
|
+
officeTableBodyRowCount: slice.bodyRowEnd - slice.bodyRowStart + 1,
|
|
17714
|
+
officeTableBodyRowEnd: slice.bodyRowEnd,
|
|
17715
|
+
officeTableBodyRowStart: slice.bodyRowStart,
|
|
17716
|
+
officeTableChunkKind: slices.length > 1 ? "table_slice" : "full_table",
|
|
17717
|
+
officeTableRowCount: slice.bodyRowEnd - slice.bodyRowStart + 2,
|
|
17718
|
+
sourceAwareChunkReason: slices.length > 1 ? "size_limit" : defaultSourceAwareChunkReason,
|
|
17719
|
+
text: slice.text
|
|
17720
|
+
}));
|
|
17721
|
+
}
|
|
17722
|
+
}
|
|
17723
|
+
if (unit.pdfTextKind === "table_like" && typeof unit.pdfTableHeaderText === "string" && typeof unit.pdfTableBodyRowCount === "number" && unit.pdfTableBodyRowCount > 0 && unit.text.length > options.maxChunkLength) {
|
|
17724
|
+
const headerLine = unit.pdfTableHeaderText;
|
|
17725
|
+
const bodyRows = unit.text.split(`
|
|
17726
|
+
`).map((line) => normalizeWhitespace(line)).filter(Boolean).slice(1);
|
|
17727
|
+
const slices = [];
|
|
17728
|
+
let currentRows = [];
|
|
17729
|
+
let currentStart = 1;
|
|
17730
|
+
const pushSlice = () => {
|
|
17731
|
+
if (currentRows.length === 0) {
|
|
17732
|
+
return;
|
|
17733
|
+
}
|
|
17734
|
+
slices.push({
|
|
17735
|
+
bodyRowEnd: currentStart + currentRows.length - 1,
|
|
17736
|
+
bodyRowStart: currentStart,
|
|
17737
|
+
text: normalizeWhitespace([headerLine, ...currentRows].join(`
|
|
17738
|
+
`))
|
|
17739
|
+
});
|
|
17740
|
+
currentStart += currentRows.length;
|
|
17741
|
+
currentRows = [];
|
|
17742
|
+
};
|
|
17743
|
+
for (const row of bodyRows) {
|
|
17744
|
+
const candidateRows = [...currentRows, row];
|
|
17745
|
+
const candidateText = normalizeWhitespace([headerLine, ...candidateRows].join(`
|
|
17746
|
+
`));
|
|
17747
|
+
if (currentRows.length > 0 && candidateText.length > options.maxChunkLength) {
|
|
17748
|
+
pushSlice();
|
|
17749
|
+
}
|
|
17750
|
+
currentRows.push(row);
|
|
17751
|
+
}
|
|
17752
|
+
pushSlice();
|
|
17753
|
+
if (slices.length > 0) {
|
|
17754
|
+
return slices.map((slice) => ({
|
|
17755
|
+
...unit,
|
|
17756
|
+
pdfTableBodyRowCount: slice.bodyRowEnd - slice.bodyRowStart + 1,
|
|
17757
|
+
pdfTableBodyRowEnd: slice.bodyRowEnd,
|
|
17758
|
+
pdfTableBodyRowStart: slice.bodyRowStart,
|
|
17759
|
+
pdfTableChunkKind: slices.length > 1 ? "table_slice" : "full_table",
|
|
17760
|
+
pdfTableRowCount: slice.bodyRowEnd - slice.bodyRowStart + 2,
|
|
17761
|
+
sourceAwareChunkReason: slices.length > 1 ? "size_limit" : defaultSourceAwareChunkReason,
|
|
17762
|
+
text: slice.text
|
|
17763
|
+
}));
|
|
17764
|
+
}
|
|
17765
|
+
}
|
|
16373
17766
|
if (unit.text.length <= options.maxChunkLength) {
|
|
16374
17767
|
return [
|
|
16375
17768
|
{
|
|
@@ -16666,7 +18059,37 @@ var prepareRAGDocument = (document, defaultChunking, chunkingRegistry) => {
|
|
|
16666
18059
|
...typeof entry.pageNumber === "number" ? { pageNumber: entry.pageNumber } : {},
|
|
16667
18060
|
...typeof entry.officeBlockNumber === "number" ? { officeBlockNumber: entry.officeBlockNumber } : {},
|
|
16668
18061
|
...entry.officeBlockKind ? { officeBlockKind: entry.officeBlockKind } : {},
|
|
18062
|
+
...typeof entry.officeListContextText === "string" ? { officeListContextText: entry.officeListContextText } : {},
|
|
18063
|
+
...typeof entry.officeListGroupItemCount === "number" ? {
|
|
18064
|
+
officeListGroupItemCount: entry.officeListGroupItemCount
|
|
18065
|
+
} : {},
|
|
18066
|
+
...typeof entry.officeListLevel === "number" ? { officeListLevel: entry.officeListLevel } : {},
|
|
18067
|
+
...Array.isArray(entry.officeListLevels) && entry.officeListLevels.length > 0 ? { officeListLevels: entry.officeListLevels } : {},
|
|
18068
|
+
...typeof entry.officeTableBodyRowCount === "number" ? { officeTableBodyRowCount: entry.officeTableBodyRowCount } : {},
|
|
18069
|
+
...typeof entry.officeTableBodyRowEnd === "number" ? { officeTableBodyRowEnd: entry.officeTableBodyRowEnd } : {},
|
|
18070
|
+
...typeof entry.officeTableBodyRowStart === "number" ? { officeTableBodyRowStart: entry.officeTableBodyRowStart } : {},
|
|
18071
|
+
...entry.officeTableChunkKind ? { officeTableChunkKind: entry.officeTableChunkKind } : {},
|
|
18072
|
+
...typeof entry.officeTableColumnCount === "number" ? { officeTableColumnCount: entry.officeTableColumnCount } : {},
|
|
18073
|
+
...typeof entry.officeTableContextText === "string" ? { officeTableContextText: entry.officeTableContextText } : {},
|
|
18074
|
+
...typeof entry.officeTableHeaderText === "string" ? { officeTableHeaderText: entry.officeTableHeaderText } : {},
|
|
18075
|
+
...Array.isArray(entry.officeTableHeaders) && entry.officeTableHeaders.length > 0 ? { officeTableHeaders: entry.officeTableHeaders } : {},
|
|
18076
|
+
...typeof entry.officeTableRowCount === "number" ? { officeTableRowCount: entry.officeTableRowCount } : {},
|
|
18077
|
+
...typeof entry.officeTableSignature === "string" ? { officeTableSignature: entry.officeTableSignature } : {},
|
|
16669
18078
|
...typeof entry.pdfBlockNumber === "number" ? { pdfBlockNumber: entry.pdfBlockNumber } : {},
|
|
18079
|
+
...typeof entry.pdfFigureCaptionBlockNumber === "number" ? {
|
|
18080
|
+
pdfFigureCaptionBlockNumber: entry.pdfFigureCaptionBlockNumber
|
|
18081
|
+
} : {},
|
|
18082
|
+
...typeof entry.pdfFigureLabel === "string" ? { pdfFigureLabel: entry.pdfFigureLabel } : {},
|
|
18083
|
+
...entry.pdfSemanticRole ? { pdfSemanticRole: entry.pdfSemanticRole } : {},
|
|
18084
|
+
...typeof entry.pdfTableBodyRowEnd === "number" ? { pdfTableBodyRowEnd: entry.pdfTableBodyRowEnd } : {},
|
|
18085
|
+
...typeof entry.pdfTableBodyRowCount === "number" ? { pdfTableBodyRowCount: entry.pdfTableBodyRowCount } : {},
|
|
18086
|
+
...typeof entry.pdfTableBodyRowStart === "number" ? { pdfTableBodyRowStart: entry.pdfTableBodyRowStart } : {},
|
|
18087
|
+
...entry.pdfTableChunkKind ? { pdfTableChunkKind: entry.pdfTableChunkKind } : {},
|
|
18088
|
+
...typeof entry.pdfTableColumnCount === "number" ? { pdfTableColumnCount: entry.pdfTableColumnCount } : {},
|
|
18089
|
+
...typeof entry.pdfTableHeaderText === "string" ? { pdfTableHeaderText: entry.pdfTableHeaderText } : {},
|
|
18090
|
+
...Array.isArray(entry.pdfTableHeaders) && entry.pdfTableHeaders.length > 0 ? { pdfTableHeaders: entry.pdfTableHeaders } : {},
|
|
18091
|
+
...typeof entry.pdfTableRowCount === "number" ? { pdfTableRowCount: entry.pdfTableRowCount } : {},
|
|
18092
|
+
...typeof entry.pdfTableSignature === "string" ? { pdfTableSignature: entry.pdfTableSignature } : {},
|
|
16670
18093
|
...entry.pdfTextKind ? { pdfTextKind: entry.pdfTextKind } : {},
|
|
16671
18094
|
...entry.sectionKind ? { sectionKind: entry.sectionKind } : {},
|
|
16672
18095
|
...entry.sourceAwareChunkReason ? { sourceAwareChunkReason: entry.sourceAwareChunkReason } : {},
|
|
@@ -16903,6 +18326,9 @@ var prepareRAGDirectoryDocuments = async (input) => prepareRAGDocuments(await lo
|
|
|
16903
18326
|
|
|
16904
18327
|
// src/ai/rag/collection.ts
|
|
16905
18328
|
var DEFAULT_TOP_K2 = 6;
|
|
18329
|
+
// Minimum native row estimate at which resolveNativeQueryProfileSelection auto-selects the "balanced" profile.
var AUTO_BALANCED_NATIVE_ROW_ESTIMATE = 5000;
|
|
18330
|
+
// Minimum native row estimate at which resolveNativeQueryProfileSelection may auto-select the "recall" profile
// (it additionally requires a complex filter, variant queries, or hybrid retrieval).
var AUTO_RECALL_NATIVE_ROW_ESTIMATE = 20000;
|
|
18331
|
+
// Filter clause count (as reported by countSearchFilterClauses) from which a filter counts as
// "complex" for the automatic "recall" profile selection.
var AUTO_RECALL_FILTER_CLAUSE_COUNT = 3;
|
|
16906
18332
|
var MULTIVECTOR_VARIANT_CHUNK_DELIMITER = "__mv__";
|
|
16907
18333
|
var MULTIVECTOR_PARENT_CHUNK_ID = "absoluteMultivectorParentChunkId";
|
|
16908
18334
|
var MULTIVECTOR_VARIANT_ID = "absoluteMultivectorVariantId";
|
|
@@ -16910,6 +18336,40 @@ var MULTIVECTOR_VARIANT_LABEL = "absoluteMultivectorVariantLabel";
|
|
|
16910
18336
|
var MULTIVECTOR_VARIANT_TEXT = "absoluteMultivectorVariantText";
|
|
16911
18337
|
var MULTIVECTOR_VARIANT_METADATA = "absoluteMultivectorVariantMetadata";
|
|
16912
18338
|
var MULTIVECTOR_PRIMARY = "absoluteMultivectorPrimary";
|
|
18339
|
+
/**
 * Reports whether `value` is a non-null object that is not an array.
 *
 * @param {unknown} value - Candidate to inspect.
 * @returns {boolean} `true` for plain-object-like values, `false` otherwise.
 */
var isObjectRecord2 = (value) => {
  if (!value || typeof value !== "object") {
    return false;
  }
  return !Array.isArray(value);
};
|
|
18340
|
+
/**
 * Counts the leaf clauses of a search filter.
 *
 * Every own key counts as one clause, except the logical operators:
 * `$and` / `$or` contribute the summed clause counts of their object entries
 * (non-array operands and non-object entries contribute nothing), and `$not`
 * contributes the clause count of its object operand.
 *
 * @param {object | null | undefined} filter - Filter to inspect.
 * @returns {number} Number of leaf clauses; `0` for a missing filter.
 */
var countSearchFilterClauses = (filter) => {
  if (!filter) {
    return 0;
  }
  // Only non-array objects are treated as nested filters.
  const countNested = (candidate) => {
    const isRecord = Boolean(candidate) && typeof candidate === "object" && !Array.isArray(candidate);
    return isRecord ? countSearchFilterClauses(candidate) : 0;
  };
  return Object.entries(filter).reduce((total, [operator, operand]) => {
    switch (operator) {
      case "$and":
      case "$or":
        return Array.isArray(operand) ? operand.reduce((sum, entry) => sum + countNested(entry), total) : total;
      case "$not":
        return total + countNested(operand);
      default:
        return total + 1;
    }
  }, 0);
};
|
|
18360
|
+
/**
 * Reads a non-negative integer row estimate from a store's native status.
 *
 * Supported combinations:
 * - backend "postgres" with native mode "pgvector": uses `native.estimatedRowCount`
 * - backend "sqlite" with native mode "vec0": uses `native.rowCount`
 *
 * @param {{ getStatus?: () => any }} store - Store exposing an optional `getStatus()`.
 * @returns {number | undefined} The floored, zero-clamped row count, or `undefined`
 *   when the store reports no usable native status.
 */
var getNativeCorpusRowEstimate = (store) => {
  const status = store.getStatus?.();
  const native = status?.native;
  // The `in` operator throws a TypeError on primitives, so only probe objects/functions.
  const canProbe = Boolean(native) && (typeof native === "object" || typeof native === "function");
  if (!canProbe || !("mode" in native)) {
    return;
  }
  const toRowCount = (value) => typeof value === "number" && Number.isFinite(value) ? Math.max(0, Math.floor(value)) : undefined;
  if (status.backend === "postgres" && native.mode === "pgvector") {
    return toRowCount(native.estimatedRowCount);
  }
  if (status.backend === "sqlite" && native.mode === "vec0") {
    return toRowCount(native.rowCount);
  }
  return;
};
|
|
16913
18373
|
var resolveNativeQueryProfile = (input) => {
|
|
16914
18374
|
if (!input.profile) {
|
|
16915
18375
|
return;
|
|
@@ -16941,6 +18401,45 @@ var resolveNativeQueryProfile = (input) => {
|
|
|
16941
18401
|
queryMultiplier: 4
|
|
16942
18402
|
};
|
|
16943
18403
|
};
|
|
18404
|
+
/**
 * Chooses the native query profile for a search and reports how it was chosen.
 *
 * - An explicit profile always wins (`selectionMode: "explicit"`).
 * - Otherwise "recall" is chosen when the row estimate reaches
 *   AUTO_RECALL_NATIVE_ROW_ESTIMATE and the search is complex (enough filter
 *   clauses, variant queries, or hybrid retrieval).
 * - Otherwise "balanced" is chosen when the row estimate reaches
 *   AUTO_BALANCED_NATIVE_ROW_ESTIMATE.
 * - Otherwise no profile is selected (`selectionMode: "default"`).
 *
 * @param {object} input - Reads `explicitProfile`, `filter`, `store`, `candidateTopK`,
 *   `topK`, `variantQueryCount` and `retrievalMode`.
 * @returns {object} `{ filterClauseCount, profile, reason, resolved, rowEstimate, selectionMode }`.
 */
var resolveNativeQueryProfileSelection = (input) => {
  const filterClauseCount = countSearchFilterClauses(input.filter);
  const rowEstimate = getNativeCorpusRowEstimate(input.store);
  // Single place that assembles the result so every branch reports the same shape.
  const buildSelection = (profile, reason, selectionMode) => ({
    filterClauseCount,
    profile,
    reason,
    resolved: resolveNativeQueryProfile({
      candidateTopK: input.candidateTopK,
      profile,
      topK: input.topK
    }),
    rowEstimate,
    selectionMode
  });
  if (input.explicitProfile) {
    return buildSelection(input.explicitProfile, undefined, "explicit");
  }
  const hasRowEstimate = typeof rowEstimate === "number";
  const isComplexSearch = filterClauseCount >= AUTO_RECALL_FILTER_CLAUSE_COUNT || input.variantQueryCount > 0 || input.retrievalMode === "hybrid";
  if (hasRowEstimate && rowEstimate >= AUTO_RECALL_NATIVE_ROW_ESTIMATE && isComplexSearch) {
    return buildSelection("recall", "larger corpus with complex or expanded retrieval benefits from deeper candidate recovery", "auto");
  }
  if (hasRowEstimate && rowEstimate >= AUTO_BALANCED_NATIVE_ROW_ESTIMATE) {
    return buildSelection("balanced", "larger native corpus benefits from balanced candidate expansion and backfill", "auto");
  }
  return buildSelection(undefined, undefined, "default");
};
|
|
16944
18443
|
var VARIANT_RESULT_WEIGHT = 0.92;
|
|
16945
18444
|
/**
 * Normalizes text for cue matching: lower-cases it, turns every run of
 * characters outside `a-z0-9` into a single space, and trims the ends.
 *
 * @param {string} value - Raw text.
 * @returns {string} Lower-case words separated by single spaces.
 */
var normalizeTraceCueText = (value) => {
  const lowered = value.toLowerCase();
  const alphanumericOnly = lowered.replace(/[^a-z0-9]+/g, " ");
  const trimmed = alphanumericOnly.trim();
  return trimmed.replace(/\s+/g, " ");
};
|
|
16946
18445
|
var queryIncludesTraceCue = (query, value) => {
|
|
@@ -17310,6 +18809,166 @@ var annotateRetrievalChannels = (input) => {
|
|
|
17310
18809
|
};
|
|
17311
18810
|
});
|
|
17312
18811
|
};
|
|
18812
|
+
/**
 * Ranks a chunk's PDF evidence provenance; higher values are preferred.
 *
 * - 3: hybrid mode, native origin, OCR supplement
 * - 2: native mode, native origin
 * - 1: OCR mode, OCR origin
 * - 0: anything else, including missing metadata
 *
 * @param {object | null | undefined} metadata - Chunk metadata.
 * @returns {number} Preference rank between 0 and 3.
 */
var getPDFRetrievalEvidencePreference = (metadata) => {
  if (!metadata) {
    return 0;
  }
  // Non-string fields are treated as absent.
  const readString = (key) => {
    const raw = metadata[key];
    return typeof raw === "string" ? raw : undefined;
  };
  const mode = readString("pdfEvidenceMode");
  const origin = readString("pdfEvidenceOrigin");
  const supplement = readString("pdfEvidenceSupplement");
  const isNativeWithOcrSupplement = mode === "hybrid" && origin === "native" && supplement === "ocr";
  const isNativeOnly = mode === "native" && origin === "native";
  const isOcrOnly = mode === "ocr" && origin === "ocr";
  return isNativeWithOcrSupplement ? 3 : isNativeOnly ? 2 : isOcrOnly ? 1 : 0;
};
|
|
18830
|
+
/**
 * Extracts the scope (source, page, section title) used to compare results.
 *
 * The page number comes from `metadata.pageNumber`, then `metadata.page`,
 * then `metadata.pageIndex + 1`, whichever is a number first.
 *
 * @param {{ source?: unknown, metadata?: object }} result - Retrieval result.
 * @returns {{ pageNumber: number | undefined, sectionTitle: string | undefined, source: string } | undefined}
 *   The scope, or `undefined` when metadata or a non-empty string source is missing.
 */
var getPDFRetrievalScope = (result) => {
  const { metadata } = result;
  if (!metadata) {
    return;
  }
  const hasSource = typeof result.source === "string" && result.source.length > 0;
  if (!hasSource) {
    return;
  }
  let pageNumber;
  if (typeof metadata.pageNumber === "number") {
    pageNumber = metadata.pageNumber;
  } else if (typeof metadata.page === "number") {
    pageNumber = metadata.page;
  } else if (typeof metadata.pageIndex === "number") {
    // pageIndex is converted to a page number by adding one.
    pageNumber = metadata.pageIndex + 1;
  }
  const hasSectionTitle = typeof metadata.sectionTitle === "string" && metadata.sectionTitle.length > 0;
  return {
    pageNumber,
    sectionTitle: hasSectionTitle ? metadata.sectionTitle : undefined,
    source: result.source
  };
};
|
|
18847
|
+
/**
 * Builds a string key for a PDF retrieval scope.
 *
 * A numeric page number takes precedence over the section title.
 *
 * @param {{ pageNumber?: number, sectionTitle?: string, source: string } | undefined} scope
 * @returns {string | undefined} `<source>::page:<n>` or `<source>::section:<title>`;
 *   `undefined` when the scope is missing or has neither a page nor a section title.
 */
var getPDFRetrievalComparableScopeKey = (scope) => {
  if (!scope) {
    return;
  }
  const { pageNumber, sectionTitle, source } = scope;
  if (typeof pageNumber === "number") {
    return `${source}::page:${pageNumber}`;
  }
  return sectionTitle ? `${source}::section:${sectionTitle}` : undefined;
};
|
|
18859
|
+
/**
 * Extracts the office-document scope of a table or list result.
 *
 * The section title is `metadata.sectionTitle` when it is a non-blank string,
 * otherwise the last non-blank entry of `metadata.sectionPath`. Both are
 * trimmed, so the same section yields the same title whichever field
 * supplied it.
 *
 * @param {{ source?: unknown, metadata?: object }} result - Retrieval result.
 * @returns {{ blockKind: "table" | "list", hasContext: boolean, pathDepth: number, sectionTitle: string, source: string } | undefined}
 *   The scope, or `undefined` when the result is not an office table/list block
 *   or lacks a non-empty string source or a section title.
 */
var getOfficeRetrievalScope = (result) => {
  const metadata = result.metadata;
  if (!metadata) {
    return;
  }
  const blockKind = metadata.officeBlockKind;
  if (blockKind !== "table" && blockKind !== "list") {
    return;
  }
  const source = typeof result.source === "string" && result.source.length > 0 ? result.source : undefined;
  if (!source) {
    return;
  }
  const isNonBlankString = (value) => typeof value === "string" && value.trim().length > 0;
  const sectionPath = Array.isArray(metadata.sectionPath) ? metadata.sectionPath.filter((value) => isNonBlankString(value)) : [];
  // Trim the path fallback too; otherwise " Pricing " (path) and "Pricing" (title) would not match.
  const sectionTitle = isNonBlankString(metadata.sectionTitle) ? metadata.sectionTitle.trim() : sectionPath.at(-1)?.trim();
  if (!sectionTitle) {
    return;
  }
  // Tables and lists carry their surrounding context in different metadata fields.
  const contextText = blockKind === "table" ? metadata.officeTableContextText : metadata.officeListContextText;
  return {
    blockKind,
    hasContext: isNonBlankString(contextText),
    pathDepth: sectionPath.length,
    sectionTitle,
    source
  };
};
|
|
18885
|
+
/**
 * Builds a string key for an office retrieval scope.
 *
 * @param {{ source: string, blockKind: string, sectionTitle: string } | undefined} scope
 * @returns {string | undefined} `<source>::office_section:<blockKind>:<sectionTitle>`,
 *   or `undefined` when no scope is given.
 */
var getOfficeRetrievalComparableScopeKey = (scope) => {
  if (scope) {
    const { blockKind, sectionTitle, source } = scope;
    return `${source}::office_section:${blockKind}:${sectionTitle}`;
  }
  return;
};
|
|
18891
|
+
/**
 * Scores how much structural evidence an office table/list chunk carries.
 *
 * Score = 10 per non-blank `sectionPath` entry
 *       + 1 when the block has non-blank context text
 *       + 1 when a list block belongs to a group of more than one item.
 *
 * @param {object | null | undefined} metadata - Chunk metadata.
 * @returns {number} The score; `0` for missing metadata or other block kinds.
 */
var getOfficeRetrievalEvidencePreference = (metadata) => {
  if (!metadata) {
    return 0;
  }
  const kind = metadata.officeBlockKind;
  const isTable = kind === "table";
  const isList = kind === "list";
  if (!isTable && !isList) {
    return 0;
  }
  const namedSegments = Array.isArray(metadata.sectionPath) ? metadata.sectionPath.filter((segment) => typeof segment === "string" && segment.trim().length > 0) : [];
  const depthScore = namedSegments.length * 10;
  const contextText = isTable ? metadata.officeTableContextText : metadata.officeListContextText;
  const contextBonus = typeof contextText === "string" && contextText.trim().length > 0 ? 1 : 0;
  const groupBonus = isList && typeof metadata.officeListGroupItemCount === "number" && metadata.officeListGroupItemCount > 1 ? 1 : 0;
  return depthScore + contextBonus + groupBonus;
};
|
|
18902
|
+
/**
 * Picks the trace label for the evidence-reconcile step.
 *
 * @param {{ officeAffectedScopeCount: number, pdfAffectedScopeCount: number }} input
 * @returns {string} An office-specific label when only office scopes were affected,
 *   a PDF-specific label when only PDF scopes were affected, otherwise a generic label.
 */
var buildStructuredEvidenceReconcileLabel = (input) => {
  const { officeAffectedScopeCount: officeCount, pdfAffectedScopeCount: pdfCount } = input;
  const onlyOffice = officeCount > 0 && pdfCount === 0;
  const onlyPdf = pdfCount > 0 && officeCount === 0;
  return onlyOffice ? "Preferred deeper office-structure evidence within matching sections" : onlyPdf ? "Preferred native-layout PDF evidence within matching sections" : "Preferred stronger structured evidence within matching sections";
};
|
|
18911
|
+
/**
 * Re-orders results so that, within a matching scope, stronger structured
 * evidence comes first, and reports what moved.
 *
 * Two results are compared by evidence preference only when they share a scope:
 * - office: same source, block kind and section title
 * - PDF: same source and either the same section title or the same page number
 * Otherwise the original order is kept.
 *
 * Scopes and preferences are computed once per result rather than on every
 * comparator call.
 *
 * @param {Array<{ chunkId?: unknown, source?: unknown, metadata?: object }>} results - Ranked results.
 * @returns {{ affectedScopeCount: number, label: string, applied: boolean, officeAffectedScopeCount: number, pdfAffectedScopeCount: number, results: Array<object>, reorderedResults: number }}
 *   The re-ordered results plus counts of moved results and affected scopes.
 */
var reconcileStructuredEvidenceOrdering = (results) => {
  const annotated = results.map((result, index) => ({
    index,
    officePreference: getOfficeRetrievalEvidencePreference(result.metadata),
    officeScope: getOfficeRetrievalScope(result),
    pdfPreference: getPDFRetrievalEvidencePreference(result.metadata),
    pdfScope: getPDFRetrievalScope(result),
    result
  }));
  const sharesOfficeScope = (left, right) => Boolean(left && right && left.source === right.source && left.blockKind === right.blockKind && left.sectionTitle === right.sectionTitle);
  const sharesPDFScope = (left, right) => {
    if (!left || !right || left.source !== right.source) {
      return false;
    }
    const sameSection = Boolean(left.sectionTitle && right.sectionTitle && left.sectionTitle === right.sectionTitle);
    const samePage = typeof left.pageNumber === "number" && typeof right.pageNumber === "number" && left.pageNumber === right.pageNumber;
    return sameSection || samePage;
  };
  // NOTE(review): preference applies only to same-scope pairs and everything else falls back
  // to original index, so this comparator may not be a consistent total order for interleaved
  // scopes — confirm the resulting order is acceptable for such inputs.
  const sorted = [...annotated].sort((leftEntry, rightEntry) => {
    if (sharesOfficeScope(leftEntry.officeScope, rightEntry.officeScope) && rightEntry.officePreference !== leftEntry.officePreference) {
      return rightEntry.officePreference - leftEntry.officePreference;
    }
    if (sharesPDFScope(leftEntry.pdfScope, rightEntry.pdfScope) && rightEntry.pdfPreference !== leftEntry.pdfPreference) {
      return rightEntry.pdfPreference - leftEntry.pdfPreference;
    }
    return leftEntry.index - rightEntry.index;
  });
  const officeAffectedScopes = new Set();
  const pdfAffectedScopes = new Set();
  let reorderedResults = 0;
  for (const [position, entry] of sorted.entries()) {
    // A position counts as moved when its chunk id differs from the input's at that position.
    if (results[position]?.chunkId === entry.result.chunkId) {
      continue;
    }
    reorderedResults += 1;
    if (entry.officeScope) {
      // Office scopes take precedence: a moved office result is never also counted as PDF.
      const officeScopeKey = getOfficeRetrievalComparableScopeKey(entry.officeScope);
      if (officeScopeKey) {
        officeAffectedScopes.add(officeScopeKey);
      }
      continue;
    }
    const pdfScopeKey = getPDFRetrievalComparableScopeKey(entry.pdfScope);
    if (pdfScopeKey) {
      pdfAffectedScopes.add(pdfScopeKey);
    }
  }
  return {
    affectedScopeCount: officeAffectedScopes.size + pdfAffectedScopes.size,
    label: buildStructuredEvidenceReconcileLabel({
      officeAffectedScopeCount: officeAffectedScopes.size,
      pdfAffectedScopeCount: pdfAffectedScopes.size
    }),
    applied: reorderedResults > 0,
    officeAffectedScopeCount: officeAffectedScopes.size,
    pdfAffectedScopeCount: pdfAffectedScopes.size,
    results: sorted.map((entry) => entry.result),
    reorderedResults
  };
};
|
|
17313
18972
|
var getStructuredSectionScoreWeight2 = (metadata) => {
|
|
17314
18973
|
const pdfTextKind = typeof metadata?.pdfTextKind === "string" ? metadata.pdfTextKind : undefined;
|
|
17315
18974
|
const officeBlockKind = typeof metadata?.officeBlockKind === "string" ? metadata.officeBlockKind : undefined;
|
|
@@ -17598,12 +19257,30 @@ var createRAGCollection = (options) => {
|
|
|
17598
19257
|
stage: "routing"
|
|
17599
19258
|
});
|
|
17600
19259
|
}
|
|
17601
|
-
const
|
|
17602
|
-
|
|
17603
|
-
|
|
17604
|
-
|
|
17605
|
-
|
|
19260
|
+
const nativeQueryProfileSelection = resolveNativeQueryProfileSelection({
|
|
19261
|
+
candidateTopK,
|
|
19262
|
+
explicitProfile: input.nativeQueryProfile,
|
|
19263
|
+
filter: input.filter,
|
|
19264
|
+
retrievalMode: retrieval.mode,
|
|
19265
|
+
store: options.store,
|
|
19266
|
+
topK,
|
|
19267
|
+
variantQueryCount: Math.max(0, searchQueries.length - 1)
|
|
19268
|
+
});
|
|
19269
|
+
const nativeQueryProfile = nativeQueryProfileSelection.resolved;
|
|
19270
|
+
if (runVector && nativeQueryProfileSelection.selectionMode === "auto" && nativeQueryProfileSelection.profile) {
|
|
19271
|
+
steps.push({
|
|
19272
|
+
label: "Selected native planner profile",
|
|
19273
|
+
metadata: {
|
|
19274
|
+
autoSelected: true,
|
|
19275
|
+
filterClauseCount: nativeQueryProfileSelection.filterClauseCount,
|
|
19276
|
+
reason: nativeQueryProfileSelection.reason ?? null,
|
|
19277
|
+
rowEstimate: nativeQueryProfileSelection.rowEstimate ?? null,
|
|
19278
|
+
selectedProfile: nativeQueryProfileSelection.profile
|
|
19279
|
+
},
|
|
19280
|
+
stage: "routing"
|
|
17606
19281
|
});
|
|
19282
|
+
}
|
|
19283
|
+
const resultGroups = await Promise.all(searchQueries.map(async (query, queryIndex) => {
|
|
17607
19284
|
const [vectorResults2, lexicalResults2] = await Promise.all([
|
|
17608
19285
|
runVector ? embed({
|
|
17609
19286
|
model,
|
|
@@ -17760,9 +19437,25 @@ var createRAGCollection = (options) => {
|
|
|
17760
19437
|
stage: "source_balance"
|
|
17761
19438
|
});
|
|
17762
19439
|
}
|
|
19440
|
+
const evidenceReconciled = reconcileStructuredEvidenceOrdering(diversified);
|
|
19441
|
+
if (evidenceReconciled.applied) {
|
|
19442
|
+
steps.push({
|
|
19443
|
+
count: evidenceReconciled.results.length,
|
|
19444
|
+
label: evidenceReconciled.label,
|
|
19445
|
+
metadata: {
|
|
19446
|
+
affectedScopes: evidenceReconciled.affectedScopeCount,
|
|
19447
|
+
officeAffectedScopes: evidenceReconciled.officeAffectedScopeCount,
|
|
19448
|
+
pdfAffectedScopes: evidenceReconciled.pdfAffectedScopeCount,
|
|
19449
|
+
reorderedResults: evidenceReconciled.reorderedResults
|
|
19450
|
+
},
|
|
19451
|
+
sectionCounts: buildTraceSectionCounts(evidenceReconciled.results),
|
|
19452
|
+
sectionScores: buildTraceSectionScores(evidenceReconciled.results),
|
|
19453
|
+
stage: "evidence_reconcile"
|
|
19454
|
+
});
|
|
19455
|
+
}
|
|
17763
19456
|
const limited = annotateRetrievalChannels({
|
|
17764
19457
|
lexicalResults,
|
|
17765
|
-
results:
|
|
19458
|
+
results: evidenceReconciled.results.slice(0, topK),
|
|
17766
19459
|
vectorResults
|
|
17767
19460
|
});
|
|
17768
19461
|
if (typeof input.scoreThreshold !== "number") {
|
|
@@ -17928,6 +19621,37 @@ var searchDocuments = async (collection, input) => collection.search(input);
|
|
|
17928
19621
|
// src/ai/rag/htmxWorkflowRenderers.ts
|
|
17929
19622
|
init_constants();
|
|
17930
19623
|
/**
 * Escapes the HTML-significant characters `&`, `<`, `>` and `"` in `text`.
 *
 * `&` is replaced first so the ampersands introduced by the other
 * replacements are not escaped a second time.
 *
 * @param {string} text - Raw text.
 * @returns {string} Text with the four characters replaced by HTML entities.
 */
var escapeHtml2 = (text) => text.replace(/&/g, "&amp;").replace(/</g, "&lt;").replace(/>/g, "&gt;").replace(/"/g, "&quot;");
|
|
19624
|
+
/**
 * Renders label/value rows as a `<dl class="rag-status">` definition list.
 *
 * @param {Array<{ label: string, value: string }>} rows - Rows to render; both
 *   fields are passed through escapeHtml2.
 * @returns {string} The list markup, or an empty string when there are no rows.
 */
var renderLabelValueRows = (rows) => {
  if (!(rows.length > 0)) {
    return "";
  }
  const items = rows.map((row) => `<div><dt>${escapeHtml2(row.label)}</dt><dd>${escapeHtml2(row.value)}</dd></div>`);
  return `<dl class="rag-status">${items.join("")}</dl>`;
};
|
|
19625
|
+
/**
 * Renders the benchmark runtime panel: suite summary, run history and
 * snapshot history.
 *
 * At most three recent runs and three snapshots are listed; when a list is
 * empty a placeholder paragraph is rendered instead.
 *
 * @param {{ response: object, title: string }} input - Benchmark response and panel heading.
 * @returns {string} The panel markup as a `<section class="rag-status-governance">`.
 */
var renderBenchmarkRuntimePanel = (input) => {
  const { response } = input;
  const summaryRows = [
    {
      label: "Suite",
      value: response.suite?.label ?? response.suite?.id ?? "n/a"
    }
  ];
  if (response.groupKey) {
    summaryRows.push({ label: "Group", value: response.groupKey });
  }
  if (response.corpusGroupKey) {
    summaryRows.push({ label: "Corpus group", value: response.corpusGroupKey });
  }
  const runRows = response.historyPresentation?.rows ?? [];
  const recentRuns = response.historyPresentation?.recentRuns ?? [];
  const snapshotRows = response.snapshotHistoryPresentation?.rows ?? [];
  const snapshots = response.snapshotHistoryPresentation?.snapshots ?? [];
  // Shared by the run and snapshot lists; only the first three entries are shown.
  const renderEntryList = (entries, emptyMarkup) => entries.length > 0 ? `<ul class="rag-status-capabilities">${entries.slice(0, 3).map((entry) => `<li><strong>${escapeHtml2(entry.label)}</strong> ${escapeHtml2(entry.summary)}</li>`).join("")}</ul>` : emptyMarkup;
  return [
    `<section class="rag-status-governance"><h3>${escapeHtml2(input.title)}</h3>`,
    renderLabelValueRows(summaryRows),
    `<h4>Run history</h4>`,
    renderLabelValueRows(runRows),
    renderEntryList(recentRuns, `<p class="rag-empty">No persisted benchmark runs yet.</p>`),
    `<h4>Snapshot history</h4>`,
    renderLabelValueRows(snapshotRows),
    renderEntryList(snapshots, `<p class="rag-empty">No saved suite snapshots yet.</p>`),
    `</section>`
  ].join("");
};
|
|
19640
|
+
/**
 * Renders the benchmark snapshot panel: suite summary, the saved snapshot (if
 * any) and the snapshot history.
 *
 * At most three snapshots are listed; an empty history renders a placeholder
 * paragraph instead.
 *
 * @param {{ response: object, title: string }} input - Snapshot response and panel heading.
 * @returns {string} The panel markup as a `<section class="rag-status-governance">`.
 */
var renderBenchmarkSnapshotPanel = (input) => {
  const { response } = input;
  const summaryRows = [
    {
      label: "Suite",
      value: response.suite?.label ?? response.suite?.id ?? "n/a"
    }
  ];
  if (response.snapshot) {
    summaryRows.push({
      label: "Saved snapshot",
      value: `${response.snapshot.label ?? response.snapshot.suiteId} \xB7 v${response.snapshot.version}`
    });
  }
  const snapshotRows = response.snapshotHistoryPresentation?.rows ?? [];
  const snapshots = response.snapshotHistoryPresentation?.snapshots ?? [];
  let snapshotList = `<p class="rag-empty">No saved suite snapshots yet.</p>`;
  if (snapshots.length > 0) {
    const items = snapshots.slice(0, 3).map((snapshot) => `<li><strong>${escapeHtml2(snapshot.label)}</strong> ${escapeHtml2(snapshot.summary)}</li>`);
    snapshotList = `<ul class="rag-status-capabilities">${items.join("")}</ul>`;
  }
  return [
    `<section class="rag-status-governance"><h3>${escapeHtml2(input.title)}</h3>`,
    renderLabelValueRows(summaryRows),
    renderLabelValueRows(snapshotRows),
    snapshotList,
    `</section>`
  ].join("");
};
|
|
17931
19655
|
var renderSourceLabels = (input) => {
|
|
17932
19656
|
if (!input) {
|
|
17933
19657
|
return "";
|
|
@@ -18124,7 +19848,7 @@ var renderRetrievalGovernancePanel = (retrievalComparisons) => {
|
|
|
18124
19848
|
const latest = retrievalComparisons.latest;
|
|
18125
19849
|
const alerts = (retrievalComparisons.alerts ?? []).slice(0, 3);
|
|
18126
19850
|
const releaseGroups = (retrievalComparisons.releaseGroups ?? []).slice(0, 2);
|
|
18127
|
-
const formatClassification = (classification) => classification === "multivector" ? "multivector regression" : classification === "general" ? "general regression" : undefined;
|
|
19851
|
+
const formatClassification = (classification) => classification === "multivector" ? "multivector regression" : classification === "evidence" ? "evidence regression" : classification === "runtime" ? "runtime regression" : classification === "general" ? "general regression" : undefined;
|
|
18128
19852
|
return `<section class="rag-status-governance"><h3>Retrieval governance</h3>` + (latest ? `<dl class="rag-status">` + `<div><dt>Latest comparison</dt><dd>${escapeHtml2(latest.label)}</dd></div>` + (latest.bestByPassingRate ? `<div><dt>Best passing rate</dt><dd>${escapeHtml2(latest.bestByPassingRate)}</dd></div>` : "") + (latest.bestByAverageF1 ? `<div><dt>Best average F1</dt><dd>${escapeHtml2(latest.bestByAverageF1)}</dd></div>` : "") + (latest.bestByMultivectorCollapsedCases ? `<div><dt>Best multivector collapse</dt><dd>${escapeHtml2(latest.bestByMultivectorCollapsedCases)}</dd></div>` : "") + (latest.bestByMultivectorLexicalHitCases ? `<div><dt>Best multivector lexical hits</dt><dd>${escapeHtml2(latest.bestByMultivectorLexicalHitCases)}</dd></div>` : "") + (latest.bestByMultivectorVectorHitCases ? `<div><dt>Best multivector vector hits</dt><dd>${escapeHtml2(latest.bestByMultivectorVectorHitCases)}</dd></div>` : "") + (latest.decisionSummary?.gate?.status ? `<div><dt>Gate</dt><dd>${escapeHtml2(latest.decisionSummary.gate.status)}</dd></div>` : "") + (latest.releaseVerdict?.status ? `<div><dt>Verdict</dt><dd>${escapeHtml2(latest.releaseVerdict.status)}</dd></div>` : "") + `</dl>` : "") + `<h4>Active alerts</h4>` + (alerts.length > 0 ? `<ul class="rag-status-capabilities">${alerts.map((alert) => `<li><strong>${escapeHtml2(alert.kind)}</strong>${formatClassification(alert.classification) ? ` <span>${escapeHtml2(formatClassification(alert.classification) ?? "")}</span>` : ""} ${escapeHtml2(alert.message)}</li>`).join("")}</ul>` : `<p class="rag-empty">No active retrieval comparison alerts.</p>`) + (releaseGroups.length > 0 ? `<h4>Release groups</h4><ul class="rag-status-capabilities">${releaseGroups.map((group) => {
|
|
18129
19853
|
const reasons = group.recommendedActionReasons?.slice(0, 2).join("; ") ?? "No recommended action.";
|
|
18130
19854
|
return `<li><strong>${escapeHtml2(group.groupKey)}</strong>${formatClassification(group.classification) ? ` <span>${escapeHtml2(formatClassification(group.classification) ?? "")}</span>` : ""} ${escapeHtml2(group.recommendedAction ?? "monitor")} \xB7 ${escapeHtml2(reasons)}</li>`;
|
|
@@ -18188,6 +19912,22 @@ var defaultSearchResults = ({
|
|
|
18188
19912
|
return defaultSearchResultItem(result, index, sectionJumps);
|
|
18189
19913
|
}).join("")}</section>`;
|
|
18190
19914
|
})();
|
|
19915
|
+
/**
 * Default renderer for the adaptive native planner benchmark response.
 *
 * @param {object} input - Benchmark response, passed on as `response`.
 * @returns {string} Markup produced by renderBenchmarkRuntimePanel.
 */
var defaultAdaptiveNativePlannerBenchmark = (input) => {
  const title = "Adaptive native planner benchmark";
  return renderBenchmarkRuntimePanel({ response: input, title });
};
|
|
19919
|
+
/**
 * Default renderer for the native backend comparison benchmark response.
 *
 * @param {object} input - Benchmark response, passed on as `response`.
 * @returns {string} Markup produced by renderBenchmarkRuntimePanel.
 */
var defaultNativeBackendComparisonBenchmark = (input) => {
  const title = "Native backend comparison benchmark";
  return renderBenchmarkRuntimePanel({ response: input, title });
};
|
|
19923
|
+
/**
 * Default renderer for the adaptive native planner snapshot response.
 *
 * @param {object} input - Snapshot response, passed on as `response`.
 * @returns {string} Markup produced by renderBenchmarkSnapshotPanel.
 */
var defaultAdaptiveNativePlannerBenchmarkSnapshot = (input) => {
  const title = "Adaptive native planner snapshots";
  return renderBenchmarkSnapshotPanel({ response: input, title });
};
|
|
19927
|
+
/**
 * Default renderer for the native backend comparison snapshot response.
 *
 * @param {object} input - Snapshot response, passed on as `response`.
 * @returns {string} Markup produced by renderBenchmarkSnapshotPanel.
 */
var defaultNativeBackendComparisonBenchmarkSnapshot = (input) => {
  const title = "Native backend comparison snapshots";
  return renderBenchmarkSnapshotPanel({ response: input, title });
};
|
|
18191
19931
|
/**
 * Default renderer for one document card.
 *
 * Falls back to "Document <n>" (1-based) when the title is empty, to "text"
 * for a missing format, "paragraphs" for a missing chunk strategy and 0 for a
 * missing chunk count.
 *
 * @param {object} document - Document summary (title, id, source, labels, format, chunkStrategy, chunkCount).
 * @param {number} index - Zero-based position of the document in the list.
 * @returns {string} The `<article class="rag-document">` markup.
 */
var defaultDocumentItem = (document, index) => {
  const heading = `<h3>${escapeHtml2(document.title || `Document ${index + 1}`)}</h3>`;
  const identifier = `<p class="rag-document-id">${escapeHtml2(document.id)}</p>`;
  const origin = `<p class="rag-document-source">${escapeHtml2(document.source)}</p>`;
  const labels = renderSourceLabels(document.labels);
  const meta = `<p class="rag-document-meta">${escapeHtml2(document.format ?? "text")} \xB7 ${escapeHtml2(document.chunkStrategy ?? "paragraphs")} \xB7 ${document.chunkCount ?? 0} chunks</p>`;
  return ['<article class="rag-document">', heading, identifier, origin, labels, meta, "</article>"].join("");
};
|
|
18192
19932
|
var defaultDocuments = ({
|
|
18193
19933
|
documents
|
|
@@ -18260,6 +20000,8 @@ var defaultEvaluateResult = ({
|
|
|
18260
20000
|
/**
 * Default renderer for an error message.
 *
 * @param {string} message - Error text; passed through escapeHtml2.
 * @returns {string} A `<div class="rag-error">` wrapping the message.
 */
var defaultError2 = (message) => {
  const safeMessage = escapeHtml2(message);
  return `<div class="rag-error">${safeMessage}</div>`;
};
|
|
18261
20001
|
/**
 * Default maintenance renderer; delegates to renderMaintenancePanel.
 *
 * @param {object} input - Maintenance payload forwarded unchanged.
 * @returns {string} Whatever renderMaintenancePanel returns for `input`.
 */
var defaultMaintenance = (input) => {
  return renderMaintenancePanel(input);
};
|
|
18262
20002
|
var resolveRAGWorkflowRenderers = (custom) => ({
|
|
20003
|
+
adaptiveNativePlannerBenchmark: custom?.adaptiveNativePlannerBenchmark ?? defaultAdaptiveNativePlannerBenchmark,
|
|
20004
|
+
adaptiveNativePlannerBenchmarkSnapshot: custom?.adaptiveNativePlannerBenchmarkSnapshot ?? defaultAdaptiveNativePlannerBenchmarkSnapshot,
|
|
18263
20005
|
chunkPreview: custom?.chunkPreview ?? defaultChunkPreview,
|
|
18264
20006
|
documentItem: custom?.documentItem ?? defaultDocumentItem,
|
|
18265
20007
|
documents: custom?.documents ?? defaultDocuments,
|
|
@@ -18267,6 +20009,8 @@ var resolveRAGWorkflowRenderers = (custom) => ({
|
|
|
18267
20009
|
error: custom?.error ?? defaultError2,
|
|
18268
20010
|
maintenance: custom?.maintenance ?? defaultMaintenance,
|
|
18269
20011
|
mutationResult: custom?.mutationResult ?? defaultMutationResult,
|
|
20012
|
+
nativeBackendComparisonBenchmark: custom?.nativeBackendComparisonBenchmark ?? defaultNativeBackendComparisonBenchmark,
|
|
20013
|
+
nativeBackendComparisonBenchmarkSnapshot: custom?.nativeBackendComparisonBenchmarkSnapshot ?? defaultNativeBackendComparisonBenchmarkSnapshot,
|
|
18270
20014
|
evaluateResult: custom?.evaluateResult ?? defaultEvaluateResult,
|
|
18271
20015
|
searchResultItem: custom?.searchResultItem ?? defaultSearchResultItem,
|
|
18272
20016
|
searchResults: custom?.searchResults ?? defaultSearchResults,
|
|
@@ -18451,25 +20195,25 @@ var defaultParseProvider2 = (content) => {
|
|
|
18451
20195
|
};
|
|
18452
20196
|
/**
 * Returns `value` when it is a finite number, otherwise `0`.
 *
 * @param {unknown} value - Candidate score.
 * @returns {number} The finite score, or `0` for NaN, infinities and non-numbers.
 */
var normalizeScore = (value) => {
  if (!Number.isFinite(value)) {
    return 0;
  }
  return value;
};
|
|
18453
20197
|
/**
 * Reports whether the request's `HX-Request` header is exactly "true".
 *
 * @param {{ headers: { get(name: string): string | null } }} request - Incoming request.
 * @returns {boolean} `true` when `HX-Request` equals "true".
 */
var isHTMXRequest = (request) => {
  const headerValue = request.headers.get("HX-Request");
  return headerValue === "true";
};
|
|
18454
|
-
var
|
|
20198
|
+
var isObjectRecord3 = (value) => Boolean(value) && typeof value === "object";
|
|
18455
20199
|
var getStringProperty = (value, key) => {
|
|
18456
|
-
if (!
|
|
20200
|
+
if (!isObjectRecord3(value)) {
|
|
18457
20201
|
return;
|
|
18458
20202
|
}
|
|
18459
20203
|
return typeof value[key] === "string" ? value[key] : undefined;
|
|
18460
20204
|
};
|
|
18461
20205
|
var getObjectProperty = (value, key) => {
|
|
18462
|
-
if (!
|
|
20206
|
+
if (!isObjectRecord3(value)) {
|
|
18463
20207
|
return;
|
|
18464
20208
|
}
|
|
18465
|
-
return
|
|
20209
|
+
return isObjectRecord3(value[key]) ? value[key] : undefined;
|
|
18466
20210
|
};
|
|
18467
20211
|
var getNumberProperty = (value, key) => {
|
|
18468
|
-
const candidate =
|
|
20212
|
+
const candidate = isObjectRecord3(value) ? value[key] : undefined;
|
|
18469
20213
|
return typeof candidate === "number" ? candidate : undefined;
|
|
18470
20214
|
};
|
|
18471
20215
|
var getIntegerLikeProperty = (value, key) => {
|
|
18472
|
-
const candidate =
|
|
20216
|
+
const candidate = isObjectRecord3(value) ? value[key] : undefined;
|
|
18473
20217
|
if (typeof candidate === "number" && Number.isFinite(candidate)) {
|
|
18474
20218
|
return candidate;
|
|
18475
20219
|
}
|
|
@@ -18479,7 +20223,7 @@ var getIntegerLikeProperty = (value, key) => {
|
|
|
18479
20223
|
const parsed = Number(candidate);
|
|
18480
20224
|
return Number.isFinite(parsed) ? parsed : undefined;
|
|
18481
20225
|
};
|
|
18482
|
-
var isMetadataMap = (value) =>
|
|
20226
|
+
var isMetadataMap = (value) => isObjectRecord3(value);
|
|
18483
20227
|
var markMaintenancePanelOutOfBand = (html) => html.replace('<div id="rag-status-maintenance-panel"', '<div id="rag-status-maintenance-panel" hx-swap-oob="outerHTML"');
|
|
18484
20228
|
var normalizeStringArray2 = (value) => {
|
|
18485
20229
|
if (!Array.isArray(value)) {
|
|
@@ -18492,7 +20236,7 @@ var parseRAGSearchTracePruneInput = (value) => {
|
|
|
18492
20236
|
if (value === undefined) {
|
|
18493
20237
|
return;
|
|
18494
20238
|
}
|
|
18495
|
-
if (!
|
|
20239
|
+
if (!isObjectRecord3(value)) {
|
|
18496
20240
|
return null;
|
|
18497
20241
|
}
|
|
18498
20242
|
const pruneInput = {};
|
|
@@ -18533,7 +20277,7 @@ var parseRAGSearchTracePruneInput = (value) => {
|
|
|
18533
20277
|
}
|
|
18534
20278
|
return pruneInput;
|
|
18535
20279
|
};
|
|
18536
|
-
var getOwnProperty = (value, key) =>
|
|
20280
|
+
var getOwnProperty = (value, key) => isObjectRecord3(value) ? Object.prototype.hasOwnProperty.call(value, key) : false;
|
|
18537
20281
|
var parseRetrievalMode = (value) => {
|
|
18538
20282
|
if (value === "vector" || value === "lexical" || value === "hybrid") {
|
|
18539
20283
|
return value;
|
|
@@ -18561,7 +20305,12 @@ var parseRAGRetrieval = (value) => {
|
|
|
18561
20305
|
"fusion",
|
|
18562
20306
|
"fusionConstant",
|
|
18563
20307
|
"lexicalWeight",
|
|
18564
|
-
"vectorWeight"
|
|
20308
|
+
"vectorWeight",
|
|
20309
|
+
"nativeQueryProfile",
|
|
20310
|
+
"nativeCandidateLimit",
|
|
20311
|
+
"nativeMaxBackfills",
|
|
20312
|
+
"nativeMinResults",
|
|
20313
|
+
"nativeFillPolicy"
|
|
18565
20314
|
]);
|
|
18566
20315
|
for (const key of Object.keys(value)) {
|
|
18567
20316
|
if (!allowedFields.has(key)) {
|
|
@@ -18638,34 +20387,82 @@ var parseRAGRetrieval = (value) => {
|
|
|
18638
20387
|
}
|
|
18639
20388
|
retrieval.vectorWeight = value.vectorWeight;
|
|
18640
20389
|
}
|
|
20390
|
+
if (getOwnProperty(value, "nativeQueryProfile")) {
|
|
20391
|
+
if (value.nativeQueryProfile !== "latency" && value.nativeQueryProfile !== "balanced" && value.nativeQueryProfile !== "recall") {
|
|
20392
|
+
return null;
|
|
20393
|
+
}
|
|
20394
|
+
retrieval.nativeQueryProfile = value.nativeQueryProfile;
|
|
20395
|
+
}
|
|
20396
|
+
if (getOwnProperty(value, "nativeCandidateLimit")) {
|
|
20397
|
+
if (typeof value.nativeCandidateLimit !== "number") {
|
|
20398
|
+
return null;
|
|
20399
|
+
}
|
|
20400
|
+
retrieval.nativeCandidateLimit = value.nativeCandidateLimit;
|
|
20401
|
+
}
|
|
20402
|
+
if (getOwnProperty(value, "nativeMaxBackfills")) {
|
|
20403
|
+
if (typeof value.nativeMaxBackfills !== "number") {
|
|
20404
|
+
return null;
|
|
20405
|
+
}
|
|
20406
|
+
retrieval.nativeMaxBackfills = value.nativeMaxBackfills;
|
|
20407
|
+
}
|
|
20408
|
+
if (getOwnProperty(value, "nativeMinResults")) {
|
|
20409
|
+
if (typeof value.nativeMinResults !== "number") {
|
|
20410
|
+
return null;
|
|
20411
|
+
}
|
|
20412
|
+
retrieval.nativeMinResults = value.nativeMinResults;
|
|
20413
|
+
}
|
|
20414
|
+
if (getOwnProperty(value, "nativeFillPolicy")) {
|
|
20415
|
+
if (value.nativeFillPolicy !== "strict_topk" && value.nativeFillPolicy !== "satisfy_min_results") {
|
|
20416
|
+
return null;
|
|
20417
|
+
}
|
|
20418
|
+
retrieval.nativeFillPolicy = value.nativeFillPolicy;
|
|
20419
|
+
}
|
|
18641
20420
|
return retrieval;
|
|
18642
20421
|
};
|
|
18643
20422
|
var getNumericStatus = (status) => typeof status === "number" ? status : HTTP_STATUS_OK;
|
|
18644
|
-
var classifyGovernanceReasons = (reasons) =>
|
|
18645
|
-
|
|
20423
|
+
var classifyGovernanceReasons = (reasons) => {
|
|
20424
|
+
const normalized = (reasons ?? []).map((reason) => reason.toLowerCase());
|
|
20425
|
+
if (normalized.some((reason) => reason.includes("multivector"))) {
|
|
20426
|
+
return "multivector";
|
|
20427
|
+
}
|
|
20428
|
+
if (normalized.some((reason) => reason.includes("runtime ") || reason.includes("planner") || reason.includes("candidate-budget-exhausted") || reason.includes("underfilled-topk"))) {
|
|
20429
|
+
return "runtime";
|
|
20430
|
+
}
|
|
20431
|
+
if (normalized.some((reason) => reason.includes("evidence reconcile") || reason.includes("hybrid evidence") || reason.includes("ocr supplement"))) {
|
|
20432
|
+
return "evidence";
|
|
20433
|
+
}
|
|
20434
|
+
return "general";
|
|
20435
|
+
};
|
|
20436
|
+
var buildRegressionRemediationLabel = (classification) => classification === "multivector" ? "Inspect multivector coverage deltas, variant-hit traces, and collapsed-parent recovery before promotion." : classification === "evidence" ? "Inspect hybrid evidence reconciliation, native-vs-OCR passage selection, and PDF evidence provenance before promotion." : classification === "runtime" ? "Inspect planner-profile shifts, candidate-budget exhaustion, and underfilled native retrieval before promotion." : "Inspect the latest retrieval comparison deltas and resolve the gate failure before promotion.";
|
|
18646
20437
|
var summarizeIncidentClassifications = (incidents) => {
|
|
18647
20438
|
const allIncidents = incidents ?? [];
|
|
18648
20439
|
const countBy = (status, classification) => allIncidents.filter((entry) => entry.status === status && (entry.classification ?? "general") === classification).length;
|
|
18649
20440
|
return {
|
|
18650
20441
|
openGeneralCount: countBy("open", "general"),
|
|
18651
20442
|
openMultiVectorCount: countBy("open", "multivector"),
|
|
20443
|
+
openRuntimeCount: countBy("open", "runtime"),
|
|
20444
|
+
openEvidenceCount: countBy("open", "evidence"),
|
|
18652
20445
|
resolvedGeneralCount: countBy("resolved", "general"),
|
|
18653
20446
|
resolvedMultiVectorCount: countBy("resolved", "multivector"),
|
|
20447
|
+
resolvedRuntimeCount: countBy("resolved", "runtime"),
|
|
20448
|
+
resolvedEvidenceCount: countBy("resolved", "evidence"),
|
|
18654
20449
|
totalGeneralCount: allIncidents.filter((entry) => (entry.classification ?? "general") === "general").length,
|
|
18655
|
-
totalMultiVectorCount: allIncidents.filter((entry) => (entry.classification ?? "general") === "multivector").length
|
|
20450
|
+
totalMultiVectorCount: allIncidents.filter((entry) => (entry.classification ?? "general") === "multivector").length,
|
|
20451
|
+
totalRuntimeCount: allIncidents.filter((entry) => (entry.classification ?? "general") === "runtime").length,
|
|
20452
|
+
totalEvidenceCount: allIncidents.filter((entry) => (entry.classification ?? "general") === "evidence").length
|
|
18656
20453
|
};
|
|
18657
20454
|
};
|
|
18658
20455
|
var getBooleanProperty = (value, key) => {
|
|
18659
|
-
if (!
|
|
20456
|
+
if (!isObjectRecord3(value)) {
|
|
18660
20457
|
return;
|
|
18661
20458
|
}
|
|
18662
20459
|
return typeof value[key] === "boolean" ? value[key] : undefined;
|
|
18663
20460
|
};
|
|
18664
|
-
var isRAGDocumentChunk = (value) =>
|
|
18665
|
-
var isRAGDocument = (value) =>
|
|
18666
|
-
var isRAGDocumentUrl = (value) =>
|
|
20461
|
+
var isRAGDocumentChunk = (value) => isObjectRecord3(value) && typeof value.chunkId === "string" && typeof value.text === "string";
|
|
20462
|
+
var isRAGDocument = (value) => isObjectRecord3(value) && typeof value.text === "string";
|
|
20463
|
+
var isRAGDocumentUrl = (value) => isObjectRecord3(value) && typeof value.url === "string" && value.url.trim().length > 0;
|
|
18667
20464
|
var isRAGDocumentArray = (value) => Array.isArray(value) && value.every((entry) => isRAGDocument(entry));
|
|
18668
|
-
var isRAGDocumentUpload = (value) =>
|
|
20465
|
+
var isRAGDocumentUpload = (value) => isObjectRecord3(value) && typeof value.name === "string" && typeof value.content === "string";
|
|
18669
20466
|
var isRAGDocumentUploadArray = (value) => Array.isArray(value) && value.every((entry) => isRAGDocumentUpload(entry));
|
|
18670
20467
|
var isRAGDocumentUrlArray = (value) => Array.isArray(value) && value.every((entry) => isRAGDocumentUrl(entry));
|
|
18671
20468
|
var isRAGDocumentChunkArray = (value) => Array.isArray(value) && value.every((entry) => isRAGDocumentChunk(entry));
|
|
@@ -18854,6 +20651,7 @@ var ragChat = (config) => {
|
|
|
18854
20651
|
const { retrievalReleasePolicies } = config;
|
|
18855
20652
|
const { retrievalReleasePoliciesByRolloutLabel } = config;
|
|
18856
20653
|
const { retrievalReleasePoliciesByGroupAndRolloutLabel } = config;
|
|
20654
|
+
const { retrievalBaselineGatePoliciesByGroup } = config;
|
|
18857
20655
|
const { retrievalBaselineGatePoliciesByRolloutLabel } = config;
|
|
18858
20656
|
const { retrievalBaselineGatePoliciesByGroupAndRolloutLabel } = config;
|
|
18859
20657
|
const workflowRenderConfig = typeof config.htmx === "object" ? config.htmx.workflowRender ?? config.htmx.workflow?.render : undefined;
|
|
@@ -19336,18 +21134,18 @@ var ragChat = (config) => {
|
|
|
19336
21134
|
store: ragStore
|
|
19337
21135
|
}) : null);
|
|
19338
21136
|
const toRAGEvaluationInput = (body) => {
|
|
19339
|
-
if (!
|
|
21137
|
+
if (!isObjectRecord3(body) || !Array.isArray(body.cases)) {
|
|
19340
21138
|
return null;
|
|
19341
21139
|
}
|
|
19342
21140
|
const parsedCases = body.cases.map((candidate, caseIndex) => {
|
|
19343
|
-
if (!
|
|
21141
|
+
if (!isObjectRecord3(candidate)) {
|
|
19344
21142
|
return null;
|
|
19345
21143
|
}
|
|
19346
21144
|
const query = getStringProperty(candidate, "query")?.trim() ?? "";
|
|
19347
21145
|
if (!query) {
|
|
19348
21146
|
return null;
|
|
19349
21147
|
}
|
|
19350
|
-
const caseMetadata =
|
|
21148
|
+
const caseMetadata = isObjectRecord3(candidate.metadata) ? candidate.metadata : undefined;
|
|
19351
21149
|
const expectedChunkIds = normalizeStringArray2(candidate.expectedChunkIds);
|
|
19352
21150
|
const expectedSources = normalizeStringArray2(candidate.expectedSources);
|
|
19353
21151
|
const expectedDocumentIds = normalizeStringArray2(candidate.expectedDocumentIds);
|
|
@@ -19436,11 +21234,11 @@ var ragChat = (config) => {
|
|
|
19436
21234
|
};
|
|
19437
21235
|
const toRAGRetrievalComparisonRequest = (body) => {
|
|
19438
21236
|
const input = toRAGEvaluationInput(body);
|
|
19439
|
-
if (!input || !
|
|
21237
|
+
if (!input || !isObjectRecord3(body) || !Array.isArray(body.retrievals)) {
|
|
19440
21238
|
return null;
|
|
19441
21239
|
}
|
|
19442
21240
|
const retrievals = body.retrievals.map((candidate, index) => {
|
|
19443
|
-
if (!
|
|
21241
|
+
if (!isObjectRecord3(candidate)) {
|
|
19444
21242
|
return null;
|
|
19445
21243
|
}
|
|
19446
21244
|
const id = getStringProperty(candidate, "id") ?? `retrieval-${index + 1}`;
|
|
@@ -19467,12 +21265,13 @@ var ragChat = (config) => {
|
|
|
19467
21265
|
groupKey: getStringProperty(body, "groupKey"),
|
|
19468
21266
|
label: getStringProperty(body, "label"),
|
|
19469
21267
|
persistRun: getBooleanProperty(body, "persistRun") === true,
|
|
21268
|
+
suiteId: getStringProperty(body, "suiteId"),
|
|
19470
21269
|
tags: normalizeStringArray2(body.tags),
|
|
19471
21270
|
retrievals
|
|
19472
21271
|
};
|
|
19473
21272
|
};
|
|
19474
21273
|
const toRAGRetrievalBaselinePromotionRequest = (body) => {
|
|
19475
|
-
if (!
|
|
21274
|
+
if (!isObjectRecord3(body)) {
|
|
19476
21275
|
return null;
|
|
19477
21276
|
}
|
|
19478
21277
|
const groupKey = getStringProperty(body, "groupKey");
|
|
@@ -19501,7 +21300,7 @@ var ragChat = (config) => {
|
|
|
19501
21300
|
};
|
|
19502
21301
|
};
|
|
19503
21302
|
const toRAGRetrievalBaselinePromotionFromRunRequest = (body) => {
|
|
19504
|
-
if (!
|
|
21303
|
+
if (!isObjectRecord3(body)) {
|
|
19505
21304
|
return null;
|
|
19506
21305
|
}
|
|
19507
21306
|
const groupKey = getStringProperty(body, "groupKey");
|
|
@@ -19528,7 +21327,7 @@ var ragChat = (config) => {
|
|
|
19528
21327
|
};
|
|
19529
21328
|
};
|
|
19530
21329
|
const toRAGRetrievalBaselineRevertRequest = (body) => {
|
|
19531
|
-
if (!
|
|
21330
|
+
if (!isObjectRecord3(body)) {
|
|
19532
21331
|
return null;
|
|
19533
21332
|
}
|
|
19534
21333
|
const groupKey = getStringProperty(body, "groupKey");
|
|
@@ -19552,7 +21351,7 @@ var ragChat = (config) => {
|
|
|
19552
21351
|
};
|
|
19553
21352
|
};
|
|
19554
21353
|
const toRAGRetrievalReleaseDecisionActionRequest = (body) => {
|
|
19555
|
-
if (!
|
|
21354
|
+
if (!isObjectRecord3(body)) {
|
|
19556
21355
|
return null;
|
|
19557
21356
|
}
|
|
19558
21357
|
const groupKey = getStringProperty(body, "groupKey");
|
|
@@ -19576,7 +21375,7 @@ var ragChat = (config) => {
|
|
|
19576
21375
|
};
|
|
19577
21376
|
};
|
|
19578
21377
|
const toRAGRetrievalLaneHandoffDecisionRequest = (body) => {
|
|
19579
|
-
if (!
|
|
21378
|
+
if (!isObjectRecord3(body)) {
|
|
19580
21379
|
return null;
|
|
19581
21380
|
}
|
|
19582
21381
|
const groupKey = getStringProperty(body, "groupKey");
|
|
@@ -19601,7 +21400,7 @@ var ragChat = (config) => {
|
|
|
19601
21400
|
};
|
|
19602
21401
|
};
|
|
19603
21402
|
const toRAGRetrievalReleaseIncidentAcknowledgeRequest = (body) => {
|
|
19604
|
-
if (!
|
|
21403
|
+
if (!isObjectRecord3(body)) {
|
|
19605
21404
|
return null;
|
|
19606
21405
|
}
|
|
19607
21406
|
const incidentId = getStringProperty(body, "incidentId");
|
|
@@ -19616,7 +21415,7 @@ var ragChat = (config) => {
|
|
|
19616
21415
|
};
|
|
19617
21416
|
};
|
|
19618
21417
|
const toRAGRetrievalReleaseIncidentUnacknowledgeRequest = (body) => {
|
|
19619
|
-
if (!
|
|
21418
|
+
if (!isObjectRecord3(body)) {
|
|
19620
21419
|
return null;
|
|
19621
21420
|
}
|
|
19622
21421
|
const incidentId = getStringProperty(body, "incidentId");
|
|
@@ -19626,7 +21425,7 @@ var ragChat = (config) => {
|
|
|
19626
21425
|
return { incidentId };
|
|
19627
21426
|
};
|
|
19628
21427
|
const toRAGRetrievalReleaseIncidentResolveRequest = (body) => {
|
|
19629
|
-
if (!
|
|
21428
|
+
if (!isObjectRecord3(body)) {
|
|
19630
21429
|
return null;
|
|
19631
21430
|
}
|
|
19632
21431
|
const incidentId = getStringProperty(body, "incidentId");
|
|
@@ -19641,7 +21440,7 @@ var ragChat = (config) => {
|
|
|
19641
21440
|
};
|
|
19642
21441
|
};
|
|
19643
21442
|
const toRAGRemediationAction = (value) => {
|
|
19644
|
-
if (!
|
|
21443
|
+
if (!isObjectRecord3(value)) {
|
|
19645
21444
|
return;
|
|
19646
21445
|
}
|
|
19647
21446
|
const kind = getStringProperty(value, "kind");
|
|
@@ -19660,7 +21459,7 @@ var ragChat = (config) => {
|
|
|
19660
21459
|
};
|
|
19661
21460
|
};
|
|
19662
21461
|
const toRAGRetrievalIncidentRemediationDecisionRequest = (body) => {
|
|
19663
|
-
if (!
|
|
21462
|
+
if (!isObjectRecord3(body)) {
|
|
19664
21463
|
return null;
|
|
19665
21464
|
}
|
|
19666
21465
|
const incidentId = getStringProperty(body, "incidentId");
|
|
@@ -19680,7 +21479,7 @@ var ragChat = (config) => {
|
|
|
19680
21479
|
};
|
|
19681
21480
|
};
|
|
19682
21481
|
const toRAGRetrievalIncidentRemediationExecutionRequest = (body) => {
|
|
19683
|
-
if (!
|
|
21482
|
+
if (!isObjectRecord3(body)) {
|
|
19684
21483
|
return null;
|
|
19685
21484
|
}
|
|
19686
21485
|
const action = toRAGRemediationAction(getObjectProperty(body, "action"));
|
|
@@ -19700,7 +21499,7 @@ var ragChat = (config) => {
|
|
|
19700
21499
|
};
|
|
19701
21500
|
};
|
|
19702
21501
|
const toRAGRetrievalIncidentRemediationBulkExecutionRequest = (body) => {
|
|
19703
|
-
if (!
|
|
21502
|
+
if (!isObjectRecord3(body) || !Array.isArray(body.items)) {
|
|
19704
21503
|
return null;
|
|
19705
21504
|
}
|
|
19706
21505
|
const items = body.items.map((entry) => toRAGRetrievalIncidentRemediationExecutionRequest(entry)).filter((entry) => Boolean(entry));
|
|
@@ -19974,7 +21773,7 @@ var ragChat = (config) => {
|
|
|
19974
21773
|
const baselineRetrievalId = input.baselineRetrievalId ?? activeBaseline?.retrievalId;
|
|
19975
21774
|
const candidateRetrievalId = input.candidateRetrievalId ?? input.retrievals.find((entry) => entry.id !== baselineRetrievalId)?.id;
|
|
19976
21775
|
const startedAt = Date.now();
|
|
19977
|
-
const suiteId = generateId();
|
|
21776
|
+
const suiteId = input.suiteId ?? generateId();
|
|
19978
21777
|
const suiteLabel = input.label ?? "Retrieval comparison";
|
|
19979
21778
|
const comparison = await compareRAGRetrievalStrategies({
|
|
19980
21779
|
collection,
|
|
@@ -20013,7 +21812,12 @@ var ragChat = (config) => {
|
|
|
20013
21812
|
baselineRetrievalId,
|
|
20014
21813
|
candidateRetrievalId,
|
|
20015
21814
|
comparison,
|
|
20016
|
-
policy:
|
|
21815
|
+
policy: getEffectiveRetrievalBaselineGatePolicy({
|
|
21816
|
+
baselinePolicy: activeBaseline?.policy,
|
|
21817
|
+
groupKey: input.groupKey,
|
|
21818
|
+
rolloutLabel: activeBaseline?.rolloutLabel,
|
|
21819
|
+
suiteId
|
|
21820
|
+
})
|
|
20017
21821
|
});
|
|
20018
21822
|
await persistRAGRetrievalComparisonRun({
|
|
20019
21823
|
run: {
|
|
@@ -20149,9 +21953,42 @@ var ragChat = (config) => {
|
|
|
20149
21953
|
});
|
|
20150
21954
|
const getRetrievalLaneHandoffAutoCompletePolicy = (groupKey, targetRolloutLabel) => (groupKey && targetRolloutLabel ? config.retrievalLaneHandoffAutoCompletePoliciesByGroupAndTargetRolloutLabel?.[groupKey]?.[targetRolloutLabel] : undefined) ?? {};
|
|
20151
21955
|
const getDefaultRetrievalBaselineGatePolicy = (groupKey, rolloutLabel) => ({
|
|
21956
|
+
...(groupKey ? retrievalBaselineGatePoliciesByGroup?.[groupKey] : undefined) ?? {},
|
|
20152
21957
|
...(rolloutLabel ? retrievalBaselineGatePoliciesByRolloutLabel?.[rolloutLabel] : undefined) ?? {},
|
|
20153
21958
|
...(groupKey && rolloutLabel ? retrievalBaselineGatePoliciesByGroupAndRolloutLabel?.[groupKey]?.[rolloutLabel] : undefined) ?? {}
|
|
20154
21959
|
});
|
|
21960
|
+
const buildRuntimeRetrievalBenchmarkRecommendedGatePolicy = () => ({
|
|
21961
|
+
minEvidenceReconcileCasesDelta: 0,
|
|
21962
|
+
maxRuntimeCandidateBudgetExhaustedCasesDelta: 0,
|
|
21963
|
+
maxRuntimeUnderfilledTopKCasesDelta: 0,
|
|
21964
|
+
minAverageF1Delta: 0,
|
|
21965
|
+
minPassingRateDelta: 0,
|
|
21966
|
+
severity: "fail"
|
|
21967
|
+
});
|
|
21968
|
+
const getRecommendedBenchmarkBaselineGatePolicy = (input) => {
|
|
21969
|
+
const adaptiveSuite = createRAGAdaptiveNativePlannerBenchmarkSuite();
|
|
21970
|
+
const backendSuite = createRAGNativeBackendComparisonBenchmarkSuite();
|
|
21971
|
+
if (input.groupKey === (typeof adaptiveSuite.metadata?.recommendedGroupKey === "string" ? adaptiveSuite.metadata.recommendedGroupKey : undefined) || input.suiteId === adaptiveSuite.id) {
|
|
21972
|
+
return buildRuntimeRetrievalBenchmarkRecommendedGatePolicy();
|
|
21973
|
+
}
|
|
21974
|
+
if (input.groupKey === (typeof backendSuite.metadata?.recommendedGroupKey === "string" ? backendSuite.metadata.recommendedGroupKey : undefined) || input.suiteId === backendSuite.id) {
|
|
21975
|
+
return buildRuntimeRetrievalBenchmarkRecommendedGatePolicy();
|
|
21976
|
+
}
|
|
21977
|
+
return;
|
|
21978
|
+
};
|
|
21979
|
+
const getEffectiveRetrievalBaselineGatePolicy = (input) => {
|
|
21980
|
+
if (input.baselinePolicy && Object.keys(input.baselinePolicy).length > 0) {
|
|
21981
|
+
return input.baselinePolicy;
|
|
21982
|
+
}
|
|
21983
|
+
const defaultPolicy = getDefaultRetrievalBaselineGatePolicy(input.groupKey, input.rolloutLabel);
|
|
21984
|
+
if (Object.keys(defaultPolicy).length > 0) {
|
|
21985
|
+
return defaultPolicy;
|
|
21986
|
+
}
|
|
21987
|
+
return getRecommendedBenchmarkBaselineGatePolicy({
|
|
21988
|
+
groupKey: input.groupKey,
|
|
21989
|
+
suiteId: input.suiteId
|
|
21990
|
+
});
|
|
21991
|
+
};
|
|
20155
21992
|
const getRetrievalReleaseIncidentSeverity = (rolloutLabel) => rolloutLabel === "stable" ? "critical" : "warning";
|
|
20156
21993
|
const getLatestLaneHandoffDecision = (input) => input.decisions?.find((entry) => entry.groupKey === input.groupKey && entry.sourceRolloutLabel === input.sourceRolloutLabel && entry.targetRolloutLabel === input.targetRolloutLabel && (!input.kind || entry.kind === input.kind));
|
|
20157
21994
|
const getLaneHandoffFreshnessWindow = (input) => {
|
|
@@ -20366,7 +22203,7 @@ var ragChat = (config) => {
|
|
|
20366
22203
|
return existing.find((entry) => entry.groupKey === groupKey && typeof entry.corpusGroupKey === "string")?.corpusGroupKey ?? comparisonRunCorpusGroups.find((entry) => entry.groupKey === groupKey && typeof entry.corpusGroupKey === "string")?.corpusGroupKey ?? baselineCorpusGroups.find((entry) => entry.groupKey === groupKey && typeof entry.corpusGroupKey === "string")?.corpusGroupKey ?? releaseDecisionCorpusGroups.find((entry) => entry.groupKey === groupKey && typeof entry.corpusGroupKey === "string")?.corpusGroupKey;
|
|
20367
22204
|
};
|
|
20368
22205
|
const nextByKey = new Map;
|
|
20369
|
-
const classifyPromotionIncident = (reasons) => (reasons
|
|
22206
|
+
const classifyPromotionIncident = (reasons) => classifyGovernanceReasons(reasons);
|
|
20370
22207
|
for (const candidate of input.promotionCandidates) {
|
|
20371
22208
|
if (!candidate.groupKey || !candidate.targetRolloutLabel) {
|
|
20372
22209
|
continue;
|
|
@@ -20507,7 +22344,11 @@ var ragChat = (config) => {
|
|
|
20507
22344
|
const gate = decision?.gate;
|
|
20508
22345
|
const reasons = gate?.status && gate.status !== "pass" ? gate.reasons.length > 0 ? [...gate.reasons] : [`gate status is ${gate.status}`] : [];
|
|
20509
22346
|
const effectiveReleasePolicy = getRetrievalReleasePolicy(input.run.groupKey, input.targetRolloutLabel);
|
|
20510
|
-
const effectiveBaselineGatePolicy =
|
|
22347
|
+
const effectiveBaselineGatePolicy = getEffectiveRetrievalBaselineGatePolicy({
|
|
22348
|
+
groupKey: input.run.groupKey,
|
|
22349
|
+
rolloutLabel: input.targetRolloutLabel,
|
|
22350
|
+
suiteId: input.run.suiteId
|
|
22351
|
+
}) ?? {};
|
|
20511
22352
|
const requiresApproval = Boolean(effectiveReleasePolicy.requireApprovalBeforePromotion);
|
|
20512
22353
|
const approvalFreshness = latestDecision ? getDecisionFreshness({
|
|
20513
22354
|
now: input.now,
|
|
@@ -20558,7 +22399,7 @@ var ragChat = (config) => {
|
|
|
20558
22399
|
if (reason.includes("approval")) {
|
|
20559
22400
|
actions.add("Renew or record the required approval for this rollout lane.");
|
|
20560
22401
|
}
|
|
20561
|
-
if (reason.includes("gate") || reason.includes("passing rate") || reason.includes("average")) {
|
|
22402
|
+
if (reason.includes("gate") || reason.includes("passing rate") || reason.includes("average") || reason.includes("evidence reconcile") || reason.includes("ocr supplement") || reason.includes("hybrid evidence")) {
|
|
20562
22403
|
actions.add(buildRegressionRemediationLabel(classifyGovernanceReasons([reason])));
|
|
20563
22404
|
}
|
|
20564
22405
|
if (reason.includes("source comparison run was not found")) {
|
|
@@ -20589,7 +22430,7 @@ var ragChat = (config) => {
|
|
|
20589
22430
|
})
|
|
20590
22431
|
});
|
|
20591
22432
|
}
|
|
20592
|
-
if (reason.includes("gate") || reason.includes("passing rate") || reason.includes("average")) {
|
|
22433
|
+
if (reason.includes("gate") || reason.includes("passing rate") || reason.includes("average") || reason.includes("evidence reconcile") || reason.includes("ocr supplement") || reason.includes("hybrid evidence")) {
|
|
20593
22434
|
steps.push({
|
|
20594
22435
|
kind: "inspect_gate",
|
|
20595
22436
|
label: buildRegressionRemediationLabel(classifyGovernanceReasons([reason])),
|
|
@@ -20650,7 +22491,10 @@ var ragChat = (config) => {
|
|
|
20650
22491
|
baselineRetrievalId,
|
|
20651
22492
|
candidateRetrievalId: input.retrievalId,
|
|
20652
22493
|
classification: input.baseline ? "general" : undefined,
|
|
20653
|
-
effectiveBaselineGatePolicy: targetRolloutLabel || input.groupKey ?
|
|
22494
|
+
effectiveBaselineGatePolicy: targetRolloutLabel || input.groupKey ? getEffectiveRetrievalBaselineGatePolicy({
|
|
22495
|
+
groupKey: input.groupKey,
|
|
22496
|
+
rolloutLabel: targetRolloutLabel
|
|
22497
|
+
}) : undefined,
|
|
20654
22498
|
effectiveReleasePolicy: getRetrievalReleasePolicy(input.groupKey, targetRolloutLabel),
|
|
20655
22499
|
groupKey: input.groupKey,
|
|
20656
22500
|
gateStatus: undefined,
|
|
@@ -21375,12 +23219,42 @@ var ragChat = (config) => {
|
|
|
21375
23219
|
store: retrievalComparisonHistoryStore
|
|
21376
23220
|
}) : undefined;
|
|
21377
23221
|
const latest = decisions?.[0];
|
|
23222
|
+
const adaptiveNativePlannerBenchmark = await loadAdaptiveNativePlannerBenchmarkRuntime({
|
|
23223
|
+
corpusGroupKey: getStringProperty(queryInput, "benchmarkCorpusGroupKey"),
|
|
23224
|
+
groupKey: getStringProperty(queryInput, "benchmarkGroupKey"),
|
|
23225
|
+
historyLimit: getIntegerLikeProperty(queryInput, "benchmarkRunLimit") ?? getIntegerLikeProperty(queryInput, "benchmarkLimit") ?? 5,
|
|
23226
|
+
queryInput,
|
|
23227
|
+
snapshotLimit: getIntegerLikeProperty(queryInput, "benchmarkLimit") ?? 5
|
|
23228
|
+
});
|
|
23229
|
+
const nativeBackendComparisonBenchmark = await loadNativeBackendComparisonBenchmarkRuntime({
|
|
23230
|
+
corpusGroupKey: getStringProperty(queryInput, "backendBenchmarkCorpusGroupKey"),
|
|
23231
|
+
groupKey: getStringProperty(queryInput, "backendBenchmarkGroupKey"),
|
|
23232
|
+
historyLimit: getIntegerLikeProperty(queryInput, "backendBenchmarkRunLimit") ?? getIntegerLikeProperty(queryInput, "backendBenchmarkLimit") ?? 5,
|
|
23233
|
+
queryInput,
|
|
23234
|
+
snapshotLimit: getIntegerLikeProperty(queryInput, "backendBenchmarkLimit") ?? 5
|
|
23235
|
+
});
|
|
23236
|
+
const presentation = buildRAGRetrievalReleaseGroupHistoryPresentation({
|
|
23237
|
+
runs,
|
|
23238
|
+
timeline: {
|
|
23239
|
+
corpusGroupKey: corpusGroupKey ?? decisions?.[0]?.corpusGroupKey ?? baselines?.[0]?.corpusGroupKey ?? runs?.[0]?.corpusGroupKey,
|
|
23240
|
+
groupKey,
|
|
23241
|
+
lastApprovedAt: decisions?.find((entry) => entry.kind === "approve")?.decidedAt,
|
|
23242
|
+
lastPromotedAt: decisions?.find((entry) => entry.kind === "promote")?.decidedAt,
|
|
23243
|
+
lastRejectedAt: decisions?.find((entry) => entry.kind === "reject")?.decidedAt,
|
|
23244
|
+
lastRevertedAt: decisions?.find((entry) => entry.kind === "revert")?.decidedAt,
|
|
23245
|
+
latestDecisionAt: latest?.decidedAt,
|
|
23246
|
+
latestDecisionFreshnessStatus: latest?.freshnessStatus,
|
|
23247
|
+
latestDecisionKind: latest?.kind
|
|
23248
|
+
}
|
|
23249
|
+
});
|
|
21378
23250
|
return {
|
|
23251
|
+
adaptiveNativePlannerBenchmark,
|
|
21379
23252
|
baselines,
|
|
21380
23253
|
corpusGroupKey: corpusGroupKey ?? decisions?.[0]?.corpusGroupKey ?? baselines?.[0]?.corpusGroupKey ?? runs?.[0]?.corpusGroupKey,
|
|
21381
23254
|
decisions,
|
|
21382
23255
|
groupKey,
|
|
21383
23256
|
ok: true,
|
|
23257
|
+
presentation,
|
|
21384
23258
|
runs,
|
|
21385
23259
|
timeline: {
|
|
21386
23260
|
corpusGroupKey: corpusGroupKey ?? decisions?.[0]?.corpusGroupKey ?? baselines?.[0]?.corpusGroupKey ?? runs?.[0]?.corpusGroupKey,
|
|
@@ -21392,7 +23266,431 @@ var ragChat = (config) => {
|
|
|
21392
23266
|
latestDecisionAt: latest?.decidedAt,
|
|
21393
23267
|
latestDecisionFreshnessStatus: latest?.freshnessStatus,
|
|
21394
23268
|
latestDecisionKind: latest?.kind
|
|
23269
|
+
},
|
|
23270
|
+
nativeBackendComparisonBenchmark
|
|
23271
|
+
};
|
|
23272
|
+
};
|
|
23273
|
+
const loadAdaptiveNativePlannerBenchmarkRuntime = async (input) => {
|
|
23274
|
+
const suite = input?.suite ?? createRAGAdaptiveNativePlannerBenchmarkSuite();
|
|
23275
|
+
const recommendedGroupKey = typeof suite.metadata?.recommendedGroupKey === "string" ? suite.metadata.recommendedGroupKey : undefined;
|
|
23276
|
+
const recommendedTags = Array.isArray(suite.metadata?.recommendedTags) ? suite.metadata?.recommendedTags.filter((entry) => typeof entry === "string") : undefined;
|
|
23277
|
+
const groupKey = input?.groupKey ?? getStringProperty(input?.queryInput, "benchmarkGroupKey") ?? recommendedGroupKey;
|
|
23278
|
+
const corpusGroupKey = input?.corpusGroupKey ?? getStringProperty(input?.queryInput, "benchmarkCorpusGroupKey");
|
|
23279
|
+
const recentRuns = retrievalComparisonHistoryStore ? await loadRAGRetrievalComparisonHistory({
|
|
23280
|
+
corpusGroupKey,
|
|
23281
|
+
groupKey,
|
|
23282
|
+
limit: input?.historyLimit ?? 5,
|
|
23283
|
+
store: retrievalComparisonHistoryStore,
|
|
23284
|
+
suiteId: suite.id
|
|
23285
|
+
}) : undefined;
|
|
23286
|
+
const historyTimelineGroupKey = groupKey ?? recentRuns?.[0]?.groupKey;
|
|
23287
|
+
const historyPresentation = recentRuns && recentRuns.length > 0 ? buildRAGRetrievalReleaseGroupHistoryPresentation({
|
|
23288
|
+
runs: recentRuns,
|
|
23289
|
+
timeline: historyTimelineGroupKey ? {
|
|
23290
|
+
corpusGroupKey: corpusGroupKey ?? recentRuns[0]?.corpusGroupKey,
|
|
23291
|
+
groupKey: historyTimelineGroupKey
|
|
23292
|
+
} : undefined
|
|
23293
|
+
}) : undefined;
|
|
23294
|
+
const snapshotHistory = config.evaluationSuiteSnapshotHistoryStore ? await loadRAGEvaluationSuiteSnapshotHistory({
|
|
23295
|
+
limit: input?.snapshotLimit ?? 5,
|
|
23296
|
+
store: config.evaluationSuiteSnapshotHistoryStore,
|
|
23297
|
+
suite
|
|
23298
|
+
}) : undefined;
|
|
23299
|
+
const fixtureVariants = getRetrievalBenchmarkFixtureVariants(recentRuns);
|
|
23300
|
+
return {
|
|
23301
|
+
corpusGroupKey,
|
|
23302
|
+
fixtureVariants,
|
|
23303
|
+
groupKey,
|
|
23304
|
+
historyPresentation,
|
|
23305
|
+
latestFixtureVariant: fixtureVariants[0],
|
|
23306
|
+
latestRun: recentRuns?.[0],
|
|
23307
|
+
recentRuns,
|
|
23308
|
+
recommendedGroupKey,
|
|
23309
|
+
recommendedTags,
|
|
23310
|
+
snapshotHistory,
|
|
23311
|
+
snapshotHistoryPresentation: buildRAGEvaluationSuiteSnapshotHistoryPresentation(snapshotHistory),
|
|
23312
|
+
suiteId: suite.id,
|
|
23313
|
+
suiteLabel: suite.label ?? suite.id
|
|
23314
|
+
};
|
|
23315
|
+
};
|
|
23316
|
+
const buildRetrievalBenchmarkBackendTags = () => {
|
|
23317
|
+
const status = resolveCollection()?.getStatus?.();
|
|
23318
|
+
const fixtureVariant = "current-collection";
|
|
23319
|
+
if (!status) {
|
|
23320
|
+
return [`fixture:${fixtureVariant}`];
|
|
23321
|
+
}
|
|
23322
|
+
const tags = [
|
|
23323
|
+
`fixture:${fixtureVariant}`,
|
|
23324
|
+
`backend:${status.backend}`,
|
|
23325
|
+
`vector-mode:${status.vectorMode}`
|
|
23326
|
+
];
|
|
23327
|
+
if (status.native && "mode" in status.native) {
|
|
23328
|
+
tags.push(`native-mode:${status.native.mode}`);
|
|
23329
|
+
}
|
|
23330
|
+
return tags;
|
|
23331
|
+
};
|
|
23332
|
+
const getRetrievalBenchmarkFixtureVariants = (runs) => (runs ?? []).flatMap((run) => run.tags ?? []).filter((tag) => tag.startsWith("fixture:")).map((tag) => tag.slice("fixture:".length)).filter((tag, index, all) => tag.trim().length > 0 && all.indexOf(tag) === index);
|
|
23333
|
+
/**
 * Guarantees that a benchmark tag list carries at least one `fixture:` tag.
 *
 * If one is already present the input array is returned as-is. Otherwise the
 * `fixture:` tags produced by `buildRetrievalBenchmarkBackendTags` are appended
 * and duplicates are removed, keeping first occurrences.
 *
 * @param {string[]} tags - Tags supplied for the benchmark run.
 * @returns {string[]} The original array, or a new de-duplicated array with the fixture tag(s) added.
 */
const ensureRetrievalBenchmarkFixtureTag = (tags) => {
  const isFixtureTag = (tag) => tag.startsWith("fixture:");
  if (tags.some((tag) => isFixtureTag(tag))) {
    return tags;
  }
  const defaultFixtureTags = buildRetrievalBenchmarkBackendTags().filter((tag) => isFixtureTag(tag));
  // A Set preserves insertion order, so this keeps the first occurrence of each tag.
  return [...new Set([...tags, ...defaultFixtureTags])];
};
|
|
23340
|
+
/**
 * Loads the runtime view of the native backend comparison benchmark: recent
 * comparison runs, their history presentation, and the suite snapshot history.
 *
 * History is only loaded when the corresponding store is configured; otherwise
 * the related fields are left undefined.
 *
 * @param {object} [input] - Optional overrides.
 * @param {object} [input.suite] - Suite to report on; a default suite is created when omitted.
 * @param {string} [input.groupKey] - Group key; falls back to `queryInput.benchmarkGroupKey`, then the suite's recommended group key.
 * @param {string} [input.corpusGroupKey] - Corpus group key; falls back to `queryInput.benchmarkCorpusGroupKey`.
 * @param {object} [input.queryInput] - Raw query input used for the fallbacks above.
 * @param {number} [input.historyLimit=5] - Maximum number of comparison runs to load.
 * @param {number} [input.snapshotLimit=5] - Maximum number of suite snapshots to load.
 * @returns {Promise<object>} Runtime summary for the benchmark suite.
 */
const loadNativeBackendComparisonBenchmarkRuntime = async (input) => {
  const suite = input?.suite ?? createRAGNativeBackendComparisonBenchmarkSuite();

  // Recommendations are only honoured when the suite metadata has the expected types.
  let recommendedGroupKey;
  if (typeof suite.metadata?.recommendedGroupKey === "string") {
    recommendedGroupKey = suite.metadata.recommendedGroupKey;
  }
  let recommendedTags;
  if (Array.isArray(suite.metadata?.recommendedTags)) {
    recommendedTags = suite.metadata.recommendedTags.filter((entry) => typeof entry === "string");
  }

  const groupKey = input?.groupKey ?? getStringProperty(input?.queryInput, "benchmarkGroupKey") ?? recommendedGroupKey;
  const corpusGroupKey = input?.corpusGroupKey ?? getStringProperty(input?.queryInput, "benchmarkCorpusGroupKey");

  let recentRuns;
  if (retrievalComparisonHistoryStore) {
    recentRuns = await loadRAGRetrievalComparisonHistory({
      corpusGroupKey,
      groupKey,
      limit: input?.historyLimit ?? 5,
      store: retrievalComparisonHistoryStore,
      suiteId: suite.id
    });
  }

  // Without an explicit group key, the timeline follows the most recent run's group.
  const historyTimelineGroupKey = groupKey ?? recentRuns?.[0]?.groupKey;
  let historyPresentation;
  if (recentRuns && recentRuns.length > 0) {
    let timeline;
    if (historyTimelineGroupKey) {
      timeline = {
        corpusGroupKey: corpusGroupKey ?? recentRuns[0]?.corpusGroupKey,
        groupKey: historyTimelineGroupKey
      };
    }
    historyPresentation = buildRAGRetrievalReleaseGroupHistoryPresentation({
      runs: recentRuns,
      timeline
    });
  }

  let snapshotHistory;
  if (config.evaluationSuiteSnapshotHistoryStore) {
    snapshotHistory = await loadRAGEvaluationSuiteSnapshotHistory({
      limit: input?.snapshotLimit ?? 5,
      store: config.evaluationSuiteSnapshotHistoryStore,
      suite
    });
  }

  const fixtureVariants = getRetrievalBenchmarkFixtureVariants(recentRuns);
  const [latestFixtureVariant] = fixtureVariants;
  return {
    corpusGroupKey,
    fixtureVariants,
    groupKey,
    historyPresentation,
    latestFixtureVariant,
    latestRun: recentRuns?.[0],
    recentRuns,
    recommendedGroupKey,
    recommendedTags,
    snapshotHistory,
    snapshotHistoryPresentation: buildRAGEvaluationSuiteSnapshotHistoryPresentation(snapshotHistory),
    suiteId: suite.id,
    suiteLabel: suite.label ?? suite.id
  };
};
|
|
23383
|
+
/**
 * Read-only handler: describes the adaptive native planner benchmark suite
 * together with its recent runs and snapshot history.
 *
 * @param {object} queryInput - Query parameters; reads `description`, `label`,
 *   `metadata`, `topK`, `runLimit` (run history size, default 5) and `limit`
 *   (snapshot history size, default 5).
 * @returns {Promise<object>} An `ok: true` payload with the suite and its runtime data.
 */
const handleAdaptiveNativePlannerBenchmark = async (queryInput) => {
  const suiteOptions = {
    description: getStringProperty(queryInput, "description"),
    label: getStringProperty(queryInput, "label"),
    metadata: getObjectProperty(queryInput, "metadata"),
    topK: getIntegerLikeProperty(queryInput, "topK") ?? undefined
  };
  const suite = createRAGAdaptiveNativePlannerBenchmarkSuite(suiteOptions);
  const {
    corpusGroupKey,
    fixtureVariants,
    groupKey,
    historyPresentation,
    latestFixtureVariant,
    latestRun,
    recentRuns,
    snapshotHistory,
    snapshotHistoryPresentation
  } = await loadAdaptiveNativePlannerBenchmarkRuntime({
    historyLimit: getIntegerLikeProperty(queryInput, "runLimit") ?? 5,
    queryInput,
    snapshotLimit: getIntegerLikeProperty(queryInput, "limit") ?? 5,
    suite
  });
  // Only a subset of the runtime fields is exposed in the response.
  return {
    corpusGroupKey,
    fixtureVariants,
    groupKey,
    historyPresentation,
    latestFixtureVariant,
    latestRun,
    ok: true,
    recentRuns,
    snapshotHistory,
    snapshotHistoryPresentation,
    suite
  };
};
|
|
23410
|
+
/**
 * Read-only handler: describes the native backend comparison benchmark suite
 * together with its recent runs and snapshot history.
 *
 * @param {object} queryInput - Query parameters; reads `description`, `label`,
 *   `metadata`, `topK`, `runLimit` (run history size, default 5) and `limit`
 *   (snapshot history size, default 5).
 * @returns {Promise<object>} An `ok: true` payload with the suite and its runtime data.
 */
const handleNativeBackendComparisonBenchmark = async (queryInput) => {
  const suiteOptions = {
    description: getStringProperty(queryInput, "description"),
    label: getStringProperty(queryInput, "label"),
    metadata: getObjectProperty(queryInput, "metadata"),
    topK: getIntegerLikeProperty(queryInput, "topK") ?? undefined
  };
  const suite = createRAGNativeBackendComparisonBenchmarkSuite(suiteOptions);
  const {
    corpusGroupKey,
    fixtureVariants,
    groupKey,
    historyPresentation,
    latestFixtureVariant,
    latestRun,
    recentRuns,
    snapshotHistory,
    snapshotHistoryPresentation
  } = await loadNativeBackendComparisonBenchmarkRuntime({
    historyLimit: getIntegerLikeProperty(queryInput, "runLimit") ?? 5,
    queryInput,
    snapshotLimit: getIntegerLikeProperty(queryInput, "limit") ?? 5,
    suite
  });
  // Only a subset of the runtime fields is exposed in the response.
  return {
    corpusGroupKey,
    fixtureVariants,
    groupKey,
    historyPresentation,
    latestFixtureVariant,
    latestRun,
    ok: true,
    recentRuns,
    snapshotHistory,
    snapshotHistoryPresentation,
    suite
  };
};
|
|
23437
|
+
/**
 * Runs the adaptive native planner benchmark through `handleEvaluateRetrievals`
 * and returns the comparison together with the refreshed benchmark runtime view.
 *
 * @param {object} bodyInput - Request body. Reads `description`, `label`, `metadata`,
 *   `topK`, `tags`, `baselineRetrievalId`, `candidateRetrievalId`, `corpusGroupKey`,
 *   `groupKey`, `persistRun`, `retrievals`, `runLimit` and `limit`.
 * @param {Request} [request] - Incoming request, forwarded to `handleEvaluateRetrievals`.
 * @returns {Promise<object>} `{ ok: false, error }` when the comparison fails, otherwise
 *   an `ok: true` payload with the comparison, suite and runtime data.
 */
const handleRunAdaptiveNativePlannerBenchmark = async (bodyInput, request) => {
  const suite = createRAGAdaptiveNativePlannerBenchmarkSuite({
    description: getStringProperty(bodyInput, "description"),
    label: getStringProperty(bodyInput, "label"),
    metadata: getObjectProperty(bodyInput, "metadata"),
    topK: getIntegerLikeProperty(bodyInput, "topK") ?? undefined
  });

  // Recommendations are only honoured when the suite metadata has the expected types.
  let recommendedGroupKey;
  if (typeof suite.metadata?.recommendedGroupKey === "string") {
    recommendedGroupKey = suite.metadata.recommendedGroupKey;
  }
  let recommendedTags = [];
  if (Array.isArray(suite.metadata?.recommendedTags)) {
    recommendedTags = suite.metadata.recommendedTags.filter((entry) => typeof entry === "string");
  }
  const explicitTags = normalizeStringArray2(bodyInput?.tags);

  // Retrieval variants compared when the caller supplies none. Built lazily so the
  // query transform is only created when the defaults are actually used.
  const createDefaultRetrievals = () => [
    {
      id: "native-latency",
      label: "Native latency",
      retrieval: {
        mode: "vector",
        nativeQueryProfile: "latency"
      }
    },
    {
      id: "native-adaptive",
      label: "Adaptive native planner",
      retrieval: {
        mode: "vector"
      }
    },
    {
      id: "hybrid-adaptive",
      label: "Hybrid adaptive",
      retrieval: {
        mode: "hybrid"
      }
    },
    {
      id: "hybrid-transform",
      label: "Hybrid transform",
      queryTransform: createHeuristicRAGQueryTransform(),
      retrieval: {
        mode: "hybrid"
      }
    }
  ];

  const comparisonBody = {
    ...suite.input,
    baselineRetrievalId: getStringProperty(bodyInput, "baselineRetrievalId") ?? "native-latency",
    candidateRetrievalId: getStringProperty(bodyInput, "candidateRetrievalId") ?? "native-adaptive",
    corpusGroupKey: getStringProperty(bodyInput, "corpusGroupKey"),
    groupKey: getStringProperty(bodyInput, "groupKey") ?? recommendedGroupKey,
    label: suite.label,
    // Runs are persisted unless the lookup yields an explicit false.
    persistRun: getBooleanProperty(bodyInput, "persistRun") !== false,
    suiteId: suite.id,
    retrievals: Array.isArray(bodyInput?.retrievals) ? bodyInput.retrievals : createDefaultRetrievals(),
    // Explicit tags win over the suite's recommended tags; either way a fixture tag is ensured.
    tags: ensureRetrievalBenchmarkFixtureTag(explicitTags.length > 0 ? explicitTags : recommendedTags)
  };

  const comparisonResult = await handleEvaluateRetrievals(comparisonBody, request);
  if (!comparisonResult.ok) {
    return {
      error: comparisonResult.error,
      ok: false
    };
  }

  const {
    corpusGroupKey,
    fixtureVariants,
    groupKey,
    historyPresentation,
    latestFixtureVariant,
    latestRun,
    recentRuns,
    snapshotHistory,
    snapshotHistoryPresentation
  } = await loadAdaptiveNativePlannerBenchmarkRuntime({
    corpusGroupKey: getStringProperty(bodyInput, "corpusGroupKey"),
    groupKey: getStringProperty(bodyInput, "groupKey") ?? recommendedGroupKey,
    historyLimit: getIntegerLikeProperty(bodyInput, "runLimit") ?? 5,
    snapshotLimit: getIntegerLikeProperty(bodyInput, "limit") ?? 5,
    suite
  });
  return {
    comparison: comparisonResult.comparison,
    corpusGroupKey,
    fixtureVariants,
    groupKey,
    historyPresentation,
    latestFixtureVariant,
    latestRun,
    ok: true,
    recentRuns,
    snapshotHistory,
    snapshotHistoryPresentation,
    suite
  };
};
|
|
23519
|
+
/**
 * Runs the native backend comparison benchmark through `handleEvaluateRetrievals`
 * and returns the comparison together with the refreshed benchmark runtime view.
 *
 * When no explicit tags are supplied, the run is tagged with the suite's
 * recommended tags plus the tags from `buildRetrievalBenchmarkBackendTags`.
 *
 * @param {object} bodyInput - Request body. Reads `description`, `label`, `metadata`,
 *   `topK`, `tags`, `baselineRetrievalId`, `candidateRetrievalId`, `corpusGroupKey`,
 *   `groupKey`, `persistRun`, `retrievals`, `runLimit` and `limit`.
 * @param {Request} [request] - Incoming request, forwarded to `handleEvaluateRetrievals`.
 * @returns {Promise<object>} `{ ok: false, error }` when the comparison fails, otherwise
 *   an `ok: true` payload with the comparison, suite and runtime data.
 */
const handleRunNativeBackendComparisonBenchmark = async (bodyInput, request) => {
  const suite = createRAGNativeBackendComparisonBenchmarkSuite({
    description: getStringProperty(bodyInput, "description"),
    label: getStringProperty(bodyInput, "label"),
    metadata: getObjectProperty(bodyInput, "metadata"),
    topK: getIntegerLikeProperty(bodyInput, "topK") ?? undefined
  });

  // Recommendations are only honoured when the suite metadata has the expected types.
  let recommendedGroupKey;
  if (typeof suite.metadata?.recommendedGroupKey === "string") {
    recommendedGroupKey = suite.metadata.recommendedGroupKey;
  }
  let recommendedTags = [];
  if (Array.isArray(suite.metadata?.recommendedTags)) {
    recommendedTags = suite.metadata.recommendedTags.filter((entry) => typeof entry === "string");
  }
  const explicitTags = normalizeStringArray2(bodyInput?.tags);

  // Retrieval variants compared when the caller supplies none. Built lazily so the
  // query transform is only created when the defaults are actually used.
  const createDefaultRetrievals = () => [
    {
      id: "native-latency",
      label: "Native latency",
      retrieval: {
        mode: "vector",
        nativeQueryProfile: "latency"
      }
    },
    {
      id: "native-adaptive",
      label: "Adaptive native planner",
      retrieval: {
        mode: "vector"
      }
    },
    {
      id: "hybrid-adaptive",
      label: "Hybrid adaptive",
      retrieval: {
        mode: "hybrid"
      }
    },
    {
      id: "hybrid-transform",
      label: "Hybrid transform",
      queryTransform: createHeuristicRAGQueryTransform(),
      retrieval: {
        mode: "hybrid"
      }
    }
  ];

  const comparisonBody = {
    ...suite.input,
    baselineRetrievalId: getStringProperty(bodyInput, "baselineRetrievalId") ?? "native-latency",
    candidateRetrievalId: getStringProperty(bodyInput, "candidateRetrievalId") ?? "native-adaptive",
    corpusGroupKey: getStringProperty(bodyInput, "corpusGroupKey"),
    groupKey: getStringProperty(bodyInput, "groupKey") ?? recommendedGroupKey,
    label: suite.label,
    // Runs are persisted unless the lookup yields an explicit false.
    persistRun: getBooleanProperty(bodyInput, "persistRun") !== false,
    suiteId: suite.id,
    retrievals: Array.isArray(bodyInput?.retrievals) ? bodyInput.retrievals : createDefaultRetrievals(),
    // Explicit tags win; the fallback also records the backend tags for this comparison.
    tags: ensureRetrievalBenchmarkFixtureTag(
      explicitTags.length > 0 ? explicitTags : [...recommendedTags, ...buildRetrievalBenchmarkBackendTags()]
    )
  };

  const comparisonResult = await handleEvaluateRetrievals(comparisonBody, request);
  if (!comparisonResult.ok) {
    return {
      error: comparisonResult.error,
      ok: false
    };
  }

  const {
    corpusGroupKey,
    fixtureVariants,
    groupKey,
    historyPresentation,
    latestFixtureVariant,
    latestRun,
    recentRuns,
    snapshotHistory,
    snapshotHistoryPresentation
  } = await loadNativeBackendComparisonBenchmarkRuntime({
    corpusGroupKey: getStringProperty(bodyInput, "corpusGroupKey"),
    groupKey: getStringProperty(bodyInput, "groupKey") ?? recommendedGroupKey,
    historyLimit: getIntegerLikeProperty(bodyInput, "runLimit") ?? 5,
    snapshotLimit: getIntegerLikeProperty(bodyInput, "limit") ?? 5,
    suite
  });
  return {
    comparison: comparisonResult.comparison,
    corpusGroupKey,
    fixtureVariants,
    groupKey,
    historyPresentation,
    latestFixtureVariant,
    latestRun,
    ok: true,
    recentRuns,
    snapshotHistory,
    snapshotHistoryPresentation,
    suite
  };
};
|
|
23604
|
+
/**
 * Persists a new snapshot of the adaptive native planner benchmark suite and
 * returns the updated snapshot history.
 *
 * When a request is supplied, the caller must be authorized for
 * `manage_retrieval_admin`. The snapshot version defaults to one more than the
 * latest stored snapshot's version (starting at 1) unless `version` is given.
 *
 * @param {object} bodyInput - Request body. Reads `description`, `label`, `metadata`,
 *   `createdAt`, `snapshotMetadata`, `version` and `limit` (history size, default 5).
 * @param {Request} [request] - Incoming request used for the authorization check.
 * @returns {Promise<object>} `{ ok: false, error }` on rejection or missing store,
 *   otherwise `{ ok: true, snapshot, snapshotHistory, snapshotHistoryPresentation, suite }`.
 */
const handlePersistAdaptiveNativePlannerBenchmarkSnapshot = async (bodyInput, request) => {
  if (request) {
    const authorization = await checkAuthorization(request, "manage_retrieval_admin");
    if (!authorization.allowed) {
      return { error: authorization.reason ?? "Forbidden", ok: false };
    }
  }
  if (!config.evaluationSuiteSnapshotHistoryStore) {
    return { error: "Evaluation suite snapshot history store is not configured", ok: false };
  }

  const suite = createRAGAdaptiveNativePlannerBenchmarkSuite({
    description: getStringProperty(bodyInput, "description"),
    label: getStringProperty(bodyInput, "label"),
    metadata: getObjectProperty(bodyInput, "metadata")
  });

  // Only the latest snapshot is needed to derive the next version number.
  const previousHistory = await loadRAGEvaluationSuiteSnapshotHistory({
    limit: 1,
    store: config.evaluationSuiteSnapshotHistoryStore,
    suite
  });
  const createdAt = getNumberProperty(bodyInput, "createdAt");
  const metadata = getObjectProperty(bodyInput, "snapshotMetadata");
  const nextVersion = (previousHistory.latestSnapshot?.version ?? 0) + 1;
  const version = getIntegerLikeProperty(bodyInput, "version") ?? nextVersion;
  const snapshot = createRAGAdaptiveNativePlannerBenchmarkSnapshot({ createdAt, metadata, suite, version });
  await config.evaluationSuiteSnapshotHistoryStore.saveSnapshot(snapshot);

  // Reload so the response reflects the snapshot that was just saved.
  const snapshotHistory = await loadRAGEvaluationSuiteSnapshotHistory({
    limit: getIntegerLikeProperty(bodyInput, "limit") ?? 5,
    store: config.evaluationSuiteSnapshotHistoryStore,
    suite
  });
  const snapshotHistoryPresentation = buildRAGEvaluationSuiteSnapshotHistoryPresentation(snapshotHistory);
  return {
    ok: true,
    snapshot,
    snapshotHistory,
    snapshotHistoryPresentation,
    suite
  };
};
|
|
23650
|
+
/**
 * Persists a new snapshot of the native backend comparison benchmark suite and
 * returns the updated snapshot history.
 *
 * When a request is supplied, the caller must be authorized for
 * `manage_retrieval_admin`. The snapshot version defaults to one more than the
 * latest stored snapshot's version (starting at 1) unless `version` is given.
 *
 * @param {object} bodyInput - Request body. Reads `description`, `label`, `metadata`,
 *   `createdAt`, `snapshotMetadata`, `version` and `limit` (history size, default 5).
 * @param {Request} [request] - Incoming request used for the authorization check.
 * @returns {Promise<object>} `{ ok: false, error }` on rejection or missing store,
 *   otherwise `{ ok: true, snapshot, snapshotHistory, snapshotHistoryPresentation, suite }`.
 */
const handlePersistNativeBackendComparisonBenchmarkSnapshot = async (bodyInput, request) => {
  if (request) {
    const authorization = await checkAuthorization(request, "manage_retrieval_admin");
    if (!authorization.allowed) {
      return { error: authorization.reason ?? "Forbidden", ok: false };
    }
  }
  if (!config.evaluationSuiteSnapshotHistoryStore) {
    return { error: "Evaluation suite snapshot history store is not configured", ok: false };
  }

  const suite = createRAGNativeBackendComparisonBenchmarkSuite({
    description: getStringProperty(bodyInput, "description"),
    label: getStringProperty(bodyInput, "label"),
    metadata: getObjectProperty(bodyInput, "metadata")
  });

  // Only the latest snapshot is needed to derive the next version number.
  const previousHistory = await loadRAGEvaluationSuiteSnapshotHistory({
    limit: 1,
    store: config.evaluationSuiteSnapshotHistoryStore,
    suite
  });
  const createdAt = getNumberProperty(bodyInput, "createdAt");
  const metadata = getObjectProperty(bodyInput, "snapshotMetadata");
  const nextVersion = (previousHistory.latestSnapshot?.version ?? 0) + 1;
  const version = getIntegerLikeProperty(bodyInput, "version") ?? nextVersion;
  const snapshot = createRAGNativeBackendComparisonBenchmarkSnapshot({ createdAt, metadata, suite, version });
  await config.evaluationSuiteSnapshotHistoryStore.saveSnapshot(snapshot);

  // Reload so the response reflects the snapshot that was just saved.
  const snapshotHistory = await loadRAGEvaluationSuiteSnapshotHistory({
    limit: getIntegerLikeProperty(bodyInput, "limit") ?? 5,
    store: config.evaluationSuiteSnapshotHistoryStore,
    suite
  });
  const snapshotHistoryPresentation = buildRAGEvaluationSuiteSnapshotHistoryPresentation(snapshotHistory);
  return {
    ok: true,
    snapshot,
    snapshotHistory,
    snapshotHistoryPresentation,
    suite
  };
};
|
|
21398
23696
|
const handleRetrievalLaneHandoffList = async (queryInput, request) => {
|
|
@@ -22586,7 +24884,7 @@ var ragChat = (config) => {
|
|
|
22586
24884
|
};
|
|
22587
24885
|
};
|
|
22588
24886
|
const handlePromoteRetrievalBaselineToLane = async (body) => {
|
|
22589
|
-
if (!
|
|
24887
|
+
if (!isObjectRecord3(body)) {
|
|
22590
24888
|
return {
|
|
22591
24889
|
error: "Expected payload shape: { groupKey, retrievalId, rolloutLabel }",
|
|
22592
24890
|
ok: false
|
|
@@ -22668,7 +24966,7 @@ var ragChat = (config) => {
|
|
|
22668
24966
|
};
|
|
22669
24967
|
};
|
|
22670
24968
|
const handleIngest = async (body) => {
|
|
22671
|
-
if (!
|
|
24969
|
+
if (!isObjectRecord3(body)) {
|
|
22672
24970
|
return { error: "Invalid payload", ok: false };
|
|
22673
24971
|
}
|
|
22674
24972
|
if (!ragStore) {
|
|
@@ -22770,7 +25068,7 @@ var ragChat = (config) => {
|
|
|
22770
25068
|
}
|
|
22771
25069
|
};
|
|
22772
25070
|
const handleSearch = async (body, request) => {
|
|
22773
|
-
if (!
|
|
25071
|
+
if (!isObjectRecord3(body)) {
|
|
22774
25072
|
return { error: "Invalid payload", ok: false };
|
|
22775
25073
|
}
|
|
22776
25074
|
const query = (getStringProperty(body, "query") ?? "").trim();
|
|
@@ -22801,7 +25099,7 @@ var ragChat = (config) => {
|
|
|
22801
25099
|
};
|
|
22802
25100
|
const persistTrace = getBooleanProperty(body, "persistTrace") === true;
|
|
22803
25101
|
const traceGroupKey = getStringProperty(body, "traceGroupKey");
|
|
22804
|
-
const traceTags = normalizeStringArray2(
|
|
25102
|
+
const traceTags = normalizeStringArray2(isObjectRecord3(body) ? body.traceTags : undefined);
|
|
22805
25103
|
const hasSearchRetrieval = getOwnProperty(body, "retrieval");
|
|
22806
25104
|
const parsedSearchRetrieval = parseRAGRetrieval(body.retrieval);
|
|
22807
25105
|
if (hasSearchRetrieval && parsedSearchRetrieval === null) {
|
|
@@ -23388,6 +25686,14 @@ var ragChat = (config) => {
|
|
|
23388
25686
|
});
|
|
23389
25687
|
const latestRejectedCandidate = enrichedRecentRetrievalReleaseDecisions?.find((entry) => entry.kind === "reject");
|
|
23390
25688
|
const latestRetrievalComparisonRun = recentRetrievalComparisonRuns?.[0];
|
|
25689
|
+
const adaptiveNativePlannerBenchmark = await loadAdaptiveNativePlannerBenchmarkRuntime({
|
|
25690
|
+
historyLimit: 5,
|
|
25691
|
+
snapshotLimit: 5
|
|
25692
|
+
});
|
|
25693
|
+
const nativeBackendComparisonBenchmark = await loadNativeBackendComparisonBenchmarkRuntime({
|
|
25694
|
+
historyLimit: 5,
|
|
25695
|
+
snapshotLimit: 5
|
|
25696
|
+
});
|
|
23391
25697
|
const latestPromotionReadiness = latestRetrievalComparisonRun ? (() => {
|
|
23392
25698
|
const activeTargetRolloutLabel = activeRetrievalBaselines?.find((entry) => entry.groupKey === latestRetrievalComparisonRun.groupKey)?.rolloutLabel;
|
|
23393
25699
|
const state = getPromotionCandidateState({
|
|
@@ -23526,7 +25832,7 @@ var ragChat = (config) => {
|
|
|
23526
25832
|
return {
|
|
23527
25833
|
...group,
|
|
23528
25834
|
acknowledgedOpenIncidentCount,
|
|
23529
|
-
classification: groupOpenIncidents.some((entry) => entry.classification === "multivector") ? "multivector" : group.classification,
|
|
25835
|
+
classification: groupOpenIncidents.some((entry) => entry.classification === "runtime") ? "runtime" : groupOpenIncidents.some((entry) => entry.classification === "evidence") ? "evidence" : groupOpenIncidents.some((entry) => entry.classification === "multivector") ? "multivector" : group.classification,
|
|
23530
25836
|
openIncidentCount: groupOpenIncidents.length,
|
|
23531
25837
|
unacknowledgedOpenIncidentCount: groupOpenIncidents.length - acknowledgedOpenIncidentCount
|
|
23532
25838
|
};
|
|
@@ -23734,7 +26040,7 @@ var ragChat = (config) => {
|
|
|
23734
26040
|
] : candidate?.ready ? [
|
|
23735
26041
|
"latest candidate is ready to promote"
|
|
23736
26042
|
] : ["continue monitoring release state"];
|
|
23737
|
-
const classification = candidate?.reasons?.length ? classifyGovernanceReasons(candidate.reasons) : (recentIncidents ?? []).some((entry) => entry.groupKey === group.groupKey && entry.targetRolloutLabel === targetRolloutLabel && entry.classification === "multivector") ? "multivector" : "general";
|
|
26043
|
+
const classification = candidate?.reasons?.length ? classifyGovernanceReasons(candidate.reasons) : (recentIncidents ?? []).some((entry) => entry.groupKey === group.groupKey && entry.targetRolloutLabel === targetRolloutLabel && entry.classification === "runtime") ? "runtime" : (recentIncidents ?? []).some((entry) => entry.groupKey === group.groupKey && entry.targetRolloutLabel === targetRolloutLabel && entry.classification === "evidence") ? "evidence" : (recentIncidents ?? []).some((entry) => entry.groupKey === group.groupKey && entry.targetRolloutLabel === targetRolloutLabel && entry.classification === "multivector") ? "multivector" : "general";
|
|
23738
26044
|
summaries.push({
|
|
23739
26045
|
baselineRetrievalId: candidate?.baselineRetrievalId,
|
|
23740
26046
|
candidateRetrievalId: candidate?.candidateRetrievalId,
|
|
@@ -24194,7 +26500,27 @@ var ragChat = (config) => {
|
|
|
24194
26500
|
if (!latestRetrievalComparisonRun) {
|
|
24195
26501
|
return alerts;
|
|
24196
26502
|
}
|
|
24197
|
-
const classifyRetrievalRegression = (input) => (
|
|
26503
|
+
/**
 * Classifies a retrieval regression by passing its reasons, plus synthetic
 * reasons derived from regressing deltas, to `classifyGovernanceReasons`.
 *
 * A synthetic reason is added when a multivector or evidence delta is negative,
 * or when a runtime delta is positive. Missing deltas count as 0.
 *
 * @param {{ reasons?: string[], delta?: object }} input - Regression details.
 * @returns {*} Whatever `classifyGovernanceReasons` returns for the combined reasons.
 */
const classifyRetrievalRegression = (input) => {
  const isNegative = (value) => value < 0;
  const isPositive = (value) => value > 0;
  // Checked in this order so the derived reasons keep a stable position in the list.
  const deltaSignals = [
    ["multiVectorCollapsedCasesDelta", isNegative, (value) => `multivector collapsed delta ${value}`],
    ["multiVectorLexicalHitCasesDelta", isNegative, (value) => `multivector lexical-hit delta ${value}`],
    ["multiVectorVectorHitCasesDelta", isNegative, (value) => `multivector vector-hit delta ${value}`],
    ["runtimeCandidateBudgetExhaustedCasesDelta", isPositive, (value) => `runtime candidate-budget-exhausted delta ${value}`],
    ["runtimeUnderfilledTopKCasesDelta", isPositive, (value) => `runtime underfilled-topk delta ${value}`],
    ["evidenceReconcileCasesDelta", isNegative, (value) => `evidence reconcile delta ${value}`]
  ];
  const reasons = [...input.reasons ?? []];
  for (const [deltaKey, isRegression, describe] of deltaSignals) {
    const deltaValue = input.delta?.[deltaKey] ?? 0;
    if (isRegression(deltaValue)) {
      reasons.push(describe(deltaValue));
    }
  }
  return classifyGovernanceReasons(reasons);
};
|
|
24198
26524
|
const latestWinner = latestRetrievalComparisonRun.comparison.summary.bestByPassingRate;
|
|
24199
26525
|
if (latestWinner && stableWinnerByPassingRate?.retrievalId && stableWinnerByPassingRate.retrievalId !== latestWinner) {
|
|
24200
26526
|
alerts.push({
|
|
@@ -24371,12 +26697,17 @@ var ragChat = (config) => {
|
|
|
24371
26697
|
ok: true,
|
|
24372
26698
|
readiness: buildReadiness(),
|
|
24373
26699
|
retrievalComparisons: {
|
|
26700
|
+
adaptiveNativePlannerBenchmark,
|
|
26701
|
+
nativeBackendComparisonBenchmark,
|
|
24374
26702
|
configured: Boolean(retrievalComparisonHistoryStore),
|
|
24375
26703
|
latest: latestRetrievalComparisonRun ? {
|
|
24376
26704
|
bestByAverageF1: latestRetrievalComparisonRun.comparison.summary.bestByAverageF1,
|
|
24377
26705
|
bestByMultivectorCollapsedCases: latestRetrievalComparisonRun.comparison.summary.bestByMultivectorCollapsedCases,
|
|
24378
26706
|
bestByMultivectorLexicalHitCases: latestRetrievalComparisonRun.comparison.summary.bestByMultivectorLexicalHitCases,
|
|
24379
26707
|
bestByMultivectorVectorHitCases: latestRetrievalComparisonRun.comparison.summary.bestByMultivectorVectorHitCases,
|
|
26708
|
+
bestByEvidenceReconcileCases: latestRetrievalComparisonRun.comparison.summary.bestByEvidenceReconcileCases,
|
|
26709
|
+
bestByLowestRuntimeCandidateBudgetExhaustedCases: latestRetrievalComparisonRun.comparison.summary.bestByLowestRuntimeCandidateBudgetExhaustedCases,
|
|
26710
|
+
bestByLowestRuntimeUnderfilledTopKCases: latestRetrievalComparisonRun.comparison.summary.bestByLowestRuntimeUnderfilledTopKCases,
|
|
24380
26711
|
bestByPassingRate: latestRetrievalComparisonRun.comparison.summary.bestByPassingRate,
|
|
24381
26712
|
corpusGroupKey: latestRetrievalComparisonRun.corpusGroupKey,
|
|
24382
26713
|
elapsedMs: latestRetrievalComparisonRun.elapsedMs,
|
|
@@ -24539,7 +26870,7 @@ var ragChat = (config) => {
|
|
|
24539
26870
|
ok: false
|
|
24540
26871
|
};
|
|
24541
26872
|
}
|
|
24542
|
-
if (!
|
|
26873
|
+
if (!isObjectRecord3(body)) {
|
|
24543
26874
|
return {
|
|
24544
26875
|
error: "Invalid payload",
|
|
24545
26876
|
ok: false
|
|
@@ -25292,6 +27623,78 @@ var ragChat = (config) => {
|
|
|
25292
27623
|
});
|
|
25293
27624
|
}
|
|
25294
27625
|
return result;
|
|
27626
|
+
}).get(`${path}/compare/retrieval/benchmarks/adaptive-native-planner`, async ({ query, request, set }) => {
|
|
27627
|
+
const result = await handleAdaptiveNativePlannerBenchmark(query);
|
|
27628
|
+
if (!result.ok) {
|
|
27629
|
+
set.status = HTTP_STATUS_BAD_REQUEST;
|
|
27630
|
+
}
|
|
27631
|
+
if (config.htmx && isHTMXRequest(request)) {
|
|
27632
|
+
if (!result.ok) {
|
|
27633
|
+
return toHTMXResponse(workflowRenderers.error(result.error ?? "Adaptive native planner benchmark failed"), getNumericStatus(set.status));
|
|
27634
|
+
}
|
|
27635
|
+
return toHTMXResponse(workflowRenderers.adaptiveNativePlannerBenchmark(result), getNumericStatus(set.status));
|
|
27636
|
+
}
|
|
27637
|
+
return result;
|
|
27638
|
+
}).get(`${path}/compare/retrieval/benchmarks/native-backend-comparison`, async ({ query, request, set }) => {
|
|
27639
|
+
const result = await handleNativeBackendComparisonBenchmark(query);
|
|
27640
|
+
if (!result.ok) {
|
|
27641
|
+
set.status = HTTP_STATUS_BAD_REQUEST;
|
|
27642
|
+
}
|
|
27643
|
+
if (config.htmx && isHTMXRequest(request)) {
|
|
27644
|
+
if (!result.ok) {
|
|
27645
|
+
return toHTMXResponse(workflowRenderers.error(result.error ?? "Native backend comparison benchmark failed"), getNumericStatus(set.status));
|
|
27646
|
+
}
|
|
27647
|
+
return toHTMXResponse(workflowRenderers.nativeBackendComparisonBenchmark(result), getNumericStatus(set.status));
|
|
27648
|
+
}
|
|
27649
|
+
return result;
|
|
27650
|
+
}).post(`${path}/compare/retrieval/benchmarks/adaptive-native-planner/run`, async ({ body, request, set }) => {
|
|
27651
|
+
const result = await handleRunAdaptiveNativePlannerBenchmark(body, request);
|
|
27652
|
+
if (!result.ok) {
|
|
27653
|
+
set.status = HTTP_STATUS_BAD_REQUEST;
|
|
27654
|
+
}
|
|
27655
|
+
if (config.htmx && isHTMXRequest(request)) {
|
|
27656
|
+
if (!result.ok) {
|
|
27657
|
+
return toHTMXResponse(workflowRenderers.error(result.error ?? "Adaptive native planner benchmark run failed"), getNumericStatus(set.status));
|
|
27658
|
+
}
|
|
27659
|
+
return toHTMXResponse(workflowRenderers.adaptiveNativePlannerBenchmark(result), getNumericStatus(set.status));
|
|
27660
|
+
}
|
|
27661
|
+
return result;
|
|
27662
|
+
}).post(`${path}/compare/retrieval/benchmarks/native-backend-comparison/run`, async ({ body, request, set }) => {
|
|
27663
|
+
const result = await handleRunNativeBackendComparisonBenchmark(body, request);
|
|
27664
|
+
if (!result.ok) {
|
|
27665
|
+
set.status = HTTP_STATUS_BAD_REQUEST;
|
|
27666
|
+
}
|
|
27667
|
+
if (config.htmx && isHTMXRequest(request)) {
|
|
27668
|
+
if (!result.ok) {
|
|
27669
|
+
return toHTMXResponse(workflowRenderers.error(result.error ?? "Native backend comparison benchmark run failed"), getNumericStatus(set.status));
|
|
27670
|
+
}
|
|
27671
|
+
return toHTMXResponse(workflowRenderers.nativeBackendComparisonBenchmark(result), getNumericStatus(set.status));
|
|
27672
|
+
}
|
|
27673
|
+
return result;
|
|
27674
|
+
}).post(`${path}/compare/retrieval/benchmarks/adaptive-native-planner/snapshots`, async ({ body, request, set }) => {
|
|
27675
|
+
const result = await handlePersistAdaptiveNativePlannerBenchmarkSnapshot(body, request);
|
|
27676
|
+
if (!result.ok) {
|
|
27677
|
+
set.status = HTTP_STATUS_BAD_REQUEST;
|
|
27678
|
+
}
|
|
27679
|
+
if (config.htmx && isHTMXRequest(request)) {
|
|
27680
|
+
if (!result.ok) {
|
|
27681
|
+
return toHTMXResponse(workflowRenderers.error(result.error ?? "Adaptive native planner benchmark snapshot failed"), getNumericStatus(set.status));
|
|
27682
|
+
}
|
|
27683
|
+
return toHTMXResponse(workflowRenderers.adaptiveNativePlannerBenchmarkSnapshot(result), getNumericStatus(set.status));
|
|
27684
|
+
}
|
|
27685
|
+
return result;
|
|
27686
|
+
}).post(`${path}/compare/retrieval/benchmarks/native-backend-comparison/snapshots`, async ({ body, request, set }) => {
|
|
27687
|
+
const result = await handlePersistNativeBackendComparisonBenchmarkSnapshot(body, request);
|
|
27688
|
+
if (!result.ok) {
|
|
27689
|
+
set.status = HTTP_STATUS_BAD_REQUEST;
|
|
27690
|
+
}
|
|
27691
|
+
if (config.htmx && isHTMXRequest(request)) {
|
|
27692
|
+
if (!result.ok) {
|
|
27693
|
+
return toHTMXResponse(workflowRenderers.error(result.error ?? "Native backend comparison benchmark snapshot failed"), getNumericStatus(set.status));
|
|
27694
|
+
}
|
|
27695
|
+
return toHTMXResponse(workflowRenderers.nativeBackendComparisonBenchmarkSnapshot(result), getNumericStatus(set.status));
|
|
27696
|
+
}
|
|
27697
|
+
return result;
|
|
25295
27698
|
}).get(`${path}/compare/retrieval/baselines`, async ({ query, request, set }) => {
|
|
25296
27699
|
const result = await handleRetrievalBaselineList(query, request);
|
|
25297
27700
|
if (!result.ok) {
|
|
@@ -29825,7 +32228,7 @@ var querySimilarity = (left, right) => {
|
|
|
29825
32228
|
init_constants();
|
|
29826
32229
|
|
|
29827
32230
|
// src/ai/rag/adapters/filtering.ts
|
|
29828
|
-
var
|
|
32231
|
+
/**
 * Tells whether a value is a non-null, non-array object.
 *
 * @param {unknown} value - Value to inspect.
 * @returns {boolean} True for objects other than arrays; false for null, primitives, functions and arrays.
 */
var isObjectRecord4 = (value) => {
  // Falsy values (including null) and non-object types are never records.
  if (!value || typeof value !== "object") {
    return false;
  }
  return !Array.isArray(value);
};
|
|
29829
32232
|
var valuesMatch = (expected, actual) => {
|
|
29830
32233
|
if (actual === expected) {
|
|
29831
32234
|
return true;
|
|
@@ -29835,12 +32238,12 @@ var valuesMatch = (expected, actual) => {
|
|
|
29835
32238
|
}
|
|
29836
32239
|
return false;
|
|
29837
32240
|
};
|
|
29838
|
-
var isFilterOperatorRecord = (value) =>
|
|
32241
|
+
// A filter operator record is an object record with at least one own key starting with "$".
var isFilterOperatorRecord = (value) => {
  if (!isObjectRecord4(value)) {
    return false;
  }
  return Object.keys(value).some((name) => name.startsWith("$"));
};
|
|
29839
32242
|
var getPathValue = (record, path) => {
|
|
29840
32243
|
const segments = path.split(".").filter(Boolean);
|
|
29841
32244
|
let current = record;
|
|
29842
32245
|
for (const segment of segments) {
|
|
29843
|
-
if (!
|
|
32246
|
+
if (!isObjectRecord4(current)) {
|
|
29844
32247
|
return;
|
|
29845
32248
|
}
|
|
29846
32249
|
current = current[segment];
|
|
@@ -29876,7 +32279,7 @@ var matchesOperatorFilter = (actual, filter) => Object.entries(filter).every(([o
|
|
|
29876
32279
|
}
|
|
29877
32280
|
});
|
|
29878
32281
|
// Compares a metadata value against an expected filter value:
//  - operator records (keys starting with "$") are delegated to matchesOperatorFilter;
//  - array-valued metadata matches when any element matches the expected value;
//  - everything else is compared directly through valuesMatch.
var matchesMetadataFilterValue = (actual, expected) => {
  if (isFilterOperatorRecord(expected)) {
    return matchesOperatorFilter(actual, expected);
  }
  if (Array.isArray(actual)) {
    return actual.some((entry) => valuesMatch(expected, entry));
  }
  return valuesMatch(expected, actual);
};
|
|
29879
|
-
var isNestedFilterArray = (value) => Array.isArray(value) && value.every((entry) =>
|
|
32282
|
+
// True when value is an array whose entries are all object records (an empty array qualifies).
var isNestedFilterArray = (value) => {
  if (!Array.isArray(value)) {
    return false;
  }
  return value.every((entry) => isObjectRecord4(entry));
};
|
|
29880
32283
|
var matchesLogicalFilter = (record, key, value) => {
|
|
29881
32284
|
switch (key) {
|
|
29882
32285
|
case "$and":
|
|
@@ -29884,7 +32287,7 @@ var matchesLogicalFilter = (record, key, value) => {
|
|
|
29884
32287
|
case "$or":
|
|
29885
32288
|
return isNestedFilterArray(value) ? value.some((entry) => matchesMetadataFilterRecord(record, entry)) : false;
|
|
29886
32289
|
case "$not":
|
|
29887
|
-
return
|
|
32290
|
+
return isObjectRecord4(value) ? !matchesMetadataFilterRecord(record, value) : false;
|
|
29888
32291
|
default:
|
|
29889
32292
|
return false;
|
|
29890
32293
|
}
|
|
@@ -30053,6 +32456,22 @@ var planNativeCandidateSearchK = (input) => {
|
|
|
30053
32456
|
}
|
|
30054
32457
|
return Math.min(base, filtered);
|
|
30055
32458
|
};
|
|
32459
|
+
/**
 * Resolves the candidate limit for a native vector query.
 *
 * Reads from `input`:
 *  - defaultCandidateLimit: upper bound applied to every computed limit.
 *  - explicitCandidateLimit: when a finite number, it is clamped to
 *    [1, defaultCandidateLimit] and used instead of the profile heuristics.
 *  - filteredCandidateCount: when a finite number, it is floored (min 0) and
 *    caps the result; a cap of 0 yields 0.
 *  - plannerProfile: "latency" and "recall" select smaller / larger targets;
 *    any other value uses the middle target.
 *  - queryMultiplier, topK: scale the heuristic target.
 *
 * A missing or NaN topK / queryMultiplier falls back to 1 so the function never
 * returns NaN for those inputs; results for valid numeric inputs are unchanged.
 *
 * @returns {number} the resolved candidate limit.
 */
var resolveAdaptiveNativeCandidateLimit = (input) => {
  const isFiniteNumber = (value) => typeof value === "number" && Number.isFinite(value);
  // Number() mirrors the implicit coercion the arithmetic below would apply;
  // only a NaN result is replaced by the fallback.
  const coerceOr = (value, fallback) => {
    const numeric = Number(value);
    return Number.isNaN(numeric) ? fallback : numeric;
  };
  const clamp = (value) => Math.min(input.defaultCandidateLimit, Math.max(1, Math.floor(value)));
  const filteredCap = isFiniteNumber(input.filteredCandidateCount)
    ? Math.max(0, Math.floor(input.filteredCandidateCount))
    : undefined;

  if (isFiniteNumber(input.explicitCandidateLimit)) {
    const explicit = clamp(input.explicitCandidateLimit);
    return filteredCap === undefined ? explicit : Math.min(explicit, filteredCap);
  }

  const topK = coerceOr(input.topK, 1);
  const multiplier = Math.max(1, Math.floor(coerceOr(input.queryMultiplier, 1)));
  const baseFloor = Math.max(topK, topK * multiplier);

  let target;
  switch (input.plannerProfile) {
    case "latency":
      target = Math.max(topK * 2, baseFloor);
      break;
    case "recall":
      target = Math.max(topK * 12, baseFloor * 4);
      break;
    default:
      target = Math.max(topK * 6, baseFloor * 2);
  }
  const tuned = clamp(target);

  if (filteredCap === undefined) {
    return tuned;
  }
  // Kept as an explicit early return so a zero cap always yields 0.
  if (filteredCap === 0) {
    return 0;
  }
  return Math.min(tuned, filteredCap);
};
|
|
30056
32475
|
var planNativeCandidateSearchBackfillK = (input) => {
|
|
30057
32476
|
if (typeof input.maxBackfills === "number" && Number.isFinite(input.maxBackfills) && (input.backfillCount ?? 0) >= Math.max(0, Math.floor(input.maxBackfills))) {
|
|
30058
32477
|
return input.currentSearchK;
|
|
@@ -30373,12 +32792,6 @@ var normalizeQueryMultiplier = (value) => {
|
|
|
30373
32792
|
}
|
|
30374
32793
|
return Math.min(MAX_QUERY_MULTIPLIER, Math.max(1, Math.floor(value)));
|
|
30375
32794
|
};
|
|
30376
|
-
var normalizeCandidateLimit = (value) => {
|
|
30377
|
-
if (value === undefined || !Number.isFinite(value)) {
|
|
30378
|
-
return RAG_NATIVE_QUERY_CANDIDATE_LIMIT;
|
|
30379
|
-
}
|
|
30380
|
-
return Math.min(RAG_NATIVE_QUERY_CANDIDATE_LIMIT, Math.max(1, Math.floor(value)));
|
|
30381
|
-
};
|
|
30382
32795
|
var normalizeMaxBackfills = (value) => {
|
|
30383
32796
|
if (value === undefined || !Number.isFinite(value)) {
|
|
30384
32797
|
return;
|
|
@@ -30733,7 +33146,6 @@ var createPostgresRAGStore = (options = {}) => {
|
|
|
30733
33146
|
await init();
|
|
30734
33147
|
const queryVector = normalizeVector(input.queryVector);
|
|
30735
33148
|
const queryMultiplier2 = normalizeQueryMultiplier(input.queryMultiplier ?? options.queryMultiplier);
|
|
30736
|
-
const candidateLimit = normalizeCandidateLimit(input.candidateLimit);
|
|
30737
33149
|
const maxBackfills = normalizeMaxBackfills(input.maxBackfills);
|
|
30738
33150
|
const minResults = normalizeMinResults(input.minResults, input.topK);
|
|
30739
33151
|
const fillTarget = resolveFillTarget({
|
|
@@ -30756,6 +33168,14 @@ var createPostgresRAGStore = (options = {}) => {
|
|
|
30756
33168
|
pushdownFilter: effectivePushdownFilter
|
|
30757
33169
|
};
|
|
30758
33170
|
const totalRows = parseCountValue(totalRowsResult?.[0]?.count);
|
|
33171
|
+
const candidateLimit = resolveAdaptiveNativeCandidateLimit({
|
|
33172
|
+
defaultCandidateLimit: RAG_NATIVE_QUERY_CANDIDATE_LIMIT,
|
|
33173
|
+
explicitCandidateLimit: input.candidateLimit,
|
|
33174
|
+
filteredCandidateCount: totalRows,
|
|
33175
|
+
plannerProfile: input.plannerProfile,
|
|
33176
|
+
queryMultiplier: queryMultiplier2,
|
|
33177
|
+
topK: input.topK
|
|
33178
|
+
});
|
|
30759
33179
|
const hasPushdownFilter = Boolean(effectivePushdownFilter);
|
|
30760
33180
|
const plannedFilteredCandidateCount = hasPushdownFilter && totalRows === 0 ? undefined : totalRows;
|
|
30761
33181
|
const initialSearchK = planNativeCandidateSearchK({
|
|
@@ -31098,9 +33518,9 @@ var DEFAULT_QUERY_MULTIPLIER2 = 4;
|
|
|
31098
33518
|
var MAX_QUERY_MULTIPLIER2 = 16;
|
|
31099
33519
|
var IDENTIFIER_RE2 = /^[a-zA-Z_][a-zA-Z0-9_]*$/;
|
|
31100
33520
|
// Accepts any non-null value whose typeof is "object" (arrays included).
var isParsedMetadata = (value) => {
  if (value === null) {
    return false;
  }
  return typeof value === "object";
};
|
|
31101
|
-
var
|
|
31102
|
-
var isStoredRow = (value) =>
|
|
31103
|
-
var isNativeStoredRow = (value) =>
|
|
33521
|
+
// Non-null object check used by the row guards in this module; arrays also pass.
var isObjectRecord5 = (value) => typeof value === "object" && value !== null;
|
|
33522
|
+
// Guard for a stored row: chunk_id, text and embedding must be strings;
// title, source and metadata must each be a string or null.
var isStoredRow = (value) => {
  if (!isObjectRecord5(value)) {
    return false;
  }
  const isNullableString = (field) => typeof field === "string" || field === null;
  return (
    typeof value.chunk_id === "string" &&
    typeof value.text === "string" &&
    isNullableString(value.title) &&
    isNullableString(value.source) &&
    isNullableString(value.metadata) &&
    typeof value.embedding === "string"
  );
};
|
|
33523
|
+
// Guard for a native query row: chunk_id, chunk_text and embedding must be strings,
// distance must be a number, and title, source and metadata must each be a string or null.
var isNativeStoredRow = (value) => {
  if (!isObjectRecord5(value)) {
    return false;
  }
  const isNullableString = (field) => typeof field === "string" || field === null;
  return (
    typeof value.chunk_id === "string" &&
    typeof value.chunk_text === "string" &&
    isNullableString(value.title) &&
    isNullableString(value.source) &&
    isNullableString(value.metadata) &&
    typeof value.embedding === "string" &&
    typeof value.distance === "number"
  );
};
|
|
31104
33524
|
// Keeps only the entries that pass isStoredRow; any non-array input yields an empty array.
var toStoredRows = (value) => {
  if (!Array.isArray(value)) {
    return [];
  }
  return value.filter((row) => isStoredRow(row));
};
|
|
31105
33525
|
// Keeps only the entries that pass isNativeStoredRow; any non-array input yields an empty array.
var toNativeStoredRows = (value) => {
  if (!Array.isArray(value)) {
    return [];
  }
  return value.filter((row) => isNativeStoredRow(row));
};
|
|
31106
33526
|
var createSQLiteStatus = (dimensions, nativeDiagnostics, useNative) => ({
|
|
@@ -31128,12 +33548,6 @@ var normalizeQueryMultiplier2 = (value) => {
|
|
|
31128
33548
|
const minMultiplier = Math.max(1, Math.floor(value));
|
|
31129
33549
|
return Math.min(minMultiplier, MAX_QUERY_MULTIPLIER2);
|
|
31130
33550
|
};
|
|
31131
|
-
var normalizeCandidateLimit2 = (value) => {
|
|
31132
|
-
if (value === undefined || !Number.isFinite(value)) {
|
|
31133
|
-
return RAG_NATIVE_QUERY_CANDIDATE_LIMIT;
|
|
31134
|
-
}
|
|
31135
|
-
return Math.min(RAG_NATIVE_QUERY_CANDIDATE_LIMIT, Math.max(1, Math.floor(value)));
|
|
31136
|
-
};
|
|
31137
33551
|
var normalizeMaxBackfills2 = (value) => {
|
|
31138
33552
|
if (value === undefined || !Number.isFinite(value)) {
|
|
31139
33553
|
return;
|
|
@@ -31992,7 +34406,6 @@ var createSQLiteRAGStore = (options = {}) => {
|
|
|
31992
34406
|
throw new Error("Native vector backend is not available");
|
|
31993
34407
|
}
|
|
31994
34408
|
const queryMultiplier = normalizeQueryMultiplier2(input.queryMultiplier ?? nativeConfig?.queryMultiplier);
|
|
31995
|
-
const candidateLimit = normalizeCandidateLimit2(input.candidateLimit);
|
|
31996
34409
|
const maxBackfills = normalizeMaxBackfills2(input.maxBackfills);
|
|
31997
34410
|
const minResults = normalizeMinResults2(input.minResults, input.topK);
|
|
31998
34411
|
const fillTarget = resolveFillTarget2({
|
|
@@ -32010,6 +34423,14 @@ var createSQLiteRAGStore = (options = {}) => {
|
|
|
32010
34423
|
title: "title"
|
|
32011
34424
|
});
|
|
32012
34425
|
const filteredCandidateCount = getFilteredSQLiteCandidateCount(db, tableName, filterPlan);
|
|
34426
|
+
const candidateLimit = resolveAdaptiveNativeCandidateLimit({
|
|
34427
|
+
defaultCandidateLimit: RAG_NATIVE_QUERY_CANDIDATE_LIMIT,
|
|
34428
|
+
explicitCandidateLimit: input.candidateLimit,
|
|
34429
|
+
filteredCandidateCount,
|
|
34430
|
+
plannerProfile: input.plannerProfile,
|
|
34431
|
+
queryMultiplier,
|
|
34432
|
+
topK: input.topK
|
|
34433
|
+
});
|
|
32013
34434
|
const searchK = planNativeCandidateSearchK({
|
|
32014
34435
|
candidateLimit,
|
|
32015
34436
|
filteredCandidateCount,
|
|
@@ -33347,6 +35768,9 @@ var createRAGClient = (options) => {
|
|
|
33347
35768
|
if (typeof input.runLimit === "number") {
|
|
33348
35769
|
searchParams.set("runLimit", String(input.runLimit));
|
|
33349
35770
|
}
|
|
35771
|
+
if (typeof input.benchmarkLimit === "number") {
|
|
35772
|
+
searchParams.set("benchmarkLimit", String(input.benchmarkLimit));
|
|
35773
|
+
}
|
|
33350
35774
|
if (input.targetRolloutLabel) {
|
|
33351
35775
|
searchParams.set("targetRolloutLabel", input.targetRolloutLabel);
|
|
33352
35776
|
}
|
|
@@ -33360,6 +35784,172 @@ var createRAGClient = (options) => {
|
|
|
33360
35784
|
}
|
|
33361
35785
|
return payload;
|
|
33362
35786
|
},
|
|
35787
|
+
async adaptiveNativePlannerBenchmark(input) {
|
|
35788
|
+
const searchParams = new URLSearchParams;
|
|
35789
|
+
if (typeof input?.limit === "number") {
|
|
35790
|
+
searchParams.set("limit", String(input.limit));
|
|
35791
|
+
}
|
|
35792
|
+
if (typeof input?.runLimit === "number") {
|
|
35793
|
+
searchParams.set("runLimit", String(input.runLimit));
|
|
35794
|
+
}
|
|
35795
|
+
if (input?.label) {
|
|
35796
|
+
searchParams.set("label", input.label);
|
|
35797
|
+
}
|
|
35798
|
+
if (input?.description) {
|
|
35799
|
+
searchParams.set("description", input.description);
|
|
35800
|
+
}
|
|
35801
|
+
if (input?.groupKey) {
|
|
35802
|
+
searchParams.set("benchmarkGroupKey", input.groupKey);
|
|
35803
|
+
}
|
|
35804
|
+
if (input?.corpusGroupKey) {
|
|
35805
|
+
searchParams.set("benchmarkCorpusGroupKey", input.corpusGroupKey);
|
|
35806
|
+
}
|
|
35807
|
+
const suffix = searchParams.size ? `?${searchParams}` : "";
|
|
35808
|
+
const response = await fetchImpl(`${basePath}/compare/retrieval/benchmarks/adaptive-native-planner${suffix}`);
|
|
35809
|
+
if (!response.ok) {
|
|
35810
|
+
throw new Error(await toErrorMessage3(response));
|
|
35811
|
+
}
|
|
35812
|
+
const payload = await parseJson(response);
|
|
35813
|
+
if (!payload.ok) {
|
|
35814
|
+
throw new Error(payload.error ?? "Adaptive native planner benchmark history failed");
|
|
35815
|
+
}
|
|
35816
|
+
return payload;
|
|
35817
|
+
},
|
|
35818
|
+
async runAdaptiveNativePlannerBenchmark(input) {
|
|
35819
|
+
const response = await fetchImpl(`${basePath}/compare/retrieval/benchmarks/adaptive-native-planner/run`, {
|
|
35820
|
+
body: JSON.stringify({
|
|
35821
|
+
baselineRetrievalId: input?.baselineRetrievalId,
|
|
35822
|
+
candidateRetrievalId: input?.candidateRetrievalId,
|
|
35823
|
+
corpusGroupKey: input?.corpusGroupKey,
|
|
35824
|
+
description: input?.description,
|
|
35825
|
+
groupKey: input?.groupKey,
|
|
35826
|
+
label: input?.label,
|
|
35827
|
+
limit: input?.limit,
|
|
35828
|
+
metadata: input?.metadata,
|
|
35829
|
+
persistRun: input?.persistRun,
|
|
35830
|
+
retrievals: input?.retrievals,
|
|
35831
|
+
runLimit: input?.runLimit,
|
|
35832
|
+
tags: input?.tags,
|
|
35833
|
+
topK: input?.topK
|
|
35834
|
+
}),
|
|
35835
|
+
headers: jsonHeaders,
|
|
35836
|
+
method: "POST"
|
|
35837
|
+
});
|
|
35838
|
+
if (!response.ok) {
|
|
35839
|
+
throw new Error(await toErrorMessage3(response));
|
|
35840
|
+
}
|
|
35841
|
+
const payload = await parseJson(response);
|
|
35842
|
+
if (!payload.ok) {
|
|
35843
|
+
throw new Error(payload.error ?? "Adaptive native planner benchmark run failed");
|
|
35844
|
+
}
|
|
35845
|
+
return payload;
|
|
35846
|
+
},
|
|
35847
|
+
async saveAdaptiveNativePlannerBenchmarkSnapshot(input) {
|
|
35848
|
+
const response = await fetchImpl(`${basePath}/compare/retrieval/benchmarks/adaptive-native-planner/snapshots`, {
|
|
35849
|
+
body: JSON.stringify({
|
|
35850
|
+
createdAt: input?.createdAt,
|
|
35851
|
+
description: input?.description,
|
|
35852
|
+
label: input?.label,
|
|
35853
|
+
limit: input?.limit,
|
|
35854
|
+
metadata: input?.metadata,
|
|
35855
|
+
snapshotMetadata: input?.snapshotMetadata,
|
|
35856
|
+
version: input?.version
|
|
35857
|
+
}),
|
|
35858
|
+
headers: jsonHeaders,
|
|
35859
|
+
method: "POST"
|
|
35860
|
+
});
|
|
35861
|
+
if (!response.ok) {
|
|
35862
|
+
throw new Error(await toErrorMessage3(response));
|
|
35863
|
+
}
|
|
35864
|
+
const payload = await parseJson(response);
|
|
35865
|
+
if (!payload.ok) {
|
|
35866
|
+
throw new Error(payload.error ?? "Adaptive native planner benchmark snapshot failed");
|
|
35867
|
+
}
|
|
35868
|
+
return payload;
|
|
35869
|
+
},
|
|
35870
|
+
async nativeBackendComparisonBenchmark(input) {
|
|
35871
|
+
const searchParams = new URLSearchParams;
|
|
35872
|
+
if (typeof input?.limit === "number") {
|
|
35873
|
+
searchParams.set("limit", String(input.limit));
|
|
35874
|
+
}
|
|
35875
|
+
if (typeof input?.runLimit === "number") {
|
|
35876
|
+
searchParams.set("runLimit", String(input.runLimit));
|
|
35877
|
+
}
|
|
35878
|
+
if (input?.label) {
|
|
35879
|
+
searchParams.set("label", input.label);
|
|
35880
|
+
}
|
|
35881
|
+
if (input?.description) {
|
|
35882
|
+
searchParams.set("description", input.description);
|
|
35883
|
+
}
|
|
35884
|
+
if (input?.groupKey) {
|
|
35885
|
+
searchParams.set("benchmarkGroupKey", input.groupKey);
|
|
35886
|
+
}
|
|
35887
|
+
if (input?.corpusGroupKey) {
|
|
35888
|
+
searchParams.set("benchmarkCorpusGroupKey", input.corpusGroupKey);
|
|
35889
|
+
}
|
|
35890
|
+
const suffix = searchParams.size ? `?${searchParams}` : "";
|
|
35891
|
+
const response = await fetchImpl(`${basePath}/compare/retrieval/benchmarks/native-backend-comparison${suffix}`);
|
|
35892
|
+
if (!response.ok) {
|
|
35893
|
+
throw new Error(await toErrorMessage3(response));
|
|
35894
|
+
}
|
|
35895
|
+
const payload = await parseJson(response);
|
|
35896
|
+
if (!payload.ok) {
|
|
35897
|
+
throw new Error(payload.error ?? "Native backend comparison benchmark history failed");
|
|
35898
|
+
}
|
|
35899
|
+
return payload;
|
|
35900
|
+
},
|
|
35901
|
+
async runNativeBackendComparisonBenchmark(input) {
|
|
35902
|
+
const response = await fetchImpl(`${basePath}/compare/retrieval/benchmarks/native-backend-comparison/run`, {
|
|
35903
|
+
body: JSON.stringify({
|
|
35904
|
+
baselineRetrievalId: input?.baselineRetrievalId,
|
|
35905
|
+
candidateRetrievalId: input?.candidateRetrievalId,
|
|
35906
|
+
corpusGroupKey: input?.corpusGroupKey,
|
|
35907
|
+
description: input?.description,
|
|
35908
|
+
groupKey: input?.groupKey,
|
|
35909
|
+
label: input?.label,
|
|
35910
|
+
limit: input?.limit,
|
|
35911
|
+
metadata: input?.metadata,
|
|
35912
|
+
persistRun: input?.persistRun,
|
|
35913
|
+
retrievals: input?.retrievals,
|
|
35914
|
+
runLimit: input?.runLimit,
|
|
35915
|
+
tags: input?.tags,
|
|
35916
|
+
topK: input?.topK
|
|
35917
|
+
}),
|
|
35918
|
+
headers: jsonHeaders,
|
|
35919
|
+
method: "POST"
|
|
35920
|
+
});
|
|
35921
|
+
if (!response.ok) {
|
|
35922
|
+
throw new Error(await toErrorMessage3(response));
|
|
35923
|
+
}
|
|
35924
|
+
const payload = await parseJson(response);
|
|
35925
|
+
if (!payload.ok) {
|
|
35926
|
+
throw new Error(payload.error ?? "Native backend comparison benchmark run failed");
|
|
35927
|
+
}
|
|
35928
|
+
return payload;
|
|
35929
|
+
},
|
|
35930
|
+
async saveNativeBackendComparisonBenchmarkSnapshot(input) {
|
|
35931
|
+
const response = await fetchImpl(`${basePath}/compare/retrieval/benchmarks/native-backend-comparison/snapshots`, {
|
|
35932
|
+
body: JSON.stringify({
|
|
35933
|
+
createdAt: input?.createdAt,
|
|
35934
|
+
description: input?.description,
|
|
35935
|
+
label: input?.label,
|
|
35936
|
+
limit: input?.limit,
|
|
35937
|
+
metadata: input?.metadata,
|
|
35938
|
+
snapshotMetadata: input?.snapshotMetadata,
|
|
35939
|
+
version: input?.version
|
|
35940
|
+
}),
|
|
35941
|
+
headers: jsonHeaders,
|
|
35942
|
+
method: "POST"
|
|
35943
|
+
});
|
|
35944
|
+
if (!response.ok) {
|
|
35945
|
+
throw new Error(await toErrorMessage3(response));
|
|
35946
|
+
}
|
|
35947
|
+
const payload = await parseJson(response);
|
|
35948
|
+
if (!payload.ok) {
|
|
35949
|
+
throw new Error(payload.error ?? "Native backend comparison benchmark snapshot failed");
|
|
35950
|
+
}
|
|
35951
|
+
return payload;
|
|
35952
|
+
},
|
|
33363
35953
|
async retrievalLaneHandoffs(input) {
|
|
33364
35954
|
const searchParams = new URLSearchParams;
|
|
33365
35955
|
if (input?.groupKey) {
|
|
@@ -34296,6 +36886,10 @@ export {
|
|
|
34296
36886
|
createRAGQueryTransform,
|
|
34297
36887
|
createRAGPDFOCRExtractor,
|
|
34298
36888
|
createRAGOCRProvider,
|
|
36889
|
+
createRAGNativeBackendComparisonBenchmarkSuite,
|
|
36890
|
+
createRAGNativeBackendComparisonBenchmarkSnapshot,
|
|
36891
|
+
createRAGNativeBackendBenchmarkMockEmbedding,
|
|
36892
|
+
createRAGNativeBackendBenchmarkCorpus,
|
|
34299
36893
|
createRAGMediaTranscriber,
|
|
34300
36894
|
createRAGMediaFileExtractor,
|
|
34301
36895
|
createRAGImageOCRExtractor,
|
|
@@ -34340,6 +36934,8 @@ export {
|
|
|
34340
36934
|
createRAGBunS3SyncClient,
|
|
34341
36935
|
createRAGArchiveFileExtractor,
|
|
34342
36936
|
createRAGArchiveExpander,
|
|
36937
|
+
createRAGAdaptiveNativePlannerBenchmarkSuite,
|
|
36938
|
+
createRAGAdaptiveNativePlannerBenchmarkSnapshot,
|
|
34343
36939
|
createRAGAccessControl,
|
|
34344
36940
|
createPostgresRAGStore,
|
|
34345
36941
|
createPDFFileExtractor,
|
|
@@ -34391,5 +36987,5 @@ export {
|
|
|
34391
36987
|
addRAGEvaluationSuiteCase
|
|
34392
36988
|
};
|
|
34393
36989
|
|
|
34394
|
-
//# debugId=
|
|
36990
|
+
//# debugId=EA75EA5E660B29F864756E2164756E21
|
|
34395
36991
|
//# sourceMappingURL=index.js.map
|