@absolutejs/absolute 0.19.0-beta.644 → 0.19.0-beta.646

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/ai/index.js CHANGED
@@ -208,6 +208,7 @@ var buildContextLabel = (metadata) => {
208
208
  return;
209
209
  }
210
210
  const emailKind = getContextString(metadata.emailKind);
211
+ const officeBlockKind = getContextString(metadata.officeBlockKind);
211
212
  if (emailKind === "attachment") {
212
213
  return "Attachment evidence";
213
214
  }
@@ -245,6 +246,16 @@ var buildContextLabel = (metadata) => {
245
246
  }
246
247
  const sectionPath = Array.isArray(metadata.sectionPath) ? metadata.sectionPath.map((value) => getContextString(value)).filter((value) => typeof value === "string") : [];
247
248
  const sectionTitle = getContextString(metadata.sectionTitle) ?? sectionPath.at(-1);
249
+ const officeSectionLabel = sectionPath.length > 0 ? sectionPath.join(" > ") : sectionTitle;
250
+ if (officeBlockKind === "table" && officeSectionLabel) {
251
+ return `Office table block ${officeSectionLabel}`;
252
+ }
253
+ if (officeBlockKind === "list" && officeSectionLabel) {
254
+ return `Office list block ${officeSectionLabel}`;
255
+ }
256
+ if (officeBlockKind === "paragraph" && officeSectionLabel) {
257
+ return `Office paragraph block ${officeSectionLabel}`;
258
+ }
248
259
  if (sectionTitle) {
249
260
  return `Section ${sectionTitle}`;
250
261
  }
@@ -266,6 +277,46 @@ var formatMediaDurationLabel = (value) => {
266
277
  }
267
278
  return formatMediaTimestamp(value);
268
279
  };
280
/**
 * Builds a short label describing the span of list levels in an office list.
 *
 * Every entry is passed through `getContextNumber` (defined elsewhere in this
 * file); only results whose type is `number` are considered.
 *
 * @param {unknown} value - Candidate collection of level entries. Anything
 *   other than a non-empty array yields no label.
 * @returns {string | undefined} `Office list level N` when the smallest and
 *   largest usable levels are equal, `Office list levels A-B` otherwise, or
 *   `undefined` when there is nothing usable.
 */
var formatOfficeListLevelsLabel = (value) => {
  const hasEntries = Array.isArray(value) && value.length > 0;
  if (!hasEntries) {
    return undefined;
  }
  const numericLevels = [];
  value.forEach((entry) => {
    const level = getContextNumber(entry);
    if (typeof level === "number") {
      numericLevels.push(level);
    }
  });
  if (numericLevels.length === 0) {
    return undefined;
  }
  // Ascending order puts the lowest level first and the highest level last.
  numericLevels.sort((a, b) => a - b);
  const lowest = numericLevels[0];
  const highest = numericLevels[numericLevels.length - 1];
  if (lowest === highest) {
    return `Office list level ${lowest}`;
  }
  return `Office list levels ${lowest}-${highest}`;
};
292
/**
 * Describes the office table/list scope of a citation's metadata.
 *
 * Only metadata whose `officeBlockKind` resolves to `"table"` or `"list"` and
 * that has a section title (explicit, or the last usable `sectionPath`
 * segment) produces a scope.
 *
 * @param {object | null | undefined} metadata - Citation metadata.
 * @returns {{ blockKind: string, pathDepth: number, sectionTitle: string, hasContext: boolean } | undefined}
 *   The scope, or `undefined` when the metadata does not qualify.
 */
var getOfficeTableCitationScope = (metadata) => {
  if (!metadata) {
    return undefined;
  }
  const blockKind = getContextString(metadata.officeBlockKind);
  const isTable = blockKind === "table";
  if (!isTable && blockKind !== "list") {
    return undefined;
  }
  // Keep only the path segments that getContextString resolves to a string.
  const pathSegments = [];
  if (Array.isArray(metadata.sectionPath)) {
    metadata.sectionPath.forEach((segment) => {
      const text = getContextString(segment);
      if (typeof text === "string") {
        pathSegments.push(text);
      }
    });
  }
  const sectionTitle = getContextString(metadata.sectionTitle) ?? pathSegments.at(-1);
  // Tables and lists carry their surrounding context text under different keys.
  const contextText = getContextString(isTable ? metadata.officeTableContextText : metadata.officeListContextText);
  if (!sectionTitle) {
    return undefined;
  }
  return {
    blockKind,
    pathDepth: pathSegments.length,
    sectionTitle,
    hasContext: typeof contextText === "string"
  };
};
313
/**
 * Scores how strongly an office table/list citation should be preferred.
 *
 * The score is the section path depth times ten, plus one when context text
 * is present, plus one for list blocks whose raw `officeListGroupItemCount`
 * is a number greater than one.
 *
 * @param {object | null | undefined} metadata - Citation metadata.
 * @returns {number} The preference score; `0` when
 *   `getOfficeTableCitationScope` yields no scope.
 */
var getOfficeTableCitationPreference = (metadata) => {
  const scope = getOfficeTableCitationScope(metadata);
  if (!scope) {
    return 0;
  }
  let preference = scope.pathDepth * 10;
  if (scope.hasContext) {
    preference += 1;
  }
  if (scope.blockKind === "list") {
    const groupItemCount = metadata?.officeListGroupItemCount;
    if (typeof groupItemCount === "number" && groupItemCount > 1) {
      preference += 1;
    }
  }
  return preference;
};
269
320
  var buildLocatorLabel = (metadata, source, title) => {
270
321
  if (!metadata) {
271
322
  return;
@@ -291,6 +342,10 @@ var buildLocatorLabel = (metadata, source, title) => {
291
342
  return `Archive entry ${archiveEntry}`;
292
343
  }
293
344
  const emailKind = getContextString(metadata.emailKind);
345
+ const officeBlockKind = getContextString(metadata.officeBlockKind);
346
+ const officeBlockNumber = getContextNumber(metadata.officeBlockNumber);
347
+ const officeTableBodyRowStart = getContextNumber(metadata.officeTableBodyRowStart);
348
+ const officeTableBodyRowEnd = getContextNumber(metadata.officeTableBodyRowEnd);
294
349
  if (emailKind === "attachment") {
295
350
  const attachmentName = getContextString(metadata.attachmentName) ?? getAttachmentName(source, title);
296
351
  return attachmentName ? `Attachment ${attachmentName}` : "Attachment";
@@ -303,6 +358,18 @@ var buildLocatorLabel = (metadata, source, title) => {
303
358
  if (mediaStart) {
304
359
  return `Timestamp ${mediaStart}`;
305
360
  }
361
+ if (officeBlockNumber && officeBlockKind === "table") {
362
+ if (typeof officeTableBodyRowStart === "number" && typeof officeTableBodyRowEnd === "number") {
363
+ return officeTableBodyRowStart === officeTableBodyRowEnd ? `Office table block ${officeBlockNumber} \xB7 Row ${officeTableBodyRowStart}` : `Office table block ${officeBlockNumber} \xB7 Rows ${officeTableBodyRowStart}-${officeTableBodyRowEnd}`;
364
+ }
365
+ return `Office table block ${officeBlockNumber}`;
366
+ }
367
+ if (officeBlockNumber && officeBlockKind === "list") {
368
+ return `Office list block ${officeBlockNumber}`;
369
+ }
370
+ if (officeBlockNumber && officeBlockKind === "paragraph") {
371
+ return `Office paragraph block ${officeBlockNumber}`;
372
+ }
306
373
  const sectionPath = Array.isArray(metadata.sectionPath) ? metadata.sectionPath.map((value) => getContextString(value)).filter((value) => typeof value === "string") : [];
307
374
  if (sectionPath.length > 0) {
308
375
  return `Section ${sectionPath.join(" > ")}`;
@@ -336,10 +403,31 @@ var buildProvenanceLabel = (metadata) => {
336
403
  const mediaDurationLabel = formatMediaDurationLabel(metadata.mediaDurationMs);
337
404
  const transcriptSource = getContextString(metadata.transcriptSource);
338
405
  const pdfTextMode = getContextString(metadata.pdfTextMode);
406
+ const officeBlockKind = getContextString(metadata.officeBlockKind);
407
+ const officeListContextText = getContextString(metadata.officeListContextText);
408
+ const officeListGroupItemCount = getContextNumber(metadata.officeListGroupItemCount);
409
+ const officeListLevelsLabel = formatOfficeListLevelsLabel(metadata.officeListLevels);
410
+ const officeTableHeaders = Array.isArray(metadata.officeTableHeaders) ? metadata.officeTableHeaders.map((value) => getContextString(value)).filter((value) => typeof value === "string") : [];
411
+ const officeTableColumnCount = getContextNumber(metadata.officeTableColumnCount);
412
+ const officeTableBodyRowCount = getContextNumber(metadata.officeTableBodyRowCount);
413
+ const officeTableBodyRowStart = getContextNumber(metadata.officeTableBodyRowStart);
414
+ const officeTableBodyRowEnd = getContextNumber(metadata.officeTableBodyRowEnd);
415
+ const officeTableContextText = getContextString(metadata.officeTableContextText);
416
+ const sectionPath = Array.isArray(metadata.sectionPath) ? metadata.sectionPath.map((value) => getContextString(value)).filter((value) => typeof value === "string") : [];
339
417
  const ocrEngine = getContextString(metadata.ocrEngine);
340
418
  const ocrConfidence = getContextNumber(metadata.ocrRegionConfidence) ?? getContextNumber(metadata.ocrConfidence);
341
419
  const labels = [
342
420
  pdfTextMode ? `PDF ${pdfTextMode}` : "",
421
+ officeBlockKind ? `Office ${officeBlockKind}` : "",
422
+ typeof officeListGroupItemCount === "number" ? `Office list ${officeListGroupItemCount} items` : "",
423
+ officeListLevelsLabel ?? "",
424
+ sectionPath.length > 0 && officeBlockKind ? `Source-aware office ${officeBlockKind} block ${sectionPath.join(" > ")}` : "",
425
+ officeListContextText ? `Office list context ${officeListContextText}` : "",
426
+ officeTableHeaders.length > 0 ? `Office table ${officeTableHeaders.join(", ")}` : "",
427
+ typeof officeTableColumnCount === "number" ? `Office table ${officeTableColumnCount} cols` : "",
428
+ typeof officeTableBodyRowCount === "number" ? `Office table ${officeTableBodyRowCount} body rows` : "",
429
+ typeof officeTableBodyRowStart === "number" && typeof officeTableBodyRowEnd === "number" ? officeTableBodyRowStart === officeTableBodyRowEnd ? `Office table row ${officeTableBodyRowStart}` : `Office table rows ${officeTableBodyRowStart}-${officeTableBodyRowEnd}` : "",
430
+ officeTableContextText ? `Office table context ${officeTableContextText}` : "",
343
431
  ocrEngine ? `OCR ${ocrEngine}` : "",
344
432
  typeof ocrConfidence === "number" ? `Confidence ${ocrConfidence.toFixed(2)}` : "",
345
433
  mediaKind ? `Media ${mediaKind}` : "",
@@ -503,6 +591,15 @@ var buildRAGCitations = (sources) => {
503
591
  });
504
592
  }
505
593
  return [...unique.values()].sort((left, right) => {
594
+ const leftOfficeScope = getOfficeTableCitationScope(left.metadata);
595
+ const rightOfficeScope = getOfficeTableCitationScope(right.metadata);
596
+ if (left.source === right.source && leftOfficeScope && rightOfficeScope && leftOfficeScope.blockKind === rightOfficeScope.blockKind && leftOfficeScope.sectionTitle === rightOfficeScope.sectionTitle) {
597
+ const leftOfficePreference = getOfficeTableCitationPreference(left.metadata);
598
+ const rightOfficePreference = getOfficeTableCitationPreference(right.metadata);
599
+ if (rightOfficePreference !== leftOfficePreference) {
600
+ return rightOfficePreference - leftOfficePreference;
601
+ }
602
+ }
506
603
  if (right.score !== left.score) {
507
604
  return right.score - left.score;
508
605
  }
@@ -916,6 +1013,7 @@ var buildSourceAwareUnitScopeLabel = (metadata) => {
916
1013
  const sectionKind = getContextString2(metadata.sectionKind);
917
1014
  const sectionPath = Array.isArray(metadata.sectionPath) ? metadata.sectionPath.map((value) => getContextString2(value)).filter((value) => typeof value === "string") : [];
918
1015
  const sectionTitle = getContextString2(metadata.sectionTitle) ?? sectionPath.at(-1);
1016
+ const pdfSemanticRole = getContextString2(metadata.pdfSemanticRole);
919
1017
  const pdfTextKind = getContextString2(metadata.pdfTextKind);
920
1018
  const officeBlockKind = getContextString2(metadata.officeBlockKind);
921
1019
  const sheetName = getContextString2(metadata.sheetName);
@@ -926,6 +1024,12 @@ var buildSourceAwareUnitScopeLabel = (metadata) => {
926
1024
  return `Source-aware section ${sectionPath.join(" > ")}`;
927
1025
  }
928
1026
  if (sectionKind === "pdf_block") {
1027
+ if (pdfSemanticRole === "figure_caption" && sectionTitle) {
1028
+ return `Source-aware PDF figure caption ${sectionTitle}`;
1029
+ }
1030
+ if (pdfSemanticRole === "figure_body" && sectionTitle) {
1031
+ return `Source-aware PDF figure body ${sectionTitle}`;
1032
+ }
929
1033
  if (pdfTextKind === "table_like" && sectionTitle) {
930
1034
  return `Source-aware PDF table block ${sectionTitle}`;
931
1035
  }
@@ -935,11 +1039,12 @@ var buildSourceAwareUnitScopeLabel = (metadata) => {
935
1039
  return "Source-aware PDF block";
936
1040
  }
937
1041
  if (sectionKind === "office_block") {
938
- if (officeBlockKind && sectionTitle) {
939
- return `Source-aware office ${officeBlockKind} block ${sectionTitle}`;
1042
+ const officeSectionLabel = sectionPath.length > 0 ? sectionPath.join(" > ") : sectionTitle;
1043
+ if (officeBlockKind && officeSectionLabel) {
1044
+ return `Source-aware office ${officeBlockKind} block ${officeSectionLabel}`;
940
1045
  }
941
- if (sectionTitle) {
942
- return `Source-aware office block ${sectionTitle}`;
1046
+ if (officeSectionLabel) {
1047
+ return `Source-aware office block ${officeSectionLabel}`;
943
1048
  }
944
1049
  return "Source-aware office block";
945
1050
  }
@@ -1327,6 +1432,18 @@ var formatSpreadsheetTableLabel = (tableIndex, tableCount) => {
1327
1432
  }
1328
1433
  return `Table ${tableIndex}`;
1329
1434
  };
1435
/**
 * Builds a short label describing the span of list levels in an office list.
 *
 * Every entry is passed through `getContextNumber2` (defined elsewhere in this
 * file); only results whose type is `number` are considered.
 *
 * @param {unknown} value - Candidate collection of level entries. Anything
 *   other than a non-empty array yields no label.
 * @returns {string | undefined} `Office list level N` when the smallest and
 *   largest usable levels are equal, `Office list levels A-B` otherwise, or
 *   `undefined` when there is nothing usable.
 */
var formatOfficeListLevelsLabel2 = (value) => {
  if (!Array.isArray(value)) {
    return undefined;
  }
  if (value.length === 0) {
    return undefined;
  }
  const usableLevels = [];
  value.forEach((entry) => {
    const candidate = getContextNumber2(entry);
    if (typeof candidate === "number") {
      usableLevels.push(candidate);
    }
  });
  if (usableLevels.length === 0) {
    return undefined;
  }
  // After the ascending sort the extremes sit at both ends of the array.
  usableLevels.sort((a, b) => a - b);
  const first = usableLevels[0];
  const last = usableLevels[usableLevels.length - 1];
  if (first === last) {
    return `Office list level ${first}`;
  }
  return `Office list levels ${first}-${last}`;
};
1330
1447
  var formatMediaDurationLabel2 = (value) => {
1331
1448
  if (typeof value !== "number" || !Number.isFinite(value) || value < 0) {
1332
1449
  return;
@@ -1338,9 +1455,18 @@ var buildContextLabel2 = (metadata) => {
1338
1455
  return;
1339
1456
  }
1340
1457
  const pdfTextKind = getContextString2(metadata.pdfTextKind);
1458
+ const pdfSemanticRole = getContextString2(metadata.pdfSemanticRole);
1459
+ const pdfTableBodyRowStart = getContextNumber2(metadata.pdfTableBodyRowStart);
1460
+ const pdfTableBodyRowEnd = getContextNumber2(metadata.pdfTableBodyRowEnd);
1341
1461
  const officeBlockKind = getContextString2(metadata.officeBlockKind);
1342
1462
  const sectionPath = Array.isArray(metadata.sectionPath) ? metadata.sectionPath.map((value) => getContextString2(value)).filter((value) => typeof value === "string") : [];
1343
1463
  const sectionTitle = getContextString2(metadata.sectionTitle) ?? sectionPath.at(-1);
1464
+ if (pdfSemanticRole === "figure_caption" && sectionTitle) {
1465
+ return `PDF figure caption ${sectionTitle}`;
1466
+ }
1467
+ if (pdfSemanticRole === "figure_body" && sectionTitle) {
1468
+ return `PDF figure body ${sectionTitle}`;
1469
+ }
1344
1470
  if (pdfTextKind === "table_like" && sectionTitle) {
1345
1471
  return `PDF table block ${sectionTitle}`;
1346
1472
  }
@@ -1348,13 +1474,13 @@ var buildContextLabel2 = (metadata) => {
1348
1474
  return `PDF text block ${sectionTitle}`;
1349
1475
  }
1350
1476
  if (officeBlockKind === "table" && sectionTitle) {
1351
- return `Office table block ${sectionTitle}`;
1477
+ return `Office table block ${sectionPath.join(" > ") || sectionTitle}`;
1352
1478
  }
1353
1479
  if (officeBlockKind === "list" && sectionTitle) {
1354
- return `Office list block ${sectionTitle}`;
1480
+ return `Office list block ${sectionPath.join(" > ") || sectionTitle}`;
1355
1481
  }
1356
1482
  if (officeBlockKind === "paragraph" && sectionTitle) {
1357
- return `Office paragraph block ${sectionTitle}`;
1483
+ return `Office paragraph block ${sectionPath.join(" > ") || sectionTitle}`;
1358
1484
  }
1359
1485
  const emailKind = getContextString2(metadata.emailKind);
1360
1486
  if (emailKind === "attachment") {
@@ -1452,9 +1578,14 @@ var buildLocatorLabel2 = (metadata, source, title) => {
1452
1578
  return;
1453
1579
  }
1454
1580
  const pdfTextKind = getContextString2(metadata.pdfTextKind);
1581
+ const pdfSemanticRole = getContextString2(metadata.pdfSemanticRole);
1455
1582
  const officeBlockKind = getContextString2(metadata.officeBlockKind);
1456
1583
  const pdfBlockNumber = getContextNumber2(metadata.pdfBlockNumber);
1584
+ const pdfTableBodyRowStart = getContextNumber2(metadata.pdfTableBodyRowStart);
1585
+ const pdfTableBodyRowEnd = getContextNumber2(metadata.pdfTableBodyRowEnd);
1457
1586
  const officeBlockNumber = getContextNumber2(metadata.officeBlockNumber);
1587
+ const officeTableBodyRowStart = getContextNumber2(metadata.officeTableBodyRowStart);
1588
+ const officeTableBodyRowEnd = getContextNumber2(metadata.officeTableBodyRowEnd);
1458
1589
  const spreadsheetRowStart = getContextNumber2(metadata.spreadsheetRowStart);
1459
1590
  const spreadsheetRowEnd = getContextNumber2(metadata.spreadsheetRowEnd);
1460
1591
  const slideTitle = getContextString2(metadata.slideTitle);
@@ -1465,7 +1596,16 @@ var buildLocatorLabel2 = (metadata, source, title) => {
1465
1596
  if (page && region) {
1466
1597
  return `Page ${page} \xB7 Region ${region}`;
1467
1598
  }
1599
+ if (page && pdfBlockNumber && pdfSemanticRole === "figure_caption") {
1600
+ return `Page ${page} \xB7 Figure Caption ${pdfBlockNumber}`;
1601
+ }
1602
+ if (page && pdfBlockNumber && pdfSemanticRole === "figure_body") {
1603
+ return `Page ${page} \xB7 Figure Body ${pdfBlockNumber}`;
1604
+ }
1468
1605
  if (page && pdfBlockNumber && pdfTextKind === "table_like") {
1606
+ if (typeof pdfTableBodyRowStart === "number" && typeof pdfTableBodyRowEnd === "number") {
1607
+ return pdfTableBodyRowStart === pdfTableBodyRowEnd ? `Page ${page} \xB7 Table Block ${pdfBlockNumber} \xB7 Row ${pdfTableBodyRowStart}` : `Page ${page} \xB7 Table Block ${pdfBlockNumber} \xB7 Rows ${pdfTableBodyRowStart}-${pdfTableBodyRowEnd}`;
1608
+ }
1469
1609
  return `Page ${page} \xB7 Table Block ${pdfBlockNumber}`;
1470
1610
  }
1471
1611
  if (page && pdfBlockNumber) {
@@ -1528,6 +1668,9 @@ var buildLocatorLabel2 = (metadata, source, title) => {
1528
1668
  return `Timestamp ${mediaStart}`;
1529
1669
  }
1530
1670
  if (officeBlockNumber && officeBlockKind === "table") {
1671
+ if (typeof officeTableBodyRowStart === "number" && typeof officeTableBodyRowEnd === "number") {
1672
+ return officeTableBodyRowStart === officeTableBodyRowEnd ? `Office table block ${officeBlockNumber} \xB7 Row ${officeTableBodyRowStart}` : `Office table block ${officeBlockNumber} \xB7 Rows ${officeTableBodyRowStart}-${officeTableBodyRowEnd}`;
1673
+ }
1531
1674
  return `Office table block ${officeBlockNumber}`;
1532
1675
  }
1533
1676
  if (officeBlockNumber && officeBlockKind === "list") {
@@ -1564,11 +1707,27 @@ var buildProvenanceLabel2 = (metadata) => {
1564
1707
  const mediaSegmentWindowDurationLabel = formatMediaDurationLabel2(metadata.mediaSegmentGroupDurationMs);
1565
1708
  const mediaSegmentGapLabel = formatMediaDurationLabel2(metadata.mediaSegmentGapFromPreviousMs);
1566
1709
  const spreadsheetHeaders = getSpreadsheetHeaders(metadata);
1710
+ const pdfTableHeaders = Array.isArray(metadata.pdfTableHeaders) ? metadata.pdfTableHeaders.map((value) => getContextString2(value)).filter((value) => typeof value === "string") : [];
1711
+ const pdfTableColumnCount = getContextNumber2(metadata.pdfTableColumnCount);
1712
+ const pdfTableBodyRowCount = getContextNumber2(metadata.pdfTableBodyRowCount);
1567
1713
  const spreadsheetColumnRange = formatSpreadsheetColumnRange(getContextString2(metadata.spreadsheetColumnStart), getContextString2(metadata.spreadsheetColumnEnd));
1568
1714
  const slideNotesText = getContextString2(metadata.slideNotesText);
1569
1715
  const pdfTextMode = getContextString2(metadata.pdfTextMode);
1716
+ const pdfEvidenceMode = getContextString2(metadata.pdfEvidenceMode);
1717
+ const pdfEvidenceOrigin = getContextString2(metadata.pdfEvidenceOrigin);
1718
+ const pdfEvidenceSupplement = getContextString2(metadata.pdfEvidenceSupplement);
1570
1719
  const pdfTextKind = getContextString2(metadata.pdfTextKind);
1720
+ const pdfSemanticRole = getContextString2(metadata.pdfSemanticRole);
1571
1721
  const officeBlockKind = getContextString2(metadata.officeBlockKind);
1722
+ const officeListContextText = getContextString2(metadata.officeListContextText);
1723
+ const officeListGroupItemCount = getContextNumber2(metadata.officeListGroupItemCount);
1724
+ const officeListLevelsLabel = formatOfficeListLevelsLabel2(metadata.officeListLevels);
1725
+ const officeTableHeaders = Array.isArray(metadata.officeTableHeaders) ? metadata.officeTableHeaders.map((value) => getContextString2(value)).filter((value) => typeof value === "string") : [];
1726
+ const officeTableColumnCount = getContextNumber2(metadata.officeTableColumnCount);
1727
+ const officeTableBodyRowCount = getContextNumber2(metadata.officeTableBodyRowCount);
1728
+ const officeTableBodyRowStart = getContextNumber2(metadata.officeTableBodyRowStart);
1729
+ const officeTableBodyRowEnd = getContextNumber2(metadata.officeTableBodyRowEnd);
1730
+ const officeTableContextText = getContextString2(metadata.officeTableContextText);
1572
1731
  const ocrEngine = getContextString2(metadata.ocrEngine);
1573
1732
  const extractorRegistryMatch = getContextString2(metadata.extractorRegistryMatch);
1574
1733
  const chunkingProfile = getContextString2(metadata.chunkingProfile);
@@ -1584,10 +1743,19 @@ var buildProvenanceLabel2 = (metadata) => {
1584
1743
  const ocrMinConfidence = getContextNumber2(metadata.ocrPageMinConfidence) ?? getContextNumber2(metadata.ocrMinConfidence);
1585
1744
  const ocrMaxConfidence = getContextNumber2(metadata.ocrPageMaxConfidence) ?? getContextNumber2(metadata.ocrMaxConfidence);
1586
1745
  const ocrRegionCount = getContextNumber2(metadata.ocrRegionCount);
1746
+ const pdfTableBodyRowStart = getContextNumber2(metadata.pdfTableBodyRowStart);
1747
+ const pdfTableBodyRowEnd = getContextNumber2(metadata.pdfTableBodyRowEnd);
1587
1748
  const labels = [
1588
1749
  pdfTextMode ? `PDF ${pdfTextMode}` : "",
1589
- pdfTextKind === "table_like" ? "PDF table block" : pdfTextKind === "paragraph" ? "PDF text block" : "",
1750
+ pdfEvidenceMode ? `PDF evidence ${pdfEvidenceMode}` : "",
1751
+ pdfEvidenceOrigin ? `PDF origin ${pdfEvidenceOrigin}` : "",
1752
+ pdfEvidenceSupplement ? `PDF supplement ${pdfEvidenceSupplement}` : "",
1753
+ pdfSemanticRole === "figure_caption" ? "PDF figure caption" : "",
1754
+ pdfSemanticRole === "figure_body" ? "PDF figure body" : "",
1755
+ pdfSemanticRole === "figure_caption" ? "" : pdfSemanticRole === "figure_body" ? "" : pdfTextKind === "table_like" ? "PDF table block" : pdfTextKind === "paragraph" ? "PDF text block" : "",
1590
1756
  officeBlockKind ? `Office ${officeBlockKind}` : "",
1757
+ typeof officeListGroupItemCount === "number" ? `Office list ${officeListGroupItemCount} items` : "",
1758
+ officeListLevelsLabel ?? "",
1591
1759
  ocrEngine ? `OCR ${ocrEngine}` : "",
1592
1760
  extractorRegistryMatch ? `Extractor ${extractorRegistryMatch}` : "",
1593
1761
  chunkingProfile ? `Chunking ${chunkingProfile}` : "",
@@ -1597,6 +1765,16 @@ var buildProvenanceLabel2 = (metadata) => {
1597
1765
  typeof ocrAverageConfidence === "number" && ocrAverageConfidence !== ocrConfidence ? `Average ${ocrAverageConfidence.toFixed(2)}` : "",
1598
1766
  typeof ocrMinConfidence === "number" && typeof ocrMaxConfidence === "number" && ocrMinConfidence !== ocrMaxConfidence ? `Range ${ocrMinConfidence.toFixed(2)}-${ocrMaxConfidence.toFixed(2)}` : "",
1599
1767
  typeof ocrRegionCount === "number" ? `${ocrRegionCount} regions` : "",
1768
+ pdfTableHeaders.length > 0 ? `PDF table ${pdfTableHeaders.join(", ")}` : "",
1769
+ typeof pdfTableColumnCount === "number" ? `PDF table ${pdfTableColumnCount} cols` : "",
1770
+ typeof pdfTableBodyRowCount === "number" ? `PDF table ${pdfTableBodyRowCount} body rows` : "",
1771
+ typeof pdfTableBodyRowStart === "number" && typeof pdfTableBodyRowEnd === "number" ? pdfTableBodyRowStart === pdfTableBodyRowEnd ? `PDF table row ${pdfTableBodyRowStart}` : `PDF table rows ${pdfTableBodyRowStart}-${pdfTableBodyRowEnd}` : "",
1772
+ officeListContextText ? `Office list context ${officeListContextText}` : "",
1773
+ officeTableHeaders.length > 0 ? `Office table ${officeTableHeaders.join(", ")}` : "",
1774
+ typeof officeTableColumnCount === "number" ? `Office table ${officeTableColumnCount} cols` : "",
1775
+ typeof officeTableBodyRowCount === "number" ? `Office table ${officeTableBodyRowCount} body rows` : "",
1776
+ typeof officeTableBodyRowStart === "number" && typeof officeTableBodyRowEnd === "number" ? officeTableBodyRowStart === officeTableBodyRowEnd ? `Office table row ${officeTableBodyRowStart}` : `Office table rows ${officeTableBodyRowStart}-${officeTableBodyRowEnd}` : "",
1777
+ officeTableContextText ? `Office table context ${officeTableContextText}` : "",
1600
1778
  spreadsheetHeaders.length > 0 ? `Spreadsheet ${spreadsheetHeaders.join(", ")}` : "",
1601
1779
  spreadsheetColumnRange ? `Spreadsheet ${spreadsheetColumnRange}` : "",
1602
1780
  spreadsheetTableLabel ? `Spreadsheet ${spreadsheetTableLabel}` : "",
@@ -2028,12 +2206,92 @@ var getStructuredSectionScoreWeight = (metadata) => {
2028
2206
  return 1;
2029
2207
  };
2030
2208
  var getStructuredSourceLeadScore = (source) => source.score * getStructuredSectionScoreWeight(source.metadata);
2209
/**
 * Ranks PDF evidence metadata by its mode/origin/supplement combination.
 *
 * @param {object | null | undefined} metadata - Chunk metadata.
 * @returns {number} `3` for hybrid evidence with native origin and OCR
 *   supplement, `2` for native mode with native origin, `1` for OCR mode with
 *   OCR origin, and `0` for anything else (including missing metadata).
 */
var getPDFLeadEvidencePreference = (metadata) => {
  if (!metadata) {
    return 0;
  }
  const mode = getContextString2(metadata.pdfEvidenceMode);
  const origin = getContextString2(metadata.pdfEvidenceOrigin);
  const supplement = getContextString2(metadata.pdfEvidenceSupplement);
  switch (mode) {
    case "hybrid":
      return origin === "native" && supplement === "ocr" ? 3 : 0;
    case "native":
      return origin === "native" ? 2 : 0;
    case "ocr":
      return origin === "ocr" ? 1 : 0;
    default:
      return 0;
  }
};
2227
/**
 * Collects the page number, section title and native source kind of a chunk.
 *
 * The page number comes from `pageNumber`, then `page`, then a numeric
 * `pageIndex` plus one, in that order.
 *
 * @param {object | null | undefined} metadata - Chunk metadata.
 * @returns {{ pageNumber: number | undefined, sectionTitle: string | undefined, sourceNativeKind: string | undefined } | undefined}
 *   The scope, or `undefined` when the metadata is missing or none of the
 *   three values is available.
 */
var getPDFLeadScope = (metadata) => {
  if (!metadata) {
    return undefined;
  }
  let pageNumber = getContextNumber2(metadata.pageNumber);
  pageNumber ??= getContextNumber2(metadata.page);
  // pageIndex is read raw and shifted by one to yield a page number.
  pageNumber ??= typeof metadata.pageIndex === "number" ? metadata.pageIndex + 1 : undefined;
  const sectionTitle = getContextString2(metadata.sectionTitle);
  const sourceNativeKind = getContextString2(metadata.sourceNativeKind);
  const hasAnyScopeValue = typeof pageNumber === "number" || Boolean(sectionTitle) || Boolean(sourceNativeKind);
  if (!hasAnyScopeValue) {
    return undefined;
  }
  return { pageNumber, sectionTitle, sourceNativeKind };
};
2243
/**
 * Describes the office table/list scope of a chunk's metadata.
 *
 * Only metadata whose `officeBlockKind` resolves to `"table"` or `"list"` and
 * that has a section title (explicit, or the last usable `sectionPath`
 * segment) produces a scope.
 *
 * @param {object | null | undefined} metadata - Chunk metadata.
 * @returns {{ blockKind: string, pathDepth: number, sectionTitle: string, hasContext: boolean } | undefined}
 *   The scope, or `undefined` when the metadata does not qualify.
 */
var getOfficeLeadScope = (metadata) => {
  if (!metadata) {
    return undefined;
  }
  const blockKind = getContextString2(metadata.officeBlockKind);
  const isTable = blockKind === "table";
  const isList = blockKind === "list";
  if (!(isTable || isList)) {
    return undefined;
  }
  // Keep only the path segments that getContextString2 resolves to a string.
  const pathSegments = [];
  if (Array.isArray(metadata.sectionPath)) {
    metadata.sectionPath.forEach((segment) => {
      const text = getContextString2(segment);
      if (typeof text === "string") {
        pathSegments.push(text);
      }
    });
  }
  const sectionTitle = getContextString2(metadata.sectionTitle) ?? pathSegments.at(-1);
  // Tables and lists carry their surrounding context text under different keys.
  const rawContext = isTable ? metadata.officeTableContextText : metadata.officeListContextText;
  const contextText = getContextString2(rawContext);
  if (!sectionTitle) {
    return undefined;
  }
  return {
    blockKind,
    pathDepth: pathSegments.length,
    sectionTitle,
    hasContext: typeof contextText === "string"
  };
};
2264
/**
 * Scores how strongly an office table/list chunk should be preferred as lead.
 *
 * The score is the section path depth times ten, plus one when context text
 * is present, plus one for list blocks whose raw `officeListGroupItemCount`
 * is a number greater than one.
 *
 * @param {object | null | undefined} metadata - Chunk metadata.
 * @returns {number} The preference score; `0` when `getOfficeLeadScope`
 *   yields no scope.
 */
var getOfficeLeadEvidencePreference = (metadata) => {
  const scope = getOfficeLeadScope(metadata);
  if (!scope) {
    return 0;
  }
  const depthScore = scope.pathDepth * 10;
  const contextBonus = scope.hasContext ? 1 : 0;
  let groupedListBonus = 0;
  if (scope.blockKind === "list") {
    const groupItemCount = metadata?.officeListGroupItemCount;
    if (typeof groupItemCount === "number" && groupItemCount > 1) {
      groupedListBonus = 1;
    }
  }
  return depthScore + contextBonus + groupedListBonus;
};
2031
2271
  var getPreferredSourceLeadChunk = (chunks) => chunks.slice().sort((left, right) => {
2272
+ const leftOfficeScope = getOfficeLeadScope(left.metadata);
2273
+ const rightOfficeScope = getOfficeLeadScope(right.metadata);
2274
+ if (left.source === right.source && leftOfficeScope && rightOfficeScope && leftOfficeScope.blockKind === rightOfficeScope.blockKind && leftOfficeScope.sectionTitle === rightOfficeScope.sectionTitle) {
2275
+ const leftOfficePreference = getOfficeLeadEvidencePreference(left.metadata);
2276
+ const rightOfficePreference = getOfficeLeadEvidencePreference(right.metadata);
2277
+ if (rightOfficePreference !== leftOfficePreference) {
2278
+ return rightOfficePreference - leftOfficePreference;
2279
+ }
2280
+ }
2032
2281
  const leftWeightedScore = getStructuredSourceLeadScore(left);
2033
2282
  const rightWeightedScore = getStructuredSourceLeadScore(right);
2034
2283
  if (rightWeightedScore !== leftWeightedScore) {
2035
2284
  return rightWeightedScore - leftWeightedScore;
2036
2285
  }
2286
+ const leftScope = getPDFLeadScope(left.metadata);
2287
+ const rightScope = getPDFLeadScope(right.metadata);
2288
+ if (left.source === right.source && leftScope && rightScope && (leftScope.sectionTitle && rightScope.sectionTitle && leftScope.sectionTitle === rightScope.sectionTitle || typeof leftScope.pageNumber === "number" && typeof rightScope.pageNumber === "number" && leftScope.pageNumber === rightScope.pageNumber)) {
2289
+ const leftEvidencePreference = getPDFLeadEvidencePreference(left.metadata);
2290
+ const rightEvidencePreference = getPDFLeadEvidencePreference(right.metadata);
2291
+ if (rightEvidencePreference !== leftEvidencePreference) {
2292
+ return rightEvidencePreference - leftEvidencePreference;
2293
+ }
2294
+ }
2037
2295
  if (right.score !== left.score) {
2038
2296
  return right.score - left.score;
2039
2297
  }
@@ -2287,6 +2545,7 @@ var buildRAGSectionRetrievalDiagnostics = (sources, trace) => {
2287
2545
  queryTransformProvider: trace?.queryTransformProvider,
2288
2546
  queryTransformReason: trace?.queryTransformReason,
2289
2547
  reasons,
2548
+ evidenceReconcileApplied: trace?.steps.some((step) => step.stage === "evidence_reconcile"),
2290
2549
  rerankApplied: trace?.steps.some((step) => step.stage === "rerank" && step.metadata?.applied === true),
2291
2550
  scoreShare,
2292
2551
  scoreThresholdApplied: trace?.steps.some((step) => step.stage === "score_filter"),
@@ -2965,6 +3224,12 @@ var buildComparisonOverviewPresentation = (input) => {
2965
3224
  value: input.resolveLabel(input.summary.bestByMultivectorVectorHitCases)
2966
3225
  });
2967
3226
  }
3227
+ if (input.summary.bestByEvidenceReconcileCases) {
3228
+ rows.push({
3229
+ label: "Best evidence reconcile",
3230
+ value: input.resolveLabel(input.summary.bestByEvidenceReconcileCases)
3231
+ });
3232
+ }
2968
3233
  if (input.summary.bestByLowestRuntimeCandidateBudgetExhaustedCases) {
2969
3234
  rows.push({
2970
3235
  label: "Lowest runtime budget exhaustion",
@@ -3032,6 +3297,9 @@ var buildRAGComparisonTraceSummaryRows = (entry) => {
3032
3297
  }, {
3033
3298
  label: "Runtime",
3034
3299
  value: `budget ${formatTraceRatio(trace.runtimeCandidateBudgetExhaustedCases, trace.totalCases)} \xB7 underfilled ${formatTraceRatio(trace.runtimeUnderfilledTopKCases, trace.totalCases)}`
3300
+ }, {
3301
+ label: "Evidence reconcile",
3302
+ value: `all ${formatTraceRatio(trace.stageCounts.evidence_reconcile ?? 0, trace.totalCases)} \xB7 office ${formatTraceRatio(trace.officeEvidenceReconcileCases, trace.totalCases)} \xB7 pdf ${formatTraceRatio(trace.pdfEvidenceReconcileCases, trace.totalCases)}`
3035
3303
  }, {
3036
3304
  label: "TopK",
3037
3305
  value: `${trace.averageCandidateTopK.toFixed(1)} / ${trace.averageLexicalTopK.toFixed(1)}`
@@ -3389,6 +3657,25 @@ var buildRAGEvaluationHistoryRows = (history) => {
3389
3657
  label: "Trace variant delta",
3390
3658
  value: formatTraceCountDelta(history.diff.traceSummaryDelta.variantCases)
3391
3659
  });
3660
+ const evidenceReconcileDelta = history.diff.traceSummaryDelta.stageCounts?.evidence_reconcile;
3661
+ if (typeof evidenceReconcileDelta === "number") {
3662
+ rows.push({
3663
+ label: "Trace evidence reconcile delta",
3664
+ value: formatTraceCountDelta(evidenceReconcileDelta)
3665
+ });
3666
+ }
3667
+ if (typeof history.diff.traceSummaryDelta.officeEvidenceReconcileCasesDelta === "number") {
3668
+ rows.push({
3669
+ label: "Trace office evidence reconcile delta",
3670
+ value: formatTraceCountDelta(history.diff.traceSummaryDelta.officeEvidenceReconcileCasesDelta)
3671
+ });
3672
+ }
3673
+ if (typeof history.diff.traceSummaryDelta.pdfEvidenceReconcileCasesDelta === "number") {
3674
+ rows.push({
3675
+ label: "Trace PDF evidence reconcile delta",
3676
+ value: formatTraceCountDelta(history.diff.traceSummaryDelta.pdfEvidenceReconcileCasesDelta)
3677
+ });
3678
+ }
3392
3679
  const stageDelta = Object.entries(history.diff.traceSummaryDelta.stageCounts ?? {}).map(([stage, count]) => `${stage} ${formatTraceCountDelta(count)}`).join(", ");
3393
3680
  if (stageDelta) {
3394
3681
  rows.push({ label: "Trace stage delta", value: stageDelta });
@@ -3594,6 +3881,7 @@ var buildRAGEvaluationSuiteSnapshotHistoryPresentation = (history) => ({
3594
3881
  summary: history?.latestSnapshot ? `v${history.latestSnapshot.version}` : "No saved suite snapshots yet."
3595
3882
  });
3596
3883
  var isRuntimeGateReason = (reason) => /runtime|candidate-budget|underfilled/i.test(reason);
3884
/**
 * Extracts the distinct fixture variant names encoded in a run's tags.
 *
 * A tag of the form "fixture:<name>" contributes "<name>". Tags without the
 * prefix, tags with an empty name, and non-string entries are ignored.
 * Variants are returned in first-seen order without duplicates.
 *
 * @param {Array<unknown> | null | undefined} tags - Run tags; nullish is treated as empty.
 * @returns {string[]} Unique fixture variant names.
 */
var getFixtureVariantsFromRunTags = (tags) => {
  const fixturePrefix = "fixture:";
  // A Set keeps insertion order, so de-duplication is linear and stable.
  const variants = new Set();
  for (const tag of tags ?? []) {
    // Persisted tag lists may contain non-string entries; skip them instead of throwing.
    if (typeof tag !== "string" || !tag.startsWith(fixturePrefix)) {
      continue;
    }
    const variant = tag.slice(fixturePrefix.length);
    if (variant.length > 0) {
      variants.add(variant);
    }
  }
  return [...variants];
};
3597
3885
  var buildRAGRetrievalReleaseHistoryRunPresentation = (run) => {
3598
3886
  const runtimeGateReasons = (run.decisionSummary?.gate?.reasons ?? run.releaseVerdict?.gate?.reasons ?? []).filter(isRuntimeGateReason);
3599
3887
  const rows = [
@@ -3607,6 +3895,13 @@ var buildRAGRetrievalReleaseHistoryRunPresentation = (run) => {
3607
3895
  value: run.comparison.summary.bestByAverageF1 ?? "n/a"
3608
3896
  }
3609
3897
  ];
3898
+ const fixtureVariants = getFixtureVariantsFromRunTags(run.tags);
3899
+ if (fixtureVariants.length > 0) {
3900
+ rows.push({
3901
+ label: "Fixture variant",
3902
+ value: fixtureVariants.join(", ")
3903
+ });
3904
+ }
3610
3905
  if (run.comparison.summary.bestByLowestRuntimeCandidateBudgetExhaustedCases) {
3611
3906
  rows.push({
3612
3907
  label: "Lowest runtime budget exhaustion",
@@ -3635,6 +3930,7 @@ var buildRAGRetrievalReleaseHistoryRunPresentation = (run) => {
3635
3930
  };
3636
3931
  var buildRAGRetrievalReleaseGroupHistoryPresentation = (input) => {
3637
3932
  const recentRuns = (input.runs ?? []).map(buildRAGRetrievalReleaseHistoryRunPresentation);
3933
+ const fixtureVariants = (input.runs ?? []).flatMap((run) => getFixtureVariantsFromRunTags(run.tags)).filter((tag, index, all) => all.indexOf(tag) === index);
3638
3934
  const runtimeBlockedRuns = recentRuns.filter((entry) => entry.rows.some((row) => row.label === "Runtime gate failures" && row.value !== "none")).length;
3639
3935
  const rows = [
3640
3936
  {
@@ -3658,6 +3954,12 @@ var buildRAGRetrievalReleaseGroupHistoryPresentation = (input) => {
3658
3954
  value: String(runtimeBlockedRuns)
3659
3955
  }
3660
3956
  ];
3957
+ if (fixtureVariants.length > 0) {
3958
+ rows.push({
3959
+ label: "Fixture variants",
3960
+ value: fixtureVariants.join(", ")
3961
+ });
3962
+ }
3661
3963
  return {
3662
3964
  recentRuns,
3663
3965
  rows,
@@ -4174,6 +4476,9 @@ var evaluateRetrievalComparisonGate = ({
4174
4476
  if (typeof policy.minMultiVectorVectorHitCasesDelta === "number" && (delta.multiVectorVectorHitCasesDelta ?? 0) < policy.minMultiVectorVectorHitCasesDelta) {
4175
4477
  reasons.push(`multivector vector-hit delta ${delta.multiVectorVectorHitCasesDelta ?? 0} is below ${policy.minMultiVectorVectorHitCasesDelta}`);
4176
4478
  }
4479
+ if (typeof policy.minEvidenceReconcileCasesDelta === "number" && (delta.evidenceReconcileCasesDelta ?? 0) < policy.minEvidenceReconcileCasesDelta) {
4480
+ reasons.push(`evidence reconcile delta ${delta.evidenceReconcileCasesDelta ?? 0} is below ${policy.minEvidenceReconcileCasesDelta}`);
4481
+ }
4177
4482
  if (typeof policy.maxRuntimeCandidateBudgetExhaustedCasesDelta === "number" && (delta.runtimeCandidateBudgetExhaustedCasesDelta ?? 0) > policy.maxRuntimeCandidateBudgetExhaustedCasesDelta) {
4178
4483
  reasons.push(`runtime candidate-budget-exhausted delta ${delta.runtimeCandidateBudgetExhaustedCasesDelta ?? 0} exceeds ${policy.maxRuntimeCandidateBudgetExhaustedCasesDelta}`);
4179
4484
  }
@@ -4227,13 +4532,14 @@ var buildRAGRetrievalReleaseVerdict = ({
4227
4532
  };
4228
4533
  }
4229
4534
  if (delta) {
4535
+ const requiresReview = delta.passingRateDelta < 0 || delta.averageF1Delta < 0 || (delta.evidenceReconcileCasesDelta ?? 0) < 0;
4230
4536
  return {
4231
4537
  baselineGroupKey: groupKey,
4232
4538
  baselineRetrievalId,
4233
4539
  candidateRetrievalId,
4234
4540
  delta,
4235
- status: delta.passingRateDelta < 0 || delta.averageF1Delta < 0 ? "needs_review" : "pass",
4236
- summary: delta.passingRateDelta < 0 || delta.averageF1Delta < 0 ? "Candidate should be reviewed before promotion." : "Candidate improved or matched the baseline."
4541
+ status: requiresReview ? "needs_review" : "pass",
4542
+ summary: requiresReview ? "Candidate should be reviewed before promotion." : "Candidate improved or matched the baseline."
4237
4543
  };
4238
4544
  }
4239
4545
  return {
@@ -4914,6 +5220,8 @@ var summarizeRetrievalTraces = (traces) => {
4914
5220
  let multiVectorVectorHitCases = 0;
4915
5221
  let multiVectorLexicalHitCases = 0;
4916
5222
  let multiVectorCollapsedCases = 0;
5223
+ let officeEvidenceReconcileCases = 0;
5224
+ let pdfEvidenceReconcileCases = 0;
4917
5225
  let runtimeCandidateBudgetExhaustedCases = 0;
4918
5226
  let runtimeUnderfilledTopKCases = 0;
4919
5227
  let finalCountSum = 0;
@@ -4955,6 +5263,13 @@ var summarizeRetrievalTraces = (traces) => {
4955
5263
  if ((trace.multiVector?.collapsedParents ?? 0) > 0) {
4956
5264
  multiVectorCollapsedCases += 1;
4957
5265
  }
5266
+ const evidenceReconcileMetadata = trace.steps.find((step) => step.stage === "evidence_reconcile")?.metadata;
5267
+ if (typeof evidenceReconcileMetadata?.officeAffectedScopes === "number" && evidenceReconcileMetadata.officeAffectedScopes > 0) {
5268
+ officeEvidenceReconcileCases += 1;
5269
+ }
5270
+ if (typeof evidenceReconcileMetadata?.pdfAffectedScopes === "number" && evidenceReconcileMetadata.pdfAffectedScopes > 0) {
5271
+ pdfEvidenceReconcileCases += 1;
5272
+ }
4958
5273
  if (vectorSearchMetadata?.sqliteQueryCandidateBudgetExhausted) {
4959
5274
  runtimeCandidateBudgetExhaustedCases += 1;
4960
5275
  }
@@ -4992,6 +5307,8 @@ var summarizeRetrievalTraces = (traces) => {
4992
5307
  multiVectorVectorHitCases,
4993
5308
  multiVectorLexicalHitCases,
4994
5309
  multiVectorCollapsedCases,
5310
+ officeEvidenceReconcileCases,
5311
+ pdfEvidenceReconcileCases,
4995
5312
  runtimeCandidateBudgetExhaustedCases,
4996
5313
  runtimeUnderfilledTopKCases,
4997
5314
  vectorCases
@@ -5796,6 +6113,8 @@ var buildRAGEvaluationRunDiff = ({
5796
6113
  averageLexicalTopK: (current.traceSummary?.averageLexicalTopK ?? 0) - (previous?.traceSummary?.averageLexicalTopK ?? 0),
5797
6114
  averageVectorCount: (current.traceSummary?.averageVectorCount ?? 0) - (previous?.traceSummary?.averageVectorCount ?? 0),
5798
6115
  balancedCases: (current.traceSummary?.balancedCases ?? 0) - (previous?.traceSummary?.balancedCases ?? 0),
6116
+ officeEvidenceReconcileCasesDelta: (current.traceSummary?.officeEvidenceReconcileCases ?? 0) - (previous?.traceSummary?.officeEvidenceReconcileCases ?? 0),
6117
+ pdfEvidenceReconcileCasesDelta: (current.traceSummary?.pdfEvidenceReconcileCases ?? 0) - (previous?.traceSummary?.pdfEvidenceReconcileCases ?? 0),
5799
6118
  lexicalCases: (current.traceSummary?.lexicalCases ?? 0) - (previous?.traceSummary?.lexicalCases ?? 0),
5800
6119
  modesChanged: (current.traceSummary?.modes ?? []).join("|") !== (previous?.traceSummary?.modes ?? []).join("|"),
5801
6120
  roundRobinCases: (current.traceSummary?.roundRobinCases ?? 0) - (previous?.traceSummary?.roundRobinCases ?? 0),
@@ -8580,6 +8899,7 @@ var buildRAGRetrievalComparisonDecisionSummary = ({
8580
8899
  multiVectorCollapsedCasesDelta: (candidateEntry.traceSummary?.multiVectorCollapsedCases ?? 0) - (baselineEntry.traceSummary?.multiVectorCollapsedCases ?? 0),
8581
8900
  multiVectorLexicalHitCasesDelta: (candidateEntry.traceSummary?.multiVectorLexicalHitCases ?? 0) - (baselineEntry.traceSummary?.multiVectorLexicalHitCases ?? 0),
8582
8901
  multiVectorVectorHitCasesDelta: (candidateEntry.traceSummary?.multiVectorVectorHitCases ?? 0) - (baselineEntry.traceSummary?.multiVectorVectorHitCases ?? 0),
8902
+ evidenceReconcileCasesDelta: (candidateEntry.traceSummary?.stageCounts?.evidence_reconcile ?? 0) - (baselineEntry.traceSummary?.stageCounts?.evidence_reconcile ?? 0),
8583
8903
  runtimeCandidateBudgetExhaustedCasesDelta: (candidateEntry.traceSummary?.runtimeCandidateBudgetExhaustedCases ?? 0) - (baselineEntry.traceSummary?.runtimeCandidateBudgetExhaustedCases ?? 0),
8584
8904
  runtimeUnderfilledTopKCasesDelta: (candidateEntry.traceSummary?.runtimeUnderfilledTopKCases ?? 0) - (baselineEntry.traceSummary?.runtimeUnderfilledTopKCases ?? 0)
8585
8905
  } : undefined;
@@ -8591,6 +8911,7 @@ var buildRAGRetrievalComparisonDecisionSummary = ({
8591
8911
  multiVectorCollapsedCases: baselineEntry.traceSummary?.multiVectorCollapsedCases,
8592
8912
  multiVectorLexicalHitCases: baselineEntry.traceSummary?.multiVectorLexicalHitCases,
8593
8913
  multiVectorVectorHitCases: baselineEntry.traceSummary?.multiVectorVectorHitCases,
8914
+ evidenceReconcileCases: baselineEntry.traceSummary?.stageCounts?.evidence_reconcile,
8594
8915
  runtimeCandidateBudgetExhaustedCases: baselineEntry.traceSummary?.runtimeCandidateBudgetExhaustedCases,
8595
8916
  runtimeUnderfilledTopKCases: baselineEntry.traceSummary?.runtimeUnderfilledTopKCases,
8596
8917
  passingRate: baselineEntry.response.passingRate,
@@ -8604,6 +8925,7 @@ var buildRAGRetrievalComparisonDecisionSummary = ({
8604
8925
  multiVectorCollapsedCases: candidateEntry.traceSummary?.multiVectorCollapsedCases,
8605
8926
  multiVectorLexicalHitCases: candidateEntry.traceSummary?.multiVectorLexicalHitCases,
8606
8927
  multiVectorVectorHitCases: candidateEntry.traceSummary?.multiVectorVectorHitCases,
8928
+ evidenceReconcileCases: candidateEntry.traceSummary?.stageCounts?.evidence_reconcile,
8607
8929
  runtimeCandidateBudgetExhaustedCases: candidateEntry.traceSummary?.runtimeCandidateBudgetExhaustedCases,
8608
8930
  runtimeUnderfilledTopKCases: candidateEntry.traceSummary?.runtimeUnderfilledTopKCases,
8609
8931
  passingRate: candidateEntry.response.passingRate,
@@ -8618,6 +8940,7 @@ var buildRAGRetrievalComparisonDecisionSummary = ({
8618
8940
  winnerByMultivectorCollapsedCases: comparison.summary.bestByMultivectorCollapsedCases,
8619
8941
  winnerByMultivectorLexicalHitCases: comparison.summary.bestByMultivectorLexicalHitCases,
8620
8942
  winnerByMultivectorVectorHitCases: comparison.summary.bestByMultivectorVectorHitCases,
8943
+ winnerByEvidenceReconcileCases: comparison.summary.bestByEvidenceReconcileCases,
8621
8944
  winnerByLowestRuntimeCandidateBudgetExhaustedCases: comparison.summary.bestByLowestRuntimeCandidateBudgetExhaustedCases,
8622
8945
  winnerByLowestRuntimeUnderfilledTopKCases: comparison.summary.bestByLowestRuntimeUnderfilledTopKCases
8623
8946
  };
@@ -9325,6 +9648,27 @@ var selectComparisonEntryByLowestTraceMetric = (entries, idKey, metric) => {
9325
9648
  const winner = ranked[0];
9326
9649
  return typeof winner?.[idKey] === "string" ? winner[idKey] : undefined;
9327
9650
  };
9651
/**
 * Picks the comparison entry whose trace summary recorded the most cases for
 * a given stage and returns that entry's identifier.
 *
 * Ranking, in order: higher stage count, higher passing rate, higher average
 * F1, then lower average latency. The input array is not mutated.
 *
 * @param {Array<object>} entries - Comparison entries with `traceSummary` and `response`.
 * @param {string} idKey - Property name on an entry that holds its identifier.
 * @param {string} stage - Key looked up in `traceSummary.stageCounts`.
 * @returns {string | undefined} The winning identifier, or undefined when there
 *   are no entries, the best stage count is 0, or the identifier is not a string.
 */
var selectComparisonEntryByTraceStageCount = (entries, idKey, stage) => {
  const stageCountOf = (entry) => entry.traceSummary?.stageCounts?.[stage] ?? 0;
  // Keys where a larger value ranks first; read lazily so later keys are only
  // touched when the earlier ones tie.
  const descendingKeys = [
    stageCountOf,
    (entry) => entry.response.passingRate,
    (entry) => entry.response.summary.averageF1
  ];
  const compareEntries = (left, right) => {
    for (const readKey of descendingKeys) {
      const leftValue = readKey(left);
      const rightValue = readKey(right);
      if (rightValue !== leftValue) {
        return rightValue - leftValue;
      }
    }
    // Final tie-break: the faster entry ranks first.
    return left.response.summary.averageLatencyMs - right.response.summary.averageLatencyMs;
  };
  const [best] = [...entries].sort(compareEntries);
  if (!best || stageCountOf(best) === 0) {
    return;
  }
  const identifier = best[idKey];
  return typeof identifier === "string" ? identifier : undefined;
};
9328
9672
  var resolveRetrievalMode = (candidate) => {
9329
9673
  if (!candidate.retrieval) {
9330
9674
  return "vector";
@@ -9419,6 +9763,8 @@ var compareRAGRetrievalTraceSummaries = (current, previous) => ({
9419
9763
  multiVectorVectorHitCasesDelta: current.multiVectorVectorHitCases - previous.multiVectorVectorHitCases,
9420
9764
  multiVectorLexicalHitCasesDelta: current.multiVectorLexicalHitCases - previous.multiVectorLexicalHitCases,
9421
9765
  multiVectorCollapsedCasesDelta: current.multiVectorCollapsedCases - previous.multiVectorCollapsedCases,
9766
+ officeEvidenceReconcileCasesDelta: current.officeEvidenceReconcileCases - previous.officeEvidenceReconcileCases,
9767
+ pdfEvidenceReconcileCasesDelta: current.pdfEvidenceReconcileCases - previous.pdfEvidenceReconcileCases,
9422
9768
  runtimeCandidateBudgetExhaustedCasesDelta: current.runtimeCandidateBudgetExhaustedCases - previous.runtimeCandidateBudgetExhaustedCases,
9423
9769
  runtimeUnderfilledTopKCasesDelta: current.runtimeUnderfilledTopKCases - previous.runtimeUnderfilledTopKCases
9424
9770
  });
@@ -9784,10 +10130,129 @@ var generateRAGEvaluationSuiteFromDocuments = ({
9784
10130
  };
9785
10131
  var DEFAULT_NATIVE_PLANNER_BENCHMARK_SUITE_ID = "rag-native-planner-larger-corpus";
9786
10132
  var DEFAULT_NATIVE_PLANNER_BENCHMARK_LABEL = "Adaptive Native Planner Benchmark";
10133
+ var DEFAULT_NATIVE_BACKEND_COMPARISON_BENCHMARK_SUITE_ID = "rag-native-backend-larger-corpus";
10134
+ var DEFAULT_NATIVE_BACKEND_COMPARISON_BENCHMARK_LABEL = "Native Backend Comparison Benchmark";
9787
10135
  var DEFAULT_NATIVE_PLANNER_BENCHMARK_QUERY = "Which launch checklist phrase is exact wording?";
10136
+ var DEFAULT_NATIVE_BACKEND_HYBRID_QUERY = "aurora promotion checklist wording";
10137
+ var DEFAULT_NATIVE_BACKEND_FILTERED_QUERY = "focus lane launch checklist wording";
10138
+ var DEFAULT_NATIVE_BACKEND_REORDERED_QUERY = "exact aurora focus lane checklist wording";
10139
+ var DEFAULT_NATIVE_BACKEND_GUIDE_QUERY = "which focus lane guide contains exact aurora promotion wording";
9788
10140
  var DEFAULT_NATIVE_PLANNER_BENCHMARK_FILTER = {
9789
10141
  lane: "focus"
9790
10142
  };
10143
+ var DEFAULT_NATIVE_PLANNER_HARD_NEGATIVE_DOCUMENT_IDS = [
10144
+ "focus-distractor-0",
10145
+ "focus-distractor-1",
10146
+ "focus-distractor-2"
10147
+ ];
10148
/**
 * Produces a fixed two-component vector for a piece of text based on which
 * phrases it contains, compared case-insensitively.
 *
 * - Contains "launch checklist exact wording" -> [0.995, 0.005]
 * - Otherwise contains "aurora", "checklist", "focus lane", "exact wording",
 *   or "guide" -> [1, 0]
 * - Anything else -> [0, 1]
 *
 * @param {string} text - Text to embed.
 * @returns {Promise<number[]>} Resolves to the two-component vector.
 */
var createRAGNativeBackendBenchmarkMockEmbedding = async (text) => {
  const haystack = text.toLowerCase();
  const mentionsAny = (phrases) => phrases.some((phrase) => haystack.includes(phrase));
  const targetPhrases = [
    "launch checklist exact wording for aurora promotion",
    "launch checklist exact wording"
  ];
  if (mentionsAny(targetPhrases)) {
    return [0.995, 0.005];
  }
  const focusPhrases = ["aurora", "checklist", "focus lane", "exact wording", "guide"];
  return mentionsAny(focusPhrases) ? [1, 0] : [0, 1];
};
10158
/**
 * Builds the synthetic chunk list used by the native backend benchmark.
 *
 * The list contains, in order:
 *   1. `noiseCount` "noise" lane chunks with embedding [0, 1],
 *   2. three "focus" lane distractor chunks with embedding [1, 0],
 *   3. one "focus" lane target chunk with embedding [0.995, 0.005],
 *   4. two extra "focus" chunks tagged with `metadata.backendFixture` when
 *      `backend` is "sqlite-native" or "postgres" (none for any other value).
 *
 * @param {{ noiseCount?: number, backend?: string }} [input]
 *   `noiseCount` defaults to 5001 and `backend` defaults to "generic".
 * @returns {Array<object>} Newly created chunk objects.
 */
var createRAGNativeBackendBenchmarkCorpus = (input) => {
  const noiseTotal = input?.noiseCount ?? 5001;
  const backendName = input?.backend ?? "generic";

  // Shared shape for every focus-lane chunk; key order mirrors the literal layout.
  const focusChunk = ({ chunkId, documentId, source, text, embedding = [1, 0], backendFixture }) => ({
    chunkId,
    corpusKey: "focus",
    embedding,
    metadata: {
      ...(backendFixture ? { backendFixture } : {}),
      corpusKey: "focus",
      documentId,
      lane: "focus"
    },
    source,
    text
  });

  const noiseChunk = (_, index) => ({
    chunkId: `noise:${index}`,
    corpusKey: "noise",
    embedding: [0, 1],
    metadata: {
      corpusKey: "noise",
      documentId: `noise-${index}`,
      lane: "noise"
    },
    source: `noise/${index}.md`,
    text: `Background operations note ${index}.`
  });

  const distractorTexts = [
    "aurora promotion checklist overview",
    "launch checklist wording draft",
    "focus lane promotion runbook notes"
  ];

  const corpus = [
    ...Array.from({ length: noiseTotal }, noiseChunk),
    ...distractorTexts.map((text, index) => focusChunk({
      chunkId: `focus:distractor:${index}`,
      documentId: `focus-distractor-${index}`,
      source: `focus/distractor-${index}.md`,
      text
    })),
    focusChunk({
      chunkId: "focus:target",
      documentId: "focus-target",
      embedding: [0.995, 0.005],
      source: "guide/planner-depth.md",
      text: "launch checklist exact wording for aurora promotion in the focus lane"
    })
  ];

  // Backend-specific fixtures are appended after the generic chunks.
  switch (backendName) {
    case "sqlite-native":
      corpus.push(
        focusChunk({
          backendFixture: "sqlite-native",
          chunkId: "focus:sqlite:phrase-matrix",
          documentId: "focus-sqlite-phrase-matrix",
          source: "guide/sqlite-phrase-matrix.md",
          text: "exact aurora focus lane checklist wording matrix for sqlite validation"
        }),
        focusChunk({
          backendFixture: "sqlite-native",
          chunkId: "focus:sqlite:guide-table",
          documentId: "focus-sqlite-guide-table",
          source: "guide/sqlite-guide-table.md",
          text: "which focus lane guide contains aurora promotion wording draft table for sqlite operators"
        })
      );
      break;
    case "postgres":
      corpus.push(
        focusChunk({
          backendFixture: "postgres",
          chunkId: "focus:postgres:appendix",
          documentId: "focus-postgres-appendix",
          source: "guide/postgres-appendix.md",
          text: "which focus lane guide contains exact aurora promotion wording appendix for postgres release review"
        }),
        focusChunk({
          backendFixture: "postgres",
          chunkId: "focus:postgres:alternatives",
          documentId: "focus-postgres-alternatives",
          source: "guide/postgres-alternatives.md",
          text: "aurora promotion checklist wording alternatives and exact focus lane phrasing for postgres audits"
        })
      );
      break;
    default:
      break;
  }
  return corpus;
};
9791
10256
  var createRAGAdaptiveNativePlannerBenchmarkSuite = (input) => createRAGEvaluationSuite({
9792
10257
  description: input?.description ?? "Stress-tests larger-corpus native planner selection, candidate-budget pressure, and transformed-query recovery on filtered retrieval.",
9793
10258
  id: input?.id ?? DEFAULT_NATIVE_PLANNER_BENCHMARK_SUITE_ID,
@@ -9839,6 +10304,102 @@ var createRAGAdaptiveNativePlannerBenchmarkSnapshot = (input) => {
9839
10304
  version: input?.version
9840
10305
  });
9841
10306
  };
10307
/**
 * Creates the native backend comparison benchmark suite by passing a fixed
 * five-case definition to `createRAGEvaluationSuite`.
 *
 * Every case expects document "focus-target", carries its own copy of the
 * default planner filter and hard-negative document ids, and differs only in
 * id, label, and query.
 *
 * @param {{ description?: string, id?: string, label?: string, topK?: number, metadata?: object }} [input]
 *   Optional overrides. `topK` defaults to 1 and is applied to each case and
 *   to the suite input; `metadata` entries are spread over the default metadata.
 * @returns {*} Whatever `createRAGEvaluationSuite` returns for the definition.
 */
var createRAGNativeBackendComparisonBenchmarkSuite = (input) => {
  const topK = input?.topK ?? 1;
  // [id, label, query] for each benchmark case, in suite order.
  const caseBlueprints = [
    [
      "planner-pressure-exact-phrase",
      "Exact phrase survives larger-corpus native pressure",
      DEFAULT_NATIVE_PLANNER_BENCHMARK_QUERY
    ],
    [
      "planner-pressure-hybrid-phrase",
      "Hybrid retrieval survives filtered lexical pressure",
      DEFAULT_NATIVE_BACKEND_HYBRID_QUERY
    ],
    [
      "planner-pressure-filtered-lane-query",
      "Filtered lane query survives broader corpus noise",
      DEFAULT_NATIVE_BACKEND_FILTERED_QUERY
    ],
    [
      "planner-pressure-reordered-phrase",
      "Reordered phrase survives transform pressure",
      DEFAULT_NATIVE_BACKEND_REORDERED_QUERY
    ],
    [
      "planner-pressure-guide-query",
      "Guide attribution survives filtered corpus pressure",
      DEFAULT_NATIVE_BACKEND_GUIDE_QUERY
    ]
  ];
  const cases = caseBlueprints.map(([id, label, query]) => ({
    expectedDocumentIds: ["focus-target"],
    // Fresh copies per case so no two cases share a filter or id list.
    filter: { ...DEFAULT_NATIVE_PLANNER_BENCHMARK_FILTER },
    hardNegativeDocumentIds: [...DEFAULT_NATIVE_PLANNER_HARD_NEGATIVE_DOCUMENT_IDS],
    id,
    label,
    query,
    topK
  }));
  return createRAGEvaluationSuite({
    description: input?.description ?? "Captures larger-corpus native backend parity with filtered vector pressure and harder hybrid retrieval cases so sqlite-native and postgres runs can be compared over time.",
    id: input?.id ?? DEFAULT_NATIVE_BACKEND_COMPARISON_BENCHMARK_SUITE_ID,
    input: {
      cases,
      filter: { ...DEFAULT_NATIVE_PLANNER_BENCHMARK_FILTER },
      retrieval: "vector",
      topK
    },
    label: input?.label ?? DEFAULT_NATIVE_BACKEND_COMPARISON_BENCHMARK_LABEL,
    metadata: {
      benchmarkKind: "native_backend_comparison",
      benchmarkScope: "larger_corpus",
      expectedSignals: [
        "backend-tagged runtime artifacts",
        "selected native planner profile",
        "hybrid filtered retrieval",
        "candidate-budget exhaustion",
        "underfilled topk",
        "query transform pressure"
      ],
      recommendedGroupKey: "runtime-native-backend-parity",
      recommendedTags: ["runtime", "backend", "native"],
      ...input?.metadata
    }
  });
};
10389
/**
 * Creates a suite snapshot for the native backend comparison benchmark by
 * delegating to `createRAGEvaluationSuiteSnapshot`.
 *
 * @param {{ suite?: object, createdAt?: *, id?: *, version?: *, metadata?: object }} [input]
 *   `suite` falls back to `createRAGNativeBackendComparisonBenchmarkSuite()`
 *   when nullish; `metadata` entries are spread over the default
 *   `artifactKind` / `persistForReleaseHistory` values; the remaining fields
 *   are forwarded unchanged.
 * @returns {*} Whatever `createRAGEvaluationSuiteSnapshot` returns.
 */
var createRAGNativeBackendComparisonBenchmarkSnapshot = (input) => {
  const { createdAt, id, metadata, suite, version } = input ?? {};
  return createRAGEvaluationSuiteSnapshot({
    createdAt,
    id,
    metadata: {
      artifactKind: "native_backend_comparison_benchmark",
      persistForReleaseHistory: true,
      ...metadata
    },
    suite: suite ?? createRAGNativeBackendComparisonBenchmarkSuite(),
    version
  });
};
9842
10403
  var createRAGEvaluationSuiteSnapshot = ({
9843
10404
  suite,
9844
10405
  id,
@@ -10047,6 +10608,7 @@ var summarizeRAGRetrievalComparison = (entries) => ({
10047
10608
  bestByMultivectorCollapsedCases: selectComparisonEntryByTraceMetric(entries, "retrievalId", "multiVectorCollapsedCases"),
10048
10609
  bestByMultivectorLexicalHitCases: selectComparisonEntryByTraceMetric(entries, "retrievalId", "multiVectorLexicalHitCases"),
10049
10610
  bestByMultivectorVectorHitCases: selectComparisonEntryByTraceMetric(entries, "retrievalId", "multiVectorVectorHitCases"),
10611
+ bestByEvidenceReconcileCases: selectComparisonEntryByTraceStageCount(entries, "retrievalId", "evidence_reconcile"),
10050
10612
  bestByLowestRuntimeCandidateBudgetExhaustedCases: selectComparisonEntryByLowestTraceMetric(entries, "retrievalId", "runtimeCandidateBudgetExhaustedCases"),
10051
10613
  bestByLowestRuntimeUnderfilledTopKCases: selectComparisonEntryByLowestTraceMetric(entries, "retrievalId", "runtimeUnderfilledTopKCases")
10052
10614
  });
@@ -13803,32 +14365,126 @@ var splitMarkdownPreferredChunkUnits = (value) => {
13803
14365
  flushFence();
13804
14366
  return units;
13805
14367
  };
14368
/**
 * Walks previously collected PDF block entries from last to first and returns
 * the first heading that `inferPDFBlockHeading` yields for a plain paragraph
 * on the given page.
 *
 * Entries that are missing, on another page, carry a `pdfSemanticRole`, or
 * are not of kind "paragraph" are skipped. The walk stops as soon as an entry
 * with a smaller numeric page number is reached.
 *
 * @param {Array<object | undefined>} blockEntries - Entries collected so far.
 * @param {number | undefined} pageNumber - Page to search; non-numbers yield undefined.
 * @returns {*} The inferred heading, or undefined when none is found.
 */
var findNearestPDFContextHeading = (blockEntries, pageNumber) => {
  if (typeof pageNumber !== "number") {
    return;
  }
  let cursor = blockEntries.length;
  while (cursor-- > 0) {
    const candidate = blockEntries[cursor];
    if (!candidate) {
      continue;
    }
    if (candidate.pageNumber !== pageNumber) {
      const isEarlierPage = typeof candidate.pageNumber === "number" && candidate.pageNumber < pageNumber;
      if (isEarlierPage) {
        // Walked past the requested page; nothing further back can match.
        break;
      }
      continue;
    }
    const isPlainParagraph = !candidate.pdfSemanticRole && candidate.pdfTextKind === "paragraph";
    if (!isPlainParagraph) {
      continue;
    }
    const inferredHeading = inferPDFBlockHeading(candidate.text);
    if (inferredHeading) {
      return inferredHeading;
    }
  }
  return undefined;
};
14390
/**
 * Reads column headers from the first non-empty line of table-like text.
 *
 * Each line is passed through `normalizeWhitespace`; the first one that is
 * non-empty afterwards is treated as the header row and split on " | ".
 *
 * @param {string} text - Table text with one row per line.
 * @returns {string[] | undefined} The header cells, or undefined when there is
 *   no header line, it contains no " | " separator, or fewer than two
 *   non-empty cells remain.
 */
var getPDFTableHeaders = (text) => {
  const [headerLine] = text
    .split("\n")
    .map((line) => normalizeWhitespace(line))
    .filter(Boolean);
  const hasSeparator = Boolean(headerLine) && headerLine.includes(" | ");
  if (!hasSeparator) {
    return;
  }
  const cells = headerLine
    .split(" | ")
    .map((cell) => normalizeWhitespace(cell))
    .filter(Boolean);
  if (cells.length < 2) {
    return undefined;
  }
  return cells;
};
13806
14400
  var pdfNativeStructureUnits = (metadata) => {
13807
14401
  const blocks = Array.isArray(metadata?.pdfTextBlocks) ? metadata.pdfTextBlocks : [];
13808
- const units = [];
14402
+ const blockEntries = [];
13809
14403
  for (const block of blocks) {
13810
14404
  if (!block || typeof block !== "object") {
13811
14405
  continue;
13812
14406
  }
13813
- const text = typeof block.text === "string" ? normalizeWhitespace(block.text) : "";
14407
+ const rawText = typeof block.text === "string" ? block.text : "";
14408
+ const pdfTextKind = block.textKind === "table_like" ? "table_like" : "paragraph";
14409
+ const text = pdfTextKind === "table_like" ? rawText.split(`
14410
+ `).map((line) => normalizeWhitespace(line)).filter(Boolean).join(`
14411
+ `) : normalizeWhitespace(rawText);
13814
14412
  if (!text) {
13815
14413
  continue;
13816
14414
  }
13817
14415
  const pageNumber = typeof block.pageNumber === "number" && Number.isFinite(block.pageNumber) ? block.pageNumber : undefined;
13818
14416
  const pdfBlockNumber = typeof block.blockNumber === "number" && Number.isFinite(block.blockNumber) ? block.blockNumber : undefined;
13819
- const pdfTextKind = block.textKind === "table_like" ? "table_like" : "paragraph";
13820
- const sectionTitle = pageNumber ? pdfTextKind === "table_like" ? `Page ${pageNumber} Table Block` : `Page ${pageNumber} Text Block` : pdfTextKind === "table_like" ? "Table Block" : "Text Block";
13821
- units.push({
14417
+ const previousBlock = blockEntries.at(-1);
14418
+ const previousFigureCaption = previousBlock && previousBlock.pageNumber === pageNumber && previousBlock.pdfSemanticRole === "figure_caption" ? previousBlock : undefined;
14419
+ const pdfSemanticRole = block.semanticRole === "figure_caption" ? "figure_caption" : block.semanticRole === "figure_body" ? "figure_body" : pdfTextKind === "paragraph" && previousFigureCaption && !inferPDFBlockHeading(text) ? "figure_body" : undefined;
14420
+ const currentBlockHeading = pdfTextKind === "paragraph" && !pdfSemanticRole ? inferPDFBlockHeading(text) : undefined;
14421
+ const contextualHeading = pdfTextKind === "table_like" ? findNearestPDFContextHeading(blockEntries, pageNumber) : undefined;
14422
+ const contextualTableTitle = contextualHeading && pdfTextKind === "table_like" ? /\btable\b/i.test(contextualHeading) ? contextualHeading : `${contextualHeading} Table` : undefined;
14423
+ const pdfTableHeaders = pdfTextKind === "table_like" ? getPDFTableHeaders(text) : undefined;
14424
+ const pdfTableHeaderText = pdfTextKind === "table_like" ? text.split(`
14425
+ `).map((line) => normalizeWhitespace(line)).filter(Boolean)[0] : undefined;
14426
+ const pdfTableRowCount = pdfTextKind === "table_like" ? text.split(`
14427
+ `).map((line) => normalizeWhitespace(line)).filter(Boolean).length : undefined;
14428
+ const pdfTableBodyRowCount = typeof pdfTableRowCount === "number" ? Math.max(0, pdfTableRowCount - 1) : undefined;
14429
+ const pdfTableBodyRowStart = typeof pdfTableBodyRowCount === "number" && pdfTableBodyRowCount > 0 ? 1 : undefined;
14430
+ const pdfTableBodyRowEnd = typeof pdfTableBodyRowCount === "number" && pdfTableBodyRowCount > 0 ? pdfTableBodyRowCount : undefined;
14431
+ const pdfTableColumnCount = Array.isArray(pdfTableHeaders) ? pdfTableHeaders.length : undefined;
14432
+ const pdfTableSignature = Array.isArray(pdfTableHeaders) && pdfTableHeaders.length > 0 ? pdfTableHeaders.join(" | ") : undefined;
14433
+ const pdfFigureLabel = pdfSemanticRole === "figure_caption" ? extractPDFFigureLabel(text) : previousFigureCaption?.pdfFigureLabel;
14434
+ const pdfFigureCaptionBlockNumber = pdfSemanticRole === "figure_caption" ? pdfBlockNumber : previousFigureCaption?.pdfFigureCaptionBlockNumber;
14435
+ const baseSectionTitle = pdfSemanticRole === "figure_caption" ? pageNumber ? `Page ${pageNumber} Figure Caption` : "Figure Caption" : pdfSemanticRole === "figure_body" ? pdfFigureLabel ? `${pdfFigureLabel} Body` : pageNumber ? `Page ${pageNumber} Figure Body` : "Figure Body" : currentBlockHeading ? currentBlockHeading : contextualTableTitle ? contextualTableTitle : pageNumber ? pdfTextKind === "table_like" ? `Page ${pageNumber} Table Block` : `Page ${pageNumber} Text Block` : pdfTextKind === "table_like" ? "Table Block" : "Text Block";
14436
+ blockEntries.push({
14437
+ baseSectionTitle,
13822
14438
  pageNumber,
13823
14439
  pdfBlockNumber,
14440
+ ...typeof pdfFigureCaptionBlockNumber === "number" ? { pdfFigureCaptionBlockNumber } : {},
14441
+ ...pdfFigureLabel ? { pdfFigureLabel } : {},
14442
+ ...pdfSemanticRole ? { pdfSemanticRole } : {},
14443
+ ...typeof pdfTableBodyRowEnd === "number" ? { pdfTableBodyRowEnd } : {},
14444
+ ...typeof pdfTableBodyRowCount === "number" ? { pdfTableBodyRowCount } : {},
14445
+ ...typeof pdfTableBodyRowStart === "number" ? { pdfTableBodyRowStart } : {},
14446
+ ...pdfTextKind === "table_like" ? { pdfTableChunkKind: "full_table" } : {},
14447
+ ...typeof pdfTableColumnCount === "number" ? { pdfTableColumnCount } : {},
14448
+ ...typeof pdfTableHeaderText === "string" ? { pdfTableHeaderText } : {},
14449
+ ...Array.isArray(pdfTableHeaders) && pdfTableHeaders.length > 0 ? { pdfTableHeaders } : {},
14450
+ ...typeof pdfTableRowCount === "number" ? { pdfTableRowCount } : {},
14451
+ ...pdfTableSignature ? { pdfTableSignature } : {},
13824
14452
  pdfTextKind,
13825
- preferredChunkUnits: pdfTextKind === "table_like" ? text.split(`
14453
+ text
14454
+ });
14455
+ }
14456
+ const titleCounts = new Map;
14457
+ for (const block of blockEntries) {
14458
+ titleCounts.set(block.baseSectionTitle, (titleCounts.get(block.baseSectionTitle) ?? 0) + 1);
14459
+ }
14460
+ const units = [];
14461
+ for (const block of blockEntries) {
14462
+ const sectionTitle = (titleCounts.get(block.baseSectionTitle) ?? 0) > 1 && typeof block.pdfBlockNumber === "number" ? `${block.baseSectionTitle} ${block.pdfBlockNumber}` : block.baseSectionTitle;
14463
+ units.push({
14464
+ pageNumber: block.pageNumber,
14465
+ pdfBlockNumber: block.pdfBlockNumber,
14466
+ ...typeof block.pdfFigureCaptionBlockNumber === "number" ? {
14467
+ pdfFigureCaptionBlockNumber: block.pdfFigureCaptionBlockNumber
14468
+ } : {},
14469
+ ...block.pdfFigureLabel ? { pdfFigureLabel: block.pdfFigureLabel } : {},
14470
+ ...block.pdfSemanticRole ? { pdfSemanticRole: block.pdfSemanticRole } : {},
14471
+ ...typeof block.pdfTableBodyRowEnd === "number" ? { pdfTableBodyRowEnd: block.pdfTableBodyRowEnd } : {},
14472
+ ...typeof block.pdfTableBodyRowCount === "number" ? { pdfTableBodyRowCount: block.pdfTableBodyRowCount } : {},
14473
+ ...typeof block.pdfTableBodyRowStart === "number" ? { pdfTableBodyRowStart: block.pdfTableBodyRowStart } : {},
14474
+ ...block.pdfTableChunkKind ? { pdfTableChunkKind: block.pdfTableChunkKind } : {},
14475
+ ...typeof block.pdfTableColumnCount === "number" ? { pdfTableColumnCount: block.pdfTableColumnCount } : {},
14476
+ ...typeof block.pdfTableHeaderText === "string" ? { pdfTableHeaderText: block.pdfTableHeaderText } : {},
14477
+ ...Array.isArray(block.pdfTableHeaders) && block.pdfTableHeaders.length > 0 ? { pdfTableHeaders: block.pdfTableHeaders } : {},
14478
+ ...typeof block.pdfTableRowCount === "number" ? { pdfTableRowCount: block.pdfTableRowCount } : {},
14479
+ ...block.pdfTableSignature ? { pdfTableSignature: block.pdfTableSignature } : {},
14480
+ pdfTextKind: block.pdfTextKind,
14481
+ preferredChunkUnits: block.pdfTextKind === "table_like" ? block.text.split(`
13826
14482
  `).filter(Boolean) : undefined,
13827
14483
  sectionDepth: 1,
13828
14484
  sectionKind: "pdf_block",
13829
14485
  sectionPath: [sectionTitle],
13830
14486
  sectionTitle,
13831
- text
14487
+ text: block.text
13832
14488
  });
13833
14489
  }
13834
14490
  return units;
@@ -13837,6 +14493,10 @@ var officeNativeStructureUnits = (metadata) => {
13837
14493
  const blocks = Array.isArray(metadata?.officeBlocks) ? metadata.officeBlocks : [];
13838
14494
  const units = [];
13839
14495
  const headingStack = [];
14496
+ const headingSiblingCounts = new Map;
14497
+ let pendingListContextText;
14498
+ let pendingTableContextText;
14499
+ let consumedOfficeListUntil = -1;
13840
14500
  const decorateOfficeSectionText = (text, sectionTitle) => {
13841
14501
  if (!sectionTitle || text.includes(sectionTitle)) {
13842
14502
  return text;
@@ -13845,6 +14505,9 @@ var officeNativeStructureUnits = (metadata) => {
13845
14505
  ${text}`);
13846
14506
  };
13847
14507
  for (const [index, block] of blocks.entries()) {
14508
+ if (index <= consumedOfficeListUntil) {
14509
+ continue;
14510
+ }
13848
14511
  if (!block || typeof block !== "object") {
13849
14512
  continue;
13850
14513
  }
@@ -13855,20 +14518,39 @@ ${text}`);
13855
14518
  const officeBlockNumber = typeof block.blockNumber === "number" && Number.isFinite(block.blockNumber) ? block.blockNumber : undefined;
13856
14519
  const officeBlockKind = block.blockKind === "title" || block.blockKind === "heading" || block.blockKind === "list" || block.blockKind === "table" ? block.blockKind : "paragraph";
13857
14520
  const headingLevel = typeof block.headingLevel === "number" && Number.isFinite(block.headingLevel) ? block.headingLevel : undefined;
14521
+ const officeListLevel = typeof block.listLevel === "number" && Number.isFinite(block.listLevel) ? block.listLevel : undefined;
14522
+ const officeTableBodyRowCount = typeof block.tableBodyRowCount === "number" && Number.isFinite(block.tableBodyRowCount) ? block.tableBodyRowCount : undefined;
14523
+ const officeTableColumnCount = typeof block.tableColumnCount === "number" && Number.isFinite(block.tableColumnCount) ? block.tableColumnCount : undefined;
14524
+ const officeTableHeaderText = typeof block.tableHeaderText === "string" && block.tableHeaderText.length > 0 ? block.tableHeaderText : undefined;
14525
+ const officeTableHeaders = Array.isArray(block.tableHeaders) && block.tableHeaders.length > 0 ? block.tableHeaders.filter((value) => typeof value === "string" && value.length > 0) : undefined;
14526
+ const officeTableRowCount = typeof block.tableRowCount === "number" && Number.isFinite(block.tableRowCount) ? block.tableRowCount : undefined;
14527
+ const officeTableSignature = typeof block.tableSignature === "string" && block.tableSignature.length > 0 ? block.tableSignature : undefined;
13858
14528
  if (officeBlockKind === "title" || officeBlockKind === "heading") {
13859
14529
  const level = officeBlockKind === "title" ? 1 : headingLevel ?? 1;
13860
- headingStack[level - 1] = text;
14530
+ const parentScope = headingStack.slice(0, Math.max(0, level - 1)).join(" > ");
14531
+ const headingKey = `${level}:${parentScope}:${text}`;
14532
+ const headingCount = (headingSiblingCounts.get(headingKey) ?? 0) + 1;
14533
+ headingSiblingCounts.set(headingKey, headingCount);
14534
+ const resolvedHeadingText = headingCount > 1 ? `${text} (${headingCount})` : text;
14535
+ headingStack[level - 1] = resolvedHeadingText;
13861
14536
  headingStack.length = level;
13862
- const nextBlock = blocks[index + 1];
13863
- const nextKind = nextBlock && typeof nextBlock === "object" ? nextBlock.blockKind : undefined;
13864
- if (nextKind === "title" || nextKind === "heading" || nextKind === "list" || nextKind === "table" || !nextBlock) {
14537
+ const nextBlock2 = blocks[index + 1];
14538
+ const nextKind2 = nextBlock2 && typeof nextBlock2 === "object" ? nextBlock2.blockKind : undefined;
14539
+ if (nextKind2 === "title" || nextKind2 === "heading" || nextKind2 === "list" || nextKind2 === "table" || !nextBlock2) {
13865
14540
  units.push({
13866
14541
  officeBlockKind,
14542
+ officeListLevel,
13867
14543
  officeBlockNumber,
14544
+ ...typeof officeTableBodyRowCount === "number" ? { officeTableBodyRowCount } : {},
14545
+ ...typeof officeTableColumnCount === "number" ? { officeTableColumnCount } : {},
14546
+ ...typeof officeTableHeaderText === "string" ? { officeTableHeaderText } : {},
14547
+ ...Array.isArray(officeTableHeaders) && officeTableHeaders.length > 0 ? { officeTableHeaders } : {},
14548
+ ...typeof officeTableRowCount === "number" ? { officeTableRowCount } : {},
14549
+ ...typeof officeTableSignature === "string" ? { officeTableSignature } : {},
13868
14550
  sectionDepth: headingStack.length,
13869
14551
  sectionKind: "office_heading",
13870
14552
  sectionPath: [...headingStack],
13871
- sectionTitle: text,
14553
+ sectionTitle: resolvedHeadingText,
13872
14554
  text
13873
14555
  });
13874
14556
  }
@@ -13876,16 +14558,143 @@ ${text}`);
13876
14558
  }
13877
14559
  const sectionPath = headingStack.length > 0 ? [...headingStack] : undefined;
13878
14560
  const sectionTitle = sectionPath?.at(-1);
14561
+ if (officeBlockKind === "list") {
14562
+ const runTexts = [];
14563
+ const runLevels = [];
14564
+ let runEnd = index;
14565
+ for (;runEnd < blocks.length; runEnd += 1) {
14566
+ const runBlock = blocks[runEnd];
14567
+ if (!runBlock || typeof runBlock !== "object") {
14568
+ break;
14569
+ }
14570
+ if (runBlock.blockKind !== "list") {
14571
+ break;
14572
+ }
14573
+ const runText = typeof runBlock.text === "string" ? normalizeWhitespace(runBlock.text) : "";
14574
+ if (!runText) {
14575
+ break;
14576
+ }
14577
+ runTexts.push(runText);
14578
+ if (typeof runBlock.listLevel === "number" && Number.isFinite(runBlock.listLevel)) {
14579
+ runLevels.push(runBlock.listLevel);
14580
+ }
14581
+ }
14582
+ runEnd -= 1;
14583
+ const nextAfterRun = blocks[runEnd + 1];
14584
+ const nextAfterRunKind = nextAfterRun && typeof nextAfterRun === "object" ? nextAfterRun.blockKind : undefined;
14585
+ const nextAfterRunText = nextAfterRun && typeof nextAfterRun === "object" && typeof nextAfterRun.text === "string" ? normalizeWhitespace(nextAfterRun.text) : undefined;
14586
+ const nextAfterRunNext = blocks[runEnd + 2];
14587
+ const nextAfterRunNextKind = nextAfterRunNext && typeof nextAfterRunNext === "object" ? nextAfterRunNext.blockKind : undefined;
14588
+ const nextAfterRunNextText = nextAfterRunNext && typeof nextAfterRunNext === "object" && typeof nextAfterRunNext.text === "string" ? nextAfterRunNext.text : undefined;
14589
+ const nextAfterRunNextNext = blocks[runEnd + 3];
14590
+ const nextAfterRunNextNextKind = nextAfterRunNextNext && typeof nextAfterRunNextNext === "object" ? nextAfterRunNextNext.blockKind : undefined;
14591
+ const nextAfterRunNextNextNext = blocks[runEnd + 4];
14592
+ const nextAfterRunNextNextNextKind = nextAfterRunNextNextNext && typeof nextAfterRunNextNextNext === "object" ? nextAfterRunNextNextNext.blockKind : undefined;
14593
+ const trailingTableBridgeText = nextAfterRunKind === "paragraph" && typeof nextAfterRunText === "string" && nextAfterRunText.length <= 200 && (nextAfterRunNextKind === "table" || nextAfterRunNextKind === "heading" && typeof nextAfterRunNextText === "string" && /\btable\b/i.test(nextAfterRunNextText) && (nextAfterRunNextNextKind === "table" || nextAfterRunNextNextKind === "paragraph" && nextAfterRunNextNextNextKind === "table")) ? nextAfterRunText : undefined;
14594
+ const officeListContextText2 = normalizeWhitespace([
14595
+ ...typeof pendingListContextText === "string" ? [pendingListContextText] : [],
14596
+ ...typeof trailingTableBridgeText === "string" ? [trailingTableBridgeText] : []
14597
+ ].join(`
14598
+
14599
+ `));
14600
+ const distinctLevels = [...new Set(runLevels)];
14601
+ const officeListLevel2 = distinctLevels.length === 1 ? distinctLevels[0] : undefined;
14602
+ const groupedListText = normalizeWhitespace([
14603
+ ...officeListContextText2 ? [officeListContextText2] : [],
14604
+ ...runTexts
14605
+ ].join(`
14606
+
14607
+ `));
14608
+ if (typeof trailingTableBridgeText === "string") {
14609
+ pendingTableContextText = trailingTableBridgeText;
14610
+ consumedOfficeListUntil = runEnd + 1;
14611
+ } else {
14612
+ consumedOfficeListUntil = runEnd;
14613
+ }
14614
+ pendingListContextText = undefined;
14615
+ units.push({
14616
+ officeBlockKind,
14617
+ ...officeListContextText2 ? { officeListContextText: officeListContextText2 } : {},
14618
+ officeListGroupItemCount: runTexts.length,
14619
+ ...typeof officeListLevel2 === "number" ? { officeListLevel: officeListLevel2 } : {},
14620
+ ...distinctLevels.length > 0 ? { officeListLevels: distinctLevels } : {},
14621
+ officeBlockNumber,
14622
+ preferredChunkUnits: [
14623
+ ...officeListContextText2 ? [officeListContextText2] : [],
14624
+ ...runTexts
14625
+ ],
14626
+ sectionDepth: sectionPath?.length,
14627
+ sectionKind: "office_block",
14628
+ sectionPath,
14629
+ sectionTitle,
14630
+ text: groupedListText
14631
+ });
14632
+ continue;
14633
+ }
14634
+ const nextBlock = blocks[index + 1];
14635
+ const nextKind = nextBlock && typeof nextBlock === "object" ? nextBlock.blockKind : undefined;
14636
+ const nextText = nextBlock && typeof nextBlock === "object" && typeof nextBlock.text === "string" ? nextBlock.text : undefined;
14637
+ const nextNextBlock = blocks[index + 2];
14638
+ const nextNextKind = nextNextBlock && typeof nextNextBlock === "object" ? nextNextBlock.blockKind : undefined;
14639
+ const nextNextText = nextNextBlock && typeof nextNextBlock === "object" && typeof nextNextBlock.text === "string" ? nextNextBlock.text : undefined;
14640
+ const nextNextNextBlock = blocks[index + 3];
14641
+ const nextNextNextKind = nextNextNextBlock && typeof nextNextNextBlock === "object" ? nextNextNextBlock.blockKind : undefined;
14642
+ const nextNextNextText = nextNextNextBlock && typeof nextNextNextBlock === "object" && typeof nextNextNextBlock.text === "string" ? nextNextNextBlock.text : undefined;
14643
+ const nextNextNextNextBlock = blocks[index + 4];
14644
+ const nextNextNextNextKind = nextNextNextNextBlock && typeof nextNextNextNextBlock === "object" ? nextNextNextNextBlock.blockKind : undefined;
14645
+ if (officeBlockKind === "paragraph" && (nextKind === "list" || nextKind === "paragraph" && nextNextKind === "list") && text.length <= 200) {
14646
+ pendingListContextText = normalizeWhitespace([
14647
+ ...typeof pendingListContextText === "string" ? [pendingListContextText] : [],
14648
+ text
14649
+ ].join(`
14650
+
14651
+ `));
14652
+ continue;
14653
+ }
14654
+ if (officeBlockKind === "paragraph" && (nextKind === "table" || nextKind === "paragraph" && nextNextKind === "table" || nextKind === "heading" && typeof nextText === "string" && /\btable\b/i.test(nextText) && (nextNextKind === "table" || nextNextKind === "paragraph" && nextNextNextKind === "table") || nextKind === "paragraph" && nextNextKind === "heading" && typeof nextNextText === "string" && /\btable\b/i.test(nextNextText) && (nextNextNextKind === "table" || nextNextNextKind === "paragraph" && nextNextNextNextKind === "table") || nextKind === "paragraph" && nextNextKind === "paragraph" && nextNextNextKind === "heading" && typeof nextNextNextText === "string" && /\btable\b/i.test(nextNextNextText) && (nextNextNextNextKind === "table" || nextNextNextNextKind === "paragraph" && blocks[index + 5]?.blockKind === "table")) && text.length <= 200) {
14655
+ pendingTableContextText = normalizeWhitespace([
14656
+ ...typeof pendingTableContextText === "string" ? [pendingTableContextText] : [],
14657
+ text
14658
+ ].join(`
14659
+
14660
+ `));
14661
+ continue;
14662
+ }
14663
+ const officeListContextText = officeBlockKind === "list" ? pendingListContextText : undefined;
14664
+ const officeTableContextText = officeBlockKind === "table" ? pendingTableContextText : undefined;
14665
+ if (officeBlockKind !== "list" || nextKind !== "list") {
14666
+ pendingListContextText = undefined;
14667
+ }
14668
+ pendingTableContextText = undefined;
13879
14669
  units.push({
13880
14670
  officeBlockKind,
14671
+ officeListLevel,
13881
14672
  officeBlockNumber,
13882
- preferredChunkUnits: officeBlockKind === "table" ? text.split(`
13883
- `).filter(Boolean) : undefined,
14673
+ ...typeof officeListContextText === "string" ? { officeListContextText } : {},
14674
+ ...typeof officeTableContextText === "string" ? { officeTableContextText } : {},
14675
+ ...typeof officeTableBodyRowCount === "number" ? { officeTableBodyRowCount } : {},
14676
+ ...typeof officeTableColumnCount === "number" ? { officeTableColumnCount } : {},
14677
+ ...typeof officeTableHeaderText === "string" ? { officeTableHeaderText } : {},
14678
+ ...Array.isArray(officeTableHeaders) && officeTableHeaders.length > 0 ? { officeTableHeaders } : {},
14679
+ ...typeof officeTableRowCount === "number" ? { officeTableRowCount } : {},
14680
+ ...typeof officeTableSignature === "string" ? { officeTableSignature } : {},
14681
+ preferredChunkUnits: officeBlockKind === "table" ? [
14682
+ ...typeof officeTableContextText === "string" ? [officeTableContextText] : [],
14683
+ ...text.split(`
14684
+ `).filter(Boolean)
14685
+ ] : officeBlockKind === "list" ? [
14686
+ ...typeof officeListContextText === "string" ? [officeListContextText] : [],
14687
+ text
14688
+ ] : undefined,
13884
14689
  sectionDepth: sectionPath?.length,
13885
14690
  sectionKind: officeBlockKind === "paragraph" ? "office_heading" : "office_block",
13886
14691
  sectionPath,
13887
14692
  sectionTitle,
13888
- text: officeBlockKind === "paragraph" ? decorateOfficeSectionText(text, sectionTitle) : text
14693
+ text: officeBlockKind === "table" && typeof officeTableContextText === "string" ? normalizeWhitespace(`${officeTableContextText}
14694
+
14695
+ ${text}`) : officeBlockKind === "list" && typeof officeListContextText === "string" ? normalizeWhitespace(`${officeListContextText}
14696
+
14697
+ ${text}`) : officeBlockKind === "paragraph" ? decorateOfficeSectionText(text, sectionTitle) : text
13889
14698
  });
13890
14699
  }
13891
14700
  return units;
@@ -14823,6 +15632,13 @@ var appendPdfLineBreak = (parts) => {
14823
15632
  `);
14824
15633
  };
14825
15634
  var PDF_CHROME_LINE_MAX_LENGTH = 80;
15635
+ var PDF_LINK_CLUSTER_LINE_MAX_LENGTH = 120;
15636
+ var PDF_FIGURE_LABEL_PATTERN = /^(?:figure|fig\.)\s*\d+[A-Za-z]?(?:\s*[:.-]\s*|\s+|$)/i;
15637
+ var PDF_LINK_CLUSTER_HEADING_PATTERN = /^(?:related|quick|useful|reference|references|resources|links|see also)\b/i;
15638
+ var PDF_PROMO_HEADING_PATTERN = /^(?:start|free trial|upgrade|subscribe|newsletter|contact sales|book demo|try|learn more)\b/i;
15639
+ var PDF_PROMO_BODY_PATTERN = /\b(?:free trial|upgrade|subscribe|newsletter|contact sales|book demo|learn more|pricing|enterprise|demo)\b/i;
15640
+ var OCR_SUMMARY_CONFIDENCE_THRESHOLD = 0.75;
15641
+ var OCR_SUMMARY_MIN_STRONG_TEXT_RATIO = 0.6;
14826
15642
  var PDF_TEXT_OPERATOR_PATTERN = /(\[((?:\\.|[^\]])*)\]\s*TJ)|(\(((?:\\.|[^\\)])*)\)\s*Tj)|([-+]?\d*\.?\d+\s+[-+]?\d*\.?\d+\s+\(((?:\\.|[^\\)])*)\)\s*")|(\(((?:\\.|[^\\)])*)\)\s*')|((?:[-+]?\d*\.?\d+\s+){2}(?:Td|TD))|(T\*)|((?:[-+]?\d*\.?\d+\s+){6}Tm)/g;
14827
15643
  var extractTextFromPDFTextObject = (value) => {
14828
15644
  const parts = [];
@@ -14851,23 +15667,144 @@ var extractTextFromPDFTextObject = (value) => {
14851
15667
  }
14852
15668
  return parts.join("");
14853
15669
  };
14854
- var buildPDFNativeTextBlock = (text, blockNumber, pageNumber) => {
14855
- const normalized = normalizeWhitespace(text);
14856
- if (!normalized) {
15670
+ var buildPDFNativeTextBlockSeed = (lines, pageNumber) => {
15671
+ const normalizedLines = lines.map((line) => normalizeWhitespace(line)).filter(Boolean);
15672
+ if (normalizedLines.length === 0) {
14857
15673
  return;
14858
15674
  }
14859
- const lineCount = normalized.split(`
14860
- `).filter(Boolean).length;
14861
- const textKind = normalized.includes(" | ") ? "table_like" : "paragraph";
15675
+ const text = normalizedLines.join(`
15676
+ `);
15677
+ const semanticRole = normalizedLines.length >= 2 && PDF_FIGURE_LABEL_PATTERN.test(normalizedLines[0] ?? "") ? "figure_caption" : undefined;
14862
15678
  return {
14863
- blockNumber,
14864
- lineCount,
15679
+ lineCount: normalizedLines.length,
14865
15680
  pageNumber,
14866
- text: normalized,
14867
- textKind
15681
+ ...semanticRole ? { semanticRole } : {},
15682
+ text,
15683
+ textKind: normalizedLines.some((line) => line.includes(" | ")) ? "table_like" : "paragraph"
14868
15684
  };
14869
15685
  };
15686
+ var inferPDFBlockHeading = (text) => {
15687
+ const lines = text.split(`
15688
+ `).map((line) => normalizeWhitespace(line)).filter(Boolean);
15689
+ const candidate = lines[0];
15690
+ if (!candidate || candidate.length > 80 || candidate.includes(" | ") || /[.!?]$/.test(candidate)) {
15691
+ return;
15692
+ }
15693
+ return candidate;
15694
+ };
15695
+ var extractPDFFigureLabel = (text) => {
15696
+ const lines = text.split(`
15697
+ `).map((line) => normalizeWhitespace(line)).filter(Boolean);
15698
+ const candidate = lines[0];
15699
+ return candidate && PDF_FIGURE_LABEL_PATTERN.test(candidate) ? candidate : undefined;
15700
+ };
15701
+ var splitPDFNativeTextBlocks = (text, pageNumber) => {
15702
+ const lines = text.split(`
15703
+ `).map((line) => normalizeWhitespace(line)).filter(Boolean);
15704
+ if (lines.length === 0) {
15705
+ return [];
15706
+ }
15707
+ const blocks = [];
15708
+ let currentLines = [];
15709
+ let currentKind;
15710
+ let currentSemanticRole;
15711
+ const flush = () => {
15712
+ if (currentLines.length === 0) {
15713
+ return;
15714
+ }
15715
+ const block = buildPDFNativeTextBlockSeed(currentLines, pageNumber);
15716
+ if (block) {
15717
+ blocks.push(block);
15718
+ }
15719
+ currentLines = [];
15720
+ currentKind = undefined;
15721
+ currentSemanticRole = undefined;
15722
+ };
15723
+ for (const [index, line] of lines.entries()) {
15724
+ const lineKind = line.includes(" | ") ? "table_like" : "paragraph";
15725
+ const isFigureLabel = PDF_FIGURE_LABEL_PATTERN.test(line);
15726
+ if (isFigureLabel) {
15727
+ flush();
15728
+ currentKind = "paragraph";
15729
+ currentSemanticRole = "figure_caption";
15730
+ currentLines.push(line);
15731
+ continue;
15732
+ }
15733
+ if (currentSemanticRole === "figure_caption") {
15734
+ if (lineKind === "paragraph" && currentLines.length < 2) {
15735
+ currentLines.push(line);
15736
+ continue;
15737
+ }
15738
+ flush();
15739
+ }
15740
+ if (currentKind && lineKind !== currentKind) {
15741
+ flush();
15742
+ }
15743
+ currentKind = lineKind;
15744
+ currentLines.push(line);
15745
+ }
15746
+ flush();
15747
+ return blocks;
15748
+ };
15749
+ var assignPDFBlockNumbers = (blocks) => blocks.map((block, index) => ({
15750
+ ...block,
15751
+ blockNumber: index + 1
15752
+ }));
14870
15753
  var isLikelyPDFPageLabel = (value) => /^page\s+\d+(?:\s+of\s+\d+)?$/i.test(value.trim());
15754
+ var isLikelyPDFChromeLine = (value) => {
15755
+ const normalized = value.trim();
15756
+ if (!normalized) {
15757
+ return false;
15758
+ }
15759
+ return isLikelyPDFPageLabel(normalized) || /\b(?:header|footer)\s*$/i.test(normalized);
15760
+ };
15761
+ var isLikelyPDFLinkLine = (value) => {
15762
+ const normalized = value.trim();
15763
+ if (!normalized || normalized.length > PDF_LINK_CLUSTER_LINE_MAX_LENGTH) {
15764
+ return false;
15765
+ }
15766
+ return /^https?:\/\//i.test(normalized) || /^www\./i.test(normalized) || /^\/[A-Za-z0-9/_#?&=%.-]+$/.test(normalized) || /\((?:https?:\/\/|\/)[^)]+\)/i.test(normalized);
15767
+ };
15768
+ var isLikelyPDFLinkClusterBlock = (block) => {
15769
+ if (block.semanticRole || block.textKind !== "paragraph") {
15770
+ return false;
15771
+ }
15772
+ const lines = block.text.split(`
15773
+ `).map((line) => normalizeWhitespace(line)).filter(Boolean);
15774
+ if (lines.length === 1) {
15775
+ return isLikelyPDFLinkLine(lines[0] ?? "");
15776
+ }
15777
+ if (lines.length < 2) {
15778
+ return false;
15779
+ }
15780
+ const heading = lines[0] ?? "";
15781
+ const bodyLines = lines.slice(1);
15782
+ const linkLikeCount = bodyLines.filter((line) => isLikelyPDFLinkLine(line)).length;
15783
+ if (bodyLines.length > 0 && linkLikeCount === bodyLines.length && PDF_LINK_CLUSTER_HEADING_PATTERN.test(heading)) {
15784
+ return true;
15785
+ }
15786
+ return linkLikeCount >= 2 && linkLikeCount >= Math.ceil(lines.length * 0.6);
15787
+ };
15788
+ var isLikelyPDFPromoBlock = (block) => {
15789
+ if (block.semanticRole || block.textKind !== "paragraph") {
15790
+ return false;
15791
+ }
15792
+ const lines = block.text.split(`
15793
+ `).map((line) => normalizeWhitespace(line)).filter(Boolean);
15794
+ if (lines.length === 1) {
15795
+ return PDF_PROMO_HEADING_PATTERN.test(lines[0] ?? "");
15796
+ }
15797
+ if (lines.length < 2 || lines.length > 4) {
15798
+ return false;
15799
+ }
15800
+ const heading = lines[0] ?? "";
15801
+ const bodyLines = lines.slice(1);
15802
+ const promoLikeCount = bodyLines.filter((line) => line.length <= PDF_LINK_CLUSTER_LINE_MAX_LENGTH && (PDF_PROMO_BODY_PATTERN.test(line) || isLikelyPDFLinkLine(line))).length;
15803
+ if (PDF_PROMO_HEADING_PATTERN.test(heading) && promoLikeCount >= Math.max(1, bodyLines.length - 1)) {
15804
+ return true;
15805
+ }
15806
+ return false;
15807
+ };
14871
15808
  var suppressRepeatedPDFChrome = (blocks) => {
14872
15809
  const linePages = new Map;
14873
15810
  for (const block of blocks) {
@@ -14888,7 +15825,7 @@ var suppressRepeatedPDFChrome = (blocks) => {
14888
15825
  if (!line) {
14889
15826
  return false;
14890
15827
  }
14891
- if (isLikelyPDFPageLabel(line)) {
15828
+ if (isLikelyPDFChromeLine(line)) {
14892
15829
  return false;
14893
15830
  }
14894
15831
  const repeatedPages = linePages.get(line);
@@ -14897,27 +15834,76 @@ var suppressRepeatedPDFChrome = (blocks) => {
14897
15834
  }
14898
15835
  return true;
14899
15836
  });
14900
- const text = normalizeWhitespace(keptLines.join(`
14901
- `));
15837
+ const text = keptLines.join(`
15838
+ `);
14902
15839
  if (!text) {
14903
15840
  return;
14904
15841
  }
14905
- return buildPDFNativeTextBlock(text, block.blockNumber, block.pageNumber);
15842
+ return {
15843
+ ...block,
15844
+ lineCount: text.split(`
15845
+ `).filter(Boolean).length,
15846
+ text,
15847
+ textKind: text.includes(" | ") ? "table_like" : "paragraph"
15848
+ };
14906
15849
  }).filter((value) => Boolean(value));
14907
15850
  };
15851
+ var suppressNonContentPDFBlocks = (blocks) => blocks.filter((block) => !isLikelyPDFLinkClusterBlock(block) && !isLikelyPDFPromoBlock(block));
15852
+ var mergePDFHeadingContinuationBlocks = (blocks) => {
15853
+ const merged = [];
15854
+ for (let index = 0;index < blocks.length; index += 1) {
15855
+ const block = blocks[index];
15856
+ if (!block) {
15857
+ continue;
15858
+ }
15859
+ const lines = block.text.split(`
15860
+ `).map((line) => normalizeWhitespace(line)).filter(Boolean);
15861
+ const nextBlock = blocks[index + 1];
15862
+ const isHeadingOnlyBlock = !block.semanticRole && block.textKind === "paragraph" && lines.length === 1 && inferPDFBlockHeading(block.text) === lines[0];
15863
+ const canMergeWithNext = isHeadingOnlyBlock && nextBlock && nextBlock.pageNumber === block.pageNumber && !nextBlock.semanticRole && nextBlock.textKind === "paragraph" && inferPDFBlockHeading(nextBlock.text) === undefined;
15864
+ if (canMergeWithNext) {
15865
+ const text = [block.text, nextBlock.text].flatMap((value) => value.split(`
15866
+ `)).map((line) => normalizeWhitespace(line)).filter(Boolean).join(`
15867
+ `);
15868
+ merged.push({
15869
+ ...block,
15870
+ lineCount: text.split(`
15871
+ `).filter(Boolean).length,
15872
+ text
15873
+ });
15874
+ index += 1;
15875
+ continue;
15876
+ }
15877
+ merged.push(block);
15878
+ }
15879
+ return merged;
15880
+ };
15881
+ var associatePDFNativeFigureBodies = (blocks) => blocks.map((block, index) => {
15882
+ if (block.semanticRole || block.textKind !== "paragraph" || inferPDFBlockHeading(block.text)) {
15883
+ return block;
15884
+ }
15885
+ const previousBlock = index > 0 ? blocks[index - 1] : undefined;
15886
+ if (!previousBlock || previousBlock.pageNumber !== block.pageNumber || previousBlock.semanticRole !== "figure_caption") {
15887
+ return block;
15888
+ }
15889
+ return {
15890
+ ...block,
15891
+ semanticRole: "figure_body"
15892
+ };
15893
+ });
14908
15894
  var extractNativePDFText = (data) => {
14909
15895
  const raw = Buffer.from(data).toString("latin1");
14910
15896
  const count = [...raw.matchAll(/\/Type\s*\/Page\b/g)].length;
14911
15897
  const pageCount = count > 0 ? count : 1;
14912
15898
  const pageMarkers = [...raw.matchAll(/\/Type\s*\/Page\b/g)].map((match) => match.index ?? raw.length);
14913
- const blocks = [...raw.matchAll(/BT([\s\S]*?)ET/g)].map((match, index) => {
15899
+ const blocks = assignPDFBlockNumbers([...raw.matchAll(/BT([\s\S]*?)ET/g)].flatMap((match) => {
14914
15900
  const blockText = extractTextFromPDFTextObject(match[1] ?? "");
14915
15901
  const objectEnd = (match.index ?? 0) + (match[0]?.length ?? 0);
14916
15902
  const pageIndex = pageMarkers.findIndex((marker) => marker >= objectEnd);
14917
15903
  const pageNumber = pageIndex >= 0 ? pageIndex + 1 : pageCount;
14918
- return buildPDFNativeTextBlock(blockText, index + 1, pageNumber);
14919
- }).filter((value) => Boolean(value));
14920
- const visibleBlocks = suppressRepeatedPDFChrome(blocks);
15904
+ return splitPDFNativeTextBlocks(blockText, pageNumber);
15905
+ }));
15906
+ const visibleBlocks = assignPDFBlockNumbers(associatePDFNativeFigureBodies(mergePDFHeadingContinuationBlocks(suppressNonContentPDFBlocks(suppressRepeatedPDFChrome(blocks)))));
14921
15907
  const fallbackText = [...raw.matchAll(/\(((?:\\.|[^\\)])*)\)\s*Tj/g)].map((match) => decodePdfLiteral(match[1] ?? "")).join(`
14922
15908
  `);
14923
15909
  const text = visibleBlocks.length > 0 ? normalizeWhitespace(visibleBlocks.map((block) => block.text).join(`
@@ -15028,13 +16014,10 @@ var officeDocumentBlocks = (entries) => {
15028
16014
  for (const match of body.matchAll(blockPattern)) {
15029
16015
  const blockXml = match[0] ?? "";
15030
16016
  if (blockXml.startsWith("<w:tbl")) {
15031
- const rows = [...blockXml.matchAll(/<w:tr\b[\s\S]*?<\/w:tr>/g)].map((rowMatch, rowIndex) => {
15032
- const cells = [
15033
- ...(rowMatch[0] ?? "").matchAll(/<w:tc\b[\s\S]*?<\/w:tc>/g)
15034
- ].map((cellMatch) => extractOfficeParagraphText(cellMatch[0] ?? "")).filter(Boolean);
15035
- if (cells.length === 0) {
15036
- return "";
15037
- }
16017
+ const tableRows = [...blockXml.matchAll(/<w:tr\b[\s\S]*?<\/w:tr>/g)].map((rowMatch) => [
16018
+ ...(rowMatch[0] ?? "").matchAll(/<w:tc\b[\s\S]*?<\/w:tc>/g)
16019
+ ].map((cellMatch) => extractOfficeParagraphText(cellMatch[0] ?? "")).filter(Boolean)).filter((cells) => cells.length > 0);
16020
+ const rows = tableRows.map((cells, rowIndex) => {
15038
16021
  return `Row ${rowIndex + 1}. ${cells.map((cell, cellIndex) => `${String.fromCharCode(65 + cellIndex)}: ${cell}`).join(" | ")}`;
15039
16022
  }).filter(Boolean);
15040
16023
  const text2 = normalizeWhitespace(rows.join(`
@@ -15042,9 +16025,21 @@ var officeDocumentBlocks = (entries) => {
15042
16025
  if (!text2) {
15043
16026
  continue;
15044
16027
  }
16028
+ const tableHeaders = tableRows[0];
16029
+ const tableRowCount = tableRows.length;
16030
+ const tableBodyRowCount = tableRowCount > 0 ? Math.max(0, tableRowCount - 1) : undefined;
16031
+ const tableColumnCount = Array.isArray(tableHeaders) && tableHeaders.length > 0 ? tableHeaders.length : tableRows.reduce((max, row) => Math.max(max, row.length), 0) || undefined;
16032
+ const tableHeaderText = Array.isArray(tableHeaders) && tableHeaders.length > 0 ? tableHeaders.join(" | ") : undefined;
16033
+ const tableSignature = Array.isArray(tableHeaders) && tableHeaders.length > 0 ? tableHeaders.join(" | ") : undefined;
15045
16034
  blocks.push({
15046
16035
  blockKind: "table",
15047
16036
  blockNumber: blocks.length + 1,
16037
+ ...typeof tableBodyRowCount === "number" ? { tableBodyRowCount } : {},
16038
+ ...typeof tableColumnCount === "number" ? { tableColumnCount } : {},
16039
+ ...typeof tableHeaderText === "string" ? { tableHeaderText } : {},
16040
+ ...Array.isArray(tableHeaders) && tableHeaders.length > 0 ? { tableHeaders } : {},
16041
+ ...typeof tableRowCount === "number" ? { tableRowCount } : {},
16042
+ ...typeof tableSignature === "string" ? { tableSignature } : {},
15048
16043
  text: text2
15049
16044
  });
15050
16045
  continue;
@@ -15057,12 +16052,16 @@ var officeDocumentBlocks = (entries) => {
15057
16052
  const style = (styleMatch?.[1] ?? "").toLowerCase();
15058
16053
  const headingMatch = style.match(/^heading([1-6])$/);
15059
16054
  const isListParagraph = /<w:numPr\b/i.test(blockXml) || style.includes("list") || style.includes("bullet");
16055
+ const listLevelMatch = blockXml.match(/<w:ilvl\b[^>]*w:val="(\d+)"[^>]*\/?>/i);
16056
+ const listLevel = listLevelMatch ? Number.parseInt(listLevelMatch[1] ?? "0", 10) : undefined;
15060
16057
  const blockKind = style === "title" ? "title" : headingMatch ? "heading" : isListParagraph ? "list" : "paragraph";
15061
- const decoratedText = blockKind === "list" && !/^[-*]\s/.test(text) ? `- ${text}` : text;
16058
+ const listPrefix = blockKind === "list" ? `${" ".repeat(Math.max(0, listLevel ?? 0))}- ` : "";
16059
+ const decoratedText = blockKind === "list" && !/^[-*]\s/.test(text) ? `${listPrefix}${text}` : text;
15062
16060
  blocks.push({
15063
16061
  blockKind,
15064
16062
  blockNumber: blocks.length + 1,
15065
16063
  headingLevel: headingMatch ? Number.parseInt(headingMatch[1] ?? "1", 10) : undefined,
16064
+ listLevel: blockKind === "list" && Number.isFinite(listLevel ?? NaN) ? listLevel : undefined,
15066
16065
  style: style || undefined,
15067
16066
  text: decoratedText
15068
16067
  });
@@ -15678,10 +16677,32 @@ var splitOCRColumns = (regions) => {
15678
16677
  var buildOCRReadingText = (regions) => normalizeWhitespace(splitOCRColumns(regions).map((column) => buildOCRReadingLinesText(column)).filter(Boolean).join(`
15679
16678
 
15680
16679
  `));
15681
- var getOCRPrimaryText = (result) => {
15682
- const regions = result.regions?.filter((region) => normalizeWhitespace(region.text ?? "").length > 0);
15683
- const reconstructed = regions && regions.length > 0 ? buildOCRReadingText(regions) : "";
15684
- return reconstructed || result.text;
16680
+ var buildOCRSummaryText = (result) => {
16681
+ const regions = result.regions?.filter((region) => normalizeWhitespace(region.text ?? "").length > 0) ?? [];
16682
+ if (regions.length === 0) {
16683
+ return {
16684
+ lowConfidenceRegionCount: 0,
16685
+ strongRegionCount: 0,
16686
+ summaryConfidenceThreshold: OCR_SUMMARY_CONFIDENCE_THRESHOLD,
16687
+ text: result.text,
16688
+ usedStrongRegionsOnly: false
16689
+ };
16690
+ }
16691
+ const strongRegions = regions.filter((region) => typeof region.confidence !== "number" || region.confidence >= OCR_SUMMARY_CONFIDENCE_THRESHOLD);
16692
+ const lowConfidenceRegionCount = regions.length - strongRegions.length;
16693
+ const strongTextLength = strongRegions.reduce((sum, region) => sum + normalizeWhitespace(region.text ?? "").length, 0);
16694
+ const totalTextLength = regions.reduce((sum, region) => sum + normalizeWhitespace(region.text ?? "").length, 0);
16695
+ const strongCoverageRatio = totalTextLength > 0 ? strongTextLength / totalTextLength : 0;
16696
+ const useStrongRegionsOnly = strongRegions.length > 0 && lowConfidenceRegionCount > 0 && strongCoverageRatio >= OCR_SUMMARY_MIN_STRONG_TEXT_RATIO;
16697
+ const strongReconstructed = buildOCRReadingText(strongRegions);
16698
+ const allReconstructed = buildOCRReadingText(regions);
16699
+ return {
16700
+ lowConfidenceRegionCount,
16701
+ strongRegionCount: strongRegions.length,
16702
+ summaryConfidenceThreshold: OCR_SUMMARY_CONFIDENCE_THRESHOLD,
16703
+ text: (useStrongRegionsOnly ? strongReconstructed : allReconstructed) || result.text,
16704
+ usedStrongRegionsOnly: useStrongRegionsOnly
16705
+ };
15685
16706
  };
15686
16707
  var ocrPageDocuments = (result, input, baseMetadata) => {
15687
16708
  const grouped = new Map;
@@ -16065,6 +17086,7 @@ var createRAGImageOCRExtractor = (provider) => ({
16065
17086
  supports: imageExtractorSupports,
16066
17087
  extract: async (input) => {
16067
17088
  const result = await provider.extractText(input);
17089
+ const summary = buildOCRSummaryText(result);
16068
17090
  return {
16069
17091
  chunking: input.chunking,
16070
17092
  contentType: input.contentType,
@@ -16072,11 +17094,15 @@ var createRAGImageOCRExtractor = (provider) => ({
16072
17094
  metadata: {
16073
17095
  ...input.metadata ?? {},
16074
17096
  ...ocrMetadata(result),
17097
+ ocrLowConfidenceRegionCount: summary.lowConfidenceRegionCount,
17098
+ ocrStrongRegionCount: summary.strongRegionCount,
17099
+ ocrSummaryConfidenceThreshold: summary.summaryConfidenceThreshold,
17100
+ ocrSummaryUsedStrongRegionsOnly: summary.usedStrongRegionsOnly,
16075
17101
  fileKind: "image",
16076
17102
  sourceNativeKind: "image_ocr"
16077
17103
  },
16078
17104
  source: input.source ?? input.path ?? input.name ?? `${slugify(input.title ?? DEFAULT_BINARY_NAME)}.image.txt`,
16079
- text: getOCRPrimaryText(result),
17105
+ text: summary.text,
16080
17106
  title: result.title ?? input.title
16081
17107
  };
16082
17108
  }
@@ -16260,6 +17286,8 @@ var createPDFFileExtractor = () => ({
16260
17286
  ...input.metadata ?? {},
16261
17287
  fileKind: "pdf",
16262
17288
  pageCount: extracted.pageCount,
17289
+ pdfEvidenceMode: "native",
17290
+ pdfEvidenceOrigin: "native",
16263
17291
  pdfTextBlockCount: extracted.textBlockCount,
16264
17292
  pdfTextBlocks: extracted.textBlocks
16265
17293
  },
@@ -16293,6 +17321,7 @@ var createRAGPDFOCRExtractor = (options) => ({
16293
17321
  const nativeText = extracted.text;
16294
17322
  const minLength = options.minExtractedTextLength ?? 80;
16295
17323
  const shouldUseNativeText = !options.alwaysOCR && nativeText.length >= minLength;
17324
+ const shouldUseHybridText = !options.alwaysOCR && nativeText.length > 0 && nativeText.length < minLength;
16296
17325
  if (shouldUseNativeText) {
16297
17326
  return {
16298
17327
  chunking: input.chunking,
@@ -16302,6 +17331,8 @@ var createRAGPDFOCRExtractor = (options) => ({
16302
17331
  ...input.metadata ?? {},
16303
17332
  fileKind: "pdf",
16304
17333
  pageCount: extracted.pageCount,
17334
+ pdfEvidenceMode: "native",
17335
+ pdfEvidenceOrigin: "native",
16305
17336
  pdfTextBlockCount: extracted.textBlockCount,
16306
17337
  pdfTextBlocks: extracted.textBlocks,
16307
17338
  pdfTextMode: "native"
@@ -16315,12 +17346,49 @@ var createRAGPDFOCRExtractor = (options) => ({
16315
17346
  ...input,
16316
17347
  contentType: input.contentType ?? "application/pdf"
16317
17348
  });
17349
+ const summary = buildOCRSummaryText(ocr);
16318
17350
  const baseMetadata = {
16319
17351
  ...ocrMetadata(ocr),
17352
+ ocrLowConfidenceRegionCount: summary.lowConfidenceRegionCount,
17353
+ ocrStrongRegionCount: summary.strongRegionCount,
17354
+ ocrSummaryConfidenceThreshold: summary.summaryConfidenceThreshold,
17355
+ ocrSummaryUsedStrongRegionsOnly: summary.usedStrongRegionsOnly,
16320
17356
  fileKind: "pdf",
16321
17357
  pageCount: extracted.pageCount,
17358
+ pdfEvidenceMode: "ocr",
17359
+ pdfEvidenceOrigin: "ocr",
16322
17360
  pdfTextMode: "ocr"
16323
17361
  };
17362
+ if (shouldUseHybridText) {
17363
+ const hybridMetadata = {
17364
+ ...input.metadata ?? {},
17365
+ ...baseMetadata,
17366
+ pageCount: extracted.pageCount,
17367
+ pdfEvidenceMode: "hybrid",
17368
+ pdfEvidenceOrigin: "native",
17369
+ pdfEvidenceSupplement: "ocr",
17370
+ pdfHybridOCRSupplement: true,
17371
+ pdfNativeTextBlockCount: extracted.textBlockCount,
17372
+ pdfNativeTextLength: nativeText.length,
17373
+ pdfOCRFallbackReason: "native_below_min_length",
17374
+ pdfOCRTextLength: summary.text.length,
17375
+ pdfTextBlockCount: extracted.textBlockCount,
17376
+ pdfTextBlocks: extracted.textBlocks,
17377
+ pdfTextMode: "hybrid"
17378
+ };
17379
+ const hybridDocument = {
17380
+ chunking: input.chunking,
17381
+ contentType: input.contentType ?? "application/pdf",
17382
+ format: "text",
17383
+ metadata: hybridMetadata,
17384
+ source: input.source ?? input.path ?? input.name ?? `${slugify(input.title ?? DEFAULT_BINARY_NAME)}.pdf`,
17385
+ text: nativeText,
17386
+ title: input.title
17387
+ };
17388
+ const pageDocuments2 = ocrPageDocuments(ocr, input, baseMetadata);
17389
+ const regionDocuments2 = ocrRegionDocuments(ocr, input, baseMetadata);
17390
+ return [hybridDocument, ...pageDocuments2, ...regionDocuments2];
17391
+ }
16324
17392
  const summaryDocument = {
16325
17393
  chunking: input.chunking,
16326
17394
  contentType: input.contentType ?? "application/pdf",
@@ -16330,7 +17398,7 @@ var createRAGPDFOCRExtractor = (options) => ({
16330
17398
  ...baseMetadata
16331
17399
  },
16332
17400
  source: input.source ?? input.path ?? input.name ?? `${slugify(input.title ?? DEFAULT_BINARY_NAME)}.pdf`,
16333
- text: getOCRPrimaryText(ocr),
17401
+ text: summary.text,
16334
17402
  title: ocr.title ?? input.title
16335
17403
  };
16336
17404
  const pageDocuments = ocrPageDocuments(ocr, input, baseMetadata);
@@ -16600,6 +17668,101 @@ var chunkFromUnits = (units, maxChunkLength, chunkOverlap, minChunkLength) => {
16600
17668
  };
16601
17669
  var chunkSourceAwareUnit = (unit, options) => {
16602
17670
  const defaultSourceAwareChunkReason = unit.sectionKind === "markdown_heading" || unit.sectionKind === "html_heading" || unit.sectionKind === "office_heading" ? "section_boundary" : unit.sectionKind ? "source_native_unit" : unit.sourceAwareChunkReason;
17671
+ if (unit.officeBlockKind === "table" && typeof unit.officeTableHeaderText === "string" && typeof unit.officeTableBodyRowCount === "number" && unit.officeTableBodyRowCount > 0 && unit.text.length > options.maxChunkLength) {
17672
+ const headerLine = unit.officeTableHeaderText;
17673
+ const contextText = typeof unit.officeTableContextText === "string" ? unit.officeTableContextText : undefined;
17674
+ const bodyRows = unit.text.split(`
17675
+ `).map((line) => normalizeWhitespace(line)).filter((line) => /^Row \d+\./.test(line)).slice(1);
17676
+ const slices = [];
17677
+ let currentRows = [];
17678
+ let currentStart = 1;
17679
+ const pushSlice = () => {
17680
+ if (currentRows.length === 0) {
17681
+ return;
17682
+ }
17683
+ slices.push({
17684
+ bodyRowEnd: currentStart + currentRows.length - 1,
17685
+ bodyRowStart: currentStart,
17686
+ text: normalizeWhitespace([
17687
+ ...typeof contextText === "string" ? [contextText] : [],
17688
+ headerLine,
17689
+ ...currentRows
17690
+ ].join(`
17691
+ `))
17692
+ });
17693
+ currentStart += currentRows.length;
17694
+ currentRows = [];
17695
+ };
17696
+ for (const row of bodyRows) {
17697
+ const candidateRows = [...currentRows, row];
17698
+ const candidateText = normalizeWhitespace([
17699
+ ...typeof contextText === "string" ? [contextText] : [],
17700
+ headerLine,
17701
+ ...candidateRows
17702
+ ].join(`
17703
+ `));
17704
+ if (currentRows.length > 0 && candidateText.length > options.maxChunkLength) {
17705
+ pushSlice();
17706
+ }
17707
+ currentRows.push(row);
17708
+ }
17709
+ pushSlice();
17710
+ if (slices.length > 0) {
17711
+ return slices.map((slice) => ({
17712
+ ...unit,
17713
+ officeTableBodyRowCount: slice.bodyRowEnd - slice.bodyRowStart + 1,
17714
+ officeTableBodyRowEnd: slice.bodyRowEnd,
17715
+ officeTableBodyRowStart: slice.bodyRowStart,
17716
+ officeTableChunkKind: slices.length > 1 ? "table_slice" : "full_table",
17717
+ officeTableRowCount: slice.bodyRowEnd - slice.bodyRowStart + 2,
17718
+ sourceAwareChunkReason: slices.length > 1 ? "size_limit" : defaultSourceAwareChunkReason,
17719
+ text: slice.text
17720
+ }));
17721
+ }
17722
+ }
17723
+ if (unit.pdfTextKind === "table_like" && typeof unit.pdfTableHeaderText === "string" && typeof unit.pdfTableBodyRowCount === "number" && unit.pdfTableBodyRowCount > 0 && unit.text.length > options.maxChunkLength) {
17724
+ const headerLine = unit.pdfTableHeaderText;
17725
+ const bodyRows = unit.text.split(`
17726
+ `).map((line) => normalizeWhitespace(line)).filter(Boolean).slice(1);
17727
+ const slices = [];
17728
+ let currentRows = [];
17729
+ let currentStart = 1;
17730
+ const pushSlice = () => {
17731
+ if (currentRows.length === 0) {
17732
+ return;
17733
+ }
17734
+ slices.push({
17735
+ bodyRowEnd: currentStart + currentRows.length - 1,
17736
+ bodyRowStart: currentStart,
17737
+ text: normalizeWhitespace([headerLine, ...currentRows].join(`
17738
+ `))
17739
+ });
17740
+ currentStart += currentRows.length;
17741
+ currentRows = [];
17742
+ };
17743
+ for (const row of bodyRows) {
17744
+ const candidateRows = [...currentRows, row];
17745
+ const candidateText = normalizeWhitespace([headerLine, ...candidateRows].join(`
17746
+ `));
17747
+ if (currentRows.length > 0 && candidateText.length > options.maxChunkLength) {
17748
+ pushSlice();
17749
+ }
17750
+ currentRows.push(row);
17751
+ }
17752
+ pushSlice();
17753
+ if (slices.length > 0) {
17754
+ return slices.map((slice) => ({
17755
+ ...unit,
17756
+ pdfTableBodyRowCount: slice.bodyRowEnd - slice.bodyRowStart + 1,
17757
+ pdfTableBodyRowEnd: slice.bodyRowEnd,
17758
+ pdfTableBodyRowStart: slice.bodyRowStart,
17759
+ pdfTableChunkKind: slices.length > 1 ? "table_slice" : "full_table",
17760
+ pdfTableRowCount: slice.bodyRowEnd - slice.bodyRowStart + 2,
17761
+ sourceAwareChunkReason: slices.length > 1 ? "size_limit" : defaultSourceAwareChunkReason,
17762
+ text: slice.text
17763
+ }));
17764
+ }
17765
+ }
16603
17766
  if (unit.text.length <= options.maxChunkLength) {
16604
17767
  return [
16605
17768
  {
@@ -16896,7 +18059,37 @@ var prepareRAGDocument = (document, defaultChunking, chunkingRegistry) => {
16896
18059
  ...typeof entry.pageNumber === "number" ? { pageNumber: entry.pageNumber } : {},
16897
18060
  ...typeof entry.officeBlockNumber === "number" ? { officeBlockNumber: entry.officeBlockNumber } : {},
16898
18061
  ...entry.officeBlockKind ? { officeBlockKind: entry.officeBlockKind } : {},
18062
+ ...typeof entry.officeListContextText === "string" ? { officeListContextText: entry.officeListContextText } : {},
18063
+ ...typeof entry.officeListGroupItemCount === "number" ? {
18064
+ officeListGroupItemCount: entry.officeListGroupItemCount
18065
+ } : {},
18066
+ ...typeof entry.officeListLevel === "number" ? { officeListLevel: entry.officeListLevel } : {},
18067
+ ...Array.isArray(entry.officeListLevels) && entry.officeListLevels.length > 0 ? { officeListLevels: entry.officeListLevels } : {},
18068
+ ...typeof entry.officeTableBodyRowCount === "number" ? { officeTableBodyRowCount: entry.officeTableBodyRowCount } : {},
18069
+ ...typeof entry.officeTableBodyRowEnd === "number" ? { officeTableBodyRowEnd: entry.officeTableBodyRowEnd } : {},
18070
+ ...typeof entry.officeTableBodyRowStart === "number" ? { officeTableBodyRowStart: entry.officeTableBodyRowStart } : {},
18071
+ ...entry.officeTableChunkKind ? { officeTableChunkKind: entry.officeTableChunkKind } : {},
18072
+ ...typeof entry.officeTableColumnCount === "number" ? { officeTableColumnCount: entry.officeTableColumnCount } : {},
18073
+ ...typeof entry.officeTableContextText === "string" ? { officeTableContextText: entry.officeTableContextText } : {},
18074
+ ...typeof entry.officeTableHeaderText === "string" ? { officeTableHeaderText: entry.officeTableHeaderText } : {},
18075
+ ...Array.isArray(entry.officeTableHeaders) && entry.officeTableHeaders.length > 0 ? { officeTableHeaders: entry.officeTableHeaders } : {},
18076
+ ...typeof entry.officeTableRowCount === "number" ? { officeTableRowCount: entry.officeTableRowCount } : {},
18077
+ ...typeof entry.officeTableSignature === "string" ? { officeTableSignature: entry.officeTableSignature } : {},
16899
18078
  ...typeof entry.pdfBlockNumber === "number" ? { pdfBlockNumber: entry.pdfBlockNumber } : {},
18079
+ ...typeof entry.pdfFigureCaptionBlockNumber === "number" ? {
18080
+ pdfFigureCaptionBlockNumber: entry.pdfFigureCaptionBlockNumber
18081
+ } : {},
18082
+ ...typeof entry.pdfFigureLabel === "string" ? { pdfFigureLabel: entry.pdfFigureLabel } : {},
18083
+ ...entry.pdfSemanticRole ? { pdfSemanticRole: entry.pdfSemanticRole } : {},
18084
+ ...typeof entry.pdfTableBodyRowEnd === "number" ? { pdfTableBodyRowEnd: entry.pdfTableBodyRowEnd } : {},
18085
+ ...typeof entry.pdfTableBodyRowCount === "number" ? { pdfTableBodyRowCount: entry.pdfTableBodyRowCount } : {},
18086
+ ...typeof entry.pdfTableBodyRowStart === "number" ? { pdfTableBodyRowStart: entry.pdfTableBodyRowStart } : {},
18087
+ ...entry.pdfTableChunkKind ? { pdfTableChunkKind: entry.pdfTableChunkKind } : {},
18088
+ ...typeof entry.pdfTableColumnCount === "number" ? { pdfTableColumnCount: entry.pdfTableColumnCount } : {},
18089
+ ...typeof entry.pdfTableHeaderText === "string" ? { pdfTableHeaderText: entry.pdfTableHeaderText } : {},
18090
+ ...Array.isArray(entry.pdfTableHeaders) && entry.pdfTableHeaders.length > 0 ? { pdfTableHeaders: entry.pdfTableHeaders } : {},
18091
+ ...typeof entry.pdfTableRowCount === "number" ? { pdfTableRowCount: entry.pdfTableRowCount } : {},
18092
+ ...typeof entry.pdfTableSignature === "string" ? { pdfTableSignature: entry.pdfTableSignature } : {},
16900
18093
  ...entry.pdfTextKind ? { pdfTextKind: entry.pdfTextKind } : {},
16901
18094
  ...entry.sectionKind ? { sectionKind: entry.sectionKind } : {},
16902
18095
  ...entry.sourceAwareChunkReason ? { sourceAwareChunkReason: entry.sourceAwareChunkReason } : {},
@@ -17616,6 +18809,166 @@ var annotateRetrievalChannels = (input) => {
17616
18809
  };
17617
18810
  });
17618
18811
  };
18812
+ var getPDFRetrievalEvidencePreference = (metadata) => {
18813
+ if (!metadata) {
18814
+ return 0;
18815
+ }
18816
+ const pdfEvidenceMode = typeof metadata.pdfEvidenceMode === "string" ? metadata.pdfEvidenceMode : undefined;
18817
+ const pdfEvidenceOrigin = typeof metadata.pdfEvidenceOrigin === "string" ? metadata.pdfEvidenceOrigin : undefined;
18818
+ const pdfEvidenceSupplement = typeof metadata.pdfEvidenceSupplement === "string" ? metadata.pdfEvidenceSupplement : undefined;
18819
+ if (pdfEvidenceMode === "hybrid" && pdfEvidenceOrigin === "native" && pdfEvidenceSupplement === "ocr") {
18820
+ return 3;
18821
+ }
18822
+ if (pdfEvidenceMode === "native" && pdfEvidenceOrigin === "native") {
18823
+ return 2;
18824
+ }
18825
+ if (pdfEvidenceMode === "ocr" && pdfEvidenceOrigin === "ocr") {
18826
+ return 1;
18827
+ }
18828
+ return 0;
18829
+ };
18830
+ var getPDFRetrievalScope = (result) => {
18831
+ const metadata = result.metadata;
18832
+ if (!metadata) {
18833
+ return;
18834
+ }
18835
+ const pageNumber = typeof metadata.pageNumber === "number" ? metadata.pageNumber : typeof metadata.page === "number" ? metadata.page : typeof metadata.pageIndex === "number" ? metadata.pageIndex + 1 : undefined;
18836
+ const sectionTitle = typeof metadata.sectionTitle === "string" && metadata.sectionTitle.length > 0 ? metadata.sectionTitle : undefined;
18837
+ const source = typeof result.source === "string" && result.source.length > 0 ? result.source : undefined;
18838
+ if (!source) {
18839
+ return;
18840
+ }
18841
+ return {
18842
+ pageNumber,
18843
+ sectionTitle,
18844
+ source
18845
+ };
18846
+ };
18847
+ var getPDFRetrievalComparableScopeKey = (scope) => {
18848
+ if (!scope) {
18849
+ return;
18850
+ }
18851
+ if (typeof scope.pageNumber === "number") {
18852
+ return `${scope.source}::page:${scope.pageNumber}`;
18853
+ }
18854
+ if (scope.sectionTitle) {
18855
+ return `${scope.source}::section:${scope.sectionTitle}`;
18856
+ }
18857
+ return;
18858
+ };
18859
+ var getOfficeRetrievalScope = (result) => {
18860
+ const metadata = result.metadata;
18861
+ if (!metadata) {
18862
+ return;
18863
+ }
18864
+ const officeBlockKind = metadata.officeBlockKind === "table" || metadata.officeBlockKind === "list" ? metadata.officeBlockKind : undefined;
18865
+ if (officeBlockKind !== "table" && officeBlockKind !== "list") {
18866
+ return;
18867
+ }
18868
+ const source = typeof result.source === "string" && result.source.length > 0 ? result.source : undefined;
18869
+ if (!source) {
18870
+ return;
18871
+ }
18872
+ const sectionPath = Array.isArray(metadata.sectionPath) ? metadata.sectionPath.filter((value) => typeof value === "string" && value.trim().length > 0) : [];
18873
+ const sectionTitle = (typeof metadata.sectionTitle === "string" && metadata.sectionTitle.trim().length > 0 ? metadata.sectionTitle.trim() : undefined) ?? sectionPath.at(-1);
18874
+ if (!sectionTitle) {
18875
+ return;
18876
+ }
18877
+ return {
18878
+ blockKind: officeBlockKind,
18879
+ hasContext: officeBlockKind === "table" ? typeof metadata.officeTableContextText === "string" && metadata.officeTableContextText.trim().length > 0 : typeof metadata.officeListContextText === "string" && metadata.officeListContextText.trim().length > 0,
18880
+ pathDepth: sectionPath.length,
18881
+ sectionTitle,
18882
+ source
18883
+ };
18884
+ };
18885
+ var getOfficeRetrievalComparableScopeKey = (scope) => {
18886
+ if (!scope) {
18887
+ return;
18888
+ }
18889
+ return `${scope.source}::office_section:${scope.blockKind}:${scope.sectionTitle}`;
18890
+ };
18891
+ var getOfficeRetrievalEvidencePreference = (metadata) => {
18892
+ if (!metadata) {
18893
+ return 0;
18894
+ }
18895
+ const officeBlockKind = metadata.officeBlockKind === "table" || metadata.officeBlockKind === "list" ? metadata.officeBlockKind : undefined;
18896
+ if (officeBlockKind !== "table" && officeBlockKind !== "list") {
18897
+ return 0;
18898
+ }
18899
+ const sectionPath = Array.isArray(metadata.sectionPath) ? metadata.sectionPath.filter((value) => typeof value === "string" && value.trim().length > 0) : [];
18900
+ return sectionPath.length * 10 + ((officeBlockKind === "table" ? typeof metadata.officeTableContextText === "string" && metadata.officeTableContextText.trim().length > 0 : typeof metadata.officeListContextText === "string" && metadata.officeListContextText.trim().length > 0) ? 1 : 0) + (officeBlockKind === "list" && typeof metadata.officeListGroupItemCount === "number" && metadata.officeListGroupItemCount > 1 ? 1 : 0);
18901
+ };
18902
+ var buildStructuredEvidenceReconcileLabel = (input) => {
18903
+ if (input.officeAffectedScopeCount > 0 && input.pdfAffectedScopeCount === 0) {
18904
+ return "Preferred deeper office-structure evidence within matching sections";
18905
+ }
18906
+ if (input.pdfAffectedScopeCount > 0 && input.officeAffectedScopeCount === 0) {
18907
+ return "Preferred native-layout PDF evidence within matching sections";
18908
+ }
18909
+ return "Preferred stronger structured evidence within matching sections";
18910
+ };
18911
+ var reconcileStructuredEvidenceOrdering = (results) => {
18912
+ const indexed = results.map((result, index) => ({ index, result }));
18913
+ const sorted = [...indexed].sort((leftEntry, rightEntry) => {
18914
+ const left = leftEntry.result;
18915
+ const right = rightEntry.result;
18916
+ const leftOfficeScope = getOfficeRetrievalScope(left);
18917
+ const rightOfficeScope = getOfficeRetrievalScope(right);
18918
+ if (leftOfficeScope && rightOfficeScope && leftOfficeScope.source === rightOfficeScope.source && leftOfficeScope.blockKind === rightOfficeScope.blockKind && leftOfficeScope.sectionTitle === rightOfficeScope.sectionTitle) {
18919
+ const leftPreference = getOfficeRetrievalEvidencePreference(left.metadata);
18920
+ const rightPreference = getOfficeRetrievalEvidencePreference(right.metadata);
18921
+ if (rightPreference !== leftPreference) {
18922
+ return rightPreference - leftPreference;
18923
+ }
18924
+ }
18925
+ const leftScope = getPDFRetrievalScope(left);
18926
+ const rightScope = getPDFRetrievalScope(right);
18927
+ if (leftScope && rightScope && leftScope.source === rightScope.source && (leftScope.sectionTitle && rightScope.sectionTitle && leftScope.sectionTitle === rightScope.sectionTitle || typeof leftScope.pageNumber === "number" && typeof rightScope.pageNumber === "number" && leftScope.pageNumber === rightScope.pageNumber)) {
18928
+ const leftPreference = getPDFRetrievalEvidencePreference(left.metadata);
18929
+ const rightPreference = getPDFRetrievalEvidencePreference(right.metadata);
18930
+ if (rightPreference !== leftPreference) {
18931
+ return rightPreference - leftPreference;
18932
+ }
18933
+ }
18934
+ return leftEntry.index - rightEntry.index;
18935
+ });
18936
+ const orderedResults = sorted.map((entry) => entry.result);
18937
+ const reorderedResults = sorted.reduce((count, entry, index) => count + (results[index]?.chunkId === entry.result.chunkId ? 0 : 1), 0);
18938
+ const officeAffectedScopes = new Set;
18939
+ const pdfAffectedScopes = new Set;
18940
+ for (const [index, entry] of sorted.entries()) {
18941
+ if (results[index]?.chunkId === entry.result.chunkId) {
18942
+ continue;
18943
+ }
18944
+ const officeScope = getOfficeRetrievalScope(entry.result);
18945
+ if (officeScope) {
18946
+ const officeScopeKey = getOfficeRetrievalComparableScopeKey(officeScope);
18947
+ if (officeScopeKey) {
18948
+ officeAffectedScopes.add(officeScopeKey);
18949
+ }
18950
+ continue;
18951
+ }
18952
+ const pdfScope = getPDFRetrievalScope(entry.result);
18953
+ const pdfScopeKey = getPDFRetrievalComparableScopeKey(pdfScope);
18954
+ if (pdfScopeKey) {
18955
+ pdfAffectedScopes.add(pdfScopeKey);
18956
+ }
18957
+ }
18958
+ const affectedScopeCount = officeAffectedScopes.size + pdfAffectedScopes.size;
18959
+ return {
18960
+ affectedScopeCount,
18961
+ label: buildStructuredEvidenceReconcileLabel({
18962
+ officeAffectedScopeCount: officeAffectedScopes.size,
18963
+ pdfAffectedScopeCount: pdfAffectedScopes.size
18964
+ }),
18965
+ applied: reorderedResults > 0,
18966
+ officeAffectedScopeCount: officeAffectedScopes.size,
18967
+ pdfAffectedScopeCount: pdfAffectedScopes.size,
18968
+ results: orderedResults,
18969
+ reorderedResults
18970
+ };
18971
+ };
17619
18972
  var getStructuredSectionScoreWeight2 = (metadata) => {
17620
18973
  const pdfTextKind = typeof metadata?.pdfTextKind === "string" ? metadata.pdfTextKind : undefined;
17621
18974
  const officeBlockKind = typeof metadata?.officeBlockKind === "string" ? metadata.officeBlockKind : undefined;
@@ -18084,9 +19437,25 @@ var createRAGCollection = (options) => {
18084
19437
  stage: "source_balance"
18085
19438
  });
18086
19439
  }
19440
+ const evidenceReconciled = reconcileStructuredEvidenceOrdering(diversified);
19441
+ if (evidenceReconciled.applied) {
19442
+ steps.push({
19443
+ count: evidenceReconciled.results.length,
19444
+ label: evidenceReconciled.label,
19445
+ metadata: {
19446
+ affectedScopes: evidenceReconciled.affectedScopeCount,
19447
+ officeAffectedScopes: evidenceReconciled.officeAffectedScopeCount,
19448
+ pdfAffectedScopes: evidenceReconciled.pdfAffectedScopeCount,
19449
+ reorderedResults: evidenceReconciled.reorderedResults
19450
+ },
19451
+ sectionCounts: buildTraceSectionCounts(evidenceReconciled.results),
19452
+ sectionScores: buildTraceSectionScores(evidenceReconciled.results),
19453
+ stage: "evidence_reconcile"
19454
+ });
19455
+ }
18087
19456
  const limited = annotateRetrievalChannels({
18088
19457
  lexicalResults,
18089
- results: diversified.slice(0, topK),
19458
+ results: evidenceReconciled.results.slice(0, topK),
18090
19459
  vectorResults
18091
19460
  });
18092
19461
  if (typeof input.scoreThreshold !== "number") {
@@ -18252,6 +19621,37 @@ var searchDocuments = async (collection, input) => collection.search(input);
18252
19621
  // src/ai/rag/htmxWorkflowRenderers.ts
18253
19622
  init_constants();
18254
19623
  var escapeHtml2 = (text) => text.replace(/&/g, "&amp;").replace(/</g, "&lt;").replace(/>/g, "&gt;").replace(/"/g, "&quot;");
19624
+ var renderLabelValueRows = (rows) => rows.length > 0 ? `<dl class="rag-status">${rows.map((row) => `<div><dt>${escapeHtml2(row.label)}</dt><dd>${escapeHtml2(row.value)}</dd></div>`).join("")}</dl>` : "";
19625
+ var renderBenchmarkRuntimePanel = (input) => {
19626
+ const rows = [
19627
+ {
19628
+ label: "Suite",
19629
+ value: input.response.suite?.label ?? input.response.suite?.id ?? "n/a"
19630
+ },
19631
+ input.response.groupKey ? { label: "Group", value: input.response.groupKey } : undefined,
19632
+ input.response.corpusGroupKey ? { label: "Corpus group", value: input.response.corpusGroupKey } : undefined
19633
+ ].filter((row) => Boolean(row));
19634
+ const latestRows = input.response.historyPresentation?.rows ?? [];
19635
+ const recentRuns = input.response.historyPresentation?.recentRuns ?? [];
19636
+ const snapshotRows = input.response.snapshotHistoryPresentation?.rows ?? [];
19637
+ const snapshots = input.response.snapshotHistoryPresentation?.snapshots ?? [];
19638
+ return `<section class="rag-status-governance"><h3>${escapeHtml2(input.title)}</h3>` + renderLabelValueRows(rows) + `<h4>Run history</h4>` + renderLabelValueRows(latestRows) + (recentRuns.length > 0 ? `<ul class="rag-status-capabilities">${recentRuns.slice(0, 3).map((run) => `<li><strong>${escapeHtml2(run.label)}</strong> ${escapeHtml2(run.summary)}</li>`).join("")}</ul>` : `<p class="rag-empty">No persisted benchmark runs yet.</p>`) + `<h4>Snapshot history</h4>` + renderLabelValueRows(snapshotRows) + (snapshots.length > 0 ? `<ul class="rag-status-capabilities">${snapshots.slice(0, 3).map((snapshot) => `<li><strong>${escapeHtml2(snapshot.label)}</strong> ${escapeHtml2(snapshot.summary)}</li>`).join("")}</ul>` : `<p class="rag-empty">No saved suite snapshots yet.</p>`) + `</section>`;
19639
+ };
19640
+ var renderBenchmarkSnapshotPanel = (input) => {
19641
+ const summaryRows = [
19642
+ {
19643
+ label: "Suite",
19644
+ value: input.response.suite?.label ?? input.response.suite?.id ?? "n/a"
19645
+ },
19646
+ input.response.snapshot ? {
19647
+ label: "Saved snapshot",
19648
+ value: `${input.response.snapshot.label ?? input.response.snapshot.suiteId} \xB7 v${input.response.snapshot.version}`
19649
+ } : undefined
19650
+ ].filter((row) => Boolean(row));
19651
+ const snapshotRows = input.response.snapshotHistoryPresentation?.rows ?? [];
19652
+ const snapshots = input.response.snapshotHistoryPresentation?.snapshots ?? [];
19653
+ return `<section class="rag-status-governance"><h3>${escapeHtml2(input.title)}</h3>` + renderLabelValueRows(summaryRows) + renderLabelValueRows(snapshotRows) + (snapshots.length > 0 ? `<ul class="rag-status-capabilities">${snapshots.slice(0, 3).map((snapshot) => `<li><strong>${escapeHtml2(snapshot.label)}</strong> ${escapeHtml2(snapshot.summary)}</li>`).join("")}</ul>` : `<p class="rag-empty">No saved suite snapshots yet.</p>`) + `</section>`;
19654
+ };
18255
19655
  var renderSourceLabels = (input) => {
18256
19656
  if (!input) {
18257
19657
  return "";
@@ -18448,7 +19848,7 @@ var renderRetrievalGovernancePanel = (retrievalComparisons) => {
18448
19848
  const latest = retrievalComparisons.latest;
18449
19849
  const alerts = (retrievalComparisons.alerts ?? []).slice(0, 3);
18450
19850
  const releaseGroups = (retrievalComparisons.releaseGroups ?? []).slice(0, 2);
18451
- const formatClassification = (classification) => classification === "multivector" ? "multivector regression" : classification === "runtime" ? "runtime regression" : classification === "general" ? "general regression" : undefined;
19851
+ const formatClassification = (classification) => classification === "multivector" ? "multivector regression" : classification === "evidence" ? "evidence regression" : classification === "runtime" ? "runtime regression" : classification === "general" ? "general regression" : undefined;
18452
19852
  return `<section class="rag-status-governance"><h3>Retrieval governance</h3>` + (latest ? `<dl class="rag-status">` + `<div><dt>Latest comparison</dt><dd>${escapeHtml2(latest.label)}</dd></div>` + (latest.bestByPassingRate ? `<div><dt>Best passing rate</dt><dd>${escapeHtml2(latest.bestByPassingRate)}</dd></div>` : "") + (latest.bestByAverageF1 ? `<div><dt>Best average F1</dt><dd>${escapeHtml2(latest.bestByAverageF1)}</dd></div>` : "") + (latest.bestByMultivectorCollapsedCases ? `<div><dt>Best multivector collapse</dt><dd>${escapeHtml2(latest.bestByMultivectorCollapsedCases)}</dd></div>` : "") + (latest.bestByMultivectorLexicalHitCases ? `<div><dt>Best multivector lexical hits</dt><dd>${escapeHtml2(latest.bestByMultivectorLexicalHitCases)}</dd></div>` : "") + (latest.bestByMultivectorVectorHitCases ? `<div><dt>Best multivector vector hits</dt><dd>${escapeHtml2(latest.bestByMultivectorVectorHitCases)}</dd></div>` : "") + (latest.decisionSummary?.gate?.status ? `<div><dt>Gate</dt><dd>${escapeHtml2(latest.decisionSummary.gate.status)}</dd></div>` : "") + (latest.releaseVerdict?.status ? `<div><dt>Verdict</dt><dd>${escapeHtml2(latest.releaseVerdict.status)}</dd></div>` : "") + `</dl>` : "") + `<h4>Active alerts</h4>` + (alerts.length > 0 ? `<ul class="rag-status-capabilities">${alerts.map((alert) => `<li><strong>${escapeHtml2(alert.kind)}</strong>${formatClassification(alert.classification) ? ` <span>${escapeHtml2(formatClassification(alert.classification) ?? "")}</span>` : ""} ${escapeHtml2(alert.message)}</li>`).join("")}</ul>` : `<p class="rag-empty">No active retrieval comparison alerts.</p>`) + (releaseGroups.length > 0 ? `<h4>Release groups</h4><ul class="rag-status-capabilities">${releaseGroups.map((group) => {
18453
19853
  const reasons = group.recommendedActionReasons?.slice(0, 2).join("; ") ?? "No recommended action.";
18454
19854
  return `<li><strong>${escapeHtml2(group.groupKey)}</strong>${formatClassification(group.classification) ? ` <span>${escapeHtml2(formatClassification(group.classification) ?? "")}</span>` : ""} ${escapeHtml2(group.recommendedAction ?? "monitor")} \xB7 ${escapeHtml2(reasons)}</li>`;
@@ -18512,6 +19912,22 @@ var defaultSearchResults = ({
18512
19912
  return defaultSearchResultItem(result, index, sectionJumps);
18513
19913
  }).join("")}</section>`;
18514
19914
  })();
19915
+ var defaultAdaptiveNativePlannerBenchmark = (input) => renderBenchmarkRuntimePanel({
19916
+ response: input,
19917
+ title: "Adaptive native planner benchmark"
19918
+ });
19919
+ var defaultNativeBackendComparisonBenchmark = (input) => renderBenchmarkRuntimePanel({
19920
+ response: input,
19921
+ title: "Native backend comparison benchmark"
19922
+ });
19923
+ var defaultAdaptiveNativePlannerBenchmarkSnapshot = (input) => renderBenchmarkSnapshotPanel({
19924
+ response: input,
19925
+ title: "Adaptive native planner snapshots"
19926
+ });
19927
+ var defaultNativeBackendComparisonBenchmarkSnapshot = (input) => renderBenchmarkSnapshotPanel({
19928
+ response: input,
19929
+ title: "Native backend comparison snapshots"
19930
+ });
18515
19931
  var defaultDocumentItem = (document, index) => '<article class="rag-document">' + `<h3>${escapeHtml2(document.title || `Document ${index + 1}`)}</h3>` + `<p class="rag-document-id">${escapeHtml2(document.id)}</p>` + `<p class="rag-document-source">${escapeHtml2(document.source)}</p>` + renderSourceLabels(document.labels) + `<p class="rag-document-meta">${escapeHtml2(document.format ?? "text")} \xB7 ${escapeHtml2(document.chunkStrategy ?? "paragraphs")} \xB7 ${document.chunkCount ?? 0} chunks</p>` + "</article>";
18516
19932
  var defaultDocuments = ({
18517
19933
  documents
@@ -18584,6 +20000,8 @@ var defaultEvaluateResult = ({
18584
20000
  var defaultError2 = (message) => `<div class="rag-error">${escapeHtml2(message)}</div>`;
18585
20001
  var defaultMaintenance = (input) => renderMaintenancePanel(input);
18586
20002
  var resolveRAGWorkflowRenderers = (custom) => ({
20003
+ adaptiveNativePlannerBenchmark: custom?.adaptiveNativePlannerBenchmark ?? defaultAdaptiveNativePlannerBenchmark,
20004
+ adaptiveNativePlannerBenchmarkSnapshot: custom?.adaptiveNativePlannerBenchmarkSnapshot ?? defaultAdaptiveNativePlannerBenchmarkSnapshot,
18587
20005
  chunkPreview: custom?.chunkPreview ?? defaultChunkPreview,
18588
20006
  documentItem: custom?.documentItem ?? defaultDocumentItem,
18589
20007
  documents: custom?.documents ?? defaultDocuments,
@@ -18591,6 +20009,8 @@ var resolveRAGWorkflowRenderers = (custom) => ({
18591
20009
  error: custom?.error ?? defaultError2,
18592
20010
  maintenance: custom?.maintenance ?? defaultMaintenance,
18593
20011
  mutationResult: custom?.mutationResult ?? defaultMutationResult,
20012
+ nativeBackendComparisonBenchmark: custom?.nativeBackendComparisonBenchmark ?? defaultNativeBackendComparisonBenchmark,
20013
+ nativeBackendComparisonBenchmarkSnapshot: custom?.nativeBackendComparisonBenchmarkSnapshot ?? defaultNativeBackendComparisonBenchmarkSnapshot,
18594
20014
  evaluateResult: custom?.evaluateResult ?? defaultEvaluateResult,
18595
20015
  searchResultItem: custom?.searchResultItem ?? defaultSearchResultItem,
18596
20016
  searchResults: custom?.searchResults ?? defaultSearchResults,
@@ -18885,7 +20305,12 @@ var parseRAGRetrieval = (value) => {
18885
20305
  "fusion",
18886
20306
  "fusionConstant",
18887
20307
  "lexicalWeight",
18888
- "vectorWeight"
20308
+ "vectorWeight",
20309
+ "nativeQueryProfile",
20310
+ "nativeCandidateLimit",
20311
+ "nativeMaxBackfills",
20312
+ "nativeMinResults",
20313
+ "nativeFillPolicy"
18889
20314
  ]);
18890
20315
  for (const key of Object.keys(value)) {
18891
20316
  if (!allowedFields.has(key)) {
@@ -18962,6 +20387,36 @@ var parseRAGRetrieval = (value) => {
18962
20387
  }
18963
20388
  retrieval.vectorWeight = value.vectorWeight;
18964
20389
  }
20390
+ if (getOwnProperty(value, "nativeQueryProfile")) {
20391
+ if (value.nativeQueryProfile !== "latency" && value.nativeQueryProfile !== "balanced" && value.nativeQueryProfile !== "recall") {
20392
+ return null;
20393
+ }
20394
+ retrieval.nativeQueryProfile = value.nativeQueryProfile;
20395
+ }
20396
+ if (getOwnProperty(value, "nativeCandidateLimit")) {
20397
+ if (typeof value.nativeCandidateLimit !== "number") {
20398
+ return null;
20399
+ }
20400
+ retrieval.nativeCandidateLimit = value.nativeCandidateLimit;
20401
+ }
20402
+ if (getOwnProperty(value, "nativeMaxBackfills")) {
20403
+ if (typeof value.nativeMaxBackfills !== "number") {
20404
+ return null;
20405
+ }
20406
+ retrieval.nativeMaxBackfills = value.nativeMaxBackfills;
20407
+ }
20408
+ if (getOwnProperty(value, "nativeMinResults")) {
20409
+ if (typeof value.nativeMinResults !== "number") {
20410
+ return null;
20411
+ }
20412
+ retrieval.nativeMinResults = value.nativeMinResults;
20413
+ }
20414
+ if (getOwnProperty(value, "nativeFillPolicy")) {
20415
+ if (value.nativeFillPolicy !== "strict_topk" && value.nativeFillPolicy !== "satisfy_min_results") {
20416
+ return null;
20417
+ }
20418
+ retrieval.nativeFillPolicy = value.nativeFillPolicy;
20419
+ }
18965
20420
  return retrieval;
18966
20421
  };
18967
20422
  var getNumericStatus = (status) => typeof status === "number" ? status : HTTP_STATUS_OK;
@@ -18973,9 +20428,12 @@ var classifyGovernanceReasons = (reasons) => {
18973
20428
  if (normalized.some((reason) => reason.includes("runtime ") || reason.includes("planner") || reason.includes("candidate-budget-exhausted") || reason.includes("underfilled-topk"))) {
18974
20429
  return "runtime";
18975
20430
  }
20431
+ if (normalized.some((reason) => reason.includes("evidence reconcile") || reason.includes("hybrid evidence") || reason.includes("ocr supplement"))) {
20432
+ return "evidence";
20433
+ }
18976
20434
  return "general";
18977
20435
  };
18978
- var buildRegressionRemediationLabel = (classification) => classification === "multivector" ? "Inspect multivector coverage deltas, variant-hit traces, and collapsed-parent recovery before promotion." : classification === "runtime" ? "Inspect planner-profile shifts, candidate-budget exhaustion, and underfilled native retrieval before promotion." : "Inspect the latest retrieval comparison deltas and resolve the gate failure before promotion.";
20436
+ var buildRegressionRemediationLabel = (classification) => classification === "multivector" ? "Inspect multivector coverage deltas, variant-hit traces, and collapsed-parent recovery before promotion." : classification === "evidence" ? "Inspect hybrid evidence reconciliation, native-vs-OCR passage selection, and PDF evidence provenance before promotion." : classification === "runtime" ? "Inspect planner-profile shifts, candidate-budget exhaustion, and underfilled native retrieval before promotion." : "Inspect the latest retrieval comparison deltas and resolve the gate failure before promotion.";
18979
20437
  var summarizeIncidentClassifications = (incidents) => {
18980
20438
  const allIncidents = incidents ?? [];
18981
20439
  const countBy = (status, classification) => allIncidents.filter((entry) => entry.status === status && (entry.classification ?? "general") === classification).length;
@@ -18983,12 +20441,15 @@ var summarizeIncidentClassifications = (incidents) => {
18983
20441
  openGeneralCount: countBy("open", "general"),
18984
20442
  openMultiVectorCount: countBy("open", "multivector"),
18985
20443
  openRuntimeCount: countBy("open", "runtime"),
20444
+ openEvidenceCount: countBy("open", "evidence"),
18986
20445
  resolvedGeneralCount: countBy("resolved", "general"),
18987
20446
  resolvedMultiVectorCount: countBy("resolved", "multivector"),
18988
20447
  resolvedRuntimeCount: countBy("resolved", "runtime"),
20448
+ resolvedEvidenceCount: countBy("resolved", "evidence"),
18989
20449
  totalGeneralCount: allIncidents.filter((entry) => (entry.classification ?? "general") === "general").length,
18990
20450
  totalMultiVectorCount: allIncidents.filter((entry) => (entry.classification ?? "general") === "multivector").length,
18991
- totalRuntimeCount: allIncidents.filter((entry) => (entry.classification ?? "general") === "runtime").length
20451
+ totalRuntimeCount: allIncidents.filter((entry) => (entry.classification ?? "general") === "runtime").length,
20452
+ totalEvidenceCount: allIncidents.filter((entry) => (entry.classification ?? "general") === "evidence").length
18992
20453
  };
18993
20454
  };
18994
20455
  var getBooleanProperty = (value, key) => {
@@ -19190,6 +20651,7 @@ var ragChat = (config) => {
19190
20651
  const { retrievalReleasePolicies } = config;
19191
20652
  const { retrievalReleasePoliciesByRolloutLabel } = config;
19192
20653
  const { retrievalReleasePoliciesByGroupAndRolloutLabel } = config;
20654
+ const { retrievalBaselineGatePoliciesByGroup } = config;
19193
20655
  const { retrievalBaselineGatePoliciesByRolloutLabel } = config;
19194
20656
  const { retrievalBaselineGatePoliciesByGroupAndRolloutLabel } = config;
19195
20657
  const workflowRenderConfig = typeof config.htmx === "object" ? config.htmx.workflowRender ?? config.htmx.workflow?.render : undefined;
@@ -19803,6 +21265,7 @@ var ragChat = (config) => {
19803
21265
  groupKey: getStringProperty(body, "groupKey"),
19804
21266
  label: getStringProperty(body, "label"),
19805
21267
  persistRun: getBooleanProperty(body, "persistRun") === true,
21268
+ suiteId: getStringProperty(body, "suiteId"),
19806
21269
  tags: normalizeStringArray2(body.tags),
19807
21270
  retrievals
19808
21271
  };
@@ -20310,7 +21773,7 @@ var ragChat = (config) => {
20310
21773
  const baselineRetrievalId = input.baselineRetrievalId ?? activeBaseline?.retrievalId;
20311
21774
  const candidateRetrievalId = input.candidateRetrievalId ?? input.retrievals.find((entry) => entry.id !== baselineRetrievalId)?.id;
20312
21775
  const startedAt = Date.now();
20313
- const suiteId = generateId();
21776
+ const suiteId = input.suiteId ?? generateId();
20314
21777
  const suiteLabel = input.label ?? "Retrieval comparison";
20315
21778
  const comparison = await compareRAGRetrievalStrategies({
20316
21779
  collection,
@@ -20349,7 +21812,12 @@ var ragChat = (config) => {
20349
21812
  baselineRetrievalId,
20350
21813
  candidateRetrievalId,
20351
21814
  comparison,
20352
- policy: activeBaseline?.policy
21815
+ policy: getEffectiveRetrievalBaselineGatePolicy({
21816
+ baselinePolicy: activeBaseline?.policy,
21817
+ groupKey: input.groupKey,
21818
+ rolloutLabel: activeBaseline?.rolloutLabel,
21819
+ suiteId
21820
+ })
20353
21821
  });
20354
21822
  await persistRAGRetrievalComparisonRun({
20355
21823
  run: {
@@ -20485,9 +21953,42 @@ var ragChat = (config) => {
20485
21953
  });
20486
21954
  const getRetrievalLaneHandoffAutoCompletePolicy = (groupKey, targetRolloutLabel) => (groupKey && targetRolloutLabel ? config.retrievalLaneHandoffAutoCompletePoliciesByGroupAndTargetRolloutLabel?.[groupKey]?.[targetRolloutLabel] : undefined) ?? {};
20487
21955
  const getDefaultRetrievalBaselineGatePolicy = (groupKey, rolloutLabel) => ({
21956
+ ...(groupKey ? retrievalBaselineGatePoliciesByGroup?.[groupKey] : undefined) ?? {},
20488
21957
  ...(rolloutLabel ? retrievalBaselineGatePoliciesByRolloutLabel?.[rolloutLabel] : undefined) ?? {},
20489
21958
  ...(groupKey && rolloutLabel ? retrievalBaselineGatePoliciesByGroupAndRolloutLabel?.[groupKey]?.[rolloutLabel] : undefined) ?? {}
20490
21959
  });
21960
+ const buildRuntimeRetrievalBenchmarkRecommendedGatePolicy = () => ({
21961
+ minEvidenceReconcileCasesDelta: 0,
21962
+ maxRuntimeCandidateBudgetExhaustedCasesDelta: 0,
21963
+ maxRuntimeUnderfilledTopKCasesDelta: 0,
21964
+ minAverageF1Delta: 0,
21965
+ minPassingRateDelta: 0,
21966
+ severity: "fail"
21967
+ });
21968
+ const getRecommendedBenchmarkBaselineGatePolicy = (input) => {
21969
+ const adaptiveSuite = createRAGAdaptiveNativePlannerBenchmarkSuite();
21970
+ const backendSuite = createRAGNativeBackendComparisonBenchmarkSuite();
21971
+ if (input.groupKey === (typeof adaptiveSuite.metadata?.recommendedGroupKey === "string" ? adaptiveSuite.metadata.recommendedGroupKey : undefined) || input.suiteId === adaptiveSuite.id) {
21972
+ return buildRuntimeRetrievalBenchmarkRecommendedGatePolicy();
21973
+ }
21974
+ if (input.groupKey === (typeof backendSuite.metadata?.recommendedGroupKey === "string" ? backendSuite.metadata.recommendedGroupKey : undefined) || input.suiteId === backendSuite.id) {
21975
+ return buildRuntimeRetrievalBenchmarkRecommendedGatePolicy();
21976
+ }
21977
+ return;
21978
+ };
21979
+ const getEffectiveRetrievalBaselineGatePolicy = (input) => {
21980
+ if (input.baselinePolicy && Object.keys(input.baselinePolicy).length > 0) {
21981
+ return input.baselinePolicy;
21982
+ }
21983
+ const defaultPolicy = getDefaultRetrievalBaselineGatePolicy(input.groupKey, input.rolloutLabel);
21984
+ if (Object.keys(defaultPolicy).length > 0) {
21985
+ return defaultPolicy;
21986
+ }
21987
+ return getRecommendedBenchmarkBaselineGatePolicy({
21988
+ groupKey: input.groupKey,
21989
+ suiteId: input.suiteId
21990
+ });
21991
+ };
20491
21992
  const getRetrievalReleaseIncidentSeverity = (rolloutLabel) => rolloutLabel === "stable" ? "critical" : "warning";
20492
21993
  const getLatestLaneHandoffDecision = (input) => input.decisions?.find((entry) => entry.groupKey === input.groupKey && entry.sourceRolloutLabel === input.sourceRolloutLabel && entry.targetRolloutLabel === input.targetRolloutLabel && (!input.kind || entry.kind === input.kind));
20493
21994
  const getLaneHandoffFreshnessWindow = (input) => {
@@ -20843,7 +22344,11 @@ var ragChat = (config) => {
20843
22344
  const gate = decision?.gate;
20844
22345
  const reasons = gate?.status && gate.status !== "pass" ? gate.reasons.length > 0 ? [...gate.reasons] : [`gate status is ${gate.status}`] : [];
20845
22346
  const effectiveReleasePolicy = getRetrievalReleasePolicy(input.run.groupKey, input.targetRolloutLabel);
20846
- const effectiveBaselineGatePolicy = getDefaultRetrievalBaselineGatePolicy(input.run.groupKey, input.targetRolloutLabel);
22347
+ const effectiveBaselineGatePolicy = getEffectiveRetrievalBaselineGatePolicy({
22348
+ groupKey: input.run.groupKey,
22349
+ rolloutLabel: input.targetRolloutLabel,
22350
+ suiteId: input.run.suiteId
22351
+ }) ?? {};
20847
22352
  const requiresApproval = Boolean(effectiveReleasePolicy.requireApprovalBeforePromotion);
20848
22353
  const approvalFreshness = latestDecision ? getDecisionFreshness({
20849
22354
  now: input.now,
@@ -20894,7 +22399,7 @@ var ragChat = (config) => {
20894
22399
  if (reason.includes("approval")) {
20895
22400
  actions.add("Renew or record the required approval for this rollout lane.");
20896
22401
  }
20897
- if (reason.includes("gate") || reason.includes("passing rate") || reason.includes("average")) {
22402
+ if (reason.includes("gate") || reason.includes("passing rate") || reason.includes("average") || reason.includes("evidence reconcile") || reason.includes("ocr supplement") || reason.includes("hybrid evidence")) {
20898
22403
  actions.add(buildRegressionRemediationLabel(classifyGovernanceReasons([reason])));
20899
22404
  }
20900
22405
  if (reason.includes("source comparison run was not found")) {
@@ -20925,7 +22430,7 @@ var ragChat = (config) => {
20925
22430
  })
20926
22431
  });
20927
22432
  }
20928
- if (reason.includes("gate") || reason.includes("passing rate") || reason.includes("average")) {
22433
+ if (reason.includes("gate") || reason.includes("passing rate") || reason.includes("average") || reason.includes("evidence reconcile") || reason.includes("ocr supplement") || reason.includes("hybrid evidence")) {
20929
22434
  steps.push({
20930
22435
  kind: "inspect_gate",
20931
22436
  label: buildRegressionRemediationLabel(classifyGovernanceReasons([reason])),
@@ -20986,7 +22491,10 @@ var ragChat = (config) => {
20986
22491
  baselineRetrievalId,
20987
22492
  candidateRetrievalId: input.retrievalId,
20988
22493
  classification: input.baseline ? "general" : undefined,
20989
- effectiveBaselineGatePolicy: targetRolloutLabel || input.groupKey ? getDefaultRetrievalBaselineGatePolicy(input.groupKey, targetRolloutLabel) : undefined,
22494
+ effectiveBaselineGatePolicy: targetRolloutLabel || input.groupKey ? getEffectiveRetrievalBaselineGatePolicy({
22495
+ groupKey: input.groupKey,
22496
+ rolloutLabel: targetRolloutLabel
22497
+ }) : undefined,
20990
22498
  effectiveReleasePolicy: getRetrievalReleasePolicy(input.groupKey, targetRolloutLabel),
20991
22499
  groupKey: input.groupKey,
20992
22500
  gateStatus: undefined,
@@ -21711,32 +23219,20 @@ var ragChat = (config) => {
21711
23219
  store: retrievalComparisonHistoryStore
21712
23220
  }) : undefined;
21713
23221
  const latest = decisions?.[0];
21714
- const adaptiveNativePlannerBenchmark = await (async () => {
21715
- const suite = createRAGAdaptiveNativePlannerBenchmarkSuite();
21716
- const recommendedGroupKey = typeof suite.metadata?.recommendedGroupKey === "string" ? suite.metadata.recommendedGroupKey : undefined;
21717
- const recommendedTags = Array.isArray(suite.metadata?.recommendedTags) ? suite.metadata.recommendedTags.filter((entry) => typeof entry === "string") : undefined;
21718
- if (!config.evaluationSuiteSnapshotHistoryStore) {
21719
- return {
21720
- recommendedGroupKey,
21721
- recommendedTags,
21722
- suiteId: suite.id,
21723
- suiteLabel: suite.label ?? suite.id
21724
- };
21725
- }
21726
- const snapshotHistory = await loadRAGEvaluationSuiteSnapshotHistory({
21727
- limit: getIntegerLikeProperty(queryInput, "benchmarkLimit") ?? 5,
21728
- store: config.evaluationSuiteSnapshotHistoryStore,
21729
- suite
21730
- });
21731
- return {
21732
- recommendedGroupKey,
21733
- recommendedTags,
21734
- snapshotHistory,
21735
- snapshotHistoryPresentation: buildRAGEvaluationSuiteSnapshotHistoryPresentation(snapshotHistory),
21736
- suiteId: suite.id,
21737
- suiteLabel: suite.label ?? suite.id
21738
- };
21739
- })();
23222
+ const adaptiveNativePlannerBenchmark = await loadAdaptiveNativePlannerBenchmarkRuntime({
23223
+ corpusGroupKey: getStringProperty(queryInput, "benchmarkCorpusGroupKey"),
23224
+ groupKey: getStringProperty(queryInput, "benchmarkGroupKey"),
23225
+ historyLimit: getIntegerLikeProperty(queryInput, "benchmarkRunLimit") ?? getIntegerLikeProperty(queryInput, "benchmarkLimit") ?? 5,
23226
+ queryInput,
23227
+ snapshotLimit: getIntegerLikeProperty(queryInput, "benchmarkLimit") ?? 5
23228
+ });
23229
+ const nativeBackendComparisonBenchmark = await loadNativeBackendComparisonBenchmarkRuntime({
23230
+ corpusGroupKey: getStringProperty(queryInput, "backendBenchmarkCorpusGroupKey"),
23231
+ groupKey: getStringProperty(queryInput, "backendBenchmarkGroupKey"),
23232
+ historyLimit: getIntegerLikeProperty(queryInput, "backendBenchmarkRunLimit") ?? getIntegerLikeProperty(queryInput, "backendBenchmarkLimit") ?? 5,
23233
+ queryInput,
23234
+ snapshotLimit: getIntegerLikeProperty(queryInput, "backendBenchmarkLimit") ?? 5
23235
+ });
21740
23236
  const presentation = buildRAGRetrievalReleaseGroupHistoryPresentation({
21741
23237
  runs,
21742
23238
  timeline: {
@@ -21770,23 +23266,338 @@ var ragChat = (config) => {
21770
23266
  latestDecisionAt: latest?.decidedAt,
21771
23267
  latestDecisionFreshnessStatus: latest?.freshnessStatus,
21772
23268
  latestDecisionKind: latest?.kind
21773
- }
23269
+ },
23270
+ nativeBackendComparisonBenchmark
21774
23271
  };
21775
23272
  };
21776
- const handleAdaptiveNativePlannerBenchmark = async (queryInput) => {
21777
- const suite = createRAGAdaptiveNativePlannerBenchmarkSuite({
21778
- description: getStringProperty(queryInput, "description"),
21779
- label: getStringProperty(queryInput, "label")
21780
- });
23273
+ const loadAdaptiveNativePlannerBenchmarkRuntime = async (input) => {
23274
+ const suite = input?.suite ?? createRAGAdaptiveNativePlannerBenchmarkSuite();
23275
+ const recommendedGroupKey = typeof suite.metadata?.recommendedGroupKey === "string" ? suite.metadata.recommendedGroupKey : undefined;
23276
+ const recommendedTags = Array.isArray(suite.metadata?.recommendedTags) ? suite.metadata?.recommendedTags.filter((entry) => typeof entry === "string") : undefined;
23277
+ const groupKey = input?.groupKey ?? getStringProperty(input?.queryInput, "benchmarkGroupKey") ?? recommendedGroupKey;
23278
+ const corpusGroupKey = input?.corpusGroupKey ?? getStringProperty(input?.queryInput, "benchmarkCorpusGroupKey");
23279
+ const recentRuns = retrievalComparisonHistoryStore ? await loadRAGRetrievalComparisonHistory({
23280
+ corpusGroupKey,
23281
+ groupKey,
23282
+ limit: input?.historyLimit ?? 5,
23283
+ store: retrievalComparisonHistoryStore,
23284
+ suiteId: suite.id
23285
+ }) : undefined;
23286
+ const historyTimelineGroupKey = groupKey ?? recentRuns?.[0]?.groupKey;
23287
+ const historyPresentation = recentRuns && recentRuns.length > 0 ? buildRAGRetrievalReleaseGroupHistoryPresentation({
23288
+ runs: recentRuns,
23289
+ timeline: historyTimelineGroupKey ? {
23290
+ corpusGroupKey: corpusGroupKey ?? recentRuns[0]?.corpusGroupKey,
23291
+ groupKey: historyTimelineGroupKey
23292
+ } : undefined
23293
+ }) : undefined;
21781
23294
  const snapshotHistory = config.evaluationSuiteSnapshotHistoryStore ? await loadRAGEvaluationSuiteSnapshotHistory({
21782
- limit: getIntegerLikeProperty(queryInput, "limit") ?? 5,
23295
+ limit: input?.snapshotLimit ?? 5,
21783
23296
  store: config.evaluationSuiteSnapshotHistoryStore,
21784
23297
  suite
21785
23298
  }) : undefined;
23299
+ const fixtureVariants = getRetrievalBenchmarkFixtureVariants(recentRuns);
21786
23300
  return {
21787
- ok: true,
23301
+ corpusGroupKey,
23302
+ fixtureVariants,
23303
+ groupKey,
23304
+ historyPresentation,
23305
+ latestFixtureVariant: fixtureVariants[0],
23306
+ latestRun: recentRuns?.[0],
23307
+ recentRuns,
23308
+ recommendedGroupKey,
23309
+ recommendedTags,
23310
+ snapshotHistory,
23311
+ snapshotHistoryPresentation: buildRAGEvaluationSuiteSnapshotHistoryPresentation(snapshotHistory),
23312
+ suiteId: suite.id,
23313
+ suiteLabel: suite.label ?? suite.id
23314
+ };
23315
+ };
23316
+ const buildRetrievalBenchmarkBackendTags = () => {
23317
+ const status = resolveCollection()?.getStatus?.();
23318
+ const fixtureVariant = "current-collection";
23319
+ if (!status) {
23320
+ return [`fixture:${fixtureVariant}`];
23321
+ }
23322
+ const tags = [
23323
+ `fixture:${fixtureVariant}`,
23324
+ `backend:${status.backend}`,
23325
+ `vector-mode:${status.vectorMode}`
23326
+ ];
23327
+ if (status.native && "mode" in status.native) {
23328
+ tags.push(`native-mode:${status.native.mode}`);
23329
+ }
23330
+ return tags;
23331
+ };
23332
+ const getRetrievalBenchmarkFixtureVariants = (runs) => (runs ?? []).flatMap((run) => run.tags ?? []).filter((tag) => tag.startsWith("fixture:")).map((tag) => tag.slice("fixture:".length)).filter((tag, index, all) => tag.trim().length > 0 && all.indexOf(tag) === index);
23333
+ const ensureRetrievalBenchmarkFixtureTag = (tags) => {
23334
+ if (tags.some((tag) => tag.startsWith("fixture:"))) {
23335
+ return tags;
23336
+ }
23337
+ const fixtureTags = buildRetrievalBenchmarkBackendTags().filter((tag) => tag.startsWith("fixture:"));
23338
+ return [...tags, ...fixtureTags].filter((tag, index, all) => all.indexOf(tag) === index);
23339
+ };
23340
+ const loadNativeBackendComparisonBenchmarkRuntime = async (input) => {
23341
+ const suite = input?.suite ?? createRAGNativeBackendComparisonBenchmarkSuite();
23342
+ const recommendedGroupKey = typeof suite.metadata?.recommendedGroupKey === "string" ? suite.metadata.recommendedGroupKey : undefined;
23343
+ const recommendedTags = Array.isArray(suite.metadata?.recommendedTags) ? suite.metadata.recommendedTags.filter((entry) => typeof entry === "string") : undefined;
23344
+ const groupKey = input?.groupKey ?? getStringProperty(input?.queryInput, "benchmarkGroupKey") ?? recommendedGroupKey;
23345
+ const corpusGroupKey = input?.corpusGroupKey ?? getStringProperty(input?.queryInput, "benchmarkCorpusGroupKey");
23346
+ const recentRuns = retrievalComparisonHistoryStore ? await loadRAGRetrievalComparisonHistory({
23347
+ corpusGroupKey,
23348
+ groupKey,
23349
+ limit: input?.historyLimit ?? 5,
23350
+ store: retrievalComparisonHistoryStore,
23351
+ suiteId: suite.id
23352
+ }) : undefined;
23353
+ const historyTimelineGroupKey = groupKey ?? recentRuns?.[0]?.groupKey;
23354
+ const historyPresentation = recentRuns && recentRuns.length > 0 ? buildRAGRetrievalReleaseGroupHistoryPresentation({
23355
+ runs: recentRuns,
23356
+ timeline: historyTimelineGroupKey ? {
23357
+ corpusGroupKey: corpusGroupKey ?? recentRuns[0]?.corpusGroupKey,
23358
+ groupKey: historyTimelineGroupKey
23359
+ } : undefined
23360
+ }) : undefined;
23361
+ const snapshotHistory = config.evaluationSuiteSnapshotHistoryStore ? await loadRAGEvaluationSuiteSnapshotHistory({
23362
+ limit: input?.snapshotLimit ?? 5,
23363
+ store: config.evaluationSuiteSnapshotHistoryStore,
23364
+ suite
23365
+ }) : undefined;
23366
+ const fixtureVariants = getRetrievalBenchmarkFixtureVariants(recentRuns);
23367
+ return {
23368
+ corpusGroupKey,
23369
+ fixtureVariants,
23370
+ groupKey,
23371
+ historyPresentation,
23372
+ latestFixtureVariant: fixtureVariants[0],
23373
+ latestRun: recentRuns?.[0],
23374
+ recentRuns,
23375
+ recommendedGroupKey,
23376
+ recommendedTags,
21788
23377
  snapshotHistory,
21789
23378
  snapshotHistoryPresentation: buildRAGEvaluationSuiteSnapshotHistoryPresentation(snapshotHistory),
23379
+ suiteId: suite.id,
23380
+ suiteLabel: suite.label ?? suite.id
23381
+ };
23382
+ };
23383
+ const handleAdaptiveNativePlannerBenchmark = async (queryInput) => {
23384
+ const suite = createRAGAdaptiveNativePlannerBenchmarkSuite({
23385
+ description: getStringProperty(queryInput, "description"),
23386
+ label: getStringProperty(queryInput, "label"),
23387
+ metadata: getObjectProperty(queryInput, "metadata"),
23388
+ topK: getIntegerLikeProperty(queryInput, "topK") ?? undefined
23389
+ });
23390
+ const runtime = await loadAdaptiveNativePlannerBenchmarkRuntime({
23391
+ historyLimit: getIntegerLikeProperty(queryInput, "runLimit") ?? 5,
23392
+ queryInput,
23393
+ snapshotLimit: getIntegerLikeProperty(queryInput, "limit") ?? 5,
23394
+ suite
23395
+ });
23396
+ return {
23397
+ corpusGroupKey: runtime.corpusGroupKey,
23398
+ fixtureVariants: runtime.fixtureVariants,
23399
+ groupKey: runtime.groupKey,
23400
+ historyPresentation: runtime.historyPresentation,
23401
+ latestFixtureVariant: runtime.latestFixtureVariant,
23402
+ latestRun: runtime.latestRun,
23403
+ ok: true,
23404
+ recentRuns: runtime.recentRuns,
23405
+ snapshotHistory: runtime.snapshotHistory,
23406
+ snapshotHistoryPresentation: runtime.snapshotHistoryPresentation,
23407
+ suite
23408
+ };
23409
+ };
23410
+ const handleNativeBackendComparisonBenchmark = async (queryInput) => {
23411
+ const suite = createRAGNativeBackendComparisonBenchmarkSuite({
23412
+ description: getStringProperty(queryInput, "description"),
23413
+ label: getStringProperty(queryInput, "label"),
23414
+ metadata: getObjectProperty(queryInput, "metadata"),
23415
+ topK: getIntegerLikeProperty(queryInput, "topK") ?? undefined
23416
+ });
23417
+ const runtime = await loadNativeBackendComparisonBenchmarkRuntime({
23418
+ historyLimit: getIntegerLikeProperty(queryInput, "runLimit") ?? 5,
23419
+ queryInput,
23420
+ snapshotLimit: getIntegerLikeProperty(queryInput, "limit") ?? 5,
23421
+ suite
23422
+ });
23423
+ return {
23424
+ corpusGroupKey: runtime.corpusGroupKey,
23425
+ fixtureVariants: runtime.fixtureVariants,
23426
+ groupKey: runtime.groupKey,
23427
+ historyPresentation: runtime.historyPresentation,
23428
+ latestFixtureVariant: runtime.latestFixtureVariant,
23429
+ latestRun: runtime.latestRun,
23430
+ ok: true,
23431
+ recentRuns: runtime.recentRuns,
23432
+ snapshotHistory: runtime.snapshotHistory,
23433
+ snapshotHistoryPresentation: runtime.snapshotHistoryPresentation,
23434
+ suite
23435
+ };
23436
+ };
23437
+ const handleRunAdaptiveNativePlannerBenchmark = async (bodyInput, request) => {
23438
+ const suite = createRAGAdaptiveNativePlannerBenchmarkSuite({
23439
+ description: getStringProperty(bodyInput, "description"),
23440
+ label: getStringProperty(bodyInput, "label"),
23441
+ metadata: getObjectProperty(bodyInput, "metadata"),
23442
+ topK: getIntegerLikeProperty(bodyInput, "topK") ?? undefined
23443
+ });
23444
+ const recommendedGroupKey = typeof suite.metadata?.recommendedGroupKey === "string" ? suite.metadata.recommendedGroupKey : undefined;
23445
+ const recommendedTags = Array.isArray(suite.metadata?.recommendedTags) ? suite.metadata.recommendedTags.filter((entry) => typeof entry === "string") : [];
23446
+ const explicitTags = normalizeStringArray2(bodyInput?.tags);
23447
+ const comparisonBody = {
23448
+ ...suite.input,
23449
+ baselineRetrievalId: getStringProperty(bodyInput, "baselineRetrievalId") ?? "native-latency",
23450
+ candidateRetrievalId: getStringProperty(bodyInput, "candidateRetrievalId") ?? "native-adaptive",
23451
+ corpusGroupKey: getStringProperty(bodyInput, "corpusGroupKey"),
23452
+ groupKey: getStringProperty(bodyInput, "groupKey") ?? recommendedGroupKey,
23453
+ label: suite.label,
23454
+ persistRun: getBooleanProperty(bodyInput, "persistRun") !== false,
23455
+ suiteId: suite.id,
23456
+ retrievals: Array.isArray(bodyInput?.retrievals) ? bodyInput.retrievals : [
23457
+ {
23458
+ id: "native-latency",
23459
+ label: "Native latency",
23460
+ retrieval: {
23461
+ mode: "vector",
23462
+ nativeQueryProfile: "latency"
23463
+ }
23464
+ },
23465
+ {
23466
+ id: "native-adaptive",
23467
+ label: "Adaptive native planner",
23468
+ retrieval: {
23469
+ mode: "vector"
23470
+ }
23471
+ },
23472
+ {
23473
+ id: "hybrid-adaptive",
23474
+ label: "Hybrid adaptive",
23475
+ retrieval: {
23476
+ mode: "hybrid"
23477
+ }
23478
+ },
23479
+ {
23480
+ id: "hybrid-transform",
23481
+ label: "Hybrid transform",
23482
+ queryTransform: createHeuristicRAGQueryTransform(),
23483
+ retrieval: {
23484
+ mode: "hybrid"
23485
+ }
23486
+ }
23487
+ ],
23488
+ tags: explicitTags.length > 0 ? ensureRetrievalBenchmarkFixtureTag(explicitTags) : ensureRetrievalBenchmarkFixtureTag(recommendedTags)
23489
+ };
23490
+ const comparisonResult = await handleEvaluateRetrievals(comparisonBody, request);
23491
+ if (!comparisonResult.ok) {
23492
+ return {
23493
+ error: comparisonResult.error,
23494
+ ok: false
23495
+ };
23496
+ }
23497
+ const runtime = await loadAdaptiveNativePlannerBenchmarkRuntime({
23498
+ corpusGroupKey: getStringProperty(bodyInput, "corpusGroupKey"),
23499
+ groupKey: getStringProperty(bodyInput, "groupKey") ?? recommendedGroupKey,
23500
+ historyLimit: getIntegerLikeProperty(bodyInput, "runLimit") ?? 5,
23501
+ snapshotLimit: getIntegerLikeProperty(bodyInput, "limit") ?? 5,
23502
+ suite
23503
+ });
23504
+ return {
23505
+ comparison: comparisonResult.comparison,
23506
+ corpusGroupKey: runtime.corpusGroupKey,
23507
+ fixtureVariants: runtime.fixtureVariants,
23508
+ groupKey: runtime.groupKey,
23509
+ historyPresentation: runtime.historyPresentation,
23510
+ latestFixtureVariant: runtime.latestFixtureVariant,
23511
+ latestRun: runtime.latestRun,
23512
+ ok: true,
23513
+ recentRuns: runtime.recentRuns,
23514
+ snapshotHistory: runtime.snapshotHistory,
23515
+ snapshotHistoryPresentation: runtime.snapshotHistoryPresentation,
23516
+ suite
23517
+ };
23518
+ };
23519
+ const handleRunNativeBackendComparisonBenchmark = async (bodyInput, request) => {
23520
+ const suite = createRAGNativeBackendComparisonBenchmarkSuite({
23521
+ description: getStringProperty(bodyInput, "description"),
23522
+ label: getStringProperty(bodyInput, "label"),
23523
+ metadata: getObjectProperty(bodyInput, "metadata"),
23524
+ topK: getIntegerLikeProperty(bodyInput, "topK") ?? undefined
23525
+ });
23526
+ const recommendedGroupKey = typeof suite.metadata?.recommendedGroupKey === "string" ? suite.metadata.recommendedGroupKey : undefined;
23527
+ const recommendedTags = Array.isArray(suite.metadata?.recommendedTags) ? suite.metadata.recommendedTags.filter((entry) => typeof entry === "string") : [];
23528
+ const explicitTags = normalizeStringArray2(bodyInput?.tags);
23529
+ const comparisonBody = {
23530
+ ...suite.input,
23531
+ baselineRetrievalId: getStringProperty(bodyInput, "baselineRetrievalId") ?? "native-latency",
23532
+ candidateRetrievalId: getStringProperty(bodyInput, "candidateRetrievalId") ?? "native-adaptive",
23533
+ corpusGroupKey: getStringProperty(bodyInput, "corpusGroupKey"),
23534
+ groupKey: getStringProperty(bodyInput, "groupKey") ?? recommendedGroupKey,
23535
+ label: suite.label,
23536
+ persistRun: getBooleanProperty(bodyInput, "persistRun") !== false,
23537
+ suiteId: suite.id,
23538
+ retrievals: Array.isArray(bodyInput?.retrievals) ? bodyInput.retrievals : [
23539
+ {
23540
+ id: "native-latency",
23541
+ label: "Native latency",
23542
+ retrieval: {
23543
+ mode: "vector",
23544
+ nativeQueryProfile: "latency"
23545
+ }
23546
+ },
23547
+ {
23548
+ id: "native-adaptive",
23549
+ label: "Adaptive native planner",
23550
+ retrieval: {
23551
+ mode: "vector"
23552
+ }
23553
+ },
23554
+ {
23555
+ id: "hybrid-adaptive",
23556
+ label: "Hybrid adaptive",
23557
+ retrieval: {
23558
+ mode: "hybrid"
23559
+ }
23560
+ },
23561
+ {
23562
+ id: "hybrid-transform",
23563
+ label: "Hybrid transform",
23564
+ queryTransform: createHeuristicRAGQueryTransform(),
23565
+ retrieval: {
23566
+ mode: "hybrid"
23567
+ }
23568
+ }
23569
+ ],
23570
+ tags: explicitTags.length > 0 ? ensureRetrievalBenchmarkFixtureTag(explicitTags) : ensureRetrievalBenchmarkFixtureTag([
23571
+ ...recommendedTags,
23572
+ ...buildRetrievalBenchmarkBackendTags()
23573
+ ])
23574
+ };
23575
+ const comparisonResult = await handleEvaluateRetrievals(comparisonBody, request);
23576
+ if (!comparisonResult.ok) {
23577
+ return {
23578
+ error: comparisonResult.error,
23579
+ ok: false
23580
+ };
23581
+ }
23582
+ const runtime = await loadNativeBackendComparisonBenchmarkRuntime({
23583
+ corpusGroupKey: getStringProperty(bodyInput, "corpusGroupKey"),
23584
+ groupKey: getStringProperty(bodyInput, "groupKey") ?? recommendedGroupKey,
23585
+ historyLimit: getIntegerLikeProperty(bodyInput, "runLimit") ?? 5,
23586
+ snapshotLimit: getIntegerLikeProperty(bodyInput, "limit") ?? 5,
23587
+ suite
23588
+ });
23589
+ return {
23590
+ comparison: comparisonResult.comparison,
23591
+ corpusGroupKey: runtime.corpusGroupKey,
23592
+ fixtureVariants: runtime.fixtureVariants,
23593
+ groupKey: runtime.groupKey,
23594
+ historyPresentation: runtime.historyPresentation,
23595
+ latestFixtureVariant: runtime.latestFixtureVariant,
23596
+ latestRun: runtime.latestRun,
23597
+ ok: true,
23598
+ recentRuns: runtime.recentRuns,
23599
+ snapshotHistory: runtime.snapshotHistory,
23600
+ snapshotHistoryPresentation: runtime.snapshotHistoryPresentation,
21790
23601
  suite
21791
23602
  };
21792
23603
  };
@@ -21836,6 +23647,52 @@ var ragChat = (config) => {
21836
23647
  suite
21837
23648
  };
21838
23649
  };
23650
+ const handlePersistNativeBackendComparisonBenchmarkSnapshot = async (bodyInput, request) => {
23651
+ if (request) {
23652
+ const decision = await checkAuthorization(request, "manage_retrieval_admin");
23653
+ if (!decision.allowed) {
23654
+ return {
23655
+ error: decision.reason ?? "Forbidden",
23656
+ ok: false
23657
+ };
23658
+ }
23659
+ }
23660
+ if (!config.evaluationSuiteSnapshotHistoryStore) {
23661
+ return {
23662
+ error: "Evaluation suite snapshot history store is not configured",
23663
+ ok: false
23664
+ };
23665
+ }
23666
+ const suite = createRAGNativeBackendComparisonBenchmarkSuite({
23667
+ description: getStringProperty(bodyInput, "description"),
23668
+ label: getStringProperty(bodyInput, "label"),
23669
+ metadata: getObjectProperty(bodyInput, "metadata")
23670
+ });
23671
+ const previousHistory = await loadRAGEvaluationSuiteSnapshotHistory({
23672
+ limit: 1,
23673
+ store: config.evaluationSuiteSnapshotHistoryStore,
23674
+ suite
23675
+ });
23676
+ const snapshot = createRAGNativeBackendComparisonBenchmarkSnapshot({
23677
+ createdAt: getNumberProperty(bodyInput, "createdAt"),
23678
+ metadata: getObjectProperty(bodyInput, "snapshotMetadata"),
23679
+ suite,
23680
+ version: getIntegerLikeProperty(bodyInput, "version") ?? (previousHistory.latestSnapshot?.version ?? 0) + 1
23681
+ });
23682
+ await config.evaluationSuiteSnapshotHistoryStore.saveSnapshot(snapshot);
23683
+ const snapshotHistory = await loadRAGEvaluationSuiteSnapshotHistory({
23684
+ limit: getIntegerLikeProperty(bodyInput, "limit") ?? 5,
23685
+ store: config.evaluationSuiteSnapshotHistoryStore,
23686
+ suite
23687
+ });
23688
+ return {
23689
+ ok: true,
23690
+ snapshot,
23691
+ snapshotHistory,
23692
+ snapshotHistoryPresentation: buildRAGEvaluationSuiteSnapshotHistoryPresentation(snapshotHistory),
23693
+ suite
23694
+ };
23695
+ };
21839
23696
  const handleRetrievalLaneHandoffList = async (queryInput, request) => {
21840
23697
  const result = await buildOperationsPayload();
21841
23698
  const accessScope = await loadAccessScope(request);
@@ -23829,29 +25686,14 @@ var ragChat = (config) => {
23829
25686
  });
23830
25687
  const latestRejectedCandidate = enrichedRecentRetrievalReleaseDecisions?.find((entry) => entry.kind === "reject");
23831
25688
  const latestRetrievalComparisonRun = recentRetrievalComparisonRuns?.[0];
23832
- const adaptiveNativePlannerBenchmarkSuite = createRAGAdaptiveNativePlannerBenchmarkSuite();
23833
- const adaptiveNativePlannerBenchmark = config.evaluationSuiteSnapshotHistoryStore ? {
23834
- recommendedGroupKey: typeof adaptiveNativePlannerBenchmarkSuite.metadata?.recommendedGroupKey === "string" ? adaptiveNativePlannerBenchmarkSuite.metadata.recommendedGroupKey : undefined,
23835
- recommendedTags: Array.isArray(adaptiveNativePlannerBenchmarkSuite.metadata?.recommendedTags) ? adaptiveNativePlannerBenchmarkSuite.metadata.recommendedTags.filter((entry) => typeof entry === "string") : undefined,
23836
- snapshotHistory: await loadRAGEvaluationSuiteSnapshotHistory({
23837
- limit: 5,
23838
- store: config.evaluationSuiteSnapshotHistoryStore,
23839
- suite: adaptiveNativePlannerBenchmarkSuite
23840
- }),
23841
- snapshotHistoryPresentation: undefined,
23842
- suiteId: adaptiveNativePlannerBenchmarkSuite.id,
23843
- suiteLabel: adaptiveNativePlannerBenchmarkSuite.label ?? adaptiveNativePlannerBenchmarkSuite.id
23844
- } : {
23845
- recommendedGroupKey: typeof adaptiveNativePlannerBenchmarkSuite.metadata?.recommendedGroupKey === "string" ? adaptiveNativePlannerBenchmarkSuite.metadata.recommendedGroupKey : undefined,
23846
- recommendedTags: Array.isArray(adaptiveNativePlannerBenchmarkSuite.metadata?.recommendedTags) ? adaptiveNativePlannerBenchmarkSuite.metadata.recommendedTags.filter((entry) => typeof entry === "string") : undefined,
23847
- snapshotHistory: undefined,
23848
- snapshotHistoryPresentation: undefined,
23849
- suiteId: adaptiveNativePlannerBenchmarkSuite.id,
23850
- suiteLabel: adaptiveNativePlannerBenchmarkSuite.label ?? adaptiveNativePlannerBenchmarkSuite.id
23851
- };
23852
- if (adaptiveNativePlannerBenchmark.snapshotHistory) {
23853
- adaptiveNativePlannerBenchmark.snapshotHistoryPresentation = buildRAGEvaluationSuiteSnapshotHistoryPresentation(adaptiveNativePlannerBenchmark.snapshotHistory);
23854
- }
25689
+ const adaptiveNativePlannerBenchmark = await loadAdaptiveNativePlannerBenchmarkRuntime({
25690
+ historyLimit: 5,
25691
+ snapshotLimit: 5
25692
+ });
25693
+ const nativeBackendComparisonBenchmark = await loadNativeBackendComparisonBenchmarkRuntime({
25694
+ historyLimit: 5,
25695
+ snapshotLimit: 5
25696
+ });
23855
25697
  const latestPromotionReadiness = latestRetrievalComparisonRun ? (() => {
23856
25698
  const activeTargetRolloutLabel = activeRetrievalBaselines?.find((entry) => entry.groupKey === latestRetrievalComparisonRun.groupKey)?.rolloutLabel;
23857
25699
  const state = getPromotionCandidateState({
@@ -23990,7 +25832,7 @@ var ragChat = (config) => {
23990
25832
  return {
23991
25833
  ...group,
23992
25834
  acknowledgedOpenIncidentCount,
23993
- classification: groupOpenIncidents.some((entry) => entry.classification === "runtime") ? "runtime" : groupOpenIncidents.some((entry) => entry.classification === "multivector") ? "multivector" : group.classification,
25835
+ classification: groupOpenIncidents.some((entry) => entry.classification === "runtime") ? "runtime" : groupOpenIncidents.some((entry) => entry.classification === "evidence") ? "evidence" : groupOpenIncidents.some((entry) => entry.classification === "multivector") ? "multivector" : group.classification,
23994
25836
  openIncidentCount: groupOpenIncidents.length,
23995
25837
  unacknowledgedOpenIncidentCount: groupOpenIncidents.length - acknowledgedOpenIncidentCount
23996
25838
  };
@@ -24198,7 +26040,7 @@ var ragChat = (config) => {
24198
26040
  ] : candidate?.ready ? [
24199
26041
  "latest candidate is ready to promote"
24200
26042
  ] : ["continue monitoring release state"];
24201
- const classification = candidate?.reasons?.length ? classifyGovernanceReasons(candidate.reasons) : (recentIncidents ?? []).some((entry) => entry.groupKey === group.groupKey && entry.targetRolloutLabel === targetRolloutLabel && entry.classification === "runtime") ? "runtime" : (recentIncidents ?? []).some((entry) => entry.groupKey === group.groupKey && entry.targetRolloutLabel === targetRolloutLabel && entry.classification === "multivector") ? "multivector" : "general";
26043
+ const classification = candidate?.reasons?.length ? classifyGovernanceReasons(candidate.reasons) : (recentIncidents ?? []).some((entry) => entry.groupKey === group.groupKey && entry.targetRolloutLabel === targetRolloutLabel && entry.classification === "runtime") ? "runtime" : (recentIncidents ?? []).some((entry) => entry.groupKey === group.groupKey && entry.targetRolloutLabel === targetRolloutLabel && entry.classification === "evidence") ? "evidence" : (recentIncidents ?? []).some((entry) => entry.groupKey === group.groupKey && entry.targetRolloutLabel === targetRolloutLabel && entry.classification === "multivector") ? "multivector" : "general";
24202
26044
  summaries.push({
24203
26045
  baselineRetrievalId: candidate?.baselineRetrievalId,
24204
26046
  candidateRetrievalId: candidate?.candidateRetrievalId,
@@ -24674,6 +26516,9 @@ var ragChat = (config) => {
24674
26516
  ] : [],
24675
26517
  ...(input.delta?.runtimeUnderfilledTopKCasesDelta ?? 0) > 0 ? [
24676
26518
  `runtime underfilled-topk delta ${input.delta?.runtimeUnderfilledTopKCasesDelta ?? 0}`
26519
+ ] : [],
26520
+ ...(input.delta?.evidenceReconcileCasesDelta ?? 0) < 0 ? [
26521
+ `evidence reconcile delta ${input.delta?.evidenceReconcileCasesDelta ?? 0}`
24677
26522
  ] : []
24678
26523
  ]);
24679
26524
  const latestWinner = latestRetrievalComparisonRun.comparison.summary.bestByPassingRate;
@@ -24853,12 +26698,14 @@ var ragChat = (config) => {
24853
26698
  readiness: buildReadiness(),
24854
26699
  retrievalComparisons: {
24855
26700
  adaptiveNativePlannerBenchmark,
26701
+ nativeBackendComparisonBenchmark,
24856
26702
  configured: Boolean(retrievalComparisonHistoryStore),
24857
26703
  latest: latestRetrievalComparisonRun ? {
24858
26704
  bestByAverageF1: latestRetrievalComparisonRun.comparison.summary.bestByAverageF1,
24859
26705
  bestByMultivectorCollapsedCases: latestRetrievalComparisonRun.comparison.summary.bestByMultivectorCollapsedCases,
24860
26706
  bestByMultivectorLexicalHitCases: latestRetrievalComparisonRun.comparison.summary.bestByMultivectorLexicalHitCases,
24861
26707
  bestByMultivectorVectorHitCases: latestRetrievalComparisonRun.comparison.summary.bestByMultivectorVectorHitCases,
26708
+ bestByEvidenceReconcileCases: latestRetrievalComparisonRun.comparison.summary.bestByEvidenceReconcileCases,
24862
26709
  bestByLowestRuntimeCandidateBudgetExhaustedCases: latestRetrievalComparisonRun.comparison.summary.bestByLowestRuntimeCandidateBudgetExhaustedCases,
24863
26710
  bestByLowestRuntimeUnderfilledTopKCases: latestRetrievalComparisonRun.comparison.summary.bestByLowestRuntimeUnderfilledTopKCases,
24864
26711
  bestByPassingRate: latestRetrievalComparisonRun.comparison.summary.bestByPassingRate,
@@ -25785,10 +27632,43 @@ var ragChat = (config) => {
25785
27632
  if (!result.ok) {
25786
27633
  return toHTMXResponse(workflowRenderers.error(result.error ?? "Adaptive native planner benchmark failed"), getNumericStatus(set.status));
25787
27634
  }
25788
- return new Response("", {
25789
- headers: HTML_HEADERS,
25790
- status: getNumericStatus(set.status)
25791
- });
27635
+ return toHTMXResponse(workflowRenderers.adaptiveNativePlannerBenchmark(result), getNumericStatus(set.status));
27636
+ }
27637
+ return result;
27638
+ }).get(`${path}/compare/retrieval/benchmarks/native-backend-comparison`, async ({ query, request, set }) => {
27639
+ const result = await handleNativeBackendComparisonBenchmark(query);
27640
+ if (!result.ok) {
27641
+ set.status = HTTP_STATUS_BAD_REQUEST;
27642
+ }
27643
+ if (config.htmx && isHTMXRequest(request)) {
27644
+ if (!result.ok) {
27645
+ return toHTMXResponse(workflowRenderers.error(result.error ?? "Native backend comparison benchmark failed"), getNumericStatus(set.status));
27646
+ }
27647
+ return toHTMXResponse(workflowRenderers.nativeBackendComparisonBenchmark(result), getNumericStatus(set.status));
27648
+ }
27649
+ return result;
27650
+ }).post(`${path}/compare/retrieval/benchmarks/adaptive-native-planner/run`, async ({ body, request, set }) => {
27651
+ const result = await handleRunAdaptiveNativePlannerBenchmark(body, request);
27652
+ if (!result.ok) {
27653
+ set.status = HTTP_STATUS_BAD_REQUEST;
27654
+ }
27655
+ if (config.htmx && isHTMXRequest(request)) {
27656
+ if (!result.ok) {
27657
+ return toHTMXResponse(workflowRenderers.error(result.error ?? "Adaptive native planner benchmark run failed"), getNumericStatus(set.status));
27658
+ }
27659
+ return toHTMXResponse(workflowRenderers.adaptiveNativePlannerBenchmark(result), getNumericStatus(set.status));
27660
+ }
27661
+ return result;
27662
+ }).post(`${path}/compare/retrieval/benchmarks/native-backend-comparison/run`, async ({ body, request, set }) => {
27663
+ const result = await handleRunNativeBackendComparisonBenchmark(body, request);
27664
+ if (!result.ok) {
27665
+ set.status = HTTP_STATUS_BAD_REQUEST;
27666
+ }
27667
+ if (config.htmx && isHTMXRequest(request)) {
27668
+ if (!result.ok) {
27669
+ return toHTMXResponse(workflowRenderers.error(result.error ?? "Native backend comparison benchmark run failed"), getNumericStatus(set.status));
27670
+ }
27671
+ return toHTMXResponse(workflowRenderers.nativeBackendComparisonBenchmark(result), getNumericStatus(set.status));
25792
27672
  }
25793
27673
  return result;
25794
27674
  }).post(`${path}/compare/retrieval/benchmarks/adaptive-native-planner/snapshots`, async ({ body, request, set }) => {
@@ -25800,10 +27680,19 @@ var ragChat = (config) => {
25800
27680
  if (!result.ok) {
25801
27681
  return toHTMXResponse(workflowRenderers.error(result.error ?? "Adaptive native planner benchmark snapshot failed"), getNumericStatus(set.status));
25802
27682
  }
25803
- return new Response("", {
25804
- headers: HTML_HEADERS,
25805
- status: getNumericStatus(set.status)
25806
- });
27683
+ return toHTMXResponse(workflowRenderers.adaptiveNativePlannerBenchmarkSnapshot(result), getNumericStatus(set.status));
27684
+ }
27685
+ return result;
27686
+ }).post(`${path}/compare/retrieval/benchmarks/native-backend-comparison/snapshots`, async ({ body, request, set }) => {
27687
+ const result = await handlePersistNativeBackendComparisonBenchmarkSnapshot(body, request);
27688
+ if (!result.ok) {
27689
+ set.status = HTTP_STATUS_BAD_REQUEST;
27690
+ }
27691
+ if (config.htmx && isHTMXRequest(request)) {
27692
+ if (!result.ok) {
27693
+ return toHTMXResponse(workflowRenderers.error(result.error ?? "Native backend comparison benchmark snapshot failed"), getNumericStatus(set.status));
27694
+ }
27695
+ return toHTMXResponse(workflowRenderers.nativeBackendComparisonBenchmarkSnapshot(result), getNumericStatus(set.status));
25807
27696
  }
25808
27697
  return result;
25809
27698
  }).get(`${path}/compare/retrieval/baselines`, async ({ query, request, set }) => {
@@ -33900,12 +35789,21 @@ var createRAGClient = (options) => {
33900
35789
  if (typeof input?.limit === "number") {
33901
35790
  searchParams.set("limit", String(input.limit));
33902
35791
  }
35792
+ if (typeof input?.runLimit === "number") {
35793
+ searchParams.set("runLimit", String(input.runLimit));
35794
+ }
33903
35795
  if (input?.label) {
33904
35796
  searchParams.set("label", input.label);
33905
35797
  }
33906
35798
  if (input?.description) {
33907
35799
  searchParams.set("description", input.description);
33908
35800
  }
35801
+ if (input?.groupKey) {
35802
+ searchParams.set("benchmarkGroupKey", input.groupKey);
35803
+ }
35804
+ if (input?.corpusGroupKey) {
35805
+ searchParams.set("benchmarkCorpusGroupKey", input.corpusGroupKey);
35806
+ }
33909
35807
  const suffix = searchParams.size ? `?${searchParams}` : "";
33910
35808
  const response = await fetchImpl(`${basePath}/compare/retrieval/benchmarks/adaptive-native-planner${suffix}`);
33911
35809
  if (!response.ok) {
@@ -33917,6 +35815,35 @@ var createRAGClient = (options) => {
33917
35815
  }
33918
35816
  return payload;
33919
35817
  },
35818
+ async runAdaptiveNativePlannerBenchmark(input) {
35819
+ const response = await fetchImpl(`${basePath}/compare/retrieval/benchmarks/adaptive-native-planner/run`, {
35820
+ body: JSON.stringify({
35821
+ baselineRetrievalId: input?.baselineRetrievalId,
35822
+ candidateRetrievalId: input?.candidateRetrievalId,
35823
+ corpusGroupKey: input?.corpusGroupKey,
35824
+ description: input?.description,
35825
+ groupKey: input?.groupKey,
35826
+ label: input?.label,
35827
+ limit: input?.limit,
35828
+ metadata: input?.metadata,
35829
+ persistRun: input?.persistRun,
35830
+ retrievals: input?.retrievals,
35831
+ runLimit: input?.runLimit,
35832
+ tags: input?.tags,
35833
+ topK: input?.topK
35834
+ }),
35835
+ headers: jsonHeaders,
35836
+ method: "POST"
35837
+ });
35838
+ if (!response.ok) {
35839
+ throw new Error(await toErrorMessage3(response));
35840
+ }
35841
+ const payload = await parseJson(response);
35842
+ if (!payload.ok) {
35843
+ throw new Error(payload.error ?? "Adaptive native planner benchmark run failed");
35844
+ }
35845
+ return payload;
35846
+ },
33920
35847
  async saveAdaptiveNativePlannerBenchmarkSnapshot(input) {
33921
35848
  const response = await fetchImpl(`${basePath}/compare/retrieval/benchmarks/adaptive-native-planner/snapshots`, {
33922
35849
  body: JSON.stringify({
@@ -33940,6 +35867,89 @@ var createRAGClient = (options) => {
33940
35867
  }
33941
35868
  return payload;
33942
35869
  },
35870
+ async nativeBackendComparisonBenchmark(input) {
35871
+ const searchParams = new URLSearchParams;
35872
+ if (typeof input?.limit === "number") {
35873
+ searchParams.set("limit", String(input.limit));
35874
+ }
35875
+ if (typeof input?.runLimit === "number") {
35876
+ searchParams.set("runLimit", String(input.runLimit));
35877
+ }
35878
+ if (input?.label) {
35879
+ searchParams.set("label", input.label);
35880
+ }
35881
+ if (input?.description) {
35882
+ searchParams.set("description", input.description);
35883
+ }
35884
+ if (input?.groupKey) {
35885
+ searchParams.set("benchmarkGroupKey", input.groupKey);
35886
+ }
35887
+ if (input?.corpusGroupKey) {
35888
+ searchParams.set("benchmarkCorpusGroupKey", input.corpusGroupKey);
35889
+ }
35890
+ const suffix = searchParams.size ? `?${searchParams}` : "";
35891
+ const response = await fetchImpl(`${basePath}/compare/retrieval/benchmarks/native-backend-comparison${suffix}`);
35892
+ if (!response.ok) {
35893
+ throw new Error(await toErrorMessage3(response));
35894
+ }
35895
+ const payload = await parseJson(response);
35896
+ if (!payload.ok) {
35897
+ throw new Error(payload.error ?? "Native backend comparison benchmark history failed");
35898
+ }
35899
+ return payload;
35900
+ },
35901
+ async runNativeBackendComparisonBenchmark(input) {
35902
+ const response = await fetchImpl(`${basePath}/compare/retrieval/benchmarks/native-backend-comparison/run`, {
35903
+ body: JSON.stringify({
35904
+ baselineRetrievalId: input?.baselineRetrievalId,
35905
+ candidateRetrievalId: input?.candidateRetrievalId,
35906
+ corpusGroupKey: input?.corpusGroupKey,
35907
+ description: input?.description,
35908
+ groupKey: input?.groupKey,
35909
+ label: input?.label,
35910
+ limit: input?.limit,
35911
+ metadata: input?.metadata,
35912
+ persistRun: input?.persistRun,
35913
+ retrievals: input?.retrievals,
35914
+ runLimit: input?.runLimit,
35915
+ tags: input?.tags,
35916
+ topK: input?.topK
35917
+ }),
35918
+ headers: jsonHeaders,
35919
+ method: "POST"
35920
+ });
35921
+ if (!response.ok) {
35922
+ throw new Error(await toErrorMessage3(response));
35923
+ }
35924
+ const payload = await parseJson(response);
35925
+ if (!payload.ok) {
35926
+ throw new Error(payload.error ?? "Native backend comparison benchmark run failed");
35927
+ }
35928
+ return payload;
35929
+ },
35930
+ async saveNativeBackendComparisonBenchmarkSnapshot(input) {
35931
+ const response = await fetchImpl(`${basePath}/compare/retrieval/benchmarks/native-backend-comparison/snapshots`, {
35932
+ body: JSON.stringify({
35933
+ createdAt: input?.createdAt,
35934
+ description: input?.description,
35935
+ label: input?.label,
35936
+ limit: input?.limit,
35937
+ metadata: input?.metadata,
35938
+ snapshotMetadata: input?.snapshotMetadata,
35939
+ version: input?.version
35940
+ }),
35941
+ headers: jsonHeaders,
35942
+ method: "POST"
35943
+ });
35944
+ if (!response.ok) {
35945
+ throw new Error(await toErrorMessage3(response));
35946
+ }
35947
+ const payload = await parseJson(response);
35948
+ if (!payload.ok) {
35949
+ throw new Error(payload.error ?? "Native backend comparison benchmark snapshot failed");
35950
+ }
35951
+ return payload;
35952
+ },
33943
35953
  async retrievalLaneHandoffs(input) {
33944
35954
  const searchParams = new URLSearchParams;
33945
35955
  if (input?.groupKey) {
@@ -34876,6 +36886,10 @@ export {
34876
36886
  createRAGQueryTransform,
34877
36887
  createRAGPDFOCRExtractor,
34878
36888
  createRAGOCRProvider,
36889
+ createRAGNativeBackendComparisonBenchmarkSuite,
36890
+ createRAGNativeBackendComparisonBenchmarkSnapshot,
36891
+ createRAGNativeBackendBenchmarkMockEmbedding,
36892
+ createRAGNativeBackendBenchmarkCorpus,
34879
36893
  createRAGMediaTranscriber,
34880
36894
  createRAGMediaFileExtractor,
34881
36895
  createRAGImageOCRExtractor,
@@ -34973,5 +36987,5 @@ export {
34973
36987
  addRAGEvaluationSuiteCase
34974
36988
  };
34975
36989
 
34976
- //# debugId=025E4FAB1EEDD6C464756E2164756E21
36990
+ //# debugId=EA75EA5E660B29F864756E2164756E21
34977
36991
  //# sourceMappingURL=index.js.map