@absolutejs/absolute 0.19.0-beta.619 → 0.19.0-beta.620

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -782,6 +782,12 @@ var formatMediaTimestamp = (value) => {
782
782
  const milliseconds = Math.floor(value % 1000);
783
783
  return `${String(minutes).padStart(2, "0")}:${String(seconds).padStart(2, "0")}.${String(milliseconds).padStart(3, "0")}`;
784
784
  };
785
+ var formatMediaDurationLabel = (value) => {
786
+ if (typeof value !== "number" || !Number.isFinite(value) || value < 0) {
787
+ return;
788
+ }
789
+ return formatMediaTimestamp(value);
790
+ };
785
791
  var buildLocatorLabel = (metadata, source, title) => {
786
792
  if (!metadata) {
787
793
  return;
@@ -844,6 +850,12 @@ var buildProvenanceLabel = (metadata) => {
844
850
  const sentAt = formatTimestampLabel(metadata.sentAt) ?? formatTimestampLabel(metadata.receivedAt);
845
851
  const speaker = getContextString(metadata.speaker);
846
852
  const mediaKind = getContextString(metadata.mediaKind);
853
+ const mediaSegmentCount = getContextNumber(metadata.mediaSegmentCount);
854
+ const mediaSegmentGroupSize = getContextNumber(metadata.mediaSegmentGroupSize);
855
+ const mediaSegmentGroupIndex = getContextNumber(metadata.mediaSegmentGroupIndex);
856
+ const mediaChannel = getContextString(metadata.mediaChannel);
857
+ const mediaSpeakerCount = getContextNumber(metadata.mediaSpeakerCount);
858
+ const mediaDurationLabel = formatMediaDurationLabel(metadata.mediaDurationMs);
847
859
  const transcriptSource = getContextString(metadata.transcriptSource);
848
860
  const pdfTextMode = getContextString(metadata.pdfTextMode);
849
861
  const ocrEngine = getContextString(metadata.ocrEngine);
@@ -853,6 +865,12 @@ var buildProvenanceLabel = (metadata) => {
853
865
  ocrEngine ? `OCR ${ocrEngine}` : "",
854
866
  typeof ocrConfidence === "number" ? `Confidence ${ocrConfidence.toFixed(2)}` : "",
855
867
  mediaKind ? `Media ${mediaKind}` : "",
868
+ mediaSegmentCount ? `${mediaSegmentCount} segments` : "",
869
+ mediaSegmentGroupSize ? `${mediaSegmentGroupSize} grouped segments` : "",
870
+ mediaSegmentGroupIndex !== undefined ? `Segment group ${mediaSegmentGroupIndex + 1}` : "",
871
+ mediaChannel ? `Channel ${mediaChannel}` : "",
872
+ mediaSpeakerCount ? `${mediaSpeakerCount} speakers` : "",
873
+ mediaDurationLabel ? `Duration ${mediaDurationLabel}` : "",
856
874
  transcriptSource ? `Transcript ${transcriptSource}` : "",
857
875
  threadTopic ? `Thread ${threadTopic}` : "",
858
876
  speaker ? `Speaker ${speaker}` : "",
@@ -1208,6 +1226,34 @@ var getAttachmentName2 = (source, title) => {
1208
1226
  }
1209
1227
  return;
1210
1228
  };
1229
+ var getSpreadsheetHeaders = (metadata) => Array.isArray(metadata?.spreadsheetHeaders) ? metadata.spreadsheetHeaders.map((value) => getContextString2(value)).filter((value) => typeof value === "string") : [];
1230
+ var formatSpreadsheetRowRange = (rowStart, rowEnd) => {
1231
+ if (typeof rowStart !== "number" || !Number.isFinite(rowStart)) {
1232
+ return;
1233
+ }
1234
+ if (typeof rowEnd !== "number" && typeof rowStart === "number" && Number.isFinite(rowStart)) {
1235
+ return `Rows ${rowStart}`;
1236
+ }
1237
+ if (rowStart === rowEnd) {
1238
+ return `Rows ${rowStart}`;
1239
+ }
1240
+ return `Rows ${rowStart}-${rowEnd}`;
1241
+ };
1242
+ var formatSpreadsheetTableLabel = (tableIndex, tableCount) => {
1243
+ if (typeof tableIndex !== "number" || !Number.isFinite(tableIndex) || tableIndex < 1) {
1244
+ return;
1245
+ }
1246
+ if (typeof tableCount === "number" && Number.isFinite(tableCount) && tableCount >= tableIndex) {
1247
+ return `Table ${tableIndex} of ${tableCount}`;
1248
+ }
1249
+ return `Table ${tableIndex}`;
1250
+ };
1251
+ var formatMediaDurationLabel2 = (value) => {
1252
+ if (typeof value !== "number" || !Number.isFinite(value) || value < 0) {
1253
+ return;
1254
+ }
1255
+ return formatMediaTimestamp2(value);
1256
+ };
1211
1257
  var buildContextLabel2 = (metadata) => {
1212
1258
  if (!metadata) {
1213
1259
  return;
@@ -1233,29 +1279,61 @@ var buildContextLabel2 = (metadata) => {
1233
1279
  }
1234
1280
  const emailKind = getContextString2(metadata.emailKind);
1235
1281
  if (emailKind === "attachment") {
1236
- return "Attachment evidence";
1282
+ const attachmentName = getContextString2(metadata.attachmentName);
1283
+ const threadTopic2 = getContextString2(metadata.threadTopic);
1284
+ return attachmentName ? threadTopic2 ? `Attachment evidence ${attachmentName} in ${threadTopic2}` : `Attachment evidence ${attachmentName}` : "Attachment evidence";
1237
1285
  }
1238
1286
  if (emailKind === "message") {
1287
+ const threadTopic2 = getContextString2(metadata.threadTopic);
1239
1288
  const from = getContextString2(metadata.from);
1289
+ if (threadTopic2) {
1290
+ return from ? `Message in ${threadTopic2} from ${from}` : `Message in ${threadTopic2}`;
1291
+ }
1240
1292
  return from ? `Message from ${from}` : "Message evidence";
1241
1293
  }
1242
1294
  const page = getContextNumber2(metadata.page) ?? getContextNumber2(metadata.pageNumber) ?? (typeof metadata.pageIndex === "number" ? metadata.pageIndex + 1 : undefined);
1243
1295
  const region = getContextNumber2(metadata.regionNumber) ?? (typeof metadata.regionIndex === "number" ? metadata.regionIndex + 1 : undefined);
1296
+ const hasOCRTrace = typeof metadata.ocrRegionConfidence === "number" || typeof metadata.ocrConfidence === "number" || getContextString2(metadata.pdfTextMode) === "ocr" || typeof metadata.ocrRegionCount === "number";
1244
1297
  if (page && region) {
1298
+ if (hasOCRTrace) {
1299
+ return `OCR page ${page} region ${region}`;
1300
+ }
1245
1301
  return `Page ${page} region ${region}`;
1246
1302
  }
1247
1303
  if (page) {
1304
+ if (hasOCRTrace) {
1305
+ return `OCR page ${page}`;
1306
+ }
1248
1307
  return `Page ${page}`;
1249
1308
  }
1250
1309
  const sheet = getContextString2(metadata.sheetName) ?? (Array.isArray(metadata.sheetNames) ? getContextString2(metadata.sheetNames[0]) : undefined);
1251
1310
  if (sheet) {
1311
+ const tableLabel = formatSpreadsheetTableLabel(getContextNumber2(metadata.spreadsheetTableIndex), getContextNumber2(metadata.spreadsheetTableCount));
1312
+ const rowRange = formatSpreadsheetRowRange(getContextNumber2(metadata.spreadsheetRowStart), getContextNumber2(metadata.spreadsheetRowEnd));
1313
+ const headers = getSpreadsheetHeaders(metadata);
1314
+ if (tableLabel && rowRange) {
1315
+ return `Sheet ${sheet} ${tableLabel} ${rowRange}`;
1316
+ }
1317
+ if (tableLabel) {
1318
+ return `Sheet ${sheet} ${tableLabel}`;
1319
+ }
1320
+ if (rowRange) {
1321
+ return `Sheet ${sheet} ${rowRange}`;
1322
+ }
1323
+ if (headers.length > 0) {
1324
+ return `Sheet ${sheet} by ${headers.slice(0, 2).join(", ")}`;
1325
+ }
1252
1326
  return `Sheet ${sheet}`;
1253
1327
  }
1254
1328
  const slide = getContextNumber2(metadata.slide) ?? getContextNumber2(metadata.slideNumber) ?? (typeof metadata.slideIndex === "number" ? metadata.slideIndex + 1 : undefined);
1329
+ const slideTitle = getContextString2(metadata.slideTitle);
1255
1330
  if (slide) {
1331
+ if (slideTitle) {
1332
+ return `Slide ${slide} ${slideTitle}`;
1333
+ }
1256
1334
  return `Slide ${slide}`;
1257
1335
  }
1258
- const archiveEntry = getContextString2(metadata.archiveEntryPath) ?? getContextString2(metadata.entryPath);
1336
+ const archiveEntry = getContextString2(metadata.archiveFullPath) ?? getContextString2(metadata.archivePath) ?? getContextString2(metadata.archiveEntryPath) ?? getContextString2(metadata.entryPath);
1259
1337
  if (archiveEntry) {
1260
1338
  return `Archive entry ${archiveEntry}`;
1261
1339
  }
@@ -1280,6 +1358,9 @@ var buildLocatorLabel2 = (metadata, source, title) => {
1280
1358
  const officeBlockKind = getContextString2(metadata.officeBlockKind);
1281
1359
  const pdfBlockNumber = getContextNumber2(metadata.pdfBlockNumber);
1282
1360
  const officeBlockNumber = getContextNumber2(metadata.officeBlockNumber);
1361
+ const spreadsheetRowStart = getContextNumber2(metadata.spreadsheetRowStart);
1362
+ const spreadsheetRowEnd = getContextNumber2(metadata.spreadsheetRowEnd);
1363
+ const slideTitle = getContextString2(metadata.slideTitle);
1283
1364
  const page = getContextNumber2(metadata.page) ?? getContextNumber2(metadata.pageNumber) ?? (typeof metadata.pageIndex === "number" ? metadata.pageIndex + 1 : undefined);
1284
1365
  const region = getContextNumber2(metadata.regionNumber) ?? (typeof metadata.regionIndex === "number" ? metadata.regionIndex + 1 : undefined);
1285
1366
  if (page && region) {
@@ -1296,19 +1377,31 @@ var buildLocatorLabel2 = (metadata, source, title) => {
1296
1377
  }
1297
1378
  const sheet = getContextString2(metadata.sheetName) ?? (Array.isArray(metadata.sheetNames) ? getContextString2(metadata.sheetNames[0]) : undefined);
1298
1379
  if (sheet) {
1299
- return `Sheet ${sheet}`;
1380
+ const tableLabel = formatSpreadsheetTableLabel(getContextNumber2(metadata.spreadsheetTableIndex), getContextNumber2(metadata.spreadsheetTableCount));
1381
+ const rowRange = formatSpreadsheetRowRange(spreadsheetRowStart, spreadsheetRowEnd);
1382
+ if (tableLabel && rowRange) {
1383
+ return `Sheet ${sheet} · ${tableLabel} · ${rowRange}`;
1384
+ }
1385
+ if (tableLabel) {
1386
+ return `Sheet ${sheet} · ${tableLabel}`;
1387
+ }
1388
+ return rowRange ? `Sheet ${sheet} · ${rowRange}` : `Sheet ${sheet}`;
1300
1389
  }
1301
1390
  const slide = getContextNumber2(metadata.slide) ?? getContextNumber2(metadata.slideNumber) ?? (typeof metadata.slideIndex === "number" ? metadata.slideIndex + 1 : undefined);
1302
1391
  if (slide) {
1303
- return `Slide ${slide}`;
1392
+ return slideTitle ? `Slide ${slide} · ${slideTitle}` : `Slide ${slide}`;
1304
1393
  }
1305
- const archiveEntry = getContextString2(metadata.archiveEntryPath) ?? getContextString2(metadata.entryPath);
1394
+ const archiveEntry = getContextString2(metadata.archiveFullPath) ?? getContextString2(metadata.archivePath) ?? getContextString2(metadata.archiveEntryPath) ?? getContextString2(metadata.entryPath);
1306
1395
  if (archiveEntry) {
1307
1396
  return `Archive entry ${archiveEntry}`;
1308
1397
  }
1309
1398
  const emailKind = getContextString2(metadata.emailKind);
1310
1399
  if (emailKind === "attachment") {
1311
1400
  const attachmentName = getContextString2(metadata.attachmentName) ?? getAttachmentName2(source, title);
1401
+ const replyDepth = getContextNumber2(metadata.replyDepth);
1402
+ if (attachmentName && replyDepth && replyDepth > 0) {
1403
+ return `Attachment ${attachmentName} · Reply depth ${replyDepth}`;
1404
+ }
1312
1405
  return attachmentName ? `Attachment ${attachmentName}` : "Attachment";
1313
1406
  }
1314
1407
  const mediaStart = formatMediaTimestamp2(metadata.startMs);
@@ -1339,18 +1432,36 @@ var buildProvenanceLabel2 = (metadata) => {
1339
1432
  return;
1340
1433
  }
1341
1434
  const threadTopic = getContextString2(metadata.threadTopic);
1435
+ const replyDepth = getContextNumber2(metadata.replyDepth);
1436
+ const threadMessageCount = getContextNumber2(metadata.threadMessageCount);
1437
+ const threadRootMessageId = getContextString2(metadata.threadRootMessageId);
1342
1438
  const from = getContextString2(metadata.from);
1343
1439
  const sentAt = formatTimestampLabel2(metadata.sentAt) ?? formatTimestampLabel2(metadata.receivedAt);
1344
1440
  const speaker = getContextString2(metadata.speaker);
1345
1441
  const mediaKind = getContextString2(metadata.mediaKind);
1346
1442
  const transcriptSource = getContextString2(metadata.transcriptSource);
1443
+ const mediaSpeakerCount = getContextNumber2(metadata.mediaSpeakerCount);
1444
+ const mediaSegmentCount = getContextNumber2(metadata.mediaSegmentCount);
1445
+ const mediaSegmentGroupSize = getContextNumber2(metadata.mediaSegmentGroupSize);
1446
+ const mediaSegmentGroupIndex = getContextNumber2(metadata.mediaSegmentGroupIndex);
1447
+ const mediaChannel = getContextString2(metadata.mediaChannel);
1448
+ const mediaDurationLabel = formatMediaDurationLabel2(metadata.mediaDurationMs);
1449
+ const spreadsheetHeaders = getSpreadsheetHeaders(metadata);
1450
+ const slideNotesText = getContextString2(metadata.slideNotesText);
1347
1451
  const pdfTextMode = getContextString2(metadata.pdfTextMode);
1348
1452
  const pdfTextKind = getContextString2(metadata.pdfTextKind);
1349
1453
  const officeBlockKind = getContextString2(metadata.officeBlockKind);
1350
1454
  const ocrEngine = getContextString2(metadata.ocrEngine);
1351
1455
  const extractorRegistryMatch = getContextString2(metadata.extractorRegistryMatch);
1352
1456
  const chunkingProfile = getContextString2(metadata.chunkingProfile);
1457
+ const archiveDepth = getContextNumber2(metadata.archiveDepth);
1458
+ const archiveNestedDepth = getContextNumber2(metadata.archiveNestedDepth);
1459
+ const archiveContainerPath = getContextString2(metadata.archiveContainerPath);
1460
+ const archiveRootName = getContextString2(metadata.archiveRootName);
1461
+ const spreadsheetTableLabel = formatSpreadsheetTableLabel(getContextNumber2(metadata.spreadsheetTableIndex), getContextNumber2(metadata.spreadsheetTableCount));
1353
1462
  const ocrConfidence = getContextNumber2(metadata.ocrRegionConfidence) ?? getContextNumber2(metadata.ocrConfidence);
1463
+ const ocrAverageConfidence = getContextNumber2(metadata.ocrPageAverageConfidence) ?? getContextNumber2(metadata.ocrAverageConfidence);
1464
+ const ocrRegionCount = getContextNumber2(metadata.ocrRegionCount);
1354
1465
  const labels = [
1355
1466
  pdfTextMode ? `PDF ${pdfTextMode}` : "",
1356
1467
  pdfTextKind === "table_like" ? "PDF table block" : pdfTextKind === "paragraph" ? "PDF text block" : "",
@@ -1359,9 +1470,27 @@ var buildProvenanceLabel2 = (metadata) => {
1359
1470
  extractorRegistryMatch ? `Extractor ${extractorRegistryMatch}` : "",
1360
1471
  chunkingProfile ? `Chunking ${chunkingProfile}` : "",
1361
1472
  typeof ocrConfidence === "number" ? `Confidence ${ocrConfidence.toFixed(2)}` : "",
1473
+ typeof ocrAverageConfidence === "number" && ocrAverageConfidence !== ocrConfidence ? `Average ${ocrAverageConfidence.toFixed(2)}` : "",
1474
+ typeof ocrRegionCount === "number" ? `${ocrRegionCount} regions` : "",
1475
+ spreadsheetHeaders.length > 0 ? `Spreadsheet ${spreadsheetHeaders.join(", ")}` : "",
1476
+ spreadsheetTableLabel ? `Spreadsheet ${spreadsheetTableLabel}` : "",
1362
1477
  mediaKind ? `Media ${mediaKind}` : "",
1478
+ mediaSegmentCount ? `${mediaSegmentCount} segments` : "",
1479
+ mediaSegmentGroupSize ? `${mediaSegmentGroupSize} grouped segments` : "",
1480
+ mediaSegmentGroupIndex !== undefined ? `Segment group ${mediaSegmentGroupIndex + 1}` : "",
1481
+ mediaChannel ? `Channel ${mediaChannel}` : "",
1482
+ mediaSpeakerCount ? `${mediaSpeakerCount} speakers` : "",
1483
+ mediaDurationLabel ? `Duration ${mediaDurationLabel}` : "",
1363
1484
  transcriptSource ? `Transcript ${transcriptSource}` : "",
1364
1485
  threadTopic ? `Thread ${threadTopic}` : "",
1486
+ threadRootMessageId ? `Thread root ${threadRootMessageId}` : "",
1487
+ threadMessageCount ? `${threadMessageCount} thread messages` : "",
1488
+ replyDepth ? `Reply depth ${replyDepth}` : "",
1489
+ slideNotesText ? "Speaker notes" : "",
1490
+ archiveDepth ? `Archive depth ${archiveDepth}` : "",
1491
+ archiveNestedDepth ? `Archive nested depth ${archiveNestedDepth}` : "",
1492
+ archiveContainerPath ? `Archive container ${archiveContainerPath}` : "",
1493
+ archiveRootName ? `Archive root ${archiveRootName}` : "",
1365
1494
  speaker ? `Speaker ${speaker}` : "",
1366
1495
  from ? `Sender ${from}` : "",
1367
1496
  sentAt ? `Sent ${sentAt}` : ""
@@ -1738,7 +1867,7 @@ var getSectionPathFromSource = (source) => {
1738
1867
  const path = source.structure?.section?.path ?? (Array.isArray(source.metadata?.sectionPath) ? source.metadata.sectionPath.map((value) => getContextString2(value)).filter((value) => typeof value === "string") : []);
1739
1868
  return path.length > 0 ? path : undefined;
1740
1869
  };
1741
- var isBlockAwareContextLabel = (value) => typeof value === "string" && (value.startsWith("PDF ") || value.startsWith("Office "));
1870
+ var isBlockAwareContextLabel = (value) => typeof value === "string" && (value.startsWith("PDF ") || value.startsWith("Office ") || value.startsWith("Slide "));
1742
1871
  var getStructuredSectionScoreWeight = (metadata) => {
1743
1872
  if (!metadata) {
1744
1873
  return 1;
@@ -1746,6 +1875,8 @@ var getStructuredSectionScoreWeight = (metadata) => {
1746
1875
  const pdfTextKind = getContextString2(metadata.pdfTextKind);
1747
1876
  const officeBlockKind = getContextString2(metadata.officeBlockKind);
1748
1877
  const sectionKind = getContextString2(metadata.sectionKind);
1878
+ const slideTitle = getContextString2(metadata.slideTitle);
1879
+ const slideNotesText = getContextString2(metadata.slideNotesText);
1749
1880
  if (pdfTextKind === "table_like") {
1750
1881
  return 1.28;
1751
1882
  }
@@ -1755,6 +1886,12 @@ var getStructuredSectionScoreWeight = (metadata) => {
1755
1886
  if (sectionKind === "pdf_block" || sectionKind === "office_block" || officeBlockKind === "paragraph" || pdfTextKind === "paragraph") {
1756
1887
  return 1.12;
1757
1888
  }
1889
+ if (sectionKind === "presentation_slide" && slideNotesText) {
1890
+ return 1.2;
1891
+ }
1892
+ if (sectionKind === "presentation_slide" && slideTitle) {
1893
+ return 1.14;
1894
+ }
1758
1895
  return 1;
1759
1896
  };
1760
1897
  var getStructuredSourceLeadScore = (source) => source.score * getStructuredSectionScoreWeight(source.metadata);
@@ -742,6 +742,12 @@ var formatMediaTimestamp = (value) => {
742
742
  const milliseconds = Math.floor(value % 1000);
743
743
  return `${String(minutes).padStart(2, "0")}:${String(seconds).padStart(2, "0")}.${String(milliseconds).padStart(3, "0")}`;
744
744
  };
745
+ var formatMediaDurationLabel = (value) => {
746
+ if (typeof value !== "number" || !Number.isFinite(value) || value < 0) {
747
+ return;
748
+ }
749
+ return formatMediaTimestamp(value);
750
+ };
745
751
  var buildLocatorLabel = (metadata, source, title) => {
746
752
  if (!metadata) {
747
753
  return;
@@ -804,6 +810,12 @@ var buildProvenanceLabel = (metadata) => {
804
810
  const sentAt = formatTimestampLabel(metadata.sentAt) ?? formatTimestampLabel(metadata.receivedAt);
805
811
  const speaker = getContextString(metadata.speaker);
806
812
  const mediaKind = getContextString(metadata.mediaKind);
813
+ const mediaSegmentCount = getContextNumber(metadata.mediaSegmentCount);
814
+ const mediaSegmentGroupSize = getContextNumber(metadata.mediaSegmentGroupSize);
815
+ const mediaSegmentGroupIndex = getContextNumber(metadata.mediaSegmentGroupIndex);
816
+ const mediaChannel = getContextString(metadata.mediaChannel);
817
+ const mediaSpeakerCount = getContextNumber(metadata.mediaSpeakerCount);
818
+ const mediaDurationLabel = formatMediaDurationLabel(metadata.mediaDurationMs);
807
819
  const transcriptSource = getContextString(metadata.transcriptSource);
808
820
  const pdfTextMode = getContextString(metadata.pdfTextMode);
809
821
  const ocrEngine = getContextString(metadata.ocrEngine);
@@ -813,6 +825,12 @@ var buildProvenanceLabel = (metadata) => {
813
825
  ocrEngine ? `OCR ${ocrEngine}` : "",
814
826
  typeof ocrConfidence === "number" ? `Confidence ${ocrConfidence.toFixed(2)}` : "",
815
827
  mediaKind ? `Media ${mediaKind}` : "",
828
+ mediaSegmentCount ? `${mediaSegmentCount} segments` : "",
829
+ mediaSegmentGroupSize ? `${mediaSegmentGroupSize} grouped segments` : "",
830
+ mediaSegmentGroupIndex !== undefined ? `Segment group ${mediaSegmentGroupIndex + 1}` : "",
831
+ mediaChannel ? `Channel ${mediaChannel}` : "",
832
+ mediaSpeakerCount ? `${mediaSpeakerCount} speakers` : "",
833
+ mediaDurationLabel ? `Duration ${mediaDurationLabel}` : "",
816
834
  transcriptSource ? `Transcript ${transcriptSource}` : "",
817
835
  threadTopic ? `Thread ${threadTopic}` : "",
818
836
  speaker ? `Speaker ${speaker}` : "",
@@ -1168,6 +1186,34 @@ var getAttachmentName2 = (source, title) => {
1168
1186
  }
1169
1187
  return;
1170
1188
  };
1189
+ var getSpreadsheetHeaders = (metadata) => Array.isArray(metadata?.spreadsheetHeaders) ? metadata.spreadsheetHeaders.map((value) => getContextString2(value)).filter((value) => typeof value === "string") : [];
1190
+ var formatSpreadsheetRowRange = (rowStart, rowEnd) => {
1191
+ if (typeof rowStart !== "number" || !Number.isFinite(rowStart)) {
1192
+ return;
1193
+ }
1194
+ if (typeof rowEnd !== "number" && typeof rowStart === "number" && Number.isFinite(rowStart)) {
1195
+ return `Rows ${rowStart}`;
1196
+ }
1197
+ if (rowStart === rowEnd) {
1198
+ return `Rows ${rowStart}`;
1199
+ }
1200
+ return `Rows ${rowStart}-${rowEnd}`;
1201
+ };
1202
+ var formatSpreadsheetTableLabel = (tableIndex, tableCount) => {
1203
+ if (typeof tableIndex !== "number" || !Number.isFinite(tableIndex) || tableIndex < 1) {
1204
+ return;
1205
+ }
1206
+ if (typeof tableCount === "number" && Number.isFinite(tableCount) && tableCount >= tableIndex) {
1207
+ return `Table ${tableIndex} of ${tableCount}`;
1208
+ }
1209
+ return `Table ${tableIndex}`;
1210
+ };
1211
+ var formatMediaDurationLabel2 = (value) => {
1212
+ if (typeof value !== "number" || !Number.isFinite(value) || value < 0) {
1213
+ return;
1214
+ }
1215
+ return formatMediaTimestamp2(value);
1216
+ };
1171
1217
  var buildContextLabel2 = (metadata) => {
1172
1218
  if (!metadata) {
1173
1219
  return;
@@ -1193,29 +1239,61 @@ var buildContextLabel2 = (metadata) => {
1193
1239
  }
1194
1240
  const emailKind = getContextString2(metadata.emailKind);
1195
1241
  if (emailKind === "attachment") {
1196
- return "Attachment evidence";
1242
+ const attachmentName = getContextString2(metadata.attachmentName);
1243
+ const threadTopic2 = getContextString2(metadata.threadTopic);
1244
+ return attachmentName ? threadTopic2 ? `Attachment evidence ${attachmentName} in ${threadTopic2}` : `Attachment evidence ${attachmentName}` : "Attachment evidence";
1197
1245
  }
1198
1246
  if (emailKind === "message") {
1247
+ const threadTopic2 = getContextString2(metadata.threadTopic);
1199
1248
  const from = getContextString2(metadata.from);
1249
+ if (threadTopic2) {
1250
+ return from ? `Message in ${threadTopic2} from ${from}` : `Message in ${threadTopic2}`;
1251
+ }
1200
1252
  return from ? `Message from ${from}` : "Message evidence";
1201
1253
  }
1202
1254
  const page = getContextNumber2(metadata.page) ?? getContextNumber2(metadata.pageNumber) ?? (typeof metadata.pageIndex === "number" ? metadata.pageIndex + 1 : undefined);
1203
1255
  const region = getContextNumber2(metadata.regionNumber) ?? (typeof metadata.regionIndex === "number" ? metadata.regionIndex + 1 : undefined);
1256
+ const hasOCRTrace = typeof metadata.ocrRegionConfidence === "number" || typeof metadata.ocrConfidence === "number" || getContextString2(metadata.pdfTextMode) === "ocr" || typeof metadata.ocrRegionCount === "number";
1204
1257
  if (page && region) {
1258
+ if (hasOCRTrace) {
1259
+ return `OCR page ${page} region ${region}`;
1260
+ }
1205
1261
  return `Page ${page} region ${region}`;
1206
1262
  }
1207
1263
  if (page) {
1264
+ if (hasOCRTrace) {
1265
+ return `OCR page ${page}`;
1266
+ }
1208
1267
  return `Page ${page}`;
1209
1268
  }
1210
1269
  const sheet = getContextString2(metadata.sheetName) ?? (Array.isArray(metadata.sheetNames) ? getContextString2(metadata.sheetNames[0]) : undefined);
1211
1270
  if (sheet) {
1271
+ const tableLabel = formatSpreadsheetTableLabel(getContextNumber2(metadata.spreadsheetTableIndex), getContextNumber2(metadata.spreadsheetTableCount));
1272
+ const rowRange = formatSpreadsheetRowRange(getContextNumber2(metadata.spreadsheetRowStart), getContextNumber2(metadata.spreadsheetRowEnd));
1273
+ const headers = getSpreadsheetHeaders(metadata);
1274
+ if (tableLabel && rowRange) {
1275
+ return `Sheet ${sheet} ${tableLabel} ${rowRange}`;
1276
+ }
1277
+ if (tableLabel) {
1278
+ return `Sheet ${sheet} ${tableLabel}`;
1279
+ }
1280
+ if (rowRange) {
1281
+ return `Sheet ${sheet} ${rowRange}`;
1282
+ }
1283
+ if (headers.length > 0) {
1284
+ return `Sheet ${sheet} by ${headers.slice(0, 2).join(", ")}`;
1285
+ }
1212
1286
  return `Sheet ${sheet}`;
1213
1287
  }
1214
1288
  const slide = getContextNumber2(metadata.slide) ?? getContextNumber2(metadata.slideNumber) ?? (typeof metadata.slideIndex === "number" ? metadata.slideIndex + 1 : undefined);
1289
+ const slideTitle = getContextString2(metadata.slideTitle);
1215
1290
  if (slide) {
1291
+ if (slideTitle) {
1292
+ return `Slide ${slide} ${slideTitle}`;
1293
+ }
1216
1294
  return `Slide ${slide}`;
1217
1295
  }
1218
- const archiveEntry = getContextString2(metadata.archiveEntryPath) ?? getContextString2(metadata.entryPath);
1296
+ const archiveEntry = getContextString2(metadata.archiveFullPath) ?? getContextString2(metadata.archivePath) ?? getContextString2(metadata.archiveEntryPath) ?? getContextString2(metadata.entryPath);
1219
1297
  if (archiveEntry) {
1220
1298
  return `Archive entry ${archiveEntry}`;
1221
1299
  }
@@ -1240,6 +1318,9 @@ var buildLocatorLabel2 = (metadata, source, title) => {
1240
1318
  const officeBlockKind = getContextString2(metadata.officeBlockKind);
1241
1319
  const pdfBlockNumber = getContextNumber2(metadata.pdfBlockNumber);
1242
1320
  const officeBlockNumber = getContextNumber2(metadata.officeBlockNumber);
1321
+ const spreadsheetRowStart = getContextNumber2(metadata.spreadsheetRowStart);
1322
+ const spreadsheetRowEnd = getContextNumber2(metadata.spreadsheetRowEnd);
1323
+ const slideTitle = getContextString2(metadata.slideTitle);
1243
1324
  const page = getContextNumber2(metadata.page) ?? getContextNumber2(metadata.pageNumber) ?? (typeof metadata.pageIndex === "number" ? metadata.pageIndex + 1 : undefined);
1244
1325
  const region = getContextNumber2(metadata.regionNumber) ?? (typeof metadata.regionIndex === "number" ? metadata.regionIndex + 1 : undefined);
1245
1326
  if (page && region) {
@@ -1256,19 +1337,31 @@ var buildLocatorLabel2 = (metadata, source, title) => {
1256
1337
  }
1257
1338
  const sheet = getContextString2(metadata.sheetName) ?? (Array.isArray(metadata.sheetNames) ? getContextString2(metadata.sheetNames[0]) : undefined);
1258
1339
  if (sheet) {
1259
- return `Sheet ${sheet}`;
1340
+ const tableLabel = formatSpreadsheetTableLabel(getContextNumber2(metadata.spreadsheetTableIndex), getContextNumber2(metadata.spreadsheetTableCount));
1341
+ const rowRange = formatSpreadsheetRowRange(spreadsheetRowStart, spreadsheetRowEnd);
1342
+ if (tableLabel && rowRange) {
1343
+ return `Sheet ${sheet} · ${tableLabel} · ${rowRange}`;
1344
+ }
1345
+ if (tableLabel) {
1346
+ return `Sheet ${sheet} · ${tableLabel}`;
1347
+ }
1348
+ return rowRange ? `Sheet ${sheet} · ${rowRange}` : `Sheet ${sheet}`;
1260
1349
  }
1261
1350
  const slide = getContextNumber2(metadata.slide) ?? getContextNumber2(metadata.slideNumber) ?? (typeof metadata.slideIndex === "number" ? metadata.slideIndex + 1 : undefined);
1262
1351
  if (slide) {
1263
- return `Slide ${slide}`;
1352
+ return slideTitle ? `Slide ${slide} · ${slideTitle}` : `Slide ${slide}`;
1264
1353
  }
1265
- const archiveEntry = getContextString2(metadata.archiveEntryPath) ?? getContextString2(metadata.entryPath);
1354
+ const archiveEntry = getContextString2(metadata.archiveFullPath) ?? getContextString2(metadata.archivePath) ?? getContextString2(metadata.archiveEntryPath) ?? getContextString2(metadata.entryPath);
1266
1355
  if (archiveEntry) {
1267
1356
  return `Archive entry ${archiveEntry}`;
1268
1357
  }
1269
1358
  const emailKind = getContextString2(metadata.emailKind);
1270
1359
  if (emailKind === "attachment") {
1271
1360
  const attachmentName = getContextString2(metadata.attachmentName) ?? getAttachmentName2(source, title);
1361
+ const replyDepth = getContextNumber2(metadata.replyDepth);
1362
+ if (attachmentName && replyDepth && replyDepth > 0) {
1363
+ return `Attachment ${attachmentName} · Reply depth ${replyDepth}`;
1364
+ }
1272
1365
  return attachmentName ? `Attachment ${attachmentName}` : "Attachment";
1273
1366
  }
1274
1367
  const mediaStart = formatMediaTimestamp2(metadata.startMs);
@@ -1299,18 +1392,36 @@ var buildProvenanceLabel2 = (metadata) => {
1299
1392
  return;
1300
1393
  }
1301
1394
  const threadTopic = getContextString2(metadata.threadTopic);
1395
+ const replyDepth = getContextNumber2(metadata.replyDepth);
1396
+ const threadMessageCount = getContextNumber2(metadata.threadMessageCount);
1397
+ const threadRootMessageId = getContextString2(metadata.threadRootMessageId);
1302
1398
  const from = getContextString2(metadata.from);
1303
1399
  const sentAt = formatTimestampLabel2(metadata.sentAt) ?? formatTimestampLabel2(metadata.receivedAt);
1304
1400
  const speaker = getContextString2(metadata.speaker);
1305
1401
  const mediaKind = getContextString2(metadata.mediaKind);
1306
1402
  const transcriptSource = getContextString2(metadata.transcriptSource);
1403
+ const mediaSpeakerCount = getContextNumber2(metadata.mediaSpeakerCount);
1404
+ const mediaSegmentCount = getContextNumber2(metadata.mediaSegmentCount);
1405
+ const mediaSegmentGroupSize = getContextNumber2(metadata.mediaSegmentGroupSize);
1406
+ const mediaSegmentGroupIndex = getContextNumber2(metadata.mediaSegmentGroupIndex);
1407
+ const mediaChannel = getContextString2(metadata.mediaChannel);
1408
+ const mediaDurationLabel = formatMediaDurationLabel2(metadata.mediaDurationMs);
1409
+ const spreadsheetHeaders = getSpreadsheetHeaders(metadata);
1410
+ const slideNotesText = getContextString2(metadata.slideNotesText);
1307
1411
  const pdfTextMode = getContextString2(metadata.pdfTextMode);
1308
1412
  const pdfTextKind = getContextString2(metadata.pdfTextKind);
1309
1413
  const officeBlockKind = getContextString2(metadata.officeBlockKind);
1310
1414
  const ocrEngine = getContextString2(metadata.ocrEngine);
1311
1415
  const extractorRegistryMatch = getContextString2(metadata.extractorRegistryMatch);
1312
1416
  const chunkingProfile = getContextString2(metadata.chunkingProfile);
1417
+ const archiveDepth = getContextNumber2(metadata.archiveDepth);
1418
+ const archiveNestedDepth = getContextNumber2(metadata.archiveNestedDepth);
1419
+ const archiveContainerPath = getContextString2(metadata.archiveContainerPath);
1420
+ const archiveRootName = getContextString2(metadata.archiveRootName);
1421
+ const spreadsheetTableLabel = formatSpreadsheetTableLabel(getContextNumber2(metadata.spreadsheetTableIndex), getContextNumber2(metadata.spreadsheetTableCount));
1313
1422
  const ocrConfidence = getContextNumber2(metadata.ocrRegionConfidence) ?? getContextNumber2(metadata.ocrConfidence);
1423
+ const ocrAverageConfidence = getContextNumber2(metadata.ocrPageAverageConfidence) ?? getContextNumber2(metadata.ocrAverageConfidence);
1424
+ const ocrRegionCount = getContextNumber2(metadata.ocrRegionCount);
1314
1425
  const labels = [
1315
1426
  pdfTextMode ? `PDF ${pdfTextMode}` : "",
1316
1427
  pdfTextKind === "table_like" ? "PDF table block" : pdfTextKind === "paragraph" ? "PDF text block" : "",
@@ -1319,9 +1430,27 @@ var buildProvenanceLabel2 = (metadata) => {
1319
1430
  extractorRegistryMatch ? `Extractor ${extractorRegistryMatch}` : "",
1320
1431
  chunkingProfile ? `Chunking ${chunkingProfile}` : "",
1321
1432
  typeof ocrConfidence === "number" ? `Confidence ${ocrConfidence.toFixed(2)}` : "",
1433
+ typeof ocrAverageConfidence === "number" && ocrAverageConfidence !== ocrConfidence ? `Average ${ocrAverageConfidence.toFixed(2)}` : "",
1434
+ typeof ocrRegionCount === "number" ? `${ocrRegionCount} regions` : "",
1435
+ spreadsheetHeaders.length > 0 ? `Spreadsheet ${spreadsheetHeaders.join(", ")}` : "",
1436
+ spreadsheetTableLabel ? `Spreadsheet ${spreadsheetTableLabel}` : "",
1322
1437
  mediaKind ? `Media ${mediaKind}` : "",
1438
+ mediaSegmentCount ? `${mediaSegmentCount} segments` : "",
1439
+ mediaSegmentGroupSize ? `${mediaSegmentGroupSize} grouped segments` : "",
1440
+ mediaSegmentGroupIndex !== undefined ? `Segment group ${mediaSegmentGroupIndex + 1}` : "",
1441
+ mediaChannel ? `Channel ${mediaChannel}` : "",
1442
+ mediaSpeakerCount ? `${mediaSpeakerCount} speakers` : "",
1443
+ mediaDurationLabel ? `Duration ${mediaDurationLabel}` : "",
1323
1444
  transcriptSource ? `Transcript ${transcriptSource}` : "",
1324
1445
  threadTopic ? `Thread ${threadTopic}` : "",
1446
+ threadRootMessageId ? `Thread root ${threadRootMessageId}` : "",
1447
+ threadMessageCount ? `${threadMessageCount} thread messages` : "",
1448
+ replyDepth ? `Reply depth ${replyDepth}` : "",
1449
+ slideNotesText ? "Speaker notes" : "",
1450
+ archiveDepth ? `Archive depth ${archiveDepth}` : "",
1451
+ archiveNestedDepth ? `Archive nested depth ${archiveNestedDepth}` : "",
1452
+ archiveContainerPath ? `Archive container ${archiveContainerPath}` : "",
1453
+ archiveRootName ? `Archive root ${archiveRootName}` : "",
1325
1454
  speaker ? `Speaker ${speaker}` : "",
1326
1455
  from ? `Sender ${from}` : "",
1327
1456
  sentAt ? `Sent ${sentAt}` : ""
@@ -1698,7 +1827,7 @@ var getSectionPathFromSource = (source) => {
1698
1827
  const path = source.structure?.section?.path ?? (Array.isArray(source.metadata?.sectionPath) ? source.metadata.sectionPath.map((value) => getContextString2(value)).filter((value) => typeof value === "string") : []);
1699
1828
  return path.length > 0 ? path : undefined;
1700
1829
  };
1701
- var isBlockAwareContextLabel = (value) => typeof value === "string" && (value.startsWith("PDF ") || value.startsWith("Office "));
1830
+ var isBlockAwareContextLabel = (value) => typeof value === "string" && (value.startsWith("PDF ") || value.startsWith("Office ") || value.startsWith("Slide "));
1702
1831
  var getStructuredSectionScoreWeight = (metadata) => {
1703
1832
  if (!metadata) {
1704
1833
  return 1;
@@ -1706,6 +1835,8 @@ var getStructuredSectionScoreWeight = (metadata) => {
1706
1835
  const pdfTextKind = getContextString2(metadata.pdfTextKind);
1707
1836
  const officeBlockKind = getContextString2(metadata.officeBlockKind);
1708
1837
  const sectionKind = getContextString2(metadata.sectionKind);
1838
+ const slideTitle = getContextString2(metadata.slideTitle);
1839
+ const slideNotesText = getContextString2(metadata.slideNotesText);
1709
1840
  if (pdfTextKind === "table_like") {
1710
1841
  return 1.28;
1711
1842
  }
@@ -1715,6 +1846,12 @@ var getStructuredSectionScoreWeight = (metadata) => {
1715
1846
  if (sectionKind === "pdf_block" || sectionKind === "office_block" || officeBlockKind === "paragraph" || pdfTextKind === "paragraph") {
1716
1847
  return 1.12;
1717
1848
  }
1849
+ if (sectionKind === "presentation_slide" && slideNotesText) {
1850
+ return 1.2;
1851
+ }
1852
+ if (sectionKind === "presentation_slide" && slideTitle) {
1853
+ return 1.14;
1854
+ }
1718
1855
  return 1;
1719
1856
  };
1720
1857
  var getStructuredSourceLeadScore = (source) => source.score * getStructuredSectionScoreWeight(source.metadata);