@absolutejs/absolute 0.19.0-beta.619 → 0.19.0-beta.620
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/ai/client/index.js +144 -7
- package/dist/ai/client/index.js.map +4 -4
- package/dist/ai/client/ui.js +144 -7
- package/dist/ai/client/ui.js.map +4 -4
- package/dist/ai/index.js +624 -66
- package/dist/ai/index.js.map +6 -6
- package/dist/ai/rag/quality.js +19 -1
- package/dist/ai/rag/quality.js.map +3 -3
- package/dist/ai/rag/ui.js +144 -7
- package/dist/ai/rag/ui.js.map +4 -4
- package/dist/ai-client/angular/ai/index.js +143 -6
- package/dist/ai-client/react/ai/index.js +143 -6
- package/dist/ai-client/vue/ai/index.js +143 -6
- package/dist/angular/ai/index.js +144 -7
- package/dist/angular/ai/index.js.map +4 -4
- package/dist/react/ai/index.js +144 -7
- package/dist/react/ai/index.js.map +4 -4
- package/dist/svelte/ai/index.js +144 -7
- package/dist/svelte/ai/index.js.map +4 -4
- package/dist/types/ai.d.ts +1 -0
- package/dist/vue/ai/index.js +144 -7
- package/dist/vue/ai/index.js.map +4 -4
- package/package.json +1 -1
|
@@ -782,6 +782,12 @@ var formatMediaTimestamp = (value) => {
|
|
|
782
782
|
const milliseconds = Math.floor(value % 1000);
|
|
783
783
|
return `${String(minutes).padStart(2, "0")}:${String(seconds).padStart(2, "0")}.${String(milliseconds).padStart(3, "0")}`;
|
|
784
784
|
};
|
|
785
|
+
var formatMediaDurationLabel = (value) => {
|
|
786
|
+
if (typeof value !== "number" || !Number.isFinite(value) || value < 0) {
|
|
787
|
+
return;
|
|
788
|
+
}
|
|
789
|
+
return formatMediaTimestamp(value);
|
|
790
|
+
};
|
|
785
791
|
var buildLocatorLabel = (metadata, source, title) => {
|
|
786
792
|
if (!metadata) {
|
|
787
793
|
return;
|
|
@@ -844,6 +850,12 @@ var buildProvenanceLabel = (metadata) => {
|
|
|
844
850
|
const sentAt = formatTimestampLabel(metadata.sentAt) ?? formatTimestampLabel(metadata.receivedAt);
|
|
845
851
|
const speaker = getContextString(metadata.speaker);
|
|
846
852
|
const mediaKind = getContextString(metadata.mediaKind);
|
|
853
|
+
const mediaSegmentCount = getContextNumber(metadata.mediaSegmentCount);
|
|
854
|
+
const mediaSegmentGroupSize = getContextNumber(metadata.mediaSegmentGroupSize);
|
|
855
|
+
const mediaSegmentGroupIndex = getContextNumber(metadata.mediaSegmentGroupIndex);
|
|
856
|
+
const mediaChannel = getContextString(metadata.mediaChannel);
|
|
857
|
+
const mediaSpeakerCount = getContextNumber(metadata.mediaSpeakerCount);
|
|
858
|
+
const mediaDurationLabel = formatMediaDurationLabel(metadata.mediaDurationMs);
|
|
847
859
|
const transcriptSource = getContextString(metadata.transcriptSource);
|
|
848
860
|
const pdfTextMode = getContextString(metadata.pdfTextMode);
|
|
849
861
|
const ocrEngine = getContextString(metadata.ocrEngine);
|
|
@@ -853,6 +865,12 @@ var buildProvenanceLabel = (metadata) => {
|
|
|
853
865
|
ocrEngine ? `OCR ${ocrEngine}` : "",
|
|
854
866
|
typeof ocrConfidence === "number" ? `Confidence ${ocrConfidence.toFixed(2)}` : "",
|
|
855
867
|
mediaKind ? `Media ${mediaKind}` : "",
|
|
868
|
+
mediaSegmentCount ? `${mediaSegmentCount} segments` : "",
|
|
869
|
+
mediaSegmentGroupSize ? `${mediaSegmentGroupSize} grouped segments` : "",
|
|
870
|
+
mediaSegmentGroupIndex !== undefined ? `Segment group ${mediaSegmentGroupIndex + 1}` : "",
|
|
871
|
+
mediaChannel ? `Channel ${mediaChannel}` : "",
|
|
872
|
+
mediaSpeakerCount ? `${mediaSpeakerCount} speakers` : "",
|
|
873
|
+
mediaDurationLabel ? `Duration ${mediaDurationLabel}` : "",
|
|
856
874
|
transcriptSource ? `Transcript ${transcriptSource}` : "",
|
|
857
875
|
threadTopic ? `Thread ${threadTopic}` : "",
|
|
858
876
|
speaker ? `Speaker ${speaker}` : "",
|
|
@@ -1208,6 +1226,34 @@ var getAttachmentName2 = (source, title) => {
|
|
|
1208
1226
|
}
|
|
1209
1227
|
return;
|
|
1210
1228
|
};
|
|
1229
|
+
var getSpreadsheetHeaders = (metadata) => Array.isArray(metadata?.spreadsheetHeaders) ? metadata.spreadsheetHeaders.map((value) => getContextString2(value)).filter((value) => typeof value === "string") : [];
|
|
1230
|
+
var formatSpreadsheetRowRange = (rowStart, rowEnd) => {
|
|
1231
|
+
if (typeof rowStart !== "number" || !Number.isFinite(rowStart)) {
|
|
1232
|
+
return;
|
|
1233
|
+
}
|
|
1234
|
+
if (typeof rowEnd !== "number" && typeof rowStart === "number" && Number.isFinite(rowStart)) {
|
|
1235
|
+
return `Rows ${rowStart}`;
|
|
1236
|
+
}
|
|
1237
|
+
if (rowStart === rowEnd) {
|
|
1238
|
+
return `Rows ${rowStart}`;
|
|
1239
|
+
}
|
|
1240
|
+
return `Rows ${rowStart}-${rowEnd}`;
|
|
1241
|
+
};
|
|
1242
|
+
var formatSpreadsheetTableLabel = (tableIndex, tableCount) => {
|
|
1243
|
+
if (typeof tableIndex !== "number" || !Number.isFinite(tableIndex) || tableIndex < 1) {
|
|
1244
|
+
return;
|
|
1245
|
+
}
|
|
1246
|
+
if (typeof tableCount === "number" && Number.isFinite(tableCount) && tableCount >= tableIndex) {
|
|
1247
|
+
return `Table ${tableIndex} of ${tableCount}`;
|
|
1248
|
+
}
|
|
1249
|
+
return `Table ${tableIndex}`;
|
|
1250
|
+
};
|
|
1251
|
+
var formatMediaDurationLabel2 = (value) => {
|
|
1252
|
+
if (typeof value !== "number" || !Number.isFinite(value) || value < 0) {
|
|
1253
|
+
return;
|
|
1254
|
+
}
|
|
1255
|
+
return formatMediaTimestamp2(value);
|
|
1256
|
+
};
|
|
1211
1257
|
var buildContextLabel2 = (metadata) => {
|
|
1212
1258
|
if (!metadata) {
|
|
1213
1259
|
return;
|
|
@@ -1233,29 +1279,61 @@ var buildContextLabel2 = (metadata) => {
|
|
|
1233
1279
|
}
|
|
1234
1280
|
const emailKind = getContextString2(metadata.emailKind);
|
|
1235
1281
|
if (emailKind === "attachment") {
|
|
1236
|
-
|
|
1282
|
+
const attachmentName = getContextString2(metadata.attachmentName);
|
|
1283
|
+
const threadTopic2 = getContextString2(metadata.threadTopic);
|
|
1284
|
+
return attachmentName ? threadTopic2 ? `Attachment evidence ${attachmentName} in ${threadTopic2}` : `Attachment evidence ${attachmentName}` : "Attachment evidence";
|
|
1237
1285
|
}
|
|
1238
1286
|
if (emailKind === "message") {
|
|
1287
|
+
const threadTopic2 = getContextString2(metadata.threadTopic);
|
|
1239
1288
|
const from = getContextString2(metadata.from);
|
|
1289
|
+
if (threadTopic2) {
|
|
1290
|
+
return from ? `Message in ${threadTopic2} from ${from}` : `Message in ${threadTopic2}`;
|
|
1291
|
+
}
|
|
1240
1292
|
return from ? `Message from ${from}` : "Message evidence";
|
|
1241
1293
|
}
|
|
1242
1294
|
const page = getContextNumber2(metadata.page) ?? getContextNumber2(metadata.pageNumber) ?? (typeof metadata.pageIndex === "number" ? metadata.pageIndex + 1 : undefined);
|
|
1243
1295
|
const region = getContextNumber2(metadata.regionNumber) ?? (typeof metadata.regionIndex === "number" ? metadata.regionIndex + 1 : undefined);
|
|
1296
|
+
const hasOCRTrace = typeof metadata.ocrRegionConfidence === "number" || typeof metadata.ocrConfidence === "number" || getContextString2(metadata.pdfTextMode) === "ocr" || typeof metadata.ocrRegionCount === "number";
|
|
1244
1297
|
if (page && region) {
|
|
1298
|
+
if (hasOCRTrace) {
|
|
1299
|
+
return `OCR page ${page} region ${region}`;
|
|
1300
|
+
}
|
|
1245
1301
|
return `Page ${page} region ${region}`;
|
|
1246
1302
|
}
|
|
1247
1303
|
if (page) {
|
|
1304
|
+
if (hasOCRTrace) {
|
|
1305
|
+
return `OCR page ${page}`;
|
|
1306
|
+
}
|
|
1248
1307
|
return `Page ${page}`;
|
|
1249
1308
|
}
|
|
1250
1309
|
const sheet = getContextString2(metadata.sheetName) ?? (Array.isArray(metadata.sheetNames) ? getContextString2(metadata.sheetNames[0]) : undefined);
|
|
1251
1310
|
if (sheet) {
|
|
1311
|
+
const tableLabel = formatSpreadsheetTableLabel(getContextNumber2(metadata.spreadsheetTableIndex), getContextNumber2(metadata.spreadsheetTableCount));
|
|
1312
|
+
const rowRange = formatSpreadsheetRowRange(getContextNumber2(metadata.spreadsheetRowStart), getContextNumber2(metadata.spreadsheetRowEnd));
|
|
1313
|
+
const headers = getSpreadsheetHeaders(metadata);
|
|
1314
|
+
if (tableLabel && rowRange) {
|
|
1315
|
+
return `Sheet ${sheet} ${tableLabel} ${rowRange}`;
|
|
1316
|
+
}
|
|
1317
|
+
if (tableLabel) {
|
|
1318
|
+
return `Sheet ${sheet} ${tableLabel}`;
|
|
1319
|
+
}
|
|
1320
|
+
if (rowRange) {
|
|
1321
|
+
return `Sheet ${sheet} ${rowRange}`;
|
|
1322
|
+
}
|
|
1323
|
+
if (headers.length > 0) {
|
|
1324
|
+
return `Sheet ${sheet} by ${headers.slice(0, 2).join(", ")}`;
|
|
1325
|
+
}
|
|
1252
1326
|
return `Sheet ${sheet}`;
|
|
1253
1327
|
}
|
|
1254
1328
|
const slide = getContextNumber2(metadata.slide) ?? getContextNumber2(metadata.slideNumber) ?? (typeof metadata.slideIndex === "number" ? metadata.slideIndex + 1 : undefined);
|
|
1329
|
+
const slideTitle = getContextString2(metadata.slideTitle);
|
|
1255
1330
|
if (slide) {
|
|
1331
|
+
if (slideTitle) {
|
|
1332
|
+
return `Slide ${slide} ${slideTitle}`;
|
|
1333
|
+
}
|
|
1256
1334
|
return `Slide ${slide}`;
|
|
1257
1335
|
}
|
|
1258
|
-
const archiveEntry = getContextString2(metadata.archiveEntryPath) ?? getContextString2(metadata.entryPath);
|
|
1336
|
+
const archiveEntry = getContextString2(metadata.archiveFullPath) ?? getContextString2(metadata.archivePath) ?? getContextString2(metadata.archiveEntryPath) ?? getContextString2(metadata.entryPath);
|
|
1259
1337
|
if (archiveEntry) {
|
|
1260
1338
|
return `Archive entry ${archiveEntry}`;
|
|
1261
1339
|
}
|
|
@@ -1280,6 +1358,9 @@ var buildLocatorLabel2 = (metadata, source, title) => {
|
|
|
1280
1358
|
const officeBlockKind = getContextString2(metadata.officeBlockKind);
|
|
1281
1359
|
const pdfBlockNumber = getContextNumber2(metadata.pdfBlockNumber);
|
|
1282
1360
|
const officeBlockNumber = getContextNumber2(metadata.officeBlockNumber);
|
|
1361
|
+
const spreadsheetRowStart = getContextNumber2(metadata.spreadsheetRowStart);
|
|
1362
|
+
const spreadsheetRowEnd = getContextNumber2(metadata.spreadsheetRowEnd);
|
|
1363
|
+
const slideTitle = getContextString2(metadata.slideTitle);
|
|
1283
1364
|
const page = getContextNumber2(metadata.page) ?? getContextNumber2(metadata.pageNumber) ?? (typeof metadata.pageIndex === "number" ? metadata.pageIndex + 1 : undefined);
|
|
1284
1365
|
const region = getContextNumber2(metadata.regionNumber) ?? (typeof metadata.regionIndex === "number" ? metadata.regionIndex + 1 : undefined);
|
|
1285
1366
|
if (page && region) {
|
|
@@ -1296,19 +1377,31 @@ var buildLocatorLabel2 = (metadata, source, title) => {
|
|
|
1296
1377
|
}
|
|
1297
1378
|
const sheet = getContextString2(metadata.sheetName) ?? (Array.isArray(metadata.sheetNames) ? getContextString2(metadata.sheetNames[0]) : undefined);
|
|
1298
1379
|
if (sheet) {
|
|
1299
|
-
|
|
1380
|
+
const tableLabel = formatSpreadsheetTableLabel(getContextNumber2(metadata.spreadsheetTableIndex), getContextNumber2(metadata.spreadsheetTableCount));
|
|
1381
|
+
const rowRange = formatSpreadsheetRowRange(spreadsheetRowStart, spreadsheetRowEnd);
|
|
1382
|
+
if (tableLabel && rowRange) {
|
|
1383
|
+
return `Sheet ${sheet} · ${tableLabel} · ${rowRange}`;
|
|
1384
|
+
}
|
|
1385
|
+
if (tableLabel) {
|
|
1386
|
+
return `Sheet ${sheet} · ${tableLabel}`;
|
|
1387
|
+
}
|
|
1388
|
+
return rowRange ? `Sheet ${sheet} · ${rowRange}` : `Sheet ${sheet}`;
|
|
1300
1389
|
}
|
|
1301
1390
|
const slide = getContextNumber2(metadata.slide) ?? getContextNumber2(metadata.slideNumber) ?? (typeof metadata.slideIndex === "number" ? metadata.slideIndex + 1 : undefined);
|
|
1302
1391
|
if (slide) {
|
|
1303
|
-
return `Slide ${slide}`;
|
|
1392
|
+
return slideTitle ? `Slide ${slide} · ${slideTitle}` : `Slide ${slide}`;
|
|
1304
1393
|
}
|
|
1305
|
-
const archiveEntry = getContextString2(metadata.archiveEntryPath) ?? getContextString2(metadata.entryPath);
|
|
1394
|
+
const archiveEntry = getContextString2(metadata.archiveFullPath) ?? getContextString2(metadata.archivePath) ?? getContextString2(metadata.archiveEntryPath) ?? getContextString2(metadata.entryPath);
|
|
1306
1395
|
if (archiveEntry) {
|
|
1307
1396
|
return `Archive entry ${archiveEntry}`;
|
|
1308
1397
|
}
|
|
1309
1398
|
const emailKind = getContextString2(metadata.emailKind);
|
|
1310
1399
|
if (emailKind === "attachment") {
|
|
1311
1400
|
const attachmentName = getContextString2(metadata.attachmentName) ?? getAttachmentName2(source, title);
|
|
1401
|
+
const replyDepth = getContextNumber2(metadata.replyDepth);
|
|
1402
|
+
if (attachmentName && replyDepth && replyDepth > 0) {
|
|
1403
|
+
return `Attachment ${attachmentName} · Reply depth ${replyDepth}`;
|
|
1404
|
+
}
|
|
1312
1405
|
return attachmentName ? `Attachment ${attachmentName}` : "Attachment";
|
|
1313
1406
|
}
|
|
1314
1407
|
const mediaStart = formatMediaTimestamp2(metadata.startMs);
|
|
@@ -1339,18 +1432,36 @@ var buildProvenanceLabel2 = (metadata) => {
|
|
|
1339
1432
|
return;
|
|
1340
1433
|
}
|
|
1341
1434
|
const threadTopic = getContextString2(metadata.threadTopic);
|
|
1435
|
+
const replyDepth = getContextNumber2(metadata.replyDepth);
|
|
1436
|
+
const threadMessageCount = getContextNumber2(metadata.threadMessageCount);
|
|
1437
|
+
const threadRootMessageId = getContextString2(metadata.threadRootMessageId);
|
|
1342
1438
|
const from = getContextString2(metadata.from);
|
|
1343
1439
|
const sentAt = formatTimestampLabel2(metadata.sentAt) ?? formatTimestampLabel2(metadata.receivedAt);
|
|
1344
1440
|
const speaker = getContextString2(metadata.speaker);
|
|
1345
1441
|
const mediaKind = getContextString2(metadata.mediaKind);
|
|
1346
1442
|
const transcriptSource = getContextString2(metadata.transcriptSource);
|
|
1443
|
+
const mediaSpeakerCount = getContextNumber2(metadata.mediaSpeakerCount);
|
|
1444
|
+
const mediaSegmentCount = getContextNumber2(metadata.mediaSegmentCount);
|
|
1445
|
+
const mediaSegmentGroupSize = getContextNumber2(metadata.mediaSegmentGroupSize);
|
|
1446
|
+
const mediaSegmentGroupIndex = getContextNumber2(metadata.mediaSegmentGroupIndex);
|
|
1447
|
+
const mediaChannel = getContextString2(metadata.mediaChannel);
|
|
1448
|
+
const mediaDurationLabel = formatMediaDurationLabel2(metadata.mediaDurationMs);
|
|
1449
|
+
const spreadsheetHeaders = getSpreadsheetHeaders(metadata);
|
|
1450
|
+
const slideNotesText = getContextString2(metadata.slideNotesText);
|
|
1347
1451
|
const pdfTextMode = getContextString2(metadata.pdfTextMode);
|
|
1348
1452
|
const pdfTextKind = getContextString2(metadata.pdfTextKind);
|
|
1349
1453
|
const officeBlockKind = getContextString2(metadata.officeBlockKind);
|
|
1350
1454
|
const ocrEngine = getContextString2(metadata.ocrEngine);
|
|
1351
1455
|
const extractorRegistryMatch = getContextString2(metadata.extractorRegistryMatch);
|
|
1352
1456
|
const chunkingProfile = getContextString2(metadata.chunkingProfile);
|
|
1457
|
+
const archiveDepth = getContextNumber2(metadata.archiveDepth);
|
|
1458
|
+
const archiveNestedDepth = getContextNumber2(metadata.archiveNestedDepth);
|
|
1459
|
+
const archiveContainerPath = getContextString2(metadata.archiveContainerPath);
|
|
1460
|
+
const archiveRootName = getContextString2(metadata.archiveRootName);
|
|
1461
|
+
const spreadsheetTableLabel = formatSpreadsheetTableLabel(getContextNumber2(metadata.spreadsheetTableIndex), getContextNumber2(metadata.spreadsheetTableCount));
|
|
1353
1462
|
const ocrConfidence = getContextNumber2(metadata.ocrRegionConfidence) ?? getContextNumber2(metadata.ocrConfidence);
|
|
1463
|
+
const ocrAverageConfidence = getContextNumber2(metadata.ocrPageAverageConfidence) ?? getContextNumber2(metadata.ocrAverageConfidence);
|
|
1464
|
+
const ocrRegionCount = getContextNumber2(metadata.ocrRegionCount);
|
|
1354
1465
|
const labels = [
|
|
1355
1466
|
pdfTextMode ? `PDF ${pdfTextMode}` : "",
|
|
1356
1467
|
pdfTextKind === "table_like" ? "PDF table block" : pdfTextKind === "paragraph" ? "PDF text block" : "",
|
|
@@ -1359,9 +1470,27 @@ var buildProvenanceLabel2 = (metadata) => {
|
|
|
1359
1470
|
extractorRegistryMatch ? `Extractor ${extractorRegistryMatch}` : "",
|
|
1360
1471
|
chunkingProfile ? `Chunking ${chunkingProfile}` : "",
|
|
1361
1472
|
typeof ocrConfidence === "number" ? `Confidence ${ocrConfidence.toFixed(2)}` : "",
|
|
1473
|
+
typeof ocrAverageConfidence === "number" && ocrAverageConfidence !== ocrConfidence ? `Average ${ocrAverageConfidence.toFixed(2)}` : "",
|
|
1474
|
+
typeof ocrRegionCount === "number" ? `${ocrRegionCount} regions` : "",
|
|
1475
|
+
spreadsheetHeaders.length > 0 ? `Spreadsheet ${spreadsheetHeaders.join(", ")}` : "",
|
|
1476
|
+
spreadsheetTableLabel ? `Spreadsheet ${spreadsheetTableLabel}` : "",
|
|
1362
1477
|
mediaKind ? `Media ${mediaKind}` : "",
|
|
1478
|
+
mediaSegmentCount ? `${mediaSegmentCount} segments` : "",
|
|
1479
|
+
mediaSegmentGroupSize ? `${mediaSegmentGroupSize} grouped segments` : "",
|
|
1480
|
+
mediaSegmentGroupIndex !== undefined ? `Segment group ${mediaSegmentGroupIndex + 1}` : "",
|
|
1481
|
+
mediaChannel ? `Channel ${mediaChannel}` : "",
|
|
1482
|
+
mediaSpeakerCount ? `${mediaSpeakerCount} speakers` : "",
|
|
1483
|
+
mediaDurationLabel ? `Duration ${mediaDurationLabel}` : "",
|
|
1363
1484
|
transcriptSource ? `Transcript ${transcriptSource}` : "",
|
|
1364
1485
|
threadTopic ? `Thread ${threadTopic}` : "",
|
|
1486
|
+
threadRootMessageId ? `Thread root ${threadRootMessageId}` : "",
|
|
1487
|
+
threadMessageCount ? `${threadMessageCount} thread messages` : "",
|
|
1488
|
+
replyDepth ? `Reply depth ${replyDepth}` : "",
|
|
1489
|
+
slideNotesText ? "Speaker notes" : "",
|
|
1490
|
+
archiveDepth ? `Archive depth ${archiveDepth}` : "",
|
|
1491
|
+
archiveNestedDepth ? `Archive nested depth ${archiveNestedDepth}` : "",
|
|
1492
|
+
archiveContainerPath ? `Archive container ${archiveContainerPath}` : "",
|
|
1493
|
+
archiveRootName ? `Archive root ${archiveRootName}` : "",
|
|
1365
1494
|
speaker ? `Speaker ${speaker}` : "",
|
|
1366
1495
|
from ? `Sender ${from}` : "",
|
|
1367
1496
|
sentAt ? `Sent ${sentAt}` : ""
|
|
@@ -1738,7 +1867,7 @@ var getSectionPathFromSource = (source) => {
|
|
|
1738
1867
|
const path = source.structure?.section?.path ?? (Array.isArray(source.metadata?.sectionPath) ? source.metadata.sectionPath.map((value) => getContextString2(value)).filter((value) => typeof value === "string") : []);
|
|
1739
1868
|
return path.length > 0 ? path : undefined;
|
|
1740
1869
|
};
|
|
1741
|
-
var isBlockAwareContextLabel = (value) => typeof value === "string" && (value.startsWith("PDF ") || value.startsWith("Office "));
|
|
1870
|
+
var isBlockAwareContextLabel = (value) => typeof value === "string" && (value.startsWith("PDF ") || value.startsWith("Office ") || value.startsWith("Slide "));
|
|
1742
1871
|
var getStructuredSectionScoreWeight = (metadata) => {
|
|
1743
1872
|
if (!metadata) {
|
|
1744
1873
|
return 1;
|
|
@@ -1746,6 +1875,8 @@ var getStructuredSectionScoreWeight = (metadata) => {
|
|
|
1746
1875
|
const pdfTextKind = getContextString2(metadata.pdfTextKind);
|
|
1747
1876
|
const officeBlockKind = getContextString2(metadata.officeBlockKind);
|
|
1748
1877
|
const sectionKind = getContextString2(metadata.sectionKind);
|
|
1878
|
+
const slideTitle = getContextString2(metadata.slideTitle);
|
|
1879
|
+
const slideNotesText = getContextString2(metadata.slideNotesText);
|
|
1749
1880
|
if (pdfTextKind === "table_like") {
|
|
1750
1881
|
return 1.28;
|
|
1751
1882
|
}
|
|
@@ -1755,6 +1886,12 @@ var getStructuredSectionScoreWeight = (metadata) => {
|
|
|
1755
1886
|
if (sectionKind === "pdf_block" || sectionKind === "office_block" || officeBlockKind === "paragraph" || pdfTextKind === "paragraph") {
|
|
1756
1887
|
return 1.12;
|
|
1757
1888
|
}
|
|
1889
|
+
if (sectionKind === "presentation_slide" && slideNotesText) {
|
|
1890
|
+
return 1.2;
|
|
1891
|
+
}
|
|
1892
|
+
if (sectionKind === "presentation_slide" && slideTitle) {
|
|
1893
|
+
return 1.14;
|
|
1894
|
+
}
|
|
1758
1895
|
return 1;
|
|
1759
1896
|
};
|
|
1760
1897
|
var getStructuredSourceLeadScore = (source) => source.score * getStructuredSectionScoreWeight(source.metadata);
|
|
@@ -742,6 +742,12 @@ var formatMediaTimestamp = (value) => {
|
|
|
742
742
|
const milliseconds = Math.floor(value % 1000);
|
|
743
743
|
return `${String(minutes).padStart(2, "0")}:${String(seconds).padStart(2, "0")}.${String(milliseconds).padStart(3, "0")}`;
|
|
744
744
|
};
|
|
745
|
+
var formatMediaDurationLabel = (value) => {
|
|
746
|
+
if (typeof value !== "number" || !Number.isFinite(value) || value < 0) {
|
|
747
|
+
return;
|
|
748
|
+
}
|
|
749
|
+
return formatMediaTimestamp(value);
|
|
750
|
+
};
|
|
745
751
|
var buildLocatorLabel = (metadata, source, title) => {
|
|
746
752
|
if (!metadata) {
|
|
747
753
|
return;
|
|
@@ -804,6 +810,12 @@ var buildProvenanceLabel = (metadata) => {
|
|
|
804
810
|
const sentAt = formatTimestampLabel(metadata.sentAt) ?? formatTimestampLabel(metadata.receivedAt);
|
|
805
811
|
const speaker = getContextString(metadata.speaker);
|
|
806
812
|
const mediaKind = getContextString(metadata.mediaKind);
|
|
813
|
+
const mediaSegmentCount = getContextNumber(metadata.mediaSegmentCount);
|
|
814
|
+
const mediaSegmentGroupSize = getContextNumber(metadata.mediaSegmentGroupSize);
|
|
815
|
+
const mediaSegmentGroupIndex = getContextNumber(metadata.mediaSegmentGroupIndex);
|
|
816
|
+
const mediaChannel = getContextString(metadata.mediaChannel);
|
|
817
|
+
const mediaSpeakerCount = getContextNumber(metadata.mediaSpeakerCount);
|
|
818
|
+
const mediaDurationLabel = formatMediaDurationLabel(metadata.mediaDurationMs);
|
|
807
819
|
const transcriptSource = getContextString(metadata.transcriptSource);
|
|
808
820
|
const pdfTextMode = getContextString(metadata.pdfTextMode);
|
|
809
821
|
const ocrEngine = getContextString(metadata.ocrEngine);
|
|
@@ -813,6 +825,12 @@ var buildProvenanceLabel = (metadata) => {
|
|
|
813
825
|
ocrEngine ? `OCR ${ocrEngine}` : "",
|
|
814
826
|
typeof ocrConfidence === "number" ? `Confidence ${ocrConfidence.toFixed(2)}` : "",
|
|
815
827
|
mediaKind ? `Media ${mediaKind}` : "",
|
|
828
|
+
mediaSegmentCount ? `${mediaSegmentCount} segments` : "",
|
|
829
|
+
mediaSegmentGroupSize ? `${mediaSegmentGroupSize} grouped segments` : "",
|
|
830
|
+
mediaSegmentGroupIndex !== undefined ? `Segment group ${mediaSegmentGroupIndex + 1}` : "",
|
|
831
|
+
mediaChannel ? `Channel ${mediaChannel}` : "",
|
|
832
|
+
mediaSpeakerCount ? `${mediaSpeakerCount} speakers` : "",
|
|
833
|
+
mediaDurationLabel ? `Duration ${mediaDurationLabel}` : "",
|
|
816
834
|
transcriptSource ? `Transcript ${transcriptSource}` : "",
|
|
817
835
|
threadTopic ? `Thread ${threadTopic}` : "",
|
|
818
836
|
speaker ? `Speaker ${speaker}` : "",
|
|
@@ -1168,6 +1186,34 @@ var getAttachmentName2 = (source, title) => {
|
|
|
1168
1186
|
}
|
|
1169
1187
|
return;
|
|
1170
1188
|
};
|
|
1189
|
+
var getSpreadsheetHeaders = (metadata) => Array.isArray(metadata?.spreadsheetHeaders) ? metadata.spreadsheetHeaders.map((value) => getContextString2(value)).filter((value) => typeof value === "string") : [];
|
|
1190
|
+
var formatSpreadsheetRowRange = (rowStart, rowEnd) => {
|
|
1191
|
+
if (typeof rowStart !== "number" || !Number.isFinite(rowStart)) {
|
|
1192
|
+
return;
|
|
1193
|
+
}
|
|
1194
|
+
if (typeof rowEnd !== "number" && typeof rowStart === "number" && Number.isFinite(rowStart)) {
|
|
1195
|
+
return `Rows ${rowStart}`;
|
|
1196
|
+
}
|
|
1197
|
+
if (rowStart === rowEnd) {
|
|
1198
|
+
return `Rows ${rowStart}`;
|
|
1199
|
+
}
|
|
1200
|
+
return `Rows ${rowStart}-${rowEnd}`;
|
|
1201
|
+
};
|
|
1202
|
+
var formatSpreadsheetTableLabel = (tableIndex, tableCount) => {
|
|
1203
|
+
if (typeof tableIndex !== "number" || !Number.isFinite(tableIndex) || tableIndex < 1) {
|
|
1204
|
+
return;
|
|
1205
|
+
}
|
|
1206
|
+
if (typeof tableCount === "number" && Number.isFinite(tableCount) && tableCount >= tableIndex) {
|
|
1207
|
+
return `Table ${tableIndex} of ${tableCount}`;
|
|
1208
|
+
}
|
|
1209
|
+
return `Table ${tableIndex}`;
|
|
1210
|
+
};
|
|
1211
|
+
var formatMediaDurationLabel2 = (value) => {
|
|
1212
|
+
if (typeof value !== "number" || !Number.isFinite(value) || value < 0) {
|
|
1213
|
+
return;
|
|
1214
|
+
}
|
|
1215
|
+
return formatMediaTimestamp2(value);
|
|
1216
|
+
};
|
|
1171
1217
|
var buildContextLabel2 = (metadata) => {
|
|
1172
1218
|
if (!metadata) {
|
|
1173
1219
|
return;
|
|
@@ -1193,29 +1239,61 @@ var buildContextLabel2 = (metadata) => {
|
|
|
1193
1239
|
}
|
|
1194
1240
|
const emailKind = getContextString2(metadata.emailKind);
|
|
1195
1241
|
if (emailKind === "attachment") {
|
|
1196
|
-
|
|
1242
|
+
const attachmentName = getContextString2(metadata.attachmentName);
|
|
1243
|
+
const threadTopic2 = getContextString2(metadata.threadTopic);
|
|
1244
|
+
return attachmentName ? threadTopic2 ? `Attachment evidence ${attachmentName} in ${threadTopic2}` : `Attachment evidence ${attachmentName}` : "Attachment evidence";
|
|
1197
1245
|
}
|
|
1198
1246
|
if (emailKind === "message") {
|
|
1247
|
+
const threadTopic2 = getContextString2(metadata.threadTopic);
|
|
1199
1248
|
const from = getContextString2(metadata.from);
|
|
1249
|
+
if (threadTopic2) {
|
|
1250
|
+
return from ? `Message in ${threadTopic2} from ${from}` : `Message in ${threadTopic2}`;
|
|
1251
|
+
}
|
|
1200
1252
|
return from ? `Message from ${from}` : "Message evidence";
|
|
1201
1253
|
}
|
|
1202
1254
|
const page = getContextNumber2(metadata.page) ?? getContextNumber2(metadata.pageNumber) ?? (typeof metadata.pageIndex === "number" ? metadata.pageIndex + 1 : undefined);
|
|
1203
1255
|
const region = getContextNumber2(metadata.regionNumber) ?? (typeof metadata.regionIndex === "number" ? metadata.regionIndex + 1 : undefined);
|
|
1256
|
+
const hasOCRTrace = typeof metadata.ocrRegionConfidence === "number" || typeof metadata.ocrConfidence === "number" || getContextString2(metadata.pdfTextMode) === "ocr" || typeof metadata.ocrRegionCount === "number";
|
|
1204
1257
|
if (page && region) {
|
|
1258
|
+
if (hasOCRTrace) {
|
|
1259
|
+
return `OCR page ${page} region ${region}`;
|
|
1260
|
+
}
|
|
1205
1261
|
return `Page ${page} region ${region}`;
|
|
1206
1262
|
}
|
|
1207
1263
|
if (page) {
|
|
1264
|
+
if (hasOCRTrace) {
|
|
1265
|
+
return `OCR page ${page}`;
|
|
1266
|
+
}
|
|
1208
1267
|
return `Page ${page}`;
|
|
1209
1268
|
}
|
|
1210
1269
|
const sheet = getContextString2(metadata.sheetName) ?? (Array.isArray(metadata.sheetNames) ? getContextString2(metadata.sheetNames[0]) : undefined);
|
|
1211
1270
|
if (sheet) {
|
|
1271
|
+
const tableLabel = formatSpreadsheetTableLabel(getContextNumber2(metadata.spreadsheetTableIndex), getContextNumber2(metadata.spreadsheetTableCount));
|
|
1272
|
+
const rowRange = formatSpreadsheetRowRange(getContextNumber2(metadata.spreadsheetRowStart), getContextNumber2(metadata.spreadsheetRowEnd));
|
|
1273
|
+
const headers = getSpreadsheetHeaders(metadata);
|
|
1274
|
+
if (tableLabel && rowRange) {
|
|
1275
|
+
return `Sheet ${sheet} ${tableLabel} ${rowRange}`;
|
|
1276
|
+
}
|
|
1277
|
+
if (tableLabel) {
|
|
1278
|
+
return `Sheet ${sheet} ${tableLabel}`;
|
|
1279
|
+
}
|
|
1280
|
+
if (rowRange) {
|
|
1281
|
+
return `Sheet ${sheet} ${rowRange}`;
|
|
1282
|
+
}
|
|
1283
|
+
if (headers.length > 0) {
|
|
1284
|
+
return `Sheet ${sheet} by ${headers.slice(0, 2).join(", ")}`;
|
|
1285
|
+
}
|
|
1212
1286
|
return `Sheet ${sheet}`;
|
|
1213
1287
|
}
|
|
1214
1288
|
const slide = getContextNumber2(metadata.slide) ?? getContextNumber2(metadata.slideNumber) ?? (typeof metadata.slideIndex === "number" ? metadata.slideIndex + 1 : undefined);
|
|
1289
|
+
const slideTitle = getContextString2(metadata.slideTitle);
|
|
1215
1290
|
if (slide) {
|
|
1291
|
+
if (slideTitle) {
|
|
1292
|
+
return `Slide ${slide} ${slideTitle}`;
|
|
1293
|
+
}
|
|
1216
1294
|
return `Slide ${slide}`;
|
|
1217
1295
|
}
|
|
1218
|
-
const archiveEntry = getContextString2(metadata.archiveEntryPath) ?? getContextString2(metadata.entryPath);
|
|
1296
|
+
const archiveEntry = getContextString2(metadata.archiveFullPath) ?? getContextString2(metadata.archivePath) ?? getContextString2(metadata.archiveEntryPath) ?? getContextString2(metadata.entryPath);
|
|
1219
1297
|
if (archiveEntry) {
|
|
1220
1298
|
return `Archive entry ${archiveEntry}`;
|
|
1221
1299
|
}
|
|
@@ -1240,6 +1318,9 @@ var buildLocatorLabel2 = (metadata, source, title) => {
|
|
|
1240
1318
|
const officeBlockKind = getContextString2(metadata.officeBlockKind);
|
|
1241
1319
|
const pdfBlockNumber = getContextNumber2(metadata.pdfBlockNumber);
|
|
1242
1320
|
const officeBlockNumber = getContextNumber2(metadata.officeBlockNumber);
|
|
1321
|
+
const spreadsheetRowStart = getContextNumber2(metadata.spreadsheetRowStart);
|
|
1322
|
+
const spreadsheetRowEnd = getContextNumber2(metadata.spreadsheetRowEnd);
|
|
1323
|
+
const slideTitle = getContextString2(metadata.slideTitle);
|
|
1243
1324
|
const page = getContextNumber2(metadata.page) ?? getContextNumber2(metadata.pageNumber) ?? (typeof metadata.pageIndex === "number" ? metadata.pageIndex + 1 : undefined);
|
|
1244
1325
|
const region = getContextNumber2(metadata.regionNumber) ?? (typeof metadata.regionIndex === "number" ? metadata.regionIndex + 1 : undefined);
|
|
1245
1326
|
if (page && region) {
|
|
@@ -1256,19 +1337,31 @@ var buildLocatorLabel2 = (metadata, source, title) => {
|
|
|
1256
1337
|
}
|
|
1257
1338
|
const sheet = getContextString2(metadata.sheetName) ?? (Array.isArray(metadata.sheetNames) ? getContextString2(metadata.sheetNames[0]) : undefined);
|
|
1258
1339
|
if (sheet) {
|
|
1259
|
-
|
|
1340
|
+
const tableLabel = formatSpreadsheetTableLabel(getContextNumber2(metadata.spreadsheetTableIndex), getContextNumber2(metadata.spreadsheetTableCount));
|
|
1341
|
+
const rowRange = formatSpreadsheetRowRange(spreadsheetRowStart, spreadsheetRowEnd);
|
|
1342
|
+
if (tableLabel && rowRange) {
|
|
1343
|
+
return `Sheet ${sheet} · ${tableLabel} · ${rowRange}`;
|
|
1344
|
+
}
|
|
1345
|
+
if (tableLabel) {
|
|
1346
|
+
return `Sheet ${sheet} · ${tableLabel}`;
|
|
1347
|
+
}
|
|
1348
|
+
return rowRange ? `Sheet ${sheet} · ${rowRange}` : `Sheet ${sheet}`;
|
|
1260
1349
|
}
|
|
1261
1350
|
const slide = getContextNumber2(metadata.slide) ?? getContextNumber2(metadata.slideNumber) ?? (typeof metadata.slideIndex === "number" ? metadata.slideIndex + 1 : undefined);
|
|
1262
1351
|
if (slide) {
|
|
1263
|
-
return `Slide ${slide}`;
|
|
1352
|
+
return slideTitle ? `Slide ${slide} · ${slideTitle}` : `Slide ${slide}`;
|
|
1264
1353
|
}
|
|
1265
|
-
const archiveEntry = getContextString2(metadata.archiveEntryPath) ?? getContextString2(metadata.entryPath);
|
|
1354
|
+
const archiveEntry = getContextString2(metadata.archiveFullPath) ?? getContextString2(metadata.archivePath) ?? getContextString2(metadata.archiveEntryPath) ?? getContextString2(metadata.entryPath);
|
|
1266
1355
|
if (archiveEntry) {
|
|
1267
1356
|
return `Archive entry ${archiveEntry}`;
|
|
1268
1357
|
}
|
|
1269
1358
|
const emailKind = getContextString2(metadata.emailKind);
|
|
1270
1359
|
if (emailKind === "attachment") {
|
|
1271
1360
|
const attachmentName = getContextString2(metadata.attachmentName) ?? getAttachmentName2(source, title);
|
|
1361
|
+
const replyDepth = getContextNumber2(metadata.replyDepth);
|
|
1362
|
+
if (attachmentName && replyDepth && replyDepth > 0) {
|
|
1363
|
+
return `Attachment ${attachmentName} · Reply depth ${replyDepth}`;
|
|
1364
|
+
}
|
|
1272
1365
|
return attachmentName ? `Attachment ${attachmentName}` : "Attachment";
|
|
1273
1366
|
}
|
|
1274
1367
|
const mediaStart = formatMediaTimestamp2(metadata.startMs);
|
|
@@ -1299,18 +1392,36 @@ var buildProvenanceLabel2 = (metadata) => {
|
|
|
1299
1392
|
return;
|
|
1300
1393
|
}
|
|
1301
1394
|
const threadTopic = getContextString2(metadata.threadTopic);
|
|
1395
|
+
const replyDepth = getContextNumber2(metadata.replyDepth);
|
|
1396
|
+
const threadMessageCount = getContextNumber2(metadata.threadMessageCount);
|
|
1397
|
+
const threadRootMessageId = getContextString2(metadata.threadRootMessageId);
|
|
1302
1398
|
const from = getContextString2(metadata.from);
|
|
1303
1399
|
const sentAt = formatTimestampLabel2(metadata.sentAt) ?? formatTimestampLabel2(metadata.receivedAt);
|
|
1304
1400
|
const speaker = getContextString2(metadata.speaker);
|
|
1305
1401
|
const mediaKind = getContextString2(metadata.mediaKind);
|
|
1306
1402
|
const transcriptSource = getContextString2(metadata.transcriptSource);
|
|
1403
|
+
const mediaSpeakerCount = getContextNumber2(metadata.mediaSpeakerCount);
|
|
1404
|
+
const mediaSegmentCount = getContextNumber2(metadata.mediaSegmentCount);
|
|
1405
|
+
const mediaSegmentGroupSize = getContextNumber2(metadata.mediaSegmentGroupSize);
|
|
1406
|
+
const mediaSegmentGroupIndex = getContextNumber2(metadata.mediaSegmentGroupIndex);
|
|
1407
|
+
const mediaChannel = getContextString2(metadata.mediaChannel);
|
|
1408
|
+
const mediaDurationLabel = formatMediaDurationLabel2(metadata.mediaDurationMs);
|
|
1409
|
+
const spreadsheetHeaders = getSpreadsheetHeaders(metadata);
|
|
1410
|
+
const slideNotesText = getContextString2(metadata.slideNotesText);
|
|
1307
1411
|
const pdfTextMode = getContextString2(metadata.pdfTextMode);
|
|
1308
1412
|
const pdfTextKind = getContextString2(metadata.pdfTextKind);
|
|
1309
1413
|
const officeBlockKind = getContextString2(metadata.officeBlockKind);
|
|
1310
1414
|
const ocrEngine = getContextString2(metadata.ocrEngine);
|
|
1311
1415
|
const extractorRegistryMatch = getContextString2(metadata.extractorRegistryMatch);
|
|
1312
1416
|
const chunkingProfile = getContextString2(metadata.chunkingProfile);
|
|
1417
|
+
const archiveDepth = getContextNumber2(metadata.archiveDepth);
|
|
1418
|
+
const archiveNestedDepth = getContextNumber2(metadata.archiveNestedDepth);
|
|
1419
|
+
const archiveContainerPath = getContextString2(metadata.archiveContainerPath);
|
|
1420
|
+
const archiveRootName = getContextString2(metadata.archiveRootName);
|
|
1421
|
+
const spreadsheetTableLabel = formatSpreadsheetTableLabel(getContextNumber2(metadata.spreadsheetTableIndex), getContextNumber2(metadata.spreadsheetTableCount));
|
|
1313
1422
|
const ocrConfidence = getContextNumber2(metadata.ocrRegionConfidence) ?? getContextNumber2(metadata.ocrConfidence);
|
|
1423
|
+
const ocrAverageConfidence = getContextNumber2(metadata.ocrPageAverageConfidence) ?? getContextNumber2(metadata.ocrAverageConfidence);
|
|
1424
|
+
const ocrRegionCount = getContextNumber2(metadata.ocrRegionCount);
|
|
1314
1425
|
const labels = [
|
|
1315
1426
|
pdfTextMode ? `PDF ${pdfTextMode}` : "",
|
|
1316
1427
|
pdfTextKind === "table_like" ? "PDF table block" : pdfTextKind === "paragraph" ? "PDF text block" : "",
|
|
@@ -1319,9 +1430,27 @@ var buildProvenanceLabel2 = (metadata) => {
|
|
|
1319
1430
|
extractorRegistryMatch ? `Extractor ${extractorRegistryMatch}` : "",
|
|
1320
1431
|
chunkingProfile ? `Chunking ${chunkingProfile}` : "",
|
|
1321
1432
|
typeof ocrConfidence === "number" ? `Confidence ${ocrConfidence.toFixed(2)}` : "",
|
|
1433
|
+
typeof ocrAverageConfidence === "number" && ocrAverageConfidence !== ocrConfidence ? `Average ${ocrAverageConfidence.toFixed(2)}` : "",
|
|
1434
|
+
typeof ocrRegionCount === "number" ? `${ocrRegionCount} regions` : "",
|
|
1435
|
+
spreadsheetHeaders.length > 0 ? `Spreadsheet ${spreadsheetHeaders.join(", ")}` : "",
|
|
1436
|
+
spreadsheetTableLabel ? `Spreadsheet ${spreadsheetTableLabel}` : "",
|
|
1322
1437
|
mediaKind ? `Media ${mediaKind}` : "",
|
|
1438
|
+
mediaSegmentCount ? `${mediaSegmentCount} segments` : "",
|
|
1439
|
+
mediaSegmentGroupSize ? `${mediaSegmentGroupSize} grouped segments` : "",
|
|
1440
|
+
mediaSegmentGroupIndex !== undefined ? `Segment group ${mediaSegmentGroupIndex + 1}` : "",
|
|
1441
|
+
mediaChannel ? `Channel ${mediaChannel}` : "",
|
|
1442
|
+
mediaSpeakerCount ? `${mediaSpeakerCount} speakers` : "",
|
|
1443
|
+
mediaDurationLabel ? `Duration ${mediaDurationLabel}` : "",
|
|
1323
1444
|
transcriptSource ? `Transcript ${transcriptSource}` : "",
|
|
1324
1445
|
threadTopic ? `Thread ${threadTopic}` : "",
|
|
1446
|
+
threadRootMessageId ? `Thread root ${threadRootMessageId}` : "",
|
|
1447
|
+
threadMessageCount ? `${threadMessageCount} thread messages` : "",
|
|
1448
|
+
replyDepth ? `Reply depth ${replyDepth}` : "",
|
|
1449
|
+
slideNotesText ? "Speaker notes" : "",
|
|
1450
|
+
archiveDepth ? `Archive depth ${archiveDepth}` : "",
|
|
1451
|
+
archiveNestedDepth ? `Archive nested depth ${archiveNestedDepth}` : "",
|
|
1452
|
+
archiveContainerPath ? `Archive container ${archiveContainerPath}` : "",
|
|
1453
|
+
archiveRootName ? `Archive root ${archiveRootName}` : "",
|
|
1325
1454
|
speaker ? `Speaker ${speaker}` : "",
|
|
1326
1455
|
from ? `Sender ${from}` : "",
|
|
1327
1456
|
sentAt ? `Sent ${sentAt}` : ""
|
|
@@ -1698,7 +1827,7 @@ var getSectionPathFromSource = (source) => {
|
|
|
1698
1827
|
const path = source.structure?.section?.path ?? (Array.isArray(source.metadata?.sectionPath) ? source.metadata.sectionPath.map((value) => getContextString2(value)).filter((value) => typeof value === "string") : []);
|
|
1699
1828
|
return path.length > 0 ? path : undefined;
|
|
1700
1829
|
};
|
|
1701
|
-
var isBlockAwareContextLabel = (value) => typeof value === "string" && (value.startsWith("PDF ") || value.startsWith("Office "));
|
|
1830
|
+
var isBlockAwareContextLabel = (value) => typeof value === "string" && (value.startsWith("PDF ") || value.startsWith("Office ") || value.startsWith("Slide "));
|
|
1702
1831
|
var getStructuredSectionScoreWeight = (metadata) => {
|
|
1703
1832
|
if (!metadata) {
|
|
1704
1833
|
return 1;
|
|
@@ -1706,6 +1835,8 @@ var getStructuredSectionScoreWeight = (metadata) => {
|
|
|
1706
1835
|
const pdfTextKind = getContextString2(metadata.pdfTextKind);
|
|
1707
1836
|
const officeBlockKind = getContextString2(metadata.officeBlockKind);
|
|
1708
1837
|
const sectionKind = getContextString2(metadata.sectionKind);
|
|
1838
|
+
const slideTitle = getContextString2(metadata.slideTitle);
|
|
1839
|
+
const slideNotesText = getContextString2(metadata.slideNotesText);
|
|
1709
1840
|
if (pdfTextKind === "table_like") {
|
|
1710
1841
|
return 1.28;
|
|
1711
1842
|
}
|
|
@@ -1715,6 +1846,12 @@ var getStructuredSectionScoreWeight = (metadata) => {
|
|
|
1715
1846
|
if (sectionKind === "pdf_block" || sectionKind === "office_block" || officeBlockKind === "paragraph" || pdfTextKind === "paragraph") {
|
|
1716
1847
|
return 1.12;
|
|
1717
1848
|
}
|
|
1849
|
+
if (sectionKind === "presentation_slide" && slideNotesText) {
|
|
1850
|
+
return 1.2;
|
|
1851
|
+
}
|
|
1852
|
+
if (sectionKind === "presentation_slide" && slideTitle) {
|
|
1853
|
+
return 1.14;
|
|
1854
|
+
}
|
|
1718
1855
|
return 1;
|
|
1719
1856
|
};
|
|
1720
1857
|
var getStructuredSourceLeadScore = (source) => source.score * getStructuredSectionScoreWeight(source.metadata);
|