@absolutejs/absolute 0.19.0-beta.617 → 0.19.0-beta.619

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1212,6 +1212,25 @@ var buildContextLabel2 = (metadata) => {
1212
1212
  if (!metadata) {
1213
1213
  return;
1214
1214
  }
1215
+ const pdfTextKind = getContextString2(metadata.pdfTextKind);
1216
+ const officeBlockKind = getContextString2(metadata.officeBlockKind);
1217
+ const sectionPath = Array.isArray(metadata.sectionPath) ? metadata.sectionPath.map((value) => getContextString2(value)).filter((value) => typeof value === "string") : [];
1218
+ const sectionTitle = getContextString2(metadata.sectionTitle) ?? sectionPath.at(-1);
1219
+ if (pdfTextKind === "table_like" && sectionTitle) {
1220
+ return `PDF table block ${sectionTitle}`;
1221
+ }
1222
+ if (pdfTextKind === "paragraph" && sectionTitle) {
1223
+ return `PDF text block ${sectionTitle}`;
1224
+ }
1225
+ if (officeBlockKind === "table" && sectionTitle) {
1226
+ return `Office table block ${sectionTitle}`;
1227
+ }
1228
+ if (officeBlockKind === "list" && sectionTitle) {
1229
+ return `Office list block ${sectionTitle}`;
1230
+ }
1231
+ if (officeBlockKind === "paragraph" && sectionTitle) {
1232
+ return `Office paragraph block ${sectionTitle}`;
1233
+ }
1215
1234
  const emailKind = getContextString2(metadata.emailKind);
1216
1235
  if (emailKind === "attachment") {
1217
1236
  return "Attachment evidence";
@@ -1248,8 +1267,6 @@ var buildContextLabel2 = (metadata) => {
1248
1267
  if (speaker) {
1249
1268
  return `Speaker ${speaker}`;
1250
1269
  }
1251
- const sectionPath = Array.isArray(metadata.sectionPath) ? metadata.sectionPath.map((value) => getContextString2(value)).filter((value) => typeof value === "string") : [];
1252
- const sectionTitle = getContextString2(metadata.sectionTitle) ?? sectionPath.at(-1);
1253
1270
  if (sectionTitle) {
1254
1271
  return `Section ${sectionTitle}`;
1255
1272
  }
@@ -1259,11 +1276,21 @@ var buildLocatorLabel2 = (metadata, source, title) => {
1259
1276
  if (!metadata) {
1260
1277
  return;
1261
1278
  }
1279
+ const pdfTextKind = getContextString2(metadata.pdfTextKind);
1280
+ const officeBlockKind = getContextString2(metadata.officeBlockKind);
1281
+ const pdfBlockNumber = getContextNumber2(metadata.pdfBlockNumber);
1282
+ const officeBlockNumber = getContextNumber2(metadata.officeBlockNumber);
1262
1283
  const page = getContextNumber2(metadata.page) ?? getContextNumber2(metadata.pageNumber) ?? (typeof metadata.pageIndex === "number" ? metadata.pageIndex + 1 : undefined);
1263
1284
  const region = getContextNumber2(metadata.regionNumber) ?? (typeof metadata.regionIndex === "number" ? metadata.regionIndex + 1 : undefined);
1264
1285
  if (page && region) {
1265
1286
  return `Page ${page} · Region ${region}`;
1266
1287
  }
1288
+ if (page && pdfBlockNumber && pdfTextKind === "table_like") {
1289
+ return `Page ${page} · Table Block ${pdfBlockNumber}`;
1290
+ }
1291
+ if (page && pdfBlockNumber) {
1292
+ return `Page ${page} · Text Block ${pdfBlockNumber}`;
1293
+ }
1267
1294
  if (page) {
1268
1295
  return `Page ${page}`;
1269
1296
  }
@@ -1292,6 +1319,15 @@ var buildLocatorLabel2 = (metadata, source, title) => {
1292
1319
  if (mediaStart) {
1293
1320
  return `Timestamp ${mediaStart}`;
1294
1321
  }
1322
+ if (officeBlockNumber && officeBlockKind === "table") {
1323
+ return `Office table block ${officeBlockNumber}`;
1324
+ }
1325
+ if (officeBlockNumber && officeBlockKind === "list") {
1326
+ return `Office list block ${officeBlockNumber}`;
1327
+ }
1328
+ if (officeBlockNumber && officeBlockKind === "paragraph") {
1329
+ return `Office paragraph block ${officeBlockNumber}`;
1330
+ }
1295
1331
  const sectionPath = Array.isArray(metadata.sectionPath) ? metadata.sectionPath.map((value) => getContextString2(value)).filter((value) => typeof value === "string") : [];
1296
1332
  if (sectionPath.length > 0) {
1297
1333
  return `Section ${sectionPath.join(" > ")}`;
@@ -1309,12 +1345,16 @@ var buildProvenanceLabel2 = (metadata) => {
1309
1345
  const mediaKind = getContextString2(metadata.mediaKind);
1310
1346
  const transcriptSource = getContextString2(metadata.transcriptSource);
1311
1347
  const pdfTextMode = getContextString2(metadata.pdfTextMode);
1348
+ const pdfTextKind = getContextString2(metadata.pdfTextKind);
1349
+ const officeBlockKind = getContextString2(metadata.officeBlockKind);
1312
1350
  const ocrEngine = getContextString2(metadata.ocrEngine);
1313
1351
  const extractorRegistryMatch = getContextString2(metadata.extractorRegistryMatch);
1314
1352
  const chunkingProfile = getContextString2(metadata.chunkingProfile);
1315
1353
  const ocrConfidence = getContextNumber2(metadata.ocrRegionConfidence) ?? getContextNumber2(metadata.ocrConfidence);
1316
1354
  const labels = [
1317
1355
  pdfTextMode ? `PDF ${pdfTextMode}` : "",
1356
+ pdfTextKind === "table_like" ? "PDF table block" : pdfTextKind === "paragraph" ? "PDF text block" : "",
1357
+ officeBlockKind ? `Office ${officeBlockKind}` : "",
1318
1358
  ocrEngine ? `OCR ${ocrEngine}` : "",
1319
1359
  extractorRegistryMatch ? `Extractor ${extractorRegistryMatch}` : "",
1320
1360
  chunkingProfile ? `Chunking ${chunkingProfile}` : "",
@@ -1350,7 +1390,7 @@ var buildRAGChunkStructure = (metadata) => {
1350
1390
  return;
1351
1391
  }
1352
1392
  const sectionPath = Array.isArray(metadata.sectionPath) ? metadata.sectionPath.filter((value) => typeof value === "string" && value.trim().length > 0) : undefined;
1353
- const sectionKind = metadata.sectionKind === "markdown_heading" || metadata.sectionKind === "html_heading" || metadata.sectionKind === "office_heading" || metadata.sectionKind === "spreadsheet_rows" || metadata.sectionKind === "presentation_slide" ? metadata.sectionKind : undefined;
1393
+ const sectionKind = metadata.sectionKind === "markdown_heading" || metadata.sectionKind === "html_heading" || metadata.sectionKind === "office_heading" || metadata.sectionKind === "office_block" || metadata.sectionKind === "pdf_block" || metadata.sectionKind === "spreadsheet_rows" || metadata.sectionKind === "presentation_slide" ? metadata.sectionKind : undefined;
1354
1394
  const section = {
1355
1395
  depth: getContextNumber2(metadata.sectionDepth),
1356
1396
  kind: sectionKind,
@@ -1670,7 +1710,7 @@ var buildRAGSourceSummaries = (sources) => {
1670
1710
  const citationReferenceMap = buildRAGCitationReferenceMap(citations);
1671
1711
  return sourceGroups.map((group) => {
1672
1712
  const groupCitations = citations.filter((citation) => group.chunks.some((chunk) => chunk.chunkId === citation.chunkId));
1673
- const leadChunk = group.chunks.slice().sort((left, right) => right.score - left.score)[0];
1713
+ const leadChunk = getPreferredSourceLeadChunk(group.chunks);
1674
1714
  const excerpts = leadChunk ? buildRAGChunkExcerpts(group.chunks, leadChunk.chunkId) : undefined;
1675
1715
  const structure = leadChunk?.structure ?? buildRAGChunkStructure(leadChunk?.metadata);
1676
1716
  const excerptSelection = buildRAGExcerptSelection(excerpts, structure);
@@ -1698,13 +1738,45 @@ var getSectionPathFromSource = (source) => {
1698
1738
  const path = source.structure?.section?.path ?? (Array.isArray(source.metadata?.sectionPath) ? source.metadata.sectionPath.map((value) => getContextString2(value)).filter((value) => typeof value === "string") : []);
1699
1739
  return path.length > 0 ? path : undefined;
1700
1740
  };
1741
+ var isBlockAwareContextLabel = (value) => typeof value === "string" && (value.startsWith("PDF ") || value.startsWith("Office "));
1742
+ var getStructuredSectionScoreWeight = (metadata) => {
1743
+ if (!metadata) {
1744
+ return 1;
1745
+ }
1746
+ const pdfTextKind = getContextString2(metadata.pdfTextKind);
1747
+ const officeBlockKind = getContextString2(metadata.officeBlockKind);
1748
+ const sectionKind = getContextString2(metadata.sectionKind);
1749
+ if (pdfTextKind === "table_like") {
1750
+ return 1.28;
1751
+ }
1752
+ if (officeBlockKind === "table" || officeBlockKind === "list") {
1753
+ return 1.24;
1754
+ }
1755
+ if (sectionKind === "pdf_block" || sectionKind === "office_block" || officeBlockKind === "paragraph" || pdfTextKind === "paragraph") {
1756
+ return 1.12;
1757
+ }
1758
+ return 1;
1759
+ };
1760
+ var getStructuredSourceLeadScore = (source) => source.score * getStructuredSectionScoreWeight(source.metadata);
1761
+ var getPreferredSourceLeadChunk = (chunks) => chunks.slice().sort((left, right) => {
1762
+ const leftWeightedScore = getStructuredSourceLeadScore(left);
1763
+ const rightWeightedScore = getStructuredSourceLeadScore(right);
1764
+ if (rightWeightedScore !== leftWeightedScore) {
1765
+ return rightWeightedScore - leftWeightedScore;
1766
+ }
1767
+ if (right.score !== left.score) {
1768
+ return right.score - left.score;
1769
+ }
1770
+ return left.chunkId.localeCompare(right.chunkId);
1771
+ })[0];
1701
1772
  var buildRAGSectionRetrievalDiagnostics = (sources, trace) => {
1702
- const totalScore = sources.reduce((sum, source) => sum + source.score, 0);
1773
+ const totalScore = sources.reduce((sum, source) => sum + source.score * getStructuredSectionScoreWeight(source.metadata), 0);
1703
1774
  if (sources.length === 0 || totalScore <= 0) {
1704
1775
  return [];
1705
1776
  }
1706
1777
  const sections = new Map;
1707
1778
  for (const source of sources) {
1779
+ const structuredScore = source.score * getStructuredSectionScoreWeight(source.metadata);
1708
1780
  const path = getSectionPathFromSource(source);
1709
1781
  if (!path) {
1710
1782
  continue;
@@ -1736,7 +1808,7 @@ var buildRAGSectionRetrievalDiagnostics = (sources, trace) => {
1736
1808
  sourceSet: new Set(source.source ? [source.source] : []),
1737
1809
  topChunkId: source.chunkId,
1738
1810
  topSource: source.source,
1739
- totalScore: source.score,
1811
+ totalScore: structuredScore,
1740
1812
  transformedHits,
1741
1813
  variantHits,
1742
1814
  vectorHits
@@ -1744,7 +1816,7 @@ var buildRAGSectionRetrievalDiagnostics = (sources, trace) => {
1744
1816
  continue;
1745
1817
  }
1746
1818
  existing.count += 1;
1747
- existing.totalScore += source.score;
1819
+ existing.totalScore += structuredScore;
1748
1820
  if (source.source) {
1749
1821
  existing.sourceSet.add(source.source);
1750
1822
  }
@@ -1772,6 +1844,8 @@ var buildRAGSectionRetrievalDiagnostics = (sources, trace) => {
1772
1844
  const parentTotal = siblingPool.reduce((sum, entry) => sum + entry.totalScore, 0);
1773
1845
  const scoreShare = section.totalScore / totalScore;
1774
1846
  const parentShare = parentTotal > 0 ? section.totalScore / parentTotal : undefined;
1847
+ const topChunk = sources.find((source) => source.chunkId === section.topChunkId);
1848
+ const topContextLabel = topChunk?.labels?.contextLabel ?? buildContextLabel2(topChunk?.metadata);
1775
1849
  const parentDistribution = parentTotal > 0 ? siblingPool.map((entry) => ({
1776
1850
  count: entry.count,
1777
1851
  isActive: entry.key === section.key,
@@ -1897,6 +1971,7 @@ var buildRAGSectionRetrievalDiagnostics = (sources, trace) => {
1897
1971
  reasons.push("concentrated_evidence");
1898
1972
  }
1899
1973
  const summaryParts = [
1974
+ isBlockAwareContextLabel(topContextLabel) ? topContextLabel : "",
1900
1975
  `${section.count} hit${section.count === 1 ? "" : "s"}`,
1901
1976
  `${(scoreShare * 100).toFixed(0)}% score share`,
1902
1977
  `vector ${section.vectorHits} · lexical ${section.lexicalHits} · hybrid ${section.hybridHits}`,
@@ -2108,22 +2183,21 @@ var updateSourceGroup = (groups, source) => {
2108
2183
  groups.set(key, buildSourceGroup(source, key));
2109
2184
  return;
2110
2185
  }
2111
- if (source.score > existing.bestScore) {
2112
- existing.bestScore = source.score;
2113
- existing.label = buildSourceLabel2(source);
2114
- existing.labels = source.labels ?? buildRAGSourceLabels({
2115
- metadata: source.metadata,
2116
- source: source.source,
2117
- title: source.title
2118
- });
2119
- existing.structure = source.structure ?? buildRAGChunkStructure(source.metadata);
2120
- existing.source = source.source;
2121
- existing.title = source.title;
2122
- } else {
2123
- existing.bestScore = Math.max(existing.bestScore, source.score);
2124
- }
2186
+ existing.bestScore = Math.max(existing.bestScore, source.score);
2125
2187
  existing.count += 1;
2126
2188
  existing.chunks.push(source);
2189
+ const leadChunk = getPreferredSourceLeadChunk(existing.chunks);
2190
+ if (leadChunk) {
2191
+ existing.label = buildSourceLabel2(leadChunk);
2192
+ existing.labels = leadChunk.labels ?? buildRAGSourceLabels({
2193
+ metadata: leadChunk.metadata,
2194
+ source: leadChunk.source,
2195
+ title: leadChunk.title
2196
+ });
2197
+ existing.structure = leadChunk.structure ?? buildRAGChunkStructure(leadChunk.metadata);
2198
+ existing.source = leadChunk.source;
2199
+ existing.title = leadChunk.title;
2200
+ }
2127
2201
  };
2128
2202
  var getLatestAssistantMessage = (messages) => {
2129
2203
  for (let index = messages.length - 1;index >= 0; index -= 1) {
@@ -1172,6 +1172,25 @@ var buildContextLabel2 = (metadata) => {
1172
1172
  if (!metadata) {
1173
1173
  return;
1174
1174
  }
1175
+ const pdfTextKind = getContextString2(metadata.pdfTextKind);
1176
+ const officeBlockKind = getContextString2(metadata.officeBlockKind);
1177
+ const sectionPath = Array.isArray(metadata.sectionPath) ? metadata.sectionPath.map((value) => getContextString2(value)).filter((value) => typeof value === "string") : [];
1178
+ const sectionTitle = getContextString2(metadata.sectionTitle) ?? sectionPath.at(-1);
1179
+ if (pdfTextKind === "table_like" && sectionTitle) {
1180
+ return `PDF table block ${sectionTitle}`;
1181
+ }
1182
+ if (pdfTextKind === "paragraph" && sectionTitle) {
1183
+ return `PDF text block ${sectionTitle}`;
1184
+ }
1185
+ if (officeBlockKind === "table" && sectionTitle) {
1186
+ return `Office table block ${sectionTitle}`;
1187
+ }
1188
+ if (officeBlockKind === "list" && sectionTitle) {
1189
+ return `Office list block ${sectionTitle}`;
1190
+ }
1191
+ if (officeBlockKind === "paragraph" && sectionTitle) {
1192
+ return `Office paragraph block ${sectionTitle}`;
1193
+ }
1175
1194
  const emailKind = getContextString2(metadata.emailKind);
1176
1195
  if (emailKind === "attachment") {
1177
1196
  return "Attachment evidence";
@@ -1208,8 +1227,6 @@ var buildContextLabel2 = (metadata) => {
1208
1227
  if (speaker) {
1209
1228
  return `Speaker ${speaker}`;
1210
1229
  }
1211
- const sectionPath = Array.isArray(metadata.sectionPath) ? metadata.sectionPath.map((value) => getContextString2(value)).filter((value) => typeof value === "string") : [];
1212
- const sectionTitle = getContextString2(metadata.sectionTitle) ?? sectionPath.at(-1);
1213
1230
  if (sectionTitle) {
1214
1231
  return `Section ${sectionTitle}`;
1215
1232
  }
@@ -1219,11 +1236,21 @@ var buildLocatorLabel2 = (metadata, source, title) => {
1219
1236
  if (!metadata) {
1220
1237
  return;
1221
1238
  }
1239
+ const pdfTextKind = getContextString2(metadata.pdfTextKind);
1240
+ const officeBlockKind = getContextString2(metadata.officeBlockKind);
1241
+ const pdfBlockNumber = getContextNumber2(metadata.pdfBlockNumber);
1242
+ const officeBlockNumber = getContextNumber2(metadata.officeBlockNumber);
1222
1243
  const page = getContextNumber2(metadata.page) ?? getContextNumber2(metadata.pageNumber) ?? (typeof metadata.pageIndex === "number" ? metadata.pageIndex + 1 : undefined);
1223
1244
  const region = getContextNumber2(metadata.regionNumber) ?? (typeof metadata.regionIndex === "number" ? metadata.regionIndex + 1 : undefined);
1224
1245
  if (page && region) {
1225
1246
  return `Page ${page} · Region ${region}`;
1226
1247
  }
1248
+ if (page && pdfBlockNumber && pdfTextKind === "table_like") {
1249
+ return `Page ${page} · Table Block ${pdfBlockNumber}`;
1250
+ }
1251
+ if (page && pdfBlockNumber) {
1252
+ return `Page ${page} · Text Block ${pdfBlockNumber}`;
1253
+ }
1227
1254
  if (page) {
1228
1255
  return `Page ${page}`;
1229
1256
  }
@@ -1252,6 +1279,15 @@ var buildLocatorLabel2 = (metadata, source, title) => {
1252
1279
  if (mediaStart) {
1253
1280
  return `Timestamp ${mediaStart}`;
1254
1281
  }
1282
+ if (officeBlockNumber && officeBlockKind === "table") {
1283
+ return `Office table block ${officeBlockNumber}`;
1284
+ }
1285
+ if (officeBlockNumber && officeBlockKind === "list") {
1286
+ return `Office list block ${officeBlockNumber}`;
1287
+ }
1288
+ if (officeBlockNumber && officeBlockKind === "paragraph") {
1289
+ return `Office paragraph block ${officeBlockNumber}`;
1290
+ }
1255
1291
  const sectionPath = Array.isArray(metadata.sectionPath) ? metadata.sectionPath.map((value) => getContextString2(value)).filter((value) => typeof value === "string") : [];
1256
1292
  if (sectionPath.length > 0) {
1257
1293
  return `Section ${sectionPath.join(" > ")}`;
@@ -1269,12 +1305,16 @@ var buildProvenanceLabel2 = (metadata) => {
1269
1305
  const mediaKind = getContextString2(metadata.mediaKind);
1270
1306
  const transcriptSource = getContextString2(metadata.transcriptSource);
1271
1307
  const pdfTextMode = getContextString2(metadata.pdfTextMode);
1308
+ const pdfTextKind = getContextString2(metadata.pdfTextKind);
1309
+ const officeBlockKind = getContextString2(metadata.officeBlockKind);
1272
1310
  const ocrEngine = getContextString2(metadata.ocrEngine);
1273
1311
  const extractorRegistryMatch = getContextString2(metadata.extractorRegistryMatch);
1274
1312
  const chunkingProfile = getContextString2(metadata.chunkingProfile);
1275
1313
  const ocrConfidence = getContextNumber2(metadata.ocrRegionConfidence) ?? getContextNumber2(metadata.ocrConfidence);
1276
1314
  const labels = [
1277
1315
  pdfTextMode ? `PDF ${pdfTextMode}` : "",
1316
+ pdfTextKind === "table_like" ? "PDF table block" : pdfTextKind === "paragraph" ? "PDF text block" : "",
1317
+ officeBlockKind ? `Office ${officeBlockKind}` : "",
1278
1318
  ocrEngine ? `OCR ${ocrEngine}` : "",
1279
1319
  extractorRegistryMatch ? `Extractor ${extractorRegistryMatch}` : "",
1280
1320
  chunkingProfile ? `Chunking ${chunkingProfile}` : "",
@@ -1310,7 +1350,7 @@ var buildRAGChunkStructure = (metadata) => {
1310
1350
  return;
1311
1351
  }
1312
1352
  const sectionPath = Array.isArray(metadata.sectionPath) ? metadata.sectionPath.filter((value) => typeof value === "string" && value.trim().length > 0) : undefined;
1313
- const sectionKind = metadata.sectionKind === "markdown_heading" || metadata.sectionKind === "html_heading" || metadata.sectionKind === "office_heading" || metadata.sectionKind === "spreadsheet_rows" || metadata.sectionKind === "presentation_slide" ? metadata.sectionKind : undefined;
1353
+ const sectionKind = metadata.sectionKind === "markdown_heading" || metadata.sectionKind === "html_heading" || metadata.sectionKind === "office_heading" || metadata.sectionKind === "office_block" || metadata.sectionKind === "pdf_block" || metadata.sectionKind === "spreadsheet_rows" || metadata.sectionKind === "presentation_slide" ? metadata.sectionKind : undefined;
1314
1354
  const section = {
1315
1355
  depth: getContextNumber2(metadata.sectionDepth),
1316
1356
  kind: sectionKind,
@@ -1630,7 +1670,7 @@ var buildRAGSourceSummaries = (sources) => {
1630
1670
  const citationReferenceMap = buildRAGCitationReferenceMap(citations);
1631
1671
  return sourceGroups.map((group) => {
1632
1672
  const groupCitations = citations.filter((citation) => group.chunks.some((chunk) => chunk.chunkId === citation.chunkId));
1633
- const leadChunk = group.chunks.slice().sort((left, right) => right.score - left.score)[0];
1673
+ const leadChunk = getPreferredSourceLeadChunk(group.chunks);
1634
1674
  const excerpts = leadChunk ? buildRAGChunkExcerpts(group.chunks, leadChunk.chunkId) : undefined;
1635
1675
  const structure = leadChunk?.structure ?? buildRAGChunkStructure(leadChunk?.metadata);
1636
1676
  const excerptSelection = buildRAGExcerptSelection(excerpts, structure);
@@ -1658,13 +1698,45 @@ var getSectionPathFromSource = (source) => {
1658
1698
  const path = source.structure?.section?.path ?? (Array.isArray(source.metadata?.sectionPath) ? source.metadata.sectionPath.map((value) => getContextString2(value)).filter((value) => typeof value === "string") : []);
1659
1699
  return path.length > 0 ? path : undefined;
1660
1700
  };
1701
+ var isBlockAwareContextLabel = (value) => typeof value === "string" && (value.startsWith("PDF ") || value.startsWith("Office "));
1702
+ var getStructuredSectionScoreWeight = (metadata) => {
1703
+ if (!metadata) {
1704
+ return 1;
1705
+ }
1706
+ const pdfTextKind = getContextString2(metadata.pdfTextKind);
1707
+ const officeBlockKind = getContextString2(metadata.officeBlockKind);
1708
+ const sectionKind = getContextString2(metadata.sectionKind);
1709
+ if (pdfTextKind === "table_like") {
1710
+ return 1.28;
1711
+ }
1712
+ if (officeBlockKind === "table" || officeBlockKind === "list") {
1713
+ return 1.24;
1714
+ }
1715
+ if (sectionKind === "pdf_block" || sectionKind === "office_block" || officeBlockKind === "paragraph" || pdfTextKind === "paragraph") {
1716
+ return 1.12;
1717
+ }
1718
+ return 1;
1719
+ };
1720
+ var getStructuredSourceLeadScore = (source) => source.score * getStructuredSectionScoreWeight(source.metadata);
1721
+ var getPreferredSourceLeadChunk = (chunks) => chunks.slice().sort((left, right) => {
1722
+ const leftWeightedScore = getStructuredSourceLeadScore(left);
1723
+ const rightWeightedScore = getStructuredSourceLeadScore(right);
1724
+ if (rightWeightedScore !== leftWeightedScore) {
1725
+ return rightWeightedScore - leftWeightedScore;
1726
+ }
1727
+ if (right.score !== left.score) {
1728
+ return right.score - left.score;
1729
+ }
1730
+ return left.chunkId.localeCompare(right.chunkId);
1731
+ })[0];
1661
1732
  var buildRAGSectionRetrievalDiagnostics = (sources, trace) => {
1662
- const totalScore = sources.reduce((sum, source) => sum + source.score, 0);
1733
+ const totalScore = sources.reduce((sum, source) => sum + source.score * getStructuredSectionScoreWeight(source.metadata), 0);
1663
1734
  if (sources.length === 0 || totalScore <= 0) {
1664
1735
  return [];
1665
1736
  }
1666
1737
  const sections = new Map;
1667
1738
  for (const source of sources) {
1739
+ const structuredScore = source.score * getStructuredSectionScoreWeight(source.metadata);
1668
1740
  const path = getSectionPathFromSource(source);
1669
1741
  if (!path) {
1670
1742
  continue;
@@ -1696,7 +1768,7 @@ var buildRAGSectionRetrievalDiagnostics = (sources, trace) => {
1696
1768
  sourceSet: new Set(source.source ? [source.source] : []),
1697
1769
  topChunkId: source.chunkId,
1698
1770
  topSource: source.source,
1699
- totalScore: source.score,
1771
+ totalScore: structuredScore,
1700
1772
  transformedHits,
1701
1773
  variantHits,
1702
1774
  vectorHits
@@ -1704,7 +1776,7 @@ var buildRAGSectionRetrievalDiagnostics = (sources, trace) => {
1704
1776
  continue;
1705
1777
  }
1706
1778
  existing.count += 1;
1707
- existing.totalScore += source.score;
1779
+ existing.totalScore += structuredScore;
1708
1780
  if (source.source) {
1709
1781
  existing.sourceSet.add(source.source);
1710
1782
  }
@@ -1732,6 +1804,8 @@ var buildRAGSectionRetrievalDiagnostics = (sources, trace) => {
1732
1804
  const parentTotal = siblingPool.reduce((sum, entry) => sum + entry.totalScore, 0);
1733
1805
  const scoreShare = section.totalScore / totalScore;
1734
1806
  const parentShare = parentTotal > 0 ? section.totalScore / parentTotal : undefined;
1807
+ const topChunk = sources.find((source) => source.chunkId === section.topChunkId);
1808
+ const topContextLabel = topChunk?.labels?.contextLabel ?? buildContextLabel2(topChunk?.metadata);
1735
1809
  const parentDistribution = parentTotal > 0 ? siblingPool.map((entry) => ({
1736
1810
  count: entry.count,
1737
1811
  isActive: entry.key === section.key,
@@ -1857,6 +1931,7 @@ var buildRAGSectionRetrievalDiagnostics = (sources, trace) => {
1857
1931
  reasons.push("concentrated_evidence");
1858
1932
  }
1859
1933
  const summaryParts = [
1934
+ isBlockAwareContextLabel(topContextLabel) ? topContextLabel : "",
1860
1935
  `${section.count} hit${section.count === 1 ? "" : "s"}`,
1861
1936
  `${(scoreShare * 100).toFixed(0)}% score share`,
1862
1937
  `vector ${section.vectorHits} · lexical ${section.lexicalHits} · hybrid ${section.hybridHits}`,
@@ -2068,22 +2143,21 @@ var updateSourceGroup = (groups, source) => {
2068
2143
  groups.set(key, buildSourceGroup(source, key));
2069
2144
  return;
2070
2145
  }
2071
- if (source.score > existing.bestScore) {
2072
- existing.bestScore = source.score;
2073
- existing.label = buildSourceLabel2(source);
2074
- existing.labels = source.labels ?? buildRAGSourceLabels({
2075
- metadata: source.metadata,
2076
- source: source.source,
2077
- title: source.title
2078
- });
2079
- existing.structure = source.structure ?? buildRAGChunkStructure(source.metadata);
2080
- existing.source = source.source;
2081
- existing.title = source.title;
2082
- } else {
2083
- existing.bestScore = Math.max(existing.bestScore, source.score);
2084
- }
2146
+ existing.bestScore = Math.max(existing.bestScore, source.score);
2085
2147
  existing.count += 1;
2086
2148
  existing.chunks.push(source);
2149
+ const leadChunk = getPreferredSourceLeadChunk(existing.chunks);
2150
+ if (leadChunk) {
2151
+ existing.label = buildSourceLabel2(leadChunk);
2152
+ existing.labels = leadChunk.labels ?? buildRAGSourceLabels({
2153
+ metadata: leadChunk.metadata,
2154
+ source: leadChunk.source,
2155
+ title: leadChunk.title
2156
+ });
2157
+ existing.structure = leadChunk.structure ?? buildRAGChunkStructure(leadChunk.metadata);
2158
+ existing.source = leadChunk.source;
2159
+ existing.title = leadChunk.title;
2160
+ }
2087
2161
  };
2088
2162
  var getLatestAssistantMessage = (messages) => {
2089
2163
  for (let index = messages.length - 1;index >= 0; index -= 1) {
@@ -2215,6 +2215,25 @@ var buildContextLabel2 = (metadata) => {
2215
2215
  if (!metadata) {
2216
2216
  return;
2217
2217
  }
2218
+ const pdfTextKind = getContextString2(metadata.pdfTextKind);
2219
+ const officeBlockKind = getContextString2(metadata.officeBlockKind);
2220
+ const sectionPath = Array.isArray(metadata.sectionPath) ? metadata.sectionPath.map((value) => getContextString2(value)).filter((value) => typeof value === "string") : [];
2221
+ const sectionTitle = getContextString2(metadata.sectionTitle) ?? sectionPath.at(-1);
2222
+ if (pdfTextKind === "table_like" && sectionTitle) {
2223
+ return `PDF table block ${sectionTitle}`;
2224
+ }
2225
+ if (pdfTextKind === "paragraph" && sectionTitle) {
2226
+ return `PDF text block ${sectionTitle}`;
2227
+ }
2228
+ if (officeBlockKind === "table" && sectionTitle) {
2229
+ return `Office table block ${sectionTitle}`;
2230
+ }
2231
+ if (officeBlockKind === "list" && sectionTitle) {
2232
+ return `Office list block ${sectionTitle}`;
2233
+ }
2234
+ if (officeBlockKind === "paragraph" && sectionTitle) {
2235
+ return `Office paragraph block ${sectionTitle}`;
2236
+ }
2218
2237
  const emailKind = getContextString2(metadata.emailKind);
2219
2238
  if (emailKind === "attachment") {
2220
2239
  return "Attachment evidence";
@@ -2251,8 +2270,6 @@ var buildContextLabel2 = (metadata) => {
2251
2270
  if (speaker) {
2252
2271
  return `Speaker ${speaker}`;
2253
2272
  }
2254
- const sectionPath = Array.isArray(metadata.sectionPath) ? metadata.sectionPath.map((value) => getContextString2(value)).filter((value) => typeof value === "string") : [];
2255
- const sectionTitle = getContextString2(metadata.sectionTitle) ?? sectionPath.at(-1);
2256
2273
  if (sectionTitle) {
2257
2274
  return `Section ${sectionTitle}`;
2258
2275
  }
@@ -2262,11 +2279,21 @@ var buildLocatorLabel2 = (metadata, source, title) => {
2262
2279
  if (!metadata) {
2263
2280
  return;
2264
2281
  }
2282
+ const pdfTextKind = getContextString2(metadata.pdfTextKind);
2283
+ const officeBlockKind = getContextString2(metadata.officeBlockKind);
2284
+ const pdfBlockNumber = getContextNumber2(metadata.pdfBlockNumber);
2285
+ const officeBlockNumber = getContextNumber2(metadata.officeBlockNumber);
2265
2286
  const page = getContextNumber2(metadata.page) ?? getContextNumber2(metadata.pageNumber) ?? (typeof metadata.pageIndex === "number" ? metadata.pageIndex + 1 : undefined);
2266
2287
  const region = getContextNumber2(metadata.regionNumber) ?? (typeof metadata.regionIndex === "number" ? metadata.regionIndex + 1 : undefined);
2267
2288
  if (page && region) {
2268
2289
  return `Page ${page} · Region ${region}`;
2269
2290
  }
2291
+ if (page && pdfBlockNumber && pdfTextKind === "table_like") {
2292
+ return `Page ${page} · Table Block ${pdfBlockNumber}`;
2293
+ }
2294
+ if (page && pdfBlockNumber) {
2295
+ return `Page ${page} · Text Block ${pdfBlockNumber}`;
2296
+ }
2270
2297
  if (page) {
2271
2298
  return `Page ${page}`;
2272
2299
  }
@@ -2295,6 +2322,15 @@ var buildLocatorLabel2 = (metadata, source, title) => {
2295
2322
  if (mediaStart) {
2296
2323
  return `Timestamp ${mediaStart}`;
2297
2324
  }
2325
+ if (officeBlockNumber && officeBlockKind === "table") {
2326
+ return `Office table block ${officeBlockNumber}`;
2327
+ }
2328
+ if (officeBlockNumber && officeBlockKind === "list") {
2329
+ return `Office list block ${officeBlockNumber}`;
2330
+ }
2331
+ if (officeBlockNumber && officeBlockKind === "paragraph") {
2332
+ return `Office paragraph block ${officeBlockNumber}`;
2333
+ }
2298
2334
  const sectionPath = Array.isArray(metadata.sectionPath) ? metadata.sectionPath.map((value) => getContextString2(value)).filter((value) => typeof value === "string") : [];
2299
2335
  if (sectionPath.length > 0) {
2300
2336
  return `Section ${sectionPath.join(" > ")}`;
@@ -2312,12 +2348,16 @@ var buildProvenanceLabel2 = (metadata) => {
2312
2348
  const mediaKind = getContextString2(metadata.mediaKind);
2313
2349
  const transcriptSource = getContextString2(metadata.transcriptSource);
2314
2350
  const pdfTextMode = getContextString2(metadata.pdfTextMode);
2351
+ const pdfTextKind = getContextString2(metadata.pdfTextKind);
2352
+ const officeBlockKind = getContextString2(metadata.officeBlockKind);
2315
2353
  const ocrEngine = getContextString2(metadata.ocrEngine);
2316
2354
  const extractorRegistryMatch = getContextString2(metadata.extractorRegistryMatch);
2317
2355
  const chunkingProfile = getContextString2(metadata.chunkingProfile);
2318
2356
  const ocrConfidence = getContextNumber2(metadata.ocrRegionConfidence) ?? getContextNumber2(metadata.ocrConfidence);
2319
2357
  const labels = [
2320
2358
  pdfTextMode ? `PDF ${pdfTextMode}` : "",
2359
+ pdfTextKind === "table_like" ? "PDF table block" : pdfTextKind === "paragraph" ? "PDF text block" : "",
2360
+ officeBlockKind ? `Office ${officeBlockKind}` : "",
2321
2361
  ocrEngine ? `OCR ${ocrEngine}` : "",
2322
2362
  extractorRegistryMatch ? `Extractor ${extractorRegistryMatch}` : "",
2323
2363
  chunkingProfile ? `Chunking ${chunkingProfile}` : "",
@@ -2353,7 +2393,7 @@ var buildRAGChunkStructure = (metadata) => {
2353
2393
  return;
2354
2394
  }
2355
2395
  const sectionPath = Array.isArray(metadata.sectionPath) ? metadata.sectionPath.filter((value) => typeof value === "string" && value.trim().length > 0) : undefined;
2356
- const sectionKind = metadata.sectionKind === "markdown_heading" || metadata.sectionKind === "html_heading" || metadata.sectionKind === "office_heading" || metadata.sectionKind === "spreadsheet_rows" || metadata.sectionKind === "presentation_slide" ? metadata.sectionKind : undefined;
2396
+ const sectionKind = metadata.sectionKind === "markdown_heading" || metadata.sectionKind === "html_heading" || metadata.sectionKind === "office_heading" || metadata.sectionKind === "office_block" || metadata.sectionKind === "pdf_block" || metadata.sectionKind === "spreadsheet_rows" || metadata.sectionKind === "presentation_slide" ? metadata.sectionKind : undefined;
2357
2397
  const section = {
2358
2398
  depth: getContextNumber2(metadata.sectionDepth),
2359
2399
  kind: sectionKind,
@@ -2673,7 +2713,7 @@ var buildRAGSourceSummaries = (sources) => {
2673
2713
  const citationReferenceMap = buildRAGCitationReferenceMap(citations);
2674
2714
  return sourceGroups.map((group) => {
2675
2715
  const groupCitations = citations.filter((citation) => group.chunks.some((chunk) => chunk.chunkId === citation.chunkId));
2676
- const leadChunk = group.chunks.slice().sort((left, right) => right.score - left.score)[0];
2716
+ const leadChunk = getPreferredSourceLeadChunk(group.chunks);
2677
2717
  const excerpts = leadChunk ? buildRAGChunkExcerpts(group.chunks, leadChunk.chunkId) : undefined;
2678
2718
  const structure = leadChunk?.structure ?? buildRAGChunkStructure(leadChunk?.metadata);
2679
2719
  const excerptSelection = buildRAGExcerptSelection(excerpts, structure);
@@ -2701,13 +2741,45 @@ var getSectionPathFromSource = (source) => {
2701
2741
  const path = source.structure?.section?.path ?? (Array.isArray(source.metadata?.sectionPath) ? source.metadata.sectionPath.map((value) => getContextString2(value)).filter((value) => typeof value === "string") : []);
2702
2742
  return path.length > 0 ? path : undefined;
2703
2743
  };
2744
/**
 * Reports whether a context label describes a PDF or Office block.
 *
 * A label qualifies only when it is a string that begins with the exact
 * prefix "PDF " or "Office " (trailing space included).
 *
 * @param {unknown} value - Candidate context label.
 * @returns {boolean} `true` for a string with one of the two prefixes,
 *   `false` for anything else (including non-string values).
 */
var isBlockAwareContextLabel = (value) => {
  if (typeof value !== "string") {
    return false;
  }
  const blockPrefixes = ["PDF ", "Office "];
  return blockPrefixes.some((prefix) => value.startsWith(prefix));
};
2745
/**
 * Picks the score multiplier for a chunk based on its structural metadata.
 *
 * Tiers are checked in order and the first match wins:
 *   1. `pdfTextKind` is "table_like"                      -> 1.28
 *   2. `officeBlockKind` is "table" or "list"             -> 1.24
 *   3. `sectionKind` is "pdf_block" / "office_block", or
 *      either block kind is "paragraph"                   -> 1.12
 *   4. anything else, or no metadata at all               -> 1
 *
 * Each field is normalised through `getContextString2` before comparison.
 *
 * @param {object | null | undefined} metadata - Chunk metadata; may be absent.
 * @returns {number} The multiplier (1, 1.12, 1.24 or 1.28).
 */
var getStructuredSectionScoreWeight = (metadata) => {
  if (!metadata) {
    return 1;
  }
  const [pdfKind, officeKind, kindOfSection] = [
    metadata.pdfTextKind,
    metadata.officeBlockKind,
    metadata.sectionKind
  ].map((raw) => getContextString2(raw));

  const isPdfTable = pdfKind === "table_like";
  const isOfficeTableOrList = officeKind === "table" || officeKind === "list";
  const isPlainBlock =
    ["pdf_block", "office_block"].includes(kindOfSection) ||
    officeKind === "paragraph" ||
    pdfKind === "paragraph";

  // Order matters: a stronger tier must shadow any weaker one that also matches.
  if (isPdfTable) {
    return 1.28;
  }
  if (isOfficeTableOrList) {
    return 1.24;
  }
  return isPlainBlock ? 1.12 : 1;
};
2763
/**
 * Computes a source's score after applying its structural weight.
 *
 * @param {{ score: number, metadata?: object }} source - Retrieved chunk/source.
 * @returns {number} `source.score` multiplied by the weight that
 *   `getStructuredSectionScoreWeight` returns for `source.metadata`.
 */
var getStructuredSourceLeadScore = (source) => {
  const { score, metadata } = source;
  const structuralWeight = getStructuredSectionScoreWeight(metadata);
  return score * structuralWeight;
};
2764
/**
 * Selects the chunk that should represent ("lead") a group of chunks.
 *
 * Precedence, highest first:
 *   1. larger structurally weighted score (`getStructuredSourceLeadScore`)
 *   2. larger raw `score`
 *   3. smaller `chunkId` according to `localeCompare`
 * When all three tie, the chunk that appears first in `chunks` wins, which is
 * what a stable sort followed by `[0]` would also produce.
 *
 * Implementation notes:
 *   - The winner is found in a single pass, and each chunk's weighted score is
 *     computed exactly once, instead of copying and sorting the whole array
 *     just to read its first element.
 *   - A score that is NaN (for example because `score` is missing) ranks below
 *     every real number, so an unscored chunk never displaces a scored one.
 *   - `chunkId` is coerced to a string for the tie-break so a missing id
 *     cannot raise a TypeError.
 *
 * @param {Array<{ chunkId?: string, score?: number, metadata?: object }>} chunks
 *   Candidate chunks; the array is not modified.
 * @returns {object | undefined} The preferred chunk, or `undefined` when
 *   `chunks` is empty.
 */
var getPreferredSourceLeadChunk = (chunks) => {
  // Normalise to a comparable number: NaN / non-numeric values sink to the bottom.
  const toRank = (value) => {
    const numeric = Number(value);
    return Number.isNaN(numeric) ? Number.NEGATIVE_INFINITY : numeric;
  };
  const toId = (chunk) => String(chunk.chunkId ?? "");

  // True only when `candidate` is strictly preferable, so earlier chunks keep ties.
  const outranks = (candidate, candidateWeighted, current, currentWeighted) => {
    if (candidateWeighted !== currentWeighted) {
      return candidateWeighted > currentWeighted;
    }
    const candidateScore = toRank(candidate.score);
    const currentScore = toRank(current.score);
    if (candidateScore !== currentScore) {
      return candidateScore > currentScore;
    }
    return toId(candidate).localeCompare(toId(current)) < 0;
  };

  let hasLead = false;
  let lead;
  let leadWeighted = Number.NEGATIVE_INFINITY;
  for (const chunk of chunks) {
    const weighted = toRank(getStructuredSourceLeadScore(chunk));
    if (!hasLead || outranks(chunk, weighted, lead, leadWeighted)) {
      hasLead = true;
      lead = chunk;
      leadWeighted = weighted;
    }
  }
  return lead;
};
2704
2775
  var buildRAGSectionRetrievalDiagnostics = (sources, trace) => {
2705
- const totalScore = sources.reduce((sum, source) => sum + source.score, 0);
2776
+ const totalScore = sources.reduce((sum, source) => sum + source.score * getStructuredSectionScoreWeight(source.metadata), 0);
2706
2777
  if (sources.length === 0 || totalScore <= 0) {
2707
2778
  return [];
2708
2779
  }
2709
2780
  const sections = new Map;
2710
2781
  for (const source of sources) {
2782
+ const structuredScore = source.score * getStructuredSectionScoreWeight(source.metadata);
2711
2783
  const path = getSectionPathFromSource(source);
2712
2784
  if (!path) {
2713
2785
  continue;
@@ -2739,7 +2811,7 @@ var buildRAGSectionRetrievalDiagnostics = (sources, trace) => {
2739
2811
  sourceSet: new Set(source.source ? [source.source] : []),
2740
2812
  topChunkId: source.chunkId,
2741
2813
  topSource: source.source,
2742
- totalScore: source.score,
2814
+ totalScore: structuredScore,
2743
2815
  transformedHits,
2744
2816
  variantHits,
2745
2817
  vectorHits
@@ -2747,7 +2819,7 @@ var buildRAGSectionRetrievalDiagnostics = (sources, trace) => {
2747
2819
  continue;
2748
2820
  }
2749
2821
  existing.count += 1;
2750
- existing.totalScore += source.score;
2822
+ existing.totalScore += structuredScore;
2751
2823
  if (source.source) {
2752
2824
  existing.sourceSet.add(source.source);
2753
2825
  }
@@ -2775,6 +2847,8 @@ var buildRAGSectionRetrievalDiagnostics = (sources, trace) => {
2775
2847
  const parentTotal = siblingPool.reduce((sum, entry) => sum + entry.totalScore, 0);
2776
2848
  const scoreShare = section.totalScore / totalScore;
2777
2849
  const parentShare = parentTotal > 0 ? section.totalScore / parentTotal : undefined;
2850
+ const topChunk = sources.find((source) => source.chunkId === section.topChunkId);
2851
+ const topContextLabel = topChunk?.labels?.contextLabel ?? buildContextLabel2(topChunk?.metadata);
2778
2852
  const parentDistribution = parentTotal > 0 ? siblingPool.map((entry) => ({
2779
2853
  count: entry.count,
2780
2854
  isActive: entry.key === section.key,
@@ -2900,6 +2974,7 @@ var buildRAGSectionRetrievalDiagnostics = (sources, trace) => {
2900
2974
  reasons.push("concentrated_evidence");
2901
2975
  }
2902
2976
  const summaryParts = [
2977
+ isBlockAwareContextLabel(topContextLabel) ? topContextLabel : "",
2903
2978
  `${section.count} hit${section.count === 1 ? "" : "s"}`,
2904
2979
  `${(scoreShare * 100).toFixed(0)}% score share`,
2905
2980
  `vector ${section.vectorHits} · lexical ${section.lexicalHits} · hybrid ${section.hybridHits}`,
@@ -3111,22 +3186,21 @@ var updateSourceGroup = (groups, source) => {
3111
3186
  groups.set(key, buildSourceGroup(source, key));
3112
3187
  return;
3113
3188
  }
3114
- if (source.score > existing.bestScore) {
3115
- existing.bestScore = source.score;
3116
- existing.label = buildSourceLabel2(source);
3117
- existing.labels = source.labels ?? buildRAGSourceLabels({
3118
- metadata: source.metadata,
3119
- source: source.source,
3120
- title: source.title
3121
- });
3122
- existing.structure = source.structure ?? buildRAGChunkStructure(source.metadata);
3123
- existing.source = source.source;
3124
- existing.title = source.title;
3125
- } else {
3126
- existing.bestScore = Math.max(existing.bestScore, source.score);
3127
- }
3189
+ existing.bestScore = Math.max(existing.bestScore, source.score);
3128
3190
  existing.count += 1;
3129
3191
  existing.chunks.push(source);
3192
+ const leadChunk = getPreferredSourceLeadChunk(existing.chunks);
3193
+ if (leadChunk) {
3194
+ existing.label = buildSourceLabel2(leadChunk);
3195
+ existing.labels = leadChunk.labels ?? buildRAGSourceLabels({
3196
+ metadata: leadChunk.metadata,
3197
+ source: leadChunk.source,
3198
+ title: leadChunk.title
3199
+ });
3200
+ existing.structure = leadChunk.structure ?? buildRAGChunkStructure(leadChunk.metadata);
3201
+ existing.source = leadChunk.source;
3202
+ existing.title = leadChunk.title;
3203
+ }
3130
3204
  };
3131
3205
  var getLatestAssistantMessage = (messages) => {
3132
3206
  for (let index = messages.length - 1;index >= 0; index -= 1) {