@absolutejs/absolute 0.19.0-beta.617 → 0.19.0-beta.619
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/ai/client/index.js +100 -23
- package/dist/ai/client/index.js.map +4 -4
- package/dist/ai/client/ui.js +96 -22
- package/dist/ai/client/ui.js.map +3 -3
- package/dist/ai/index.js +502 -84
- package/dist/ai/index.js.map +9 -9
- package/dist/ai/rag/quality.js +5 -2
- package/dist/ai/rag/quality.js.map +3 -3
- package/dist/ai/rag/ui.js +96 -22
- package/dist/ai/rag/ui.js.map +3 -3
- package/dist/ai-client/angular/ai/index.js +95 -21
- package/dist/ai-client/react/ai/index.js +95 -21
- package/dist/ai-client/vue/ai/index.js +95 -21
- package/dist/angular/ai/index.js +96 -22
- package/dist/angular/ai/index.js.map +3 -3
- package/dist/react/ai/index.js +100 -23
- package/dist/react/ai/index.js.map +4 -4
- package/dist/src/ai/rag/quality.d.ts +2 -1
- package/dist/src/vue/ai/useRAG.d.ts +4 -4
- package/dist/src/vue/ai/useRAGChunkPreview.d.ts +2 -2
- package/dist/src/vue/ai/useRAGSearch.d.ts +2 -2
- package/dist/svelte/ai/index.js +100 -23
- package/dist/svelte/ai/index.js.map +4 -4
- package/dist/types/ai.d.ts +3 -2
- package/dist/vue/ai/index.js +100 -23
- package/dist/vue/ai/index.js.map +4 -4
- package/package.json +7 -7
|
@@ -1212,6 +1212,25 @@ var buildContextLabel2 = (metadata) => {
|
|
|
1212
1212
|
if (!metadata) {
|
|
1213
1213
|
return;
|
|
1214
1214
|
}
|
|
1215
|
+
const pdfTextKind = getContextString2(metadata.pdfTextKind);
|
|
1216
|
+
const officeBlockKind = getContextString2(metadata.officeBlockKind);
|
|
1217
|
+
const sectionPath = Array.isArray(metadata.sectionPath) ? metadata.sectionPath.map((value) => getContextString2(value)).filter((value) => typeof value === "string") : [];
|
|
1218
|
+
const sectionTitle = getContextString2(metadata.sectionTitle) ?? sectionPath.at(-1);
|
|
1219
|
+
if (pdfTextKind === "table_like" && sectionTitle) {
|
|
1220
|
+
return `PDF table block ${sectionTitle}`;
|
|
1221
|
+
}
|
|
1222
|
+
if (pdfTextKind === "paragraph" && sectionTitle) {
|
|
1223
|
+
return `PDF text block ${sectionTitle}`;
|
|
1224
|
+
}
|
|
1225
|
+
if (officeBlockKind === "table" && sectionTitle) {
|
|
1226
|
+
return `Office table block ${sectionTitle}`;
|
|
1227
|
+
}
|
|
1228
|
+
if (officeBlockKind === "list" && sectionTitle) {
|
|
1229
|
+
return `Office list block ${sectionTitle}`;
|
|
1230
|
+
}
|
|
1231
|
+
if (officeBlockKind === "paragraph" && sectionTitle) {
|
|
1232
|
+
return `Office paragraph block ${sectionTitle}`;
|
|
1233
|
+
}
|
|
1215
1234
|
const emailKind = getContextString2(metadata.emailKind);
|
|
1216
1235
|
if (emailKind === "attachment") {
|
|
1217
1236
|
return "Attachment evidence";
|
|
@@ -1248,8 +1267,6 @@ var buildContextLabel2 = (metadata) => {
|
|
|
1248
1267
|
if (speaker) {
|
|
1249
1268
|
return `Speaker ${speaker}`;
|
|
1250
1269
|
}
|
|
1251
|
-
const sectionPath = Array.isArray(metadata.sectionPath) ? metadata.sectionPath.map((value) => getContextString2(value)).filter((value) => typeof value === "string") : [];
|
|
1252
|
-
const sectionTitle = getContextString2(metadata.sectionTitle) ?? sectionPath.at(-1);
|
|
1253
1270
|
if (sectionTitle) {
|
|
1254
1271
|
return `Section ${sectionTitle}`;
|
|
1255
1272
|
}
|
|
@@ -1259,11 +1276,21 @@ var buildLocatorLabel2 = (metadata, source, title) => {
|
|
|
1259
1276
|
if (!metadata) {
|
|
1260
1277
|
return;
|
|
1261
1278
|
}
|
|
1279
|
+
const pdfTextKind = getContextString2(metadata.pdfTextKind);
|
|
1280
|
+
const officeBlockKind = getContextString2(metadata.officeBlockKind);
|
|
1281
|
+
const pdfBlockNumber = getContextNumber2(metadata.pdfBlockNumber);
|
|
1282
|
+
const officeBlockNumber = getContextNumber2(metadata.officeBlockNumber);
|
|
1262
1283
|
const page = getContextNumber2(metadata.page) ?? getContextNumber2(metadata.pageNumber) ?? (typeof metadata.pageIndex === "number" ? metadata.pageIndex + 1 : undefined);
|
|
1263
1284
|
const region = getContextNumber2(metadata.regionNumber) ?? (typeof metadata.regionIndex === "number" ? metadata.regionIndex + 1 : undefined);
|
|
1264
1285
|
if (page && region) {
|
|
1265
1286
|
return `Page ${page} · Region ${region}`;
|
|
1266
1287
|
}
|
|
1288
|
+
if (page && pdfBlockNumber && pdfTextKind === "table_like") {
|
|
1289
|
+
return `Page ${page} · Table Block ${pdfBlockNumber}`;
|
|
1290
|
+
}
|
|
1291
|
+
if (page && pdfBlockNumber) {
|
|
1292
|
+
return `Page ${page} · Text Block ${pdfBlockNumber}`;
|
|
1293
|
+
}
|
|
1267
1294
|
if (page) {
|
|
1268
1295
|
return `Page ${page}`;
|
|
1269
1296
|
}
|
|
@@ -1292,6 +1319,15 @@ var buildLocatorLabel2 = (metadata, source, title) => {
|
|
|
1292
1319
|
if (mediaStart) {
|
|
1293
1320
|
return `Timestamp ${mediaStart}`;
|
|
1294
1321
|
}
|
|
1322
|
+
if (officeBlockNumber && officeBlockKind === "table") {
|
|
1323
|
+
return `Office table block ${officeBlockNumber}`;
|
|
1324
|
+
}
|
|
1325
|
+
if (officeBlockNumber && officeBlockKind === "list") {
|
|
1326
|
+
return `Office list block ${officeBlockNumber}`;
|
|
1327
|
+
}
|
|
1328
|
+
if (officeBlockNumber && officeBlockKind === "paragraph") {
|
|
1329
|
+
return `Office paragraph block ${officeBlockNumber}`;
|
|
1330
|
+
}
|
|
1295
1331
|
const sectionPath = Array.isArray(metadata.sectionPath) ? metadata.sectionPath.map((value) => getContextString2(value)).filter((value) => typeof value === "string") : [];
|
|
1296
1332
|
if (sectionPath.length > 0) {
|
|
1297
1333
|
return `Section ${sectionPath.join(" > ")}`;
|
|
@@ -1309,12 +1345,16 @@ var buildProvenanceLabel2 = (metadata) => {
|
|
|
1309
1345
|
const mediaKind = getContextString2(metadata.mediaKind);
|
|
1310
1346
|
const transcriptSource = getContextString2(metadata.transcriptSource);
|
|
1311
1347
|
const pdfTextMode = getContextString2(metadata.pdfTextMode);
|
|
1348
|
+
const pdfTextKind = getContextString2(metadata.pdfTextKind);
|
|
1349
|
+
const officeBlockKind = getContextString2(metadata.officeBlockKind);
|
|
1312
1350
|
const ocrEngine = getContextString2(metadata.ocrEngine);
|
|
1313
1351
|
const extractorRegistryMatch = getContextString2(metadata.extractorRegistryMatch);
|
|
1314
1352
|
const chunkingProfile = getContextString2(metadata.chunkingProfile);
|
|
1315
1353
|
const ocrConfidence = getContextNumber2(metadata.ocrRegionConfidence) ?? getContextNumber2(metadata.ocrConfidence);
|
|
1316
1354
|
const labels = [
|
|
1317
1355
|
pdfTextMode ? `PDF ${pdfTextMode}` : "",
|
|
1356
|
+
pdfTextKind === "table_like" ? "PDF table block" : pdfTextKind === "paragraph" ? "PDF text block" : "",
|
|
1357
|
+
officeBlockKind ? `Office ${officeBlockKind}` : "",
|
|
1318
1358
|
ocrEngine ? `OCR ${ocrEngine}` : "",
|
|
1319
1359
|
extractorRegistryMatch ? `Extractor ${extractorRegistryMatch}` : "",
|
|
1320
1360
|
chunkingProfile ? `Chunking ${chunkingProfile}` : "",
|
|
@@ -1350,7 +1390,7 @@ var buildRAGChunkStructure = (metadata) => {
|
|
|
1350
1390
|
return;
|
|
1351
1391
|
}
|
|
1352
1392
|
const sectionPath = Array.isArray(metadata.sectionPath) ? metadata.sectionPath.filter((value) => typeof value === "string" && value.trim().length > 0) : undefined;
|
|
1353
|
-
const sectionKind = metadata.sectionKind === "markdown_heading" || metadata.sectionKind === "html_heading" || metadata.sectionKind === "office_heading" || metadata.sectionKind === "spreadsheet_rows" || metadata.sectionKind === "presentation_slide" ? metadata.sectionKind : undefined;
|
|
1393
|
+
const sectionKind = metadata.sectionKind === "markdown_heading" || metadata.sectionKind === "html_heading" || metadata.sectionKind === "office_heading" || metadata.sectionKind === "office_block" || metadata.sectionKind === "pdf_block" || metadata.sectionKind === "spreadsheet_rows" || metadata.sectionKind === "presentation_slide" ? metadata.sectionKind : undefined;
|
|
1354
1394
|
const section = {
|
|
1355
1395
|
depth: getContextNumber2(metadata.sectionDepth),
|
|
1356
1396
|
kind: sectionKind,
|
|
@@ -1670,7 +1710,7 @@ var buildRAGSourceSummaries = (sources) => {
|
|
|
1670
1710
|
const citationReferenceMap = buildRAGCitationReferenceMap(citations);
|
|
1671
1711
|
return sourceGroups.map((group) => {
|
|
1672
1712
|
const groupCitations = citations.filter((citation) => group.chunks.some((chunk) => chunk.chunkId === citation.chunkId));
|
|
1673
|
-
const leadChunk = group.chunks
|
|
1713
|
+
const leadChunk = getPreferredSourceLeadChunk(group.chunks);
|
|
1674
1714
|
const excerpts = leadChunk ? buildRAGChunkExcerpts(group.chunks, leadChunk.chunkId) : undefined;
|
|
1675
1715
|
const structure = leadChunk?.structure ?? buildRAGChunkStructure(leadChunk?.metadata);
|
|
1676
1716
|
const excerptSelection = buildRAGExcerptSelection(excerpts, structure);
|
|
@@ -1698,13 +1738,45 @@ var getSectionPathFromSource = (source) => {
|
|
|
1698
1738
|
const path = source.structure?.section?.path ?? (Array.isArray(source.metadata?.sectionPath) ? source.metadata.sectionPath.map((value) => getContextString2(value)).filter((value) => typeof value === "string") : []);
|
|
1699
1739
|
return path.length > 0 ? path : undefined;
|
|
1700
1740
|
};
|
|
1741
|
+
var isBlockAwareContextLabel = (value) => typeof value === "string" && (value.startsWith("PDF ") || value.startsWith("Office "));
|
|
1742
|
+
var getStructuredSectionScoreWeight = (metadata) => {
|
|
1743
|
+
if (!metadata) {
|
|
1744
|
+
return 1;
|
|
1745
|
+
}
|
|
1746
|
+
const pdfTextKind = getContextString2(metadata.pdfTextKind);
|
|
1747
|
+
const officeBlockKind = getContextString2(metadata.officeBlockKind);
|
|
1748
|
+
const sectionKind = getContextString2(metadata.sectionKind);
|
|
1749
|
+
if (pdfTextKind === "table_like") {
|
|
1750
|
+
return 1.28;
|
|
1751
|
+
}
|
|
1752
|
+
if (officeBlockKind === "table" || officeBlockKind === "list") {
|
|
1753
|
+
return 1.24;
|
|
1754
|
+
}
|
|
1755
|
+
if (sectionKind === "pdf_block" || sectionKind === "office_block" || officeBlockKind === "paragraph" || pdfTextKind === "paragraph") {
|
|
1756
|
+
return 1.12;
|
|
1757
|
+
}
|
|
1758
|
+
return 1;
|
|
1759
|
+
};
|
|
1760
|
+
var getStructuredSourceLeadScore = (source) => source.score * getStructuredSectionScoreWeight(source.metadata);
|
|
1761
|
+
var getPreferredSourceLeadChunk = (chunks) => chunks.slice().sort((left, right) => {
|
|
1762
|
+
const leftWeightedScore = getStructuredSourceLeadScore(left);
|
|
1763
|
+
const rightWeightedScore = getStructuredSourceLeadScore(right);
|
|
1764
|
+
if (rightWeightedScore !== leftWeightedScore) {
|
|
1765
|
+
return rightWeightedScore - leftWeightedScore;
|
|
1766
|
+
}
|
|
1767
|
+
if (right.score !== left.score) {
|
|
1768
|
+
return right.score - left.score;
|
|
1769
|
+
}
|
|
1770
|
+
return left.chunkId.localeCompare(right.chunkId);
|
|
1771
|
+
})[0];
|
|
1701
1772
|
var buildRAGSectionRetrievalDiagnostics = (sources, trace) => {
|
|
1702
|
-
const totalScore = sources.reduce((sum, source) => sum + source.score, 0);
|
|
1773
|
+
const totalScore = sources.reduce((sum, source) => sum + source.score * getStructuredSectionScoreWeight(source.metadata), 0);
|
|
1703
1774
|
if (sources.length === 0 || totalScore <= 0) {
|
|
1704
1775
|
return [];
|
|
1705
1776
|
}
|
|
1706
1777
|
const sections = new Map;
|
|
1707
1778
|
for (const source of sources) {
|
|
1779
|
+
const structuredScore = source.score * getStructuredSectionScoreWeight(source.metadata);
|
|
1708
1780
|
const path = getSectionPathFromSource(source);
|
|
1709
1781
|
if (!path) {
|
|
1710
1782
|
continue;
|
|
@@ -1736,7 +1808,7 @@ var buildRAGSectionRetrievalDiagnostics = (sources, trace) => {
|
|
|
1736
1808
|
sourceSet: new Set(source.source ? [source.source] : []),
|
|
1737
1809
|
topChunkId: source.chunkId,
|
|
1738
1810
|
topSource: source.source,
|
|
1739
|
-
totalScore:
|
|
1811
|
+
totalScore: structuredScore,
|
|
1740
1812
|
transformedHits,
|
|
1741
1813
|
variantHits,
|
|
1742
1814
|
vectorHits
|
|
@@ -1744,7 +1816,7 @@ var buildRAGSectionRetrievalDiagnostics = (sources, trace) => {
|
|
|
1744
1816
|
continue;
|
|
1745
1817
|
}
|
|
1746
1818
|
existing.count += 1;
|
|
1747
|
-
existing.totalScore +=
|
|
1819
|
+
existing.totalScore += structuredScore;
|
|
1748
1820
|
if (source.source) {
|
|
1749
1821
|
existing.sourceSet.add(source.source);
|
|
1750
1822
|
}
|
|
@@ -1772,6 +1844,8 @@ var buildRAGSectionRetrievalDiagnostics = (sources, trace) => {
|
|
|
1772
1844
|
const parentTotal = siblingPool.reduce((sum, entry) => sum + entry.totalScore, 0);
|
|
1773
1845
|
const scoreShare = section.totalScore / totalScore;
|
|
1774
1846
|
const parentShare = parentTotal > 0 ? section.totalScore / parentTotal : undefined;
|
|
1847
|
+
const topChunk = sources.find((source) => source.chunkId === section.topChunkId);
|
|
1848
|
+
const topContextLabel = topChunk?.labels?.contextLabel ?? buildContextLabel2(topChunk?.metadata);
|
|
1775
1849
|
const parentDistribution = parentTotal > 0 ? siblingPool.map((entry) => ({
|
|
1776
1850
|
count: entry.count,
|
|
1777
1851
|
isActive: entry.key === section.key,
|
|
@@ -1897,6 +1971,7 @@ var buildRAGSectionRetrievalDiagnostics = (sources, trace) => {
|
|
|
1897
1971
|
reasons.push("concentrated_evidence");
|
|
1898
1972
|
}
|
|
1899
1973
|
const summaryParts = [
|
|
1974
|
+
isBlockAwareContextLabel(topContextLabel) ? topContextLabel : "",
|
|
1900
1975
|
`${section.count} hit${section.count === 1 ? "" : "s"}`,
|
|
1901
1976
|
`${(scoreShare * 100).toFixed(0)}% score share`,
|
|
1902
1977
|
`vector ${section.vectorHits} · lexical ${section.lexicalHits} · hybrid ${section.hybridHits}`,
|
|
@@ -2108,22 +2183,21 @@ var updateSourceGroup = (groups, source) => {
|
|
|
2108
2183
|
groups.set(key, buildSourceGroup(source, key));
|
|
2109
2184
|
return;
|
|
2110
2185
|
}
|
|
2111
|
-
|
|
2112
|
-
existing.bestScore = source.score;
|
|
2113
|
-
existing.label = buildSourceLabel2(source);
|
|
2114
|
-
existing.labels = source.labels ?? buildRAGSourceLabels({
|
|
2115
|
-
metadata: source.metadata,
|
|
2116
|
-
source: source.source,
|
|
2117
|
-
title: source.title
|
|
2118
|
-
});
|
|
2119
|
-
existing.structure = source.structure ?? buildRAGChunkStructure(source.metadata);
|
|
2120
|
-
existing.source = source.source;
|
|
2121
|
-
existing.title = source.title;
|
|
2122
|
-
} else {
|
|
2123
|
-
existing.bestScore = Math.max(existing.bestScore, source.score);
|
|
2124
|
-
}
|
|
2186
|
+
existing.bestScore = Math.max(existing.bestScore, source.score);
|
|
2125
2187
|
existing.count += 1;
|
|
2126
2188
|
existing.chunks.push(source);
|
|
2189
|
+
const leadChunk = getPreferredSourceLeadChunk(existing.chunks);
|
|
2190
|
+
if (leadChunk) {
|
|
2191
|
+
existing.label = buildSourceLabel2(leadChunk);
|
|
2192
|
+
existing.labels = leadChunk.labels ?? buildRAGSourceLabels({
|
|
2193
|
+
metadata: leadChunk.metadata,
|
|
2194
|
+
source: leadChunk.source,
|
|
2195
|
+
title: leadChunk.title
|
|
2196
|
+
});
|
|
2197
|
+
existing.structure = leadChunk.structure ?? buildRAGChunkStructure(leadChunk.metadata);
|
|
2198
|
+
existing.source = leadChunk.source;
|
|
2199
|
+
existing.title = leadChunk.title;
|
|
2200
|
+
}
|
|
2127
2201
|
};
|
|
2128
2202
|
var getLatestAssistantMessage = (messages) => {
|
|
2129
2203
|
for (let index = messages.length - 1;index >= 0; index -= 1) {
|
|
@@ -1172,6 +1172,25 @@ var buildContextLabel2 = (metadata) => {
|
|
|
1172
1172
|
if (!metadata) {
|
|
1173
1173
|
return;
|
|
1174
1174
|
}
|
|
1175
|
+
const pdfTextKind = getContextString2(metadata.pdfTextKind);
|
|
1176
|
+
const officeBlockKind = getContextString2(metadata.officeBlockKind);
|
|
1177
|
+
const sectionPath = Array.isArray(metadata.sectionPath) ? metadata.sectionPath.map((value) => getContextString2(value)).filter((value) => typeof value === "string") : [];
|
|
1178
|
+
const sectionTitle = getContextString2(metadata.sectionTitle) ?? sectionPath.at(-1);
|
|
1179
|
+
if (pdfTextKind === "table_like" && sectionTitle) {
|
|
1180
|
+
return `PDF table block ${sectionTitle}`;
|
|
1181
|
+
}
|
|
1182
|
+
if (pdfTextKind === "paragraph" && sectionTitle) {
|
|
1183
|
+
return `PDF text block ${sectionTitle}`;
|
|
1184
|
+
}
|
|
1185
|
+
if (officeBlockKind === "table" && sectionTitle) {
|
|
1186
|
+
return `Office table block ${sectionTitle}`;
|
|
1187
|
+
}
|
|
1188
|
+
if (officeBlockKind === "list" && sectionTitle) {
|
|
1189
|
+
return `Office list block ${sectionTitle}`;
|
|
1190
|
+
}
|
|
1191
|
+
if (officeBlockKind === "paragraph" && sectionTitle) {
|
|
1192
|
+
return `Office paragraph block ${sectionTitle}`;
|
|
1193
|
+
}
|
|
1175
1194
|
const emailKind = getContextString2(metadata.emailKind);
|
|
1176
1195
|
if (emailKind === "attachment") {
|
|
1177
1196
|
return "Attachment evidence";
|
|
@@ -1208,8 +1227,6 @@ var buildContextLabel2 = (metadata) => {
|
|
|
1208
1227
|
if (speaker) {
|
|
1209
1228
|
return `Speaker ${speaker}`;
|
|
1210
1229
|
}
|
|
1211
|
-
const sectionPath = Array.isArray(metadata.sectionPath) ? metadata.sectionPath.map((value) => getContextString2(value)).filter((value) => typeof value === "string") : [];
|
|
1212
|
-
const sectionTitle = getContextString2(metadata.sectionTitle) ?? sectionPath.at(-1);
|
|
1213
1230
|
if (sectionTitle) {
|
|
1214
1231
|
return `Section ${sectionTitle}`;
|
|
1215
1232
|
}
|
|
@@ -1219,11 +1236,21 @@ var buildLocatorLabel2 = (metadata, source, title) => {
|
|
|
1219
1236
|
if (!metadata) {
|
|
1220
1237
|
return;
|
|
1221
1238
|
}
|
|
1239
|
+
const pdfTextKind = getContextString2(metadata.pdfTextKind);
|
|
1240
|
+
const officeBlockKind = getContextString2(metadata.officeBlockKind);
|
|
1241
|
+
const pdfBlockNumber = getContextNumber2(metadata.pdfBlockNumber);
|
|
1242
|
+
const officeBlockNumber = getContextNumber2(metadata.officeBlockNumber);
|
|
1222
1243
|
const page = getContextNumber2(metadata.page) ?? getContextNumber2(metadata.pageNumber) ?? (typeof metadata.pageIndex === "number" ? metadata.pageIndex + 1 : undefined);
|
|
1223
1244
|
const region = getContextNumber2(metadata.regionNumber) ?? (typeof metadata.regionIndex === "number" ? metadata.regionIndex + 1 : undefined);
|
|
1224
1245
|
if (page && region) {
|
|
1225
1246
|
return `Page ${page} · Region ${region}`;
|
|
1226
1247
|
}
|
|
1248
|
+
if (page && pdfBlockNumber && pdfTextKind === "table_like") {
|
|
1249
|
+
return `Page ${page} · Table Block ${pdfBlockNumber}`;
|
|
1250
|
+
}
|
|
1251
|
+
if (page && pdfBlockNumber) {
|
|
1252
|
+
return `Page ${page} · Text Block ${pdfBlockNumber}`;
|
|
1253
|
+
}
|
|
1227
1254
|
if (page) {
|
|
1228
1255
|
return `Page ${page}`;
|
|
1229
1256
|
}
|
|
@@ -1252,6 +1279,15 @@ var buildLocatorLabel2 = (metadata, source, title) => {
|
|
|
1252
1279
|
if (mediaStart) {
|
|
1253
1280
|
return `Timestamp ${mediaStart}`;
|
|
1254
1281
|
}
|
|
1282
|
+
if (officeBlockNumber && officeBlockKind === "table") {
|
|
1283
|
+
return `Office table block ${officeBlockNumber}`;
|
|
1284
|
+
}
|
|
1285
|
+
if (officeBlockNumber && officeBlockKind === "list") {
|
|
1286
|
+
return `Office list block ${officeBlockNumber}`;
|
|
1287
|
+
}
|
|
1288
|
+
if (officeBlockNumber && officeBlockKind === "paragraph") {
|
|
1289
|
+
return `Office paragraph block ${officeBlockNumber}`;
|
|
1290
|
+
}
|
|
1255
1291
|
const sectionPath = Array.isArray(metadata.sectionPath) ? metadata.sectionPath.map((value) => getContextString2(value)).filter((value) => typeof value === "string") : [];
|
|
1256
1292
|
if (sectionPath.length > 0) {
|
|
1257
1293
|
return `Section ${sectionPath.join(" > ")}`;
|
|
@@ -1269,12 +1305,16 @@ var buildProvenanceLabel2 = (metadata) => {
|
|
|
1269
1305
|
const mediaKind = getContextString2(metadata.mediaKind);
|
|
1270
1306
|
const transcriptSource = getContextString2(metadata.transcriptSource);
|
|
1271
1307
|
const pdfTextMode = getContextString2(metadata.pdfTextMode);
|
|
1308
|
+
const pdfTextKind = getContextString2(metadata.pdfTextKind);
|
|
1309
|
+
const officeBlockKind = getContextString2(metadata.officeBlockKind);
|
|
1272
1310
|
const ocrEngine = getContextString2(metadata.ocrEngine);
|
|
1273
1311
|
const extractorRegistryMatch = getContextString2(metadata.extractorRegistryMatch);
|
|
1274
1312
|
const chunkingProfile = getContextString2(metadata.chunkingProfile);
|
|
1275
1313
|
const ocrConfidence = getContextNumber2(metadata.ocrRegionConfidence) ?? getContextNumber2(metadata.ocrConfidence);
|
|
1276
1314
|
const labels = [
|
|
1277
1315
|
pdfTextMode ? `PDF ${pdfTextMode}` : "",
|
|
1316
|
+
pdfTextKind === "table_like" ? "PDF table block" : pdfTextKind === "paragraph" ? "PDF text block" : "",
|
|
1317
|
+
officeBlockKind ? `Office ${officeBlockKind}` : "",
|
|
1278
1318
|
ocrEngine ? `OCR ${ocrEngine}` : "",
|
|
1279
1319
|
extractorRegistryMatch ? `Extractor ${extractorRegistryMatch}` : "",
|
|
1280
1320
|
chunkingProfile ? `Chunking ${chunkingProfile}` : "",
|
|
@@ -1310,7 +1350,7 @@ var buildRAGChunkStructure = (metadata) => {
|
|
|
1310
1350
|
return;
|
|
1311
1351
|
}
|
|
1312
1352
|
const sectionPath = Array.isArray(metadata.sectionPath) ? metadata.sectionPath.filter((value) => typeof value === "string" && value.trim().length > 0) : undefined;
|
|
1313
|
-
const sectionKind = metadata.sectionKind === "markdown_heading" || metadata.sectionKind === "html_heading" || metadata.sectionKind === "office_heading" || metadata.sectionKind === "spreadsheet_rows" || metadata.sectionKind === "presentation_slide" ? metadata.sectionKind : undefined;
|
|
1353
|
+
const sectionKind = metadata.sectionKind === "markdown_heading" || metadata.sectionKind === "html_heading" || metadata.sectionKind === "office_heading" || metadata.sectionKind === "office_block" || metadata.sectionKind === "pdf_block" || metadata.sectionKind === "spreadsheet_rows" || metadata.sectionKind === "presentation_slide" ? metadata.sectionKind : undefined;
|
|
1314
1354
|
const section = {
|
|
1315
1355
|
depth: getContextNumber2(metadata.sectionDepth),
|
|
1316
1356
|
kind: sectionKind,
|
|
@@ -1630,7 +1670,7 @@ var buildRAGSourceSummaries = (sources) => {
|
|
|
1630
1670
|
const citationReferenceMap = buildRAGCitationReferenceMap(citations);
|
|
1631
1671
|
return sourceGroups.map((group) => {
|
|
1632
1672
|
const groupCitations = citations.filter((citation) => group.chunks.some((chunk) => chunk.chunkId === citation.chunkId));
|
|
1633
|
-
const leadChunk = group.chunks
|
|
1673
|
+
const leadChunk = getPreferredSourceLeadChunk(group.chunks);
|
|
1634
1674
|
const excerpts = leadChunk ? buildRAGChunkExcerpts(group.chunks, leadChunk.chunkId) : undefined;
|
|
1635
1675
|
const structure = leadChunk?.structure ?? buildRAGChunkStructure(leadChunk?.metadata);
|
|
1636
1676
|
const excerptSelection = buildRAGExcerptSelection(excerpts, structure);
|
|
@@ -1658,13 +1698,45 @@ var getSectionPathFromSource = (source) => {
|
|
|
1658
1698
|
const path = source.structure?.section?.path ?? (Array.isArray(source.metadata?.sectionPath) ? source.metadata.sectionPath.map((value) => getContextString2(value)).filter((value) => typeof value === "string") : []);
|
|
1659
1699
|
return path.length > 0 ? path : undefined;
|
|
1660
1700
|
};
|
|
1701
|
+
var isBlockAwareContextLabel = (value) => typeof value === "string" && (value.startsWith("PDF ") || value.startsWith("Office "));
|
|
1702
|
+
var getStructuredSectionScoreWeight = (metadata) => {
|
|
1703
|
+
if (!metadata) {
|
|
1704
|
+
return 1;
|
|
1705
|
+
}
|
|
1706
|
+
const pdfTextKind = getContextString2(metadata.pdfTextKind);
|
|
1707
|
+
const officeBlockKind = getContextString2(metadata.officeBlockKind);
|
|
1708
|
+
const sectionKind = getContextString2(metadata.sectionKind);
|
|
1709
|
+
if (pdfTextKind === "table_like") {
|
|
1710
|
+
return 1.28;
|
|
1711
|
+
}
|
|
1712
|
+
if (officeBlockKind === "table" || officeBlockKind === "list") {
|
|
1713
|
+
return 1.24;
|
|
1714
|
+
}
|
|
1715
|
+
if (sectionKind === "pdf_block" || sectionKind === "office_block" || officeBlockKind === "paragraph" || pdfTextKind === "paragraph") {
|
|
1716
|
+
return 1.12;
|
|
1717
|
+
}
|
|
1718
|
+
return 1;
|
|
1719
|
+
};
|
|
1720
|
+
var getStructuredSourceLeadScore = (source) => source.score * getStructuredSectionScoreWeight(source.metadata);
|
|
1721
|
+
var getPreferredSourceLeadChunk = (chunks) => chunks.slice().sort((left, right) => {
|
|
1722
|
+
const leftWeightedScore = getStructuredSourceLeadScore(left);
|
|
1723
|
+
const rightWeightedScore = getStructuredSourceLeadScore(right);
|
|
1724
|
+
if (rightWeightedScore !== leftWeightedScore) {
|
|
1725
|
+
return rightWeightedScore - leftWeightedScore;
|
|
1726
|
+
}
|
|
1727
|
+
if (right.score !== left.score) {
|
|
1728
|
+
return right.score - left.score;
|
|
1729
|
+
}
|
|
1730
|
+
return left.chunkId.localeCompare(right.chunkId);
|
|
1731
|
+
})[0];
|
|
1661
1732
|
var buildRAGSectionRetrievalDiagnostics = (sources, trace) => {
|
|
1662
|
-
const totalScore = sources.reduce((sum, source) => sum + source.score, 0);
|
|
1733
|
+
const totalScore = sources.reduce((sum, source) => sum + source.score * getStructuredSectionScoreWeight(source.metadata), 0);
|
|
1663
1734
|
if (sources.length === 0 || totalScore <= 0) {
|
|
1664
1735
|
return [];
|
|
1665
1736
|
}
|
|
1666
1737
|
const sections = new Map;
|
|
1667
1738
|
for (const source of sources) {
|
|
1739
|
+
const structuredScore = source.score * getStructuredSectionScoreWeight(source.metadata);
|
|
1668
1740
|
const path = getSectionPathFromSource(source);
|
|
1669
1741
|
if (!path) {
|
|
1670
1742
|
continue;
|
|
@@ -1696,7 +1768,7 @@ var buildRAGSectionRetrievalDiagnostics = (sources, trace) => {
|
|
|
1696
1768
|
sourceSet: new Set(source.source ? [source.source] : []),
|
|
1697
1769
|
topChunkId: source.chunkId,
|
|
1698
1770
|
topSource: source.source,
|
|
1699
|
-
totalScore:
|
|
1771
|
+
totalScore: structuredScore,
|
|
1700
1772
|
transformedHits,
|
|
1701
1773
|
variantHits,
|
|
1702
1774
|
vectorHits
|
|
@@ -1704,7 +1776,7 @@ var buildRAGSectionRetrievalDiagnostics = (sources, trace) => {
|
|
|
1704
1776
|
continue;
|
|
1705
1777
|
}
|
|
1706
1778
|
existing.count += 1;
|
|
1707
|
-
existing.totalScore +=
|
|
1779
|
+
existing.totalScore += structuredScore;
|
|
1708
1780
|
if (source.source) {
|
|
1709
1781
|
existing.sourceSet.add(source.source);
|
|
1710
1782
|
}
|
|
@@ -1732,6 +1804,8 @@ var buildRAGSectionRetrievalDiagnostics = (sources, trace) => {
|
|
|
1732
1804
|
const parentTotal = siblingPool.reduce((sum, entry) => sum + entry.totalScore, 0);
|
|
1733
1805
|
const scoreShare = section.totalScore / totalScore;
|
|
1734
1806
|
const parentShare = parentTotal > 0 ? section.totalScore / parentTotal : undefined;
|
|
1807
|
+
const topChunk = sources.find((source) => source.chunkId === section.topChunkId);
|
|
1808
|
+
const topContextLabel = topChunk?.labels?.contextLabel ?? buildContextLabel2(topChunk?.metadata);
|
|
1735
1809
|
const parentDistribution = parentTotal > 0 ? siblingPool.map((entry) => ({
|
|
1736
1810
|
count: entry.count,
|
|
1737
1811
|
isActive: entry.key === section.key,
|
|
@@ -1857,6 +1931,7 @@ var buildRAGSectionRetrievalDiagnostics = (sources, trace) => {
|
|
|
1857
1931
|
reasons.push("concentrated_evidence");
|
|
1858
1932
|
}
|
|
1859
1933
|
const summaryParts = [
|
|
1934
|
+
isBlockAwareContextLabel(topContextLabel) ? topContextLabel : "",
|
|
1860
1935
|
`${section.count} hit${section.count === 1 ? "" : "s"}`,
|
|
1861
1936
|
`${(scoreShare * 100).toFixed(0)}% score share`,
|
|
1862
1937
|
`vector ${section.vectorHits} · lexical ${section.lexicalHits} · hybrid ${section.hybridHits}`,
|
|
@@ -2068,22 +2143,21 @@ var updateSourceGroup = (groups, source) => {
|
|
|
2068
2143
|
groups.set(key, buildSourceGroup(source, key));
|
|
2069
2144
|
return;
|
|
2070
2145
|
}
|
|
2071
|
-
|
|
2072
|
-
existing.bestScore = source.score;
|
|
2073
|
-
existing.label = buildSourceLabel2(source);
|
|
2074
|
-
existing.labels = source.labels ?? buildRAGSourceLabels({
|
|
2075
|
-
metadata: source.metadata,
|
|
2076
|
-
source: source.source,
|
|
2077
|
-
title: source.title
|
|
2078
|
-
});
|
|
2079
|
-
existing.structure = source.structure ?? buildRAGChunkStructure(source.metadata);
|
|
2080
|
-
existing.source = source.source;
|
|
2081
|
-
existing.title = source.title;
|
|
2082
|
-
} else {
|
|
2083
|
-
existing.bestScore = Math.max(existing.bestScore, source.score);
|
|
2084
|
-
}
|
|
2146
|
+
existing.bestScore = Math.max(existing.bestScore, source.score);
|
|
2085
2147
|
existing.count += 1;
|
|
2086
2148
|
existing.chunks.push(source);
|
|
2149
|
+
const leadChunk = getPreferredSourceLeadChunk(existing.chunks);
|
|
2150
|
+
if (leadChunk) {
|
|
2151
|
+
existing.label = buildSourceLabel2(leadChunk);
|
|
2152
|
+
existing.labels = leadChunk.labels ?? buildRAGSourceLabels({
|
|
2153
|
+
metadata: leadChunk.metadata,
|
|
2154
|
+
source: leadChunk.source,
|
|
2155
|
+
title: leadChunk.title
|
|
2156
|
+
});
|
|
2157
|
+
existing.structure = leadChunk.structure ?? buildRAGChunkStructure(leadChunk.metadata);
|
|
2158
|
+
existing.source = leadChunk.source;
|
|
2159
|
+
existing.title = leadChunk.title;
|
|
2160
|
+
}
|
|
2087
2161
|
};
|
|
2088
2162
|
var getLatestAssistantMessage = (messages) => {
|
|
2089
2163
|
for (let index = messages.length - 1;index >= 0; index -= 1) {
|
|
@@ -2215,6 +2215,25 @@ var buildContextLabel2 = (metadata) => {
|
|
|
2215
2215
|
if (!metadata) {
|
|
2216
2216
|
return;
|
|
2217
2217
|
}
|
|
2218
|
+
const pdfTextKind = getContextString2(metadata.pdfTextKind);
|
|
2219
|
+
const officeBlockKind = getContextString2(metadata.officeBlockKind);
|
|
2220
|
+
const sectionPath = Array.isArray(metadata.sectionPath) ? metadata.sectionPath.map((value) => getContextString2(value)).filter((value) => typeof value === "string") : [];
|
|
2221
|
+
const sectionTitle = getContextString2(metadata.sectionTitle) ?? sectionPath.at(-1);
|
|
2222
|
+
if (pdfTextKind === "table_like" && sectionTitle) {
|
|
2223
|
+
return `PDF table block ${sectionTitle}`;
|
|
2224
|
+
}
|
|
2225
|
+
if (pdfTextKind === "paragraph" && sectionTitle) {
|
|
2226
|
+
return `PDF text block ${sectionTitle}`;
|
|
2227
|
+
}
|
|
2228
|
+
if (officeBlockKind === "table" && sectionTitle) {
|
|
2229
|
+
return `Office table block ${sectionTitle}`;
|
|
2230
|
+
}
|
|
2231
|
+
if (officeBlockKind === "list" && sectionTitle) {
|
|
2232
|
+
return `Office list block ${sectionTitle}`;
|
|
2233
|
+
}
|
|
2234
|
+
if (officeBlockKind === "paragraph" && sectionTitle) {
|
|
2235
|
+
return `Office paragraph block ${sectionTitle}`;
|
|
2236
|
+
}
|
|
2218
2237
|
const emailKind = getContextString2(metadata.emailKind);
|
|
2219
2238
|
if (emailKind === "attachment") {
|
|
2220
2239
|
return "Attachment evidence";
|
|
@@ -2251,8 +2270,6 @@ var buildContextLabel2 = (metadata) => {
|
|
|
2251
2270
|
if (speaker) {
|
|
2252
2271
|
return `Speaker ${speaker}`;
|
|
2253
2272
|
}
|
|
2254
|
-
const sectionPath = Array.isArray(metadata.sectionPath) ? metadata.sectionPath.map((value) => getContextString2(value)).filter((value) => typeof value === "string") : [];
|
|
2255
|
-
const sectionTitle = getContextString2(metadata.sectionTitle) ?? sectionPath.at(-1);
|
|
2256
2273
|
if (sectionTitle) {
|
|
2257
2274
|
return `Section ${sectionTitle}`;
|
|
2258
2275
|
}
|
|
@@ -2262,11 +2279,21 @@ var buildLocatorLabel2 = (metadata, source, title) => {
|
|
|
2262
2279
|
if (!metadata) {
|
|
2263
2280
|
return;
|
|
2264
2281
|
}
|
|
2282
|
+
const pdfTextKind = getContextString2(metadata.pdfTextKind);
|
|
2283
|
+
const officeBlockKind = getContextString2(metadata.officeBlockKind);
|
|
2284
|
+
const pdfBlockNumber = getContextNumber2(metadata.pdfBlockNumber);
|
|
2285
|
+
const officeBlockNumber = getContextNumber2(metadata.officeBlockNumber);
|
|
2265
2286
|
const page = getContextNumber2(metadata.page) ?? getContextNumber2(metadata.pageNumber) ?? (typeof metadata.pageIndex === "number" ? metadata.pageIndex + 1 : undefined);
|
|
2266
2287
|
const region = getContextNumber2(metadata.regionNumber) ?? (typeof metadata.regionIndex === "number" ? metadata.regionIndex + 1 : undefined);
|
|
2267
2288
|
if (page && region) {
|
|
2268
2289
|
return `Page ${page} · Region ${region}`;
|
|
2269
2290
|
}
|
|
2291
|
+
if (page && pdfBlockNumber && pdfTextKind === "table_like") {
|
|
2292
|
+
return `Page ${page} · Table Block ${pdfBlockNumber}`;
|
|
2293
|
+
}
|
|
2294
|
+
if (page && pdfBlockNumber) {
|
|
2295
|
+
return `Page ${page} · Text Block ${pdfBlockNumber}`;
|
|
2296
|
+
}
|
|
2270
2297
|
if (page) {
|
|
2271
2298
|
return `Page ${page}`;
|
|
2272
2299
|
}
|
|
@@ -2295,6 +2322,15 @@ var buildLocatorLabel2 = (metadata, source, title) => {
|
|
|
2295
2322
|
if (mediaStart) {
|
|
2296
2323
|
return `Timestamp ${mediaStart}`;
|
|
2297
2324
|
}
|
|
2325
|
+
if (officeBlockNumber && officeBlockKind === "table") {
|
|
2326
|
+
return `Office table block ${officeBlockNumber}`;
|
|
2327
|
+
}
|
|
2328
|
+
if (officeBlockNumber && officeBlockKind === "list") {
|
|
2329
|
+
return `Office list block ${officeBlockNumber}`;
|
|
2330
|
+
}
|
|
2331
|
+
if (officeBlockNumber && officeBlockKind === "paragraph") {
|
|
2332
|
+
return `Office paragraph block ${officeBlockNumber}`;
|
|
2333
|
+
}
|
|
2298
2334
|
const sectionPath = Array.isArray(metadata.sectionPath) ? metadata.sectionPath.map((value) => getContextString2(value)).filter((value) => typeof value === "string") : [];
|
|
2299
2335
|
if (sectionPath.length > 0) {
|
|
2300
2336
|
return `Section ${sectionPath.join(" > ")}`;
|
|
@@ -2312,12 +2348,16 @@ var buildProvenanceLabel2 = (metadata) => {
|
|
|
2312
2348
|
const mediaKind = getContextString2(metadata.mediaKind);
|
|
2313
2349
|
const transcriptSource = getContextString2(metadata.transcriptSource);
|
|
2314
2350
|
const pdfTextMode = getContextString2(metadata.pdfTextMode);
|
|
2351
|
+
const pdfTextKind = getContextString2(metadata.pdfTextKind);
|
|
2352
|
+
const officeBlockKind = getContextString2(metadata.officeBlockKind);
|
|
2315
2353
|
const ocrEngine = getContextString2(metadata.ocrEngine);
|
|
2316
2354
|
const extractorRegistryMatch = getContextString2(metadata.extractorRegistryMatch);
|
|
2317
2355
|
const chunkingProfile = getContextString2(metadata.chunkingProfile);
|
|
2318
2356
|
const ocrConfidence = getContextNumber2(metadata.ocrRegionConfidence) ?? getContextNumber2(metadata.ocrConfidence);
|
|
2319
2357
|
const labels = [
|
|
2320
2358
|
pdfTextMode ? `PDF ${pdfTextMode}` : "",
|
|
2359
|
+
pdfTextKind === "table_like" ? "PDF table block" : pdfTextKind === "paragraph" ? "PDF text block" : "",
|
|
2360
|
+
officeBlockKind ? `Office ${officeBlockKind}` : "",
|
|
2321
2361
|
ocrEngine ? `OCR ${ocrEngine}` : "",
|
|
2322
2362
|
extractorRegistryMatch ? `Extractor ${extractorRegistryMatch}` : "",
|
|
2323
2363
|
chunkingProfile ? `Chunking ${chunkingProfile}` : "",
|
|
@@ -2353,7 +2393,7 @@ var buildRAGChunkStructure = (metadata) => {
|
|
|
2353
2393
|
return;
|
|
2354
2394
|
}
|
|
2355
2395
|
const sectionPath = Array.isArray(metadata.sectionPath) ? metadata.sectionPath.filter((value) => typeof value === "string" && value.trim().length > 0) : undefined;
|
|
2356
|
-
const sectionKind = metadata.sectionKind === "markdown_heading" || metadata.sectionKind === "html_heading" || metadata.sectionKind === "office_heading" || metadata.sectionKind === "spreadsheet_rows" || metadata.sectionKind === "presentation_slide" ? metadata.sectionKind : undefined;
|
|
2396
|
+
const sectionKind = metadata.sectionKind === "markdown_heading" || metadata.sectionKind === "html_heading" || metadata.sectionKind === "office_heading" || metadata.sectionKind === "office_block" || metadata.sectionKind === "pdf_block" || metadata.sectionKind === "spreadsheet_rows" || metadata.sectionKind === "presentation_slide" ? metadata.sectionKind : undefined;
|
|
2357
2397
|
const section = {
|
|
2358
2398
|
depth: getContextNumber2(metadata.sectionDepth),
|
|
2359
2399
|
kind: sectionKind,
|
|
@@ -2673,7 +2713,7 @@ var buildRAGSourceSummaries = (sources) => {
|
|
|
2673
2713
|
const citationReferenceMap = buildRAGCitationReferenceMap(citations);
|
|
2674
2714
|
return sourceGroups.map((group) => {
|
|
2675
2715
|
const groupCitations = citations.filter((citation) => group.chunks.some((chunk) => chunk.chunkId === citation.chunkId));
|
|
2676
|
-
const leadChunk = group.chunks
|
|
2716
|
+
const leadChunk = getPreferredSourceLeadChunk(group.chunks);
|
|
2677
2717
|
const excerpts = leadChunk ? buildRAGChunkExcerpts(group.chunks, leadChunk.chunkId) : undefined;
|
|
2678
2718
|
const structure = leadChunk?.structure ?? buildRAGChunkStructure(leadChunk?.metadata);
|
|
2679
2719
|
const excerptSelection = buildRAGExcerptSelection(excerpts, structure);
|
|
@@ -2701,13 +2741,45 @@ var getSectionPathFromSource = (source) => {
|
|
|
2701
2741
|
const path = source.structure?.section?.path ?? (Array.isArray(source.metadata?.sectionPath) ? source.metadata.sectionPath.map((value) => getContextString2(value)).filter((value) => typeof value === "string") : []);
|
|
2702
2742
|
return path.length > 0 ? path : undefined;
|
|
2703
2743
|
};
|
|
2744
|
+
var isBlockAwareContextLabel = (value) => typeof value === "string" && (value.startsWith("PDF ") || value.startsWith("Office "));
|
|
2745
|
+
var getStructuredSectionScoreWeight = (metadata) => {
|
|
2746
|
+
if (!metadata) {
|
|
2747
|
+
return 1;
|
|
2748
|
+
}
|
|
2749
|
+
const pdfTextKind = getContextString2(metadata.pdfTextKind);
|
|
2750
|
+
const officeBlockKind = getContextString2(metadata.officeBlockKind);
|
|
2751
|
+
const sectionKind = getContextString2(metadata.sectionKind);
|
|
2752
|
+
if (pdfTextKind === "table_like") {
|
|
2753
|
+
return 1.28;
|
|
2754
|
+
}
|
|
2755
|
+
if (officeBlockKind === "table" || officeBlockKind === "list") {
|
|
2756
|
+
return 1.24;
|
|
2757
|
+
}
|
|
2758
|
+
if (sectionKind === "pdf_block" || sectionKind === "office_block" || officeBlockKind === "paragraph" || pdfTextKind === "paragraph") {
|
|
2759
|
+
return 1.12;
|
|
2760
|
+
}
|
|
2761
|
+
return 1;
|
|
2762
|
+
};
|
|
2763
|
+
var getStructuredSourceLeadScore = (source) => source.score * getStructuredSectionScoreWeight(source.metadata);
|
|
2764
|
+
var getPreferredSourceLeadChunk = (chunks) => chunks.slice().sort((left, right) => {
|
|
2765
|
+
const leftWeightedScore = getStructuredSourceLeadScore(left);
|
|
2766
|
+
const rightWeightedScore = getStructuredSourceLeadScore(right);
|
|
2767
|
+
if (rightWeightedScore !== leftWeightedScore) {
|
|
2768
|
+
return rightWeightedScore - leftWeightedScore;
|
|
2769
|
+
}
|
|
2770
|
+
if (right.score !== left.score) {
|
|
2771
|
+
return right.score - left.score;
|
|
2772
|
+
}
|
|
2773
|
+
return left.chunkId.localeCompare(right.chunkId);
|
|
2774
|
+
})[0];
|
|
2704
2775
|
var buildRAGSectionRetrievalDiagnostics = (sources, trace) => {
|
|
2705
|
-
const totalScore = sources.reduce((sum, source) => sum + source.score, 0);
|
|
2776
|
+
const totalScore = sources.reduce((sum, source) => sum + source.score * getStructuredSectionScoreWeight(source.metadata), 0);
|
|
2706
2777
|
if (sources.length === 0 || totalScore <= 0) {
|
|
2707
2778
|
return [];
|
|
2708
2779
|
}
|
|
2709
2780
|
const sections = new Map;
|
|
2710
2781
|
for (const source of sources) {
|
|
2782
|
+
const structuredScore = source.score * getStructuredSectionScoreWeight(source.metadata);
|
|
2711
2783
|
const path = getSectionPathFromSource(source);
|
|
2712
2784
|
if (!path) {
|
|
2713
2785
|
continue;
|
|
@@ -2739,7 +2811,7 @@ var buildRAGSectionRetrievalDiagnostics = (sources, trace) => {
|
|
|
2739
2811
|
sourceSet: new Set(source.source ? [source.source] : []),
|
|
2740
2812
|
topChunkId: source.chunkId,
|
|
2741
2813
|
topSource: source.source,
|
|
2742
|
-
totalScore:
|
|
2814
|
+
totalScore: structuredScore,
|
|
2743
2815
|
transformedHits,
|
|
2744
2816
|
variantHits,
|
|
2745
2817
|
vectorHits
|
|
@@ -2747,7 +2819,7 @@ var buildRAGSectionRetrievalDiagnostics = (sources, trace) => {
|
|
|
2747
2819
|
continue;
|
|
2748
2820
|
}
|
|
2749
2821
|
existing.count += 1;
|
|
2750
|
-
existing.totalScore +=
|
|
2822
|
+
existing.totalScore += structuredScore;
|
|
2751
2823
|
if (source.source) {
|
|
2752
2824
|
existing.sourceSet.add(source.source);
|
|
2753
2825
|
}
|
|
@@ -2775,6 +2847,8 @@ var buildRAGSectionRetrievalDiagnostics = (sources, trace) => {
|
|
|
2775
2847
|
const parentTotal = siblingPool.reduce((sum, entry) => sum + entry.totalScore, 0);
|
|
2776
2848
|
const scoreShare = section.totalScore / totalScore;
|
|
2777
2849
|
const parentShare = parentTotal > 0 ? section.totalScore / parentTotal : undefined;
|
|
2850
|
+
const topChunk = sources.find((source) => source.chunkId === section.topChunkId);
|
|
2851
|
+
const topContextLabel = topChunk?.labels?.contextLabel ?? buildContextLabel2(topChunk?.metadata);
|
|
2778
2852
|
const parentDistribution = parentTotal > 0 ? siblingPool.map((entry) => ({
|
|
2779
2853
|
count: entry.count,
|
|
2780
2854
|
isActive: entry.key === section.key,
|
|
@@ -2900,6 +2974,7 @@ var buildRAGSectionRetrievalDiagnostics = (sources, trace) => {
|
|
|
2900
2974
|
reasons.push("concentrated_evidence");
|
|
2901
2975
|
}
|
|
2902
2976
|
const summaryParts = [
|
|
2977
|
+
isBlockAwareContextLabel(topContextLabel) ? topContextLabel : "",
|
|
2903
2978
|
`${section.count} hit${section.count === 1 ? "" : "s"}`,
|
|
2904
2979
|
`${(scoreShare * 100).toFixed(0)}% score share`,
|
|
2905
2980
|
`vector ${section.vectorHits} · lexical ${section.lexicalHits} · hybrid ${section.hybridHits}`,
|
|
@@ -3111,22 +3186,21 @@ var updateSourceGroup = (groups, source) => {
|
|
|
3111
3186
|
groups.set(key, buildSourceGroup(source, key));
|
|
3112
3187
|
return;
|
|
3113
3188
|
}
|
|
3114
|
-
|
|
3115
|
-
existing.bestScore = source.score;
|
|
3116
|
-
existing.label = buildSourceLabel2(source);
|
|
3117
|
-
existing.labels = source.labels ?? buildRAGSourceLabels({
|
|
3118
|
-
metadata: source.metadata,
|
|
3119
|
-
source: source.source,
|
|
3120
|
-
title: source.title
|
|
3121
|
-
});
|
|
3122
|
-
existing.structure = source.structure ?? buildRAGChunkStructure(source.metadata);
|
|
3123
|
-
existing.source = source.source;
|
|
3124
|
-
existing.title = source.title;
|
|
3125
|
-
} else {
|
|
3126
|
-
existing.bestScore = Math.max(existing.bestScore, source.score);
|
|
3127
|
-
}
|
|
3189
|
+
existing.bestScore = Math.max(existing.bestScore, source.score);
|
|
3128
3190
|
existing.count += 1;
|
|
3129
3191
|
existing.chunks.push(source);
|
|
3192
|
+
const leadChunk = getPreferredSourceLeadChunk(existing.chunks);
|
|
3193
|
+
if (leadChunk) {
|
|
3194
|
+
existing.label = buildSourceLabel2(leadChunk);
|
|
3195
|
+
existing.labels = leadChunk.labels ?? buildRAGSourceLabels({
|
|
3196
|
+
metadata: leadChunk.metadata,
|
|
3197
|
+
source: leadChunk.source,
|
|
3198
|
+
title: leadChunk.title
|
|
3199
|
+
});
|
|
3200
|
+
existing.structure = leadChunk.structure ?? buildRAGChunkStructure(leadChunk.metadata);
|
|
3201
|
+
existing.source = leadChunk.source;
|
|
3202
|
+
existing.title = leadChunk.title;
|
|
3203
|
+
}
|
|
3130
3204
|
};
|
|
3131
3205
|
var getLatestAssistantMessage = (messages) => {
|
|
3132
3206
|
for (let index = messages.length - 1;index >= 0; index -= 1) {
|