@absolutejs/absolute 0.19.0-beta.607 → 0.19.0-beta.608
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/ai/client/index.js +136 -18
- package/dist/ai/client/index.js.map +4 -4
- package/dist/ai/client/ui.js +137 -18
- package/dist/ai/client/ui.js.map +4 -4
- package/dist/ai/index.js +365 -28
- package/dist/ai/index.js.map +7 -7
- package/dist/ai/rag/quality.js +86 -16
- package/dist/ai/rag/quality.js.map +3 -3
- package/dist/ai/rag/ui.js +137 -18
- package/dist/ai/rag/ui.js.map +4 -4
- package/dist/ai-client/angular/ai/index.js +135 -17
- package/dist/ai-client/react/ai/index.js +135 -17
- package/dist/ai-client/vue/ai/index.js +135 -17
- package/dist/angular/ai/index.js +136 -18
- package/dist/angular/ai/index.js.map +4 -4
- package/dist/react/ai/index.js +136 -18
- package/dist/react/ai/index.js.map +6 -6
- package/dist/src/ai/client/ui.d.ts +1 -1
- package/dist/src/ai/rag/index.d.ts +1 -1
- package/dist/src/ai/rag/presentation.d.ts +4 -1
- package/dist/src/ai/rag/ui.d.ts +1 -1
- package/dist/src/vue/ai/useRAG.d.ts +14 -4
- package/dist/src/vue/ai/useRAGChunkPreview.d.ts +12 -2
- package/dist/src/vue/ai/useRAGSearch.d.ts +2 -2
- package/dist/svelte/ai/index.js +136 -18
- package/dist/svelte/ai/index.js.map +6 -6
- package/dist/types/ai.d.ts +13 -2
- package/dist/vue/ai/index.js +136 -18
- package/dist/vue/ai/index.js.map +5 -5
- package/package.json +1 -1
package/dist/ai/index.js
CHANGED
|
@@ -347,6 +347,56 @@ var buildExcerpt = (text, maxLength = 160) => {
|
|
|
347
347
|
}
|
|
348
348
|
return `${normalized.slice(0, Math.max(0, maxLength - 1)).trimEnd()}\u2026`;
|
|
349
349
|
};
|
|
350
|
+
/**
 * Chooses which of the pre-built excerpts should represent a chunk.
 *
 * When the chunk excerpt is very short (fewer than 72 characters after
 * trimming) and the chunk belongs to a section with more than one chunk, a
 * wider excerpt is preferred: the section excerpt if the section has at most
 * three chunks, otherwise the neighbor-window excerpt. In every other case
 * the first non-empty value of chunk, window, section is returned.
 *
 * @param {{ chunkExcerpt?: unknown, windowExcerpt?: unknown, sectionExcerpt?: unknown } | null | undefined} excerpts
 *   Candidate excerpts; fields that are not strings are treated as absent.
 * @param {number | undefined} sectionChunkCount
 *   Number of chunks in the section the chunk belongs to, if known.
 * @returns {string} The trimmed excerpt, or "" when nothing usable exists.
 */
var selectPreferredExcerpt = (excerpts, sectionChunkCount) => {
  if (!excerpts) {
    return "";
  }
  // Optional chaining only guards null/undefined; a number or object in one
  // of these fields used to throw on `.trim()`. Treat those as "no excerpt".
  const clean = (value) => (typeof value === "string" ? value.trim() : "");
  const chunkExcerpt = clean(excerpts.chunkExcerpt);
  const windowExcerpt = clean(excerpts.windowExcerpt);
  const sectionExcerpt = clean(excerpts.sectionExcerpt);
  const isThinChunk = chunkExcerpt.length > 0 && chunkExcerpt.length < 72;
  if (sectionChunkCount && sectionChunkCount > 1 && isThinChunk) {
    // Small sections are shown whole; larger ones fall back to the window.
    if (sectionChunkCount <= 3 && sectionExcerpt) {
      return sectionExcerpt;
    }
    if (windowExcerpt) {
      return windowExcerpt;
    }
  }
  return chunkExcerpt || windowExcerpt || sectionExcerpt;
};
|
|
367
|
+
/**
 * Builds the chunk / section / neighbor-window excerpts for one source.
 *
 * The active source is the one whose `chunkId` equals `activeChunkId`, or the
 * first source when no id is given or nothing matches. Neighbors and section
 * membership are read from the active source's metadata
 * (`previousChunkId`, `nextChunkId`, `sectionChunkId`, `sectionChunkIndex`).
 *
 * @param {Array<object>} sources Retrieved sources (each with `chunkId`, `text`, optional `metadata`).
 * @param {string} [activeChunkId] Id of the source to build excerpts for.
 * @returns {{ chunkExcerpt: *, sectionExcerpt: *, windowExcerpt: * } | undefined}
 *   Excerpts produced by `buildExcerpt` with limits 160 / 320 / 240, or
 *   `undefined` when there is no source to work with.
 */
var buildGroundingChunkExcerpts = (sources, activeChunkId) => {
  if (sources.length === 0) {
    return;
  }
  let activeSource;
  if (activeChunkId) {
    activeSource = sources.find((candidate) => candidate.chunkId === activeChunkId);
  }
  activeSource = activeSource ?? sources[0];
  if (!activeSource) {
    return;
  }

  // Index by id; with duplicate ids the last source wins.
  const sourcesById = new Map();
  for (const entry of sources) {
    sourcesById.set(entry.chunkId, entry);
  }

  const metadata = activeSource.metadata ?? {};
  const previousChunkId = getContextString(metadata.previousChunkId);
  const nextChunkId = getContextString(metadata.nextChunkId);
  const sectionChunkId = getContextString(metadata.sectionChunkId);

  // Sources without a section index sort after all indexed ones.
  const sectionOrder = (entry) => getContextNumber(entry.metadata?.sectionChunkIndex) ?? Number.MAX_SAFE_INTEGER;
  let sectionSources = [activeSource];
  if (sectionChunkId) {
    sectionSources = sources
      .filter((entry) => getContextString(entry.metadata?.sectionChunkId) === sectionChunkId)
      .sort((left, right) => {
        const leftOrder = sectionOrder(left);
        const rightOrder = sectionOrder(right);
        return leftOrder === rightOrder
          ? left.chunkId.localeCompare(right.chunkId)
          : leftOrder - rightOrder;
      });
  }

  // previous -> active -> next, skipping missing ids and repeats.
  const windowIds = [];
  for (const id of [previousChunkId, activeSource.chunkId, nextChunkId]) {
    if (id && !windowIds.includes(id)) {
      windowIds.push(id);
    }
  }
  // Only neighbors that are actually present in `sources` contribute text.
  const windowTexts = [];
  for (const id of windowIds) {
    const text = sourcesById.get(id)?.text;
    if (typeof text === "string") {
      windowTexts.push(text);
    }
  }

  return {
    chunkExcerpt: buildExcerpt(activeSource.text, 160),
    sectionExcerpt: buildExcerpt(sectionSources.map((entry) => entry.text).join("\n\n"), 320),
    windowExcerpt: buildExcerpt(windowTexts.join("\n\n"), 240)
  };
};
|
|
350
400
|
/**
 * Joins a reference's label, locator label and context label into one
 * display string separated by " · ", skipping empty values and repeats.
 *
 * @param {{ label?: string, locatorLabel?: string, contextLabel?: string }} reference
 * @returns {string} The combined label, or "" when no part is usable.
 */
var buildGroundingReferenceEvidenceLabel = (reference) => {
  const parts = [];
  for (const part of [reference.label, reference.locatorLabel, reference.contextLabel]) {
    // Keep the first occurrence of each non-empty part.
    if (part && part.length > 0 && !parts.includes(part)) {
      parts.push(part);
    }
  }
  return parts.join(" \xB7 ");
};
|
|
351
401
|
var buildGroundingReferenceEvidenceSummary = (reference) => [
|
|
352
402
|
reference.source ?? reference.title ?? reference.chunkId,
|
|
@@ -365,7 +415,8 @@ var buildGroundedAnswerCitationDetail = (reference) => ({
|
|
|
365
415
|
contextLabel: reference.contextLabel,
|
|
366
416
|
evidenceLabel: buildGroundingReferenceEvidenceLabel(reference),
|
|
367
417
|
evidenceSummary: buildGroundingReferenceEvidenceSummary(reference),
|
|
368
|
-
excerpt: reference.excerpt,
|
|
418
|
+
excerpt: selectPreferredExcerpt(reference.excerpts, getContextNumber(reference.metadata?.sectionChunkCount)) || reference.excerpt,
|
|
419
|
+
excerpts: reference.excerpts,
|
|
369
420
|
label: reference.label,
|
|
370
421
|
locatorLabel: reference.locatorLabel,
|
|
371
422
|
number: reference.number,
|
|
@@ -461,10 +512,17 @@ var buildRAGGroundedAnswerSectionSummaries = (references) => {
|
|
|
461
512
|
const key = buildGroundingSectionKey(reference);
|
|
462
513
|
const existing = groups.get(key);
|
|
463
514
|
if (!existing) {
|
|
515
|
+
const excerpts = reference.excerpts ? {
|
|
516
|
+
chunkExcerpt: reference.excerpts.chunkExcerpt,
|
|
517
|
+
sectionExcerpt: reference.excerpts.sectionExcerpt,
|
|
518
|
+
windowExcerpt: reference.excerpts.windowExcerpt
|
|
519
|
+
} : undefined;
|
|
464
520
|
groups.set(key, {
|
|
465
521
|
chunkIds: [reference.chunkId],
|
|
466
522
|
contextLabel: reference.contextLabel,
|
|
467
523
|
count: 1,
|
|
524
|
+
excerpt: selectPreferredExcerpt(excerpts, getContextNumber(reference.metadata?.sectionChunkCount)) || excerpts?.sectionExcerpt || reference.excerpt,
|
|
525
|
+
excerpts,
|
|
468
526
|
key,
|
|
469
527
|
label: key,
|
|
470
528
|
locatorLabel: reference.locatorLabel,
|
|
@@ -492,6 +550,14 @@ var buildRAGGroundedAnswerSectionSummaries = (references) => {
|
|
|
492
550
|
if (!existing.provenanceLabel && reference.provenanceLabel) {
|
|
493
551
|
existing.provenanceLabel = reference.provenanceLabel;
|
|
494
552
|
}
|
|
553
|
+
if (!existing.excerpts && reference.excerpts) {
|
|
554
|
+
existing.excerpts = {
|
|
555
|
+
chunkExcerpt: reference.excerpts.chunkExcerpt,
|
|
556
|
+
sectionExcerpt: reference.excerpts.sectionExcerpt,
|
|
557
|
+
windowExcerpt: reference.excerpts.windowExcerpt
|
|
558
|
+
};
|
|
559
|
+
existing.excerpt = reference.excerpts.sectionExcerpt;
|
|
560
|
+
}
|
|
495
561
|
}
|
|
496
562
|
return [...groups.values()].map((group) => ({
|
|
497
563
|
...group,
|
|
@@ -509,20 +575,24 @@ var buildRAGGroundedAnswerSectionSummaries = (references) => {
|
|
|
509
575
|
/**
 * Turns retrieved sources into grounding references, one per citation.
 *
 * Each reference carries the citation's own fields plus the excerpt set built
 * for that chunk and a single preferred `excerpt`. Labels the citation does
 * not provide are derived from its metadata.
 *
 * @param {Array<object>} sources Retrieved sources passed to `buildRAGCitations`.
 * @returns {Array<object>} References in the same order as the citations.
 */
var buildRAGGroundingReferences = (sources) => {
  const citations = buildRAGCitations(sources);
  const citationReferenceMap = buildRAGCitationReferenceMap(citations);

  const toReference = (citation) => {
    const excerpts = buildGroundingChunkExcerpts(sources, citation.chunkId);
    return {
      chunkId: citation.chunkId,
      contextLabel: citation.contextLabel ?? buildContextLabel(citation.metadata),
      // Preferred excerpt first, then the plain chunk excerpt, then one built from the raw text.
      excerpt:
        selectPreferredExcerpt(excerpts, getContextNumber(citation.metadata?.sectionChunkCount)) ||
        excerpts?.chunkExcerpt ||
        buildExcerpt(citation.text),
      excerpts,
      label: citation.label,
      locatorLabel:
        citation.locatorLabel ?? buildLocatorLabel(citation.metadata, citation.source, citation.title),
      metadata: citation.metadata,
      // 0 marks a citation that has no entry in the reference map.
      number: citationReferenceMap[citation.chunkId] ?? 0,
      provenanceLabel: citation.provenanceLabel ?? buildProvenanceLabel(citation.metadata),
      score: citation.score,
      source: citation.source,
      text: citation.text,
      title: citation.title
    };
  };

  return citations.map((citation) => toReference(citation));
};
|
|
527
597
|
|
|
528
598
|
// src/ai/rag/quality.ts
|
|
@@ -4182,7 +4252,7 @@ var buildRAGChunkStructure = (metadata) => {
|
|
|
4182
4252
|
return;
|
|
4183
4253
|
}
|
|
4184
4254
|
const sectionPath = Array.isArray(metadata.sectionPath) ? metadata.sectionPath.filter((value) => typeof value === "string" && value.trim().length > 0) : undefined;
|
|
4185
|
-
const sectionKind = metadata.sectionKind === "markdown_heading" || metadata.sectionKind === "html_heading" ? metadata.sectionKind : undefined;
|
|
4255
|
+
const sectionKind = metadata.sectionKind === "markdown_heading" || metadata.sectionKind === "html_heading" || metadata.sectionKind === "office_heading" || metadata.sectionKind === "spreadsheet_rows" || metadata.sectionKind === "presentation_slide" ? metadata.sectionKind : undefined;
|
|
4186
4256
|
const section = {
|
|
4187
4257
|
depth: getContextNumber2(metadata.sectionDepth),
|
|
4188
4258
|
kind: sectionKind,
|
|
@@ -4211,6 +4281,52 @@ var buildExcerpt2 = (text, maxLength = 160) => {
|
|
|
4211
4281
|
}
|
|
4212
4282
|
return `${normalized.slice(0, Math.max(0, maxLength - 1)).trimEnd()}\u2026`;
|
|
4213
4283
|
};
|
|
4284
|
+
/**
 * Builds the chunk / section / neighbor-window excerpts for one chunk of a
 * document, using the chunk graph to find its neighbors and section.
 *
 * @param {Array<object>} chunks Chunks of the document (each with `chunkId`, `text`, optional `metadata` / `structure`).
 * @param {string} [activeChunkId] Id of the chunk to build excerpts for; resolution is delegated to `buildRAGChunkGraphNavigation`.
 * @returns {{ chunkExcerpt: *, sectionExcerpt: *, windowExcerpt: * } | undefined}
 *   Excerpts produced by `buildExcerpt2` with limits 160 / 320 / 240, or
 *   `undefined` when there are no chunks.
 */
var buildRAGChunkExcerpts = (chunks, activeChunkId) => {
  if (chunks.length === 0) {
    return;
  }

  // The graph only needs identity, metadata and structure, not the text.
  const graphInput = chunks.map((chunk) => ({
    chunkId: chunk.chunkId,
    metadata: chunk.metadata,
    structure: chunk.structure
  }));
  const navigation = buildRAGChunkGraphNavigation(buildRAGChunkGraph(graphInput), activeChunkId);

  const activeChunk = chunks.find((candidate) => candidate.chunkId === navigation.activeChunkId) ?? chunks[0];
  if (!activeChunk) {
    return;
  }

  const chunksById = new Map();
  for (const entry of chunks) {
    chunksById.set(entry.chunkId, entry);
  }

  // previous -> active -> next, skipping missing ids and repeats.
  const windowIds = [];
  for (const id of [navigation.previousNode?.chunkId, activeChunk.chunkId, navigation.nextNode?.chunkId]) {
    if (id && !windowIds.includes(id)) {
      windowIds.push(id);
    }
  }

  // Fall back to the active chunk alone when navigation reports no section nodes.
  const sectionIds =
    navigation.sectionNodes.length > 0
      ? navigation.sectionNodes.map((node) => node.chunkId)
      : [activeChunk.chunkId];

  // Concatenates the text of the given chunks, ignoring ids without string text.
  const textFor = (ids) => {
    const texts = [];
    for (const id of ids) {
      const text = chunksById.get(id)?.text;
      if (typeof text === "string") {
        texts.push(text);
      }
    }
    return texts.join("\n\n");
  };

  return {
    chunkExcerpt: buildExcerpt2(activeChunk.text, 160),
    sectionExcerpt: buildExcerpt2(textFor(sectionIds), 320),
    windowExcerpt: buildExcerpt2(textFor(windowIds), 240)
  };
};
|
|
4314
|
+
/**
 * Chooses which of a chunk's pre-built excerpts should be displayed.
 *
 * When the chunk excerpt is very short (fewer than 72 characters after
 * trimming) and the chunk's section holds more than one chunk, a wider
 * excerpt is preferred: the section excerpt if the section has at most three
 * chunks, otherwise the neighbor-window excerpt. Otherwise the chunk excerpt
 * is used, falling back to the window and then the section excerpt when the
 * chunk excerpt is missing or blank.
 *
 * @param {{ chunkExcerpt?: string, windowExcerpt?: string, sectionExcerpt?: string } | null | undefined} excerpts
 *   Candidate excerpts; missing, blank or non-string fields are treated as absent.
 * @param {{ sequence?: { sectionChunkCount?: number } } | null | undefined} structure
 *   Chunk structure; `sequence.sectionChunkCount` defaults to 1 when absent.
 * @returns {string} The chosen excerpt (returned as stored, not trimmed), or "" when none is usable.
 */
var buildRAGPreferredExcerpt = (excerpts, structure) => {
  if (!excerpts) {
    return "";
  }
  // Previously `excerpts.chunkExcerpt.trim()` threw when a field was missing.
  const hasText = (value) => typeof value === "string" && value.trim().length > 0;
  const { chunkExcerpt, sectionExcerpt, windowExcerpt } = excerpts;
  const chunkLength = hasText(chunkExcerpt) ? chunkExcerpt.trim().length : 0;
  const sectionChunkCount = structure?.sequence?.sectionChunkCount ?? 1;
  if (sectionChunkCount > 1 && chunkLength > 0 && chunkLength < 72) {
    // Small sections are shown whole; larger ones fall back to the window.
    if (sectionChunkCount <= 3 && hasText(sectionExcerpt)) {
      return sectionExcerpt;
    }
    if (hasText(windowExcerpt)) {
      return windowExcerpt;
    }
  }
  if (chunkLength > 0) {
    return chunkExcerpt;
  }
  // Same fallback order as selectPreferredExcerpt: window, then section.
  if (hasText(windowExcerpt)) {
    return windowExcerpt;
  }
  if (hasText(sectionExcerpt)) {
    return sectionExcerpt;
  }
  return "";
};
|
|
4214
4330
|
var buildRAGChunkGraph = (chunks) => {
|
|
4215
4331
|
const nodes = [];
|
|
4216
4332
|
const edges = [];
|
|
@@ -4422,6 +4538,7 @@ var buildRAGSourceSummaries = (sources) => {
|
|
|
4422
4538
|
return sourceGroups.map((group) => {
|
|
4423
4539
|
const groupCitations = citations.filter((citation) => group.chunks.some((chunk) => chunk.chunkId === citation.chunkId));
|
|
4424
4540
|
const leadChunk = group.chunks.slice().sort((left, right) => right.score - left.score)[0];
|
|
4541
|
+
const excerpts = leadChunk ? buildRAGChunkExcerpts(group.chunks, leadChunk.chunkId) : undefined;
|
|
4425
4542
|
return {
|
|
4426
4543
|
bestScore: group.bestScore,
|
|
4427
4544
|
citationNumbers: groupCitations.map((citation) => citationReferenceMap[citation.chunkId] ?? 0),
|
|
@@ -4429,7 +4546,8 @@ var buildRAGSourceSummaries = (sources) => {
|
|
|
4429
4546
|
chunkIds: group.chunks.map((chunk) => chunk.chunkId),
|
|
4430
4547
|
contextLabel: leadChunk?.labels?.contextLabel ?? buildContextLabel2(leadChunk?.metadata),
|
|
4431
4548
|
count: group.count,
|
|
4432
|
-
excerpt: buildExcerpt2(leadChunk?.text ?? ""),
|
|
4549
|
+
excerpt: buildRAGPreferredExcerpt(excerpts, leadChunk?.structure ?? buildRAGChunkStructure(leadChunk?.metadata)) || buildExcerpt2(leadChunk?.text ?? ""),
|
|
4550
|
+
excerpts,
|
|
4433
4551
|
key: group.key,
|
|
4434
4552
|
label: group.label,
|
|
4435
4553
|
locatorLabel: leadChunk?.labels?.locatorLabel ?? buildLocatorLabel2(leadChunk?.metadata, leadChunk?.source, leadChunk?.title),
|
|
@@ -8304,6 +8422,128 @@ var htmlStructureUnits = (value) => {
|
|
|
8304
8422
|
}
|
|
8305
8423
|
return [{ text: normalizeWhitespace(stripHtmlTags(focused)) }].filter((section) => Boolean(section.text));
|
|
8306
8424
|
};
|
|
8425
|
+
/**
 * Heuristically decides whether a paragraph looks like a heading.
 *
 * A paragraph qualifies when, after `normalizeWhitespace`, it is non-empty,
 * at most 80 characters, does not end in `.`, `!` or `?`, has at most eight
 * whitespace-separated words, and at least 60% of those words start with an
 * uppercase/titlecase letter or a digit once surrounding punctuation is
 * stripped.
 *
 * Letters and digits are matched with Unicode property escapes, so headings
 * such as "Überblick" or "Étude De Marché" are recognized; for ASCII-only
 * text this matches exactly what `[A-Za-z0-9]` / `[A-Z0-9]` would.
 *
 * @param {string} value Paragraph text.
 * @returns {boolean} True when the paragraph looks like a heading.
 */
var isLikelyOfficeHeadingParagraph = (value) => {
  const normalized = normalizeWhitespace(value);
  if (!normalized || normalized.length > 80) {
    return false;
  }
  // Sentence-ending punctuation indicates body text rather than a title.
  if (/[.!?]$/.test(normalized)) {
    return false;
  }
  const words = normalized.split(/\s+/);
  if (words.length > 8) {
    return false;
  }
  const headingLikeWords = words.filter((word) => {
    // Drop leading/trailing punctuation such as quotes, brackets or dashes.
    const stripped = word.replace(/^[^\p{L}\p{N}]+|[^\p{L}\p{N}]+$/gu, "");
    if (!stripped) {
      return false;
    }
    return /^[\p{Lu}\p{Lt}\p{N}]/u.test(stripped);
  }).length;
  return headingLikeWords / words.length >= 0.6;
};
|
|
8446
|
+
/**
 * Splits document text into sections keyed by heading-like paragraphs.
 *
 * Paragraphs come from `paragraphUnits`. Each paragraph accepted by
 * `isLikelyOfficeHeadingParagraph` closes the section in progress and starts
 * a new one; the paragraphs that follow become that section's body. Text
 * before the first heading forms a section with no heading fields.
 *
 * @param {string} value Document text.
 * @returns {Array<{ sectionDepth?: number, sectionKind?: string, sectionPath?: string[], sectionTitle?: string, text: string }>}
 *   One unit per section. When no section is produced, a single unit holding
 *   the whitespace-normalized input is returned, or an empty array if that
 *   text is empty, so callers never receive a unit with empty `text`.
 */
var officeHeadingStructureUnits = (value) => {
  const sections = [];
  let currentHeading;
  let currentParagraphs = [];

  // Emits the section in progress (if it has any text) and resets the state.
  const flush = () => {
    if (!currentHeading && currentParagraphs.length === 0) {
      return;
    }
    const text = normalizeWhitespace([currentHeading, ...currentParagraphs].filter(Boolean).join("\n\n"));
    if (text) {
      sections.push({
        sectionDepth: currentHeading ? 1 : undefined,
        sectionKind: currentHeading ? "office_heading" : undefined,
        sectionPath: currentHeading ? [currentHeading] : undefined,
        sectionTitle: currentHeading,
        text
      });
    }
    currentHeading = undefined;
    currentParagraphs = [];
  };

  for (const paragraph of paragraphUnits(value)) {
    if (isLikelyOfficeHeadingParagraph(paragraph)) {
      flush();
      currentHeading = paragraph;
      continue;
    }
    currentParagraphs.push(paragraph);
  }
  flush();

  if (sections.length > 0) {
    return sections;
  }
  // Blank input used to yield `[{ text: "" }]`; return no units instead,
  // matching what spreadsheetStructureUnits does for empty input.
  const fallbackText = normalizeWhitespace(value);
  return fallbackText ? [{ text: fallbackText }] : [];
};
|
|
8484
|
+
/**
 * Splits the text of one spreadsheet sheet into row-group units.
 *
 * Lines are whitespace-normalized and empty lines dropped. The sheet name is
 * `metadata.sheetName` when it is a non-empty string, otherwise the first
 * line with a leading "Sheet " removed. Lines matching `Row <n>.` are packed
 * greedily into groups whose newline-joined length stays within
 * `DEFAULT_MAX_CHUNK_LENGTH` (a single row may exceed it on its own).
 *
 * @param {string} value Sheet text.
 * @param {{ sheetName?: unknown } | null | undefined} metadata Document metadata.
 * @returns {Array<object>} Empty for blank input; one unit containing every
 *   line when no row lines exist; otherwise one unit per row group, each
 *   prefixed with "Sheet <name>" and carrying its rows as `preferredChunkUnits`.
 */
var spreadsheetStructureUnits = (value, metadata) => {
  const lines = [];
  for (const rawLine of value.replace(/\r\n?/g, "\n").split("\n")) {
    const line = normalizeWhitespace(rawLine);
    if (line) {
      lines.push(line);
    }
  }
  if (lines.length === 0) {
    return [];
  }

  const declaredSheetName = typeof metadata?.sheetName === "string" ? metadata.sheetName : "";
  const sheetName = declaredSheetName || lines[0].replace(/^Sheet\s+/i, "");

  const rowLines = lines.filter((line) => /^Row \d+\./.test(line));
  if (rowLines.length === 0) {
    // Nothing row-shaped: keep the whole sheet as a single unit.
    return [
      {
        sectionDepth: 1,
        sectionKind: "spreadsheet_rows",
        sectionPath: [sheetName],
        sectionTitle: sheetName,
        text: normalizeWhitespace(lines.join("\n"))
      }
    ];
  }

  const groups = [];
  let pending = [];
  // Length of pending.join("\n"), tracked incrementally.
  let pendingLength = 0;
  for (const row of rowLines) {
    const joinedLength = pending.length === 0 ? row.length : pendingLength + 1 + row.length;
    if (pending.length > 0 && joinedLength > DEFAULT_MAX_CHUNK_LENGTH) {
      groups.push(pending);
      pending = [row];
      pendingLength = row.length;
    } else {
      pending.push(row);
      pendingLength = joinedLength;
    }
  }
  if (pending.length > 0) {
    groups.push(pending);
  }

  return groups.map((rows) => {
    const text = normalizeWhitespace([`Sheet ${sheetName}`, ...rows].join("\n"));
    return {
      preferredChunkUnits: rows,
      sectionDepth: 1,
      sectionKind: "spreadsheet_rows",
      sectionPath: [sheetName],
      sectionTitle: sheetName,
      text
    };
  });
};
|
|
8530
|
+
/**
 * Wraps the text of one presentation slide in a single structure unit.
 *
 * The slide label is "Slide <n>", where n is `metadata.slideNumber` when it
 * is a number, else `metadata.slideIndex + 1` when that is a number; with no
 * usable (non-zero) number the label is just "Slide".
 *
 * @param {string} value Slide text.
 * @param {{ slideNumber?: unknown, slideIndex?: unknown } | null | undefined} metadata Document metadata.
 * @returns {Array<object>} Exactly one unit whose text is the label followed
 *   by the slide's paragraphs, with the paragraphs as `preferredChunkUnits`.
 */
var presentationStructureUnits = (value, metadata) => {
  let slideNumber;
  if (typeof metadata?.slideNumber === "number") {
    slideNumber = metadata.slideNumber;
  } else if (typeof metadata?.slideIndex === "number") {
    // slideIndex is converted to a 1-based number for display.
    slideNumber = metadata.slideIndex + 1;
  }
  const slideLabel = slideNumber ? `Slide ${slideNumber}` : "Slide";
  const paragraphs = paragraphUnits(value);
  const text = normalizeWhitespace([slideLabel, ...paragraphs].join("\n\n"));
  return [
    {
      preferredChunkUnits: paragraphs,
      sectionDepth: 1,
      sectionKind: "presentation_slide",
      sectionPath: [slideLabel],
      sectionTitle: slideLabel,
      text
    }
  ];
};
|
|
8307
8547
|
var inferFormat = (document) => {
|
|
8308
8548
|
if (document.format) {
|
|
8309
8549
|
return document.format;
|
|
@@ -9479,6 +9719,15 @@ var sourceAwareUnits = (document, format, normalizedText) => {
|
|
|
9479
9719
|
}
|
|
9480
9720
|
case "text":
|
|
9481
9721
|
default:
|
|
9722
|
+
if (document.metadata?.sourceNativeKind === "spreadsheet_sheet") {
|
|
9723
|
+
return spreadsheetStructureUnits(normalizedText, document.metadata);
|
|
9724
|
+
}
|
|
9725
|
+
if (document.metadata?.sourceNativeKind === "presentation_slide") {
|
|
9726
|
+
return presentationStructureUnits(normalizedText, document.metadata);
|
|
9727
|
+
}
|
|
9728
|
+
if (document.source?.toLowerCase().endsWith(".docx") || document.source?.toLowerCase().endsWith(".odt")) {
|
|
9729
|
+
return officeHeadingStructureUnits(normalizedText);
|
|
9730
|
+
}
|
|
9482
9731
|
return paragraphUnits(normalizedText).map((text) => ({ text }));
|
|
9483
9732
|
}
|
|
9484
9733
|
};
|
|
@@ -9546,9 +9795,82 @@ var chunkSourceAwareUnit = (unit, options) => {
|
|
|
9546
9795
|
if (unit.text.length <= options.maxChunkLength) {
|
|
9547
9796
|
return [unit];
|
|
9548
9797
|
}
|
|
9549
|
-
|
|
9798
|
+
const expandOversizedParagraph = (paragraph) => {
|
|
9799
|
+
if (paragraph.length <= options.maxChunkLength) {
|
|
9800
|
+
return [paragraph];
|
|
9801
|
+
}
|
|
9802
|
+
const sentenceChunks = chunkFromUnits(sentenceUnits(paragraph), options.maxChunkLength, 0, options.minChunkLength);
|
|
9803
|
+
if (sentenceChunks.length > 1) {
|
|
9804
|
+
return sentenceChunks;
|
|
9805
|
+
}
|
|
9806
|
+
return chunkFromUnits(fixedUnits(paragraph, options.maxChunkLength), options.maxChunkLength, 0, options.minChunkLength);
|
|
9807
|
+
};
|
|
9808
|
+
const stableParagraphs = (unit.preferredChunkUnits ?? paragraphUnits(unit.text)).flatMap(expandOversizedParagraph);
|
|
9809
|
+
const stableChunks = [];
|
|
9810
|
+
let current = "";
|
|
9811
|
+
const pushCurrent = () => {
|
|
9812
|
+
if (!current) {
|
|
9813
|
+
return;
|
|
9814
|
+
}
|
|
9815
|
+
stableChunks.push(normalizeWhitespace(current));
|
|
9816
|
+
current = "";
|
|
9817
|
+
};
|
|
9818
|
+
for (const paragraph of stableParagraphs) {
|
|
9819
|
+
const trimmed = paragraph.trim();
|
|
9820
|
+
if (!trimmed) {
|
|
9821
|
+
continue;
|
|
9822
|
+
}
|
|
9823
|
+
if (!current) {
|
|
9824
|
+
current = trimmed;
|
|
9825
|
+
continue;
|
|
9826
|
+
}
|
|
9827
|
+
const candidate = `${current}
|
|
9828
|
+
|
|
9829
|
+
${trimmed}`;
|
|
9830
|
+
if (candidate.length <= options.maxChunkLength) {
|
|
9831
|
+
current = candidate;
|
|
9832
|
+
continue;
|
|
9833
|
+
}
|
|
9834
|
+
pushCurrent();
|
|
9835
|
+
current = trimmed;
|
|
9836
|
+
}
|
|
9837
|
+
pushCurrent();
|
|
9838
|
+
const merged = [];
|
|
9839
|
+
for (const chunk of stableChunks) {
|
|
9840
|
+
const last = merged.at(-1);
|
|
9841
|
+
if (last && chunk.length < options.minChunkLength && `${last}
|
|
9842
|
+
|
|
9843
|
+
${chunk}`.length <= options.maxChunkLength) {
|
|
9844
|
+
merged[merged.length - 1] = normalizeWhitespace(`${last}
|
|
9845
|
+
|
|
9846
|
+
${chunk}`);
|
|
9847
|
+
continue;
|
|
9848
|
+
}
|
|
9849
|
+
merged.push(chunk);
|
|
9850
|
+
}
|
|
9851
|
+
const decorateSourceAwareChunkText = (text) => {
|
|
9852
|
+
if (!unit.preferredChunkUnits || !unit.sectionTitle) {
|
|
9853
|
+
return text;
|
|
9854
|
+
}
|
|
9855
|
+
if (unit.sectionKind === "spreadsheet_rows") {
|
|
9856
|
+
if (text.includes(`Sheet ${unit.sectionTitle}`)) {
|
|
9857
|
+
return text;
|
|
9858
|
+
}
|
|
9859
|
+
return normalizeWhitespace(`Sheet ${unit.sectionTitle}
|
|
9860
|
+
${text}`);
|
|
9861
|
+
}
|
|
9862
|
+
if (unit.sectionKind === "presentation_slide") {
|
|
9863
|
+
if (text.includes(unit.sectionTitle)) {
|
|
9864
|
+
return text;
|
|
9865
|
+
}
|
|
9866
|
+
return normalizeWhitespace(`${unit.sectionTitle}
|
|
9867
|
+
${text}`);
|
|
9868
|
+
}
|
|
9869
|
+
return text;
|
|
9870
|
+
};
|
|
9871
|
+
return merged.map((text) => ({
|
|
9550
9872
|
...unit,
|
|
9551
|
-
text
|
|
9873
|
+
text: decorateSourceAwareChunkText(text)
|
|
9552
9874
|
}));
|
|
9553
9875
|
};
|
|
9554
9876
|
var resolveChunkingUnits = (text, options) => {
|
|
@@ -10311,6 +10633,17 @@ var renderChunkStructure = (structure) => {
|
|
|
10311
10633
|
].filter((row) => row.length > 0);
|
|
10312
10634
|
return rows.length > 0 ? `<ul class="rag-chunk-structure">${rows.join("")}</ul>` : "";
|
|
10313
10635
|
};
|
|
10636
|
+
/**
 * Renders a chunk's excerpts as an HTML list.
 *
 * Each non-empty excerpt becomes one `<li>` with a bold caption, in the
 * order chunk, neighbor window, section; excerpt text is passed through
 * `escapeHtml2`.
 *
 * @param {{ chunkExcerpt?: string, windowExcerpt?: string, sectionExcerpt?: string } | null | undefined} input
 * @returns {string} A `<ul class="rag-chunk-structure">` element, or "" when
 *   there is no input or no excerpt to show.
 */
var renderChunkExcerpts = (input) => {
  if (!input) {
    return "";
  }
  const captioned = [
    ["Chunk excerpt", input.chunkExcerpt],
    ["Neighbor window", input.windowExcerpt],
    ["Section excerpt", input.sectionExcerpt]
  ];
  const items = [];
  for (const [caption, excerpt] of captioned) {
    if (excerpt) {
      items.push(`<li><strong>${caption}</strong> ${escapeHtml2(excerpt)}</li>`);
    }
  }
  if (items.length === 0) {
    return "";
  }
  return `<ul class="rag-chunk-structure">${items.join("")}</ul>`;
};
|
|
10314
10647
|
var renderSectionJumpList = (label, items) => {
|
|
10315
10648
|
const rows = items.map((item) => item.href ? `<li><strong>${escapeHtml2(label)}</strong> <a href="${escapeHtml2(item.href)}"${item.active ? ' aria-current="true"' : ""}>${escapeHtml2(item.label)}</a></li>` : `<li><strong>${escapeHtml2(label)}</strong> ${escapeHtml2(item.label)}</li>`).join("");
|
|
10316
10649
|
return rows ? `<ul class="rag-chunk-structure">${rows}</ul>` : "";
|
|
@@ -10410,7 +10743,7 @@ var defaultChunkPreview = (input) => {
|
|
|
10410
10743
|
return acc;
|
|
10411
10744
|
}, []);
|
|
10412
10745
|
const groupHtml = groups.map((group) => {
|
|
10413
|
-
const chunkHtml = group.chunks.map((chunk) => '<article class="rag-chunk">' + `<h5>${escapeHtml2(chunk.chunkId)}</h5>` + `<p class="rag-chunk-meta">chunk ${typeof chunk.metadata?.chunkIndex === "number" ? chunk.metadata.chunkIndex : 0} of ${typeof chunk.metadata?.chunkCount === "number" ? chunk.metadata.chunkCount : input.chunks.length}</p>` + renderSourceLabels(chunk.labels) + renderChunkStructure(chunk.structure) + `<pre>${escapeHtml2(chunk.text)}</pre>` + "</article>").join("");
|
|
10746
|
+
const chunkHtml = group.chunks.map((chunk) => '<article class="rag-chunk">' + `<h5>${escapeHtml2(chunk.chunkId)}</h5>` + `<p class="rag-chunk-meta">chunk ${typeof chunk.metadata?.chunkIndex === "number" ? chunk.metadata.chunkIndex : 0} of ${typeof chunk.metadata?.chunkCount === "number" ? chunk.metadata.chunkCount : input.chunks.length}</p>` + renderSourceLabels(chunk.labels) + renderChunkStructure(chunk.structure) + renderChunkExcerpts(chunk.excerpts) + `<pre>${escapeHtml2(chunk.text)}</pre>` + "</article>").join("");
|
|
10414
10747
|
return `<section class="rag-chunk-group"><h4>${escapeHtml2(group.title)}</h4>${chunkHtml}</section>`;
|
|
10415
10748
|
}).join("");
|
|
10416
10749
|
return `<section class="rag-chunk-preview">` + `<h3>${escapeHtml2(input.document.title)}</h3>` + `<p class="rag-chunk-preview-source">${escapeHtml2(input.document.source)}</p>` + renderSourceLabels(input.document.labels) + (navigation.parentSection ? renderSectionJumpList("Parent section", [
|
|
@@ -15984,6 +16317,15 @@ var ragChat = (config) => {
|
|
|
15984
16317
|
ok: false
|
|
15985
16318
|
};
|
|
15986
16319
|
}
|
|
16320
|
+
const chunks = preview.chunks.map((chunk) => ({
|
|
16321
|
+
...chunk,
|
|
16322
|
+
labels: buildRAGSourceLabels({
|
|
16323
|
+
metadata: chunk.metadata,
|
|
16324
|
+
source: chunk.source ?? preview.document.source,
|
|
16325
|
+
title: chunk.title ?? preview.document.title
|
|
16326
|
+
}),
|
|
16327
|
+
structure: buildRAGChunkStructure(chunk.metadata)
|
|
16328
|
+
}));
|
|
15987
16329
|
return {
|
|
15988
16330
|
ok: true,
|
|
15989
16331
|
...preview,
|
|
@@ -15995,14 +16337,9 @@ var ragChat = (config) => {
|
|
|
15995
16337
|
title: preview.document.title
|
|
15996
16338
|
})
|
|
15997
16339
|
},
|
|
15998
|
-
chunks:
|
|
16340
|
+
chunks: chunks.map((chunk) => ({
|
|
15999
16341
|
...chunk,
|
|
16000
|
-
|
|
16001
|
-
metadata: chunk.metadata,
|
|
16002
|
-
source: chunk.source ?? preview.document.source,
|
|
16003
|
-
title: chunk.title ?? preview.document.title
|
|
16004
|
-
}),
|
|
16005
|
-
structure: buildRAGChunkStructure(chunk.metadata)
|
|
16342
|
+
excerpts: buildRAGChunkExcerpts(chunks, chunk.chunkId)
|
|
16006
16343
|
}))
|
|
16007
16344
|
};
|
|
16008
16345
|
};
|
|
@@ -21599,5 +21936,5 @@ export {
|
|
|
21599
21936
|
aiChat
|
|
21600
21937
|
};
|
|
21601
21938
|
|
|
21602
|
-
//# debugId=
|
|
21939
|
+
//# debugId=7A0BBAE3433EF77D64756E2164756E21
|
|
21603
21940
|
//# sourceMappingURL=index.js.map
|