@absolutejs/absolute 0.19.0-beta.607 → 0.19.0-beta.609
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/ai/client/index.js +142 -18
- package/dist/ai/client/index.js.map +4 -4
- package/dist/ai/client/ui.js +143 -18
- package/dist/ai/client/ui.js.map +4 -4
- package/dist/ai/index.js +371 -28
- package/dist/ai/index.js.map +7 -7
- package/dist/ai/rag/quality.js +92 -16
- package/dist/ai/rag/quality.js.map +3 -3
- package/dist/ai/rag/ui.js +143 -18
- package/dist/ai/rag/ui.js.map +4 -4
- package/dist/ai-client/angular/ai/index.js +141 -17
- package/dist/ai-client/react/ai/index.js +141 -17
- package/dist/ai-client/vue/ai/index.js +141 -17
- package/dist/angular/ai/index.js +142 -18
- package/dist/angular/ai/index.js.map +4 -4
- package/dist/angular/index.js +2 -2
- package/dist/angular/index.js.map +1 -1
- package/dist/angular/server.js +2 -2
- package/dist/angular/server.js.map +1 -1
- package/dist/build.js +2 -2
- package/dist/build.js.map +1 -1
- package/dist/index.js +2 -2
- package/dist/index.js.map +1 -1
- package/dist/react/ai/index.js +142 -18
- package/dist/react/ai/index.js.map +6 -6
- package/dist/src/ai/client/ui.d.ts +1 -1
- package/dist/src/ai/rag/index.d.ts +1 -1
- package/dist/src/ai/rag/presentation.d.ts +4 -1
- package/dist/src/ai/rag/ui.d.ts +1 -1
- package/dist/src/vue/ai/useRAG.d.ts +14 -4
- package/dist/src/vue/ai/useRAGChunkPreview.d.ts +12 -2
- package/dist/src/vue/ai/useRAGSearch.d.ts +2 -2
- package/dist/svelte/ai/index.js +142 -18
- package/dist/svelte/ai/index.js.map +6 -6
- package/dist/types/ai.d.ts +15 -2
- package/dist/vue/ai/index.js +142 -18
- package/dist/vue/ai/index.js.map +5 -5
- package/package.json +1 -1
package/dist/ai/index.js
CHANGED
|
@@ -347,6 +347,60 @@ var buildExcerpt = (text, maxLength = 160) => {
|
|
|
347
347
|
}
|
|
348
348
|
return `${normalized.slice(0, Math.max(0, maxLength - 1)).trimEnd()}\u2026`;
|
|
349
349
|
};
|
|
350
|
+
/**
 * Chooses which excerpt to display for a citation or reference.
 *
 * A short chunk excerpt (1-71 characters after trimming) that belongs to a
 * section spanning more than one chunk is widened: to the section excerpt when
 * the section has at most three chunks, otherwise to the neighbour-window
 * excerpt. In every other case the first non-empty value of chunk, window,
 * section is returned.
 *
 * @param {{chunkExcerpt?: string, windowExcerpt?: string, sectionExcerpt?: string} | undefined} excerpts
 *   Candidate excerpts; any field may be missing.
 * @param {number | undefined} sectionChunkCount Number of chunks in the section.
 * @returns {string} The trimmed excerpt, or "" when nothing is available.
 */
var selectPreferredExcerpt = (excerpts, sectionChunkCount) => {
  if (!excerpts) {
    return "";
  }
  const tidy = (value) => value?.trim() ?? "";
  const chunk = tidy(excerpts.chunkExcerpt);
  const neighbours = tidy(excerpts.windowExcerpt);
  const section = tidy(excerpts.sectionExcerpt);

  const isMultiChunkSection = Boolean(sectionChunkCount) && sectionChunkCount > 1;
  const isShortChunk = chunk.length > 0 && chunk.length < 72;
  if (isMultiChunkSection && isShortChunk) {
    // Small sections are shown whole; larger ones fall back to the window.
    const widened = sectionChunkCount <= 3 && section ? section : neighbours;
    if (widened) {
      return widened;
    }
  }
  return chunk || neighbours || section;
};
|
|
367
|
+
/**
 * Builds the chunk / section / neighbour-window excerpts for one source.
 *
 * The active source is the one whose `chunkId` equals `activeChunkId`, or the
 * first source when there is no match. Its metadata supplies the previous,
 * next and section chunk ids; only chunks present in `sources` contribute
 * text to the window excerpt.
 *
 * @param {Array<{chunkId: string, text: string, metadata?: object}>} sources
 * @param {string | undefined} activeChunkId
 * @returns {{chunkExcerpt: string, sectionExcerpt: string, windowExcerpt: string} | undefined}
 *   `undefined` when there are no sources.
 */
var buildGroundingChunkExcerpts = (sources, activeChunkId) => {
  if (sources.length === 0) {
    return;
  }
  const requested = activeChunkId ? sources.find((candidate) => candidate.chunkId === activeChunkId) : undefined;
  const focus = requested ?? sources[0];
  if (!focus) {
    return;
  }

  const PARAGRAPH_BREAK = "\n\n";
  // Later duplicates of a chunk id overwrite earlier ones.
  const byChunkId = new Map();
  for (const entry of sources) {
    byChunkId.set(entry.chunkId, entry);
  }

  const focusMetadata = focus.metadata ?? {};
  const previousId = getContextString(focusMetadata.previousChunkId);
  const followingId = getContextString(focusMetadata.nextChunkId);
  const sectionId = getContextString(focusMetadata.sectionChunkId);

  // Sources without a usable section index sort after the indexed ones.
  const sectionOrdinal = (entry) => getContextNumber(entry.metadata?.sectionChunkIndex) ?? Number.MAX_SAFE_INTEGER;
  let sectionMembers = [focus];
  if (sectionId) {
    sectionMembers = sources.filter((entry) => getContextString(entry.metadata?.sectionChunkId) === sectionId);
    sectionMembers.sort((left, right) => {
      const leftOrdinal = sectionOrdinal(left);
      const rightOrdinal = sectionOrdinal(right);
      return leftOrdinal === rightOrdinal ? left.chunkId.localeCompare(right.chunkId) : leftOrdinal - rightOrdinal;
    });
  }

  const gatherText = (ids) => {
    const texts = [];
    for (const id of ids) {
      const text = byChunkId.get(id)?.text;
      if (typeof text === "string") {
        texts.push(text);
      }
    }
    return texts.join(PARAGRAPH_BREAK);
  };

  // previous -> active -> next, skipping missing ids and repeats.
  const windowIds = [];
  for (const id of [previousId, focus.chunkId, followingId]) {
    if (id && !windowIds.includes(id)) {
      windowIds.push(id);
    }
  }

  return {
    chunkExcerpt: buildExcerpt(focus.text, 160),
    sectionExcerpt: buildExcerpt(sectionMembers.map((entry) => entry.text).join(PARAGRAPH_BREAK), 320),
    windowExcerpt: buildExcerpt(gatherText(windowIds), 240)
  };
};
|
|
350
404
|
/**
 * Joins a reference's label, locator label and context label into a single
 * " · "-separated string, dropping empty values and repeated entries while
 * keeping the first occurrence of each.
 *
 * @param {{label?: string, locatorLabel?: string, contextLabel?: string}} reference
 * @returns {string} The combined label, or "" when every part is empty.
 */
var buildGroundingReferenceEvidenceLabel = (reference) => {
  const parts = [];
  for (const candidate of [reference.label, reference.locatorLabel, reference.contextLabel]) {
    const usable = Boolean(candidate && candidate.length > 0);
    if (usable && !parts.some((kept) => kept === candidate)) {
      parts.push(candidate);
    }
  }
  return parts.join(" \xB7 ");
};
|
|
351
405
|
var buildGroundingReferenceEvidenceSummary = (reference) => [
|
|
352
406
|
reference.source ?? reference.title ?? reference.chunkId,
|
|
@@ -365,7 +419,8 @@ var buildGroundedAnswerCitationDetail = (reference) => ({
|
|
|
365
419
|
contextLabel: reference.contextLabel,
|
|
366
420
|
evidenceLabel: buildGroundingReferenceEvidenceLabel(reference),
|
|
367
421
|
evidenceSummary: buildGroundingReferenceEvidenceSummary(reference),
|
|
368
|
-
excerpt: reference.excerpt,
|
|
422
|
+
excerpt: selectPreferredExcerpt(reference.excerpts, getContextNumber(reference.metadata?.sectionChunkCount)) || reference.excerpt,
|
|
423
|
+
excerpts: reference.excerpts,
|
|
369
424
|
label: reference.label,
|
|
370
425
|
locatorLabel: reference.locatorLabel,
|
|
371
426
|
number: reference.number,
|
|
@@ -385,6 +440,8 @@ var buildRAGCitations = (sources) => {
|
|
|
385
440
|
unique.set(key, {
|
|
386
441
|
chunkId: source.chunkId,
|
|
387
442
|
contextLabel: source.labels?.contextLabel ?? buildContextLabel(source.metadata),
|
|
443
|
+
excerpt: selectPreferredExcerpt(buildGroundingChunkExcerpts(sources, source.chunkId), getContextNumber(source.metadata?.sectionChunkCount)) || buildExcerpt(source.text),
|
|
444
|
+
excerpts: buildGroundingChunkExcerpts(sources, source.chunkId),
|
|
388
445
|
key,
|
|
389
446
|
label: buildSourceLabel(source),
|
|
390
447
|
locatorLabel: source.labels?.locatorLabel ?? buildLocatorLabel(source.metadata, source.source, source.title),
|
|
@@ -461,10 +518,17 @@ var buildRAGGroundedAnswerSectionSummaries = (references) => {
|
|
|
461
518
|
const key = buildGroundingSectionKey(reference);
|
|
462
519
|
const existing = groups.get(key);
|
|
463
520
|
if (!existing) {
|
|
521
|
+
const excerpts = reference.excerpts ? {
|
|
522
|
+
chunkExcerpt: reference.excerpts.chunkExcerpt,
|
|
523
|
+
sectionExcerpt: reference.excerpts.sectionExcerpt,
|
|
524
|
+
windowExcerpt: reference.excerpts.windowExcerpt
|
|
525
|
+
} : undefined;
|
|
464
526
|
groups.set(key, {
|
|
465
527
|
chunkIds: [reference.chunkId],
|
|
466
528
|
contextLabel: reference.contextLabel,
|
|
467
529
|
count: 1,
|
|
530
|
+
excerpt: selectPreferredExcerpt(excerpts, getContextNumber(reference.metadata?.sectionChunkCount)) || excerpts?.sectionExcerpt || reference.excerpt,
|
|
531
|
+
excerpts,
|
|
468
532
|
key,
|
|
469
533
|
label: key,
|
|
470
534
|
locatorLabel: reference.locatorLabel,
|
|
@@ -492,6 +556,14 @@ var buildRAGGroundedAnswerSectionSummaries = (references) => {
|
|
|
492
556
|
if (!existing.provenanceLabel && reference.provenanceLabel) {
|
|
493
557
|
existing.provenanceLabel = reference.provenanceLabel;
|
|
494
558
|
}
|
|
559
|
+
if (!existing.excerpts && reference.excerpts) {
|
|
560
|
+
existing.excerpts = {
|
|
561
|
+
chunkExcerpt: reference.excerpts.chunkExcerpt,
|
|
562
|
+
sectionExcerpt: reference.excerpts.sectionExcerpt,
|
|
563
|
+
windowExcerpt: reference.excerpts.windowExcerpt
|
|
564
|
+
};
|
|
565
|
+
existing.excerpt = reference.excerpts.sectionExcerpt;
|
|
566
|
+
}
|
|
495
567
|
}
|
|
496
568
|
return [...groups.values()].map((group) => ({
|
|
497
569
|
...group,
|
|
@@ -509,20 +581,24 @@ var buildRAGGroundedAnswerSectionSummaries = (references) => {
|
|
|
509
581
|
/**
 * Turns retrieved sources into numbered grounding references.
 *
 * Citations are built from `sources`; each one is then enriched with its
 * excerpt set, a display excerpt, labels (falling back to values built from
 * metadata when the citation carries none) and its citation number (0 when
 * the reference map has no entry for the chunk).
 *
 * @param {Array<object>} sources Retrieved sources.
 * @returns {Array<object>} One reference per citation, in citation order.
 */
var buildRAGGroundingReferences = (sources) => {
  const citations = buildRAGCitations(sources);
  const numberByChunkId = buildRAGCitationReferenceMap(citations);

  const toReference = (citation) => {
    const { metadata } = citation;
    const excerpts = buildGroundingChunkExcerpts(sources, citation.chunkId);
    const contextLabel = citation.contextLabel ?? buildContextLabel(metadata);
    const preferred = selectPreferredExcerpt(excerpts, getContextNumber(metadata?.sectionChunkCount));
    const excerpt = preferred || excerpts?.chunkExcerpt || buildExcerpt(citation.text);
    const locatorLabel = citation.locatorLabel ?? buildLocatorLabel(metadata, citation.source, citation.title);
    const provenanceLabel = citation.provenanceLabel ?? buildProvenanceLabel(metadata);
    return {
      chunkId: citation.chunkId,
      contextLabel,
      excerpt,
      excerpts,
      label: citation.label,
      locatorLabel,
      metadata,
      number: numberByChunkId[citation.chunkId] ?? 0,
      provenanceLabel,
      score: citation.score,
      source: citation.source,
      text: citation.text,
      title: citation.title
    };
  };

  return citations.map((citation) => toReference(citation));
};
|
|
527
603
|
|
|
528
604
|
// src/ai/rag/quality.ts
|
|
@@ -4182,7 +4258,7 @@ var buildRAGChunkStructure = (metadata) => {
|
|
|
4182
4258
|
return;
|
|
4183
4259
|
}
|
|
4184
4260
|
const sectionPath = Array.isArray(metadata.sectionPath) ? metadata.sectionPath.filter((value) => typeof value === "string" && value.trim().length > 0) : undefined;
|
|
4185
|
-
const sectionKind = metadata.sectionKind === "markdown_heading" || metadata.sectionKind === "html_heading" ? metadata.sectionKind : undefined;
|
|
4261
|
+
const sectionKind = metadata.sectionKind === "markdown_heading" || metadata.sectionKind === "html_heading" || metadata.sectionKind === "office_heading" || metadata.sectionKind === "spreadsheet_rows" || metadata.sectionKind === "presentation_slide" ? metadata.sectionKind : undefined;
|
|
4186
4262
|
const section = {
|
|
4187
4263
|
depth: getContextNumber2(metadata.sectionDepth),
|
|
4188
4264
|
kind: sectionKind,
|
|
@@ -4211,6 +4287,52 @@ var buildExcerpt2 = (text, maxLength = 160) => {
|
|
|
4211
4287
|
}
|
|
4212
4288
|
return `${normalized.slice(0, Math.max(0, maxLength - 1)).trimEnd()}\u2026`;
|
|
4213
4289
|
};
|
|
4290
|
+
/**
 * Builds the chunk / section / neighbour-window excerpts for one chunk of a
 * document preview.
 *
 * Navigation (active chunk, previous and next nodes, section nodes) comes
 * from the chunk graph built over `chunks`. When navigation reports no
 * section nodes the section excerpt covers the active chunk only.
 *
 * @param {Array<{chunkId: string, text: string, metadata?: object, structure?: object}>} chunks
 * @param {string | undefined} activeChunkId
 * @returns {{chunkExcerpt: string, sectionExcerpt: string, windowExcerpt: string} | undefined}
 *   `undefined` when there are no chunks.
 */
var buildRAGChunkExcerpts = (chunks, activeChunkId) => {
  if (chunks.length === 0) {
    return;
  }
  const graphInput = chunks.map((chunk) => ({
    chunkId: chunk.chunkId,
    metadata: chunk.metadata,
    structure: chunk.structure
  }));
  const navigation = buildRAGChunkGraphNavigation(buildRAGChunkGraph(graphInput), activeChunkId);
  const focus = chunks.find((candidate) => candidate.chunkId === navigation.activeChunkId) ?? chunks[0];
  if (!focus) {
    return;
  }

  // Later duplicates of a chunk id overwrite earlier ones.
  const byChunkId = new Map();
  for (const chunk of chunks) {
    byChunkId.set(chunk.chunkId, chunk);
  }

  // previous -> active -> next, skipping missing ids and repeats.
  const windowIds = [];
  for (const id of [navigation.previousNode?.chunkId, focus.chunkId, navigation.nextNode?.chunkId]) {
    if (id && !windowIds.includes(id)) {
      windowIds.push(id);
    }
  }

  let sectionIds = [focus.chunkId];
  if (navigation.sectionNodes.length > 0) {
    sectionIds = navigation.sectionNodes.map((node) => node.chunkId);
  }

  const gatherText = (ids) => {
    const texts = [];
    for (const id of ids) {
      const text = byChunkId.get(id)?.text;
      if (typeof text === "string") {
        texts.push(text);
      }
    }
    return texts.join("\n\n");
  };

  return {
    chunkExcerpt: buildExcerpt2(focus.text, 160),
    sectionExcerpt: buildExcerpt2(gatherText(sectionIds), 320),
    windowExcerpt: buildExcerpt2(gatherText(windowIds), 240)
  };
};
|
|
4320
|
+
/**
 * Chooses which excerpt to display for a chunk, given its excerpt set and
 * chunk structure.
 *
 * A short chunk excerpt (1-71 characters after trimming) inside a section of
 * more than one chunk is widened: to the section excerpt when the section has
 * at most three chunks, otherwise to the neighbour-window excerpt. In every
 * other case the chunk excerpt is returned as-is.
 *
 * Missing or non-string excerpt fields are treated as empty instead of
 * throwing, so a partially populated excerpt set is tolerated.
 *
 * @param {{chunkExcerpt?: string, sectionExcerpt?: string, windowExcerpt?: string} | undefined} excerpts
 * @param {{sequence?: {sectionChunkCount?: number}} | undefined} structure
 *   Chunk structure; a missing `sectionChunkCount` counts as 1.
 * @returns {string} The selected excerpt (untrimmed), or "" when unavailable.
 */
var buildRAGPreferredExcerpt = (excerpts, structure) => {
  if (!excerpts) {
    return "";
  }
  const asText = (value) => (typeof value === "string" ? value : "");
  const chunkExcerpt = asText(excerpts.chunkExcerpt);
  const sectionExcerpt = asText(excerpts.sectionExcerpt);
  const windowExcerpt = asText(excerpts.windowExcerpt);

  const chunkLength = chunkExcerpt.trim().length;
  const sectionChunkCount = structure?.sequence?.sectionChunkCount ?? 1;
  if (sectionChunkCount > 1 && chunkLength > 0 && chunkLength < 72) {
    // Small sections are shown whole; larger ones fall back to the window.
    if (sectionChunkCount <= 3 && sectionExcerpt.trim().length > 0) {
      return sectionExcerpt;
    }
    if (windowExcerpt.trim().length > 0) {
      return windowExcerpt;
    }
  }
  return chunkExcerpt;
};
|
|
4214
4336
|
var buildRAGChunkGraph = (chunks) => {
|
|
4215
4337
|
const nodes = [];
|
|
4216
4338
|
const edges = [];
|
|
@@ -4422,6 +4544,7 @@ var buildRAGSourceSummaries = (sources) => {
|
|
|
4422
4544
|
return sourceGroups.map((group) => {
|
|
4423
4545
|
const groupCitations = citations.filter((citation) => group.chunks.some((chunk) => chunk.chunkId === citation.chunkId));
|
|
4424
4546
|
const leadChunk = group.chunks.slice().sort((left, right) => right.score - left.score)[0];
|
|
4547
|
+
const excerpts = leadChunk ? buildRAGChunkExcerpts(group.chunks, leadChunk.chunkId) : undefined;
|
|
4425
4548
|
return {
|
|
4426
4549
|
bestScore: group.bestScore,
|
|
4427
4550
|
citationNumbers: groupCitations.map((citation) => citationReferenceMap[citation.chunkId] ?? 0),
|
|
@@ -4429,7 +4552,8 @@ var buildRAGSourceSummaries = (sources) => {
|
|
|
4429
4552
|
chunkIds: group.chunks.map((chunk) => chunk.chunkId),
|
|
4430
4553
|
contextLabel: leadChunk?.labels?.contextLabel ?? buildContextLabel2(leadChunk?.metadata),
|
|
4431
4554
|
count: group.count,
|
|
4432
|
-
excerpt: buildExcerpt2(leadChunk?.text ?? ""),
|
|
4555
|
+
excerpt: buildRAGPreferredExcerpt(excerpts, leadChunk?.structure ?? buildRAGChunkStructure(leadChunk?.metadata)) || buildExcerpt2(leadChunk?.text ?? ""),
|
|
4556
|
+
excerpts,
|
|
4433
4557
|
key: group.key,
|
|
4434
4558
|
label: group.label,
|
|
4435
4559
|
locatorLabel: leadChunk?.labels?.locatorLabel ?? buildLocatorLabel2(leadChunk?.metadata, leadChunk?.source, leadChunk?.title),
|
|
@@ -8304,6 +8428,128 @@ var htmlStructureUnits = (value) => {
|
|
|
8304
8428
|
}
|
|
8305
8429
|
return [{ text: normalizeWhitespace(stripHtmlTags(focused)) }].filter((section) => Boolean(section.text));
|
|
8306
8430
|
};
|
|
8431
|
+
/**
 * Heuristically decides whether a paragraph from an office document looks
 * like a heading.
 *
 * A paragraph qualifies when, after whitespace normalisation, it is non-empty,
 * at most 80 characters long, does not end in ".", "!" or "?", has at most
 * eight words, and at least 60% of its words start with an uppercase letter
 * or a digit once surrounding punctuation is stripped.
 *
 * Letters and digits are matched with Unicode properties so that headings
 * such as "Übersicht" or "Índice General" are recognised; for ASCII-only text
 * the result is the same as an `[A-Za-z0-9]`-based check.
 *
 * @param {string} value Paragraph text.
 * @returns {boolean} `true` when the paragraph looks like a heading.
 */
var isLikelyOfficeHeadingParagraph = (value) => {
  const normalized = normalizeWhitespace(value);
  if (!normalized || normalized.length > 80) {
    return false;
  }
  if (/[.!?]$/.test(normalized)) {
    return false;
  }
  const words = normalized.split(/\s+/);
  if (words.length > 8) {
    return false;
  }
  const headingLikeWords = words.filter((word) => {
    // Strip leading/trailing characters that are neither letters nor digits.
    const stripped = word.replace(/^[^\p{L}\p{N}]+|[^\p{L}\p{N}]+$/gu, "");
    if (!stripped) {
      return false;
    }
    // Uppercase or titlecase letter, or any digit, in first position.
    return /^[\p{Lu}\p{Lt}\p{N}]/u.test(stripped);
  }).length;
  return headingLikeWords / words.length >= 0.6;
};
|
|
8452
|
+
/**
 * Splits office-document text into sections, starting a new section at every
 * paragraph that looks like a heading.
 *
 * Paragraphs before the first heading form a section without heading
 * metadata. A section's text is its heading (if any) followed by its
 * paragraphs, separated by blank lines and whitespace-normalised; sections
 * whose text ends up empty are dropped.
 *
 * @param {string} value Document text.
 * @returns {Array<object>} Section units; a single `{ text }` unit holding
 *   the normalised input when no section was produced.
 */
var officeHeadingStructureUnits = (value) => {
  // Group paragraphs into buckets, one per heading (plus an optional leading
  // bucket for text that precedes the first heading).
  const buckets = [];
  let open = null;
  for (const paragraph of paragraphUnits(value)) {
    if (isLikelyOfficeHeadingParagraph(paragraph)) {
      open = { body: [], heading: paragraph };
      buckets.push(open);
      continue;
    }
    if (!open) {
      open = { body: [], heading: undefined };
      buckets.push(open);
    }
    open.body.push(paragraph);
  }

  const sections = [];
  for (const { heading, body } of buckets) {
    const text = normalizeWhitespace([heading, ...body].filter(Boolean).join("\n\n"));
    if (!text) {
      continue;
    }
    const hasHeading = Boolean(heading);
    sections.push({
      sectionDepth: hasHeading ? 1 : undefined,
      sectionKind: hasHeading ? "office_heading" : undefined,
      sectionPath: hasHeading ? [heading] : undefined,
      sectionTitle: heading,
      text
    });
  }

  if (sections.length > 0) {
    return sections;
  }
  return [{ text: normalizeWhitespace(value) }];
};
|
|
8490
|
+
/**
 * Splits the text of one spreadsheet sheet into section units made of whole
 * rows.
 *
 * Lines are whitespace-normalised and blank lines dropped. The sheet name is
 * `metadata.sheetName` when it is a non-empty string, otherwise the first line
 * with a leading "Sheet " removed. Lines of the form "Row <n>. ..." are packed
 * into batches whose newline-joined length stays within
 * DEFAULT_MAX_CHUNK_LENGTH (a single row always forms a batch, even if it is
 * longer). When no such lines exist, one unit holding every line is returned.
 *
 * @param {string} value Sheet text.
 * @param {{sheetName?: string} | undefined} metadata Document metadata.
 * @returns {Array<object>} Section units; empty when the text has no content.
 */
var spreadsheetStructureUnits = (value, metadata) => {
  const lines = value
    .replace(/\r\n?/g, "\n")
    .split("\n")
    .map((line) => normalizeWhitespace(line))
    .filter(Boolean);
  if (lines.length === 0) {
    return [];
  }

  const declaredName = metadata?.sheetName;
  const sheetName = typeof declaredName === "string" && declaredName !== ""
    ? declaredName
    : lines[0].replace(/^Sheet\s+/i, "");

  const rowLines = lines.filter((line) => /^Row \d+\./.test(line));
  if (rowLines.length === 0) {
    return [
      {
        sectionDepth: 1,
        sectionKind: "spreadsheet_rows",
        sectionPath: [sheetName],
        sectionTitle: sheetName,
        text: normalizeWhitespace(lines.join("\n"))
      }
    ];
  }

  const batches = [];
  let batch = [];
  // Length of batch.join("\n"), maintained incrementally.
  let batchLength = 0;
  for (const row of rowLines) {
    const projected = batch.length === 0 ? row.length : batchLength + 1 + row.length;
    if (batch.length > 0 && projected > DEFAULT_MAX_CHUNK_LENGTH) {
      batches.push(batch);
      batch = [row];
      batchLength = row.length;
    } else {
      batch.push(row);
      batchLength = projected;
    }
  }
  if (batch.length > 0) {
    batches.push(batch);
  }

  return batches.map((rows) => ({
    preferredChunkUnits: rows,
    sectionDepth: 1,
    sectionKind: "spreadsheet_rows",
    sectionPath: [sheetName],
    sectionTitle: sheetName,
    text: normalizeWhitespace([`Sheet ${sheetName}`, ...rows].join("\n"))
  }));
};
|
|
8536
|
+
/**
 * Wraps the text of one presentation slide in a single section unit.
 *
 * The slide number is `metadata.slideNumber` when numeric, otherwise
 * `metadata.slideIndex + 1` when that is numeric. The section is titled
 * "Slide <n>", or just "Slide" when no (non-zero) number is available, and
 * the title is prepended to the slide's paragraphs in the unit text.
 *
 * @param {string} value Slide text.
 * @param {{slideNumber?: number, slideIndex?: number} | undefined} metadata
 * @returns {Array<object>} A one-element array with the slide's section unit.
 */
var presentationStructureUnits = (value, metadata) => {
  let slideNumber;
  if (typeof metadata?.slideNumber === "number") {
    slideNumber = metadata.slideNumber;
  } else if (typeof metadata?.slideIndex === "number") {
    slideNumber = metadata.slideIndex + 1;
  }

  let slideLabel = "Slide";
  if (slideNumber) {
    slideLabel = `Slide ${slideNumber}`;
  }

  const paragraphs = paragraphUnits(value);
  const slideUnit = {
    preferredChunkUnits: paragraphs,
    sectionDepth: 1,
    sectionKind: "presentation_slide",
    sectionPath: [slideLabel],
    sectionTitle: slideLabel,
    text: normalizeWhitespace([slideLabel, ...paragraphs].join("\n\n"))
  };
  return [slideUnit];
};
|
|
8307
8553
|
var inferFormat = (document) => {
|
|
8308
8554
|
if (document.format) {
|
|
8309
8555
|
return document.format;
|
|
@@ -9479,6 +9725,15 @@ var sourceAwareUnits = (document, format, normalizedText) => {
|
|
|
9479
9725
|
}
|
|
9480
9726
|
case "text":
|
|
9481
9727
|
default:
|
|
9728
|
+
if (document.metadata?.sourceNativeKind === "spreadsheet_sheet") {
|
|
9729
|
+
return spreadsheetStructureUnits(normalizedText, document.metadata);
|
|
9730
|
+
}
|
|
9731
|
+
if (document.metadata?.sourceNativeKind === "presentation_slide") {
|
|
9732
|
+
return presentationStructureUnits(normalizedText, document.metadata);
|
|
9733
|
+
}
|
|
9734
|
+
if (document.source?.toLowerCase().endsWith(".docx") || document.source?.toLowerCase().endsWith(".odt")) {
|
|
9735
|
+
return officeHeadingStructureUnits(normalizedText);
|
|
9736
|
+
}
|
|
9482
9737
|
return paragraphUnits(normalizedText).map((text) => ({ text }));
|
|
9483
9738
|
}
|
|
9484
9739
|
};
|
|
@@ -9546,9 +9801,82 @@ var chunkSourceAwareUnit = (unit, options) => {
|
|
|
9546
9801
|
if (unit.text.length <= options.maxChunkLength) {
|
|
9547
9802
|
return [unit];
|
|
9548
9803
|
}
|
|
9549
|
-
|
|
9804
|
+
const expandOversizedParagraph = (paragraph) => {
|
|
9805
|
+
if (paragraph.length <= options.maxChunkLength) {
|
|
9806
|
+
return [paragraph];
|
|
9807
|
+
}
|
|
9808
|
+
const sentenceChunks = chunkFromUnits(sentenceUnits(paragraph), options.maxChunkLength, 0, options.minChunkLength);
|
|
9809
|
+
if (sentenceChunks.length > 1) {
|
|
9810
|
+
return sentenceChunks;
|
|
9811
|
+
}
|
|
9812
|
+
return chunkFromUnits(fixedUnits(paragraph, options.maxChunkLength), options.maxChunkLength, 0, options.minChunkLength);
|
|
9813
|
+
};
|
|
9814
|
+
const stableParagraphs = (unit.preferredChunkUnits ?? paragraphUnits(unit.text)).flatMap(expandOversizedParagraph);
|
|
9815
|
+
const stableChunks = [];
|
|
9816
|
+
let current = "";
|
|
9817
|
+
const pushCurrent = () => {
|
|
9818
|
+
if (!current) {
|
|
9819
|
+
return;
|
|
9820
|
+
}
|
|
9821
|
+
stableChunks.push(normalizeWhitespace(current));
|
|
9822
|
+
current = "";
|
|
9823
|
+
};
|
|
9824
|
+
for (const paragraph of stableParagraphs) {
|
|
9825
|
+
const trimmed = paragraph.trim();
|
|
9826
|
+
if (!trimmed) {
|
|
9827
|
+
continue;
|
|
9828
|
+
}
|
|
9829
|
+
if (!current) {
|
|
9830
|
+
current = trimmed;
|
|
9831
|
+
continue;
|
|
9832
|
+
}
|
|
9833
|
+
const candidate = `${current}
|
|
9834
|
+
|
|
9835
|
+
${trimmed}`;
|
|
9836
|
+
if (candidate.length <= options.maxChunkLength) {
|
|
9837
|
+
current = candidate;
|
|
9838
|
+
continue;
|
|
9839
|
+
}
|
|
9840
|
+
pushCurrent();
|
|
9841
|
+
current = trimmed;
|
|
9842
|
+
}
|
|
9843
|
+
pushCurrent();
|
|
9844
|
+
const merged = [];
|
|
9845
|
+
for (const chunk of stableChunks) {
|
|
9846
|
+
const last = merged.at(-1);
|
|
9847
|
+
if (last && chunk.length < options.minChunkLength && `${last}
|
|
9848
|
+
|
|
9849
|
+
${chunk}`.length <= options.maxChunkLength) {
|
|
9850
|
+
merged[merged.length - 1] = normalizeWhitespace(`${last}
|
|
9851
|
+
|
|
9852
|
+
${chunk}`);
|
|
9853
|
+
continue;
|
|
9854
|
+
}
|
|
9855
|
+
merged.push(chunk);
|
|
9856
|
+
}
|
|
9857
|
+
const decorateSourceAwareChunkText = (text) => {
|
|
9858
|
+
if (!unit.preferredChunkUnits || !unit.sectionTitle) {
|
|
9859
|
+
return text;
|
|
9860
|
+
}
|
|
9861
|
+
if (unit.sectionKind === "spreadsheet_rows") {
|
|
9862
|
+
if (text.includes(`Sheet ${unit.sectionTitle}`)) {
|
|
9863
|
+
return text;
|
|
9864
|
+
}
|
|
9865
|
+
return normalizeWhitespace(`Sheet ${unit.sectionTitle}
|
|
9866
|
+
${text}`);
|
|
9867
|
+
}
|
|
9868
|
+
if (unit.sectionKind === "presentation_slide") {
|
|
9869
|
+
if (text.includes(unit.sectionTitle)) {
|
|
9870
|
+
return text;
|
|
9871
|
+
}
|
|
9872
|
+
return normalizeWhitespace(`${unit.sectionTitle}
|
|
9873
|
+
${text}`);
|
|
9874
|
+
}
|
|
9875
|
+
return text;
|
|
9876
|
+
};
|
|
9877
|
+
return merged.map((text) => ({
|
|
9550
9878
|
...unit,
|
|
9551
|
-
text
|
|
9879
|
+
text: decorateSourceAwareChunkText(text)
|
|
9552
9880
|
}));
|
|
9553
9881
|
};
|
|
9554
9882
|
var resolveChunkingUnits = (text, options) => {
|
|
@@ -10311,6 +10639,17 @@ var renderChunkStructure = (structure) => {
|
|
|
10311
10639
|
].filter((row) => row.length > 0);
|
|
10312
10640
|
return rows.length > 0 ? `<ul class="rag-chunk-structure">${rows.join("")}</ul>` : "";
|
|
10313
10641
|
};
|
|
10642
|
+
/**
 * Renders a chunk's excerpts as an HTML list.
 *
 * Emits one `<li>` per non-empty excerpt, in the order chunk, neighbour
 * window, section, with the excerpt text HTML-escaped.
 *
 * @param {{chunkExcerpt?: string, windowExcerpt?: string, sectionExcerpt?: string} | undefined} input
 * @returns {string} A `<ul class="rag-chunk-structure">` element, or "" when
 *   there is nothing to show.
 */
var renderChunkExcerpts = (input) => {
  if (!input) {
    return "";
  }
  const labelled = [
    ["Chunk excerpt", input.chunkExcerpt],
    ["Neighbor window", input.windowExcerpt],
    ["Section excerpt", input.sectionExcerpt]
  ];
  let items = "";
  for (const [label, excerpt] of labelled) {
    if (excerpt) {
      items += `<li><strong>${label}</strong> ${escapeHtml2(excerpt)}</li>`;
    }
  }
  if (items.length === 0) {
    return "";
  }
  return `<ul class="rag-chunk-structure">${items}</ul>`;
};
|
|
10314
10653
|
var renderSectionJumpList = (label, items) => {
|
|
10315
10654
|
const rows = items.map((item) => item.href ? `<li><strong>${escapeHtml2(label)}</strong> <a href="${escapeHtml2(item.href)}"${item.active ? ' aria-current="true"' : ""}>${escapeHtml2(item.label)}</a></li>` : `<li><strong>${escapeHtml2(label)}</strong> ${escapeHtml2(item.label)}</li>`).join("");
|
|
10316
10655
|
return rows ? `<ul class="rag-chunk-structure">${rows}</ul>` : "";
|
|
@@ -10410,7 +10749,7 @@ var defaultChunkPreview = (input) => {
|
|
|
10410
10749
|
return acc;
|
|
10411
10750
|
}, []);
|
|
10412
10751
|
const groupHtml = groups.map((group) => {
|
|
10413
|
-
const chunkHtml = group.chunks.map((chunk) => '<article class="rag-chunk">' + `<h5>${escapeHtml2(chunk.chunkId)}</h5>` + `<p class="rag-chunk-meta">chunk ${typeof chunk.metadata?.chunkIndex === "number" ? chunk.metadata.chunkIndex : 0} of ${typeof chunk.metadata?.chunkCount === "number" ? chunk.metadata.chunkCount : input.chunks.length}</p>` + renderSourceLabels(chunk.labels) + renderChunkStructure(chunk.structure) + `<pre>${escapeHtml2(chunk.text)}</pre>` + "</article>").join("");
|
|
10752
|
+
const chunkHtml = group.chunks.map((chunk) => '<article class="rag-chunk">' + `<h5>${escapeHtml2(chunk.chunkId)}</h5>` + `<p class="rag-chunk-meta">chunk ${typeof chunk.metadata?.chunkIndex === "number" ? chunk.metadata.chunkIndex : 0} of ${typeof chunk.metadata?.chunkCount === "number" ? chunk.metadata.chunkCount : input.chunks.length}</p>` + renderSourceLabels(chunk.labels) + renderChunkStructure(chunk.structure) + renderChunkExcerpts(chunk.excerpts) + `<pre>${escapeHtml2(chunk.text)}</pre>` + "</article>").join("");
|
|
10414
10753
|
return `<section class="rag-chunk-group"><h4>${escapeHtml2(group.title)}</h4>${chunkHtml}</section>`;
|
|
10415
10754
|
}).join("");
|
|
10416
10755
|
return `<section class="rag-chunk-preview">` + `<h3>${escapeHtml2(input.document.title)}</h3>` + `<p class="rag-chunk-preview-source">${escapeHtml2(input.document.source)}</p>` + renderSourceLabels(input.document.labels) + (navigation.parentSection ? renderSectionJumpList("Parent section", [
|
|
@@ -15984,6 +16323,15 @@ var ragChat = (config) => {
|
|
|
15984
16323
|
ok: false
|
|
15985
16324
|
};
|
|
15986
16325
|
}
|
|
16326
|
+
const chunks = preview.chunks.map((chunk) => ({
|
|
16327
|
+
...chunk,
|
|
16328
|
+
labels: buildRAGSourceLabels({
|
|
16329
|
+
metadata: chunk.metadata,
|
|
16330
|
+
source: chunk.source ?? preview.document.source,
|
|
16331
|
+
title: chunk.title ?? preview.document.title
|
|
16332
|
+
}),
|
|
16333
|
+
structure: buildRAGChunkStructure(chunk.metadata)
|
|
16334
|
+
}));
|
|
15987
16335
|
return {
|
|
15988
16336
|
ok: true,
|
|
15989
16337
|
...preview,
|
|
@@ -15995,14 +16343,9 @@ var ragChat = (config) => {
|
|
|
15995
16343
|
title: preview.document.title
|
|
15996
16344
|
})
|
|
15997
16345
|
},
|
|
15998
|
-
chunks:
|
|
16346
|
+
chunks: chunks.map((chunk) => ({
|
|
15999
16347
|
...chunk,
|
|
16000
|
-
|
|
16001
|
-
metadata: chunk.metadata,
|
|
16002
|
-
source: chunk.source ?? preview.document.source,
|
|
16003
|
-
title: chunk.title ?? preview.document.title
|
|
16004
|
-
}),
|
|
16005
|
-
structure: buildRAGChunkStructure(chunk.metadata)
|
|
16348
|
+
excerpts: buildRAGChunkExcerpts(chunks, chunk.chunkId)
|
|
16006
16349
|
}))
|
|
16007
16350
|
};
|
|
16008
16351
|
};
|
|
@@ -21599,5 +21942,5 @@ export {
|
|
|
21599
21942
|
aiChat
|
|
21600
21943
|
};
|
|
21601
21944
|
|
|
21602
|
-
//# debugId=
|
|
21945
|
+
//# debugId=B6314DD20BD25BC564756E2164756E21
|
|
21603
21946
|
//# sourceMappingURL=index.js.map
|