@absolutejs/absolute 0.19.0-beta.606 → 0.19.0-beta.608
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/ai/client/index.js +195 -18
- package/dist/ai/client/index.js.map +4 -4
- package/dist/ai/client/ui.js +197 -18
- package/dist/ai/client/ui.js.map +4 -4
- package/dist/ai/index.js +424 -28
- package/dist/ai/index.js.map +7 -7
- package/dist/ai/rag/quality.js +145 -16
- package/dist/ai/rag/quality.js.map +3 -3
- package/dist/ai/rag/ui.js +197 -18
- package/dist/ai/rag/ui.js.map +4 -4
- package/dist/ai-client/angular/ai/index.js +194 -17
- package/dist/ai-client/react/ai/index.js +194 -17
- package/dist/ai-client/vue/ai/index.js +194 -17
- package/dist/angular/ai/index.js +195 -18
- package/dist/angular/ai/index.js.map +4 -4
- package/dist/react/ai/index.js +195 -18
- package/dist/react/ai/index.js.map +6 -6
- package/dist/src/ai/client/ui.d.ts +1 -1
- package/dist/src/ai/rag/grounding.d.ts +2 -1
- package/dist/src/ai/rag/index.d.ts +1 -1
- package/dist/src/ai/rag/presentation.d.ts +6 -3
- package/dist/src/ai/rag/ui.d.ts +1 -1
- package/dist/src/vue/ai/useRAG.d.ts +14 -4
- package/dist/src/vue/ai/useRAGChunkPreview.d.ts +12 -2
- package/dist/src/vue/ai/useRAGSearch.d.ts +2 -2
- package/dist/svelte/ai/index.js +195 -18
- package/dist/svelte/ai/index.js.map +6 -6
- package/dist/types/ai.d.ts +26 -2
- package/dist/vue/ai/index.js +195 -18
- package/dist/vue/ai/index.js.map +5 -5
- package/package.json +1 -1
package/dist/ai/index.js
CHANGED
|
@@ -347,6 +347,56 @@ var buildExcerpt = (text, maxLength = 160) => {
|
|
|
347
347
|
}
|
|
348
348
|
return `${normalized.slice(0, Math.max(0, maxLength - 1)).trimEnd()}\u2026`;
|
|
349
349
|
};
|
|
350
|
+
/**
 * Chooses which of the three excerpt variants to show for a reference.
 *
 * Each variant is trimmed first; a missing variant counts as "". When the
 * chunk belongs to a section with more than one chunk and its own excerpt is
 * non-empty but shorter than 72 characters, a wider excerpt is preferred:
 * the section excerpt for sections of at most 3 chunks, otherwise the
 * neighbor-window excerpt. In every other case the first non-empty of
 * chunk, window, section is returned.
 *
 * @param {{chunkExcerpt?: string, windowExcerpt?: string, sectionExcerpt?: string} | undefined} excerpts
 * @param {number | undefined} sectionChunkCount
 * @returns {string} The selected (trimmed) excerpt, or "" when nothing is available.
 */
var selectPreferredExcerpt = (excerpts, sectionChunkCount) => {
  if (!excerpts) {
    return "";
  }
  const tidy = (candidate) => candidate?.trim() ?? "";
  const ownText = tidy(excerpts.chunkExcerpt);
  const neighborText = tidy(excerpts.windowExcerpt);
  const sectionText = tidy(excerpts.sectionExcerpt);
  const fallback = ownText || neighborText || sectionText;

  const inMultiChunkSection = Boolean(sectionChunkCount && sectionChunkCount > 1);
  const ownTextIsShort = ownText.length > 0 && ownText.length < 72;
  if (!inMultiChunkSection || !ownTextIsShort) {
    return fallback;
  }
  // Small sections are shown whole; larger ones fall back to the neighbor window.
  if (sectionChunkCount <= 3 && sectionText) {
    return sectionText;
  }
  return neighborText ? neighborText : fallback;
};
|
|
367
|
+
/**
 * Builds the chunk, section and neighbor-window excerpts for one source.
 *
 * The active source is the one whose chunkId equals `activeChunkId`; when no
 * id is given or none matches, the first source is used. Neighbor and section
 * membership are read from the active source's metadata (previousChunkId,
 * nextChunkId, sectionChunkId). Only chunks present in `sources` contribute
 * text.
 *
 * @param {Array<object>} sources Retrieved sources (chunkId, text, metadata).
 * @param {string} [activeChunkId] Chunk to build excerpts for.
 * @returns {{chunkExcerpt: string, sectionExcerpt: string, windowExcerpt: string} | undefined}
 *   undefined when there are no sources or no active source can be resolved.
 */
var buildGroundingChunkExcerpts = (sources, activeChunkId) => {
  if (sources.length === 0) {
    return;
  }
  const requested = activeChunkId ? sources.find((candidate) => candidate.chunkId === activeChunkId) : undefined;
  const active = requested ?? sources[0];
  if (!active) {
    return;
  }
  const byChunkId = new Map(sources.map((candidate) => [candidate.chunkId, candidate]));
  const metadata = active.metadata ?? {};
  const previousChunkId = getContextString(metadata.previousChunkId);
  const nextChunkId = getContextString(metadata.nextChunkId);
  const sectionChunkId = getContextString(metadata.sectionChunkId);

  // Chunks without a numeric sectionChunkIndex sort last; ties break on chunkId.
  const sectionPosition = (candidate) => getContextNumber(candidate.metadata?.sectionChunkIndex) ?? Number.MAX_SAFE_INTEGER;
  const bySectionOrder = (left, right) => {
    const leftPosition = sectionPosition(left);
    const rightPosition = sectionPosition(right);
    return leftPosition !== rightPosition ? leftPosition - rightPosition : left.chunkId.localeCompare(right.chunkId);
  };
  let sectionSources = [active];
  if (sectionChunkId) {
    sectionSources = sources.filter((candidate) => getContextString(candidate.metadata?.sectionChunkId) === sectionChunkId).sort(bySectionOrder);
  }

  // previous -> active -> next, dropping missing ids and repeats.
  const windowIds = [...new Set([previousChunkId, active.chunkId, nextChunkId].filter((chunkId) => Boolean(chunkId)))];
  const windowText = windowIds
    .map((chunkId) => byChunkId.get(chunkId)?.text)
    .filter((text) => typeof text === "string")
    .join("\n\n");
  const sectionText = sectionSources.map((candidate) => candidate.text).join("\n\n");

  return {
    chunkExcerpt: buildExcerpt(active.text, 160),
    sectionExcerpt: buildExcerpt(sectionText, 320),
    windowExcerpt: buildExcerpt(windowText, 240)
  };
};
|
|
350
400
|
/**
 * Joins a reference's label, locator label and context label with " · ",
 * skipping empty or missing parts and repeating no value twice.
 *
 * @param {{label?: string, locatorLabel?: string, contextLabel?: string}} reference
 * @returns {string} The joined label, or "" when no part is present.
 */
var buildGroundingReferenceEvidenceLabel = (reference) => {
  const parts = new Set();
  for (const part of [reference.label, reference.locatorLabel, reference.contextLabel]) {
    if (part && part.length > 0) {
      parts.add(part);
    }
  }
  return [...parts].join(" \xB7 ");
};
|
|
351
401
|
var buildGroundingReferenceEvidenceSummary = (reference) => [
|
|
352
402
|
reference.source ?? reference.title ?? reference.chunkId,
|
|
@@ -354,11 +404,19 @@ var buildGroundingReferenceEvidenceSummary = (reference) => [
|
|
|
354
404
|
reference.contextLabel,
|
|
355
405
|
reference.provenanceLabel
|
|
356
406
|
].filter((value) => Boolean(value && value.length > 0)).filter((value, index, values) => values.findIndex((entry) => entry === value) === index).join(" \xB7 ");
|
|
407
|
+
/**
 * Picks the key used to group references into section summaries.
 *
 * Returns the first usable value among contextLabel, locatorLabel, label and
 * source, falling back to chunkId. Empty strings are skipped: the sibling
 * label builders in this file already treat "" as absent, and an empty key
 * would merge unrelated references under a blank section label.
 *
 * @param {{contextLabel?: string, locatorLabel?: string, label?: string, source?: string, chunkId?: string}} reference
 * @returns {string | undefined} The section key (undefined only when every candidate, including chunkId, is missing).
 */
var buildGroundingSectionKey = (reference) => {
  const candidates = [reference.contextLabel, reference.locatorLabel, reference.label, reference.source];
  // Non-string values keep the original `??` semantics: anything non-nullish counts.
  const isUsable = (value) => (typeof value === "string" ? value.length > 0 : value != null);
  return candidates.find(isUsable) ?? reference.chunkId;
};
|
|
408
|
+
/**
 * Builds the one-line " · "-separated summary for a section group.
 *
 * Parts, in order: source (else title, else chunkId), locator label, context
 * label, provenance label. Empty or missing parts are dropped and each
 * distinct value appears once, at its first position.
 *
 * @param {object} reference Grounding reference.
 * @returns {string} The summary line, or "" when no part is present.
 */
var buildGroundingSectionSummaryLine = (reference) => {
  const origin = reference.source ?? reference.title ?? reference.chunkId;
  const distinctParts = new Set();
  for (const part of [origin, reference.locatorLabel, reference.contextLabel, reference.provenanceLabel]) {
    if (part && part.length > 0) {
      distinctParts.add(part);
    }
  }
  return [...distinctParts].join(" \xB7 ");
};
|
|
357
414
|
var buildGroundedAnswerCitationDetail = (reference) => ({
|
|
358
415
|
contextLabel: reference.contextLabel,
|
|
359
416
|
evidenceLabel: buildGroundingReferenceEvidenceLabel(reference),
|
|
360
417
|
evidenceSummary: buildGroundingReferenceEvidenceSummary(reference),
|
|
361
|
-
excerpt: reference.excerpt,
|
|
418
|
+
excerpt: selectPreferredExcerpt(reference.excerpts, getContextNumber(reference.metadata?.sectionChunkCount)) || reference.excerpt,
|
|
419
|
+
excerpts: reference.excerpts,
|
|
362
420
|
label: reference.label,
|
|
363
421
|
locatorLabel: reference.locatorLabel,
|
|
364
422
|
number: reference.number,
|
|
@@ -444,26 +502,97 @@ var buildRAGGroundedAnswer = (content, sources) => {
|
|
|
444
502
|
hasCitations,
|
|
445
503
|
parts,
|
|
446
504
|
references,
|
|
505
|
+
sectionSummaries: buildRAGGroundedAnswerSectionSummaries(references),
|
|
447
506
|
ungroundedReferenceNumbers: [...ungroundedReferenceNumbers].sort((left, right) => left - right)
|
|
448
507
|
};
|
|
449
508
|
};
|
|
509
|
+
/**
 * Groups grounded-answer references into one summary per section.
 *
 * References that share a key from buildGroundingSectionKey are merged: the
 * first reference seeds the group, later ones add their chunk id and
 * reference number (without duplicates) and fill in any context / locator /
 * provenance label or excerpts the group is still missing. `count` is the
 * number of references merged, duplicates included.
 *
 * The result is ordered by each group's smallest reference number, then by
 * label; within a group, reference numbers and references are ascending.
 *
 * @param {Array<object>} references Grounding references (chunkId, number,
 *   labels, optional excerpts / excerpt / metadata).
 * @returns {Array<object>} Section summaries.
 */
var buildRAGGroundedAnswerSectionSummaries = (references) => {
  // Copy the three excerpt fields so a group never aliases a reference's object.
  const copyExcerpts = (excerpts) => ({
    chunkExcerpt: excerpts.chunkExcerpt,
    sectionExcerpt: excerpts.sectionExcerpt,
    windowExcerpt: excerpts.windowExcerpt
  });
  const groups = new Map();
  for (const reference of references) {
    const key = buildGroundingSectionKey(reference);
    const existing = groups.get(key);
    if (!existing) {
      const excerpts = reference.excerpts ? copyExcerpts(reference.excerpts) : undefined;
      groups.set(key, {
        chunkIds: [reference.chunkId],
        contextLabel: reference.contextLabel,
        count: 1,
        excerpt: selectPreferredExcerpt(excerpts, getContextNumber(reference.metadata?.sectionChunkCount)) || excerpts?.sectionExcerpt || reference.excerpt,
        excerpts,
        key,
        label: key,
        locatorLabel: reference.locatorLabel,
        provenanceLabel: reference.provenanceLabel,
        referenceNumbers: [reference.number],
        references: [reference],
        summary: buildGroundingSectionSummaryLine(reference) || reference.label || reference.chunkId
      });
      continue;
    }
    existing.count += 1;
    if (!existing.chunkIds.includes(reference.chunkId)) {
      existing.chunkIds.push(reference.chunkId);
    }
    if (!existing.referenceNumbers.includes(reference.number)) {
      existing.referenceNumbers.push(reference.number);
    }
    existing.references.push(reference);
    if (!existing.contextLabel && reference.contextLabel) {
      existing.contextLabel = reference.contextLabel;
    }
    if (!existing.locatorLabel && reference.locatorLabel) {
      existing.locatorLabel = reference.locatorLabel;
    }
    if (!existing.provenanceLabel && reference.provenanceLabel) {
      existing.provenanceLabel = reference.provenanceLabel;
    }
    if (!existing.excerpts && reference.excerpts) {
      existing.excerpts = copyExcerpts(reference.excerpts);
      // Still prefer the section excerpt for a merged group, but never replace
      // an excerpt the group already has with an empty or missing one.
      existing.excerpt = existing.excerpts.sectionExcerpt
        || selectPreferredExcerpt(existing.excerpts, getContextNumber(reference.metadata?.sectionChunkCount))
        || existing.excerpt;
    }
  }
  return [...groups.values()].map((group) => ({
    ...group,
    referenceNumbers: [...group.referenceNumbers].sort((left, right) => left - right),
    references: group.references.slice().sort((left, right) => left.number - right.number)
  })).sort((left, right) => {
    const leftFirst = left.referenceNumbers[0] ?? Number.POSITIVE_INFINITY;
    const rightFirst = right.referenceNumbers[0] ?? Number.POSITIVE_INFINITY;
    if (leftFirst !== rightFirst) {
      return leftFirst - rightFirst;
    }
    // A label is the group key, which can be undefined or non-string; coerce so the tie-break cannot throw.
    return String(left.label ?? "").localeCompare(String(right.label ?? ""));
  });
};
|
|
450
575
|
/**
 * Turns retrieved sources into numbered grounding references.
 *
 * One reference is produced per citation from buildRAGCitations. Each carries
 * the citation's own fields, the chunk / section / window excerpts built from
 * `sources`, a preferred display excerpt, and its reference number (0 when
 * the citation reference map has no entry for the chunk). Labels missing on
 * the citation are derived from its metadata.
 *
 * @param {Array<object>} sources Retrieved sources.
 * @returns {Array<object>} Grounding references, in citation order.
 */
var buildRAGGroundingReferences = (sources) => {
  const citations = buildRAGCitations(sources);
  const numberByChunkId = buildRAGCitationReferenceMap(citations);
  const references = [];
  for (const citation of citations) {
    const { chunkId, metadata, source, title, text } = citation;
    const excerpts = buildGroundingChunkExcerpts(sources, chunkId);
    references.push({
      chunkId,
      contextLabel: citation.contextLabel ?? buildContextLabel(metadata),
      // Preferred excerpt first, then the plain chunk excerpt, then one rebuilt from the raw text.
      excerpt: selectPreferredExcerpt(excerpts, getContextNumber(metadata?.sectionChunkCount)) || excerpts?.chunkExcerpt || buildExcerpt(text),
      excerpts,
      label: citation.label,
      locatorLabel: citation.locatorLabel ?? buildLocatorLabel(metadata, source, title),
      metadata,
      number: numberByChunkId[chunkId] ?? 0,
      provenanceLabel: citation.provenanceLabel ?? buildProvenanceLabel(metadata),
      score: citation.score,
      source,
      text,
      title
    });
  }
  return references;
};
|
|
468
597
|
|
|
469
598
|
// src/ai/rag/quality.ts
|
|
@@ -4123,7 +4252,7 @@ var buildRAGChunkStructure = (metadata) => {
|
|
|
4123
4252
|
return;
|
|
4124
4253
|
}
|
|
4125
4254
|
const sectionPath = Array.isArray(metadata.sectionPath) ? metadata.sectionPath.filter((value) => typeof value === "string" && value.trim().length > 0) : undefined;
|
|
4126
|
-
const sectionKind = metadata.sectionKind === "markdown_heading" || metadata.sectionKind === "html_heading" ? metadata.sectionKind : undefined;
|
|
4255
|
+
const sectionKind = metadata.sectionKind === "markdown_heading" || metadata.sectionKind === "html_heading" || metadata.sectionKind === "office_heading" || metadata.sectionKind === "spreadsheet_rows" || metadata.sectionKind === "presentation_slide" ? metadata.sectionKind : undefined;
|
|
4127
4256
|
const section = {
|
|
4128
4257
|
depth: getContextNumber2(metadata.sectionDepth),
|
|
4129
4258
|
kind: sectionKind,
|
|
@@ -4152,6 +4281,52 @@ var buildExcerpt2 = (text, maxLength = 160) => {
|
|
|
4152
4281
|
}
|
|
4153
4282
|
return `${normalized.slice(0, Math.max(0, maxLength - 1)).trimEnd()}\u2026`;
|
|
4154
4283
|
};
|
|
4284
|
+
/**
 * Builds the chunk, section and neighbor-window excerpts for one chunk of a
 * chunk list, using the chunk graph to find neighbors and section members.
 *
 * The active chunk is the one the graph navigation resolves for
 * `activeChunkId`; if it is not in `chunks`, the first chunk is used.
 *
 * @param {Array<object>} chunks Chunks (chunkId, text, metadata, structure).
 * @param {string} [activeChunkId] Chunk to build excerpts for.
 * @returns {{chunkExcerpt: string, sectionExcerpt: string, windowExcerpt: string} | undefined}
 *   undefined when there are no chunks or no active chunk can be resolved.
 */
var buildRAGChunkExcerpts = (chunks, activeChunkId) => {
  if (chunks.length === 0) {
    return;
  }
  const graphInput = chunks.map(({ chunkId, metadata, structure }) => ({ chunkId, metadata, structure }));
  const navigation = buildRAGChunkGraphNavigation(buildRAGChunkGraph(graphInput), activeChunkId);
  const active = chunks.find((candidate) => candidate.chunkId === navigation.activeChunkId) ?? chunks[0];
  if (!active) {
    return;
  }
  const byChunkId = new Map(chunks.map((candidate) => [candidate.chunkId, candidate]));
  // Concatenate the text of the given chunks, skipping ids that are unknown or have no string text.
  const textOf = (chunkIds) => {
    const texts = [];
    for (const chunkId of chunkIds) {
      const text = byChunkId.get(chunkId)?.text;
      if (typeof text === "string") {
        texts.push(text);
      }
    }
    return texts.join("\n\n");
  };

  // previous -> active -> next, dropping missing ids and repeats.
  const neighborIds = [navigation.previousNode?.chunkId, active.chunkId, navigation.nextNode?.chunkId];
  const windowIds = [...new Set(neighborIds.filter((chunkId) => Boolean(chunkId)))];
  let sectionIds = [active.chunkId];
  if (navigation.sectionNodes.length > 0) {
    sectionIds = navigation.sectionNodes.map((node) => node.chunkId);
  }

  return {
    chunkExcerpt: buildExcerpt2(active.text, 160),
    sectionExcerpt: buildExcerpt2(textOf(sectionIds), 320),
    windowExcerpt: buildExcerpt2(textOf(windowIds), 240)
  };
};
|
|
4314
|
+
/**
 * Chooses which excerpt variant to display for a chunk.
 *
 * When the chunk sits in a section of more than one chunk and its own excerpt
 * is non-blank but shorter than 72 characters (after trimming), a wider
 * excerpt is preferred: the section excerpt for sections of at most 3 chunks,
 * otherwise the neighbor-window excerpt. In all other cases the chunk excerpt
 * is returned. Values are returned untrimmed.
 *
 * Missing excerpt fields are treated as "" instead of throwing, so a partial
 * excerpts object is handled the same way selectPreferredExcerpt handles it.
 *
 * @param {{chunkExcerpt?: string, sectionExcerpt?: string, windowExcerpt?: string} | undefined} excerpts
 * @param {{sequence?: {sectionChunkCount?: number}} | undefined} structure
 *   Chunk structure; a missing sectionChunkCount is treated as 1.
 * @returns {string} The selected excerpt, or "" when none is available.
 */
var buildRAGPreferredExcerpt = (excerpts, structure) => {
  if (!excerpts) {
    return "";
  }
  const chunkExcerpt = excerpts.chunkExcerpt ?? "";
  const sectionExcerpt = excerpts.sectionExcerpt ?? "";
  const windowExcerpt = excerpts.windowExcerpt ?? "";
  const chunkLength = chunkExcerpt.trim().length;
  const sectionChunkCount = structure?.sequence?.sectionChunkCount ?? 1;
  if (sectionChunkCount > 1 && chunkLength > 0 && chunkLength < 72) {
    if (sectionChunkCount <= 3 && sectionExcerpt.trim().length > 0) {
      return sectionExcerpt;
    }
    if (windowExcerpt.trim().length > 0) {
      return windowExcerpt;
    }
  }
  return chunkExcerpt;
};
|
|
4155
4330
|
var buildRAGChunkGraph = (chunks) => {
|
|
4156
4331
|
const nodes = [];
|
|
4157
4332
|
const edges = [];
|
|
@@ -4363,6 +4538,7 @@ var buildRAGSourceSummaries = (sources) => {
|
|
|
4363
4538
|
return sourceGroups.map((group) => {
|
|
4364
4539
|
const groupCitations = citations.filter((citation) => group.chunks.some((chunk) => chunk.chunkId === citation.chunkId));
|
|
4365
4540
|
const leadChunk = group.chunks.slice().sort((left, right) => right.score - left.score)[0];
|
|
4541
|
+
const excerpts = leadChunk ? buildRAGChunkExcerpts(group.chunks, leadChunk.chunkId) : undefined;
|
|
4366
4542
|
return {
|
|
4367
4543
|
bestScore: group.bestScore,
|
|
4368
4544
|
citationNumbers: groupCitations.map((citation) => citationReferenceMap[citation.chunkId] ?? 0),
|
|
@@ -4370,7 +4546,8 @@ var buildRAGSourceSummaries = (sources) => {
|
|
|
4370
4546
|
chunkIds: group.chunks.map((chunk) => chunk.chunkId),
|
|
4371
4547
|
contextLabel: leadChunk?.labels?.contextLabel ?? buildContextLabel2(leadChunk?.metadata),
|
|
4372
4548
|
count: group.count,
|
|
4373
|
-
excerpt: buildExcerpt2(leadChunk?.text ?? ""),
|
|
4549
|
+
excerpt: buildRAGPreferredExcerpt(excerpts, leadChunk?.structure ?? buildRAGChunkStructure(leadChunk?.metadata)) || buildExcerpt2(leadChunk?.text ?? ""),
|
|
4550
|
+
excerpts,
|
|
4374
4551
|
key: group.key,
|
|
4375
4552
|
label: group.label,
|
|
4376
4553
|
locatorLabel: leadChunk?.labels?.locatorLabel ?? buildLocatorLabel2(leadChunk?.metadata, leadChunk?.source, leadChunk?.title),
|
|
@@ -8245,6 +8422,128 @@ var htmlStructureUnits = (value) => {
|
|
|
8245
8422
|
}
|
|
8246
8423
|
return [{ text: normalizeWhitespace(stripHtmlTags(focused)) }].filter((section) => Boolean(section.text));
|
|
8247
8424
|
};
|
|
8425
|
+
/**
 * Heuristically decides whether a paragraph reads like a heading.
 *
 * A paragraph qualifies when, after whitespace normalization, it is
 * non-empty, at most 80 characters and at most 8 words long, does not end in
 * ".", "!" or "?", and at least 60% of its words start with an uppercase
 * letter or a digit once surrounding punctuation is stripped.
 *
 * Letters and digits are matched with Unicode property classes rather than
 * ASCII ranges, so accented capitals ("École", "Übersicht") are recognised
 * instead of being stripped as punctuation.
 *
 * @param {string} value Paragraph text.
 * @returns {boolean} true when the paragraph looks like a heading.
 */
var isLikelyOfficeHeadingParagraph = (value) => {
  const normalized = normalizeWhitespace(value);
  if (!normalized || normalized.length > 80) {
    return false;
  }
  if (/[.!?]$/.test(normalized)) {
    return false;
  }
  const words = normalized.split(/\s+/);
  if (words.length > 8) {
    return false;
  }
  const headingLikeWords = words.filter((word) => {
    // Drop leading/trailing characters that are neither letters nor numbers.
    const stripped = word.replace(/^[^\p{L}\p{N}]+|[^\p{L}\p{N}]+$/gu, "");
    if (!stripped) {
      return false;
    }
    return /^[\p{Lu}\p{Lt}\p{Nd}]/u.test(stripped);
  }).length;
  return headingLikeWords / words.length >= 0.6;
};
|
|
8446
|
+
/**
 * Splits office-document text into sections, one per detected heading.
 *
 * Paragraphs come from paragraphUnits. Every paragraph that
 * isLikelyOfficeHeadingParagraph accepts starts a new section; the paragraphs
 * after it, up to the next heading, form that section's body. Paragraphs
 * before the first heading form a section with no heading metadata. Sections
 * whose normalized text is empty are dropped. If nothing remains, the whole
 * normalized input is returned as a single unit.
 *
 * @param {string} value Document text.
 * @returns {Array<object>} Section units; headed sections carry
 *   sectionDepth 1, sectionKind "office_heading", sectionPath and sectionTitle.
 */
var officeHeadingStructureUnits = (value) => {
  // First pass: bucket paragraphs under the heading that precedes them.
  const buckets = [];
  let open;
  for (const paragraph of paragraphUnits(value)) {
    if (isLikelyOfficeHeadingParagraph(paragraph)) {
      open = { heading: paragraph, body: [] };
      buckets.push(open);
      continue;
    }
    if (!open) {
      open = { heading: undefined, body: [] };
      buckets.push(open);
    }
    open.body.push(paragraph);
  }

  // Second pass: render each bucket, skipping any that normalize to nothing.
  const sections = [];
  for (const { heading, body } of buckets) {
    const text = normalizeWhitespace([heading, ...body].filter(Boolean).join("\n\n"));
    if (!text) {
      continue;
    }
    sections.push({
      sectionDepth: heading ? 1 : undefined,
      sectionKind: heading ? "office_heading" : undefined,
      sectionPath: heading ? [heading] : undefined,
      sectionTitle: heading,
      text
    });
  }
  if (sections.length > 0) {
    return sections;
  }
  return [{ text: normalizeWhitespace(value) }];
};
|
|
8484
|
+
/**
 * Splits the text of one spreadsheet sheet into row-group sections.
 *
 * Lines are normalized and blank ones dropped. The sheet name is
 * metadata.sheetName when that is a non-empty string, otherwise the first
 * line with a leading "Sheet " removed. Lines matching "Row <n>." are packed,
 * in order, into groups whose newline-joined length stays within
 * DEFAULT_MAX_CHUNK_LENGTH (a single row may exceed it on its own). Each
 * group becomes a section whose text is "Sheet <name>" followed by its rows.
 * When no row lines exist, all lines form one section. Empty input yields [].
 *
 * @param {string} value Sheet text.
 * @param {object} [metadata] Document metadata; only sheetName is read.
 * @returns {Array<object>} Section units with sectionKind "spreadsheet_rows".
 */
var spreadsheetStructureUnits = (value, metadata) => {
  const lines = value
    .replace(/\r\n?/g, "\n")
    .split("\n")
    .map((line) => normalizeWhitespace(line))
    .filter(Boolean);
  if (lines.length === 0) {
    return [];
  }
  const sheetName = typeof metadata?.sheetName === "string" && metadata.sheetName || lines[0].replace(/^Sheet\s+/i, "");
  const toSection = (textLines, extra) => ({
    ...extra,
    sectionDepth: 1,
    sectionKind: "spreadsheet_rows",
    sectionPath: [sheetName],
    sectionTitle: sheetName,
    text: normalizeWhitespace(textLines.join("\n"))
  });

  const rowLines = lines.filter((line) => /^Row \d+\./.test(line));
  if (rowLines.length === 0) {
    return [toSection(lines, {})];
  }

  const groups = [];
  let pending = [];
  // Length of pending.join("\n"), tracked incrementally.
  let pendingLength = 0;
  for (const row of rowLines) {
    if (pending.length === 0) {
      pending.push(row);
      pendingLength = row.length;
      continue;
    }
    const joinedLength = pendingLength + 1 + row.length;
    if (joinedLength > DEFAULT_MAX_CHUNK_LENGTH) {
      groups.push(pending);
      pending = [row];
      pendingLength = row.length;
    } else {
      pending.push(row);
      pendingLength = joinedLength;
    }
  }
  if (pending.length > 0) {
    groups.push(pending);
  }
  return groups.map((rows) => toSection([`Sheet ${sheetName}`, ...rows], { preferredChunkUnits: rows }));
};
|
|
8530
|
+
/**
 * Wraps the text of one presentation slide as a single section unit.
 *
 * The slide number is metadata.slideNumber when numeric, otherwise
 * metadata.slideIndex + 1 when that is numeric. The label is
 * "Slide <number>" when the number is truthy and plain "Slide" otherwise.
 * The unit's text is the label followed by the slide's paragraphs.
 *
 * @param {string} value Slide text.
 * @param {object} [metadata] Document metadata; slideNumber / slideIndex are read.
 * @returns {Array<object>} A one-element array holding the slide section.
 */
var presentationStructureUnits = (value, metadata) => {
  let slideNumber;
  if (typeof metadata?.slideNumber === "number") {
    slideNumber = metadata.slideNumber;
  } else if (typeof metadata?.slideIndex === "number") {
    slideNumber = metadata.slideIndex + 1;
  }
  const slideLabel = slideNumber ? `Slide ${slideNumber}` : "Slide";
  const paragraphs = paragraphUnits(value);
  const text = normalizeWhitespace([slideLabel, ...paragraphs].join("\n\n"));
  const slide = {
    preferredChunkUnits: paragraphs,
    sectionDepth: 1,
    sectionKind: "presentation_slide",
    sectionPath: [slideLabel],
    sectionTitle: slideLabel,
    text
  };
  return [slide];
};
|
|
8248
8547
|
var inferFormat = (document) => {
|
|
8249
8548
|
if (document.format) {
|
|
8250
8549
|
return document.format;
|
|
@@ -9420,6 +9719,15 @@ var sourceAwareUnits = (document, format, normalizedText) => {
|
|
|
9420
9719
|
}
|
|
9421
9720
|
case "text":
|
|
9422
9721
|
default:
|
|
9722
|
+
if (document.metadata?.sourceNativeKind === "spreadsheet_sheet") {
|
|
9723
|
+
return spreadsheetStructureUnits(normalizedText, document.metadata);
|
|
9724
|
+
}
|
|
9725
|
+
if (document.metadata?.sourceNativeKind === "presentation_slide") {
|
|
9726
|
+
return presentationStructureUnits(normalizedText, document.metadata);
|
|
9727
|
+
}
|
|
9728
|
+
if (document.source?.toLowerCase().endsWith(".docx") || document.source?.toLowerCase().endsWith(".odt")) {
|
|
9729
|
+
return officeHeadingStructureUnits(normalizedText);
|
|
9730
|
+
}
|
|
9423
9731
|
return paragraphUnits(normalizedText).map((text) => ({ text }));
|
|
9424
9732
|
}
|
|
9425
9733
|
};
|
|
@@ -9487,9 +9795,82 @@ var chunkSourceAwareUnit = (unit, options) => {
|
|
|
9487
9795
|
if (unit.text.length <= options.maxChunkLength) {
|
|
9488
9796
|
return [unit];
|
|
9489
9797
|
}
|
|
9490
|
-
|
|
9798
|
+
const expandOversizedParagraph = (paragraph) => {
|
|
9799
|
+
if (paragraph.length <= options.maxChunkLength) {
|
|
9800
|
+
return [paragraph];
|
|
9801
|
+
}
|
|
9802
|
+
const sentenceChunks = chunkFromUnits(sentenceUnits(paragraph), options.maxChunkLength, 0, options.minChunkLength);
|
|
9803
|
+
if (sentenceChunks.length > 1) {
|
|
9804
|
+
return sentenceChunks;
|
|
9805
|
+
}
|
|
9806
|
+
return chunkFromUnits(fixedUnits(paragraph, options.maxChunkLength), options.maxChunkLength, 0, options.minChunkLength);
|
|
9807
|
+
};
|
|
9808
|
+
const stableParagraphs = (unit.preferredChunkUnits ?? paragraphUnits(unit.text)).flatMap(expandOversizedParagraph);
|
|
9809
|
+
const stableChunks = [];
|
|
9810
|
+
let current = "";
|
|
9811
|
+
const pushCurrent = () => {
|
|
9812
|
+
if (!current) {
|
|
9813
|
+
return;
|
|
9814
|
+
}
|
|
9815
|
+
stableChunks.push(normalizeWhitespace(current));
|
|
9816
|
+
current = "";
|
|
9817
|
+
};
|
|
9818
|
+
for (const paragraph of stableParagraphs) {
|
|
9819
|
+
const trimmed = paragraph.trim();
|
|
9820
|
+
if (!trimmed) {
|
|
9821
|
+
continue;
|
|
9822
|
+
}
|
|
9823
|
+
if (!current) {
|
|
9824
|
+
current = trimmed;
|
|
9825
|
+
continue;
|
|
9826
|
+
}
|
|
9827
|
+
const candidate = `${current}
|
|
9828
|
+
|
|
9829
|
+
${trimmed}`;
|
|
9830
|
+
if (candidate.length <= options.maxChunkLength) {
|
|
9831
|
+
current = candidate;
|
|
9832
|
+
continue;
|
|
9833
|
+
}
|
|
9834
|
+
pushCurrent();
|
|
9835
|
+
current = trimmed;
|
|
9836
|
+
}
|
|
9837
|
+
pushCurrent();
|
|
9838
|
+
const merged = [];
|
|
9839
|
+
for (const chunk of stableChunks) {
|
|
9840
|
+
const last = merged.at(-1);
|
|
9841
|
+
if (last && chunk.length < options.minChunkLength && `${last}
|
|
9842
|
+
|
|
9843
|
+
${chunk}`.length <= options.maxChunkLength) {
|
|
9844
|
+
merged[merged.length - 1] = normalizeWhitespace(`${last}
|
|
9845
|
+
|
|
9846
|
+
${chunk}`);
|
|
9847
|
+
continue;
|
|
9848
|
+
}
|
|
9849
|
+
merged.push(chunk);
|
|
9850
|
+
}
|
|
9851
|
+
const decorateSourceAwareChunkText = (text) => {
|
|
9852
|
+
if (!unit.preferredChunkUnits || !unit.sectionTitle) {
|
|
9853
|
+
return text;
|
|
9854
|
+
}
|
|
9855
|
+
if (unit.sectionKind === "spreadsheet_rows") {
|
|
9856
|
+
if (text.includes(`Sheet ${unit.sectionTitle}`)) {
|
|
9857
|
+
return text;
|
|
9858
|
+
}
|
|
9859
|
+
return normalizeWhitespace(`Sheet ${unit.sectionTitle}
|
|
9860
|
+
${text}`);
|
|
9861
|
+
}
|
|
9862
|
+
if (unit.sectionKind === "presentation_slide") {
|
|
9863
|
+
if (text.includes(unit.sectionTitle)) {
|
|
9864
|
+
return text;
|
|
9865
|
+
}
|
|
9866
|
+
return normalizeWhitespace(`${unit.sectionTitle}
|
|
9867
|
+
${text}`);
|
|
9868
|
+
}
|
|
9869
|
+
return text;
|
|
9870
|
+
};
|
|
9871
|
+
return merged.map((text) => ({
|
|
9491
9872
|
...unit,
|
|
9492
|
-
text
|
|
9873
|
+
text: decorateSourceAwareChunkText(text)
|
|
9493
9874
|
}));
|
|
9494
9875
|
};
|
|
9495
9876
|
var resolveChunkingUnits = (text, options) => {
|
|
@@ -10252,6 +10633,17 @@ var renderChunkStructure = (structure) => {
|
|
|
10252
10633
|
].filter((row) => row.length > 0);
|
|
10253
10634
|
return rows.length > 0 ? `<ul class="rag-chunk-structure">${rows.join("")}</ul>` : "";
|
|
10254
10635
|
};
|
|
10636
|
+
/**
 * Renders a chunk's excerpts as an HTML list.
 *
 * Emits one list item per non-empty excerpt, in the order chunk excerpt,
 * neighbor window, section excerpt. Excerpt text is passed through
 * escapeHtml2. Returns "" when `input` is missing or has no excerpts.
 *
 * @param {{chunkExcerpt?: string, windowExcerpt?: string, sectionExcerpt?: string} | undefined} input
 * @returns {string} HTML markup, or "".
 */
var renderChunkExcerpts = (input) => {
  if (!input) {
    return "";
  }
  const entries = [
    ["Chunk excerpt", input.chunkExcerpt],
    ["Neighbor window", input.windowExcerpt],
    ["Section excerpt", input.sectionExcerpt]
  ];
  let items = "";
  for (const [title, excerpt] of entries) {
    if (!excerpt) {
      continue;
    }
    items += `<li><strong>${title}</strong> ${escapeHtml2(excerpt)}</li>`;
  }
  if (items.length === 0) {
    return "";
  }
  return `<ul class="rag-chunk-structure">${items}</ul>`;
};
|
|
10255
10647
|
var renderSectionJumpList = (label, items) => {
|
|
10256
10648
|
const rows = items.map((item) => item.href ? `<li><strong>${escapeHtml2(label)}</strong> <a href="${escapeHtml2(item.href)}"${item.active ? ' aria-current="true"' : ""}>${escapeHtml2(item.label)}</a></li>` : `<li><strong>${escapeHtml2(label)}</strong> ${escapeHtml2(item.label)}</li>`).join("");
|
|
10257
10649
|
return rows ? `<ul class="rag-chunk-structure">${rows}</ul>` : "";
|
|
@@ -10351,7 +10743,7 @@ var defaultChunkPreview = (input) => {
|
|
|
10351
10743
|
return acc;
|
|
10352
10744
|
}, []);
|
|
10353
10745
|
const groupHtml = groups.map((group) => {
|
|
10354
|
-
const chunkHtml = group.chunks.map((chunk) => '<article class="rag-chunk">' + `<h5>${escapeHtml2(chunk.chunkId)}</h5>` + `<p class="rag-chunk-meta">chunk ${typeof chunk.metadata?.chunkIndex === "number" ? chunk.metadata.chunkIndex : 0} of ${typeof chunk.metadata?.chunkCount === "number" ? chunk.metadata.chunkCount : input.chunks.length}</p>` + renderSourceLabels(chunk.labels) + renderChunkStructure(chunk.structure) + `<pre>${escapeHtml2(chunk.text)}</pre>` + "</article>").join("");
|
|
10746
|
+
const chunkHtml = group.chunks.map((chunk) => '<article class="rag-chunk">' + `<h5>${escapeHtml2(chunk.chunkId)}</h5>` + `<p class="rag-chunk-meta">chunk ${typeof chunk.metadata?.chunkIndex === "number" ? chunk.metadata.chunkIndex : 0} of ${typeof chunk.metadata?.chunkCount === "number" ? chunk.metadata.chunkCount : input.chunks.length}</p>` + renderSourceLabels(chunk.labels) + renderChunkStructure(chunk.structure) + renderChunkExcerpts(chunk.excerpts) + `<pre>${escapeHtml2(chunk.text)}</pre>` + "</article>").join("");
|
|
10355
10747
|
return `<section class="rag-chunk-group"><h4>${escapeHtml2(group.title)}</h4>${chunkHtml}</section>`;
|
|
10356
10748
|
}).join("");
|
|
10357
10749
|
return `<section class="rag-chunk-preview">` + `<h3>${escapeHtml2(input.document.title)}</h3>` + `<p class="rag-chunk-preview-source">${escapeHtml2(input.document.source)}</p>` + renderSourceLabels(input.document.labels) + (navigation.parentSection ? renderSectionJumpList("Parent section", [
|
|
@@ -15925,6 +16317,15 @@ var ragChat = (config) => {
|
|
|
15925
16317
|
ok: false
|
|
15926
16318
|
};
|
|
15927
16319
|
}
|
|
16320
|
+
const chunks = preview.chunks.map((chunk) => ({
|
|
16321
|
+
...chunk,
|
|
16322
|
+
labels: buildRAGSourceLabels({
|
|
16323
|
+
metadata: chunk.metadata,
|
|
16324
|
+
source: chunk.source ?? preview.document.source,
|
|
16325
|
+
title: chunk.title ?? preview.document.title
|
|
16326
|
+
}),
|
|
16327
|
+
structure: buildRAGChunkStructure(chunk.metadata)
|
|
16328
|
+
}));
|
|
15928
16329
|
return {
|
|
15929
16330
|
ok: true,
|
|
15930
16331
|
...preview,
|
|
@@ -15936,14 +16337,9 @@ var ragChat = (config) => {
|
|
|
15936
16337
|
title: preview.document.title
|
|
15937
16338
|
})
|
|
15938
16339
|
},
|
|
15939
|
-
chunks:
|
|
16340
|
+
chunks: chunks.map((chunk) => ({
|
|
15940
16341
|
...chunk,
|
|
15941
|
-
|
|
15942
|
-
metadata: chunk.metadata,
|
|
15943
|
-
source: chunk.source ?? preview.document.source,
|
|
15944
|
-
title: chunk.title ?? preview.document.title
|
|
15945
|
-
}),
|
|
15946
|
-
structure: buildRAGChunkStructure(chunk.metadata)
|
|
16342
|
+
excerpts: buildRAGChunkExcerpts(chunks, chunk.chunkId)
|
|
15947
16343
|
}))
|
|
15948
16344
|
};
|
|
15949
16345
|
};
|
|
@@ -21540,5 +21936,5 @@ export {
|
|
|
21540
21936
|
aiChat
|
|
21541
21937
|
};
|
|
21542
21938
|
|
|
21543
|
-
//# debugId=
|
|
21939
|
+
//# debugId=7A0BBAE3433EF77D64756E2164756E21
|
|
21544
21940
|
//# sourceMappingURL=index.js.map
|