@absolutejs/absolute 0.19.0-beta.606 → 0.19.0-beta.608

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/ai/index.js CHANGED
@@ -347,6 +347,56 @@ var buildExcerpt = (text, maxLength = 160) => {
347
347
  }
348
348
  return `${normalized.slice(0, Math.max(0, maxLength - 1)).trimEnd()}\u2026`;
349
349
  };
350
+ var selectPreferredExcerpt = (excerpts, sectionChunkCount) => {
351
+ if (!excerpts) {
352
+ return "";
353
+ }
354
+ const chunkExcerpt = excerpts.chunkExcerpt?.trim() ?? "";
355
+ const windowExcerpt = excerpts.windowExcerpt?.trim() ?? "";
356
+ const sectionExcerpt = excerpts.sectionExcerpt?.trim() ?? "";
357
+ if (sectionChunkCount && sectionChunkCount > 1 && chunkExcerpt.length > 0 && chunkExcerpt.length < 72) {
358
+ if (sectionChunkCount <= 3 && sectionExcerpt) {
359
+ return sectionExcerpt;
360
+ }
361
+ if (windowExcerpt) {
362
+ return windowExcerpt;
363
+ }
364
+ }
365
+ return chunkExcerpt || windowExcerpt || sectionExcerpt;
366
+ };
367
+ var buildGroundingChunkExcerpts = (sources, activeChunkId) => {
368
+ if (sources.length === 0) {
369
+ return;
370
+ }
371
+ const activeSource = (activeChunkId ? sources.find((source) => source.chunkId === activeChunkId) : undefined) ?? sources[0];
372
+ if (!activeSource) {
373
+ return;
374
+ }
375
+ const chunkMap = new Map(sources.map((source) => [source.chunkId, source]));
376
+ const activeMetadata = activeSource.metadata ?? {};
377
+ const previousChunkId = getContextString(activeMetadata.previousChunkId);
378
+ const nextChunkId = getContextString(activeMetadata.nextChunkId);
379
+ const sectionChunkId = getContextString(activeMetadata.sectionChunkId);
380
+ const sectionSources = sectionChunkId ? sources.filter((source) => getContextString(source.metadata?.sectionChunkId) === sectionChunkId).sort((left, right) => {
381
+ const leftIndex = getContextNumber(left.metadata?.sectionChunkIndex) ?? Number.MAX_SAFE_INTEGER;
382
+ const rightIndex = getContextNumber(right.metadata?.sectionChunkIndex) ?? Number.MAX_SAFE_INTEGER;
383
+ if (leftIndex !== rightIndex) {
384
+ return leftIndex - rightIndex;
385
+ }
386
+ return left.chunkId.localeCompare(right.chunkId);
387
+ }) : [activeSource];
388
+ const collectText = (chunkIds) => chunkIds.map((chunkId) => chunkMap.get(chunkId)?.text).filter((text) => typeof text === "string").join(`
389
+
390
+ `);
391
+ const orderedWindowIds = [previousChunkId, activeSource.chunkId, nextChunkId].filter((chunkId, index, values) => Boolean(chunkId) && values.indexOf(chunkId) === index);
392
+ return {
393
+ chunkExcerpt: buildExcerpt(activeSource.text, 160),
394
+ sectionExcerpt: buildExcerpt(sectionSources.map((source) => source.text).join(`
395
+
396
+ `), 320),
397
+ windowExcerpt: buildExcerpt(collectText(orderedWindowIds), 240)
398
+ };
399
+ };
350
400
  var buildGroundingReferenceEvidenceLabel = (reference) => [reference.label, reference.locatorLabel, reference.contextLabel].filter((value) => Boolean(value && value.length > 0)).filter((value, index, values) => values.findIndex((entry) => entry === value) === index).join(" \xB7 ");
351
401
  var buildGroundingReferenceEvidenceSummary = (reference) => [
352
402
  reference.source ?? reference.title ?? reference.chunkId,
@@ -354,11 +404,19 @@ var buildGroundingReferenceEvidenceSummary = (reference) => [
354
404
  reference.contextLabel,
355
405
  reference.provenanceLabel
356
406
  ].filter((value) => Boolean(value && value.length > 0)).filter((value, index, values) => values.findIndex((entry) => entry === value) === index).join(" \xB7 ");
407
+ var buildGroundingSectionKey = (reference) => reference.contextLabel ?? reference.locatorLabel ?? reference.label ?? reference.source ?? reference.chunkId;
408
+ var buildGroundingSectionSummaryLine = (reference) => [
409
+ reference.source ?? reference.title ?? reference.chunkId,
410
+ reference.locatorLabel,
411
+ reference.contextLabel,
412
+ reference.provenanceLabel
413
+ ].filter((value) => Boolean(value && value.length > 0)).filter((value, index, values) => values.findIndex((entry) => entry === value) === index).join(" \xB7 ");
357
414
  var buildGroundedAnswerCitationDetail = (reference) => ({
358
415
  contextLabel: reference.contextLabel,
359
416
  evidenceLabel: buildGroundingReferenceEvidenceLabel(reference),
360
417
  evidenceSummary: buildGroundingReferenceEvidenceSummary(reference),
361
- excerpt: reference.excerpt,
418
+ excerpt: selectPreferredExcerpt(reference.excerpts, getContextNumber(reference.metadata?.sectionChunkCount)) || reference.excerpt,
419
+ excerpts: reference.excerpts,
362
420
  label: reference.label,
363
421
  locatorLabel: reference.locatorLabel,
364
422
  number: reference.number,
@@ -444,26 +502,97 @@ var buildRAGGroundedAnswer = (content, sources) => {
444
502
  hasCitations,
445
503
  parts,
446
504
  references,
505
+ sectionSummaries: buildRAGGroundedAnswerSectionSummaries(references),
447
506
  ungroundedReferenceNumbers: [...ungroundedReferenceNumbers].sort((left, right) => left - right)
448
507
  };
449
508
  };
509
+ var buildRAGGroundedAnswerSectionSummaries = (references) => {
510
+ const groups = new Map;
511
+ for (const reference of references) {
512
+ const key = buildGroundingSectionKey(reference);
513
+ const existing = groups.get(key);
514
+ if (!existing) {
515
+ const excerpts = reference.excerpts ? {
516
+ chunkExcerpt: reference.excerpts.chunkExcerpt,
517
+ sectionExcerpt: reference.excerpts.sectionExcerpt,
518
+ windowExcerpt: reference.excerpts.windowExcerpt
519
+ } : undefined;
520
+ groups.set(key, {
521
+ chunkIds: [reference.chunkId],
522
+ contextLabel: reference.contextLabel,
523
+ count: 1,
524
+ excerpt: selectPreferredExcerpt(excerpts, getContextNumber(reference.metadata?.sectionChunkCount)) || excerpts?.sectionExcerpt || reference.excerpt,
525
+ excerpts,
526
+ key,
527
+ label: key,
528
+ locatorLabel: reference.locatorLabel,
529
+ provenanceLabel: reference.provenanceLabel,
530
+ referenceNumbers: [reference.number],
531
+ references: [reference],
532
+ summary: buildGroundingSectionSummaryLine(reference) || reference.label || reference.chunkId
533
+ });
534
+ continue;
535
+ }
536
+ existing.count += 1;
537
+ if (!existing.chunkIds.includes(reference.chunkId)) {
538
+ existing.chunkIds.push(reference.chunkId);
539
+ }
540
+ if (!existing.referenceNumbers.includes(reference.number)) {
541
+ existing.referenceNumbers.push(reference.number);
542
+ }
543
+ existing.references.push(reference);
544
+ if (!existing.contextLabel && reference.contextLabel) {
545
+ existing.contextLabel = reference.contextLabel;
546
+ }
547
+ if (!existing.locatorLabel && reference.locatorLabel) {
548
+ existing.locatorLabel = reference.locatorLabel;
549
+ }
550
+ if (!existing.provenanceLabel && reference.provenanceLabel) {
551
+ existing.provenanceLabel = reference.provenanceLabel;
552
+ }
553
+ if (!existing.excerpts && reference.excerpts) {
554
+ existing.excerpts = {
555
+ chunkExcerpt: reference.excerpts.chunkExcerpt,
556
+ sectionExcerpt: reference.excerpts.sectionExcerpt,
557
+ windowExcerpt: reference.excerpts.windowExcerpt
558
+ };
559
+ existing.excerpt = reference.excerpts.sectionExcerpt;
560
+ }
561
+ }
562
+ return [...groups.values()].map((group) => ({
563
+ ...group,
564
+ referenceNumbers: [...group.referenceNumbers].sort((left, right) => left - right),
565
+ references: group.references.slice().sort((left, right) => left.number - right.number)
566
+ })).sort((left, right) => {
567
+ const leftFirst = left.referenceNumbers[0] ?? Number.POSITIVE_INFINITY;
568
+ const rightFirst = right.referenceNumbers[0] ?? Number.POSITIVE_INFINITY;
569
+ if (leftFirst !== rightFirst) {
570
+ return leftFirst - rightFirst;
571
+ }
572
+ return left.label.localeCompare(right.label);
573
+ });
574
+ };
450
575
  var buildRAGGroundingReferences = (sources) => {
451
576
  const citations = buildRAGCitations(sources);
452
577
  const citationReferenceMap = buildRAGCitationReferenceMap(citations);
453
- return citations.map((citation) => ({
454
- chunkId: citation.chunkId,
455
- contextLabel: citation.contextLabel ?? buildContextLabel(citation.metadata),
456
- excerpt: buildExcerpt(citation.text),
457
- label: citation.label,
458
- locatorLabel: citation.locatorLabel ?? buildLocatorLabel(citation.metadata, citation.source, citation.title),
459
- metadata: citation.metadata,
460
- number: citationReferenceMap[citation.chunkId] ?? 0,
461
- provenanceLabel: citation.provenanceLabel ?? buildProvenanceLabel(citation.metadata),
462
- score: citation.score,
463
- source: citation.source,
464
- text: citation.text,
465
- title: citation.title
466
- }));
578
+ return citations.map((citation) => {
579
+ const excerpts = buildGroundingChunkExcerpts(sources, citation.chunkId);
580
+ return {
581
+ chunkId: citation.chunkId,
582
+ contextLabel: citation.contextLabel ?? buildContextLabel(citation.metadata),
583
+ excerpt: selectPreferredExcerpt(excerpts, getContextNumber(citation.metadata?.sectionChunkCount)) || excerpts?.chunkExcerpt || buildExcerpt(citation.text),
584
+ excerpts,
585
+ label: citation.label,
586
+ locatorLabel: citation.locatorLabel ?? buildLocatorLabel(citation.metadata, citation.source, citation.title),
587
+ metadata: citation.metadata,
588
+ number: citationReferenceMap[citation.chunkId] ?? 0,
589
+ provenanceLabel: citation.provenanceLabel ?? buildProvenanceLabel(citation.metadata),
590
+ score: citation.score,
591
+ source: citation.source,
592
+ text: citation.text,
593
+ title: citation.title
594
+ };
595
+ });
467
596
  };
468
597
 
469
598
  // src/ai/rag/quality.ts
@@ -4123,7 +4252,7 @@ var buildRAGChunkStructure = (metadata) => {
4123
4252
  return;
4124
4253
  }
4125
4254
  const sectionPath = Array.isArray(metadata.sectionPath) ? metadata.sectionPath.filter((value) => typeof value === "string" && value.trim().length > 0) : undefined;
4126
- const sectionKind = metadata.sectionKind === "markdown_heading" || metadata.sectionKind === "html_heading" ? metadata.sectionKind : undefined;
4255
+ const sectionKind = metadata.sectionKind === "markdown_heading" || metadata.sectionKind === "html_heading" || metadata.sectionKind === "office_heading" || metadata.sectionKind === "spreadsheet_rows" || metadata.sectionKind === "presentation_slide" ? metadata.sectionKind : undefined;
4127
4256
  const section = {
4128
4257
  depth: getContextNumber2(metadata.sectionDepth),
4129
4258
  kind: sectionKind,
@@ -4152,6 +4281,52 @@ var buildExcerpt2 = (text, maxLength = 160) => {
4152
4281
  }
4153
4282
  return `${normalized.slice(0, Math.max(0, maxLength - 1)).trimEnd()}\u2026`;
4154
4283
  };
4284
+ var buildRAGChunkExcerpts = (chunks, activeChunkId) => {
4285
+ if (chunks.length === 0) {
4286
+ return;
4287
+ }
4288
+ const graph = buildRAGChunkGraph(chunks.map((chunk) => ({
4289
+ chunkId: chunk.chunkId,
4290
+ metadata: chunk.metadata,
4291
+ structure: chunk.structure
4292
+ })));
4293
+ const navigation = buildRAGChunkGraphNavigation(graph, activeChunkId);
4294
+ const activeChunk = chunks.find((chunk) => chunk.chunkId === navigation.activeChunkId) ?? chunks[0];
4295
+ if (!activeChunk) {
4296
+ return;
4297
+ }
4298
+ const chunkMap = new Map(chunks.map((chunk) => [chunk.chunkId, chunk]));
4299
+ const orderedWindowIds = [
4300
+ navigation.previousNode?.chunkId,
4301
+ activeChunk.chunkId,
4302
+ navigation.nextNode?.chunkId
4303
+ ].filter((chunkId, index, ids) => Boolean(chunkId) && ids.indexOf(chunkId) === index);
4304
+ const orderedSectionIds = navigation.sectionNodes.length > 0 ? navigation.sectionNodes.map((node) => node.chunkId) : [activeChunk.chunkId];
4305
+ const collectText = (chunkIds) => chunkIds.map((chunkId) => chunkMap.get(chunkId)?.text).filter((text) => typeof text === "string").join(`
4306
+
4307
+ `);
4308
+ return {
4309
+ chunkExcerpt: buildExcerpt2(activeChunk.text, 160),
4310
+ sectionExcerpt: buildExcerpt2(collectText(orderedSectionIds), 320),
4311
+ windowExcerpt: buildExcerpt2(collectText(orderedWindowIds), 240)
4312
+ };
4313
+ };
4314
+ var buildRAGPreferredExcerpt = (excerpts, structure) => {
4315
+ if (!excerpts) {
4316
+ return "";
4317
+ }
4318
+ const chunkLength = excerpts.chunkExcerpt.trim().length;
4319
+ const sectionChunkCount = structure?.sequence?.sectionChunkCount ?? 1;
4320
+ if (sectionChunkCount > 1 && chunkLength > 0 && chunkLength < 72) {
4321
+ if (sectionChunkCount <= 3 && excerpts.sectionExcerpt.trim().length > 0) {
4322
+ return excerpts.sectionExcerpt;
4323
+ }
4324
+ if (excerpts.windowExcerpt.trim().length > 0) {
4325
+ return excerpts.windowExcerpt;
4326
+ }
4327
+ }
4328
+ return excerpts.chunkExcerpt;
4329
+ };
4155
4330
  var buildRAGChunkGraph = (chunks) => {
4156
4331
  const nodes = [];
4157
4332
  const edges = [];
@@ -4363,6 +4538,7 @@ var buildRAGSourceSummaries = (sources) => {
4363
4538
  return sourceGroups.map((group) => {
4364
4539
  const groupCitations = citations.filter((citation) => group.chunks.some((chunk) => chunk.chunkId === citation.chunkId));
4365
4540
  const leadChunk = group.chunks.slice().sort((left, right) => right.score - left.score)[0];
4541
+ const excerpts = leadChunk ? buildRAGChunkExcerpts(group.chunks, leadChunk.chunkId) : undefined;
4366
4542
  return {
4367
4543
  bestScore: group.bestScore,
4368
4544
  citationNumbers: groupCitations.map((citation) => citationReferenceMap[citation.chunkId] ?? 0),
@@ -4370,7 +4546,8 @@ var buildRAGSourceSummaries = (sources) => {
4370
4546
  chunkIds: group.chunks.map((chunk) => chunk.chunkId),
4371
4547
  contextLabel: leadChunk?.labels?.contextLabel ?? buildContextLabel2(leadChunk?.metadata),
4372
4548
  count: group.count,
4373
- excerpt: buildExcerpt2(leadChunk?.text ?? ""),
4549
+ excerpt: buildRAGPreferredExcerpt(excerpts, leadChunk?.structure ?? buildRAGChunkStructure(leadChunk?.metadata)) || buildExcerpt2(leadChunk?.text ?? ""),
4550
+ excerpts,
4374
4551
  key: group.key,
4375
4552
  label: group.label,
4376
4553
  locatorLabel: leadChunk?.labels?.locatorLabel ?? buildLocatorLabel2(leadChunk?.metadata, leadChunk?.source, leadChunk?.title),
@@ -8245,6 +8422,128 @@ var htmlStructureUnits = (value) => {
8245
8422
  }
8246
8423
  return [{ text: normalizeWhitespace(stripHtmlTags(focused)) }].filter((section) => Boolean(section.text));
8247
8424
  };
8425
+ var isLikelyOfficeHeadingParagraph = (value) => {
8426
+ const normalized = normalizeWhitespace(value);
8427
+ if (!normalized || normalized.length > 80) {
8428
+ return false;
8429
+ }
8430
+ if (/[.!?]$/.test(normalized)) {
8431
+ return false;
8432
+ }
8433
+ const words = normalized.split(/\s+/);
8434
+ if (words.length > 8) {
8435
+ return false;
8436
+ }
8437
+ const headingLikeWords = words.filter((word) => {
8438
+ const stripped = word.replace(/^[^A-Za-z0-9]+|[^A-Za-z0-9]+$/g, "");
8439
+ if (!stripped) {
8440
+ return false;
8441
+ }
8442
+ return /^[A-Z0-9]/.test(stripped);
8443
+ }).length;
8444
+ return headingLikeWords / words.length >= 0.6;
8445
+ };
8446
+ var officeHeadingStructureUnits = (value) => {
8447
+ const paragraphs = paragraphUnits(value);
8448
+ const sections = [];
8449
+ let currentHeading;
8450
+ let currentParagraphs = [];
8451
+ const flush = () => {
8452
+ if (!currentHeading && currentParagraphs.length === 0) {
8453
+ return;
8454
+ }
8455
+ const text = normalizeWhitespace([currentHeading, ...currentParagraphs].filter(Boolean).join(`
8456
+
8457
+ `));
8458
+ if (!text) {
8459
+ currentHeading = undefined;
8460
+ currentParagraphs = [];
8461
+ return;
8462
+ }
8463
+ sections.push({
8464
+ sectionDepth: currentHeading ? 1 : undefined,
8465
+ sectionKind: currentHeading ? "office_heading" : undefined,
8466
+ sectionPath: currentHeading ? [currentHeading] : undefined,
8467
+ sectionTitle: currentHeading,
8468
+ text
8469
+ });
8470
+ currentHeading = undefined;
8471
+ currentParagraphs = [];
8472
+ };
8473
+ for (const paragraph of paragraphs) {
8474
+ if (isLikelyOfficeHeadingParagraph(paragraph)) {
8475
+ flush();
8476
+ currentHeading = paragraph;
8477
+ continue;
8478
+ }
8479
+ currentParagraphs.push(paragraph);
8480
+ }
8481
+ flush();
8482
+ return sections.length > 0 ? sections : [{ text: normalizeWhitespace(value) }];
8483
+ };
8484
+ var spreadsheetStructureUnits = (value, metadata) => {
8485
+ const lines = value.replace(/\r\n?/g, `
8486
+ `).split(`
8487
+ `).map((line) => normalizeWhitespace(line)).filter(Boolean);
8488
+ if (lines.length === 0) {
8489
+ return [];
8490
+ }
8491
+ const sheetName = typeof metadata?.sheetName === "string" && metadata.sheetName || lines[0].replace(/^Sheet\s+/i, "");
8492
+ const rowLines = lines.filter((line) => /^Row \d+\./.test(line));
8493
+ if (rowLines.length === 0) {
8494
+ return [
8495
+ {
8496
+ sectionDepth: 1,
8497
+ sectionKind: "spreadsheet_rows",
8498
+ sectionPath: [sheetName],
8499
+ sectionTitle: sheetName,
8500
+ text: normalizeWhitespace(lines.join(`
8501
+ `))
8502
+ }
8503
+ ];
8504
+ }
8505
+ const groups = [];
8506
+ let current = [];
8507
+ for (const row of rowLines) {
8508
+ const candidate = [...current, row].join(`
8509
+ `);
8510
+ if (current.length > 0 && candidate.length > DEFAULT_MAX_CHUNK_LENGTH) {
8511
+ groups.push(current);
8512
+ current = [row];
8513
+ continue;
8514
+ }
8515
+ current.push(row);
8516
+ }
8517
+ if (current.length > 0) {
8518
+ groups.push(current);
8519
+ }
8520
+ return groups.map((rows) => ({
8521
+ preferredChunkUnits: rows,
8522
+ sectionDepth: 1,
8523
+ sectionKind: "spreadsheet_rows",
8524
+ sectionPath: [sheetName],
8525
+ sectionTitle: sheetName,
8526
+ text: normalizeWhitespace([`Sheet ${sheetName}`, ...rows].join(`
8527
+ `))
8528
+ }));
8529
+ };
8530
+ var presentationStructureUnits = (value, metadata) => {
8531
+ const slideNumber = typeof metadata?.slideNumber === "number" ? metadata.slideNumber : typeof metadata?.slideIndex === "number" ? metadata.slideIndex + 1 : undefined;
8532
+ const slideLabel = slideNumber ? `Slide ${slideNumber}` : "Slide";
8533
+ const paragraphs = paragraphUnits(value);
8534
+ return [
8535
+ {
8536
+ preferredChunkUnits: paragraphs,
8537
+ sectionDepth: 1,
8538
+ sectionKind: "presentation_slide",
8539
+ sectionPath: [slideLabel],
8540
+ sectionTitle: slideLabel,
8541
+ text: normalizeWhitespace([slideLabel, ...paragraphs].join(`
8542
+
8543
+ `))
8544
+ }
8545
+ ];
8546
+ };
8248
8547
  var inferFormat = (document) => {
8249
8548
  if (document.format) {
8250
8549
  return document.format;
@@ -9420,6 +9719,15 @@ var sourceAwareUnits = (document, format, normalizedText) => {
9420
9719
  }
9421
9720
  case "text":
9422
9721
  default:
9722
+ if (document.metadata?.sourceNativeKind === "spreadsheet_sheet") {
9723
+ return spreadsheetStructureUnits(normalizedText, document.metadata);
9724
+ }
9725
+ if (document.metadata?.sourceNativeKind === "presentation_slide") {
9726
+ return presentationStructureUnits(normalizedText, document.metadata);
9727
+ }
9728
+ if (document.source?.toLowerCase().endsWith(".docx") || document.source?.toLowerCase().endsWith(".odt")) {
9729
+ return officeHeadingStructureUnits(normalizedText);
9730
+ }
9423
9731
  return paragraphUnits(normalizedText).map((text) => ({ text }));
9424
9732
  }
9425
9733
  };
@@ -9487,9 +9795,82 @@ var chunkSourceAwareUnit = (unit, options) => {
9487
9795
  if (unit.text.length <= options.maxChunkLength) {
9488
9796
  return [unit];
9489
9797
  }
9490
- return chunkFromUnits(paragraphUnits(unit.text), options.maxChunkLength, options.chunkOverlap, options.minChunkLength).map((text) => ({
9798
+ const expandOversizedParagraph = (paragraph) => {
9799
+ if (paragraph.length <= options.maxChunkLength) {
9800
+ return [paragraph];
9801
+ }
9802
+ const sentenceChunks = chunkFromUnits(sentenceUnits(paragraph), options.maxChunkLength, 0, options.minChunkLength);
9803
+ if (sentenceChunks.length > 1) {
9804
+ return sentenceChunks;
9805
+ }
9806
+ return chunkFromUnits(fixedUnits(paragraph, options.maxChunkLength), options.maxChunkLength, 0, options.minChunkLength);
9807
+ };
9808
+ const stableParagraphs = (unit.preferredChunkUnits ?? paragraphUnits(unit.text)).flatMap(expandOversizedParagraph);
9809
+ const stableChunks = [];
9810
+ let current = "";
9811
+ const pushCurrent = () => {
9812
+ if (!current) {
9813
+ return;
9814
+ }
9815
+ stableChunks.push(normalizeWhitespace(current));
9816
+ current = "";
9817
+ };
9818
+ for (const paragraph of stableParagraphs) {
9819
+ const trimmed = paragraph.trim();
9820
+ if (!trimmed) {
9821
+ continue;
9822
+ }
9823
+ if (!current) {
9824
+ current = trimmed;
9825
+ continue;
9826
+ }
9827
+ const candidate = `${current}
9828
+
9829
+ ${trimmed}`;
9830
+ if (candidate.length <= options.maxChunkLength) {
9831
+ current = candidate;
9832
+ continue;
9833
+ }
9834
+ pushCurrent();
9835
+ current = trimmed;
9836
+ }
9837
+ pushCurrent();
9838
+ const merged = [];
9839
+ for (const chunk of stableChunks) {
9840
+ const last = merged.at(-1);
9841
+ if (last && chunk.length < options.minChunkLength && `${last}
9842
+
9843
+ ${chunk}`.length <= options.maxChunkLength) {
9844
+ merged[merged.length - 1] = normalizeWhitespace(`${last}
9845
+
9846
+ ${chunk}`);
9847
+ continue;
9848
+ }
9849
+ merged.push(chunk);
9850
+ }
9851
+ const decorateSourceAwareChunkText = (text) => {
9852
+ if (!unit.preferredChunkUnits || !unit.sectionTitle) {
9853
+ return text;
9854
+ }
9855
+ if (unit.sectionKind === "spreadsheet_rows") {
9856
+ if (text.includes(`Sheet ${unit.sectionTitle}`)) {
9857
+ return text;
9858
+ }
9859
+ return normalizeWhitespace(`Sheet ${unit.sectionTitle}
9860
+ ${text}`);
9861
+ }
9862
+ if (unit.sectionKind === "presentation_slide") {
9863
+ if (text.includes(unit.sectionTitle)) {
9864
+ return text;
9865
+ }
9866
+ return normalizeWhitespace(`${unit.sectionTitle}
9867
+ ${text}`);
9868
+ }
9869
+ return text;
9870
+ };
9871
+ return merged.map((text) => ({
9491
9872
  ...unit,
9492
- text
9873
+ text: decorateSourceAwareChunkText(text)
9493
9874
  }));
9494
9875
  };
9495
9876
  var resolveChunkingUnits = (text, options) => {
@@ -10252,6 +10633,17 @@ var renderChunkStructure = (structure) => {
10252
10633
  ].filter((row) => row.length > 0);
10253
10634
  return rows.length > 0 ? `<ul class="rag-chunk-structure">${rows.join("")}</ul>` : "";
10254
10635
  };
10636
+ var renderChunkExcerpts = (input) => {
10637
+ if (!input) {
10638
+ return "";
10639
+ }
10640
+ const rows = [
10641
+ input.chunkExcerpt ? `<li><strong>Chunk excerpt</strong> ${escapeHtml2(input.chunkExcerpt)}</li>` : "",
10642
+ input.windowExcerpt ? `<li><strong>Neighbor window</strong> ${escapeHtml2(input.windowExcerpt)}</li>` : "",
10643
+ input.sectionExcerpt ? `<li><strong>Section excerpt</strong> ${escapeHtml2(input.sectionExcerpt)}</li>` : ""
10644
+ ].filter((row) => row.length > 0);
10645
+ return rows.length > 0 ? `<ul class="rag-chunk-structure">${rows.join("")}</ul>` : "";
10646
+ };
10255
10647
  var renderSectionJumpList = (label, items) => {
10256
10648
  const rows = items.map((item) => item.href ? `<li><strong>${escapeHtml2(label)}</strong> <a href="${escapeHtml2(item.href)}"${item.active ? ' aria-current="true"' : ""}>${escapeHtml2(item.label)}</a></li>` : `<li><strong>${escapeHtml2(label)}</strong> ${escapeHtml2(item.label)}</li>`).join("");
10257
10649
  return rows ? `<ul class="rag-chunk-structure">${rows}</ul>` : "";
@@ -10351,7 +10743,7 @@ var defaultChunkPreview = (input) => {
10351
10743
  return acc;
10352
10744
  }, []);
10353
10745
  const groupHtml = groups.map((group) => {
10354
- const chunkHtml = group.chunks.map((chunk) => '<article class="rag-chunk">' + `<h5>${escapeHtml2(chunk.chunkId)}</h5>` + `<p class="rag-chunk-meta">chunk ${typeof chunk.metadata?.chunkIndex === "number" ? chunk.metadata.chunkIndex : 0} of ${typeof chunk.metadata?.chunkCount === "number" ? chunk.metadata.chunkCount : input.chunks.length}</p>` + renderSourceLabels(chunk.labels) + renderChunkStructure(chunk.structure) + `<pre>${escapeHtml2(chunk.text)}</pre>` + "</article>").join("");
10746
+ const chunkHtml = group.chunks.map((chunk) => '<article class="rag-chunk">' + `<h5>${escapeHtml2(chunk.chunkId)}</h5>` + `<p class="rag-chunk-meta">chunk ${typeof chunk.metadata?.chunkIndex === "number" ? chunk.metadata.chunkIndex : 0} of ${typeof chunk.metadata?.chunkCount === "number" ? chunk.metadata.chunkCount : input.chunks.length}</p>` + renderSourceLabels(chunk.labels) + renderChunkStructure(chunk.structure) + renderChunkExcerpts(chunk.excerpts) + `<pre>${escapeHtml2(chunk.text)}</pre>` + "</article>").join("");
10355
10747
  return `<section class="rag-chunk-group"><h4>${escapeHtml2(group.title)}</h4>${chunkHtml}</section>`;
10356
10748
  }).join("");
10357
10749
  return `<section class="rag-chunk-preview">` + `<h3>${escapeHtml2(input.document.title)}</h3>` + `<p class="rag-chunk-preview-source">${escapeHtml2(input.document.source)}</p>` + renderSourceLabels(input.document.labels) + (navigation.parentSection ? renderSectionJumpList("Parent section", [
@@ -15925,6 +16317,15 @@ var ragChat = (config) => {
15925
16317
  ok: false
15926
16318
  };
15927
16319
  }
16320
+ const chunks = preview.chunks.map((chunk) => ({
16321
+ ...chunk,
16322
+ labels: buildRAGSourceLabels({
16323
+ metadata: chunk.metadata,
16324
+ source: chunk.source ?? preview.document.source,
16325
+ title: chunk.title ?? preview.document.title
16326
+ }),
16327
+ structure: buildRAGChunkStructure(chunk.metadata)
16328
+ }));
15928
16329
  return {
15929
16330
  ok: true,
15930
16331
  ...preview,
@@ -15936,14 +16337,9 @@ var ragChat = (config) => {
15936
16337
  title: preview.document.title
15937
16338
  })
15938
16339
  },
15939
- chunks: preview.chunks.map((chunk) => ({
16340
+ chunks: chunks.map((chunk) => ({
15940
16341
  ...chunk,
15941
- labels: buildRAGSourceLabels({
15942
- metadata: chunk.metadata,
15943
- source: chunk.source ?? preview.document.source,
15944
- title: chunk.title ?? preview.document.title
15945
- }),
15946
- structure: buildRAGChunkStructure(chunk.metadata)
16342
+ excerpts: buildRAGChunkExcerpts(chunks, chunk.chunkId)
15947
16343
  }))
15948
16344
  };
15949
16345
  };
@@ -21540,5 +21936,5 @@ export {
21540
21936
  aiChat
21541
21937
  };
21542
21938
 
21543
- //# debugId=A318A9B225410C8664756E2164756E21
21939
+ //# debugId=7A0BBAE3433EF77D64756E2164756E21
21544
21940
  //# sourceMappingURL=index.js.map