@absolutejs/absolute 0.19.0-beta.618 → 0.19.0-beta.619

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/ai/index.js CHANGED
@@ -4237,6 +4237,25 @@ var buildContextLabel2 = (metadata) => {
4237
4237
  if (!metadata) {
4238
4238
  return;
4239
4239
  }
4240
+ const pdfTextKind = getContextString2(metadata.pdfTextKind);
4241
+ const officeBlockKind = getContextString2(metadata.officeBlockKind);
4242
+ const sectionPath = Array.isArray(metadata.sectionPath) ? metadata.sectionPath.map((value) => getContextString2(value)).filter((value) => typeof value === "string") : [];
4243
+ const sectionTitle = getContextString2(metadata.sectionTitle) ?? sectionPath.at(-1);
4244
+ if (pdfTextKind === "table_like" && sectionTitle) {
4245
+ return `PDF table block ${sectionTitle}`;
4246
+ }
4247
+ if (pdfTextKind === "paragraph" && sectionTitle) {
4248
+ return `PDF text block ${sectionTitle}`;
4249
+ }
4250
+ if (officeBlockKind === "table" && sectionTitle) {
4251
+ return `Office table block ${sectionTitle}`;
4252
+ }
4253
+ if (officeBlockKind === "list" && sectionTitle) {
4254
+ return `Office list block ${sectionTitle}`;
4255
+ }
4256
+ if (officeBlockKind === "paragraph" && sectionTitle) {
4257
+ return `Office paragraph block ${sectionTitle}`;
4258
+ }
4240
4259
  const emailKind = getContextString2(metadata.emailKind);
4241
4260
  if (emailKind === "attachment") {
4242
4261
  return "Attachment evidence";
@@ -4273,8 +4292,6 @@ var buildContextLabel2 = (metadata) => {
4273
4292
  if (speaker) {
4274
4293
  return `Speaker ${speaker}`;
4275
4294
  }
4276
- const sectionPath = Array.isArray(metadata.sectionPath) ? metadata.sectionPath.map((value) => getContextString2(value)).filter((value) => typeof value === "string") : [];
4277
- const sectionTitle = getContextString2(metadata.sectionTitle) ?? sectionPath.at(-1);
4278
4295
  if (sectionTitle) {
4279
4296
  return `Section ${sectionTitle}`;
4280
4297
  }
@@ -4284,11 +4301,21 @@ var buildLocatorLabel2 = (metadata, source, title) => {
4284
4301
  if (!metadata) {
4285
4302
  return;
4286
4303
  }
4304
+ const pdfTextKind = getContextString2(metadata.pdfTextKind);
4305
+ const officeBlockKind = getContextString2(metadata.officeBlockKind);
4306
+ const pdfBlockNumber = getContextNumber2(metadata.pdfBlockNumber);
4307
+ const officeBlockNumber = getContextNumber2(metadata.officeBlockNumber);
4287
4308
  const page = getContextNumber2(metadata.page) ?? getContextNumber2(metadata.pageNumber) ?? (typeof metadata.pageIndex === "number" ? metadata.pageIndex + 1 : undefined);
4288
4309
  const region = getContextNumber2(metadata.regionNumber) ?? (typeof metadata.regionIndex === "number" ? metadata.regionIndex + 1 : undefined);
4289
4310
  if (page && region) {
4290
4311
  return `Page ${page} \xB7 Region ${region}`;
4291
4312
  }
4313
+ if (page && pdfBlockNumber && pdfTextKind === "table_like") {
4314
+ return `Page ${page} \xB7 Table Block ${pdfBlockNumber}`;
4315
+ }
4316
+ if (page && pdfBlockNumber) {
4317
+ return `Page ${page} \xB7 Text Block ${pdfBlockNumber}`;
4318
+ }
4292
4319
  if (page) {
4293
4320
  return `Page ${page}`;
4294
4321
  }
@@ -4317,6 +4344,15 @@ var buildLocatorLabel2 = (metadata, source, title) => {
4317
4344
  if (mediaStart) {
4318
4345
  return `Timestamp ${mediaStart}`;
4319
4346
  }
4347
+ if (officeBlockNumber && officeBlockKind === "table") {
4348
+ return `Office table block ${officeBlockNumber}`;
4349
+ }
4350
+ if (officeBlockNumber && officeBlockKind === "list") {
4351
+ return `Office list block ${officeBlockNumber}`;
4352
+ }
4353
+ if (officeBlockNumber && officeBlockKind === "paragraph") {
4354
+ return `Office paragraph block ${officeBlockNumber}`;
4355
+ }
4320
4356
  const sectionPath = Array.isArray(metadata.sectionPath) ? metadata.sectionPath.map((value) => getContextString2(value)).filter((value) => typeof value === "string") : [];
4321
4357
  if (sectionPath.length > 0) {
4322
4358
  return `Section ${sectionPath.join(" > ")}`;
@@ -4334,12 +4370,16 @@ var buildProvenanceLabel2 = (metadata) => {
4334
4370
  const mediaKind = getContextString2(metadata.mediaKind);
4335
4371
  const transcriptSource = getContextString2(metadata.transcriptSource);
4336
4372
  const pdfTextMode = getContextString2(metadata.pdfTextMode);
4373
+ const pdfTextKind = getContextString2(metadata.pdfTextKind);
4374
+ const officeBlockKind = getContextString2(metadata.officeBlockKind);
4337
4375
  const ocrEngine = getContextString2(metadata.ocrEngine);
4338
4376
  const extractorRegistryMatch = getContextString2(metadata.extractorRegistryMatch);
4339
4377
  const chunkingProfile = getContextString2(metadata.chunkingProfile);
4340
4378
  const ocrConfidence = getContextNumber2(metadata.ocrRegionConfidence) ?? getContextNumber2(metadata.ocrConfidence);
4341
4379
  const labels = [
4342
4380
  pdfTextMode ? `PDF ${pdfTextMode}` : "",
4381
+ pdfTextKind === "table_like" ? "PDF table block" : pdfTextKind === "paragraph" ? "PDF text block" : "",
4382
+ officeBlockKind ? `Office ${officeBlockKind}` : "",
4343
4383
  ocrEngine ? `OCR ${ocrEngine}` : "",
4344
4384
  extractorRegistryMatch ? `Extractor ${extractorRegistryMatch}` : "",
4345
4385
  chunkingProfile ? `Chunking ${chunkingProfile}` : "",
@@ -4375,7 +4415,7 @@ var buildRAGChunkStructure = (metadata) => {
4375
4415
  return;
4376
4416
  }
4377
4417
  const sectionPath = Array.isArray(metadata.sectionPath) ? metadata.sectionPath.filter((value) => typeof value === "string" && value.trim().length > 0) : undefined;
4378
- const sectionKind = metadata.sectionKind === "markdown_heading" || metadata.sectionKind === "html_heading" || metadata.sectionKind === "office_heading" || metadata.sectionKind === "spreadsheet_rows" || metadata.sectionKind === "presentation_slide" ? metadata.sectionKind : undefined;
4418
+ const sectionKind = metadata.sectionKind === "markdown_heading" || metadata.sectionKind === "html_heading" || metadata.sectionKind === "office_heading" || metadata.sectionKind === "office_block" || metadata.sectionKind === "pdf_block" || metadata.sectionKind === "spreadsheet_rows" || metadata.sectionKind === "presentation_slide" ? metadata.sectionKind : undefined;
4379
4419
  const section = {
4380
4420
  depth: getContextNumber2(metadata.sectionDepth),
4381
4421
  kind: sectionKind,
@@ -4695,7 +4735,7 @@ var buildRAGSourceSummaries = (sources) => {
4695
4735
  const citationReferenceMap = buildRAGCitationReferenceMap(citations);
4696
4736
  return sourceGroups.map((group) => {
4697
4737
  const groupCitations = citations.filter((citation) => group.chunks.some((chunk) => chunk.chunkId === citation.chunkId));
4698
- const leadChunk = group.chunks.slice().sort((left, right) => right.score - left.score)[0];
4738
+ const leadChunk = getPreferredSourceLeadChunk(group.chunks);
4699
4739
  const excerpts = leadChunk ? buildRAGChunkExcerpts(group.chunks, leadChunk.chunkId) : undefined;
4700
4740
  const structure = leadChunk?.structure ?? buildRAGChunkStructure(leadChunk?.metadata);
4701
4741
  const excerptSelection = buildRAGExcerptSelection(excerpts, structure);
@@ -4723,13 +4763,45 @@ var getSectionPathFromSource = (source) => {
4723
4763
  const path = source.structure?.section?.path ?? (Array.isArray(source.metadata?.sectionPath) ? source.metadata.sectionPath.map((value) => getContextString2(value)).filter((value) => typeof value === "string") : []);
4724
4764
  return path.length > 0 ? path : undefined;
4725
4765
  };
4766
+ var isBlockAwareContextLabel = (value) => typeof value === "string" && (value.startsWith("PDF ") || value.startsWith("Office "));
4767
+ var getStructuredSectionScoreWeight = (metadata) => {
4768
+ if (!metadata) {
4769
+ return 1;
4770
+ }
4771
+ const pdfTextKind = getContextString2(metadata.pdfTextKind);
4772
+ const officeBlockKind = getContextString2(metadata.officeBlockKind);
4773
+ const sectionKind = getContextString2(metadata.sectionKind);
4774
+ if (pdfTextKind === "table_like") {
4775
+ return 1.28;
4776
+ }
4777
+ if (officeBlockKind === "table" || officeBlockKind === "list") {
4778
+ return 1.24;
4779
+ }
4780
+ if (sectionKind === "pdf_block" || sectionKind === "office_block" || officeBlockKind === "paragraph" || pdfTextKind === "paragraph") {
4781
+ return 1.12;
4782
+ }
4783
+ return 1;
4784
+ };
4785
+ var getStructuredSourceLeadScore = (source) => source.score * getStructuredSectionScoreWeight(source.metadata);
4786
+ var getPreferredSourceLeadChunk = (chunks) => chunks.slice().sort((left, right) => {
4787
+ const leftWeightedScore = getStructuredSourceLeadScore(left);
4788
+ const rightWeightedScore = getStructuredSourceLeadScore(right);
4789
+ if (rightWeightedScore !== leftWeightedScore) {
4790
+ return rightWeightedScore - leftWeightedScore;
4791
+ }
4792
+ if (right.score !== left.score) {
4793
+ return right.score - left.score;
4794
+ }
4795
+ return left.chunkId.localeCompare(right.chunkId);
4796
+ })[0];
4726
4797
  var buildRAGSectionRetrievalDiagnostics = (sources, trace) => {
4727
- const totalScore = sources.reduce((sum, source) => sum + source.score, 0);
4798
+ const totalScore = sources.reduce((sum, source) => sum + source.score * getStructuredSectionScoreWeight(source.metadata), 0);
4728
4799
  if (sources.length === 0 || totalScore <= 0) {
4729
4800
  return [];
4730
4801
  }
4731
4802
  const sections = new Map;
4732
4803
  for (const source of sources) {
4804
+ const structuredScore = source.score * getStructuredSectionScoreWeight(source.metadata);
4733
4805
  const path = getSectionPathFromSource(source);
4734
4806
  if (!path) {
4735
4807
  continue;
@@ -4761,7 +4833,7 @@ var buildRAGSectionRetrievalDiagnostics = (sources, trace) => {
4761
4833
  sourceSet: new Set(source.source ? [source.source] : []),
4762
4834
  topChunkId: source.chunkId,
4763
4835
  topSource: source.source,
4764
- totalScore: source.score,
4836
+ totalScore: structuredScore,
4765
4837
  transformedHits,
4766
4838
  variantHits,
4767
4839
  vectorHits
@@ -4769,7 +4841,7 @@ var buildRAGSectionRetrievalDiagnostics = (sources, trace) => {
4769
4841
  continue;
4770
4842
  }
4771
4843
  existing.count += 1;
4772
- existing.totalScore += source.score;
4844
+ existing.totalScore += structuredScore;
4773
4845
  if (source.source) {
4774
4846
  existing.sourceSet.add(source.source);
4775
4847
  }
@@ -4797,6 +4869,8 @@ var buildRAGSectionRetrievalDiagnostics = (sources, trace) => {
4797
4869
  const parentTotal = siblingPool.reduce((sum, entry) => sum + entry.totalScore, 0);
4798
4870
  const scoreShare = section.totalScore / totalScore;
4799
4871
  const parentShare = parentTotal > 0 ? section.totalScore / parentTotal : undefined;
4872
+ const topChunk = sources.find((source) => source.chunkId === section.topChunkId);
4873
+ const topContextLabel = topChunk?.labels?.contextLabel ?? buildContextLabel2(topChunk?.metadata);
4800
4874
  const parentDistribution = parentTotal > 0 ? siblingPool.map((entry) => ({
4801
4875
  count: entry.count,
4802
4876
  isActive: entry.key === section.key,
@@ -4922,6 +4996,7 @@ var buildRAGSectionRetrievalDiagnostics = (sources, trace) => {
4922
4996
  reasons.push("concentrated_evidence");
4923
4997
  }
4924
4998
  const summaryParts = [
4999
+ isBlockAwareContextLabel(topContextLabel) ? topContextLabel : "",
4925
5000
  `${section.count} hit${section.count === 1 ? "" : "s"}`,
4926
5001
  `${(scoreShare * 100).toFixed(0)}% score share`,
4927
5002
  `vector ${section.vectorHits} \xB7 lexical ${section.lexicalHits} \xB7 hybrid ${section.hybridHits}`,
@@ -5133,22 +5208,21 @@ var updateSourceGroup = (groups, source) => {
5133
5208
  groups.set(key, buildSourceGroup(source, key));
5134
5209
  return;
5135
5210
  }
5136
- if (source.score > existing.bestScore) {
5137
- existing.bestScore = source.score;
5138
- existing.label = buildSourceLabel2(source);
5139
- existing.labels = source.labels ?? buildRAGSourceLabels({
5140
- metadata: source.metadata,
5141
- source: source.source,
5142
- title: source.title
5143
- });
5144
- existing.structure = source.structure ?? buildRAGChunkStructure(source.metadata);
5145
- existing.source = source.source;
5146
- existing.title = source.title;
5147
- } else {
5148
- existing.bestScore = Math.max(existing.bestScore, source.score);
5149
- }
5211
+ existing.bestScore = Math.max(existing.bestScore, source.score);
5150
5212
  existing.count += 1;
5151
5213
  existing.chunks.push(source);
5214
+ const leadChunk = getPreferredSourceLeadChunk(existing.chunks);
5215
+ if (leadChunk) {
5216
+ existing.label = buildSourceLabel2(leadChunk);
5217
+ existing.labels = leadChunk.labels ?? buildRAGSourceLabels({
5218
+ metadata: leadChunk.metadata,
5219
+ source: leadChunk.source,
5220
+ title: leadChunk.title
5221
+ });
5222
+ existing.structure = leadChunk.structure ?? buildRAGChunkStructure(leadChunk.metadata);
5223
+ existing.source = leadChunk.source;
5224
+ existing.title = leadChunk.title;
5225
+ }
5152
5226
  };
5153
5227
  var getLatestAssistantMessage = (messages) => {
5154
5228
  for (let index = messages.length - 1;index >= 0; index -= 1) {
@@ -8485,6 +8559,55 @@ var scoreLoosePhraseMatch2 = (query, text) => {
8485
8559
  }
8486
8560
  return 0;
8487
8561
  };
8562
+ var queryHasAnyToken = (queryTokens, candidates) => candidates.some((candidate) => queryTokens.includes(candidate));
8563
+ var scoreStructuredEvidenceMatch = (queryTokens, result) => {
8564
+ const metadata = result.metadata ?? {};
8565
+ const pdfTextKind = typeof metadata.pdfTextKind === "string" ? metadata.pdfTextKind : undefined;
8566
+ const officeBlockKind = typeof metadata.officeBlockKind === "string" ? metadata.officeBlockKind : undefined;
8567
+ const hasBlockMetadata = typeof metadata.pdfBlockNumber === "number" || typeof metadata.officeBlockNumber === "number";
8568
+ let score = 0;
8569
+ if (hasBlockMetadata) {
8570
+ score += 0.12;
8571
+ }
8572
+ if (pdfTextKind === "table_like" && queryHasAnyToken(queryTokens, [
8573
+ "table",
8574
+ "row",
8575
+ "rows",
8576
+ "column",
8577
+ "columns",
8578
+ "spreadsheet",
8579
+ "sheet",
8580
+ "workbook"
8581
+ ])) {
8582
+ score += 0.65;
8583
+ }
8584
+ if (officeBlockKind === "table" && queryHasAnyToken(queryTokens, [
8585
+ "table",
8586
+ "row",
8587
+ "rows",
8588
+ "column",
8589
+ "columns",
8590
+ "matrix",
8591
+ "grid"
8592
+ ])) {
8593
+ score += 0.55;
8594
+ }
8595
+ if (officeBlockKind === "list" && queryHasAnyToken(queryTokens, [
8596
+ "list",
8597
+ "checklist",
8598
+ "bullet",
8599
+ "bullets",
8600
+ "step",
8601
+ "steps",
8602
+ "task",
8603
+ "tasks",
8604
+ "item",
8605
+ "items"
8606
+ ])) {
8607
+ score += 0.55;
8608
+ }
8609
+ return score;
8610
+ };
8488
8611
  var scoreHeuristicMatch = ({
8489
8612
  query,
8490
8613
  queryTokens,
@@ -8501,7 +8624,8 @@ var scoreHeuristicMatch = ({
8501
8624
  const exactPhraseBoost = Math.max(normalizeText([result.title, result.source, result.chunkText, ...metadataValues].filter(Boolean).join(" ")).includes(queryTokens.join(" ")) ? 1 : 0, scoreLoosePhraseMatch2(query, [result.title, result.source, result.chunkText, ...metadataValues].filter(Boolean).join(" ")));
8502
8625
  const sourcePathBoost = typeof result.source === "string" && queryTokens.some((token) => result.source?.toLowerCase().includes(token)) ? 0.5 : 0;
8503
8626
  const metadataBoost = metadataValues.length > 0 ? queryTokens.filter((token) => metadataValues.some((value) => value.toLowerCase().includes(token))).length / queryTokens.length : 0;
8504
- return result.score + overlapBoost + exactPhraseBoost + sourcePathBoost + metadataBoost;
8627
+ const structuredEvidenceBoost = scoreStructuredEvidenceMatch(queryTokens, result);
8628
+ return result.score + overlapBoost + exactPhraseBoost + sourcePathBoost + metadataBoost + structuredEvidenceBoost;
8505
8629
  };
8506
8630
  var normalizeText = (value) => tokenize3(value).join(" ");
8507
8631
  var applyRAGReranking = async ({
@@ -8736,32 +8860,59 @@ var stripHtmlTags = (value) => {
8736
8860
  `).replace(/<li\b[^>]*>/gi, "- ").replace(/<[^>]+>/g, " ");
8737
8861
  return decodeHtmlEntities(withoutTags);
8738
8862
  };
8863
+ var stripHtmlNoiseBlocks = (value) => value.replace(/<!--[\s\S]*?-->/g, " ").replace(/<(script|style|template|noscript)\b[^>]*>[\s\S]*?<\/\1>/gi, " ").replace(/<([a-z0-9:_-]+)\b[^>]*\b(hidden|aria-hidden=(['"])true\3)[^>]*>[\s\S]*?<\/\1>/gi, " ").replace(/<(nav|footer|header|aside|form|dialog)\b[^>]*>[\s\S]*?<\/\1>/gi, " ").replace(/<([a-z0-9:_-]+)\b[^>]*\b(?:id|class)=(['"])[^'"]*(nav|menu|footer|header|sidebar|promo|banner|cookie|breadcrumb|share|social|subscribe|newsletter|modal)[^'"]*\2[^>]*>[\s\S]*?<\/\1>/gi, " ");
8864
+ var collectHtmlContentCandidates = (value) => {
8865
+ const patterns = [
8866
+ {
8867
+ contentGroup: 1,
8868
+ pattern: /<main\b[^>]*>([\s\S]*?)<\/main>/gi
8869
+ },
8870
+ {
8871
+ contentGroup: 1,
8872
+ pattern: /<article\b[^>]*>([\s\S]*?)<\/article>/gi
8873
+ },
8874
+ {
8875
+ contentGroup: 3,
8876
+ pattern: /<([a-z0-9:_-]+)\b[^>]*\brole=(['"])main\2[^>]*>([\s\S]*?)<\/\1>/gi
8877
+ },
8878
+ {
8879
+ contentGroup: 4,
8880
+ pattern: /<([a-z0-9:_-]+)\b[^>]*\b(?:id|class)=(['"])[^'"]*(content|article|main|post|body)[^'"]*\2[^>]*>([\s\S]*?)<\/\1>/gi
8881
+ }
8882
+ ];
8883
+ const candidates = [];
8884
+ for (const entry of patterns) {
8885
+ for (const match of value.matchAll(entry.pattern)) {
8886
+ const rawCandidate = match[entry.contentGroup];
8887
+ const candidate = typeof rawCandidate === "string" ? rawCandidate : "";
8888
+ if (candidate.trim()) {
8889
+ candidates.push(candidate.trim());
8890
+ }
8891
+ }
8892
+ }
8893
+ return candidates;
8894
+ };
8739
8895
  var extractMainHtmlContent = (value) => {
8740
8896
  const trimmed = value.trim();
8741
8897
  if (!/<html\b|<body\b|<main\b|<article\b/i.test(trimmed)) {
8742
8898
  return value;
8743
8899
  }
8744
- const boilerplateStripped = trimmed.replace(/<script\b[^>]*>[\s\S]*?<\/script>/gi, " ").replace(/<style\b[^>]*>[\s\S]*?<\/style>/gi, " ").replace(/<(nav|footer|header|aside|form)\b[^>]*>[\s\S]*?<\/\1>/gi, " ");
8745
- const mainMatch = boilerplateStripped.match(/<main\b[^>]*>([\s\S]*?)<\/main>/i);
8746
- if (mainMatch?.[1]) {
8747
- return mainMatch[1];
8748
- }
8749
- const articleMatches = [
8750
- ...boilerplateStripped.matchAll(/<article\b[^>]*>([\s\S]*?)<\/article>/gi)
8751
- ].map((match) => match[1]?.trim()).filter(Boolean);
8752
- if (articleMatches.length > 0) {
8753
- return articleMatches.join(`
8754
- `);
8755
- }
8756
- const roleMainMatch = boilerplateStripped.match(/<([a-z0-9:_-]+)\b[^>]*\brole=(['"])main\2[^>]*>([\s\S]*?)<\/\1>/i);
8757
- if (roleMainMatch?.[3]) {
8758
- return roleMainMatch[3];
8900
+ const stripped = stripHtmlNoiseBlocks(trimmed);
8901
+ const candidates = collectHtmlContentCandidates(stripped);
8902
+ if (candidates.length > 0) {
8903
+ const bestCandidate = candidates.map((candidate) => ({
8904
+ candidate,
8905
+ score: stripHtmlTags(candidate).replace(/\s+/g, " ").trim().length
8906
+ })).sort((left, right) => right.score - left.score)[0]?.candidate;
8907
+ if (bestCandidate) {
8908
+ return bestCandidate;
8909
+ }
8759
8910
  }
8760
- const bodyMatch = boilerplateStripped.match(/<body\b[^>]*>([\s\S]*?)<\/body>/i);
8911
+ const bodyMatch = stripped.match(/<body\b[^>]*>([\s\S]*?)<\/body>/i);
8761
8912
  if (bodyMatch?.[1]) {
8762
8913
  return bodyMatch[1];
8763
8914
  }
8764
- return boilerplateStripped;
8915
+ return stripped;
8765
8916
  };
8766
8917
  var stripHtml = (value) => {
8767
8918
  const focused = extractMainHtmlContent(value);
@@ -8779,6 +8930,93 @@ var stripMarkdown = (value) => {
8779
8930
  `);
8780
8931
  return normalizeWhitespace(stripped);
8781
8932
  };
8933
+ var pdfNativeStructureUnits = (metadata) => {
8934
+ const blocks = Array.isArray(metadata?.pdfTextBlocks) ? metadata.pdfTextBlocks : [];
8935
+ const units = [];
8936
+ for (const block of blocks) {
8937
+ if (!block || typeof block !== "object") {
8938
+ continue;
8939
+ }
8940
+ const text = typeof block.text === "string" ? normalizeWhitespace(block.text) : "";
8941
+ if (!text) {
8942
+ continue;
8943
+ }
8944
+ const pageNumber = typeof block.pageNumber === "number" && Number.isFinite(block.pageNumber) ? block.pageNumber : undefined;
8945
+ const pdfBlockNumber = typeof block.blockNumber === "number" && Number.isFinite(block.blockNumber) ? block.blockNumber : undefined;
8946
+ const pdfTextKind = block.textKind === "table_like" ? "table_like" : "paragraph";
8947
+ const sectionTitle = pageNumber ? pdfTextKind === "table_like" ? `Page ${pageNumber} Table Block` : `Page ${pageNumber} Text Block` : pdfTextKind === "table_like" ? "Table Block" : "Text Block";
8948
+ units.push({
8949
+ pageNumber,
8950
+ pdfBlockNumber,
8951
+ pdfTextKind,
8952
+ preferredChunkUnits: pdfTextKind === "table_like" ? text.split(`
8953
+ `).filter(Boolean) : undefined,
8954
+ sectionDepth: 1,
8955
+ sectionKind: "pdf_block",
8956
+ sectionPath: [sectionTitle],
8957
+ sectionTitle,
8958
+ text
8959
+ });
8960
+ }
8961
+ return units;
8962
+ };
8963
+ var officeNativeStructureUnits = (metadata) => {
8964
+ const blocks = Array.isArray(metadata?.officeBlocks) ? metadata.officeBlocks : [];
8965
+ const units = [];
8966
+ const headingStack = [];
8967
+ const decorateOfficeSectionText = (text, sectionTitle) => {
8968
+ if (!sectionTitle || text.includes(sectionTitle)) {
8969
+ return text;
8970
+ }
8971
+ return normalizeWhitespace(`${sectionTitle}
8972
+ ${text}`);
8973
+ };
8974
+ for (const [index, block] of blocks.entries()) {
8975
+ if (!block || typeof block !== "object") {
8976
+ continue;
8977
+ }
8978
+ const text = typeof block.text === "string" ? normalizeWhitespace(block.text) : "";
8979
+ if (!text) {
8980
+ continue;
8981
+ }
8982
+ const officeBlockNumber = typeof block.blockNumber === "number" && Number.isFinite(block.blockNumber) ? block.blockNumber : undefined;
8983
+ const officeBlockKind = block.blockKind === "title" || block.blockKind === "heading" || block.blockKind === "list" || block.blockKind === "table" ? block.blockKind : "paragraph";
8984
+ const headingLevel = typeof block.headingLevel === "number" && Number.isFinite(block.headingLevel) ? block.headingLevel : undefined;
8985
+ if (officeBlockKind === "title" || officeBlockKind === "heading") {
8986
+ const level = officeBlockKind === "title" ? 1 : headingLevel ?? 1;
8987
+ headingStack[level - 1] = text;
8988
+ headingStack.length = level;
8989
+ const nextBlock = blocks[index + 1];
8990
+ const nextKind = nextBlock && typeof nextBlock === "object" ? nextBlock.blockKind : undefined;
8991
+ if (nextKind === "title" || nextKind === "heading" || nextKind === "list" || nextKind === "table" || !nextBlock) {
8992
+ units.push({
8993
+ officeBlockKind,
8994
+ officeBlockNumber,
8995
+ sectionDepth: headingStack.length,
8996
+ sectionKind: "office_heading",
8997
+ sectionPath: [...headingStack],
8998
+ sectionTitle: text,
8999
+ text
9000
+ });
9001
+ }
9002
+ continue;
9003
+ }
9004
+ const sectionPath = headingStack.length > 0 ? [...headingStack] : undefined;
9005
+ const sectionTitle = sectionPath?.at(-1);
9006
+ units.push({
9007
+ officeBlockKind,
9008
+ officeBlockNumber,
9009
+ preferredChunkUnits: officeBlockKind === "table" ? text.split(`
9010
+ `).filter(Boolean) : undefined,
9011
+ sectionDepth: sectionPath?.length,
9012
+ sectionKind: officeBlockKind === "paragraph" ? "office_heading" : "office_block",
9013
+ sectionPath,
9014
+ sectionTitle,
9015
+ text: officeBlockKind === "paragraph" ? decorateOfficeSectionText(text, sectionTitle) : text
9016
+ });
9017
+ }
9018
+ return units;
9019
+ };
8782
9020
  var markdownStructureUnits = (value) => {
8783
9021
  const lines = value.replace(/\r\n?/g, `
8784
9022
  `).split(`
@@ -9122,6 +9360,7 @@ var appendPdfLineBreak = (parts) => {
9122
9360
  parts.push(`
9123
9361
  `);
9124
9362
  };
9363
+ var PDF_CHROME_LINE_MAX_LENGTH = 80;
9125
9364
  var PDF_TEXT_OPERATOR_PATTERN = /(\[((?:\\.|[^\]])*)\]\s*TJ)|(\(((?:\\.|[^\\)])*)\)\s*Tj)|([-+]?\d*\.?\d+\s+[-+]?\d*\.?\d+\s+\(((?:\\.|[^\\)])*)\)\s*")|(\(((?:\\.|[^\\)])*)\)\s*')|((?:[-+]?\d*\.?\d+\s+){2}(?:Td|TD))|(T\*)|((?:[-+]?\d*\.?\d+\s+){6}Tm)/g;
9126
9365
  var extractTextFromPDFTextObject = (value) => {
9127
9366
  const parts = [];
@@ -9150,19 +9389,84 @@ var extractTextFromPDFTextObject = (value) => {
9150
9389
  }
9151
9390
  return parts.join("");
9152
9391
  };
9153
- var extractTextFromPDFBytes = (data) => {
9154
- const raw = Buffer.from(data).toString("latin1");
9155
- const textObjects = [...raw.matchAll(/BT([\s\S]*?)ET/g)].map((match) => extractTextFromPDFTextObject(match[1] ?? "")).filter(Boolean);
9156
- const combined = textObjects.length > 0 ? textObjects.join(`
9157
-
9158
- `) : [...raw.matchAll(/\(((?:\\.|[^\\)])*)\)\s*Tj/g)].map((match) => decodePdfLiteral(match[1] ?? "")).join(`
9159
- `);
9160
- return normalizeWhitespace(combined);
9392
+ var buildPDFNativeTextBlock = (text, blockNumber, pageNumber) => {
9393
+ const normalized = normalizeWhitespace(text);
9394
+ if (!normalized) {
9395
+ return;
9396
+ }
9397
+ const lineCount = normalized.split(`
9398
+ `).filter(Boolean).length;
9399
+ const textKind = normalized.includes(" | ") ? "table_like" : "paragraph";
9400
+ return {
9401
+ blockNumber,
9402
+ lineCount,
9403
+ pageNumber,
9404
+ text: normalized,
9405
+ textKind
9406
+ };
9407
+ };
9408
+ var isLikelyPDFPageLabel = (value) => /^page\s+\d+(?:\s+of\s+\d+)?$/i.test(value.trim());
9409
+ var suppressRepeatedPDFChrome = (blocks) => {
9410
+ const linePages = new Map;
9411
+ for (const block of blocks) {
9412
+ for (const line of block.text.split(`
9413
+ `)) {
9414
+ const normalized = normalizeWhitespace(line);
9415
+ if (!normalized || normalized.length > PDF_CHROME_LINE_MAX_LENGTH) {
9416
+ continue;
9417
+ }
9418
+ const pages = linePages.get(normalized) ?? new Set;
9419
+ pages.add(block.pageNumber);
9420
+ linePages.set(normalized, pages);
9421
+ }
9422
+ }
9423
+ return blocks.map((block) => {
9424
+ const keptLines = block.text.split(`
9425
+ `).map((line) => normalizeWhitespace(line)).filter((line) => {
9426
+ if (!line) {
9427
+ return false;
9428
+ }
9429
+ if (isLikelyPDFPageLabel(line)) {
9430
+ return false;
9431
+ }
9432
+ const repeatedPages = linePages.get(line);
9433
+ if (line.length <= PDF_CHROME_LINE_MAX_LENGTH && repeatedPages && repeatedPages.size > 1) {
9434
+ return false;
9435
+ }
9436
+ return true;
9437
+ });
9438
+ const text = normalizeWhitespace(keptLines.join(`
9439
+ `));
9440
+ if (!text) {
9441
+ return;
9442
+ }
9443
+ return buildPDFNativeTextBlock(text, block.blockNumber, block.pageNumber);
9444
+ }).filter((value) => Boolean(value));
9161
9445
  };
9162
- var estimatePDFPageCount = (data) => {
9446
+ var extractNativePDFText = (data) => {
9163
9447
  const raw = Buffer.from(data).toString("latin1");
9164
9448
  const count = [...raw.matchAll(/\/Type\s*\/Page\b/g)].length;
9165
- return count > 0 ? count : 1;
9449
+ const pageCount = count > 0 ? count : 1;
9450
+ const pageMarkers = [...raw.matchAll(/\/Type\s*\/Page\b/g)].map((match) => match.index ?? raw.length);
9451
+ const blocks = [...raw.matchAll(/BT([\s\S]*?)ET/g)].map((match, index) => {
9452
+ const blockText = extractTextFromPDFTextObject(match[1] ?? "");
9453
+ const objectEnd = (match.index ?? 0) + (match[0]?.length ?? 0);
9454
+ const pageIndex = pageMarkers.findIndex((marker) => marker >= objectEnd);
9455
+ const pageNumber = pageIndex >= 0 ? pageIndex + 1 : pageCount;
9456
+ return buildPDFNativeTextBlock(blockText, index + 1, pageNumber);
9457
+ }).filter((value) => Boolean(value));
9458
+ const visibleBlocks = suppressRepeatedPDFChrome(blocks);
9459
+ const fallbackText = [...raw.matchAll(/\(((?:\\.|[^\\)])*)\)\s*Tj/g)].map((match) => decodePdfLiteral(match[1] ?? "")).join(`
9460
+ `);
9461
+ const text = visibleBlocks.length > 0 ? normalizeWhitespace(visibleBlocks.map((block) => block.text).join(`
9462
+
9463
+ `)) : normalizeWhitespace(fallbackText);
9464
+ return {
9465
+ pageCount,
9466
+ text,
9467
+ textBlockCount: visibleBlocks.length,
9468
+ textBlocks: visibleBlocks
9469
+ };
9166
9470
  };
9167
9471
  var readUInt16LE = (data, offset) => data[offset] | data[offset + 1] << 8;
9168
9472
  var readUInt32LE = (data, offset) => (data[offset] | data[offset + 1] << 8 | data[offset + 2] << 16 | data[offset + 3] << 24) >>> 0;
@@ -9249,35 +9553,64 @@ var decodeGzipEntries = (data, input) => {
9249
9553
  var extractXmlText = (value) => normalizeWhitespace(decodeHtmlEntities(value.replace(/<[^>]+>/g, " ").replace(/\s+/g, " ")));
9250
9554
  var extractOfficeParagraphText = (value) => normalizeWhitespace(decodeHtmlEntities(value.replace(/<w:tab\b[^>]*\/>/gi, "\t").replace(/<w:br\b[^>]*\/>/gi, `
9251
9555
  `).replace(/<[^>]+>/g, " ")));
9252
- var officeDocumentParagraphs = (entries) => {
9556
+ var officeDocumentBlocks = (entries) => {
9253
9557
  const documentEntry = entries.find((entry) => entry.path === "word/document.xml");
9254
9558
  if (!documentEntry) {
9255
9559
  return [];
9256
9560
  }
9257
9561
  const xml = decodeUtf8(documentEntry.data);
9258
- const paragraphs = [...xml.matchAll(/<w:p\b[\s\S]*?<\/w:p>/g)];
9259
- return paragraphs.map((match) => {
9260
- const paragraphXml = match[0] ?? "";
9261
- const text = extractOfficeParagraphText(paragraphXml);
9562
+ const bodyMatch = xml.match(/<w:body\b[^>]*>([\s\S]*?)<\/w:body>/i);
9563
+ const body = bodyMatch?.[1] ?? xml;
9564
+ const blocks = [];
9565
+ const blockPattern = /<(w:p|w:tbl)\b[\s\S]*?<\/\1>/g;
9566
+ for (const match of body.matchAll(blockPattern)) {
9567
+ const blockXml = match[0] ?? "";
9568
+ if (blockXml.startsWith("<w:tbl")) {
9569
+ const rows = [...blockXml.matchAll(/<w:tr\b[\s\S]*?<\/w:tr>/g)].map((rowMatch, rowIndex) => {
9570
+ const cells = [
9571
+ ...(rowMatch[0] ?? "").matchAll(/<w:tc\b[\s\S]*?<\/w:tc>/g)
9572
+ ].map((cellMatch) => extractOfficeParagraphText(cellMatch[0] ?? "")).filter(Boolean);
9573
+ if (cells.length === 0) {
9574
+ return "";
9575
+ }
9576
+ return `Row ${rowIndex + 1}. ${cells.map((cell, cellIndex) => `${String.fromCharCode(65 + cellIndex)}: ${cell}`).join(" | ")}`;
9577
+ }).filter(Boolean);
9578
+ const text2 = normalizeWhitespace(rows.join(`
9579
+ `));
9580
+ if (!text2) {
9581
+ continue;
9582
+ }
9583
+ blocks.push({
9584
+ blockKind: "table",
9585
+ blockNumber: blocks.length + 1,
9586
+ text: text2
9587
+ });
9588
+ continue;
9589
+ }
9590
+ const text = extractOfficeParagraphText(blockXml);
9262
9591
  if (!text) {
9263
- return "";
9592
+ continue;
9264
9593
  }
9265
- const styleMatch = paragraphXml.match(/<w:pStyle\b[^>]*w:val="([^"]+)"[^>]*\/?>/i);
9594
+ const styleMatch = blockXml.match(/<w:pStyle\b[^>]*w:val="([^"]+)"[^>]*\/?>/i);
9266
9595
  const style = (styleMatch?.[1] ?? "").toLowerCase();
9267
- if (style === "title") {
9268
- return text;
9269
- }
9270
9596
  const headingMatch = style.match(/^heading([1-6])$/);
9271
- if (headingMatch) {
9272
- return text;
9273
- }
9274
- return text;
9275
- }).filter(Boolean);
9597
+ const isListParagraph = /<w:numPr\b/i.test(blockXml) || style.includes("list") || style.includes("bullet");
9598
+ const blockKind = style === "title" ? "title" : headingMatch ? "heading" : isListParagraph ? "list" : "paragraph";
9599
+ const decoratedText = blockKind === "list" && !/^[-*]\s/.test(text) ? `- ${text}` : text;
9600
+ blocks.push({
9601
+ blockKind,
9602
+ blockNumber: blocks.length + 1,
9603
+ headingLevel: headingMatch ? Number.parseInt(headingMatch[1] ?? "1", 10) : undefined,
9604
+ style: style || undefined,
9605
+ text: decoratedText
9606
+ });
9607
+ }
9608
+ return blocks;
9276
9609
  };
9277
9610
  var officeDocumentText = (entries) => {
9278
- const paragraphs = officeDocumentParagraphs(entries);
9279
- if (paragraphs.length > 0) {
9280
- return normalizeWhitespace(paragraphs.join(`
9611
+ const blocks = officeDocumentBlocks(entries);
9612
+ if (blocks.length > 0) {
9613
+ return normalizeWhitespace(blocks.map((block) => block.text).join(`
9281
9614
 
9282
9615
  `));
9283
9616
  }
@@ -9288,11 +9621,7 @@ var officeDocumentText = (entries) => {
9288
9621
  return extractXmlText(decodeUtf8(documentEntry.data));
9289
9622
  };
9290
9623
  var officeDocumentSectionCount = (entries) => {
9291
- const documentEntry = entries.find((entry) => entry.path === "word/document.xml");
9292
- if (!documentEntry) {
9293
- return;
9294
- }
9295
- const count = [...decodeUtf8(documentEntry.data).matchAll(/<w:p\b/g)].length;
9624
+ const count = officeDocumentBlocks(entries).length;
9296
9625
  return count > 0 ? count : undefined;
9297
9626
  };
9298
9627
  var spreadsheetSharedStrings = (entries) => entries.filter((entry) => entry.path === "xl/sharedStrings.xml").flatMap((entry) => [
@@ -9822,8 +10151,10 @@ var createOfficeDocumentExtractor = () => ({
9822
10151
  let officeMetadata = {};
9823
10152
  let structuredDocuments = [];
9824
10153
  if (extension === ".docx" || extension === ".odt") {
10154
+ const officeBlocks = officeDocumentBlocks(entries);
9825
10155
  text = officeDocumentText(entries);
9826
10156
  officeMetadata = {
10157
+ officeBlocks,
9827
10158
  sectionCount: officeDocumentSectionCount(entries)
9828
10159
  };
9829
10160
  } else if (extension === ".xlsx" || extension === ".ods") {
@@ -10013,8 +10344,8 @@ var createPDFFileExtractor = () => ({
10013
10344
  name: "absolute_pdf",
10014
10345
  supports: pdfExtractorSupports,
10015
10346
  extract: (input) => {
10016
- const text = extractTextFromPDFBytes(input.data);
10017
- if (!text) {
10347
+ const extracted = extractNativePDFText(input.data);
10348
+ if (!extracted.text) {
10018
10349
  throw new Error("AbsoluteJS could not extract readable text from this PDF. Supply a custom extractor for scanned or image-only PDFs.");
10019
10350
  }
10020
10351
  return {
@@ -10024,10 +10355,12 @@ var createPDFFileExtractor = () => ({
10024
10355
  metadata: {
10025
10356
  ...input.metadata ?? {},
10026
10357
  fileKind: "pdf",
10027
- pageCount: estimatePDFPageCount(input.data)
10358
+ pageCount: extracted.pageCount,
10359
+ pdfTextBlockCount: extracted.textBlockCount,
10360
+ pdfTextBlocks: extracted.textBlocks
10028
10361
  },
10029
10362
  source: input.source ?? input.path ?? input.name ?? `${slugify(input.title ?? DEFAULT_BINARY_NAME)}.pdf`,
10030
- text,
10363
+ text: extracted.text,
10031
10364
  title: input.title
10032
10365
  };
10033
10366
  }
@@ -10052,7 +10385,8 @@ var createRAGPDFOCRExtractor = (options) => ({
10052
10385
  name: `absolute_pdf_ocr:${options.provider.name}`,
10053
10386
  supports: pdfExtractorSupports,
10054
10387
  extract: async (input) => {
10055
- const nativeText = extractTextFromPDFBytes(input.data);
10388
+ const extracted = extractNativePDFText(input.data);
10389
+ const nativeText = extracted.text;
10056
10390
  const minLength = options.minExtractedTextLength ?? 80;
10057
10391
  const shouldUseNativeText = !options.alwaysOCR && nativeText.length >= minLength;
10058
10392
  if (shouldUseNativeText) {
@@ -10063,7 +10397,9 @@ var createRAGPDFOCRExtractor = (options) => ({
10063
10397
  metadata: {
10064
10398
  ...input.metadata ?? {},
10065
10399
  fileKind: "pdf",
10066
- pageCount: estimatePDFPageCount(input.data),
10400
+ pageCount: extracted.pageCount,
10401
+ pdfTextBlockCount: extracted.textBlockCount,
10402
+ pdfTextBlocks: extracted.textBlocks,
10067
10403
  pdfTextMode: "native"
10068
10404
  },
10069
10405
  source: input.source ?? input.path ?? input.name ?? `${slugify(input.title ?? DEFAULT_BINARY_NAME)}.pdf`,
@@ -10078,7 +10414,7 @@ var createRAGPDFOCRExtractor = (options) => ({
10078
10414
  const baseMetadata = {
10079
10415
  ...ocrMetadata(ocr),
10080
10416
  fileKind: "pdf",
10081
- pageCount: estimatePDFPageCount(input.data),
10417
+ pageCount: extracted.pageCount,
10082
10418
  pdfTextMode: "ocr"
10083
10419
  };
10084
10420
  const summaryDocument = {
@@ -10251,6 +10587,18 @@ var sourceAwareUnits = (document, format, normalizedText) => {
10251
10587
  }
10252
10588
  case "text":
10253
10589
  default:
10590
+ if (document.metadata?.fileKind === "office") {
10591
+ const sections = officeNativeStructureUnits(document.metadata);
10592
+ if (sections.length > 0) {
10593
+ return sections;
10594
+ }
10595
+ }
10596
+ if (document.metadata?.fileKind === "pdf") {
10597
+ const sections = pdfNativeStructureUnits(document.metadata);
10598
+ if (sections.length > 0) {
10599
+ return sections;
10600
+ }
10601
+ }
10254
10602
  if (document.metadata?.sourceNativeKind === "spreadsheet_sheet") {
10255
10603
  return spreadsheetStructureUnits(normalizedText, document.metadata);
10256
10604
  }
@@ -10574,6 +10922,11 @@ var prepareRAGDocument = (document, defaultChunking, chunkingRegistry) => {
10574
10922
  ...sectionTitle ? { sectionTitle } : {},
10575
10923
  ...sectionPath && sectionPath.length > 0 ? { sectionPath } : {},
10576
10924
  ...typeof entry.sectionDepth === "number" ? { sectionDepth: entry.sectionDepth } : {},
10925
+ ...typeof entry.pageNumber === "number" ? { pageNumber: entry.pageNumber } : {},
10926
+ ...typeof entry.officeBlockNumber === "number" ? { officeBlockNumber: entry.officeBlockNumber } : {},
10927
+ ...entry.officeBlockKind ? { officeBlockKind: entry.officeBlockKind } : {},
10928
+ ...typeof entry.pdfBlockNumber === "number" ? { pdfBlockNumber: entry.pdfBlockNumber } : {},
10929
+ ...entry.pdfTextKind ? { pdfTextKind: entry.pdfTextKind } : {},
10577
10930
  ...entry.sectionKind ? { sectionKind: entry.sectionKind } : {},
10578
10931
  ...sectionChunkId ? { sectionChunkId } : {},
10579
10932
  ...sectionChunkId && sectionChunkIndex >= 0 ? {
@@ -10962,9 +11315,25 @@ var annotateRetrievalChannels = (input) => {
10962
11315
  };
10963
11316
  });
10964
11317
  };
11318
+ var getStructuredSectionScoreWeight2 = (metadata) => {
11319
+ const pdfTextKind = typeof metadata?.pdfTextKind === "string" ? metadata.pdfTextKind : undefined;
11320
+ const officeBlockKind = typeof metadata?.officeBlockKind === "string" ? metadata.officeBlockKind : undefined;
11321
+ const sectionKind = typeof metadata?.sectionKind === "string" ? metadata.sectionKind : undefined;
11322
+ if (pdfTextKind === "table_like") {
11323
+ return 1.28;
11324
+ }
11325
+ if (officeBlockKind === "table" || officeBlockKind === "list") {
11326
+ return 1.24;
11327
+ }
11328
+ if (sectionKind === "pdf_block" || sectionKind === "office_block" || officeBlockKind === "paragraph" || pdfTextKind === "paragraph") {
11329
+ return 1.12;
11330
+ }
11331
+ return 1;
11332
+ };
10965
11333
  var buildTraceSectionCounts = (results) => {
10966
11334
  const sections = new Map;
10967
11335
  for (const result of results) {
11336
+ const weightedScore = result.score * getStructuredSectionScoreWeight2(result.metadata);
10968
11337
  const path = Array.isArray(result.metadata?.sectionPath) ? result.metadata.sectionPath.filter((value) => typeof value === "string" && value.trim().length > 0) : [];
10969
11338
  if (path.length === 0) {
10970
11339
  continue;
@@ -10991,6 +11360,7 @@ var buildTraceSectionCounts = (results) => {
10991
11360
  var buildTraceSectionScores = (results) => {
10992
11361
  const sections = new Map;
10993
11362
  for (const result of results) {
11363
+ const weightedScore = result.score * getStructuredSectionScoreWeight2(result.metadata);
10994
11364
  const path = Array.isArray(result.metadata?.sectionPath) ? result.metadata.sectionPath.filter((value) => typeof value === "string" && value.trim().length > 0) : [];
10995
11365
  if (path.length === 0) {
10996
11366
  continue;
@@ -10998,13 +11368,13 @@ var buildTraceSectionScores = (results) => {
10998
11368
  const key = path.join(" > ");
10999
11369
  const existing = sections.get(key);
11000
11370
  if (existing) {
11001
- existing.totalScore += result.score;
11371
+ existing.totalScore += weightedScore;
11002
11372
  continue;
11003
11373
  }
11004
11374
  sections.set(key, {
11005
11375
  key,
11006
11376
  label: path.at(-1) ?? key,
11007
- totalScore: result.score
11377
+ totalScore: weightedScore
11008
11378
  });
11009
11379
  }
11010
11380
  return [...sections.values()].sort((left, right) => {
@@ -11443,11 +11813,32 @@ var renderSourceLabels = (input) => {
11443
11813
  ].filter((row) => row.length > 0);
11444
11814
  return rows.length > 0 ? `<ul class="rag-source-labels">${rows.join("")}</ul>` : "";
11445
11815
  };
11816
+ var formatStructureKindLabel = (kind) => {
11817
+ switch (kind) {
11818
+ case "markdown_heading":
11819
+ return "Markdown heading";
11820
+ case "html_heading":
11821
+ return "HTML heading";
11822
+ case "office_heading":
11823
+ return "Office heading";
11824
+ case "office_block":
11825
+ return "Office block";
11826
+ case "pdf_block":
11827
+ return "PDF block";
11828
+ case "spreadsheet_rows":
11829
+ return "Spreadsheet rows";
11830
+ case "presentation_slide":
11831
+ return "Presentation slide";
11832
+ default:
11833
+ return;
11834
+ }
11835
+ };
11446
11836
  var renderChunkStructure = (structure) => {
11447
11837
  if (!structure) {
11448
11838
  return "";
11449
11839
  }
11450
11840
  const rows = [
11841
+ structure.section?.kind ? `<li><strong>Kind</strong> ${escapeHtml2(formatStructureKindLabel(structure.section.kind) ?? structure.section.kind)}</li>` : "",
11451
11842
  structure.section?.title ? `<li><strong>Section</strong> ${escapeHtml2(structure.section.title)}</li>` : "",
11452
11843
  structure.section?.path && structure.section.path.length > 1 ? `<li><strong>Section path</strong> ${escapeHtml2(structure.section.path.join(" > "))}</li>` : "",
11453
11844
  typeof structure.sequence?.sectionChunkIndex === "number" && typeof structure.sequence?.sectionChunkCount === "number" ? `<li><strong>Section chunk</strong> ${structure.sequence.sectionChunkIndex + 1} of ${structure.sequence.sectionChunkCount}</li>` : "",
@@ -23887,5 +24278,5 @@ export {
23887
24278
  aiChat
23888
24279
  };
23889
24280
 
23890
- //# debugId=3A168E4E2E133AED64756E2164756E21
24281
+ //# debugId=23520EDE705830A964756E2164756E21
23891
24282
  //# sourceMappingURL=index.js.map