@chialab/pdfjs-lib 1.0.0-alpha.28 → 1.0.0-alpha.29

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -39173,79 +39173,6 @@ PDFPageProxy.prototype.getAnnotations = async function(params) {
39173
39173
  return makeSerializable(annotations);
39174
39174
  };
39175
39175
 
39176
- // src/lib/WasmFactory.ts
39177
- var WasmFactory = class {
39178
- async fetch({
39179
- filename
39180
- }) {
39181
- switch (filename) {
39182
- case "openjpeg.wasm":
39183
- return import("./openjpeg-TRZ4ANDN.js").then(
39184
- (module) => module.default
39185
- );
39186
- case "qcms_bg":
39187
- return import("./qcms_bg-PRPVJQFC.js").then(
39188
- (module) => module.default
39189
- );
39190
- }
39191
- return Uint8Array.from([]);
39192
- }
39193
- };
39194
-
39195
- // src/lib/StandardFontDataFactory.ts
39196
- var StandardFontDataFactory = class extends BaseStandardFontDataFactory {
39197
- constructor() {
39198
- super({
39199
- baseUrl: null
39200
- });
39201
- }
39202
- /**
39203
- * Fetch the corresponding standard font data.
39204
- * We need to use specific dynamic imports for each font file for the bundler to include them.
39205
- */
39206
- async fetch({
39207
- filename
39208
- }) {
39209
- switch (filename) {
39210
- case "FoxitDingbats.pfb":
39211
- return import("./FoxitDingbats-UZC34T4F.js").then((module) => module.default);
39212
- case "FoxitFixed.pfb":
39213
- return import("./FoxitFixed-DSJVG3IJ.js").then(
39214
- (module) => module.default
39215
- );
39216
- case "FoxitFixedBold.pfb":
39217
- return import("./FoxitFixedBold-YSX4X5NA.js").then((module) => module.default);
39218
- case "FoxitFixedBoldItalic.pfb":
39219
- return import("./FoxitFixedBoldItalic-IFYYA6RR.js").then((module) => module.default);
39220
- case "FoxitFixedItalic.pfb":
39221
- return import("./FoxitFixedItalic-4PEFPCQV.js").then((module) => module.default);
39222
- case "FoxitSerif.pfb":
39223
- return import("./FoxitSerif-7SSP2H2U.js").then(
39224
- (module) => module.default
39225
- );
39226
- case "FoxitSerifBold.pfb":
39227
- return import("./FoxitSerifBold-KLMURJ6N.js").then((module) => module.default);
39228
- case "FoxitSerifBoldItalic.pfb":
39229
- return import("./FoxitSerifBoldItalic-VSYHWLRM.js").then((module) => module.default);
39230
- case "FoxitSerifItalic.pfb":
39231
- return import("./FoxitSerifItalic-NTOPMQDD.js").then((module) => module.default);
39232
- case "FoxitSymbol.pfb":
39233
- return import("./FoxitSymbol-QPUM74UN.js").then(
39234
- (module) => module.default
39235
- );
39236
- case "LiberationSans-Bold.ttf":
39237
- return import("./LiberationSans-Bold-BXFYN4PV.js").then((module) => module.default);
39238
- case "LiberationSans-BoldItalic.ttf":
39239
- return import("./LiberationSans-BoldItalic-DQQC5TNJ.js").then((module) => module.default);
39240
- case "LiberationSans-Italic.ttf":
39241
- return import("./LiberationSans-Italic-Z4MFN6PY.js").then((module) => module.default);
39242
- case "LiberationSans-Regular.ttf":
39243
- return import("./LiberationSans-Regular-MACKS2VL.js").then((module) => module.default);
39244
- }
39245
- return Uint8Array.from([]);
39246
- }
39247
- };
39248
-
39249
39176
  // src/lib/AnnotationData.ts
39250
39177
  function isTextAnnotation(annotation) {
39251
39178
  return annotation.subtype === "Text";
@@ -39335,59 +39262,6 @@ function isRedactAnnotation(annotation) {
39335
39262
  return annotation.subtype === "Redact";
39336
39263
  }
39337
39264
 
39338
- // src/lib/CanvasGraphics.ts
39339
- var {
39340
- beginDrawing,
39341
- beginText,
39342
- endText,
39343
- beginMarkedContent,
39344
- beginMarkedContentProps,
39345
- endMarkedContent
39346
- } = CanvasGraphics.prototype;
39347
- CanvasGraphics.prototype.beginDrawing = function(options) {
39348
- if (this.ctx instanceof SvgCanvasContext) {
39349
- options.transparency = false;
39350
- }
39351
- return beginDrawing.call(this, options);
39352
- };
39353
- CanvasGraphics.prototype.beginText = function(opIdx) {
39354
- beginText.call(this, opIdx);
39355
- if (this.ctx instanceof SvgCanvasContext) {
39356
- this.ctx.beginText();
39357
- }
39358
- };
39359
- CanvasGraphics.prototype.endText = function(opIdx) {
39360
- endText.call(this, opIdx);
39361
- if (this.ctx instanceof SvgCanvasContext) {
39362
- this.ctx.endText();
39363
- }
39364
- };
39365
- CanvasGraphics.prototype.beginMarkedContent = function(opIdx, type) {
39366
- beginMarkedContent.call(this, opIdx, this.ctx);
39367
- if (this.ctx instanceof SvgCanvasContext) {
39368
- this.ctx.beginMarkedContent(type);
39369
- }
39370
- };
39371
- CanvasGraphics.prototype.beginMarkedContentProps = function(opIdx, type, props) {
39372
- beginMarkedContentProps.call(this, opIdx, type, props);
39373
- if (this.ctx instanceof SvgCanvasContext) {
39374
- this.ctx.beginMarkedContent(type, props);
39375
- }
39376
- };
39377
- CanvasGraphics.prototype.endMarkedContent = function(opIdx) {
39378
- if (this.ctx instanceof SvgCanvasContext) {
39379
- this.ctx.endMarkedContent();
39380
- }
39381
- endMarkedContent.call(this, opIdx);
39382
- };
39383
- Object.assign(CanvasGraphics.prototype, {
39384
- [OPS.beginText]: CanvasGraphics.prototype.beginText,
39385
- [OPS.endText]: CanvasGraphics.prototype.endText,
39386
- [OPS.beginMarkedContent]: CanvasGraphics.prototype.beginMarkedContent,
39387
- [OPS.beginMarkedContentProps]: CanvasGraphics.prototype.beginMarkedContentProps,
39388
- [OPS.endMarkedContent]: CanvasGraphics.prototype.endMarkedContent
39389
- });
39390
-
39391
39265
  // src/lib/TextLayer.ts
39392
39266
  function isTextNode(node) {
39393
39267
  return node.role === "text";
@@ -39446,6 +39320,7 @@ async function loadTextLayerFonts(document2) {
39446
39320
  });
39447
39321
  return loadedFontsPromise;
39448
39322
  }
39323
+ var loadDefaultFonts2 = loadTextLayerFonts;
39449
39324
  var normalizeMarkedContentId = (idRef) => {
39450
39325
  if (idRef == null) {
39451
39326
  return null;
@@ -39458,10 +39333,321 @@ var normalizeMarkedContentId = (idRef) => {
39458
39333
  }
39459
39334
  return `${idRef}`;
39460
39335
  };
39336
+ var getNodeContents = (node) => {
39337
+ if (isTextNode(node)) {
39338
+ return typeof node.text === "string" ? node.text : node.text.map((t) => t.text).join("");
39339
+ }
39340
+ if (isElementNode(node)) {
39341
+ return node.children.map((n) => {
39342
+ if (typeof n === "string") {
39343
+ return n;
39344
+ }
39345
+ return getNodeContents(n);
39346
+ }).join("");
39347
+ }
39348
+ return "";
39349
+ };
39350
+ var findNode = (root, callback) => {
39351
+ for (let i = root.children.length - 1; i >= 0; i--) {
39352
+ const child = root.children[i];
39353
+ if (typeof child === "string") {
39354
+ continue;
39355
+ }
39356
+ if (callback(child, root)) {
39357
+ return child;
39358
+ }
39359
+ if (isElementNode(child)) {
39360
+ const found = findNode(child, callback);
39361
+ if (found) {
39362
+ return found;
39363
+ }
39364
+ }
39365
+ }
39366
+ return null;
39367
+ };
39368
+ var findNodes = (node, callback) => {
39369
+ return node.children.reduce((nodes, child) => {
39370
+ if (typeof child === "string") {
39371
+ return nodes;
39372
+ }
39373
+ if (callback(child, node)) {
39374
+ nodes.push(child);
39375
+ }
39376
+ if (isElementNode(child)) {
39377
+ nodes.push(...findNodes(child, callback));
39378
+ }
39379
+ return nodes;
39380
+ }, []);
39381
+ };
39382
+ var flattenNodes = (node) => {
39383
+ if (typeof node === "string") {
39384
+ return [];
39385
+ }
39386
+ if (isElementNode(node)) {
39387
+ return [node, ...node.children.flatMap(flattenNodes)];
39388
+ }
39389
+ return [node];
39390
+ };
39461
39391
  var MAX_TEXT_DIVS_TO_RENDER2 = 1e5;
39462
39392
  var DEFAULT_FONT_SIZE3 = 30;
39463
39393
  var DEFAULT_FONT_ASCENT = 0.8;
39464
39394
  var HYPHEN_REGEX = /-\n+$/;
39395
+ var decorateStructTree = (node, rootContainer, graphics, annotations, parents = []) => {
39396
+ let parent = parents.at(-1) || rootContainer;
39397
+ if ("role" in node) {
39398
+ const role = node.role.toLowerCase();
39399
+ switch (role) {
39400
+ case "root":
39401
+ case "document":
39402
+ case "art":
39403
+ parent.attrs ?? (parent.attrs = {});
39404
+ Object.assign(parent.attrs, node.attrs);
39405
+ for (const child of [...node.children]) {
39406
+ decorateStructTree(
39407
+ child,
39408
+ rootContainer,
39409
+ graphics,
39410
+ annotations,
39411
+ parents
39412
+ );
39413
+ }
39414
+ break;
39415
+ case "part":
39416
+ case "sect": {
39417
+ const section = {
39418
+ role: "section",
39419
+ children: [],
39420
+ attrs: node.attrs
39421
+ };
39422
+ while (parents.find(
39423
+ (p) => ["h1", "h2", "h3", "h4", "h5", "h6"].includes(p.role)
39424
+ )) {
39425
+ parents.pop();
39426
+ parent = parents.at(-1) || rootContainer;
39427
+ }
39428
+ parent.children.push(section);
39429
+ for (const child of [...node.children]) {
39430
+ decorateStructTree(child, rootContainer, graphics, annotations, [
39431
+ ...parents,
39432
+ section
39433
+ ]);
39434
+ }
39435
+ break;
39436
+ }
39437
+ case "lbl":
39438
+ case "lbody":
39439
+ case "span": {
39440
+ for (const child of [...node.children]) {
39441
+ decorateStructTree(
39442
+ child,
39443
+ rootContainer,
39444
+ graphics,
39445
+ annotations,
39446
+ parents
39447
+ );
39448
+ }
39449
+ break;
39450
+ }
39451
+ case "link": {
39452
+ const [ref, ...children] = node.children;
39453
+ if (ref.type !== "object") {
39454
+ console.warn(
39455
+ `Unsupported link type: ${ref.type}`
39456
+ );
39457
+ for (const child of children) {
39458
+ decorateStructTree(
39459
+ child,
39460
+ rootContainer,
39461
+ graphics,
39462
+ annotations,
39463
+ parents
39464
+ );
39465
+ }
39466
+ return;
39467
+ }
39468
+ const annotation = annotations?.find(
39469
+ (note) => note.id === ref.id
39470
+ );
39471
+ if (!annotation) {
39472
+ console.warn("Link not found", ref);
39473
+ for (const child of children) {
39474
+ decorateStructTree(
39475
+ child,
39476
+ rootContainer,
39477
+ graphics,
39478
+ annotations,
39479
+ parents
39480
+ );
39481
+ }
39482
+ return;
39483
+ }
39484
+ if (isLinkAnnotation(annotation)) {
39485
+ const anchor = {
39486
+ role: "a",
39487
+ href: annotation.url,
39488
+ children: []
39489
+ };
39490
+ parent.children.push(anchor);
39491
+ for (const child of children) {
39492
+ decorateStructTree(child, rootContainer, graphics, annotations, [
39493
+ ...parents,
39494
+ anchor
39495
+ ]);
39496
+ }
39497
+ } else {
39498
+ console.warn(`Unsupported annotation subtype: ${annotation.subtype}`);
39499
+ for (const child of children) {
39500
+ decorateStructTree(
39501
+ child,
39502
+ rootContainer,
39503
+ graphics,
39504
+ annotations,
39505
+ parents
39506
+ );
39507
+ }
39508
+ }
39509
+ break;
39510
+ }
39511
+ case "p": {
39512
+ if (node.children.length === 0) {
39513
+ break;
39514
+ }
39515
+ if (node.children.length === 1 && node.children[0].role === "Table") {
39516
+ decorateStructTree(
39517
+ node.children[0],
39518
+ rootContainer,
39519
+ graphics,
39520
+ annotations,
39521
+ parents
39522
+ );
39523
+ break;
39524
+ }
39525
+ const paragraph = {
39526
+ role: "p",
39527
+ children: [],
39528
+ attrs: node.attrs
39529
+ };
39530
+ parent.children.push(paragraph);
39531
+ for (const child of [...node.children]) {
39532
+ decorateStructTree(child, rootContainer, graphics, annotations, [
39533
+ ...parents,
39534
+ paragraph
39535
+ ]);
39536
+ }
39537
+ break;
39538
+ }
39539
+ case "l": {
39540
+ const list = {
39541
+ role: "ul",
39542
+ children: [],
39543
+ attrs: node.attrs
39544
+ };
39545
+ parent.children.push(list);
39546
+ for (const child of [...node.children]) {
39547
+ decorateStructTree(child, rootContainer, graphics, annotations, [
39548
+ ...parents,
39549
+ list
39550
+ ]);
39551
+ }
39552
+ break;
39553
+ }
39554
+ case "table":
39555
+ case "thead":
39556
+ case "tbody":
39557
+ case "tfoot":
39558
+ case "tr":
39559
+ case "h1":
39560
+ case "h2":
39561
+ case "h3":
39562
+ case "h4":
39563
+ case "h5":
39564
+ case "h6":
39565
+ case "li":
39566
+ case "td":
39567
+ case "th": {
39568
+ const block = {
39569
+ role,
39570
+ children: [],
39571
+ attrs: node.attrs
39572
+ };
39573
+ parent.children.push(block);
39574
+ for (const child of [...node.children]) {
39575
+ decorateStructTree(child, rootContainer, graphics, annotations, [
39576
+ ...parents,
39577
+ block
39578
+ ]);
39579
+ }
39580
+ break;
39581
+ }
39582
+ case "figure": {
39583
+ if (graphics) {
39584
+ for (const child of [...node.children]) {
39585
+ if (!("id" in child)) {
39586
+ continue;
39587
+ }
39588
+ const contentId = normalizeMarkedContentId(child.id);
39589
+ const useId = `#${id("marked_content" /* MarkedContent */, contentId, graphics.attrs.id)}`;
39590
+ const graphic = findSvgNode(
39591
+ graphics,
39592
+ (node2) => isSvgMarkedContent(node2) && node2.attrs.href === useId
39593
+ );
39594
+ if (!graphic) {
39595
+ continue;
39596
+ }
39597
+ parent.children.push({
39598
+ role: "figure",
39599
+ href: graphic.attrs.href,
39600
+ x: graphic.attrs.x || 0,
39601
+ y: graphic.attrs.y || 0,
39602
+ width: graphic.attrs.width || 0,
39603
+ height: graphic.attrs.height || 0,
39604
+ alt: node.alt
39605
+ });
39606
+ }
39607
+ }
39608
+ break;
39609
+ }
39610
+ default: {
39611
+ const block = {
39612
+ role: "div",
39613
+ children: [],
39614
+ attrs: node.attrs
39615
+ };
39616
+ parent.children.push(block);
39617
+ for (const child of [...node.children]) {
39618
+ decorateStructTree(child, rootContainer, graphics, annotations, [
39619
+ ...parents,
39620
+ block
39621
+ ]);
39622
+ }
39623
+ break;
39624
+ }
39625
+ }
39626
+ } else {
39627
+ const contentId = normalizeMarkedContentId(node.id);
39628
+ findNode(rootContainer, (child, previousParent) => {
39629
+ if (child.id !== contentId) {
39630
+ return;
39631
+ }
39632
+ previousParent.children = previousParent.children.filter(
39633
+ (c) => c !== child
39634
+ );
39635
+ if (isElementNode(child)) {
39636
+ const children = child.children;
39637
+ const lastTextItem = parent.children.at(-1);
39638
+ if (lastTextItem && typeof lastTextItem !== "string" && isTextNode(lastTextItem) && getNodeContents(lastTextItem).trim() === "") {
39639
+ while (children[0] && typeof children[0] !== "string" && isTextNode(children[0]) && getNodeContents(children[0]).trim() === "") {
39640
+ children.shift();
39641
+ }
39642
+ }
39643
+ parent.children.push(...children);
39644
+ } else {
39645
+ parent.children.push(child);
39646
+ }
39647
+ return true;
39648
+ });
39649
+ }
39650
+ };
39465
39651
  async function createTextLayer(page, {
39466
39652
  canvasFactory,
39467
39653
  viewport = page.getViewport({ scale: 1 }),
@@ -39473,7 +39659,6 @@ async function createTextLayer(page, {
39473
39659
  const ascentCache = /* @__PURE__ */ new Map();
39474
39660
  const canvasCache = /* @__PURE__ */ new Map();
39475
39661
  const textDivs = [];
39476
- const markedContent = /* @__PURE__ */ new Map();
39477
39662
  const [tree, contentSource] = await Promise.all([
39478
39663
  page.getStructTree(),
39479
39664
  page.getTextContent({ includeMarkedContent: true })
@@ -39519,10 +39704,6 @@ async function createTextLayer(page, {
39519
39704
  const id2 = normalizeMarkedContentId(item);
39520
39705
  if (id2 != null) {
39521
39706
  container.id = id2;
39522
- markedContent.set(id2, {
39523
- node: container,
39524
- parent
39525
- });
39526
39707
  }
39527
39708
  } else if (item.type === "endMarkedContent") {
39528
39709
  container = parents.pop();
@@ -39685,215 +39866,6 @@ async function createTextLayer(page, {
39685
39866
  ascentCache.set(fontFamily, ratio);
39686
39867
  return ratio;
39687
39868
  };
39688
- const renderStructTreeNode = (node, parents) => {
39689
- let parent = parents[parents.length - 1] || rootContainer;
39690
- if ("role" in node) {
39691
- const role = node.role.toLowerCase();
39692
- switch (role) {
39693
- case "root":
39694
- case "document":
39695
- case "art":
39696
- parent.attrs ?? (parent.attrs = {});
39697
- Object.assign(parent.attrs, node.attrs);
39698
- for (const child of [...node.children]) {
39699
- renderStructTreeNode(child, parents);
39700
- }
39701
- break;
39702
- case "sect": {
39703
- const section = {
39704
- role: "section",
39705
- children: [],
39706
- attrs: node.attrs
39707
- };
39708
- while (parents.find(
39709
- (p) => ["h1", "h2", "h3", "h4", "h5", "h6"].includes(p.role)
39710
- )) {
39711
- parents.pop();
39712
- parent = parents.at(-1) || rootContainer;
39713
- }
39714
- parent.children.push(section);
39715
- for (const child of [...node.children]) {
39716
- renderStructTreeNode(child, [...parents, section]);
39717
- }
39718
- break;
39719
- }
39720
- case "lbl":
39721
- case "lbody":
39722
- case "span": {
39723
- for (const child of [...node.children]) {
39724
- renderStructTreeNode(child, parents);
39725
- }
39726
- break;
39727
- }
39728
- case "link": {
39729
- const [ref, ...children] = node.children;
39730
- if (ref.type !== "object") {
39731
- console.warn(
39732
- `Unsupported link type: ${ref.type}`
39733
- );
39734
- for (const child of children) {
39735
- renderStructTreeNode(child, parents);
39736
- }
39737
- return;
39738
- }
39739
- const annotation = annotations?.find(
39740
- (note) => note.id === ref.id
39741
- );
39742
- if (!annotation) {
39743
- console.warn("Link not found", ref);
39744
- for (const child of children) {
39745
- renderStructTreeNode(child, parents);
39746
- }
39747
- return;
39748
- }
39749
- if (isLinkAnnotation(annotation)) {
39750
- const anchor = {
39751
- role: "a",
39752
- href: annotation.url,
39753
- children: []
39754
- };
39755
- parent.children.push(anchor);
39756
- for (const child of children) {
39757
- renderStructTreeNode(child, [...parents, anchor]);
39758
- }
39759
- } else {
39760
- console.warn(
39761
- `Unsupported annotation subtype: ${annotation.subtype}`
39762
- );
39763
- for (const child of children) {
39764
- renderStructTreeNode(child, parents);
39765
- }
39766
- }
39767
- break;
39768
- }
39769
- case "p": {
39770
- if (node.children.length === 0) {
39771
- break;
39772
- }
39773
- if (node.children.length === 1 && node.children[0].role === "Table") {
39774
- renderStructTreeNode(node.children[0], parents);
39775
- break;
39776
- }
39777
- const paragraph = {
39778
- role: "p",
39779
- children: [],
39780
- attrs: node.attrs
39781
- };
39782
- parent.children.push(paragraph);
39783
- for (const child of [...node.children]) {
39784
- renderStructTreeNode(child, [...parents, paragraph]);
39785
- }
39786
- break;
39787
- }
39788
- case "l": {
39789
- const list = {
39790
- role: "ul",
39791
- children: [],
39792
- attrs: node.attrs
39793
- };
39794
- parent.children.push(list);
39795
- for (const child of [...node.children]) {
39796
- renderStructTreeNode(child, [...parents, list]);
39797
- }
39798
- break;
39799
- }
39800
- case "table":
39801
- case "thead":
39802
- case "tbody":
39803
- case "tfoot":
39804
- case "tr":
39805
- case "h1":
39806
- case "h2":
39807
- case "h3":
39808
- case "h4":
39809
- case "h5":
39810
- case "h6":
39811
- case "li":
39812
- case "td":
39813
- case "th": {
39814
- const block = {
39815
- role,
39816
- children: [],
39817
- attrs: node.attrs
39818
- };
39819
- parent.children.push(block);
39820
- for (const child of [...node.children]) {
39821
- renderStructTreeNode(child, [...parents, block]);
39822
- }
39823
- break;
39824
- }
39825
- case "figure": {
39826
- const ids = [...node.children].map((child) => {
39827
- const id2 = normalizeMarkedContentId(child);
39828
- if (!id2) {
39829
- return null;
39830
- }
39831
- const span = markedContent.get(id2);
39832
- if (!span) {
39833
- return id2;
39834
- }
39835
- if (span.parent) {
39836
- span.parent.children = span.parent.children.filter(
39837
- (child2) => child2 !== span.node
39838
- );
39839
- }
39840
- return id2;
39841
- }).filter((id2) => id2 !== null);
39842
- if (graphics) {
39843
- for (const markedId of ids) {
39844
- const figureId = normalizeMarkedContentId(markedId);
39845
- if (!figureId) {
39846
- continue;
39847
- }
39848
- const useId = `#${id("marked_content" /* MarkedContent */, figureId, graphics.attrs.id)}`;
39849
- const graphic = findSvgNode(
39850
- graphics,
39851
- (node2) => isSvgMarkedContent(node2) && node2.attrs.href === useId
39852
- );
39853
- if (!graphic) {
39854
- continue;
39855
- }
39856
- const figure = {
39857
- role: "figure",
39858
- href: graphic.attrs.href,
39859
- x: graphic.attrs.x || 0,
39860
- y: graphic.attrs.y || 0,
39861
- width: graphic.attrs.width || 0,
39862
- height: graphic.attrs.height || 0,
39863
- alt: node.alt
39864
- };
39865
- parent.children.push(figure);
39866
- }
39867
- }
39868
- break;
39869
- }
39870
- default: {
39871
- const block = {
39872
- role: "div",
39873
- children: [],
39874
- attrs: node.attrs
39875
- };
39876
- parent.children.push(block);
39877
- for (const child of [...node.children]) {
39878
- renderStructTreeNode(child, [...parents, block]);
39879
- }
39880
- break;
39881
- }
39882
- }
39883
- } else {
39884
- const span = markedContent.get(`${node.id}`);
39885
- if (!span) {
39886
- return;
39887
- }
39888
- if (span.parent) {
39889
- span.parent.children = span.parent.children.filter(
39890
- (child) => child !== span.node
39891
- );
39892
- }
39893
- parent.children.push(span.node);
39894
- }
39895
- };
39896
- await loadDefaultFonts();
39897
39869
  const reader = textContentSource.getReader();
39898
39870
  while (true) {
39899
39871
  const { value, done } = await reader.read();
@@ -39906,7 +39878,7 @@ async function createTextLayer(page, {
39906
39878
  const root = tree?.children[0]?.children[0];
39907
39879
  if (root?.children) {
39908
39880
  for (const child of root.children) {
39909
- renderStructTreeNode(child, [rootContainer]);
39881
+ decorateStructTree(child, rootContainer, graphics, annotations);
39910
39882
  }
39911
39883
  }
39912
39884
  ascentCache.clear();
@@ -39917,22 +39889,134 @@ async function createTextLayer(page, {
39917
39889
  return rootContainer;
39918
39890
  }
39919
39891
 
39920
- // src/lib/TextLayer_v2.ts
39921
- var import_opentype = __toESM(require_opentype(), 1);
39922
- var findNode = (node, callback) => {
39923
- for (let i = node.children.length - 1; i >= 0; i--) {
39924
- const child = node.children[i];
39925
- if (typeof child === "string") {
39926
- continue;
39927
- }
39928
- if (callback(child, node)) {
39929
- return;
39892
+ // src/lib/WasmFactory.ts
39893
+ var WasmFactory = class {
39894
+ async fetch({
39895
+ filename
39896
+ }) {
39897
+ switch (filename) {
39898
+ case "openjpeg.wasm":
39899
+ return import("./openjpeg-TRZ4ANDN.js").then(
39900
+ (module) => module.default
39901
+ );
39902
+ case "qcms_bg":
39903
+ return import("./qcms_bg-PRPVJQFC.js").then(
39904
+ (module) => module.default
39905
+ );
39930
39906
  }
39931
- if (isElementNode(child)) {
39932
- findNode(child, callback);
39907
+ return Uint8Array.from([]);
39908
+ }
39909
+ };
39910
+
39911
+ // src/lib/StandardFontDataFactory.ts
39912
+ var StandardFontDataFactory = class extends BaseStandardFontDataFactory {
39913
+ constructor() {
39914
+ super({
39915
+ baseUrl: null
39916
+ });
39917
+ }
39918
+ /**
39919
+ * Fetch the corresponding standard font data.
39920
+ * We need to use specific dynamic imports for each font file for the bundler to include them.
39921
+ */
39922
+ async fetch({
39923
+ filename
39924
+ }) {
39925
+ switch (filename) {
39926
+ case "FoxitDingbats.pfb":
39927
+ return import("./FoxitDingbats-UZC34T4F.js").then((module) => module.default);
39928
+ case "FoxitFixed.pfb":
39929
+ return import("./FoxitFixed-DSJVG3IJ.js").then(
39930
+ (module) => module.default
39931
+ );
39932
+ case "FoxitFixedBold.pfb":
39933
+ return import("./FoxitFixedBold-YSX4X5NA.js").then((module) => module.default);
39934
+ case "FoxitFixedBoldItalic.pfb":
39935
+ return import("./FoxitFixedBoldItalic-IFYYA6RR.js").then((module) => module.default);
39936
+ case "FoxitFixedItalic.pfb":
39937
+ return import("./FoxitFixedItalic-4PEFPCQV.js").then((module) => module.default);
39938
+ case "FoxitSerif.pfb":
39939
+ return import("./FoxitSerif-7SSP2H2U.js").then(
39940
+ (module) => module.default
39941
+ );
39942
+ case "FoxitSerifBold.pfb":
39943
+ return import("./FoxitSerifBold-KLMURJ6N.js").then((module) => module.default);
39944
+ case "FoxitSerifBoldItalic.pfb":
39945
+ return import("./FoxitSerifBoldItalic-VSYHWLRM.js").then((module) => module.default);
39946
+ case "FoxitSerifItalic.pfb":
39947
+ return import("./FoxitSerifItalic-NTOPMQDD.js").then((module) => module.default);
39948
+ case "FoxitSymbol.pfb":
39949
+ return import("./FoxitSymbol-QPUM74UN.js").then(
39950
+ (module) => module.default
39951
+ );
39952
+ case "LiberationSans-Bold.ttf":
39953
+ return import("./LiberationSans-Bold-BXFYN4PV.js").then((module) => module.default);
39954
+ case "LiberationSans-BoldItalic.ttf":
39955
+ return import("./LiberationSans-BoldItalic-DQQC5TNJ.js").then((module) => module.default);
39956
+ case "LiberationSans-Italic.ttf":
39957
+ return import("./LiberationSans-Italic-Z4MFN6PY.js").then((module) => module.default);
39958
+ case "LiberationSans-Regular.ttf":
39959
+ return import("./LiberationSans-Regular-MACKS2VL.js").then((module) => module.default);
39933
39960
  }
39961
+ return Uint8Array.from([]);
39962
+ }
39963
+ };
39964
+
39965
+ // src/lib/CanvasGraphics.ts
39966
+ var {
39967
+ beginDrawing,
39968
+ beginText,
39969
+ endText,
39970
+ beginMarkedContent,
39971
+ beginMarkedContentProps,
39972
+ endMarkedContent
39973
+ } = CanvasGraphics.prototype;
39974
+ CanvasGraphics.prototype.beginDrawing = function(options) {
39975
+ if (this.ctx instanceof SvgCanvasContext) {
39976
+ options.transparency = false;
39977
+ }
39978
+ return beginDrawing.call(this, options);
39979
+ };
39980
+ CanvasGraphics.prototype.beginText = function(opIdx) {
39981
+ beginText.call(this, opIdx);
39982
+ if (this.ctx instanceof SvgCanvasContext) {
39983
+ this.ctx.beginText();
39934
39984
  }
39935
39985
  };
39986
+ CanvasGraphics.prototype.endText = function(opIdx) {
39987
+ endText.call(this, opIdx);
39988
+ if (this.ctx instanceof SvgCanvasContext) {
39989
+ this.ctx.endText();
39990
+ }
39991
+ };
39992
+ CanvasGraphics.prototype.beginMarkedContent = function(opIdx, type) {
39993
+ beginMarkedContent.call(this, opIdx, this.ctx);
39994
+ if (this.ctx instanceof SvgCanvasContext) {
39995
+ this.ctx.beginMarkedContent(type);
39996
+ }
39997
+ };
39998
+ CanvasGraphics.prototype.beginMarkedContentProps = function(opIdx, type, props) {
39999
+ beginMarkedContentProps.call(this, opIdx, type, props);
40000
+ if (this.ctx instanceof SvgCanvasContext) {
40001
+ this.ctx.beginMarkedContent(type, props);
40002
+ }
40003
+ };
40004
+ CanvasGraphics.prototype.endMarkedContent = function(opIdx) {
40005
+ if (this.ctx instanceof SvgCanvasContext) {
40006
+ this.ctx.endMarkedContent();
40007
+ }
40008
+ endMarkedContent.call(this, opIdx);
40009
+ };
40010
+ Object.assign(CanvasGraphics.prototype, {
40011
+ [OPS.beginText]: CanvasGraphics.prototype.beginText,
40012
+ [OPS.endText]: CanvasGraphics.prototype.endText,
40013
+ [OPS.beginMarkedContent]: CanvasGraphics.prototype.beginMarkedContent,
40014
+ [OPS.beginMarkedContentProps]: CanvasGraphics.prototype.beginMarkedContentProps,
40015
+ [OPS.endMarkedContent]: CanvasGraphics.prototype.endMarkedContent
40016
+ });
40017
+
40018
+ // src/lib/TextLayer_v2.ts
40019
+ var import_opentype = __toESM(require_opentype(), 1);
39936
40020
  var loadTextLayerFontsMap = /* @__PURE__ */ (() => {
39937
40021
  let promise = null;
39938
40022
  return () => {
@@ -39948,12 +40032,6 @@ var loadTextLayerFontsMap = /* @__PURE__ */ (() => {
39948
40032
  return promise;
39949
40033
  };
39950
40034
  })();
39951
- var computeText = (node) => {
39952
- if (typeof node.text === "string") {
39953
- return node.text;
39954
- }
39955
- return node.text.map((t) => t.text).join("");
39956
- };
39957
40035
  async function createTextLayerV2(page, {
39958
40036
  graphics,
39959
40037
  annotations: _annotations
@@ -40059,210 +40137,6 @@ async function createTextLayerV2(page, {
40059
40137
  });
40060
40138
  resetTextItem();
40061
40139
  };
40062
- const renderStructTreeNode = (node, parents) => {
40063
- let parent = parents.at(-1) || rootContainer;
40064
- if ("role" in node) {
40065
- const role = node.role.toLowerCase();
40066
- switch (role) {
40067
- case "root":
40068
- case "document":
40069
- case "art":
40070
- parent.attrs ?? (parent.attrs = {});
40071
- Object.assign(parent.attrs, node.attrs);
40072
- for (const child of [...node.children]) {
40073
- renderStructTreeNode(child, parents);
40074
- }
40075
- break;
40076
- case "part":
40077
- case "sect": {
40078
- const section = {
40079
- role: "section",
40080
- children: [],
40081
- attrs: node.attrs
40082
- };
40083
- while (parents.find(
40084
- (p) => ["h1", "h2", "h3", "h4", "h5", "h6"].includes(p.role)
40085
- )) {
40086
- parents.pop();
40087
- parent = parents.at(-1) || rootContainer;
40088
- }
40089
- parent.children.push(section);
40090
- for (const child of [...node.children]) {
40091
- renderStructTreeNode(child, [...parents, section]);
40092
- }
40093
- break;
40094
- }
40095
- case "lbl":
40096
- case "lbody":
40097
- case "span": {
40098
- for (const child of [...node.children]) {
40099
- renderStructTreeNode(child, parents);
40100
- }
40101
- break;
40102
- }
40103
- case "link": {
40104
- const [ref, ...children] = node.children;
40105
- if (ref.type !== "object") {
40106
- console.warn(
40107
- `Unsupported link type: ${ref.type}`
40108
- );
40109
- for (const child of children) {
40110
- renderStructTreeNode(child, parents);
40111
- }
40112
- return;
40113
- }
40114
- const annotation = annotations?.find(
40115
- (note) => note.id === ref.id
40116
- );
40117
- if (!annotation) {
40118
- console.warn("Link not found", ref);
40119
- for (const child of children) {
40120
- renderStructTreeNode(child, parents);
40121
- }
40122
- return;
40123
- }
40124
- if (isLinkAnnotation(annotation)) {
40125
- const anchor = {
40126
- role: "a",
40127
- href: annotation.url,
40128
- children: []
40129
- };
40130
- parent.children.push(anchor);
40131
- for (const child of children) {
40132
- renderStructTreeNode(child, [...parents, anchor]);
40133
- }
40134
- } else {
40135
- console.warn(
40136
- `Unsupported annotation subtype: ${annotation.subtype}`
40137
- );
40138
- for (const child of children) {
40139
- renderStructTreeNode(child, parents);
40140
- }
40141
- }
40142
- break;
40143
- }
40144
- case "p": {
40145
- if (node.children.length === 0) {
40146
- break;
40147
- }
40148
- if (node.children.length === 1 && node.children[0].role === "Table") {
40149
- renderStructTreeNode(node.children[0], parents);
40150
- break;
40151
- }
40152
- const paragraph = {
40153
- role: "p",
40154
- children: [],
40155
- attrs: node.attrs
40156
- };
40157
- parent.children.push(paragraph);
40158
- for (const child of [...node.children]) {
40159
- renderStructTreeNode(child, [...parents, paragraph]);
40160
- }
40161
- break;
40162
- }
40163
- case "l": {
40164
- const list = {
40165
- role: "ul",
40166
- children: [],
40167
- attrs: node.attrs
40168
- };
40169
- parent.children.push(list);
40170
- for (const child of [...node.children]) {
40171
- renderStructTreeNode(child, [...parents, list]);
40172
- }
40173
- break;
40174
- }
40175
- case "table":
40176
- case "thead":
40177
- case "tbody":
40178
- case "tfoot":
40179
- case "tr":
40180
- case "h1":
40181
- case "h2":
40182
- case "h3":
40183
- case "h4":
40184
- case "h5":
40185
- case "h6":
40186
- case "li":
40187
- case "td":
40188
- case "th": {
40189
- const block = {
40190
- role,
40191
- children: [],
40192
- attrs: node.attrs
40193
- };
40194
- parent.children.push(block);
40195
- for (const child of [...node.children]) {
40196
- renderStructTreeNode(child, [...parents, block]);
40197
- }
40198
- break;
40199
- }
40200
- case "figure": {
40201
- if (graphics) {
40202
- for (const child of [...node.children]) {
40203
- if (!("id" in child)) {
40204
- continue;
40205
- }
40206
- const contentId = normalizeMarkedContentId(child.id);
40207
- const useId = `#${id("marked_content" /* MarkedContent */, contentId, graphics.attrs.id)}`;
40208
- const graphic = findSvgNode(
40209
- graphics,
40210
- (node2) => isSvgMarkedContent(node2) && node2.attrs.href === useId
40211
- );
40212
- if (!graphic) {
40213
- continue;
40214
- }
40215
- parent.children.push({
40216
- role: "figure",
40217
- href: graphic.attrs.href,
40218
- x: graphic.attrs.x || 0,
40219
- y: graphic.attrs.y || 0,
40220
- width: graphic.attrs.width || 0,
40221
- height: graphic.attrs.height || 0,
40222
- alt: node.alt
40223
- });
40224
- }
40225
- }
40226
- break;
40227
- }
40228
- default: {
40229
- const block = {
40230
- role: "div",
40231
- children: [],
40232
- attrs: node.attrs
40233
- };
40234
- parent.children.push(block);
40235
- for (const child of [...node.children]) {
40236
- renderStructTreeNode(child, [...parents, block]);
40237
- }
40238
- break;
40239
- }
40240
- }
40241
- } else {
40242
- const contentId = normalizeMarkedContentId(node.id);
40243
- findNode(rootContainer, (child, previousParent) => {
40244
- if (child.id !== contentId) {
40245
- return;
40246
- }
40247
- previousParent.children = previousParent.children.filter(
40248
- (c) => c !== child
40249
- );
40250
- if (isElementNode(child)) {
40251
- const children = child.children;
40252
- const lastTextItem = parent.children.at(-1);
40253
- if (lastTextItem && typeof lastTextItem !== "string" && isTextNode(lastTextItem) && computeText(lastTextItem).trim() === "") {
40254
- while (children[0] && typeof children[0] !== "string" && isTextNode(children[0]) && computeText(children[0]).trim() === "") {
40255
- children.shift();
40256
- }
40257
- }
40258
- parent.children.push(...children);
40259
- } else {
40260
- parent.children.push(child);
40261
- }
40262
- return true;
40263
- });
40264
- }
40265
- };
40266
40140
  for (let i = 0; i < operatorsList.fnArray.length; i++) {
40267
40141
  const fnId = operatorsList.fnArray[i];
40268
40142
  const args = operatorsList.argsArray[i];
@@ -40460,9 +40334,22 @@ async function createTextLayerV2(page, {
40460
40334
  }
40461
40335
  }
40462
40336
  closeTextItem();
40463
- renderStructTreeNode(structTree, [rootContainer]);
40337
+ decorateStructTree(
40338
+ structTree,
40339
+ rootContainer,
40340
+ graphics,
40341
+ annotations
40342
+ );
40464
40343
  return rootContainer;
40465
40344
  }
40345
+
40346
+ // src/index.ts
40347
+ var textLayerUtils = {
40348
+ findNode,
40349
+ findNodes,
40350
+ getNodeContents,
40351
+ flattenNodes
40352
+ };
40466
40353
  export {
40467
40354
  AbortException,
40468
40355
  AnnotationEditorLayer,
@@ -40512,7 +40399,6 @@ export {
40512
40399
  createValidAbsoluteUrl,
40513
40400
  destroySvgContext,
40514
40401
  fetchData,
40515
- fontLoader,
40516
40402
  getDocument,
40517
40403
  getFilenameFromUrl,
40518
40404
  getPdfFilenameFromUrl,
@@ -40560,11 +40446,10 @@ export {
40560
40446
  isValidExplicitDest,
40561
40447
  isWatermarkAnnotation,
40562
40448
  isWidgetAnnotation,
40563
- loadDefaultFonts,
40449
+ loadDefaultFonts2 as loadDefaultFonts,
40564
40450
  loadTextLayerFonts,
40565
40451
  makeSerializable,
40566
40452
  noContextMenu,
40567
- normalizeMarkedContentId,
40568
40453
  normalizeUnicode,
40569
40454
  parseRgbaColor,
40570
40455
  renderSvgNode,
@@ -40572,6 +40457,7 @@ export {
40572
40457
  setLayerDimensions,
40573
40458
  shadow,
40574
40459
  stopEvent,
40460
+ textLayerUtils,
40575
40461
  toDataUrl,
40576
40462
  toSvgNode,
40577
40463
  toSvgString,