@chialab/pdfjs-lib 1.0.0-alpha.28 → 1.0.0-alpha.29

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -5885,25 +5885,11 @@ _serializable = new WeakMap();
5885
5885
  // src/lib/Canvas.ts
5886
5886
  var Path2DConstructor = Path2D;
5887
5887
  async function createCanvas(width, height) {
5888
- await loadDefaultFonts();
5889
5888
  const canvas = document.createElement("canvas");
5890
5889
  canvas.width = width;
5891
5890
  canvas.height = height;
5892
5891
  return canvas;
5893
5892
  }
5894
- var loadingFontPromise = null;
5895
- async function loadDefaultFonts() {
5896
- if (loadingFontPromise === null) {
5897
- loadingFontPromise = import("./LiberationSans-Regular-3SH5NGZO.js").then((module) => new Blob([module.default], { type: "font/ttf" })).then(async (sansBlob) => {
5898
- const fontSans = new FontFace(
5899
- "Liberation Sans",
5900
- `url(${URL.createObjectURL(sansBlob)})`
5901
- );
5902
- document.fonts.add(await fontSans.load());
5903
- });
5904
- }
5905
- return loadingFontPromise;
5906
- }
5907
5893
 
5908
5894
  // src/lib/Path2D.ts
5909
5895
  function toFixed(value, digits = 6) {
@@ -27871,79 +27857,6 @@ PDFPageProxy.prototype.getAnnotations = async function(params) {
27871
27857
  return makeSerializable(annotations);
27872
27858
  };
27873
27859
 
27874
- // src/lib/WasmFactory.ts
27875
- var WasmFactory = class {
27876
- async fetch({
27877
- filename
27878
- }) {
27879
- switch (filename) {
27880
- case "openjpeg.wasm":
27881
- return import("./openjpeg-QLA762TL.js").then(
27882
- (module) => module.default
27883
- );
27884
- case "qcms_bg":
27885
- return import("./qcms_bg-BCJEADMU.js").then(
27886
- (module) => module.default
27887
- );
27888
- }
27889
- return Uint8Array.from([]);
27890
- }
27891
- };
27892
-
27893
- // src/lib/StandardFontDataFactory.ts
27894
- var StandardFontDataFactory = class extends BaseStandardFontDataFactory {
27895
- constructor() {
27896
- super({
27897
- baseUrl: null
27898
- });
27899
- }
27900
- /**
27901
- * Fetch the corresponding standard font data.
27902
- * We need to use specific dynamic imports for each font file for the bundler to include them.
27903
- */
27904
- async fetch({
27905
- filename
27906
- }) {
27907
- switch (filename) {
27908
- case "FoxitDingbats.pfb":
27909
- return import("./FoxitDingbats-XZTZYAP6.js").then((module) => module.default);
27910
- case "FoxitFixed.pfb":
27911
- return import("./FoxitFixed-DRWD6QNM.js").then(
27912
- (module) => module.default
27913
- );
27914
- case "FoxitFixedBold.pfb":
27915
- return import("./FoxitFixedBold-A3IBPIFC.js").then((module) => module.default);
27916
- case "FoxitFixedBoldItalic.pfb":
27917
- return import("./FoxitFixedBoldItalic-V4ORMFGL.js").then((module) => module.default);
27918
- case "FoxitFixedItalic.pfb":
27919
- return import("./FoxitFixedItalic-Z7BSNTJA.js").then((module) => module.default);
27920
- case "FoxitSerif.pfb":
27921
- return import("./FoxitSerif-Y34FHWHO.js").then(
27922
- (module) => module.default
27923
- );
27924
- case "FoxitSerifBold.pfb":
27925
- return import("./FoxitSerifBold-NCWBT4GX.js").then((module) => module.default);
27926
- case "FoxitSerifBoldItalic.pfb":
27927
- return import("./FoxitSerifBoldItalic-YTEOG5ZU.js").then((module) => module.default);
27928
- case "FoxitSerifItalic.pfb":
27929
- return import("./FoxitSerifItalic-3H547RIJ.js").then((module) => module.default);
27930
- case "FoxitSymbol.pfb":
27931
- return import("./FoxitSymbol-EMTQEYPB.js").then(
27932
- (module) => module.default
27933
- );
27934
- case "LiberationSans-Bold.ttf":
27935
- return import("./LiberationSans-Bold-MGX34QV4.js").then((module) => module.default);
27936
- case "LiberationSans-BoldItalic.ttf":
27937
- return import("./LiberationSans-BoldItalic-WSEQ5LH5.js").then((module) => module.default);
27938
- case "LiberationSans-Italic.ttf":
27939
- return import("./LiberationSans-Italic-E4SLDR4M.js").then((module) => module.default);
27940
- case "LiberationSans-Regular.ttf":
27941
- return import("./LiberationSans-Regular-3SH5NGZO.js").then((module) => module.default);
27942
- }
27943
- return Uint8Array.from([]);
27944
- }
27945
- };
27946
-
27947
27860
  // src/lib/AnnotationData.ts
27948
27861
  function isTextAnnotation(annotation) {
27949
27862
  return annotation.subtype === "Text";
@@ -28033,59 +27946,6 @@ function isRedactAnnotation(annotation) {
28033
27946
  return annotation.subtype === "Redact";
28034
27947
  }
28035
27948
 
28036
- // src/lib/CanvasGraphics.ts
28037
- var {
28038
- beginDrawing,
28039
- beginText,
28040
- endText,
28041
- beginMarkedContent,
28042
- beginMarkedContentProps,
28043
- endMarkedContent
28044
- } = CanvasGraphics.prototype;
28045
- CanvasGraphics.prototype.beginDrawing = function(options) {
28046
- if (this.ctx instanceof SvgCanvasContext) {
28047
- options.transparency = false;
28048
- }
28049
- return beginDrawing.call(this, options);
28050
- };
28051
- CanvasGraphics.prototype.beginText = function(opIdx) {
28052
- beginText.call(this, opIdx);
28053
- if (this.ctx instanceof SvgCanvasContext) {
28054
- this.ctx.beginText();
28055
- }
28056
- };
28057
- CanvasGraphics.prototype.endText = function(opIdx) {
28058
- endText.call(this, opIdx);
28059
- if (this.ctx instanceof SvgCanvasContext) {
28060
- this.ctx.endText();
28061
- }
28062
- };
28063
- CanvasGraphics.prototype.beginMarkedContent = function(opIdx, type) {
28064
- beginMarkedContent.call(this, opIdx, this.ctx);
28065
- if (this.ctx instanceof SvgCanvasContext) {
28066
- this.ctx.beginMarkedContent(type);
28067
- }
28068
- };
28069
- CanvasGraphics.prototype.beginMarkedContentProps = function(opIdx, type, props) {
28070
- beginMarkedContentProps.call(this, opIdx, type, props);
28071
- if (this.ctx instanceof SvgCanvasContext) {
28072
- this.ctx.beginMarkedContent(type, props);
28073
- }
28074
- };
28075
- CanvasGraphics.prototype.endMarkedContent = function(opIdx) {
28076
- if (this.ctx instanceof SvgCanvasContext) {
28077
- this.ctx.endMarkedContent();
28078
- }
28079
- endMarkedContent.call(this, opIdx);
28080
- };
28081
- Object.assign(CanvasGraphics.prototype, {
28082
- [OPS.beginText]: CanvasGraphics.prototype.beginText,
28083
- [OPS.endText]: CanvasGraphics.prototype.endText,
28084
- [OPS.beginMarkedContent]: CanvasGraphics.prototype.beginMarkedContent,
28085
- [OPS.beginMarkedContentProps]: CanvasGraphics.prototype.beginMarkedContentProps,
28086
- [OPS.endMarkedContent]: CanvasGraphics.prototype.endMarkedContent
28087
- });
28088
-
28089
27949
  // src/lib/TextLayer.ts
28090
27950
  function isTextNode(node) {
28091
27951
  return node.role === "text";
@@ -28144,6 +28004,7 @@ async function loadTextLayerFonts(document2) {
28144
28004
  });
28145
28005
  return loadedFontsPromise;
28146
28006
  }
28007
+ var loadDefaultFonts = loadTextLayerFonts;
28147
28008
  var normalizeMarkedContentId = (idRef) => {
28148
28009
  if (idRef == null) {
28149
28010
  return null;
@@ -28156,10 +28017,321 @@ var normalizeMarkedContentId = (idRef) => {
28156
28017
  }
28157
28018
  return `${idRef}`;
28158
28019
  };
28020
+ var getNodeContents = (node) => {
28021
+ if (isTextNode(node)) {
28022
+ return typeof node.text === "string" ? node.text : node.text.map((t) => t.text).join("");
28023
+ }
28024
+ if (isElementNode(node)) {
28025
+ return node.children.map((n) => {
28026
+ if (typeof n === "string") {
28027
+ return n;
28028
+ }
28029
+ return getNodeContents(n);
28030
+ }).join("");
28031
+ }
28032
+ return "";
28033
+ };
28034
+ var findNode = (root, callback) => {
28035
+ for (let i = root.children.length - 1; i >= 0; i--) {
28036
+ const child = root.children[i];
28037
+ if (typeof child === "string") {
28038
+ continue;
28039
+ }
28040
+ if (callback(child, root)) {
28041
+ return child;
28042
+ }
28043
+ if (isElementNode(child)) {
28044
+ const found = findNode(child, callback);
28045
+ if (found) {
28046
+ return found;
28047
+ }
28048
+ }
28049
+ }
28050
+ return null;
28051
+ };
28052
+ var findNodes = (node, callback) => {
28053
+ return node.children.reduce((nodes, child) => {
28054
+ if (typeof child === "string") {
28055
+ return nodes;
28056
+ }
28057
+ if (callback(child, node)) {
28058
+ nodes.push(child);
28059
+ }
28060
+ if (isElementNode(child)) {
28061
+ nodes.push(...findNodes(child, callback));
28062
+ }
28063
+ return nodes;
28064
+ }, []);
28065
+ };
28066
+ var flattenNodes = (node) => {
28067
+ if (typeof node === "string") {
28068
+ return [];
28069
+ }
28070
+ if (isElementNode(node)) {
28071
+ return [node, ...node.children.flatMap(flattenNodes)];
28072
+ }
28073
+ return [node];
28074
+ };
28159
28075
  var MAX_TEXT_DIVS_TO_RENDER2 = 1e5;
28160
28076
  var DEFAULT_FONT_SIZE3 = 30;
28161
28077
  var DEFAULT_FONT_ASCENT = 0.8;
28162
28078
  var HYPHEN_REGEX = /-\n+$/;
28079
+ var decorateStructTree = (node, rootContainer, graphics, annotations, parents = []) => {
28080
+ let parent = parents.at(-1) || rootContainer;
28081
+ if ("role" in node) {
28082
+ const role = node.role.toLowerCase();
28083
+ switch (role) {
28084
+ case "root":
28085
+ case "document":
28086
+ case "art":
28087
+ parent.attrs ?? (parent.attrs = {});
28088
+ Object.assign(parent.attrs, node.attrs);
28089
+ for (const child of [...node.children]) {
28090
+ decorateStructTree(
28091
+ child,
28092
+ rootContainer,
28093
+ graphics,
28094
+ annotations,
28095
+ parents
28096
+ );
28097
+ }
28098
+ break;
28099
+ case "part":
28100
+ case "sect": {
28101
+ const section = {
28102
+ role: "section",
28103
+ children: [],
28104
+ attrs: node.attrs
28105
+ };
28106
+ while (parents.find(
28107
+ (p) => ["h1", "h2", "h3", "h4", "h5", "h6"].includes(p.role)
28108
+ )) {
28109
+ parents.pop();
28110
+ parent = parents.at(-1) || rootContainer;
28111
+ }
28112
+ parent.children.push(section);
28113
+ for (const child of [...node.children]) {
28114
+ decorateStructTree(child, rootContainer, graphics, annotations, [
28115
+ ...parents,
28116
+ section
28117
+ ]);
28118
+ }
28119
+ break;
28120
+ }
28121
+ case "lbl":
28122
+ case "lbody":
28123
+ case "span": {
28124
+ for (const child of [...node.children]) {
28125
+ decorateStructTree(
28126
+ child,
28127
+ rootContainer,
28128
+ graphics,
28129
+ annotations,
28130
+ parents
28131
+ );
28132
+ }
28133
+ break;
28134
+ }
28135
+ case "link": {
28136
+ const [ref, ...children] = node.children;
28137
+ if (ref.type !== "object") {
28138
+ console.warn(
28139
+ `Unsupported link type: ${ref.type}`
28140
+ );
28141
+ for (const child of children) {
28142
+ decorateStructTree(
28143
+ child,
28144
+ rootContainer,
28145
+ graphics,
28146
+ annotations,
28147
+ parents
28148
+ );
28149
+ }
28150
+ return;
28151
+ }
28152
+ const annotation = annotations?.find(
28153
+ (note) => note.id === ref.id
28154
+ );
28155
+ if (!annotation) {
28156
+ console.warn("Link not found", ref);
28157
+ for (const child of children) {
28158
+ decorateStructTree(
28159
+ child,
28160
+ rootContainer,
28161
+ graphics,
28162
+ annotations,
28163
+ parents
28164
+ );
28165
+ }
28166
+ return;
28167
+ }
28168
+ if (isLinkAnnotation(annotation)) {
28169
+ const anchor = {
28170
+ role: "a",
28171
+ href: annotation.url,
28172
+ children: []
28173
+ };
28174
+ parent.children.push(anchor);
28175
+ for (const child of children) {
28176
+ decorateStructTree(child, rootContainer, graphics, annotations, [
28177
+ ...parents,
28178
+ anchor
28179
+ ]);
28180
+ }
28181
+ } else {
28182
+ console.warn(`Unsupported annotation subtype: ${annotation.subtype}`);
28183
+ for (const child of children) {
28184
+ decorateStructTree(
28185
+ child,
28186
+ rootContainer,
28187
+ graphics,
28188
+ annotations,
28189
+ parents
28190
+ );
28191
+ }
28192
+ }
28193
+ break;
28194
+ }
28195
+ case "p": {
28196
+ if (node.children.length === 0) {
28197
+ break;
28198
+ }
28199
+ if (node.children.length === 1 && node.children[0].role === "Table") {
28200
+ decorateStructTree(
28201
+ node.children[0],
28202
+ rootContainer,
28203
+ graphics,
28204
+ annotations,
28205
+ parents
28206
+ );
28207
+ break;
28208
+ }
28209
+ const paragraph = {
28210
+ role: "p",
28211
+ children: [],
28212
+ attrs: node.attrs
28213
+ };
28214
+ parent.children.push(paragraph);
28215
+ for (const child of [...node.children]) {
28216
+ decorateStructTree(child, rootContainer, graphics, annotations, [
28217
+ ...parents,
28218
+ paragraph
28219
+ ]);
28220
+ }
28221
+ break;
28222
+ }
28223
+ case "l": {
28224
+ const list = {
28225
+ role: "ul",
28226
+ children: [],
28227
+ attrs: node.attrs
28228
+ };
28229
+ parent.children.push(list);
28230
+ for (const child of [...node.children]) {
28231
+ decorateStructTree(child, rootContainer, graphics, annotations, [
28232
+ ...parents,
28233
+ list
28234
+ ]);
28235
+ }
28236
+ break;
28237
+ }
28238
+ case "table":
28239
+ case "thead":
28240
+ case "tbody":
28241
+ case "tfoot":
28242
+ case "tr":
28243
+ case "h1":
28244
+ case "h2":
28245
+ case "h3":
28246
+ case "h4":
28247
+ case "h5":
28248
+ case "h6":
28249
+ case "li":
28250
+ case "td":
28251
+ case "th": {
28252
+ const block = {
28253
+ role,
28254
+ children: [],
28255
+ attrs: node.attrs
28256
+ };
28257
+ parent.children.push(block);
28258
+ for (const child of [...node.children]) {
28259
+ decorateStructTree(child, rootContainer, graphics, annotations, [
28260
+ ...parents,
28261
+ block
28262
+ ]);
28263
+ }
28264
+ break;
28265
+ }
28266
+ case "figure": {
28267
+ if (graphics) {
28268
+ for (const child of [...node.children]) {
28269
+ if (!("id" in child)) {
28270
+ continue;
28271
+ }
28272
+ const contentId = normalizeMarkedContentId(child.id);
28273
+ const useId = `#${id("marked_content" /* MarkedContent */, contentId, graphics.attrs.id)}`;
28274
+ const graphic = findSvgNode(
28275
+ graphics,
28276
+ (node2) => isSvgMarkedContent(node2) && node2.attrs.href === useId
28277
+ );
28278
+ if (!graphic) {
28279
+ continue;
28280
+ }
28281
+ parent.children.push({
28282
+ role: "figure",
28283
+ href: graphic.attrs.href,
28284
+ x: graphic.attrs.x || 0,
28285
+ y: graphic.attrs.y || 0,
28286
+ width: graphic.attrs.width || 0,
28287
+ height: graphic.attrs.height || 0,
28288
+ alt: node.alt
28289
+ });
28290
+ }
28291
+ }
28292
+ break;
28293
+ }
28294
+ default: {
28295
+ const block = {
28296
+ role: "div",
28297
+ children: [],
28298
+ attrs: node.attrs
28299
+ };
28300
+ parent.children.push(block);
28301
+ for (const child of [...node.children]) {
28302
+ decorateStructTree(child, rootContainer, graphics, annotations, [
28303
+ ...parents,
28304
+ block
28305
+ ]);
28306
+ }
28307
+ break;
28308
+ }
28309
+ }
28310
+ } else {
28311
+ const contentId = normalizeMarkedContentId(node.id);
28312
+ findNode(rootContainer, (child, previousParent) => {
28313
+ if (child.id !== contentId) {
28314
+ return;
28315
+ }
28316
+ previousParent.children = previousParent.children.filter(
28317
+ (c) => c !== child
28318
+ );
28319
+ if (isElementNode(child)) {
28320
+ const children = child.children;
28321
+ const lastTextItem = parent.children.at(-1);
28322
+ if (lastTextItem && typeof lastTextItem !== "string" && isTextNode(lastTextItem) && getNodeContents(lastTextItem).trim() === "") {
28323
+ while (children[0] && typeof children[0] !== "string" && isTextNode(children[0]) && getNodeContents(children[0]).trim() === "") {
28324
+ children.shift();
28325
+ }
28326
+ }
28327
+ parent.children.push(...children);
28328
+ } else {
28329
+ parent.children.push(child);
28330
+ }
28331
+ return true;
28332
+ });
28333
+ }
28334
+ };
28163
28335
  async function createTextLayer(page, {
28164
28336
  canvasFactory,
28165
28337
  viewport = page.getViewport({ scale: 1 }),
@@ -28171,7 +28343,6 @@ async function createTextLayer(page, {
28171
28343
  const ascentCache = /* @__PURE__ */ new Map();
28172
28344
  const canvasCache = /* @__PURE__ */ new Map();
28173
28345
  const textDivs = [];
28174
- const markedContent = /* @__PURE__ */ new Map();
28175
28346
  const [tree, contentSource] = await Promise.all([
28176
28347
  page.getStructTree(),
28177
28348
  page.getTextContent({ includeMarkedContent: true })
@@ -28217,10 +28388,6 @@ async function createTextLayer(page, {
28217
28388
  const id2 = normalizeMarkedContentId(item);
28218
28389
  if (id2 != null) {
28219
28390
  container.id = id2;
28220
- markedContent.set(id2, {
28221
- node: container,
28222
- parent
28223
- });
28224
28391
  }
28225
28392
  } else if (item.type === "endMarkedContent") {
28226
28393
  container = parents.pop();
@@ -28383,215 +28550,6 @@ async function createTextLayer(page, {
28383
28550
  ascentCache.set(fontFamily, ratio);
28384
28551
  return ratio;
28385
28552
  };
28386
- const renderStructTreeNode = (node, parents) => {
28387
- let parent = parents[parents.length - 1] || rootContainer;
28388
- if ("role" in node) {
28389
- const role = node.role.toLowerCase();
28390
- switch (role) {
28391
- case "root":
28392
- case "document":
28393
- case "art":
28394
- parent.attrs ?? (parent.attrs = {});
28395
- Object.assign(parent.attrs, node.attrs);
28396
- for (const child of [...node.children]) {
28397
- renderStructTreeNode(child, parents);
28398
- }
28399
- break;
28400
- case "sect": {
28401
- const section = {
28402
- role: "section",
28403
- children: [],
28404
- attrs: node.attrs
28405
- };
28406
- while (parents.find(
28407
- (p) => ["h1", "h2", "h3", "h4", "h5", "h6"].includes(p.role)
28408
- )) {
28409
- parents.pop();
28410
- parent = parents.at(-1) || rootContainer;
28411
- }
28412
- parent.children.push(section);
28413
- for (const child of [...node.children]) {
28414
- renderStructTreeNode(child, [...parents, section]);
28415
- }
28416
- break;
28417
- }
28418
- case "lbl":
28419
- case "lbody":
28420
- case "span": {
28421
- for (const child of [...node.children]) {
28422
- renderStructTreeNode(child, parents);
28423
- }
28424
- break;
28425
- }
28426
- case "link": {
28427
- const [ref, ...children] = node.children;
28428
- if (ref.type !== "object") {
28429
- console.warn(
28430
- `Unsupported link type: ${ref.type}`
28431
- );
28432
- for (const child of children) {
28433
- renderStructTreeNode(child, parents);
28434
- }
28435
- return;
28436
- }
28437
- const annotation = annotations?.find(
28438
- (note) => note.id === ref.id
28439
- );
28440
- if (!annotation) {
28441
- console.warn("Link not found", ref);
28442
- for (const child of children) {
28443
- renderStructTreeNode(child, parents);
28444
- }
28445
- return;
28446
- }
28447
- if (isLinkAnnotation(annotation)) {
28448
- const anchor = {
28449
- role: "a",
28450
- href: annotation.url,
28451
- children: []
28452
- };
28453
- parent.children.push(anchor);
28454
- for (const child of children) {
28455
- renderStructTreeNode(child, [...parents, anchor]);
28456
- }
28457
- } else {
28458
- console.warn(
28459
- `Unsupported annotation subtype: ${annotation.subtype}`
28460
- );
28461
- for (const child of children) {
28462
- renderStructTreeNode(child, parents);
28463
- }
28464
- }
28465
- break;
28466
- }
28467
- case "p": {
28468
- if (node.children.length === 0) {
28469
- break;
28470
- }
28471
- if (node.children.length === 1 && node.children[0].role === "Table") {
28472
- renderStructTreeNode(node.children[0], parents);
28473
- break;
28474
- }
28475
- const paragraph = {
28476
- role: "p",
28477
- children: [],
28478
- attrs: node.attrs
28479
- };
28480
- parent.children.push(paragraph);
28481
- for (const child of [...node.children]) {
28482
- renderStructTreeNode(child, [...parents, paragraph]);
28483
- }
28484
- break;
28485
- }
28486
- case "l": {
28487
- const list = {
28488
- role: "ul",
28489
- children: [],
28490
- attrs: node.attrs
28491
- };
28492
- parent.children.push(list);
28493
- for (const child of [...node.children]) {
28494
- renderStructTreeNode(child, [...parents, list]);
28495
- }
28496
- break;
28497
- }
28498
- case "table":
28499
- case "thead":
28500
- case "tbody":
28501
- case "tfoot":
28502
- case "tr":
28503
- case "h1":
28504
- case "h2":
28505
- case "h3":
28506
- case "h4":
28507
- case "h5":
28508
- case "h6":
28509
- case "li":
28510
- case "td":
28511
- case "th": {
28512
- const block = {
28513
- role,
28514
- children: [],
28515
- attrs: node.attrs
28516
- };
28517
- parent.children.push(block);
28518
- for (const child of [...node.children]) {
28519
- renderStructTreeNode(child, [...parents, block]);
28520
- }
28521
- break;
28522
- }
28523
- case "figure": {
28524
- const ids = [...node.children].map((child) => {
28525
- const id2 = normalizeMarkedContentId(child);
28526
- if (!id2) {
28527
- return null;
28528
- }
28529
- const span = markedContent.get(id2);
28530
- if (!span) {
28531
- return id2;
28532
- }
28533
- if (span.parent) {
28534
- span.parent.children = span.parent.children.filter(
28535
- (child2) => child2 !== span.node
28536
- );
28537
- }
28538
- return id2;
28539
- }).filter((id2) => id2 !== null);
28540
- if (graphics) {
28541
- for (const markedId of ids) {
28542
- const figureId = normalizeMarkedContentId(markedId);
28543
- if (!figureId) {
28544
- continue;
28545
- }
28546
- const useId = `#${id("marked_content" /* MarkedContent */, figureId, graphics.attrs.id)}`;
28547
- const graphic = findSvgNode(
28548
- graphics,
28549
- (node2) => isSvgMarkedContent(node2) && node2.attrs.href === useId
28550
- );
28551
- if (!graphic) {
28552
- continue;
28553
- }
28554
- const figure = {
28555
- role: "figure",
28556
- href: graphic.attrs.href,
28557
- x: graphic.attrs.x || 0,
28558
- y: graphic.attrs.y || 0,
28559
- width: graphic.attrs.width || 0,
28560
- height: graphic.attrs.height || 0,
28561
- alt: node.alt
28562
- };
28563
- parent.children.push(figure);
28564
- }
28565
- }
28566
- break;
28567
- }
28568
- default: {
28569
- const block = {
28570
- role: "div",
28571
- children: [],
28572
- attrs: node.attrs
28573
- };
28574
- parent.children.push(block);
28575
- for (const child of [...node.children]) {
28576
- renderStructTreeNode(child, [...parents, block]);
28577
- }
28578
- break;
28579
- }
28580
- }
28581
- } else {
28582
- const span = markedContent.get(`${node.id}`);
28583
- if (!span) {
28584
- return;
28585
- }
28586
- if (span.parent) {
28587
- span.parent.children = span.parent.children.filter(
28588
- (child) => child !== span.node
28589
- );
28590
- }
28591
- parent.children.push(span.node);
28592
- }
28593
- };
28594
- await loadDefaultFonts();
28595
28553
  const reader = textContentSource.getReader();
28596
28554
  while (true) {
28597
28555
  const { value, done } = await reader.read();
@@ -28604,7 +28562,7 @@ async function createTextLayer(page, {
28604
28562
  const root = tree?.children[0]?.children[0];
28605
28563
  if (root?.children) {
28606
28564
  for (const child of root.children) {
28607
- renderStructTreeNode(child, [rootContainer]);
28565
+ decorateStructTree(child, rootContainer, graphics, annotations);
28608
28566
  }
28609
28567
  }
28610
28568
  ascentCache.clear();
@@ -28615,6 +28573,132 @@ async function createTextLayer(page, {
28615
28573
  return rootContainer;
28616
28574
  }
28617
28575
 
28576
+ // src/lib/WasmFactory.ts
28577
+ var WasmFactory = class {
28578
+ async fetch({
28579
+ filename
28580
+ }) {
28581
+ switch (filename) {
28582
+ case "openjpeg.wasm":
28583
+ return import("./openjpeg-QLA762TL.js").then(
28584
+ (module) => module.default
28585
+ );
28586
+ case "qcms_bg":
28587
+ return import("./qcms_bg-BCJEADMU.js").then(
28588
+ (module) => module.default
28589
+ );
28590
+ }
28591
+ return Uint8Array.from([]);
28592
+ }
28593
+ };
28594
+
28595
+ // src/lib/StandardFontDataFactory.ts
28596
+ var StandardFontDataFactory = class extends BaseStandardFontDataFactory {
28597
+ constructor() {
28598
+ super({
28599
+ baseUrl: null
28600
+ });
28601
+ }
28602
+ /**
28603
+ * Fetch the corresponding standard font data.
28604
+ * We need to use specific dynamic imports for each font file for the bundler to include them.
28605
+ */
28606
+ async fetch({
28607
+ filename
28608
+ }) {
28609
+ switch (filename) {
28610
+ case "FoxitDingbats.pfb":
28611
+ return import("./FoxitDingbats-XZTZYAP6.js").then((module) => module.default);
28612
+ case "FoxitFixed.pfb":
28613
+ return import("./FoxitFixed-DRWD6QNM.js").then(
28614
+ (module) => module.default
28615
+ );
28616
+ case "FoxitFixedBold.pfb":
28617
+ return import("./FoxitFixedBold-A3IBPIFC.js").then((module) => module.default);
28618
+ case "FoxitFixedBoldItalic.pfb":
28619
+ return import("./FoxitFixedBoldItalic-V4ORMFGL.js").then((module) => module.default);
28620
+ case "FoxitFixedItalic.pfb":
28621
+ return import("./FoxitFixedItalic-Z7BSNTJA.js").then((module) => module.default);
28622
+ case "FoxitSerif.pfb":
28623
+ return import("./FoxitSerif-Y34FHWHO.js").then(
28624
+ (module) => module.default
28625
+ );
28626
+ case "FoxitSerifBold.pfb":
28627
+ return import("./FoxitSerifBold-NCWBT4GX.js").then((module) => module.default);
28628
+ case "FoxitSerifBoldItalic.pfb":
28629
+ return import("./FoxitSerifBoldItalic-YTEOG5ZU.js").then((module) => module.default);
28630
+ case "FoxitSerifItalic.pfb":
28631
+ return import("./FoxitSerifItalic-3H547RIJ.js").then((module) => module.default);
28632
+ case "FoxitSymbol.pfb":
28633
+ return import("./FoxitSymbol-EMTQEYPB.js").then(
28634
+ (module) => module.default
28635
+ );
28636
+ case "LiberationSans-Bold.ttf":
28637
+ return import("./LiberationSans-Bold-MGX34QV4.js").then((module) => module.default);
28638
+ case "LiberationSans-BoldItalic.ttf":
28639
+ return import("./LiberationSans-BoldItalic-WSEQ5LH5.js").then((module) => module.default);
28640
+ case "LiberationSans-Italic.ttf":
28641
+ return import("./LiberationSans-Italic-E4SLDR4M.js").then((module) => module.default);
28642
+ case "LiberationSans-Regular.ttf":
28643
+ return import("./LiberationSans-Regular-3SH5NGZO.js").then((module) => module.default);
28644
+ }
28645
+ return Uint8Array.from([]);
28646
+ }
28647
+ };
28648
+
28649
+ // src/lib/CanvasGraphics.ts
28650
+ var {
28651
+ beginDrawing,
28652
+ beginText,
28653
+ endText,
28654
+ beginMarkedContent,
28655
+ beginMarkedContentProps,
28656
+ endMarkedContent
28657
+ } = CanvasGraphics.prototype;
28658
+ CanvasGraphics.prototype.beginDrawing = function(options) {
28659
+ if (this.ctx instanceof SvgCanvasContext) {
28660
+ options.transparency = false;
28661
+ }
28662
+ return beginDrawing.call(this, options);
28663
+ };
28664
+ CanvasGraphics.prototype.beginText = function(opIdx) {
28665
+ beginText.call(this, opIdx);
28666
+ if (this.ctx instanceof SvgCanvasContext) {
28667
+ this.ctx.beginText();
28668
+ }
28669
+ };
28670
+ CanvasGraphics.prototype.endText = function(opIdx) {
28671
+ endText.call(this, opIdx);
28672
+ if (this.ctx instanceof SvgCanvasContext) {
28673
+ this.ctx.endText();
28674
+ }
28675
+ };
28676
+ CanvasGraphics.prototype.beginMarkedContent = function(opIdx, type) {
28677
+ beginMarkedContent.call(this, opIdx, this.ctx);
28678
+ if (this.ctx instanceof SvgCanvasContext) {
28679
+ this.ctx.beginMarkedContent(type);
28680
+ }
28681
+ };
28682
+ CanvasGraphics.prototype.beginMarkedContentProps = function(opIdx, type, props) {
28683
+ beginMarkedContentProps.call(this, opIdx, type, props);
28684
+ if (this.ctx instanceof SvgCanvasContext) {
28685
+ this.ctx.beginMarkedContent(type, props);
28686
+ }
28687
+ };
28688
+ CanvasGraphics.prototype.endMarkedContent = function(opIdx) {
28689
+ if (this.ctx instanceof SvgCanvasContext) {
28690
+ this.ctx.endMarkedContent();
28691
+ }
28692
+ endMarkedContent.call(this, opIdx);
28693
+ };
28694
+ Object.assign(CanvasGraphics.prototype, {
28695
+ [OPS.beginText]: CanvasGraphics.prototype.beginText,
28696
+ [OPS.endText]: CanvasGraphics.prototype.endText,
28697
+ [OPS.beginMarkedContent]: CanvasGraphics.prototype.beginMarkedContent,
28698
+ [OPS.beginMarkedContentProps]: CanvasGraphics.prototype.beginMarkedContentProps,
28699
+ [OPS.endMarkedContent]: CanvasGraphics.prototype.endMarkedContent
28700
+ });
28701
+
28618
28702
  // node_modules/opentype.js/dist/opentype.module.js
28619
28703
  if (!String.prototype.codePointAt) {
28620
28704
  (function() {
@@ -40863,20 +40947,6 @@ var opentype = /* @__PURE__ */ Object.freeze({
40863
40947
  var opentype_module_default = opentype;
40864
40948
 
40865
40949
  // src/lib/TextLayer_v2.ts
40866
- var findNode = (node, callback) => {
40867
- for (let i = node.children.length - 1; i >= 0; i--) {
40868
- const child = node.children[i];
40869
- if (typeof child === "string") {
40870
- continue;
40871
- }
40872
- if (callback(child, node)) {
40873
- return;
40874
- }
40875
- if (isElementNode(child)) {
40876
- findNode(child, callback);
40877
- }
40878
- }
40879
- };
40880
40950
  var loadTextLayerFontsMap = /* @__PURE__ */ (() => {
40881
40951
  let promise = null;
40882
40952
  return () => {
@@ -40892,12 +40962,6 @@ var loadTextLayerFontsMap = /* @__PURE__ */ (() => {
40892
40962
  return promise;
40893
40963
  };
40894
40964
  })();
40895
- var computeText = (node) => {
40896
- if (typeof node.text === "string") {
40897
- return node.text;
40898
- }
40899
- return node.text.map((t) => t.text).join("");
40900
- };
40901
40965
  async function createTextLayerV2(page, {
40902
40966
  graphics,
40903
40967
  annotations: _annotations
@@ -41003,210 +41067,6 @@ async function createTextLayerV2(page, {
41003
41067
  });
41004
41068
  resetTextItem();
41005
41069
  };
// Recursively converts one structure-tree node into entries of the text-layer
// tree rooted at `rootContainer` (a variable of the enclosing function).
//   node    - a structure element (has a `role`) or a content leaf (no `role`;
//             its `id` is read in the else branch).
//   parents - stack of output containers; its last entry receives new output,
//             falling back to `rootContainer` when the stack is empty.
// Returns nothing; output is appended to `parent.children`.
41006
- const renderStructTreeNode = (node, parents) => {
41007
- let parent = parents.at(-1) || rootContainer;
41008
- if ("role" in node) {
// Roles are matched case-insensitively through this lowercased copy.
41009
- const role = node.role.toLowerCase();
41010
- switch (role) {
// root / document / art: no container of their own. Their attrs are merged
// into the current parent and the children are rendered with the same stack.
41011
- case "root":
41012
- case "document":
41013
- case "art":
41014
- parent.attrs ?? (parent.attrs = {});
41015
- Object.assign(parent.attrs, node.attrs);
41016
- for (const child of [...node.children]) {
41017
- renderStructTreeNode(child, parents);
41018
- }
41019
- break;
// part / sect: emitted as a "section" container.
41020
- case "part":
41021
- case "sect": {
41022
- const section = {
41023
- role: "section",
41024
- children: [],
41025
- attrs: node.attrs
41026
- };
// While any entry of `parents` has a heading role (h1..h6), pop the last
// entry and recompute `parent`. Note that pop() mutates the array object that
// was passed in by the caller.
41027
- while (parents.find(
41028
- (p) => ["h1", "h2", "h3", "h4", "h5", "h6"].includes(p.role)
41029
- )) {
41030
- parents.pop();
41031
- parent = parents.at(-1) || rootContainer;
41032
- }
41033
- parent.children.push(section);
41034
- for (const child of [...node.children]) {
41035
- renderStructTreeNode(child, [...parents, section]);
41036
- }
41037
- break;
41038
- }
// lbl / lbody / span: transparent. Children are rendered straight into the
// current parent.
41039
- case "lbl":
41040
- case "lbody":
41041
- case "span": {
41042
- for (const child of [...node.children]) {
41043
- renderStructTreeNode(child, parents);
41044
- }
41045
- break;
41046
- }
// link: the first child is treated as the reference to an annotation and the
// remaining children as the link content.
41047
- case "link": {
41048
- const [ref, ...children] = node.children;
// A reference whose type is not "object" cannot be resolved: warn, render the
// content without an anchor and stop.
41049
- if (ref.type !== "object") {
41050
- console.warn(
41051
- `Unsupported link type: ${ref.type}`
41052
- );
41053
- for (const child of children) {
41054
- renderStructTreeNode(child, parents);
41055
- }
41056
- return;
41057
- }
// Look the annotation up by id; `annotations` may be nullish (optional chaining).
41058
- const annotation = annotations?.find(
41059
- (note) => note.id === ref.id
41060
- );
41061
- if (!annotation) {
41062
- console.warn("Link not found", ref);
41063
- for (const child of children) {
41064
- renderStructTreeNode(child, parents);
41065
- }
41066
- return;
41067
- }
// Link annotations become an "a" node carrying the annotation's url; any other
// annotation subtype is reported and its content rendered without a wrapper.
41068
- if (isLinkAnnotation(annotation)) {
41069
- const anchor = {
41070
- role: "a",
41071
- href: annotation.url,
41072
- children: []
41073
- };
41074
- parent.children.push(anchor);
41075
- for (const child of children) {
41076
- renderStructTreeNode(child, [...parents, anchor]);
41077
- }
41078
- } else {
41079
- console.warn(
41080
- `Unsupported annotation subtype: ${annotation.subtype}`
41081
- );
41082
- for (const child of children) {
41083
- renderStructTreeNode(child, parents);
41084
- }
41085
- }
41086
- break;
41087
- }
// p: paragraphs without children produce no output.
41088
- case "p": {
41089
- if (node.children.length === 0) {
41090
- break;
41091
- }
// A paragraph whose only child has role "Table" is unwrapped: the child is
// rendered directly. The comparison uses the child's raw role string with a
// capital "T", not a lowercased copy.
41092
- if (node.children.length === 1 && node.children[0].role === "Table") {
41093
- renderStructTreeNode(node.children[0], parents);
41094
- break;
41095
- }
41096
- const paragraph = {
41097
- role: "p",
41098
- children: [],
41099
- attrs: node.attrs
41100
- };
41101
- parent.children.push(paragraph);
41102
- for (const child of [...node.children]) {
41103
- renderStructTreeNode(child, [...parents, paragraph]);
41104
- }
41105
- break;
41106
- }
// l: every list is emitted with role "ul".
41107
- case "l": {
41108
- const list = {
41109
- role: "ul",
41110
- children: [],
41111
- attrs: node.attrs
41112
- };
41113
- parent.children.push(list);
41114
- for (const child of [...node.children]) {
41115
- renderStructTreeNode(child, [...parents, list]);
41116
- }
41117
- break;
41118
- }
// Table parts, headings, list items and cells keep their lowercased role as
// the output role.
41119
- case "table":
41120
- case "thead":
41121
- case "tbody":
41122
- case "tfoot":
41123
- case "tr":
41124
- case "h1":
41125
- case "h2":
41126
- case "h3":
41127
- case "h4":
41128
- case "h5":
41129
- case "h6":
41130
- case "li":
41131
- case "td":
41132
- case "th": {
41133
- const block = {
41134
- role,
41135
- children: [],
41136
- attrs: node.attrs
41137
- };
41138
- parent.children.push(block);
41139
- for (const child of [...node.children]) {
41140
- renderStructTreeNode(child, [...parents, block]);
41141
- }
41142
- break;
41143
- }
// figure: produces output only when `graphics` is available. Each child that
// has an `id` is looked up among the SVG marked-content nodes by its href.
41144
- case "figure": {
41145
- if (graphics) {
41146
- for (const child of [...node.children]) {
41147
- if (!("id" in child)) {
41148
- continue;
41149
- }
41150
- const contentId = normalizeMarkedContentId(child.id);
41151
- const useId = `#${id("marked_content" /* MarkedContent */, contentId, graphics.attrs.id)}`;
41152
- const graphic = findSvgNode(
41153
- graphics,
41154
- (node2) => isSvgMarkedContent(node2) && node2.attrs.href === useId
41155
- );
// Children without a matching SVG node are dropped silently.
41156
- if (!graphic) {
41157
- continue;
41158
- }
// Missing (falsy) geometry attributes fall back to 0; `alt` comes from the
// figure element itself, not from the child.
41159
- parent.children.push({
41160
- role: "figure",
41161
- href: graphic.attrs.href,
41162
- x: graphic.attrs.x || 0,
41163
- y: graphic.attrs.y || 0,
41164
- width: graphic.attrs.width || 0,
41165
- height: graphic.attrs.height || 0,
41166
- alt: node.alt
41167
- });
41168
- }
41169
- }
41170
- break;
41171
- }
// Any other role is emitted as a generic "div" container.
41172
- default: {
41173
- const block = {
41174
- role: "div",
41175
- children: [],
41176
- attrs: node.attrs
41177
- };
41178
- parent.children.push(block);
41179
- for (const child of [...node.children]) {
41180
- renderStructTreeNode(child, [...parents, block]);
41181
- }
41182
- break;
41183
- }
41184
- }
// No `role`: a content leaf. The node already present in the tree under
// `rootContainer` with the same normalized id is detached from its current
// parent and re-attached under `parent`.
41185
- } else {
41186
- const contentId = normalizeMarkedContentId(node.id);
41187
- findNode(rootContainer, (child, previousParent) => {
41188
- if (child.id !== contentId) {
41189
- return;
41190
- }
41191
- previousParent.children = previousParent.children.filter(
41192
- (c) => c !== child
41193
- );
// Element nodes are unwrapped: their children, not the element, are moved.
41194
- if (isElementNode(child)) {
41195
- const children = child.children;
41196
- const lastTextItem = parent.children.at(-1);
// If the target already ends with a whitespace-only text node, leading
// whitespace-only text nodes of the moved children are dropped (shift()
// mutates `child.children` in place).
41197
- if (lastTextItem && typeof lastTextItem !== "string" && isTextNode(lastTextItem) && computeText(lastTextItem).trim() === "") {
41198
- while (children[0] && typeof children[0] !== "string" && isTextNode(children[0]) && computeText(children[0]).trim() === "") {
41199
- children.shift();
41200
- }
41201
- }
41202
- parent.children.push(...children);
41203
- } else {
41204
- parent.children.push(child);
41205
- }
// A truthy return tells findNode to stop its current loop.
41206
- return true;
41207
- });
41208
- }
41209
- };
41210
41070
  for (let i = 0; i < operatorsList.fnArray.length; i++) {
41211
41071
  const fnId = operatorsList.fnArray[i];
41212
41072
  const args = operatorsList.argsArray[i];
@@ -41404,9 +41264,22 @@ async function createTextLayerV2(page, {
41404
41264
  }
41405
41265
  }
41406
41266
  closeTextItem();
41407
- renderStructTreeNode(structTree, [rootContainer]);
41267
+ decorateStructTree(
41268
+ structTree,
41269
+ rootContainer,
41270
+ graphics,
41271
+ annotations
41272
+ );
41408
41273
  return rootContainer;
41409
41274
  }
41275
+
41276
+ // src/index.ts
// Groups four helpers (findNode, findNodes, getNodeContents, flattenNodes)
// into a single object; `textLayerUtils` is listed in the module's export
// block further down.
// NOTE(review): the four helpers are defined outside this chunk.
41277
+ var textLayerUtils = {
41278
+ findNode,
41279
+ findNodes,
41280
+ getNodeContents,
41281
+ flattenNodes
41282
+ };
41410
41283
  export {
41411
41284
  AbortException,
41412
41285
  AnnotationEditorLayer,
@@ -41456,7 +41329,6 @@ export {
41456
41329
  createValidAbsoluteUrl,
41457
41330
  destroySvgContext,
41458
41331
  fetchData,
41459
- fontLoader,
41460
41332
  getDocument,
41461
41333
  getFilenameFromUrl,
41462
41334
  getPdfFilenameFromUrl,
@@ -41508,7 +41380,6 @@ export {
41508
41380
  loadTextLayerFonts,
41509
41381
  makeSerializable,
41510
41382
  noContextMenu,
41511
- normalizeMarkedContentId,
41512
41383
  normalizeUnicode,
41513
41384
  parseRgbaColor,
41514
41385
  renderSvgNode,
@@ -41516,6 +41387,7 @@ export {
41516
41387
  setLayerDimensions,
41517
41388
  shadow,
41518
41389
  stopEvent,
41390
+ textLayerUtils,
41519
41391
  toDataUrl,
41520
41392
  toSvgNode,
41521
41393
  toSvgString,